/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
237ec681f3Smrg
247ec681f3Smrg#include "d3d12_compiler.h"
257ec681f3Smrg#include "d3d12_context.h"
267ec681f3Smrg#include "d3d12_debug.h"
277ec681f3Smrg#include "d3d12_screen.h"
287ec681f3Smrg#include "d3d12_nir_passes.h"
297ec681f3Smrg#include "nir_to_dxil.h"
307ec681f3Smrg#include "dxil_nir.h"
317ec681f3Smrg
327ec681f3Smrg#include "pipe/p_state.h"
337ec681f3Smrg
347ec681f3Smrg#include "nir.h"
357ec681f3Smrg#include "nir/nir_draw_helpers.h"
367ec681f3Smrg#include "nir/tgsi_to_nir.h"
377ec681f3Smrg#include "compiler/nir/nir_builder.h"
387ec681f3Smrg#include "tgsi/tgsi_from_mesa.h"
397ec681f3Smrg#include "tgsi/tgsi_ureg.h"
407ec681f3Smrg
417ec681f3Smrg#include "util/u_memory.h"
427ec681f3Smrg#include "util/u_prim.h"
437ec681f3Smrg#include "util/u_simple_shaders.h"
447ec681f3Smrg#include "util/u_dl.h"
457ec681f3Smrg
467ec681f3Smrg#include <directx/d3d12.h>
477ec681f3Smrg#include <dxguids/dxguids.h>
487ec681f3Smrg
497ec681f3Smrg#include <dxcapi.h>
507ec681f3Smrg#include <wrl/client.h>
517ec681f3Smrg
527ec681f3Smrgextern "C" {
537ec681f3Smrg#include "tgsi/tgsi_parse.h"
547ec681f3Smrg#include "tgsi/tgsi_point_sprite.h"
557ec681f3Smrg}
567ec681f3Smrg
577ec681f3Smrgusing Microsoft::WRL::ComPtr;
587ec681f3Smrg
597ec681f3Smrgstruct d3d12_validation_tools
607ec681f3Smrg{
617ec681f3Smrg   d3d12_validation_tools();
627ec681f3Smrg
637ec681f3Smrg   bool validate_and_sign(struct blob *dxil);
647ec681f3Smrg
657ec681f3Smrg   void disassemble(struct blob *dxil);
667ec681f3Smrg
677ec681f3Smrg   void load_dxil_dll();
687ec681f3Smrg
697ec681f3Smrg   struct HModule {
707ec681f3Smrg      HModule();
717ec681f3Smrg      ~HModule();
727ec681f3Smrg
737ec681f3Smrg      bool load(LPCSTR file_name);
747ec681f3Smrg      operator util_dl_library *() const;
757ec681f3Smrg   private:
767ec681f3Smrg      util_dl_library *module;
777ec681f3Smrg   };
787ec681f3Smrg
797ec681f3Smrg   HModule dxil_module;
807ec681f3Smrg   HModule dxc_compiler_module;
817ec681f3Smrg   ComPtr<IDxcCompiler> compiler;
827ec681f3Smrg   ComPtr<IDxcValidator> validator;
837ec681f3Smrg   ComPtr<IDxcLibrary> library;
847ec681f3Smrg};
857ec681f3Smrg
867ec681f3Smrgstruct d3d12_validation_tools *d3d12_validator_create()
877ec681f3Smrg{
887ec681f3Smrg   d3d12_validation_tools *tools = new d3d12_validation_tools();
897ec681f3Smrg   if (tools->validator)
907ec681f3Smrg      return tools;
917ec681f3Smrg   delete tools;
927ec681f3Smrg   return nullptr;
937ec681f3Smrg}
947ec681f3Smrg
/**
 * Delete a d3d12_validation_tools object.  Passing a null pointer is
 * harmless because `delete` on a null pointer does nothing.
 */
void
d3d12_validator_destroy(struct d3d12_validation_tools *validator)
{
   delete validator;
}
997ec681f3Smrg
1007ec681f3Smrg
1017ec681f3Smrgconst void *
1027ec681f3Smrgd3d12_get_compiler_options(struct pipe_screen *screen,
1037ec681f3Smrg                           enum pipe_shader_ir ir,
1047ec681f3Smrg                           enum pipe_shader_type shader)
1057ec681f3Smrg{
1067ec681f3Smrg   assert(ir == PIPE_SHADER_IR_NIR);
1077ec681f3Smrg   return dxil_get_nir_compiler_options();
1087ec681f3Smrg}
1097ec681f3Smrg
1107ec681f3Smrgstatic uint32_t
1117ec681f3Smrgresource_dimension(enum glsl_sampler_dim dim)
1127ec681f3Smrg{
1137ec681f3Smrg   switch (dim) {
1147ec681f3Smrg   case GLSL_SAMPLER_DIM_1D:
1157ec681f3Smrg      return RESOURCE_DIMENSION_TEXTURE1D;
1167ec681f3Smrg   case GLSL_SAMPLER_DIM_2D:
1177ec681f3Smrg      return RESOURCE_DIMENSION_TEXTURE2D;
1187ec681f3Smrg   case GLSL_SAMPLER_DIM_3D:
1197ec681f3Smrg      return RESOURCE_DIMENSION_TEXTURE3D;
1207ec681f3Smrg   case GLSL_SAMPLER_DIM_CUBE:
1217ec681f3Smrg      return RESOURCE_DIMENSION_TEXTURECUBE;
1227ec681f3Smrg   default:
1237ec681f3Smrg      return RESOURCE_DIMENSION_UNKNOWN;
1247ec681f3Smrg   }
1257ec681f3Smrg}
1267ec681f3Smrg
1277ec681f3Smrgstatic struct d3d12_shader *
1287ec681f3Smrgcompile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
1297ec681f3Smrg            struct d3d12_shader_key *key, struct nir_shader *nir)
1307ec681f3Smrg{
1317ec681f3Smrg   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
1327ec681f3Smrg   struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
1337ec681f3Smrg   shader->key = *key;
1347ec681f3Smrg   shader->nir = nir;
1357ec681f3Smrg   sel->current = shader;
1367ec681f3Smrg
1377ec681f3Smrg   NIR_PASS_V(nir, nir_lower_samplers);
1387ec681f3Smrg   NIR_PASS_V(nir, dxil_nir_create_bare_samplers);
1397ec681f3Smrg
1407ec681f3Smrg   if (key->samples_int_textures)
1417ec681f3Smrg      NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
1427ec681f3Smrg                 key->tex_wrap_states, key->swizzle_state,
1437ec681f3Smrg                 screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));
1447ec681f3Smrg
1457ec681f3Smrg   if (key->vs.needs_format_emulation)
1467ec681f3Smrg      d3d12_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
1477ec681f3Smrg
1487ec681f3Smrg   uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos;
1497ec681f3Smrg   uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms;
1507ec681f3Smrg   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
1517ec681f3Smrg   shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 &&
1527ec681f3Smrg                              nir->info.num_ubos > num_ubos_before_lower_to_ubo;
1537ec681f3Smrg
1547ec681f3Smrg   if (key->last_vertex_processing_stage) {
1557ec681f3Smrg      if (key->invert_depth)
1567ec681f3Smrg         NIR_PASS_V(nir, d3d12_nir_invert_depth);
1577ec681f3Smrg      NIR_PASS_V(nir, nir_lower_clip_halfz);
1587ec681f3Smrg      NIR_PASS_V(nir, d3d12_lower_yflip);
1597ec681f3Smrg   }
1607ec681f3Smrg   NIR_PASS_V(nir, nir_lower_packed_ubo_loads);
1617ec681f3Smrg   NIR_PASS_V(nir, d3d12_lower_load_first_vertex);
1627ec681f3Smrg   NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
1637ec681f3Smrg   NIR_PASS_V(nir, dxil_nir_lower_bool_input);
1647ec681f3Smrg
1657ec681f3Smrg   struct nir_to_dxil_options opts = {};
1667ec681f3Smrg   opts.interpolate_at_vertex = screen->have_load_at_vertex;
1677ec681f3Smrg   opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
1687ec681f3Smrg   opts.ubo_binding_offset = shader->has_default_ubo0 ? 0 : 1;
1697ec681f3Smrg   opts.provoking_vertex = key->fs.provoking_vertex;
1707ec681f3Smrg
1717ec681f3Smrg   struct blob tmp;
1727ec681f3Smrg   if (!nir_to_dxil(nir, &opts, &tmp)) {
1737ec681f3Smrg      debug_printf("D3D12: nir_to_dxil failed\n");
1747ec681f3Smrg      return NULL;
1757ec681f3Smrg   }
1767ec681f3Smrg
1777ec681f3Smrg   // Non-ubo variables
1787ec681f3Smrg   shader->begin_srv_binding = (UINT_MAX);
1797ec681f3Smrg   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
1807ec681f3Smrg      auto type = glsl_without_array(var->type);
1817ec681f3Smrg      if (glsl_type_is_sampler(type) && glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
1827ec681f3Smrg         unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
1837ec681f3Smrg         for (unsigned i = 0; i < count; ++i) {
1847ec681f3Smrg            shader->srv_bindings[var->data.binding + i].binding = var->data.binding;
1857ec681f3Smrg            shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type));
1867ec681f3Smrg         }
1877ec681f3Smrg         shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
1887ec681f3Smrg         shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
1897ec681f3Smrg      }
1907ec681f3Smrg   }
1917ec681f3Smrg
1927ec681f3Smrg   // Ubo variables
1937ec681f3Smrg   if(nir->info.num_ubos) {
1947ec681f3Smrg      // Ignore state_vars ubo as it is bound as root constants
1957ec681f3Smrg      unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
1967ec681f3Smrg      for(unsigned i = opts.ubo_binding_offset; i < num_ubo_bindings; ++i) {
1977ec681f3Smrg         shader->cb_bindings[shader->num_cb_bindings++].binding = i;
1987ec681f3Smrg      }
1997ec681f3Smrg   }
2007ec681f3Smrg   if (ctx->validation_tools) {
2017ec681f3Smrg      ctx->validation_tools->validate_and_sign(&tmp);
2027ec681f3Smrg
2037ec681f3Smrg      if (d3d12_debug & D3D12_DEBUG_DISASS) {
2047ec681f3Smrg         ctx->validation_tools->disassemble(&tmp);
2057ec681f3Smrg      }
2067ec681f3Smrg   }
2077ec681f3Smrg
2087ec681f3Smrg   blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
2097ec681f3Smrg
2107ec681f3Smrg   if (d3d12_debug & D3D12_DEBUG_DXIL) {
2117ec681f3Smrg      char buf[256];
2127ec681f3Smrg      static int i;
2137ec681f3Smrg      snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
2147ec681f3Smrg      FILE *fp = fopen(buf, "wb");
2157ec681f3Smrg      fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
2167ec681f3Smrg      fclose(fp);
2177ec681f3Smrg      fprintf(stderr, "wrote '%s'...\n", buf);
2187ec681f3Smrg   }
2197ec681f3Smrg   return shader;
2207ec681f3Smrg}
2217ec681f3Smrg
/**
 * Per-draw scratch state shared by the shader-variant selection helpers in
 * this file.
 */
struct d3d12_selection_context {
   /* Context whose bound stages and pipeline state are inspected. */
   struct d3d12_context *ctx;
   /* Draw being processed; get_provoking_vertex() tolerates NULL here. */
   const struct pipe_draw_info *dinfo;
   /* Selects the passthrough geometry shader variant. */
   bool needs_point_sprite_lowering;
   /* Selects the vertex-reordering geometry shader variant. */
   bool needs_vertex_reordering;
   /* Provoking vertex index; reset to 0 by needs_vertex_reordering() in the
    * transform-feedback case. */
   unsigned provoking_vertex;
   /* Set by get_provoking_vertex() via its `alternate` out-parameter
    * (presumably; the caller is not visible here). */
   bool alternate_tri;
   /* PIPE_POLYGON_MODE_* copied into the geometry shader variant key. */
   unsigned fill_mode_lowered;
   /* PIPE_FACE_* copied into the geometry shader variant key. */
   unsigned cull_mode_lowered;
   /* The three fields below presumably cache the results of the same-named
    * helper functions -- TODO confirm against the code that fills them. */
   bool manual_depth_range;
   unsigned missing_dual_src_outputs;
   unsigned frag_result_color_lowering;
};
2357ec681f3Smrg
2367ec681f3Smrgstatic unsigned
2377ec681f3Smrgmissing_dual_src_outputs(struct d3d12_context *ctx)
2387ec681f3Smrg{
2397ec681f3Smrg   if (!ctx->gfx_pipeline_state.blend->is_dual_src)
2407ec681f3Smrg      return 0;
2417ec681f3Smrg
2427ec681f3Smrg   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
2437ec681f3Smrg   nir_shader *s = fs->initial;
2447ec681f3Smrg
2457ec681f3Smrg   unsigned indices_seen = 0;
2467ec681f3Smrg   nir_foreach_function(function, s) {
2477ec681f3Smrg      if (function->impl) {
2487ec681f3Smrg         nir_foreach_block(block, function->impl) {
2497ec681f3Smrg            nir_foreach_instr(instr, block) {
2507ec681f3Smrg               if (instr->type != nir_instr_type_intrinsic)
2517ec681f3Smrg                  continue;
2527ec681f3Smrg
2537ec681f3Smrg               nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2547ec681f3Smrg               if (intr->intrinsic != nir_intrinsic_store_deref)
2557ec681f3Smrg                  continue;
2567ec681f3Smrg
2577ec681f3Smrg               nir_variable *var = nir_intrinsic_get_var(intr, 0);
2587ec681f3Smrg               if (var->data.mode != nir_var_shader_out ||
2597ec681f3Smrg                   (var->data.location != FRAG_RESULT_COLOR &&
2607ec681f3Smrg                    var->data.location != FRAG_RESULT_DATA0))
2617ec681f3Smrg                  continue;
2627ec681f3Smrg
2637ec681f3Smrg               indices_seen |= 1u << var->data.index;
2647ec681f3Smrg               if ((indices_seen & 3) == 3)
2657ec681f3Smrg                  return 0;
2667ec681f3Smrg            }
2677ec681f3Smrg         }
2687ec681f3Smrg      }
2697ec681f3Smrg   }
2707ec681f3Smrg
2717ec681f3Smrg   return 3 & ~indices_seen;
2727ec681f3Smrg}
2737ec681f3Smrg
2747ec681f3Smrgstatic unsigned
2757ec681f3Smrgfrag_result_color_lowering(struct d3d12_context *ctx)
2767ec681f3Smrg{
2777ec681f3Smrg   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
2787ec681f3Smrg   assert(fs);
2797ec681f3Smrg
2807ec681f3Smrg   if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
2817ec681f3Smrg      return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;
2827ec681f3Smrg
2837ec681f3Smrg   return 0;
2847ec681f3Smrg}
2857ec681f3Smrg
2867ec681f3Smrgstatic bool
2877ec681f3Smrgmanual_depth_range(struct d3d12_context *ctx)
2887ec681f3Smrg{
2897ec681f3Smrg   if (!d3d12_need_zero_one_depth_range(ctx))
2907ec681f3Smrg      return false;
2917ec681f3Smrg
2927ec681f3Smrg   /**
2937ec681f3Smrg    * If we can't use the D3D12 zero-one depth-range, we might have to apply
2947ec681f3Smrg    * depth-range ourselves.
2957ec681f3Smrg    *
2967ec681f3Smrg    * Because we only need to override the depth-range to zero-one range in
2977ec681f3Smrg    * the case where we write frag-depth, we only need to apply manual
2987ec681f3Smrg    * depth-range to gl_FragCoord.z.
2997ec681f3Smrg    *
3007ec681f3Smrg    * No extra care is needed to be taken in the case where gl_FragDepth is
3017ec681f3Smrg    * written conditionally, because the GLSL 4.60 spec states:
3027ec681f3Smrg    *
3037ec681f3Smrg    *    If a shader statically assigns a value to gl_FragDepth, and there
3047ec681f3Smrg    *    is an execution path through the shader that does not set
3057ec681f3Smrg    *    gl_FragDepth, then the value of the fragment’s depth may be
3067ec681f3Smrg    *    undefined for executions of the shader that take that path. That
3077ec681f3Smrg    *    is, if the set of linked fragment shaders statically contain a
3087ec681f3Smrg    *    write to gl_FragDepth, then it is responsible for always writing
3097ec681f3Smrg    *    it.
3107ec681f3Smrg    */
3117ec681f3Smrg
3127ec681f3Smrg   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
3137ec681f3Smrg   return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
3147ec681f3Smrg}
3157ec681f3Smrg
3167ec681f3Smrgstatic bool
3177ec681f3Smrgneeds_edge_flag_fix(enum pipe_prim_type mode)
3187ec681f3Smrg{
3197ec681f3Smrg   return (mode == PIPE_PRIM_QUADS ||
3207ec681f3Smrg           mode == PIPE_PRIM_QUAD_STRIP ||
3217ec681f3Smrg           mode == PIPE_PRIM_POLYGON);
3227ec681f3Smrg}
3237ec681f3Smrg
3247ec681f3Smrgstatic unsigned
3257ec681f3Smrgfill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
3267ec681f3Smrg{
3277ec681f3Smrg   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
3287ec681f3Smrg
3297ec681f3Smrg   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
3307ec681f3Smrg        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant) ||
3317ec681f3Smrg       ctx->gfx_pipeline_state.rast == NULL ||
3327ec681f3Smrg       (dinfo->mode != PIPE_PRIM_TRIANGLES &&
3337ec681f3Smrg        dinfo->mode != PIPE_PRIM_TRIANGLE_STRIP))
3347ec681f3Smrg      return PIPE_POLYGON_MODE_FILL;
3357ec681f3Smrg
3367ec681f3Smrg   /* D3D12 supports line mode (wireframe) but doesn't support edge flags */
3377ec681f3Smrg   if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
3387ec681f3Smrg         ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
3397ec681f3Smrg        (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
3407ec681f3Smrg         ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
3417ec681f3Smrg       (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
3427ec681f3Smrg        needs_edge_flag_fix(ctx->initial_api_prim)))
3437ec681f3Smrg      return PIPE_POLYGON_MODE_LINE;
3447ec681f3Smrg
3457ec681f3Smrg   if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
3467ec681f3Smrg      return PIPE_POLYGON_MODE_POINT;
3477ec681f3Smrg
3487ec681f3Smrg   return PIPE_POLYGON_MODE_FILL;
3497ec681f3Smrg}
3507ec681f3Smrg
3517ec681f3Smrgstatic bool
3527ec681f3Smrgneeds_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
3537ec681f3Smrg{
3547ec681f3Smrg   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
3557ec681f3Smrg   struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
3567ec681f3Smrg
3577ec681f3Smrg   if (gs != NULL && !gs->is_gs_variant) {
3587ec681f3Smrg      /* There is an user GS; Check if it outputs points with PSIZE */
3597ec681f3Smrg      return (gs->initial->info.gs.output_primitive == GL_POINTS &&
3607ec681f3Smrg              gs->initial->info.outputs_written & VARYING_BIT_PSIZ);
3617ec681f3Smrg   } else {
3627ec681f3Smrg      /* No user GS; check if we are drawing wide points */
3637ec681f3Smrg      return ((dinfo->mode == PIPE_PRIM_POINTS ||
3647ec681f3Smrg               fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
3657ec681f3Smrg              (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
3667ec681f3Smrg               ctx->gfx_pipeline_state.rast->base.offset_point ||
3677ec681f3Smrg               (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
3687ec681f3Smrg                vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
3697ec681f3Smrg              (vs->initial->info.outputs_written & VARYING_BIT_POS));
3707ec681f3Smrg   }
3717ec681f3Smrg}
3727ec681f3Smrg
3737ec681f3Smrgstatic unsigned
3747ec681f3Smrgcull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
3757ec681f3Smrg{
3767ec681f3Smrg   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
3777ec681f3Smrg        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant) ||
3787ec681f3Smrg       ctx->gfx_pipeline_state.rast == NULL ||
3797ec681f3Smrg       ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
3807ec681f3Smrg      return PIPE_FACE_NONE;
3817ec681f3Smrg
3827ec681f3Smrg   return ctx->gfx_pipeline_state.rast->base.cull_face;
3837ec681f3Smrg}
3847ec681f3Smrg
3857ec681f3Smrgstatic unsigned
3867ec681f3Smrgget_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate)
3877ec681f3Smrg{
3887ec681f3Smrg   struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
3897ec681f3Smrg   struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
3907ec681f3Smrg   struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_gs_variant ? gs : vs;
3917ec681f3Smrg
3927ec681f3Smrg   /* Make sure GL prims match Gallium prims */
3937ec681f3Smrg   STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS);
3947ec681f3Smrg   STATIC_ASSERT(GL_LINES == PIPE_PRIM_LINES);
3957ec681f3Smrg   STATIC_ASSERT(GL_LINE_STRIP == PIPE_PRIM_LINE_STRIP);
3967ec681f3Smrg
3977ec681f3Smrg   enum pipe_prim_type mode;
3987ec681f3Smrg   switch (last_vertex_stage->stage) {
3997ec681f3Smrg   case PIPE_SHADER_GEOMETRY:
4007ec681f3Smrg      mode = (enum pipe_prim_type)last_vertex_stage->current->nir->info.gs.output_primitive;
4017ec681f3Smrg      break;
4027ec681f3Smrg   case PIPE_SHADER_VERTEX:
4037ec681f3Smrg      mode = sel_ctx->dinfo ? (enum pipe_prim_type)sel_ctx->dinfo->mode : PIPE_PRIM_TRIANGLES;
4047ec681f3Smrg      break;
4057ec681f3Smrg   default:
4067ec681f3Smrg      unreachable("Tesselation shaders are not supported");
4077ec681f3Smrg   }
4087ec681f3Smrg
4097ec681f3Smrg   bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
4107ec681f3Smrg                          sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
4117ec681f3Smrg   *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
4127ec681f3Smrg                (!gs || gs->is_gs_variant ||
4137ec681f3Smrg                 gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
4147ec681f3Smrg   return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
4157ec681f3Smrg}
4167ec681f3Smrg
4177ec681f3Smrgstatic bool
4187ec681f3Smrghas_flat_varyings(struct d3d12_context *ctx)
4197ec681f3Smrg{
4207ec681f3Smrg   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
4217ec681f3Smrg
4227ec681f3Smrg   if (!fs || !fs->current)
4237ec681f3Smrg      return false;
4247ec681f3Smrg
4257ec681f3Smrg   nir_foreach_variable_with_modes(input, fs->current->nir,
4267ec681f3Smrg                                   nir_var_shader_in) {
4277ec681f3Smrg      if (input->data.interpolation == INTERP_MODE_FLAT)
4287ec681f3Smrg         return true;
4297ec681f3Smrg   }
4307ec681f3Smrg
4317ec681f3Smrg   return false;
4327ec681f3Smrg}
4337ec681f3Smrg
4347ec681f3Smrgstatic bool
4357ec681f3Smrgneeds_vertex_reordering(struct d3d12_selection_context *sel_ctx)
4367ec681f3Smrg{
4377ec681f3Smrg   struct d3d12_context *ctx = sel_ctx->ctx;
4387ec681f3Smrg   bool flat = has_flat_varyings(ctx);
4397ec681f3Smrg   bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;
4407ec681f3Smrg
4417ec681f3Smrg   if (fill_mode_lowered(ctx, sel_ctx->dinfo) != PIPE_POLYGON_MODE_FILL)
4427ec681f3Smrg      return false;
4437ec681f3Smrg
4447ec681f3Smrg   /* TODO add support for line primitives */
4457ec681f3Smrg
4467ec681f3Smrg   /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex.
4477ec681f3Smrg      If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */
4487ec681f3Smrg   if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
4497ec681f3Smrg                                                  sel_ctx->alternate_tri))
4507ec681f3Smrg      return true;
4517ec681f3Smrg
4527ec681f3Smrg   /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
4537ec681f3Smrg      strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
4547ec681f3Smrg      only works when there is no flat shading involved. In that scenario, we don't care about
4557ec681f3Smrg      the provoking vertex. */
4567ec681f3Smrg   if (xfb && !flat && sel_ctx->alternate_tri) {
4577ec681f3Smrg      sel_ctx->provoking_vertex = 0;
4587ec681f3Smrg      return true;
4597ec681f3Smrg   }
4607ec681f3Smrg
4617ec681f3Smrg   return false;
4627ec681f3Smrg}
4637ec681f3Smrg
4647ec681f3Smrgstatic nir_variable *
4657ec681f3Smrgcreate_varying_from_info(nir_shader *nir, struct d3d12_varying_info *info,
4667ec681f3Smrg                         unsigned slot, nir_variable_mode mode)
4677ec681f3Smrg{
4687ec681f3Smrg   nir_variable *var;
4697ec681f3Smrg   char tmp[100];
4707ec681f3Smrg
4717ec681f3Smrg   snprintf(tmp, ARRAY_SIZE(tmp),
4727ec681f3Smrg            mode == nir_var_shader_in ? "in_%d" : "out_%d",
4737ec681f3Smrg            info->vars[slot].driver_location);
4747ec681f3Smrg   var = nir_variable_create(nir, mode, info->vars[slot].type, tmp);
4757ec681f3Smrg   var->data.location = slot;
4767ec681f3Smrg   var->data.driver_location = info->vars[slot].driver_location;
4777ec681f3Smrg   var->data.interpolation = info->vars[slot].interpolation;
4787ec681f3Smrg
4797ec681f3Smrg   return var;
4807ec681f3Smrg}
4817ec681f3Smrg
4827ec681f3Smrgstatic void
4837ec681f3Smrgfill_varyings(struct d3d12_varying_info *info, nir_shader *s,
4847ec681f3Smrg              nir_variable_mode modes, uint64_t mask)
4857ec681f3Smrg{
4867ec681f3Smrg   nir_foreach_variable_with_modes(var, s, modes) {
4877ec681f3Smrg      unsigned slot = var->data.location;
4887ec681f3Smrg      uint64_t slot_bit = BITFIELD64_BIT(slot);
4897ec681f3Smrg
4907ec681f3Smrg      if (!(mask & slot_bit))
4917ec681f3Smrg         continue;
4927ec681f3Smrg      info->vars[slot].driver_location = var->data.driver_location;
4937ec681f3Smrg      info->vars[slot].type = var->type;
4947ec681f3Smrg      info->vars[slot].interpolation = var->data.interpolation;
4957ec681f3Smrg      info->mask |= slot_bit;
4967ec681f3Smrg   }
4977ec681f3Smrg}
4987ec681f3Smrg
4997ec681f3Smrgstatic void
5007ec681f3Smrgfill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
5017ec681f3Smrg{
5027ec681f3Smrg   if (!fs || !fs->current)
5037ec681f3Smrg      return;
5047ec681f3Smrg
5057ec681f3Smrg   nir_foreach_variable_with_modes(input, fs->current->nir,
5067ec681f3Smrg                                   nir_var_shader_in) {
5077ec681f3Smrg      if (input->data.interpolation == INTERP_MODE_FLAT)
5087ec681f3Smrg         key->flat_varyings |= BITFIELD64_BIT(input->data.location);
5097ec681f3Smrg   }
5107ec681f3Smrg}
5117ec681f3Smrg
5127ec681f3Smrgstatic void
5137ec681f3Smrgvalidate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
5147ec681f3Smrg{
5157ec681f3Smrg   struct d3d12_context *ctx = sel_ctx->ctx;
5167ec681f3Smrg   d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
5177ec681f3Smrg   d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
5187ec681f3Smrg   struct d3d12_gs_variant_key key = {0};
5197ec681f3Smrg   bool variant_needed = false;
5207ec681f3Smrg
5217ec681f3Smrg   d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
5227ec681f3Smrg
5237ec681f3Smrg   /* Nothing to do if there is a user geometry shader bound */
5247ec681f3Smrg   if (gs != NULL && !gs->is_gs_variant)
5257ec681f3Smrg      return;
5267ec681f3Smrg
5277ec681f3Smrg   /* Fill the geometry shader variant key */
5287ec681f3Smrg   if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
5297ec681f3Smrg      key.fill_mode = sel_ctx->fill_mode_lowered;
5307ec681f3Smrg      key.cull_mode = sel_ctx->cull_mode_lowered;
5317ec681f3Smrg      key.has_front_face = BITSET_TEST(fs->initial->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
5327ec681f3Smrg      if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
5337ec681f3Smrg         key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
5347ec681f3Smrg      key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
5357ec681f3Smrg      fill_flat_varyings(&key, fs);
5367ec681f3Smrg      if (key.flat_varyings != 0)
5377ec681f3Smrg         key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
5387ec681f3Smrg      variant_needed = true;
5397ec681f3Smrg   } else if (sel_ctx->needs_point_sprite_lowering) {
5407ec681f3Smrg      key.passthrough = true;
5417ec681f3Smrg      variant_needed = true;
5427ec681f3Smrg   } else if (sel_ctx->needs_vertex_reordering) {
5437ec681f3Smrg      /* TODO support cases where flat shading (pv != 0) and xfb are enabled */
5447ec681f3Smrg      key.provoking_vertex = sel_ctx->provoking_vertex;
5457ec681f3Smrg      key.alternate_tri = sel_ctx->alternate_tri;
5467ec681f3Smrg      variant_needed = true;
5477ec681f3Smrg   }
5487ec681f3Smrg
5497ec681f3Smrg   if (variant_needed) {
5507ec681f3Smrg      fill_varyings(&key.varyings, vs->initial, nir_var_shader_out,
5517ec681f3Smrg                    vs->initial->info.outputs_written);
5527ec681f3Smrg   }
5537ec681f3Smrg
5547ec681f3Smrg   /* Check if the currently bound geometry shader variant is correct */
5557ec681f3Smrg   if (gs && memcmp(&gs->gs_key, &key, sizeof(key)) == 0)
5567ec681f3Smrg      return;
5577ec681f3Smrg
5587ec681f3Smrg   /* Find/create the proper variant and bind it */
5597ec681f3Smrg   gs = variant_needed ? d3d12_get_gs_variant(ctx, &key) : NULL;
5607ec681f3Smrg   ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
5617ec681f3Smrg}
5627ec681f3Smrg
5637ec681f3Smrgstatic bool
5647ec681f3Smrgd3d12_compare_shader_keys(const d3d12_shader_key *expect, const d3d12_shader_key *have)
5657ec681f3Smrg{
5667ec681f3Smrg   assert(expect->stage == have->stage);
5677ec681f3Smrg   assert(expect);
5687ec681f3Smrg   assert(have);
5697ec681f3Smrg
5707ec681f3Smrg   /* Because we only add varyings we check that a shader has at least the expected in-
5717ec681f3Smrg    * and outputs. */
5727ec681f3Smrg   if (memcmp(&expect->required_varying_inputs, &have->required_varying_inputs,
5737ec681f3Smrg              sizeof(struct d3d12_varying_info)) ||
5747ec681f3Smrg       memcmp(&expect->required_varying_outputs, &have->required_varying_outputs,
5757ec681f3Smrg              sizeof(struct d3d12_varying_info)) ||
5767ec681f3Smrg       (expect->next_varying_inputs != have->next_varying_inputs) ||
5777ec681f3Smrg       (expect->prev_varying_outputs != have->prev_varying_outputs))
5787ec681f3Smrg      return false;
5797ec681f3Smrg
5807ec681f3Smrg   if (expect->stage == PIPE_SHADER_GEOMETRY) {
5817ec681f3Smrg      if (expect->gs.writes_psize) {
5827ec681f3Smrg         if (!have->gs.writes_psize ||
5837ec681f3Smrg             expect->gs.point_pos_stream_out != have->gs.point_pos_stream_out ||
5847ec681f3Smrg             expect->gs.sprite_coord_enable != have->gs.sprite_coord_enable ||
5857ec681f3Smrg             expect->gs.sprite_origin_upper_left != have->gs.sprite_origin_upper_left ||
5867ec681f3Smrg             expect->gs.point_size_per_vertex != have->gs.point_size_per_vertex)
5877ec681f3Smrg            return false;
5887ec681f3Smrg      } else if (have->gs.writes_psize) {
5897ec681f3Smrg         return false;
5907ec681f3Smrg      }
5917ec681f3Smrg      if (expect->gs.primitive_id != have->gs.primitive_id ||
5927ec681f3Smrg          expect->gs.triangle_strip != have->gs.triangle_strip)
5937ec681f3Smrg         return false;
5947ec681f3Smrg   } else if (expect->stage == PIPE_SHADER_FRAGMENT) {
5957ec681f3Smrg      if (expect->fs.frag_result_color_lowering != have->fs.frag_result_color_lowering ||
5967ec681f3Smrg          expect->fs.manual_depth_range != have->fs.manual_depth_range ||
5977ec681f3Smrg          expect->fs.polygon_stipple != have->fs.polygon_stipple ||
5987ec681f3Smrg          expect->fs.cast_to_uint != have->fs.cast_to_uint ||
5997ec681f3Smrg          expect->fs.cast_to_int != have->fs.cast_to_int)
6007ec681f3Smrg         return false;
6017ec681f3Smrg   }
6027ec681f3Smrg
6037ec681f3Smrg   if (expect->tex_saturate_s != have->tex_saturate_s ||
6047ec681f3Smrg       expect->tex_saturate_r != have->tex_saturate_r ||
6057ec681f3Smrg       expect->tex_saturate_t != have->tex_saturate_t)
6067ec681f3Smrg      return false;
6077ec681f3Smrg
6087ec681f3Smrg   if (expect->samples_int_textures != have->samples_int_textures)
6097ec681f3Smrg      return false;
6107ec681f3Smrg
6117ec681f3Smrg   if (expect->n_texture_states != have->n_texture_states)
6127ec681f3Smrg      return false;
6137ec681f3Smrg
6147ec681f3Smrg   if (memcmp(expect->tex_wrap_states, have->tex_wrap_states,
6157ec681f3Smrg              expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
6167ec681f3Smrg      return false;
6177ec681f3Smrg
6187ec681f3Smrg   if (memcmp(expect->swizzle_state, have->swizzle_state,
6197ec681f3Smrg              expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
6207ec681f3Smrg      return false;
6217ec681f3Smrg
6227ec681f3Smrg   if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
6237ec681f3Smrg              expect->n_texture_states * sizeof(enum compare_func)))
6247ec681f3Smrg      return false;
6257ec681f3Smrg
6267ec681f3Smrg   if (expect->invert_depth != have->invert_depth)
6277ec681f3Smrg      return false;
6287ec681f3Smrg
6297ec681f3Smrg   if (expect->stage == PIPE_SHADER_VERTEX) {
6307ec681f3Smrg      if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
6317ec681f3Smrg         return false;
6327ec681f3Smrg
6337ec681f3Smrg      if (expect->vs.needs_format_emulation) {
6347ec681f3Smrg         if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
6357ec681f3Smrg                    PIPE_MAX_ATTRIBS * sizeof (enum pipe_format)))
6367ec681f3Smrg            return false;
6377ec681f3Smrg      }
6387ec681f3Smrg   }
6397ec681f3Smrg
6407ec681f3Smrg   if (expect->fs.provoking_vertex != have->fs.provoking_vertex)
6417ec681f3Smrg      return false;
6427ec681f3Smrg
6437ec681f3Smrg   return true;
6447ec681f3Smrg}
6457ec681f3Smrg
6467ec681f3Smrgstatic void
6477ec681f3Smrgd3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
6487ec681f3Smrg                      d3d12_shader_key *key, d3d12_shader_selector *sel,
6497ec681f3Smrg                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
6507ec681f3Smrg{
6517ec681f3Smrg   pipe_shader_type stage = sel->stage;
6527ec681f3Smrg
6537ec681f3Smrg   uint64_t system_generated_in_values =
6547ec681f3Smrg         VARYING_BIT_PNTC |
6557ec681f3Smrg         VARYING_BIT_PRIMITIVE_ID;
6567ec681f3Smrg
6577ec681f3Smrg   uint64_t system_out_values =
6587ec681f3Smrg         VARYING_BIT_CLIP_DIST0 |
6597ec681f3Smrg         VARYING_BIT_CLIP_DIST1;
6607ec681f3Smrg
6617ec681f3Smrg   memset(key, 0, sizeof(d3d12_shader_key));
6627ec681f3Smrg   key->stage = stage;
6637ec681f3Smrg
6647ec681f3Smrg   if (prev) {
6657ec681f3Smrg      /* We require as inputs what the previous stage has written,
6667ec681f3Smrg       * except certain system values */
6677ec681f3Smrg      if (stage == PIPE_SHADER_FRAGMENT || stage == PIPE_SHADER_GEOMETRY)
6687ec681f3Smrg         system_out_values |= VARYING_BIT_POS;
6697ec681f3Smrg      if (stage == PIPE_SHADER_FRAGMENT)
6707ec681f3Smrg         system_out_values |= VARYING_BIT_PSIZ;
6717ec681f3Smrg      uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values;
6727ec681f3Smrg      fill_varyings(&key->required_varying_inputs, prev->current->nir,
6737ec681f3Smrg                    nir_var_shader_out, mask);
6747ec681f3Smrg      key->prev_varying_outputs = prev->current->nir->info.outputs_written;
6757ec681f3Smrg
6767ec681f3Smrg      /* Set the provoking vertex based on the previous shader output. Only set the
6777ec681f3Smrg       * key value if the driver actually supports changing the provoking vertex though */
6787ec681f3Smrg      if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
6797ec681f3Smrg          !sel_ctx->needs_vertex_reordering &&
6807ec681f3Smrg          d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
6817ec681f3Smrg         key->fs.provoking_vertex = sel_ctx->provoking_vertex;
6827ec681f3Smrg   }
6837ec681f3Smrg
6847ec681f3Smrg   /* We require as outputs what the next stage reads,
6857ec681f3Smrg    * except certain system values */
6867ec681f3Smrg   if (next) {
6877ec681f3Smrg      if (!next->is_gs_variant) {
6887ec681f3Smrg         if (stage == PIPE_SHADER_VERTEX)
6897ec681f3Smrg            system_generated_in_values |= VARYING_BIT_POS;
6907ec681f3Smrg         uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values;
6917ec681f3Smrg         fill_varyings(&key->required_varying_outputs, next->current->nir,
6927ec681f3Smrg                       nir_var_shader_in, mask);
6937ec681f3Smrg      }
6947ec681f3Smrg      key->next_varying_inputs = next->current->nir->info.inputs_read;
6957ec681f3Smrg   }
6967ec681f3Smrg
6977ec681f3Smrg   if (stage == PIPE_SHADER_GEOMETRY ||
6987ec681f3Smrg       (stage == PIPE_SHADER_VERTEX && (!next || next->stage != PIPE_SHADER_GEOMETRY))) {
6997ec681f3Smrg      key->last_vertex_processing_stage = 1;
7007ec681f3Smrg      key->invert_depth = sel_ctx->ctx->reverse_depth_range;
7017ec681f3Smrg      if (sel_ctx->ctx->pstipple.enabled)
7027ec681f3Smrg         key->next_varying_inputs |= VARYING_BIT_POS;
7037ec681f3Smrg   }
7047ec681f3Smrg
7057ec681f3Smrg   if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
7067ec681f3Smrg      struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
7077ec681f3Smrg      if (sel_ctx->needs_point_sprite_lowering) {
7087ec681f3Smrg         key->gs.writes_psize = 1;
7097ec681f3Smrg         key->gs.point_size_per_vertex = rast->point_size_per_vertex;
7107ec681f3Smrg         key->gs.sprite_coord_enable = rast->sprite_coord_enable;
7117ec681f3Smrg         key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
7127ec681f3Smrg         if (sel_ctx->ctx->flip_y < 0)
7137ec681f3Smrg            key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
7147ec681f3Smrg         key->gs.aa_point = rast->point_smooth;
7157ec681f3Smrg         key->gs.stream_output_factor = 6;
7167ec681f3Smrg      } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
7177ec681f3Smrg         key->gs.stream_output_factor = 2;
7187ec681f3Smrg      } else if (sel_ctx->needs_vertex_reordering && !sel->is_gs_variant) {
7197ec681f3Smrg         key->gs.triangle_strip = 1;
7207ec681f3Smrg      }
7217ec681f3Smrg
7227ec681f3Smrg      if (sel->is_gs_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
7237ec681f3Smrg         key->gs.primitive_id = 1;
7247ec681f3Smrg   } else if (stage == PIPE_SHADER_FRAGMENT) {
7257ec681f3Smrg      key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
7267ec681f3Smrg      key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
7277ec681f3Smrg      key->fs.manual_depth_range = sel_ctx->manual_depth_range;
7287ec681f3Smrg      key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled;
7297ec681f3Smrg      if (sel_ctx->ctx->gfx_pipeline_state.blend &&
7307ec681f3Smrg          sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
7317ec681f3Smrg          !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
7327ec681f3Smrg         key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
7337ec681f3Smrg         key->fs.cast_to_int = !key->fs.cast_to_uint;
7347ec681f3Smrg      }
7357ec681f3Smrg   }
7367ec681f3Smrg
7377ec681f3Smrg   if (sel->samples_int_textures) {
7387ec681f3Smrg      key->samples_int_textures = sel->samples_int_textures;
7397ec681f3Smrg      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
7407ec681f3Smrg      /* Copy only states with integer textures */
7417ec681f3Smrg      for(int i = 0; i < key->n_texture_states; ++i) {
7427ec681f3Smrg         auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
7437ec681f3Smrg         if (wrap_state.is_int_sampler) {
7447ec681f3Smrg            memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
7457ec681f3Smrg            key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
7467ec681f3Smrg         }
7477ec681f3Smrg      }
7487ec681f3Smrg   }
7497ec681f3Smrg
7507ec681f3Smrg   for (unsigned i = 0; i < sel_ctx->ctx->num_samplers[stage]; ++i) {
7517ec681f3Smrg      if (!sel_ctx->ctx->samplers[stage][i] ||
7527ec681f3Smrg          sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
7537ec681f3Smrg         continue;
7547ec681f3Smrg
7557ec681f3Smrg      if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
7567ec681f3Smrg         key->tex_saturate_r |= 1 << i;
7577ec681f3Smrg      if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
7587ec681f3Smrg         key->tex_saturate_s |= 1 << i;
7597ec681f3Smrg      if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
7607ec681f3Smrg         key->tex_saturate_t |= 1 << i;
7617ec681f3Smrg   }
7627ec681f3Smrg
7637ec681f3Smrg   if (sel->compare_with_lod_bias_grad) {
7647ec681f3Smrg      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
7657ec681f3Smrg      memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
7667ec681f3Smrg             key->n_texture_states * sizeof(enum compare_func));
7677ec681f3Smrg      memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
7687ec681f3Smrg             key->n_texture_states * sizeof(dxil_texture_swizzle_state));
7697ec681f3Smrg   }
7707ec681f3Smrg
7717ec681f3Smrg   if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
7727ec681f3Smrg      key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
7737ec681f3Smrg      if (key->vs.needs_format_emulation) {
7747ec681f3Smrg         memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
7757ec681f3Smrg                sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format));
7767ec681f3Smrg      }
7777ec681f3Smrg   }
7787ec681f3Smrg
7797ec681f3Smrg   if (stage == PIPE_SHADER_FRAGMENT &&
7807ec681f3Smrg       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
7817ec681f3Smrg       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant &&
7827ec681f3Smrg       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
7837ec681f3Smrg      key->fs.remap_front_facing = 1;
7847ec681f3Smrg   }
7857ec681f3Smrg}
7867ec681f3Smrg
7877ec681f3Smrgstatic void
7887ec681f3Smrgselect_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
7897ec681f3Smrg                     d3d12_shader_selector *prev, d3d12_shader_selector *next)
7907ec681f3Smrg{
7917ec681f3Smrg   struct d3d12_context *ctx = sel_ctx->ctx;
7927ec681f3Smrg   d3d12_shader_key key;
7937ec681f3Smrg   nir_shader *new_nir_variant;
7947ec681f3Smrg   unsigned pstipple_binding = UINT32_MAX;
7957ec681f3Smrg
7967ec681f3Smrg   d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);
7977ec681f3Smrg
7987ec681f3Smrg   /* Check for an existing variant */
7997ec681f3Smrg   for (d3d12_shader *variant = sel->first; variant;
8007ec681f3Smrg        variant = variant->next_variant) {
8017ec681f3Smrg
8027ec681f3Smrg      if (d3d12_compare_shader_keys(&key, &variant->key)) {
8037ec681f3Smrg         sel->current = variant;
8047ec681f3Smrg         return;
8057ec681f3Smrg      }
8067ec681f3Smrg   }
8077ec681f3Smrg
8087ec681f3Smrg   /* Clone the NIR shader */
8097ec681f3Smrg   new_nir_variant = nir_shader_clone(sel, sel->initial);
8107ec681f3Smrg
8117ec681f3Smrg   /* Apply any needed lowering passes */
8127ec681f3Smrg   if (key.gs.writes_psize) {
8137ec681f3Smrg      NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
8147ec681f3Smrg                 !key.gs.sprite_origin_upper_left,
8157ec681f3Smrg                 key.gs.point_size_per_vertex,
8167ec681f3Smrg                 key.gs.sprite_coord_enable,
8177ec681f3Smrg                 key.next_varying_inputs);
8187ec681f3Smrg
8197ec681f3Smrg      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
8207ec681f3Smrg      nir_shader_gather_info(new_nir_variant, impl);
8217ec681f3Smrg   }
8227ec681f3Smrg
8237ec681f3Smrg   if (key.gs.primitive_id) {
8247ec681f3Smrg      NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);
8257ec681f3Smrg
8267ec681f3Smrg      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
8277ec681f3Smrg      nir_shader_gather_info(new_nir_variant, impl);
8287ec681f3Smrg   }
8297ec681f3Smrg
8307ec681f3Smrg   if (key.gs.triangle_strip)
8317ec681f3Smrg      NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
8327ec681f3Smrg
8337ec681f3Smrg   if (key.fs.polygon_stipple) {
8347ec681f3Smrg      NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
8357ec681f3Smrg                 &pstipple_binding, 0, false);
8367ec681f3Smrg
8377ec681f3Smrg      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
8387ec681f3Smrg      nir_shader_gather_info(new_nir_variant, impl);
8397ec681f3Smrg   }
8407ec681f3Smrg
8417ec681f3Smrg   if (key.fs.remap_front_facing) {
8427ec681f3Smrg      d3d12_forward_front_face(new_nir_variant);
8437ec681f3Smrg
8447ec681f3Smrg      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
8457ec681f3Smrg      nir_shader_gather_info(new_nir_variant, impl);
8467ec681f3Smrg   }
8477ec681f3Smrg
8487ec681f3Smrg   if (key.fs.missing_dual_src_outputs) {
8497ec681f3Smrg      NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
8507ec681f3Smrg                 key.fs.missing_dual_src_outputs);
8517ec681f3Smrg   } else if (key.fs.frag_result_color_lowering) {
8527ec681f3Smrg      NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
8537ec681f3Smrg                 key.fs.frag_result_color_lowering);
8547ec681f3Smrg   }
8557ec681f3Smrg
8567ec681f3Smrg   if (key.fs.manual_depth_range)
8577ec681f3Smrg      NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
8587ec681f3Smrg
8597ec681f3Smrg   if (sel->compare_with_lod_bias_grad)
8607ec681f3Smrg      NIR_PASS_V(new_nir_variant, d3d12_lower_sample_tex_compare, key.n_texture_states,
8617ec681f3Smrg                 key.sampler_compare_funcs, key.swizzle_state);
8627ec681f3Smrg
8637ec681f3Smrg   if (key.fs.cast_to_uint)
8647ec681f3Smrg      NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
8657ec681f3Smrg   if (key.fs.cast_to_int)
8667ec681f3Smrg      NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
8677ec681f3Smrg
8687ec681f3Smrg   {
8697ec681f3Smrg      struct nir_lower_tex_options tex_options = { };
8707ec681f3Smrg      tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
8717ec681f3Smrg      tex_options.lower_rect = true;
8727ec681f3Smrg      tex_options.lower_rect_offset = true;
8737ec681f3Smrg      tex_options.saturate_s = key.tex_saturate_s;
8747ec681f3Smrg      tex_options.saturate_r = key.tex_saturate_r;
8757ec681f3Smrg      tex_options.saturate_t = key.tex_saturate_t;
8767ec681f3Smrg
8777ec681f3Smrg      NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
8787ec681f3Smrg   }
8797ec681f3Smrg
8807ec681f3Smrg   /* Add the needed in and outputs, and re-sort */
8817ec681f3Smrg   uint64_t mask = key.required_varying_inputs.mask & ~new_nir_variant->info.inputs_read;
8827ec681f3Smrg
8837ec681f3Smrg   if (prev) {
8847ec681f3Smrg      while (mask) {
8857ec681f3Smrg         int slot = u_bit_scan64(&mask);
8867ec681f3Smrg         create_varying_from_info(new_nir_variant, &key.required_varying_inputs, slot, nir_var_shader_in);
8877ec681f3Smrg      }
8887ec681f3Smrg      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in,
8897ec681f3Smrg                                      key.prev_varying_outputs);
8907ec681f3Smrg   }
8917ec681f3Smrg
8927ec681f3Smrg   mask = key.required_varying_outputs.mask & ~new_nir_variant->info.outputs_written;
8937ec681f3Smrg
8947ec681f3Smrg   if (next) {
8957ec681f3Smrg      while (mask) {
8967ec681f3Smrg         int slot = u_bit_scan64(&mask);
8977ec681f3Smrg         create_varying_from_info(new_nir_variant, &key.required_varying_outputs, slot, nir_var_shader_out);
8987ec681f3Smrg      }
8997ec681f3Smrg      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out,
9007ec681f3Smrg                                      key.next_varying_inputs);
9017ec681f3Smrg   }
9027ec681f3Smrg
9037ec681f3Smrg   d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
9047ec681f3Smrg   assert(new_variant);
9057ec681f3Smrg
9067ec681f3Smrg   /* keep track of polygon stipple texture binding */
9077ec681f3Smrg   new_variant->pstipple_binding = pstipple_binding;
9087ec681f3Smrg
9097ec681f3Smrg   /* prepend the new shader in the selector chain and pick it */
9107ec681f3Smrg   new_variant->next_variant = sel->first;
9117ec681f3Smrg   sel->current = sel->first = new_variant;
9127ec681f3Smrg}
9137ec681f3Smrg
9147ec681f3Smrgstatic d3d12_shader_selector *
9157ec681f3Smrgget_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
9167ec681f3Smrg{
9177ec681f3Smrg   /* No TESS_CTRL or TESS_EVAL yet */
9187ec681f3Smrg
9197ec681f3Smrg   switch (current) {
9207ec681f3Smrg   case PIPE_SHADER_VERTEX:
9217ec681f3Smrg      return NULL;
9227ec681f3Smrg   case PIPE_SHADER_FRAGMENT:
9237ec681f3Smrg      if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
9247ec681f3Smrg         return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
9257ec681f3Smrg      FALLTHROUGH;
9267ec681f3Smrg   case PIPE_SHADER_GEOMETRY:
9277ec681f3Smrg      return ctx->gfx_stages[PIPE_SHADER_VERTEX];
9287ec681f3Smrg   default:
9297ec681f3Smrg      unreachable("shader type not supported");
9307ec681f3Smrg   }
9317ec681f3Smrg}
9327ec681f3Smrg
9337ec681f3Smrgstatic d3d12_shader_selector *
9347ec681f3Smrgget_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
9357ec681f3Smrg{
9367ec681f3Smrg   /* No TESS_CTRL or TESS_EVAL yet */
9377ec681f3Smrg
9387ec681f3Smrg   switch (current) {
9397ec681f3Smrg   case PIPE_SHADER_VERTEX:
9407ec681f3Smrg      if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
9417ec681f3Smrg         return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
9427ec681f3Smrg      FALLTHROUGH;
9437ec681f3Smrg   case PIPE_SHADER_GEOMETRY:
9447ec681f3Smrg      return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
9457ec681f3Smrg   case PIPE_SHADER_FRAGMENT:
9467ec681f3Smrg      return NULL;
9477ec681f3Smrg   default:
9487ec681f3Smrg      unreachable("shader type not supported");
9497ec681f3Smrg   }
9507ec681f3Smrg}
9517ec681f3Smrg
/* Bit flags reported by scan_texture_use() */
enum tex_scan_flags {
   /* a sampling instruction has an integer destination type */
   TEX_SAMPLE_INTEGER_TEXTURE = 0x1,
   /* a shadow lookup is combined with bias, explicit LOD or gradients */
   TEX_CMP_WITH_LOD_BIAS_GRAD = 0x2,
   /* every flag above set */
   TEX_SCAN_ALL_FLAGS         = TEX_SAMPLE_INTEGER_TEXTURE | TEX_CMP_WITH_LOD_BIAS_GRAD
};
9577ec681f3Smrg
9587ec681f3Smrgstatic unsigned
9597ec681f3Smrgscan_texture_use(nir_shader *nir)
9607ec681f3Smrg{
9617ec681f3Smrg   unsigned result = 0;
9627ec681f3Smrg   nir_foreach_function(func, nir) {
9637ec681f3Smrg      nir_foreach_block(block, func->impl) {
9647ec681f3Smrg         nir_foreach_instr(instr, block) {
9657ec681f3Smrg            if (instr->type == nir_instr_type_tex) {
9667ec681f3Smrg               auto tex = nir_instr_as_tex(instr);
9677ec681f3Smrg               switch (tex->op) {
9687ec681f3Smrg               case nir_texop_txb:
9697ec681f3Smrg               case nir_texop_txl:
9707ec681f3Smrg               case nir_texop_txd:
9717ec681f3Smrg                  if (tex->is_shadow)
9727ec681f3Smrg                     result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
9737ec681f3Smrg                  FALLTHROUGH;
9747ec681f3Smrg               case nir_texop_tex:
9757ec681f3Smrg                  if (tex->dest_type & (nir_type_int | nir_type_uint))
9767ec681f3Smrg                     result |= TEX_SAMPLE_INTEGER_TEXTURE;
9777ec681f3Smrg               default:
9787ec681f3Smrg                  ;
9797ec681f3Smrg               }
9807ec681f3Smrg            }
9817ec681f3Smrg            if (TEX_SCAN_ALL_FLAGS == result)
9827ec681f3Smrg               return result;
9837ec681f3Smrg         }
9847ec681f3Smrg      }
9857ec681f3Smrg   }
9867ec681f3Smrg   return result;
9877ec681f3Smrg}
9887ec681f3Smrg
9897ec681f3Smrgstatic uint64_t
9907ec681f3Smrgupdate_so_info(struct pipe_stream_output_info *so_info,
9917ec681f3Smrg               uint64_t outputs_written)
9927ec681f3Smrg{
9937ec681f3Smrg   uint64_t so_outputs = 0;
9947ec681f3Smrg   uint8_t reverse_map[64] = {0};
9957ec681f3Smrg   unsigned slot = 0;
9967ec681f3Smrg
9977ec681f3Smrg   while (outputs_written)
9987ec681f3Smrg      reverse_map[slot++] = u_bit_scan64(&outputs_written);
9997ec681f3Smrg
10007ec681f3Smrg   for (unsigned i = 0; i < so_info->num_outputs; i++) {
10017ec681f3Smrg      struct pipe_stream_output *output = &so_info->output[i];
10027ec681f3Smrg
10037ec681f3Smrg      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
10047ec681f3Smrg      output->register_index = reverse_map[output->register_index];
10057ec681f3Smrg
10067ec681f3Smrg      so_outputs |= 1ull << output->register_index;
10077ec681f3Smrg   }
10087ec681f3Smrg
10097ec681f3Smrg   return so_outputs;
10107ec681f3Smrg}
10117ec681f3Smrg
10127ec681f3Smrgstruct d3d12_shader_selector *
10137ec681f3Smrgd3d12_create_shader(struct d3d12_context *ctx,
10147ec681f3Smrg                    pipe_shader_type stage,
10157ec681f3Smrg                    const struct pipe_shader_state *shader)
10167ec681f3Smrg{
10177ec681f3Smrg   struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
10187ec681f3Smrg   sel->stage = stage;
10197ec681f3Smrg
10207ec681f3Smrg   struct nir_shader *nir = NULL;
10217ec681f3Smrg
10227ec681f3Smrg   if (shader->type == PIPE_SHADER_IR_NIR) {
10237ec681f3Smrg      nir = (nir_shader *)shader->ir.nir;
10247ec681f3Smrg   } else {
10257ec681f3Smrg      assert(shader->type == PIPE_SHADER_IR_TGSI);
10267ec681f3Smrg      nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
10277ec681f3Smrg   }
10287ec681f3Smrg
10297ec681f3Smrg   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
10307ec681f3Smrg
10317ec681f3Smrg   unsigned tex_scan_result = scan_texture_use(nir);
10327ec681f3Smrg   sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
10337ec681f3Smrg   sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
10347ec681f3Smrg
10357ec681f3Smrg   memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
10367ec681f3Smrg   update_so_info(&sel->so_info, nir->info.outputs_written);
10377ec681f3Smrg
10387ec681f3Smrg   assert(nir != NULL);
10397ec681f3Smrg   d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
10407ec681f3Smrg   d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
10417ec681f3Smrg
10427ec681f3Smrg   uint64_t in_mask = nir->info.stage == MESA_SHADER_VERTEX ?
10437ec681f3Smrg                         0 : VARYING_BIT_PRIMITIVE_ID;
10447ec681f3Smrg
10457ec681f3Smrg   uint64_t out_mask = nir->info.stage == MESA_SHADER_FRAGMENT ?
10467ec681f3Smrg                          (1ull << FRAG_RESULT_STENCIL) :
10477ec681f3Smrg                          VARYING_BIT_PRIMITIVE_ID;
10487ec681f3Smrg
10497ec681f3Smrg   d3d12_fix_io_uint_type(nir, in_mask, out_mask);
10507ec681f3Smrg   NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
10517ec681f3Smrg
10527ec681f3Smrg   if (nir->info.stage != MESA_SHADER_VERTEX)
10537ec681f3Smrg      nir->info.inputs_read =
10547ec681f3Smrg            dxil_reassign_driver_locations(nir, nir_var_shader_in,
10557ec681f3Smrg                                            prev ? prev->current->nir->info.outputs_written : 0);
10567ec681f3Smrg   else
10577ec681f3Smrg      nir->info.inputs_read = dxil_sort_by_driver_location(nir, nir_var_shader_in);
10587ec681f3Smrg
10597ec681f3Smrg   if (nir->info.stage != MESA_SHADER_FRAGMENT) {
10607ec681f3Smrg      nir->info.outputs_written =
10617ec681f3Smrg            dxil_reassign_driver_locations(nir, nir_var_shader_out,
10627ec681f3Smrg                                            next ? next->current->nir->info.inputs_read : 0);
10637ec681f3Smrg   } else {
10647ec681f3Smrg      NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
10657ec681f3Smrg      dxil_sort_ps_outputs(nir);
10667ec681f3Smrg   }
10677ec681f3Smrg
10687ec681f3Smrg   /* Integer cube maps are not supported in DirectX because sampling is not supported
10697ec681f3Smrg    * on integer textures and TextureLoad is not supported for cube maps, so we have to
10707ec681f3Smrg    * lower integer cube maps to be handled like 2D textures arrays*/
10717ec681f3Smrg   NIR_PASS_V(nir, d3d12_lower_int_cubmap_to_array);
10727ec681f3Smrg
10737ec681f3Smrg   /* Keep this initial shader as the blue print for possible variants */
10747ec681f3Smrg   sel->initial = nir;
10757ec681f3Smrg
10767ec681f3Smrg   /*
10777ec681f3Smrg    * We must compile some shader here, because if the previous or a next shaders exists later
10787ec681f3Smrg    * when the shaders are bound, then the key evaluation in the shader selector will access
10797ec681f3Smrg    * the current variant of these  prev and next shader, and we can only assign
10807ec681f3Smrg    * a current variant when it has been successfully compiled.
10817ec681f3Smrg    *
10827ec681f3Smrg    * For shaders that require lowering because certain instructions are not available
10837ec681f3Smrg    * and their emulation is state depended (like sampling an integer texture that must be
10847ec681f3Smrg    * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD),
10857ec681f3Smrg    * we must go through the shader selector here to create a compilable variant.
10867ec681f3Smrg    * For shaders that are not depended on the state this is just compiling the original
10877ec681f3Smrg    * shader.
10887ec681f3Smrg    *
10897ec681f3Smrg    * TODO: get rid of having to compiling the shader here if it can be forseen that it will
10907ec681f3Smrg    * be thrown away (i.e. it depends on states that are likely to change before the shader is
10917ec681f3Smrg    * used for the first time)
10927ec681f3Smrg    */
10937ec681f3Smrg   struct d3d12_selection_context sel_ctx = {0};
10947ec681f3Smrg   sel_ctx.ctx = ctx;
10957ec681f3Smrg   select_shader_variant(&sel_ctx, sel, prev, next);
10967ec681f3Smrg
10977ec681f3Smrg   if (!sel->current) {
10987ec681f3Smrg      ralloc_free(sel);
10997ec681f3Smrg      return NULL;
11007ec681f3Smrg   }
11017ec681f3Smrg
11027ec681f3Smrg   return sel;
11037ec681f3Smrg}
11047ec681f3Smrg
11057ec681f3Smrgvoid
11067ec681f3Smrgd3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
11077ec681f3Smrg{
11087ec681f3Smrg   static unsigned order[] = {PIPE_SHADER_VERTEX, PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT};
11097ec681f3Smrg   struct d3d12_selection_context sel_ctx;
11107ec681f3Smrg
11117ec681f3Smrg   sel_ctx.ctx = ctx;
11127ec681f3Smrg   sel_ctx.dinfo = dinfo;
11137ec681f3Smrg   sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
11147ec681f3Smrg   sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
11157ec681f3Smrg   sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
11167ec681f3Smrg   sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri);
11177ec681f3Smrg   sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx);
11187ec681f3Smrg   sel_ctx.missing_dual_src_outputs = missing_dual_src_outputs(ctx);
11197ec681f3Smrg   sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
11207ec681f3Smrg   sel_ctx.manual_depth_range = manual_depth_range(ctx);
11217ec681f3Smrg
11227ec681f3Smrg   validate_geometry_shader_variant(&sel_ctx);
11237ec681f3Smrg
11247ec681f3Smrg   for (unsigned i = 0; i < ARRAY_SIZE(order); ++i) {
11257ec681f3Smrg      auto sel = ctx->gfx_stages[order[i]];
11267ec681f3Smrg      if (!sel)
11277ec681f3Smrg         continue;
11287ec681f3Smrg
11297ec681f3Smrg      d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
11307ec681f3Smrg      d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
11317ec681f3Smrg
11327ec681f3Smrg      select_shader_variant(&sel_ctx, sel, prev, next);
11337ec681f3Smrg   }
11347ec681f3Smrg}
11357ec681f3Smrg
11367ec681f3Smrgvoid
11377ec681f3Smrgd3d12_shader_free(struct d3d12_shader_selector *sel)
11387ec681f3Smrg{
11397ec681f3Smrg   auto shader = sel->first;
11407ec681f3Smrg   while (shader) {
11417ec681f3Smrg      free(shader->bytecode);
11427ec681f3Smrg      shader = shader->next_variant;
11437ec681f3Smrg   }
11447ec681f3Smrg   ralloc_free(sel->initial);
11457ec681f3Smrg   ralloc_free(sel);
11467ec681f3Smrg}
11477ec681f3Smrg
#ifdef _WIN32
// Used to get path to self: the address of this symbol is passed as the
// module handle to GetModuleFileNameA().
// A declaration directly inside a linkage specification must not name a
// storage class, and 'extern "C"' alone already makes this a declaration.
extern "C" IMAGE_DOS_HEADER __ImageBase;
#endif
11527ec681f3Smrg
11537ec681f3Smrgvoid d3d12_validation_tools::load_dxil_dll()
11547ec681f3Smrg{
11557ec681f3Smrg   if (!dxil_module.load(UTIL_DL_PREFIX "dxil" UTIL_DL_EXT)) {
11567ec681f3Smrg#ifdef _WIN32
11577ec681f3Smrg      char selfPath[MAX_PATH] = "";
11587ec681f3Smrg      uint32_t pathSize = GetModuleFileNameA((HINSTANCE)&__ImageBase, selfPath, sizeof(selfPath));
11597ec681f3Smrg      if (pathSize == 0 || pathSize == sizeof(selfPath)) {
11607ec681f3Smrg         debug_printf("D3D12: Unable to get path to self");
11617ec681f3Smrg         return;
11627ec681f3Smrg      }
11637ec681f3Smrg
11647ec681f3Smrg      auto lastSlash = strrchr(selfPath, '\\');
11657ec681f3Smrg      if (!lastSlash) {
11667ec681f3Smrg         debug_printf("D3D12: Unable to get path to self");
11677ec681f3Smrg         return;
11687ec681f3Smrg      }
11697ec681f3Smrg
11707ec681f3Smrg      *(lastSlash + 1) = '\0';
11717ec681f3Smrg      if (strcat_s(selfPath, "dxil.dll") != 0) {
11727ec681f3Smrg         debug_printf("D3D12: Unable to get path to dxil.dll next to self");
11737ec681f3Smrg         return;
11747ec681f3Smrg      }
11757ec681f3Smrg
11767ec681f3Smrg      dxil_module.load(selfPath);
11777ec681f3Smrg#endif
11787ec681f3Smrg   }
11797ec681f3Smrg}
11807ec681f3Smrg
11817ec681f3Smrgd3d12_validation_tools::d3d12_validation_tools()
11827ec681f3Smrg{
11837ec681f3Smrg   load_dxil_dll();
11847ec681f3Smrg   DxcCreateInstanceProc dxil_create_func = (DxcCreateInstanceProc)util_dl_get_proc_address(dxil_module, "DxcCreateInstance");
11857ec681f3Smrg
11867ec681f3Smrg   if (dxil_create_func) {
11877ec681f3Smrg      HRESULT hr = dxil_create_func(CLSID_DxcValidator,  IID_PPV_ARGS(&validator));
11887ec681f3Smrg      if (FAILED(hr)) {
11897ec681f3Smrg         debug_printf("D3D12: Unable to create validator\n");
11907ec681f3Smrg      }
11917ec681f3Smrg   }
11927ec681f3Smrg#ifdef _WIN32
11937ec681f3Smrg   else if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
11947ec681f3Smrg      debug_printf("D3D12: Unable to load DXIL.dll\n");
11957ec681f3Smrg   }
11967ec681f3Smrg#endif
11977ec681f3Smrg
11987ec681f3Smrg   DxcCreateInstanceProc compiler_create_func  = nullptr;
11997ec681f3Smrg   if(dxc_compiler_module.load("dxcompiler.dll"))
12007ec681f3Smrg      compiler_create_func = (DxcCreateInstanceProc)util_dl_get_proc_address(dxc_compiler_module, "DxcCreateInstance");
12017ec681f3Smrg
12027ec681f3Smrg   if (compiler_create_func) {
12037ec681f3Smrg      HRESULT hr = compiler_create_func(CLSID_DxcLibrary, IID_PPV_ARGS(&library));
12047ec681f3Smrg      if (FAILED(hr)) {
12057ec681f3Smrg         debug_printf("D3D12: Unable to create library instance: %x\n", hr);
12067ec681f3Smrg      }
12077ec681f3Smrg
12087ec681f3Smrg      if (d3d12_debug & D3D12_DEBUG_DISASS) {
12097ec681f3Smrg         hr = compiler_create_func(CLSID_DxcCompiler, IID_PPV_ARGS(&compiler));
12107ec681f3Smrg         if (FAILED(hr)) {
12117ec681f3Smrg            debug_printf("D3D12: Unable to create compiler instance\n");
12127ec681f3Smrg         }
12137ec681f3Smrg      }
12147ec681f3Smrg   } else if (d3d12_debug & D3D12_DEBUG_DISASS) {
12157ec681f3Smrg      debug_printf("D3D12: Disassembly requested but compiler couldn't be loaded\n");
12167ec681f3Smrg   }
12177ec681f3Smrg}
12187ec681f3Smrg
12197ec681f3Smrgd3d12_validation_tools::HModule::HModule():
12207ec681f3Smrg   module(0)
12217ec681f3Smrg{
12227ec681f3Smrg}
12237ec681f3Smrg
12247ec681f3Smrgd3d12_validation_tools::HModule::~HModule()
12257ec681f3Smrg{
12267ec681f3Smrg   if (module)
12277ec681f3Smrg      util_dl_close(module);
12287ec681f3Smrg}
12297ec681f3Smrg
12307ec681f3Smrginline
12317ec681f3Smrgd3d12_validation_tools::HModule::operator util_dl_library * () const
12327ec681f3Smrg{
12337ec681f3Smrg   return module;
12347ec681f3Smrg}
12357ec681f3Smrg
12367ec681f3Smrgbool
12377ec681f3Smrgd3d12_validation_tools::HModule::load(LPCSTR file_name)
12387ec681f3Smrg{
12397ec681f3Smrg   module = util_dl_open(file_name);
12407ec681f3Smrg   return module != nullptr;
12417ec681f3Smrg}
12427ec681f3Smrg
12437ec681f3Smrg
12447ec681f3Smrgclass ShaderBlob : public IDxcBlob {
12457ec681f3Smrgpublic:
12467ec681f3Smrg   ShaderBlob(blob* data) : m_data(data) {}
12477ec681f3Smrg
12487ec681f3Smrg   LPVOID STDMETHODCALLTYPE GetBufferPointer(void) override { return m_data->data; }
12497ec681f3Smrg
12507ec681f3Smrg   SIZE_T STDMETHODCALLTYPE GetBufferSize() override { return m_data->size; }
12517ec681f3Smrg
12527ec681f3Smrg   HRESULT STDMETHODCALLTYPE QueryInterface(REFIID, void**) override { return E_NOINTERFACE; }
12537ec681f3Smrg
12547ec681f3Smrg   ULONG STDMETHODCALLTYPE AddRef() override { return 1; }
12557ec681f3Smrg
12567ec681f3Smrg   ULONG STDMETHODCALLTYPE Release() override { return 0; }
12577ec681f3Smrg
12587ec681f3Smrg   blob* m_data;
12597ec681f3Smrg};
12607ec681f3Smrg
12617ec681f3Smrgbool d3d12_validation_tools::validate_and_sign(struct blob *dxil)
12627ec681f3Smrg{
12637ec681f3Smrg   ShaderBlob source(dxil);
12647ec681f3Smrg
12657ec681f3Smrg   ComPtr<IDxcOperationResult> result;
12667ec681f3Smrg
12677ec681f3Smrg   validator->Validate(&source, DxcValidatorFlags_InPlaceEdit, &result);
12687ec681f3Smrg   HRESULT validationStatus;
12697ec681f3Smrg   result->GetStatus(&validationStatus);
12707ec681f3Smrg   if (FAILED(validationStatus) && library) {
12717ec681f3Smrg      ComPtr<IDxcBlobEncoding> printBlob, printBlobUtf8;
12727ec681f3Smrg      result->GetErrorBuffer(&printBlob);
12737ec681f3Smrg      library->GetBlobAsUtf8(printBlob.Get(), printBlobUtf8.GetAddressOf());
12747ec681f3Smrg
12757ec681f3Smrg      char *errorString;
12767ec681f3Smrg      if (printBlobUtf8) {
12777ec681f3Smrg         errorString = reinterpret_cast<char*>(printBlobUtf8->GetBufferPointer());
12787ec681f3Smrg
12797ec681f3Smrg         errorString[printBlobUtf8->GetBufferSize() - 1] = 0;
12807ec681f3Smrg         debug_printf("== VALIDATION ERROR =============================================\n%s\n"
12817ec681f3Smrg                     "== END ==========================================================\n",
12827ec681f3Smrg                     errorString);
12837ec681f3Smrg      }
12847ec681f3Smrg
12857ec681f3Smrg      return false;
12867ec681f3Smrg   }
12877ec681f3Smrg   return true;
12887ec681f3Smrg
12897ec681f3Smrg}
12907ec681f3Smrg
12917ec681f3Smrgvoid d3d12_validation_tools::disassemble(struct blob *dxil)
12927ec681f3Smrg{
12937ec681f3Smrg   if (!compiler) {
12947ec681f3Smrg      fprintf(stderr, "D3D12: No Disassembler\n");
12957ec681f3Smrg      return;
12967ec681f3Smrg   }
12977ec681f3Smrg   ShaderBlob source(dxil);
12987ec681f3Smrg   IDxcBlobEncoding* pDisassembly = nullptr;
12997ec681f3Smrg
13007ec681f3Smrg   if (FAILED(compiler->Disassemble(&source, &pDisassembly))) {
13017ec681f3Smrg      fprintf(stderr, "D3D12: Disassembler failed\n");
13027ec681f3Smrg      return;
13037ec681f3Smrg   }
13047ec681f3Smrg
13057ec681f3Smrg   ComPtr<IDxcBlobEncoding> dissassably(pDisassembly);
13067ec681f3Smrg   ComPtr<IDxcBlobEncoding> blobUtf8;
13077ec681f3Smrg   library->GetBlobAsUtf8(pDisassembly, blobUtf8.GetAddressOf());
13087ec681f3Smrg   if (!blobUtf8) {
13097ec681f3Smrg      fprintf(stderr, "D3D12: Unable to get utf8 encoding\n");
13107ec681f3Smrg      return;
13117ec681f3Smrg   }
13127ec681f3Smrg
13137ec681f3Smrg   char *disassembly = reinterpret_cast<char*>(blobUtf8->GetBufferPointer());
13147ec681f3Smrg   disassembly[blobUtf8->GetBufferSize() - 1] = 0;
13157ec681f3Smrg
13167ec681f3Smrg   fprintf(stderr, "== BEGIN SHADER ============================================\n"
13177ec681f3Smrg           "%s\n"
13187ec681f3Smrg           "== END SHADER ==============================================\n",
13197ec681f3Smrg           disassembly);
13207ec681f3Smrg}
1321