17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2019 Raspberry Pi
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include "vk_util.h"
257ec681f3Smrg
267ec681f3Smrg#include "v3dv_debug.h"
277ec681f3Smrg#include "v3dv_private.h"
287ec681f3Smrg
297ec681f3Smrg#include "vk_format_info.h"
307ec681f3Smrg
317ec681f3Smrg#include "common/v3d_debug.h"
327ec681f3Smrg
337ec681f3Smrg#include "compiler/nir/nir_builder.h"
347ec681f3Smrg#include "nir/nir_serialize.h"
357ec681f3Smrg
367ec681f3Smrg#include "util/u_atomic.h"
377ec681f3Smrg#include "util/u_prim.h"
387ec681f3Smrg#include "util/os_time.h"
397ec681f3Smrg
407ec681f3Smrg#include "vulkan/util/vk_format.h"
417ec681f3Smrg
427ec681f3Smrgstatic VkResult
437ec681f3Smrgcompute_vpm_config(struct v3dv_pipeline *pipeline);
447ec681f3Smrg
457ec681f3Smrgvoid
467ec681f3Smrgv3dv_print_v3d_key(struct v3d_key *key,
477ec681f3Smrg                   uint32_t v3d_key_size)
487ec681f3Smrg{
497ec681f3Smrg   struct mesa_sha1 ctx;
507ec681f3Smrg   unsigned char sha1[20];
517ec681f3Smrg   char sha1buf[41];
527ec681f3Smrg
537ec681f3Smrg   _mesa_sha1_init(&ctx);
547ec681f3Smrg
557ec681f3Smrg   _mesa_sha1_update(&ctx, key, v3d_key_size);
567ec681f3Smrg
577ec681f3Smrg   _mesa_sha1_final(&ctx, sha1);
587ec681f3Smrg   _mesa_sha1_format(sha1buf, sha1);
597ec681f3Smrg
607ec681f3Smrg   fprintf(stderr, "key %p: %s\n", key, sha1buf);
617ec681f3Smrg}
627ec681f3Smrg
637ec681f3Smrgstatic void
647ec681f3Smrgpipeline_compute_sha1_from_nir(nir_shader *nir,
657ec681f3Smrg                               unsigned char sha1[20])
667ec681f3Smrg{
677ec681f3Smrg   assert(nir);
687ec681f3Smrg   struct blob blob;
697ec681f3Smrg   blob_init(&blob);
707ec681f3Smrg
717ec681f3Smrg   nir_serialize(&blob, nir, false);
727ec681f3Smrg   if (!blob.out_of_memory)
737ec681f3Smrg      _mesa_sha1_compute(blob.data, blob.size, sha1);
747ec681f3Smrg
757ec681f3Smrg   blob_finish(&blob);
767ec681f3Smrg}
777ec681f3Smrg
/* Initializes a driver-internal shader module that wraps an existing NIR
 * shader instead of SPIR-V code. size is set to 0 because there is no
 * SPIR-V payload; the module's sha1 is derived from the serialized NIR.
 * The module stores the NIR pointer as-is (no copy is made here).
 */
void
v3dv_shader_module_internal_init(struct v3dv_device *device,
                                 struct vk_shader_module *module,
                                 nir_shader *nir)
{
   vk_object_base_init(&device->vk, &module->base,
                       VK_OBJECT_TYPE_SHADER_MODULE);
   module->nir = nir;
   module->size = 0;

   pipeline_compute_sha1_from_nir(nir, module->sha1);
}
907ec681f3Smrg
/* Frees a shader variant: releases its prog_data allocation and the
 * variant struct itself (allocated with the device allocator).
 */
void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant)
{
   /* The assembly BO is shared by all variants in the pipeline, so it can't
    * be freed here and should be freed with the pipeline
    */
   ralloc_free(variant->prog_data.base);
   vk_free(&device->vk.alloc, variant);
}
1017ec681f3Smrg
1027ec681f3Smrgstatic void
1037ec681f3Smrgdestroy_pipeline_stage(struct v3dv_device *device,
1047ec681f3Smrg                       struct v3dv_pipeline_stage *p_stage,
1057ec681f3Smrg                       const VkAllocationCallbacks *pAllocator)
1067ec681f3Smrg{
1077ec681f3Smrg   if (!p_stage)
1087ec681f3Smrg      return;
1097ec681f3Smrg
1107ec681f3Smrg   ralloc_free(p_stage->nir);
1117ec681f3Smrg   vk_free2(&device->vk.alloc, pAllocator, p_stage);
1127ec681f3Smrg}
1137ec681f3Smrg
1147ec681f3Smrgstatic void
1157ec681f3Smrgpipeline_free_stages(struct v3dv_device *device,
1167ec681f3Smrg                     struct v3dv_pipeline *pipeline,
1177ec681f3Smrg                     const VkAllocationCallbacks *pAllocator)
1187ec681f3Smrg{
1197ec681f3Smrg   assert(pipeline);
1207ec681f3Smrg
1217ec681f3Smrg   /* FIXME: we can't just use a loop over mesa stage due the bin, would be
1227ec681f3Smrg    * good to find an alternative.
1237ec681f3Smrg    */
1247ec681f3Smrg   destroy_pipeline_stage(device, pipeline->vs, pAllocator);
1257ec681f3Smrg   destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
1267ec681f3Smrg   destroy_pipeline_stage(device, pipeline->gs, pAllocator);
1277ec681f3Smrg   destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
1287ec681f3Smrg   destroy_pipeline_stage(device, pipeline->fs, pAllocator);
1297ec681f3Smrg   destroy_pipeline_stage(device, pipeline->cs, pAllocator);
1307ec681f3Smrg
1317ec681f3Smrg   pipeline->vs = NULL;
1327ec681f3Smrg   pipeline->vs_bin = NULL;
1337ec681f3Smrg   pipeline->gs = NULL;
1347ec681f3Smrg   pipeline->gs_bin = NULL;
1357ec681f3Smrg   pipeline->fs = NULL;
1367ec681f3Smrg   pipeline->cs = NULL;
1377ec681f3Smrg}
1387ec681f3Smrg
/* Full pipeline teardown: frees stages, drops the reference on the shared
 * data (which may be cached and outlive this pipeline), releases the spill
 * and default-attribute BOs, and finally frees the pipeline object itself.
 * NULL-safe.
 */
static void
v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
                      struct v3dv_device *device,
                      const VkAllocationCallbacks *pAllocator)
{
   if (!pipeline)
      return;

   pipeline_free_stages(device, pipeline, pAllocator);

   if (pipeline->shared_data) {
      /* Refcounted: only actually freed when the last user drops it */
      v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
      pipeline->shared_data = NULL;
   }

   if (pipeline->spill.bo) {
      /* A spill BO should only exist if spilling was actually required */
      assert(pipeline->spill.size_per_thread > 0);
      v3dv_bo_free(device, pipeline->spill.bo);
   }

   if (pipeline->default_attribute_values) {
      v3dv_bo_free(device, pipeline->default_attribute_values);
      pipeline->default_attribute_values = NULL;
   }

   vk_object_free(&device->vk, pAllocator, pipeline);
}
1667ec681f3Smrg
1677ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
1687ec681f3Smrgv3dv_DestroyPipeline(VkDevice _device,
1697ec681f3Smrg                     VkPipeline _pipeline,
1707ec681f3Smrg                     const VkAllocationCallbacks *pAllocator)
1717ec681f3Smrg{
1727ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1737ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
1747ec681f3Smrg
1757ec681f3Smrg   if (!pipeline)
1767ec681f3Smrg      return;
1777ec681f3Smrg
1787ec681f3Smrg   v3dv_destroy_pipeline(pipeline, device, pAllocator);
1797ec681f3Smrg}
1807ec681f3Smrg
/* Options handed to spirv_to_nir for all stages: the SPIR-V capabilities
 * this driver exposes and the address formats used for each storage class
 * (UBO/SSBO use index+offset addressing so they can be remapped through
 * the descriptor maps later).
 */
static const struct spirv_to_nir_options default_spirv_options =  {
   .caps = {
      .device_group = true,
      .multiview = true,
      .subgroup_basic = true,
      .variable_pointers = true,
    },
   .ubo_addr_format = nir_address_format_32bit_index_offset,
   .ssbo_addr_format = nir_address_format_32bit_index_offset,
   .phys_ssbo_addr_format = nir_address_format_64bit_global,
   .push_const_addr_format = nir_address_format_logical,
   .shared_addr_format = nir_address_format_32bit_offset,
};
1947ec681f3Smrg
/* NIR compiler options for the v3d backend: which ALU/pack operations the
 * hardware lacks and must be lowered in NIR, plus scalarization and loop
 * unrolling policy. Shared by all shader stages.
 */
const nir_shader_compiler_options v3dv_nir_options = {
   .lower_uadd_sat = true,
   .lower_iadd_sat = true,
   .lower_all_io_to_temps = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_bitfield_insert_to_shifts = true,
   .lower_bitfield_extract_to_shifts = true,
   .lower_bitfield_reverse = true,
   .lower_bit_count = true,
   .lower_cs_local_id_from_index = true,
   .lower_ffract = true,
   .lower_fmod = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_pack_half_2x16 = true,
   .lower_unpack_half_2x16 = true,
   /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and
    * get the tests to pass since it might produce slightly better code.
    */
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   /* FIXME: check if we can use multop + umul24 to implement mul2x32_64
    * without lowering.
    */
   .lower_mul_2x32_64 = true,
   .lower_fdiv = true,
   .lower_find_lsb = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_ffma64 = true,
   .lower_flrp32 = true,
   .lower_fpow = true,
   .lower_fsat = true,
   .lower_fsqrt = true,
   .lower_ifind_msb = true,
   .lower_isign = true,
   .lower_ldexp = true,
   .lower_mul_high = true,
   .lower_wpos_pntc = true,
   .lower_rotate = true,
   .lower_to_scalar = true,
   .lower_device_index_to_zero = true,
   .has_fsub = true,
   .has_isub = true,
   .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
                                   * needs to be supported */
   .lower_interpolate_at = true,
   .max_unroll_iterations = 16,
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
   .divergence_analysis_options =
      nir_divergence_multiple_workgroup_per_compute_subgroup
};
2567ec681f3Smrg
/* Returns the NIR compiler options used by this driver (shared, immutable). */
const nir_shader_compiler_options *
v3dv_pipeline_get_nir_options(void)
{
   return &v3dv_nir_options;
}
2627ec681f3Smrg
/* Runs a NIR pass on `nir` (captured from the enclosing scope), ORs its
 * result into the enclosing `progress` flag, and evaluates to whether this
 * particular pass made progress.
 */
#define OPT(pass, ...) ({                                  \
   bool this_progress = false;                             \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
   if (this_progress)                                      \
      progress = true;                                     \
   this_progress;                                          \
})
2707ec681f3Smrg
/* Runs the NIR optimization loop until no pass reports progress.
 * allow_copies controls whether nir_opt_find_array_copies may run; it is
 * only safe on the first invocation, before copy_derefs are lowered away
 * (see the inline comment below).
 */
static void
nir_optimize(nir_shader *nir, bool allow_copies)
{
   bool progress;

   do {
      progress = false;
      OPT(nir_split_array_vars, nir_var_function_temp);
      OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
      OPT(nir_opt_deref);
      OPT(nir_lower_vars_to_ssa);
      if (allow_copies) {
         /* Only run this pass in the first call to nir_optimize.  Later calls
          * assume that we've lowered away any copy_deref instructions and we
          * don't want to introduce any more.
          */
         OPT(nir_opt_find_array_copies);
      }
      OPT(nir_opt_copy_prop_vars);
      OPT(nir_opt_dead_write_vars);
      OPT(nir_opt_combine_stores, nir_var_all);

      OPT(nir_lower_alu_to_scalar, NULL, NULL);

      OPT(nir_copy_prop);
      OPT(nir_lower_phis_to_scalar, false);

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_combine_stores, nir_var_all);

      /* Passing 0 to the peephole select pass causes it to convert
       * if-statements that contain only move instructions in the branches
       * regardless of the count.
       *
       * Passing 1 to the peephole select pass causes it to convert
       * if-statements that contain at most a single ALU instruction (total)
       * in both branches.
       */
      OPT(nir_opt_peephole_select, 0, false, false);
      OPT(nir_opt_peephole_select, 8, false, true);

      OPT(nir_opt_intrinsics);
      OPT(nir_opt_idiv_const, 32);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);

      OPT(nir_opt_dead_cf);

      OPT(nir_opt_if, false);
      OPT(nir_opt_conditional_discard);

      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
      OPT(nir_lower_pack);
   } while (progress);

   /* Run once after the loop: drop temporaries the optimizations orphaned */
   OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
}
3317ec681f3Smrg
3327ec681f3Smrgstatic void
3337ec681f3Smrgpreprocess_nir(nir_shader *nir)
3347ec681f3Smrg{
3357ec681f3Smrg   /* We have to lower away local variable initializers right before we
3367ec681f3Smrg    * inline functions.  That way they get properly initialized at the top
3377ec681f3Smrg    * of the function and not at the top of its caller.
3387ec681f3Smrg    */
3397ec681f3Smrg   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
3407ec681f3Smrg   NIR_PASS_V(nir, nir_lower_returns);
3417ec681f3Smrg   NIR_PASS_V(nir, nir_inline_functions);
3427ec681f3Smrg   NIR_PASS_V(nir, nir_opt_deref);
3437ec681f3Smrg
3447ec681f3Smrg   /* Pick off the single entrypoint that we want */
3457ec681f3Smrg   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
3467ec681f3Smrg      if (func->is_entrypoint)
3477ec681f3Smrg         func->name = ralloc_strdup(func, "main");
3487ec681f3Smrg      else
3497ec681f3Smrg         exec_node_remove(&func->node);
3507ec681f3Smrg   }
3517ec681f3Smrg   assert(exec_list_length(&nir->functions) == 1);
3527ec681f3Smrg
3537ec681f3Smrg   /* Vulkan uses the separate-shader linking model */
3547ec681f3Smrg   nir->info.separate_shader = true;
3557ec681f3Smrg
3567ec681f3Smrg   /* Make sure we lower variable initializers on output variables so that
3577ec681f3Smrg    * nir_remove_dead_variables below sees the corresponding stores
3587ec681f3Smrg    */
3597ec681f3Smrg   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
3607ec681f3Smrg
3617ec681f3Smrg   /* Now that we've deleted all but the main function, we can go ahead and
3627ec681f3Smrg    * lower the rest of the variable initializers.
3637ec681f3Smrg    */
3647ec681f3Smrg   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
3657ec681f3Smrg
3667ec681f3Smrg   /* Split member structs.  We do this before lower_io_to_temporaries so that
3677ec681f3Smrg    * it doesn't lower system values to temporaries by accident.
3687ec681f3Smrg    */
3697ec681f3Smrg   NIR_PASS_V(nir, nir_split_var_copies);
3707ec681f3Smrg   NIR_PASS_V(nir, nir_split_per_member_structs);
3717ec681f3Smrg
3727ec681f3Smrg   if (nir->info.stage == MESA_SHADER_FRAGMENT)
3737ec681f3Smrg      NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
3747ec681f3Smrg   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
3757ec681f3Smrg      NIR_PASS_V(nir, nir_lower_input_attachments,
3767ec681f3Smrg                 &(nir_input_attachment_options) {
3777ec681f3Smrg                    .use_fragcoord_sysval = false,
3787ec681f3Smrg                       });
3797ec681f3Smrg   }
3807ec681f3Smrg
3817ec681f3Smrg   NIR_PASS_V(nir, nir_lower_explicit_io,
3827ec681f3Smrg              nir_var_mem_push_const,
3837ec681f3Smrg              nir_address_format_32bit_offset);
3847ec681f3Smrg
3857ec681f3Smrg   NIR_PASS_V(nir, nir_lower_explicit_io,
3867ec681f3Smrg              nir_var_mem_ubo | nir_var_mem_ssbo,
3877ec681f3Smrg              nir_address_format_32bit_index_offset);
3887ec681f3Smrg
3897ec681f3Smrg   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in |
3907ec681f3Smrg              nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
3917ec681f3Smrg              NULL);
3927ec681f3Smrg
3937ec681f3Smrg   NIR_PASS_V(nir, nir_propagate_invariant, false);
3947ec681f3Smrg   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
3957ec681f3Smrg              nir_shader_get_entrypoint(nir), true, false);
3967ec681f3Smrg
3977ec681f3Smrg   NIR_PASS_V(nir, nir_lower_system_values);
3987ec681f3Smrg   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
3997ec681f3Smrg
4007ec681f3Smrg   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
4017ec681f3Smrg
4027ec681f3Smrg   NIR_PASS_V(nir, nir_normalize_cubemap_coords);
4037ec681f3Smrg
4047ec681f3Smrg   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
4057ec681f3Smrg
4067ec681f3Smrg   NIR_PASS_V(nir, nir_split_var_copies);
4077ec681f3Smrg   NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);
4087ec681f3Smrg
4097ec681f3Smrg   nir_optimize(nir, true);
4107ec681f3Smrg
4117ec681f3Smrg   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
4127ec681f3Smrg
4137ec681f3Smrg   /* Lower a bunch of stuff */
4147ec681f3Smrg   NIR_PASS_V(nir, nir_lower_var_copies);
4157ec681f3Smrg
4167ec681f3Smrg   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
4177ec681f3Smrg
4187ec681f3Smrg   NIR_PASS_V(nir, nir_lower_indirect_derefs,
4197ec681f3Smrg              nir_var_function_temp, 2);
4207ec681f3Smrg
4217ec681f3Smrg   NIR_PASS_V(nir, nir_lower_array_deref_of_vec,
4227ec681f3Smrg              nir_var_mem_ubo | nir_var_mem_ssbo,
4237ec681f3Smrg              nir_lower_direct_array_deref_of_vec_load);
4247ec681f3Smrg
4257ec681f3Smrg   NIR_PASS_V(nir, nir_lower_frexp);
4267ec681f3Smrg
4277ec681f3Smrg   /* Get rid of split copies */
4287ec681f3Smrg   nir_optimize(nir, false);
4297ec681f3Smrg}
4307ec681f3Smrg
/* Produces a freshly-owned NIR shader for a pipeline stage: either by
 * translating the module's SPIR-V (applying specialization constants) or,
 * for driver-internal modules, by cloning the module's existing NIR. The
 * returned shader is then run through preprocess_nir. The caller owns the
 * result.
 */
static nir_shader *
shader_module_compile_to_nir(struct v3dv_device *device,
                             struct v3dv_pipeline_stage *stage)
{
   nir_shader *nir;
   const nir_shader_compiler_options *nir_options = &v3dv_nir_options;

   if (!stage->module->nir) {
      /* SPIR-V path: module->data holds the SPIR-V words */
      uint32_t *spirv = (uint32_t *) stage->module->data;
      assert(stage->module->size % 4 == 0);

      if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV))
         v3dv_print_spirv(stage->module->data, stage->module->size, stderr);

      uint32_t num_spec_entries = 0;
      struct nir_spirv_specialization *spec_entries =
         vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);
      const struct spirv_to_nir_options spirv_options = default_spirv_options;
      nir = spirv_to_nir(spirv, stage->module->size / 4,
                         spec_entries, num_spec_entries,
                         broadcom_shader_stage_to_gl(stage->stage),
                         stage->entrypoint,
                         &spirv_options, nir_options);
      assert(nir);
      nir_validate_shader(nir, "after spirv_to_nir");
      free(spec_entries);
   } else {
      /* For NIR modules created by the driver we can't consume the NIR
       * directly, we need to clone it first, since ownership of the NIR code
       * (as with SPIR-V code for SPIR-V shaders), belongs to the creator
       * of the module and modules can be destroyed immediately after been used
       * to create pipelines.
       */
      nir = nir_shader_clone(NULL, stage->module->nir);
      nir_validate_shader(nir, "nir module");
   }
   assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .frag_coord = true,
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   /* Optional debug dump of the shader before any driver lowering */
   if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
                             v3d_debug_flag_for_shader_stage(
                                broadcom_shader_stage_to_gl(stage->stage))))) {
      fprintf(stderr, "Initial form: %s prog %d NIR:\n",
              broadcom_shader_stage_name(stage->stage),
              stage->program_id);
      nir_print_shader(nir, stderr);
      fprintf(stderr, "\n");
   }

   preprocess_nir(nir);

   return nir;
}
4897ec681f3Smrg
/* I/O size callback: number of vec4 attribute slots a GLSL type occupies.
 * The bindless flag is ignored; the second argument to
 * glsl_count_attribute_slots is hardcoded false (vertex-input counting).
 */
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}
4957ec681f3Smrg
4967ec681f3Smrg/* FIXME: the number of parameters for this method is somewhat big. Perhaps
4977ec681f3Smrg * rethink.
4987ec681f3Smrg */
4997ec681f3Smrgstatic unsigned
5007ec681f3Smrgdescriptor_map_add(struct v3dv_descriptor_map *map,
5017ec681f3Smrg                   int set,
5027ec681f3Smrg                   int binding,
5037ec681f3Smrg                   int array_index,
5047ec681f3Smrg                   int array_size,
5057ec681f3Smrg                   uint8_t return_size)
5067ec681f3Smrg{
5077ec681f3Smrg   assert(array_index < array_size);
5087ec681f3Smrg   assert(return_size == 16 || return_size == 32);
5097ec681f3Smrg
5107ec681f3Smrg   unsigned index = 0;
5117ec681f3Smrg   for (unsigned i = 0; i < map->num_desc; i++) {
5127ec681f3Smrg      if (set == map->set[i] &&
5137ec681f3Smrg          binding == map->binding[i] &&
5147ec681f3Smrg          array_index == map->array_index[i]) {
5157ec681f3Smrg         assert(array_size == map->array_size[i]);
5167ec681f3Smrg         if (return_size != map->return_size[index]) {
5177ec681f3Smrg            /* It the return_size is different it means that the same sampler
5187ec681f3Smrg             * was used for operations with different precision
5197ec681f3Smrg             * requirement. In this case we need to ensure that we use the
5207ec681f3Smrg             * larger one.
5217ec681f3Smrg             */
5227ec681f3Smrg            map->return_size[index] = 32;
5237ec681f3Smrg         }
5247ec681f3Smrg         return index;
5257ec681f3Smrg      }
5267ec681f3Smrg      index++;
5277ec681f3Smrg   }
5287ec681f3Smrg
5297ec681f3Smrg   assert(index == map->num_desc);
5307ec681f3Smrg
5317ec681f3Smrg   map->set[map->num_desc] = set;
5327ec681f3Smrg   map->binding[map->num_desc] = binding;
5337ec681f3Smrg   map->array_index[map->num_desc] = array_index;
5347ec681f3Smrg   map->array_size[map->num_desc] = array_size;
5357ec681f3Smrg   map->return_size[map->num_desc] = return_size;
5367ec681f3Smrg   map->num_desc++;
5377ec681f3Smrg
5387ec681f3Smrg   return index;
5397ec681f3Smrg}
5407ec681f3Smrg
5417ec681f3Smrg
/* Rewrites a load_push_constant intrinsic into load_uniform in place; only
 * the intrinsic opcode changes, sources and destination are untouched.
 * b and pipeline are unused here — presumably kept so the signature matches
 * the other lowering helpers in this file (TODO confirm).
 */
static void
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                         struct v3dv_pipeline *pipeline)
{
   assert(instr->intrinsic == nir_intrinsic_load_push_constant);
   instr->intrinsic = nir_intrinsic_load_uniform;
}
5497ec681f3Smrg
/* Selects the per-stage descriptor map (sampler/texture/ubo/ssbo) that a
 * given Vulkan descriptor type is tracked in. For combined image-samplers,
 * is_sampler picks between the sampler and texture maps; it is ignored for
 * every other type. Aborts on descriptor types with no associated map.
 */
static struct v3dv_descriptor_map*
pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
                            VkDescriptorType desc_type,
                            gl_shader_stage gl_stage,
                            bool is_sampler)
{
   enum broadcom_shader_stage broadcom_stage =
      gl_shader_stage_to_broadcom(gl_stage);

   assert(pipeline->shared_data &&
          pipeline->shared_data->maps[broadcom_stage]);

   switch(desc_type) {
   case VK_DESCRIPTOR_TYPE_SAMPLER:
      return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      return is_sampler ?
         &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
         &pipeline->shared_data->maps[broadcom_stage]->texture_map;
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
      return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
   default:
      unreachable("Descriptor type unknown or not having a descriptor map");
   }
}
5837ec681f3Smrg
5847ec681f3Smrg/* Gathers info from the intrinsic (set and binding) and then lowers it so it
5857ec681f3Smrg * could be used by the v3d_compiler */
/* Gathers info from the intrinsic (set and binding) and then lowers it so it
 * could be used by the v3d_compiler */
static void
lower_vulkan_resource_index(nir_builder *b,
                            nir_intrinsic_instr *instr,
                            nir_shader *shader,
                            struct v3dv_pipeline *pipeline,
                            const struct v3dv_pipeline_layout *layout)
{
   assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);

   /* src[0] is the array index within the binding; only constant indices
    * are supported (see the unreachable below).
    */
   nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);

   unsigned set = nir_intrinsic_desc_set(instr);
   unsigned binding = nir_intrinsic_binding(instr);
   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct v3dv_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];
   unsigned index = 0;
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(instr);

   switch (desc_type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
      struct v3dv_descriptor_map *descriptor_map =
         pipeline_get_descriptor_map(pipeline, desc_type, shader->info.stage, false);

      if (!const_val)
         unreachable("non-constant vulkan_resource_index array index");

      index = descriptor_map_add(descriptor_map, set, binding,
                                 const_val->u32,
                                 binding_layout->array_size,
                                 32 /* return_size: doesn't really apply for this case */);

      if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
         /* skip index 0 which is used for push constants */
         index++;
      }
      break;
   }

   default:
      unreachable("unsupported desc_type for vulkan_resource_index");
      break;
   }

   /* Since we use the deref pass, both vulkan_resource_index and
    * vulkan_load_descriptor return a vec2 providing an index and
    * offset. Our backend compiler only cares about the index part.
    */
   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                            nir_imm_ivec2(b, index, 0));
   nir_instr_remove(&instr->instr);
}
6397ec681f3Smrg
6407ec681f3Smrg/* Returns return_size, so it could be used for the case of not having a
6417ec681f3Smrg * sampler object
6427ec681f3Smrg */
6437ec681f3Smrgstatic uint8_t
6447ec681f3Smrglower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
6457ec681f3Smrg                        nir_shader *shader,
6467ec681f3Smrg                        struct v3dv_pipeline *pipeline,
6477ec681f3Smrg                        const struct v3dv_pipeline_layout *layout)
6487ec681f3Smrg{
6497ec681f3Smrg   nir_ssa_def *index = NULL;
6507ec681f3Smrg   unsigned base_index = 0;
6517ec681f3Smrg   unsigned array_elements = 1;
6527ec681f3Smrg   nir_tex_src *src = &instr->src[src_idx];
6537ec681f3Smrg   bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
6547ec681f3Smrg
6557ec681f3Smrg   /* We compute first the offsets */
6567ec681f3Smrg   nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
6577ec681f3Smrg   while (deref->deref_type != nir_deref_type_var) {
6587ec681f3Smrg      assert(deref->parent.is_ssa);
6597ec681f3Smrg      nir_deref_instr *parent =
6607ec681f3Smrg         nir_instr_as_deref(deref->parent.ssa->parent_instr);
6617ec681f3Smrg
6627ec681f3Smrg      assert(deref->deref_type == nir_deref_type_array);
6637ec681f3Smrg
6647ec681f3Smrg      if (nir_src_is_const(deref->arr.index) && index == NULL) {
6657ec681f3Smrg         /* We're still building a direct index */
6667ec681f3Smrg         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
6677ec681f3Smrg      } else {
6687ec681f3Smrg         if (index == NULL) {
6697ec681f3Smrg            /* We used to be direct but not anymore */
6707ec681f3Smrg            index = nir_imm_int(b, base_index);
6717ec681f3Smrg            base_index = 0;
6727ec681f3Smrg         }
6737ec681f3Smrg
6747ec681f3Smrg         index = nir_iadd(b, index,
6757ec681f3Smrg                          nir_imul(b, nir_imm_int(b, array_elements),
6767ec681f3Smrg                                   nir_ssa_for_src(b, deref->arr.index, 1)));
6777ec681f3Smrg      }
6787ec681f3Smrg
6797ec681f3Smrg      array_elements *= glsl_get_length(parent->type);
6807ec681f3Smrg
6817ec681f3Smrg      deref = parent;
6827ec681f3Smrg   }
6837ec681f3Smrg
6847ec681f3Smrg   if (index)
6857ec681f3Smrg      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
6867ec681f3Smrg
6877ec681f3Smrg   /* We have the offsets, we apply them, rewriting the source or removing
6887ec681f3Smrg    * instr if needed
6897ec681f3Smrg    */
6907ec681f3Smrg   if (index) {
6917ec681f3Smrg      nir_instr_rewrite_src(&instr->instr, &src->src,
6927ec681f3Smrg                            nir_src_for_ssa(index));
6937ec681f3Smrg
6947ec681f3Smrg      src->src_type = is_sampler ?
6957ec681f3Smrg         nir_tex_src_sampler_offset :
6967ec681f3Smrg         nir_tex_src_texture_offset;
6977ec681f3Smrg   } else {
6987ec681f3Smrg      nir_tex_instr_remove_src(instr, src_idx);
6997ec681f3Smrg   }
7007ec681f3Smrg
7017ec681f3Smrg   uint32_t set = deref->var->data.descriptor_set;
7027ec681f3Smrg   uint32_t binding = deref->var->data.binding;
7037ec681f3Smrg   /* FIXME: this is a really simplified check for the precision to be used
7047ec681f3Smrg    * for the sampling. Right now we are ony checking for the variables used
7057ec681f3Smrg    * on the operation itself, but there are other cases that we could use to
7067ec681f3Smrg    * infer the precision requirement.
7077ec681f3Smrg    */
7087ec681f3Smrg   bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
7097ec681f3Smrg                            deref->var->data.precision == GLSL_PRECISION_LOW;
7107ec681f3Smrg   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
7117ec681f3Smrg   struct v3dv_descriptor_set_binding_layout *binding_layout =
7127ec681f3Smrg      &set_layout->binding[binding];
7137ec681f3Smrg
7147ec681f3Smrg   /* For input attachments, the shader includes the attachment_idx. As we are
7157ec681f3Smrg    * treating them as a texture, we only want the base_index
7167ec681f3Smrg    */
7177ec681f3Smrg   uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
7187ec681f3Smrg      deref->var->data.index + base_index :
7197ec681f3Smrg      base_index;
7207ec681f3Smrg
7217ec681f3Smrg   uint8_t return_size;
7227ec681f3Smrg   if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT))
7237ec681f3Smrg      return_size = 16;
7247ec681f3Smrg   else  if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT))
7257ec681f3Smrg      return_size = 32;
7267ec681f3Smrg   else
7277ec681f3Smrg      return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
7287ec681f3Smrg
7297ec681f3Smrg   struct v3dv_descriptor_map *map =
7307ec681f3Smrg      pipeline_get_descriptor_map(pipeline, binding_layout->type,
7317ec681f3Smrg                                  shader->info.stage, is_sampler);
7327ec681f3Smrg   int desc_index =
7337ec681f3Smrg      descriptor_map_add(map,
7347ec681f3Smrg                         deref->var->data.descriptor_set,
7357ec681f3Smrg                         deref->var->data.binding,
7367ec681f3Smrg                         array_index,
7377ec681f3Smrg                         binding_layout->array_size,
7387ec681f3Smrg                         return_size);
7397ec681f3Smrg
7407ec681f3Smrg   if (is_sampler)
7417ec681f3Smrg      instr->sampler_index = desc_index;
7427ec681f3Smrg   else
7437ec681f3Smrg      instr->texture_index = desc_index;
7447ec681f3Smrg
7457ec681f3Smrg   return return_size;
7467ec681f3Smrg}
7477ec681f3Smrg
7487ec681f3Smrgstatic bool
7497ec681f3Smrglower_sampler(nir_builder *b, nir_tex_instr *instr,
7507ec681f3Smrg              nir_shader *shader,
7517ec681f3Smrg              struct v3dv_pipeline *pipeline,
7527ec681f3Smrg              const struct v3dv_pipeline_layout *layout)
7537ec681f3Smrg{
7547ec681f3Smrg   uint8_t return_size = 0;
7557ec681f3Smrg
7567ec681f3Smrg   int texture_idx =
7577ec681f3Smrg      nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
7587ec681f3Smrg
7597ec681f3Smrg   if (texture_idx >= 0)
7607ec681f3Smrg      return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader,
7617ec681f3Smrg                                            pipeline, layout);
7627ec681f3Smrg
7637ec681f3Smrg   int sampler_idx =
7647ec681f3Smrg      nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
7657ec681f3Smrg
7667ec681f3Smrg   if (sampler_idx >= 0)
7677ec681f3Smrg      lower_tex_src_to_offset(b, instr, sampler_idx, shader, pipeline, layout);
7687ec681f3Smrg
7697ec681f3Smrg   if (texture_idx < 0 && sampler_idx < 0)
7707ec681f3Smrg      return false;
7717ec681f3Smrg
7727ec681f3Smrg   /* If we don't have a sampler, we assign it the idx we reserve for this
7737ec681f3Smrg    * case, and we ensure that it is using the correct return size.
7747ec681f3Smrg    */
7757ec681f3Smrg   if (sampler_idx < 0) {
7767ec681f3Smrg      instr->sampler_index = return_size == 16 ?
7777ec681f3Smrg         V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
7787ec681f3Smrg   }
7797ec681f3Smrg
7807ec681f3Smrg   return true;
7817ec681f3Smrg}
7827ec681f3Smrg
7837ec681f3Smrg/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
7847ec681f3Smrgstatic void
7857ec681f3Smrglower_image_deref(nir_builder *b,
7867ec681f3Smrg                  nir_intrinsic_instr *instr,
7877ec681f3Smrg                  nir_shader *shader,
7887ec681f3Smrg                  struct v3dv_pipeline *pipeline,
7897ec681f3Smrg                  const struct v3dv_pipeline_layout *layout)
7907ec681f3Smrg{
7917ec681f3Smrg   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
7927ec681f3Smrg   nir_ssa_def *index = NULL;
7937ec681f3Smrg   unsigned array_elements = 1;
7947ec681f3Smrg   unsigned base_index = 0;
7957ec681f3Smrg
7967ec681f3Smrg   while (deref->deref_type != nir_deref_type_var) {
7977ec681f3Smrg      assert(deref->parent.is_ssa);
7987ec681f3Smrg      nir_deref_instr *parent =
7997ec681f3Smrg         nir_instr_as_deref(deref->parent.ssa->parent_instr);
8007ec681f3Smrg
8017ec681f3Smrg      assert(deref->deref_type == nir_deref_type_array);
8027ec681f3Smrg
8037ec681f3Smrg      if (nir_src_is_const(deref->arr.index) && index == NULL) {
8047ec681f3Smrg         /* We're still building a direct index */
8057ec681f3Smrg         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
8067ec681f3Smrg      } else {
8077ec681f3Smrg         if (index == NULL) {
8087ec681f3Smrg            /* We used to be direct but not anymore */
8097ec681f3Smrg            index = nir_imm_int(b, base_index);
8107ec681f3Smrg            base_index = 0;
8117ec681f3Smrg         }
8127ec681f3Smrg
8137ec681f3Smrg         index = nir_iadd(b, index,
8147ec681f3Smrg                          nir_imul(b, nir_imm_int(b, array_elements),
8157ec681f3Smrg                                   nir_ssa_for_src(b, deref->arr.index, 1)));
8167ec681f3Smrg      }
8177ec681f3Smrg
8187ec681f3Smrg      array_elements *= glsl_get_length(parent->type);
8197ec681f3Smrg
8207ec681f3Smrg      deref = parent;
8217ec681f3Smrg   }
8227ec681f3Smrg
8237ec681f3Smrg   if (index)
8247ec681f3Smrg      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
8257ec681f3Smrg
8267ec681f3Smrg   uint32_t set = deref->var->data.descriptor_set;
8277ec681f3Smrg   uint32_t binding = deref->var->data.binding;
8287ec681f3Smrg   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
8297ec681f3Smrg   struct v3dv_descriptor_set_binding_layout *binding_layout =
8307ec681f3Smrg      &set_layout->binding[binding];
8317ec681f3Smrg
8327ec681f3Smrg   uint32_t array_index = deref->var->data.index + base_index;
8337ec681f3Smrg
8347ec681f3Smrg   assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
8357ec681f3Smrg          binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
8367ec681f3Smrg
8377ec681f3Smrg   struct v3dv_descriptor_map *map =
8387ec681f3Smrg      pipeline_get_descriptor_map(pipeline, binding_layout->type,
8397ec681f3Smrg                                  shader->info.stage, false);
8407ec681f3Smrg
8417ec681f3Smrg   int desc_index =
8427ec681f3Smrg      descriptor_map_add(map,
8437ec681f3Smrg                         deref->var->data.descriptor_set,
8447ec681f3Smrg                         deref->var->data.binding,
8457ec681f3Smrg                         array_index,
8467ec681f3Smrg                         binding_layout->array_size,
8477ec681f3Smrg                         32 /* return_size: doesn't apply for textures */);
8487ec681f3Smrg
8497ec681f3Smrg   /* Note: we don't need to do anything here in relation to the precision and
8507ec681f3Smrg    * the output size because for images we can infer that info from the image
8517ec681f3Smrg    * intrinsic, that includes the image format (see
8527ec681f3Smrg    * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
8537ec681f3Smrg    */
8547ec681f3Smrg
8557ec681f3Smrg   index = nir_imm_int(b, desc_index);
8567ec681f3Smrg
8577ec681f3Smrg   nir_rewrite_image_intrinsic(instr, index, false);
8587ec681f3Smrg}
8597ec681f3Smrg
8607ec681f3Smrgstatic bool
8617ec681f3Smrglower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
8627ec681f3Smrg                nir_shader *shader,
8637ec681f3Smrg                struct v3dv_pipeline *pipeline,
8647ec681f3Smrg                const struct v3dv_pipeline_layout *layout)
8657ec681f3Smrg{
8667ec681f3Smrg   switch (instr->intrinsic) {
8677ec681f3Smrg   case nir_intrinsic_load_layer_id:
8687ec681f3Smrg      /* FIXME: if layered rendering gets supported, this would need a real
8697ec681f3Smrg       * lowering
8707ec681f3Smrg       */
8717ec681f3Smrg      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
8727ec681f3Smrg                               nir_imm_int(b, 0));
8737ec681f3Smrg      nir_instr_remove(&instr->instr);
8747ec681f3Smrg      return true;
8757ec681f3Smrg
8767ec681f3Smrg   case nir_intrinsic_load_push_constant:
8777ec681f3Smrg      lower_load_push_constant(b, instr, pipeline);
8787ec681f3Smrg      return true;
8797ec681f3Smrg
8807ec681f3Smrg   case nir_intrinsic_vulkan_resource_index:
8817ec681f3Smrg      lower_vulkan_resource_index(b, instr, shader, pipeline, layout);
8827ec681f3Smrg      return true;
8837ec681f3Smrg
8847ec681f3Smrg   case nir_intrinsic_load_vulkan_descriptor: {
8857ec681f3Smrg      /* Loading the descriptor happens as part of load/store instructions,
8867ec681f3Smrg       * so for us this is a no-op.
8877ec681f3Smrg       */
8887ec681f3Smrg      nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
8897ec681f3Smrg      nir_instr_remove(&instr->instr);
8907ec681f3Smrg      return true;
8917ec681f3Smrg   }
8927ec681f3Smrg
8937ec681f3Smrg   case nir_intrinsic_image_deref_load:
8947ec681f3Smrg   case nir_intrinsic_image_deref_store:
8957ec681f3Smrg   case nir_intrinsic_image_deref_atomic_add:
8967ec681f3Smrg   case nir_intrinsic_image_deref_atomic_imin:
8977ec681f3Smrg   case nir_intrinsic_image_deref_atomic_umin:
8987ec681f3Smrg   case nir_intrinsic_image_deref_atomic_imax:
8997ec681f3Smrg   case nir_intrinsic_image_deref_atomic_umax:
9007ec681f3Smrg   case nir_intrinsic_image_deref_atomic_and:
9017ec681f3Smrg   case nir_intrinsic_image_deref_atomic_or:
9027ec681f3Smrg   case nir_intrinsic_image_deref_atomic_xor:
9037ec681f3Smrg   case nir_intrinsic_image_deref_atomic_exchange:
9047ec681f3Smrg   case nir_intrinsic_image_deref_atomic_comp_swap:
9057ec681f3Smrg   case nir_intrinsic_image_deref_size:
9067ec681f3Smrg   case nir_intrinsic_image_deref_samples:
9077ec681f3Smrg      lower_image_deref(b, instr, shader, pipeline, layout);
9087ec681f3Smrg      return true;
9097ec681f3Smrg
9107ec681f3Smrg   default:
9117ec681f3Smrg      return false;
9127ec681f3Smrg   }
9137ec681f3Smrg}
9147ec681f3Smrg
9157ec681f3Smrgstatic bool
9167ec681f3Smrglower_impl(nir_function_impl *impl,
9177ec681f3Smrg           nir_shader *shader,
9187ec681f3Smrg           struct v3dv_pipeline *pipeline,
9197ec681f3Smrg           const struct v3dv_pipeline_layout *layout)
9207ec681f3Smrg{
9217ec681f3Smrg   nir_builder b;
9227ec681f3Smrg   nir_builder_init(&b, impl);
9237ec681f3Smrg   bool progress = false;
9247ec681f3Smrg
9257ec681f3Smrg   nir_foreach_block(block, impl) {
9267ec681f3Smrg      nir_foreach_instr_safe(instr, block) {
9277ec681f3Smrg         b.cursor = nir_before_instr(instr);
9287ec681f3Smrg         switch (instr->type) {
9297ec681f3Smrg         case nir_instr_type_tex:
9307ec681f3Smrg            progress |=
9317ec681f3Smrg               lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout);
9327ec681f3Smrg            break;
9337ec681f3Smrg         case nir_instr_type_intrinsic:
9347ec681f3Smrg            progress |=
9357ec681f3Smrg               lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader,
9367ec681f3Smrg                               pipeline, layout);
9377ec681f3Smrg            break;
9387ec681f3Smrg         default:
9397ec681f3Smrg            break;
9407ec681f3Smrg         }
9417ec681f3Smrg      }
9427ec681f3Smrg   }
9437ec681f3Smrg
9447ec681f3Smrg   return progress;
9457ec681f3Smrg}
9467ec681f3Smrg
9477ec681f3Smrgstatic bool
9487ec681f3Smrglower_pipeline_layout_info(nir_shader *shader,
9497ec681f3Smrg                           struct v3dv_pipeline *pipeline,
9507ec681f3Smrg                           const struct v3dv_pipeline_layout *layout)
9517ec681f3Smrg{
9527ec681f3Smrg   bool progress = false;
9537ec681f3Smrg
9547ec681f3Smrg   nir_foreach_function(function, shader) {
9557ec681f3Smrg      if (function->impl)
9567ec681f3Smrg         progress |= lower_impl(function->impl, shader, pipeline, layout);
9577ec681f3Smrg   }
9587ec681f3Smrg
9597ec681f3Smrg   return progress;
9607ec681f3Smrg}
9617ec681f3Smrg
9627ec681f3Smrg
9637ec681f3Smrgstatic void
9647ec681f3Smrglower_fs_io(nir_shader *nir)
9657ec681f3Smrg{
9667ec681f3Smrg   /* Our backend doesn't handle array fragment shader outputs */
9677ec681f3Smrg   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
9687ec681f3Smrg   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
9697ec681f3Smrg
9707ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
9717ec681f3Smrg                               MESA_SHADER_FRAGMENT);
9727ec681f3Smrg
9737ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
9747ec681f3Smrg                               MESA_SHADER_FRAGMENT);
9757ec681f3Smrg
9767ec681f3Smrg   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
9777ec681f3Smrg              type_size_vec4, 0);
9787ec681f3Smrg}
9797ec681f3Smrg
9807ec681f3Smrgstatic void
9817ec681f3Smrglower_gs_io(struct nir_shader *nir)
9827ec681f3Smrg{
9837ec681f3Smrg   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
9847ec681f3Smrg
9857ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
9867ec681f3Smrg                               MESA_SHADER_GEOMETRY);
9877ec681f3Smrg
9887ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
9897ec681f3Smrg                               MESA_SHADER_GEOMETRY);
9907ec681f3Smrg}
9917ec681f3Smrg
9927ec681f3Smrgstatic void
9937ec681f3Smrglower_vs_io(struct nir_shader *nir)
9947ec681f3Smrg{
9957ec681f3Smrg   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
9967ec681f3Smrg
9977ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
9987ec681f3Smrg                               MESA_SHADER_VERTEX);
9997ec681f3Smrg
10007ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
10017ec681f3Smrg                               MESA_SHADER_VERTEX);
10027ec681f3Smrg
10037ec681f3Smrg   /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
10047ec681f3Smrg    * overlaps with v3d_nir_lower_io. Need further research though.
10057ec681f3Smrg    */
10067ec681f3Smrg}
10077ec681f3Smrg
10087ec681f3Smrgstatic void
10097ec681f3Smrgshader_debug_output(const char *message, void *data)
10107ec681f3Smrg{
10117ec681f3Smrg   /* FIXME: We probably don't want to debug anything extra here, and in fact
10127ec681f3Smrg    * the compiler is not using this callback too much, only as an alternative
10137ec681f3Smrg    * way to debug out the shaderdb stats, that you can already get using
10147ec681f3Smrg    * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
10157ec681f3Smrg    * compiler to remove that callback.
10167ec681f3Smrg    */
10177ec681f3Smrg}
10187ec681f3Smrg
10197ec681f3Smrgstatic void
10207ec681f3Smrgpipeline_populate_v3d_key(struct v3d_key *key,
10217ec681f3Smrg                          const struct v3dv_pipeline_stage *p_stage,
10227ec681f3Smrg                          uint32_t ucp_enables,
10237ec681f3Smrg                          bool robust_buffer_access)
10247ec681f3Smrg{
10257ec681f3Smrg   assert(p_stage->pipeline->shared_data &&
10267ec681f3Smrg          p_stage->pipeline->shared_data->maps[p_stage->stage]);
10277ec681f3Smrg
10287ec681f3Smrg   /* The following values are default values used at pipeline create. We use
10297ec681f3Smrg    * there 32 bit as default return size.
10307ec681f3Smrg    */
10317ec681f3Smrg   struct v3dv_descriptor_map *sampler_map =
10327ec681f3Smrg      &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
10337ec681f3Smrg   struct v3dv_descriptor_map *texture_map =
10347ec681f3Smrg      &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
10357ec681f3Smrg
10367ec681f3Smrg   key->num_tex_used = texture_map->num_desc;
10377ec681f3Smrg   assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
10387ec681f3Smrg   for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
10397ec681f3Smrg      key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
10407ec681f3Smrg      key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
10417ec681f3Smrg      key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
10427ec681f3Smrg      key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
10437ec681f3Smrg   }
10447ec681f3Smrg
10457ec681f3Smrg   key->num_samplers_used = sampler_map->num_desc;
10467ec681f3Smrg   assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
10477ec681f3Smrg   for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
10487ec681f3Smrg        sampler_idx++) {
10497ec681f3Smrg      key->sampler[sampler_idx].return_size =
10507ec681f3Smrg         sampler_map->return_size[sampler_idx];
10517ec681f3Smrg
10527ec681f3Smrg      key->sampler[sampler_idx].return_channels =
10537ec681f3Smrg         key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
10547ec681f3Smrg   }
10557ec681f3Smrg
10567ec681f3Smrg   switch (p_stage->stage) {
10577ec681f3Smrg   case BROADCOM_SHADER_VERTEX:
10587ec681f3Smrg   case BROADCOM_SHADER_VERTEX_BIN:
10597ec681f3Smrg      key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
10607ec681f3Smrg      break;
10617ec681f3Smrg   case BROADCOM_SHADER_GEOMETRY:
10627ec681f3Smrg   case BROADCOM_SHADER_GEOMETRY_BIN:
10637ec681f3Smrg      /* FIXME: while we don't implement tessellation shaders */
10647ec681f3Smrg      key->is_last_geometry_stage = true;
10657ec681f3Smrg      break;
10667ec681f3Smrg   case BROADCOM_SHADER_FRAGMENT:
10677ec681f3Smrg   case BROADCOM_SHADER_COMPUTE:
10687ec681f3Smrg      key->is_last_geometry_stage = false;
10697ec681f3Smrg      break;
10707ec681f3Smrg   default:
10717ec681f3Smrg      unreachable("unsupported shader stage");
10727ec681f3Smrg   }
10737ec681f3Smrg
10747ec681f3Smrg   /* Vulkan doesn't have fixed function state for user clip planes. Instead,
10757ec681f3Smrg    * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
10767ec681f3Smrg    * takes care of adding a single compact array variable at
10777ec681f3Smrg    * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
10787ec681f3Smrg    *
10797ec681f3Smrg    * The only lowering we are interested is specific to the fragment shader,
10807ec681f3Smrg    * where we want to emit discards to honor writes to gl_ClipDistance[] in
10817ec681f3Smrg    * previous stages. This is done via nir_lower_clip_fs() so we only set up
10827ec681f3Smrg    * the ucp enable mask for that stage.
10837ec681f3Smrg    */
10847ec681f3Smrg   key->ucp_enables = ucp_enables;
10857ec681f3Smrg
10867ec681f3Smrg   key->robust_buffer_access = robust_buffer_access;
10877ec681f3Smrg
10887ec681f3Smrg   key->environment = V3D_ENVIRONMENT_VULKAN;
10897ec681f3Smrg}
10907ec681f3Smrg
10917ec681f3Smrg/* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the
10927ec681f3Smrg * same. For not using prim_mode that is the one already used on v3d
10937ec681f3Smrg */
10947ec681f3Smrgstatic const enum pipe_prim_type vk_to_pipe_prim_type[] = {
10957ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
10967ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
10977ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
10987ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
10997ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
11007ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
11017ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
11027ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
11037ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
11047ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
11057ec681f3Smrg};
11067ec681f3Smrg
11077ec681f3Smrgstatic const enum pipe_logicop vk_to_pipe_logicop[] = {
11087ec681f3Smrg   [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
11097ec681f3Smrg   [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
11107ec681f3Smrg   [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
11117ec681f3Smrg   [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
11127ec681f3Smrg   [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
11137ec681f3Smrg   [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
11147ec681f3Smrg   [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
11157ec681f3Smrg   [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
11167ec681f3Smrg   [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
11177ec681f3Smrg   [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
11187ec681f3Smrg   [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
11197ec681f3Smrg   [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
11207ec681f3Smrg   [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
11217ec681f3Smrg   [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
11227ec681f3Smrg   [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
11237ec681f3Smrg   [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
11247ec681f3Smrg};
11257ec681f3Smrg
11267ec681f3Smrgstatic void
11277ec681f3Smrgpipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
11287ec681f3Smrg                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
11297ec681f3Smrg                             const struct v3dv_pipeline_stage *p_stage,
11307ec681f3Smrg                             bool has_geometry_shader,
11317ec681f3Smrg                             uint32_t ucp_enables)
11327ec681f3Smrg{
11337ec681f3Smrg   assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
11347ec681f3Smrg
11357ec681f3Smrg   memset(key, 0, sizeof(*key));
11367ec681f3Smrg
11377ec681f3Smrg   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
11387ec681f3Smrg   pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);
11397ec681f3Smrg
11407ec681f3Smrg   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
11417ec681f3Smrg      pCreateInfo->pInputAssemblyState;
11427ec681f3Smrg   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
11437ec681f3Smrg
11447ec681f3Smrg   key->is_points = (topology == PIPE_PRIM_POINTS);
11457ec681f3Smrg   key->is_lines = (topology >= PIPE_PRIM_LINES &&
11467ec681f3Smrg                    topology <= PIPE_PRIM_LINE_STRIP);
11477ec681f3Smrg   key->has_gs = has_geometry_shader;
11487ec681f3Smrg
11497ec681f3Smrg   const VkPipelineColorBlendStateCreateInfo *cb_info =
11507ec681f3Smrg      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
11517ec681f3Smrg      pCreateInfo->pColorBlendState : NULL;
11527ec681f3Smrg
11537ec681f3Smrg   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
11547ec681f3Smrg                       vk_to_pipe_logicop[cb_info->logicOp] :
11557ec681f3Smrg                       PIPE_LOGICOP_COPY;
11567ec681f3Smrg
11577ec681f3Smrg   const bool raster_enabled =
11587ec681f3Smrg      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
11597ec681f3Smrg
11607ec681f3Smrg   /* Multisample rasterization state must be ignored if rasterization
11617ec681f3Smrg    * is disabled.
11627ec681f3Smrg    */
11637ec681f3Smrg   const VkPipelineMultisampleStateCreateInfo *ms_info =
11647ec681f3Smrg      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
11657ec681f3Smrg   if (ms_info) {
11667ec681f3Smrg      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
11677ec681f3Smrg             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
11687ec681f3Smrg      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
11697ec681f3Smrg
11707ec681f3Smrg      if (key->msaa) {
11717ec681f3Smrg         key->sample_coverage =
11727ec681f3Smrg            p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
11737ec681f3Smrg         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
11747ec681f3Smrg         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
11757ec681f3Smrg      }
11767ec681f3Smrg   }
11777ec681f3Smrg
11787ec681f3Smrg   /* This is intended for V3D versions before 4.1, otherwise we just use the
11797ec681f3Smrg    * tile buffer load/store swap R/B bit.
11807ec681f3Smrg    */
11817ec681f3Smrg   key->swap_color_rb = 0;
11827ec681f3Smrg
11837ec681f3Smrg   const struct v3dv_render_pass *pass =
11847ec681f3Smrg      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
11857ec681f3Smrg   const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
11867ec681f3Smrg   for (uint32_t i = 0; i < subpass->color_count; i++) {
11877ec681f3Smrg      const uint32_t att_idx = subpass->color_attachments[i].attachment;
11887ec681f3Smrg      if (att_idx == VK_ATTACHMENT_UNUSED)
11897ec681f3Smrg         continue;
11907ec681f3Smrg
11917ec681f3Smrg      key->cbufs |= 1 << i;
11927ec681f3Smrg
11937ec681f3Smrg      VkFormat fb_format = pass->attachments[att_idx].desc.format;
11947ec681f3Smrg      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
11957ec681f3Smrg
11967ec681f3Smrg      /* If logic operations are enabled then we might emit color reads and we
11977ec681f3Smrg       * need to know the color buffer format and swizzle for that
11987ec681f3Smrg       */
11997ec681f3Smrg      if (key->logicop_func != PIPE_LOGICOP_COPY) {
12007ec681f3Smrg         key->color_fmt[i].format = fb_pipe_format;
12017ec681f3Smrg         key->color_fmt[i].swizzle =
12027ec681f3Smrg            v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format);
12037ec681f3Smrg      }
12047ec681f3Smrg
12057ec681f3Smrg      const struct util_format_description *desc =
12067ec681f3Smrg         vk_format_description(fb_format);
12077ec681f3Smrg
12087ec681f3Smrg      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
12097ec681f3Smrg          desc->channel[0].size == 32) {
12107ec681f3Smrg         key->f32_color_rb |= 1 << i;
12117ec681f3Smrg      }
12127ec681f3Smrg
12137ec681f3Smrg      if (p_stage->nir->info.fs.untyped_color_outputs) {
12147ec681f3Smrg         if (util_format_is_pure_uint(fb_pipe_format))
12157ec681f3Smrg            key->uint_color_rb |= 1 << i;
12167ec681f3Smrg         else if (util_format_is_pure_sint(fb_pipe_format))
12177ec681f3Smrg            key->int_color_rb |= 1 << i;
12187ec681f3Smrg      }
12197ec681f3Smrg
12207ec681f3Smrg      if (key->is_points) {
12217ec681f3Smrg         /* FIXME: The mask would need to be computed based on the shader
12227ec681f3Smrg          * inputs. On gallium it is done at st_atom_rasterizer
12237ec681f3Smrg          * (sprite_coord_enable). anv seems (need to confirm) to do that on
12247ec681f3Smrg          * genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also
12257ec681f3Smrg          * better to have tests to guide filling the mask.
12267ec681f3Smrg          */
12277ec681f3Smrg         key->point_sprite_mask = 0;
12287ec681f3Smrg
12297ec681f3Smrg         /* Vulkan mandates upper left. */
12307ec681f3Smrg         key->point_coord_upper_left = true;
12317ec681f3Smrg      }
12327ec681f3Smrg   }
12337ec681f3Smrg}
12347ec681f3Smrg
12357ec681f3Smrgstatic void
12367ec681f3Smrgsetup_stage_outputs_from_next_stage_inputs(
12377ec681f3Smrg   uint8_t next_stage_num_inputs,
12387ec681f3Smrg   struct v3d_varying_slot *next_stage_input_slots,
12397ec681f3Smrg   uint8_t *num_used_outputs,
12407ec681f3Smrg   struct v3d_varying_slot *used_output_slots,
12417ec681f3Smrg   uint32_t size_of_used_output_slots)
12427ec681f3Smrg{
12437ec681f3Smrg   *num_used_outputs = next_stage_num_inputs;
12447ec681f3Smrg   memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
12457ec681f3Smrg}
12467ec681f3Smrg
12477ec681f3Smrgstatic void
12487ec681f3Smrgpipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
12497ec681f3Smrg                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
12507ec681f3Smrg                             const struct v3dv_pipeline_stage *p_stage)
12517ec681f3Smrg{
12527ec681f3Smrg   assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
12537ec681f3Smrg          p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
12547ec681f3Smrg
12557ec681f3Smrg   memset(key, 0, sizeof(*key));
12567ec681f3Smrg
12577ec681f3Smrg   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
12587ec681f3Smrg   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
12597ec681f3Smrg
12607ec681f3Smrg   struct v3dv_pipeline *pipeline = p_stage->pipeline;
12617ec681f3Smrg
12627ec681f3Smrg   key->per_vertex_point_size =
12637ec681f3Smrg      p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
12647ec681f3Smrg
12657ec681f3Smrg   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
12667ec681f3Smrg
12677ec681f3Smrg   assert(key->base.is_last_geometry_stage);
12687ec681f3Smrg   if (key->is_coord) {
12697ec681f3Smrg      /* Output varyings in the last binning shader are only used for transform
12707ec681f3Smrg       * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
12717ec681f3Smrg       */
12727ec681f3Smrg      key->num_used_outputs = 0;
12737ec681f3Smrg   } else {
12747ec681f3Smrg      struct v3dv_shader_variant *fs_variant =
12757ec681f3Smrg         pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
12767ec681f3Smrg
12777ec681f3Smrg      STATIC_ASSERT(sizeof(key->used_outputs) ==
12787ec681f3Smrg                    sizeof(fs_variant->prog_data.fs->input_slots));
12797ec681f3Smrg
12807ec681f3Smrg      setup_stage_outputs_from_next_stage_inputs(
12817ec681f3Smrg         fs_variant->prog_data.fs->num_inputs,
12827ec681f3Smrg         fs_variant->prog_data.fs->input_slots,
12837ec681f3Smrg         &key->num_used_outputs,
12847ec681f3Smrg         key->used_outputs,
12857ec681f3Smrg         sizeof(key->used_outputs));
12867ec681f3Smrg   }
12877ec681f3Smrg}
12887ec681f3Smrg
/**
 * Fills in the compiler key for a vertex shader (render or binning) variant.
 *
 * The set of outputs the VS must emit depends on which stage consumes them:
 * the binning/render GS when a geometry shader is present, otherwise the
 * fragment shader (render VS) or nothing (last binning stage, since
 * transform feedback is not supported).
 */
static void
pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
{
   assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
          p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);

   /* Zero the whole key so padding bytes hash consistently. */
   memset(key, 0, sizeof(*key));

   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);

   struct v3dv_pipeline *pipeline = p_stage->pipeline;

   /* Vulkan specifies a point size per vertex, so true for if the prim are
    * points, like on ES2)
    */
   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];

   /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
    * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
   key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);

   /* "coord" is the hardware's name for a binning-pass shader. */
   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);

   if (key->is_coord) { /* Binning VS*/
      if (key->base.is_last_geometry_stage) {
         /* Output varyings in the last binning shader are only used for
          * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
          * supported.
          */
         key->num_used_outputs = 0;
      } else {
         /* Linking against GS binning program */
         assert(pipeline->gs);
         struct v3dv_shader_variant *gs_bin_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(gs_bin_variant->prog_data.gs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            gs_bin_variant->prog_data.gs->num_inputs,
            gs_bin_variant->prog_data.gs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      }
   } else { /* Render VS */
      if (pipeline->gs) {
         /* Linking against GS render program */
         struct v3dv_shader_variant *gs_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(gs_variant->prog_data.gs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            gs_variant->prog_data.gs->num_inputs,
            gs_variant->prog_data.gs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      } else {
         /* Linking against FS program */
         struct v3dv_shader_variant *fs_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(fs_variant->prog_data.fs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            fs_variant->prog_data.fs->num_inputs,
            fs_variant->prog_data.fs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      }
   }

   /* The HW can swizzle BGRA vertex attributes for free; record which
    * attribute locations need the red/blue swap.
    */
   const VkPipelineVertexInputStateCreateInfo *vi_info =
      pCreateInfo->pVertexInputState;
   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &vi_info->pVertexAttributeDescriptions[i];
      assert(desc->location < MAX_VERTEX_ATTRIBS);
      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
   }
}
13827ec681f3Smrg
13837ec681f3Smrg/**
13847ec681f3Smrg * Creates the initial form of the pipeline stage for a binning shader by
13857ec681f3Smrg * cloning the render shader and flagging it as a coordinate shader.
13867ec681f3Smrg *
13877ec681f3Smrg * Returns NULL if it was not able to allocate the object, so it should be
13887ec681f3Smrg * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
13897ec681f3Smrg */
13907ec681f3Smrgstatic struct v3dv_pipeline_stage *
13917ec681f3Smrgpipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
13927ec681f3Smrg                              const VkAllocationCallbacks *pAllocator)
13937ec681f3Smrg{
13947ec681f3Smrg   struct v3dv_device *device = src->pipeline->device;
13957ec681f3Smrg
13967ec681f3Smrg   struct v3dv_pipeline_stage *p_stage =
13977ec681f3Smrg      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
13987ec681f3Smrg                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
13997ec681f3Smrg
14007ec681f3Smrg   if (p_stage == NULL)
14017ec681f3Smrg      return NULL;
14027ec681f3Smrg
14037ec681f3Smrg   assert(src->stage == BROADCOM_SHADER_VERTEX ||
14047ec681f3Smrg          src->stage == BROADCOM_SHADER_GEOMETRY);
14057ec681f3Smrg
14067ec681f3Smrg   enum broadcom_shader_stage bin_stage =
14077ec681f3Smrg      src->stage == BROADCOM_SHADER_VERTEX ?
14087ec681f3Smrg         BROADCOM_SHADER_VERTEX_BIN :
14097ec681f3Smrg         BROADCOM_SHADER_GEOMETRY_BIN;
14107ec681f3Smrg
14117ec681f3Smrg   p_stage->pipeline = src->pipeline;
14127ec681f3Smrg   p_stage->stage = bin_stage;
14137ec681f3Smrg   p_stage->entrypoint = src->entrypoint;
14147ec681f3Smrg   p_stage->module = src->module;
14157ec681f3Smrg   /* For binning shaders we will clone the NIR code from the corresponding
14167ec681f3Smrg    * render shader later, when we call pipeline_compile_xxx_shader. This way
14177ec681f3Smrg    * we only have to run the relevant NIR lowerings once for render shaders
14187ec681f3Smrg    */
14197ec681f3Smrg   p_stage->nir = NULL;
14207ec681f3Smrg   p_stage->spec_info = src->spec_info;
14217ec681f3Smrg   p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
14227ec681f3Smrg   memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
14237ec681f3Smrg
14247ec681f3Smrg   return p_stage;
14257ec681f3Smrg}
14267ec681f3Smrg
14277ec681f3Smrg/**
14287ec681f3Smrg * Returns false if it was not able to allocate or map the assembly bo memory.
14297ec681f3Smrg */
14307ec681f3Smrgstatic bool
14317ec681f3Smrgupload_assembly(struct v3dv_pipeline *pipeline)
14327ec681f3Smrg{
14337ec681f3Smrg   uint32_t total_size = 0;
14347ec681f3Smrg   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
14357ec681f3Smrg      struct v3dv_shader_variant *variant =
14367ec681f3Smrg         pipeline->shared_data->variants[stage];
14377ec681f3Smrg
14387ec681f3Smrg      if (variant != NULL)
14397ec681f3Smrg         total_size += variant->qpu_insts_size;
14407ec681f3Smrg   }
14417ec681f3Smrg
14427ec681f3Smrg   struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
14437ec681f3Smrg                                      "pipeline shader assembly", true);
14447ec681f3Smrg   if (!bo) {
14457ec681f3Smrg      fprintf(stderr, "failed to allocate memory for shader\n");
14467ec681f3Smrg      return false;
14477ec681f3Smrg   }
14487ec681f3Smrg
14497ec681f3Smrg   bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
14507ec681f3Smrg   if (!ok) {
14517ec681f3Smrg      fprintf(stderr, "failed to map source shader buffer\n");
14527ec681f3Smrg      return false;
14537ec681f3Smrg   }
14547ec681f3Smrg
14557ec681f3Smrg   uint32_t offset = 0;
14567ec681f3Smrg   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
14577ec681f3Smrg      struct v3dv_shader_variant *variant =
14587ec681f3Smrg         pipeline->shared_data->variants[stage];
14597ec681f3Smrg
14607ec681f3Smrg      if (variant != NULL) {
14617ec681f3Smrg         variant->assembly_offset = offset;
14627ec681f3Smrg
14637ec681f3Smrg         memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
14647ec681f3Smrg         offset += variant->qpu_insts_size;
14657ec681f3Smrg
14667ec681f3Smrg         /* We dont need qpu_insts anymore. */
14677ec681f3Smrg         free(variant->qpu_insts);
14687ec681f3Smrg         variant->qpu_insts = NULL;
14697ec681f3Smrg      }
14707ec681f3Smrg   }
14717ec681f3Smrg   assert(total_size == offset);
14727ec681f3Smrg
14737ec681f3Smrg   pipeline->shared_data->assembly_bo = bo;
14747ec681f3Smrg
14757ec681f3Smrg   return true;
14767ec681f3Smrg}
14777ec681f3Smrg
14787ec681f3Smrgstatic void
14797ec681f3Smrgpipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
14807ec681f3Smrg                       struct v3dv_pipeline_key *key,
14817ec681f3Smrg                       unsigned char *sha1_out)
14827ec681f3Smrg{
14837ec681f3Smrg   struct mesa_sha1 ctx;
14847ec681f3Smrg   _mesa_sha1_init(&ctx);
14857ec681f3Smrg
14867ec681f3Smrg   /* We need to include all shader stages in the sha1 key as linking may modify
14877ec681f3Smrg    * the shader code in any stage. An alternative would be to use the
14887ec681f3Smrg    * serialized NIR, but that seems like an overkill.
14897ec681f3Smrg    */
14907ec681f3Smrg   _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
14917ec681f3Smrg                     sizeof(pipeline->vs->shader_sha1));
14927ec681f3Smrg
14937ec681f3Smrg   if (pipeline->gs) {
14947ec681f3Smrg      _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
14957ec681f3Smrg                        sizeof(pipeline->gs->shader_sha1));
14967ec681f3Smrg   }
14977ec681f3Smrg
14987ec681f3Smrg   _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
14997ec681f3Smrg                     sizeof(pipeline->fs->shader_sha1));
15007ec681f3Smrg
15017ec681f3Smrg   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
15027ec681f3Smrg
15037ec681f3Smrg   _mesa_sha1_final(&ctx, sha1_out);
15047ec681f3Smrg}
15057ec681f3Smrg
15067ec681f3Smrgstatic void
15077ec681f3Smrgpipeline_hash_compute(const struct v3dv_pipeline *pipeline,
15087ec681f3Smrg                      struct v3dv_pipeline_key *key,
15097ec681f3Smrg                      unsigned char *sha1_out)
15107ec681f3Smrg{
15117ec681f3Smrg   struct mesa_sha1 ctx;
15127ec681f3Smrg   _mesa_sha1_init(&ctx);
15137ec681f3Smrg
15147ec681f3Smrg   _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
15157ec681f3Smrg                     sizeof(pipeline->cs->shader_sha1));
15167ec681f3Smrg
15177ec681f3Smrg   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
15187ec681f3Smrg
15197ec681f3Smrg   _mesa_sha1_final(&ctx, sha1_out);
15207ec681f3Smrg}
15217ec681f3Smrg
15227ec681f3Smrg/* Checks that the pipeline has enough spill size to use for any of their
15237ec681f3Smrg * variants
15247ec681f3Smrg */
15257ec681f3Smrgstatic void
15267ec681f3Smrgpipeline_check_spill_size(struct v3dv_pipeline *pipeline)
15277ec681f3Smrg{
15287ec681f3Smrg   uint32_t max_spill_size = 0;
15297ec681f3Smrg
15307ec681f3Smrg   for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
15317ec681f3Smrg      struct v3dv_shader_variant *variant =
15327ec681f3Smrg         pipeline->shared_data->variants[stage];
15337ec681f3Smrg
15347ec681f3Smrg      if (variant != NULL) {
15357ec681f3Smrg         max_spill_size = MAX2(variant->prog_data.base->spill_size,
15367ec681f3Smrg                               max_spill_size);
15377ec681f3Smrg      }
15387ec681f3Smrg   }
15397ec681f3Smrg
15407ec681f3Smrg   if (max_spill_size > 0) {
15417ec681f3Smrg      struct v3dv_device *device = pipeline->device;
15427ec681f3Smrg
15437ec681f3Smrg      /* The TIDX register we use for choosing the area to access
15447ec681f3Smrg       * for scratch space is: (core << 6) | (qpu << 2) | thread.
15457ec681f3Smrg       * Even at minimum threadcount in a particular shader, that
15467ec681f3Smrg       * means we still multiply by qpus by 4.
15477ec681f3Smrg       */
15487ec681f3Smrg      const uint32_t total_spill_size =
15497ec681f3Smrg         4 * device->devinfo.qpu_count * max_spill_size;
15507ec681f3Smrg      if (pipeline->spill.bo) {
15517ec681f3Smrg         assert(pipeline->spill.size_per_thread > 0);
15527ec681f3Smrg         v3dv_bo_free(device, pipeline->spill.bo);
15537ec681f3Smrg      }
15547ec681f3Smrg      pipeline->spill.bo =
15557ec681f3Smrg         v3dv_bo_alloc(device, total_spill_size, "spill", true);
15567ec681f3Smrg      pipeline->spill.size_per_thread = max_spill_size;
15577ec681f3Smrg   }
15587ec681f3Smrg}
15597ec681f3Smrg
15607ec681f3Smrg/**
15617ec681f3Smrg * Creates a new shader_variant_create. Note that for prog_data is not const,
15627ec681f3Smrg * so it is assumed that the caller will prove a pointer that the
15637ec681f3Smrg * shader_variant will own.
15647ec681f3Smrg *
15657ec681f3Smrg * Creation doesn't include allocate a BD to store the content of qpu_insts,
15667ec681f3Smrg * as we will try to share the same bo for several shader variants. Also note
15677ec681f3Smrg * that qpu_ints being NULL is valid, for example if we are creating the
15687ec681f3Smrg * shader_variants from the cache, so we can just upload the assembly of all
15697ec681f3Smrg * the shader stages at once.
15707ec681f3Smrg */
15717ec681f3Smrgstruct v3dv_shader_variant *
15727ec681f3Smrgv3dv_shader_variant_create(struct v3dv_device *device,
15737ec681f3Smrg                           enum broadcom_shader_stage stage,
15747ec681f3Smrg                           struct v3d_prog_data *prog_data,
15757ec681f3Smrg                           uint32_t prog_data_size,
15767ec681f3Smrg                           uint32_t assembly_offset,
15777ec681f3Smrg                           uint64_t *qpu_insts,
15787ec681f3Smrg                           uint32_t qpu_insts_size,
15797ec681f3Smrg                           VkResult *out_vk_result)
15807ec681f3Smrg{
15817ec681f3Smrg   struct v3dv_shader_variant *variant =
15827ec681f3Smrg      vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
15837ec681f3Smrg                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
15847ec681f3Smrg
15857ec681f3Smrg   if (variant == NULL) {
15867ec681f3Smrg      *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
15877ec681f3Smrg      return NULL;
15887ec681f3Smrg   }
15897ec681f3Smrg
15907ec681f3Smrg   variant->stage = stage;
15917ec681f3Smrg   variant->prog_data_size = prog_data_size;
15927ec681f3Smrg   variant->prog_data.base = prog_data;
15937ec681f3Smrg
15947ec681f3Smrg   variant->assembly_offset = assembly_offset;
15957ec681f3Smrg   variant->qpu_insts_size = qpu_insts_size;
15967ec681f3Smrg   variant->qpu_insts = qpu_insts;
15977ec681f3Smrg
15987ec681f3Smrg   *out_vk_result = VK_SUCCESS;
15997ec681f3Smrg
16007ec681f3Smrg   return variant;
16017ec681f3Smrg}
16027ec681f3Smrg
16037ec681f3Smrg/* For a given key, it returns the compiled version of the shader.  Returns a
16047ec681f3Smrg * new reference to the shader_variant to the caller, or NULL.
16057ec681f3Smrg *
16067ec681f3Smrg * If the method returns NULL it means that something wrong happened:
16077ec681f3Smrg *   * Not enough memory: this is one of the possible outcomes defined by
16087ec681f3Smrg *     vkCreateXXXPipelines. out_vk_result will return the proper oom error.
16097ec681f3Smrg *   * Compilation error: hypothetically this shouldn't happen, as the spec
16107ec681f3Smrg *     states that vkShaderModule needs to be created with a valid SPIR-V, so
16117ec681f3Smrg *     any compilation failure is a driver bug. In the practice, something as
16127ec681f3Smrg *     common as failing to register allocate can lead to a compilation
16137ec681f3Smrg *     failure. In that case the only option (for any driver) is
16147ec681f3Smrg *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
16157ec681f3Smrg *     error.
16167ec681f3Smrg */
16177ec681f3Smrgstatic struct v3dv_shader_variant *
16187ec681f3Smrgpipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
16197ec681f3Smrg                                struct v3d_key *key,
16207ec681f3Smrg                                size_t key_size,
16217ec681f3Smrg                                const VkAllocationCallbacks *pAllocator,
16227ec681f3Smrg                                VkResult *out_vk_result)
16237ec681f3Smrg{
16247ec681f3Smrg   int64_t stage_start = os_time_get_nano();
16257ec681f3Smrg
16267ec681f3Smrg   struct v3dv_pipeline *pipeline = p_stage->pipeline;
16277ec681f3Smrg   struct v3dv_physical_device *physical_device =
16287ec681f3Smrg      &pipeline->device->instance->physicalDevice;
16297ec681f3Smrg   const struct v3d_compiler *compiler = physical_device->compiler;
16307ec681f3Smrg
16317ec681f3Smrg   if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
16327ec681f3Smrg                             v3d_debug_flag_for_shader_stage
16337ec681f3Smrg                             (broadcom_shader_stage_to_gl(p_stage->stage))))) {
16347ec681f3Smrg      fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
16357ec681f3Smrg              broadcom_shader_stage_name(p_stage->stage),
16367ec681f3Smrg              p_stage->program_id);
16377ec681f3Smrg      nir_print_shader(p_stage->nir, stderr);
16387ec681f3Smrg      fprintf(stderr, "\n");
16397ec681f3Smrg   }
16407ec681f3Smrg
16417ec681f3Smrg   uint64_t *qpu_insts;
16427ec681f3Smrg   uint32_t qpu_insts_size;
16437ec681f3Smrg   struct v3d_prog_data *prog_data;
16447ec681f3Smrg   uint32_t prog_data_size =
16457ec681f3Smrg      v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));
16467ec681f3Smrg
16477ec681f3Smrg   qpu_insts = v3d_compile(compiler,
16487ec681f3Smrg                           key, &prog_data,
16497ec681f3Smrg                           p_stage->nir,
16507ec681f3Smrg                           shader_debug_output, NULL,
16517ec681f3Smrg                           p_stage->program_id, 0,
16527ec681f3Smrg                           &qpu_insts_size);
16537ec681f3Smrg
16547ec681f3Smrg   struct v3dv_shader_variant *variant = NULL;
16557ec681f3Smrg
16567ec681f3Smrg   if (!qpu_insts) {
16577ec681f3Smrg      fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
16587ec681f3Smrg              gl_shader_stage_name(p_stage->stage),
16597ec681f3Smrg              p_stage->program_id);
16607ec681f3Smrg      *out_vk_result = VK_ERROR_UNKNOWN;
16617ec681f3Smrg   } else {
16627ec681f3Smrg      variant =
16637ec681f3Smrg         v3dv_shader_variant_create(pipeline->device, p_stage->stage,
16647ec681f3Smrg                                    prog_data, prog_data_size,
16657ec681f3Smrg                                    0, /* assembly_offset, no final value yet */
16667ec681f3Smrg                                    qpu_insts, qpu_insts_size,
16677ec681f3Smrg                                    out_vk_result);
16687ec681f3Smrg   }
16697ec681f3Smrg   /* At this point we don't need anymore the nir shader, but we are freeing
16707ec681f3Smrg    * all the temporary p_stage structs used during the pipeline creation when
16717ec681f3Smrg    * we finish it, so let's not worry about freeing the nir here.
16727ec681f3Smrg    */
16737ec681f3Smrg
16747ec681f3Smrg   p_stage->feedback.duration += os_time_get_nano() - stage_start;
16757ec681f3Smrg
16767ec681f3Smrg   return variant;
16777ec681f3Smrg}
16787ec681f3Smrg
/* FIXME: C&P from st, common place? */
/* Runs the standard NIR optimization loop until no pass reports progress.
 * Pass order matters: cleanup passes (copy-prop, DCE) follow the passes
 * that expose their opportunities.
 */
static void
st_nir_opts(nir_shader *nir)
{
   bool progress;

   do {
      progress = false;

      NIR_PASS_V(nir, nir_lower_vars_to_ssa);

      /* Linking deals with unused inputs/outputs, but here we can remove
       * things local to the shader in the hopes that we can cleanup other
       * things. This pass will also remove variables with only stores, so we
       * might be able to make progress after it.
       */
      NIR_PASS(progress, nir, nir_remove_dead_variables,
               (nir_variable_mode)(nir_var_function_temp |
                                   nir_var_shader_temp |
                                   nir_var_mem_shared),
               NULL);

      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
      NIR_PASS(progress, nir, nir_opt_dead_write_vars);

      /* Scalarize first so the scalar-only opts below see scalar ALU. */
      if (nir->options->lower_to_scalar) {
         NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
         NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
      }

      NIR_PASS_V(nir, nir_lower_alu);
      NIR_PASS_V(nir, nir_lower_pack);
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_dce);
      /* Removing trivial continues can open up new copy-prop/DCE chances. */
      if (nir_opt_trivial_continues(nir)) {
         progress = true;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
      }
      NIR_PASS(progress, nir, nir_opt_if, false);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      NIR_PASS(progress, nir, nir_opt_undef);
      NIR_PASS(progress, nir, nir_opt_conditional_discard);
   } while (progress);
}
17317ec681f3Smrg
/* Cross-stage links a producer/consumer NIR pair: scalarizes and
 * element-izes the interface, removes varyings the consumer never reads,
 * and re-optimizes both shaders after each interface change.
 */
static void
link_shaders(nir_shader *producer, nir_shader *consumer)
{
   assert(producer);
   assert(consumer);

   /* Scalarize the interface so per-component varying removal can work. */
   if (producer->options->lower_to_scalar) {
      NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
      NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
   }

   nir_lower_io_arrays_to_elements(producer, consumer);

   st_nir_opts(producer);
   st_nir_opts(consumer);

   /* Constant/uniform outputs can be folded into the consumer; if so, the
    * consumer needs another optimization round.
    */
   if (nir_link_opt_varyings(producer, consumer))
      st_nir_opts(consumer);

   NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
   NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);

   if (nir_remove_unused_varyings(producer, consumer)) {
      NIR_PASS_V(producer, nir_lower_global_vars_to_local);
      NIR_PASS_V(consumer, nir_lower_global_vars_to_local);

      st_nir_opts(producer);
      st_nir_opts(consumer);

      /* Optimizations can cause varyings to become unused.
       * nir_compact_varyings() depends on all dead varyings being removed so
       * we need to call nir_remove_dead_variables() again here.
       */
      NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
      NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
   }
}
17697ec681f3Smrg
/* Applies the pipeline layout to a stage's NIR: registers the two fallback
 * samplers every stage needs and lowers descriptor access to the driver's
 * descriptor maps. Must run after the stage's descriptor map is created.
 */
static void
pipeline_lower_nir(struct v3dv_pipeline *pipeline,
                   struct v3dv_pipeline_stage *p_stage,
                   struct v3dv_pipeline_layout *layout)
{
   int64_t stage_start = os_time_get_nano();

   assert(pipeline->shared_data &&
          pipeline->shared_data->maps[p_stage->stage]);

   nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));

   /* We add this because we need a valid sampler for nir_lower_tex to do
    * unpacking of the texture operation result, even for the case where there
    * is no sampler state.
    *
    * We add two of those, one for the case we need a 16bit return_size, and
    * another for the case we need a 32bit return size.
    */
   /* The asserts below rely on the map being empty at this point so the two
    * fallback samplers land at the fixed indices the compiler expects.
    */
   UNUSED unsigned index =
      descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
                         -1, -1, -1, 0, 16);
   assert(index == V3DV_NO_SAMPLER_16BIT_IDX);

   index =
      descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
                         -2, -2, -2, 0, 32);
   assert(index == V3DV_NO_SAMPLER_32BIT_IDX);

   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
   NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);

   p_stage->feedback.duration += os_time_get_nano() - stage_start;
}
18047ec681f3Smrg
18057ec681f3Smrg/**
18067ec681f3Smrg * The SPIR-V compiler will insert a sized compact array for
18077ec681f3Smrg * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
18087ec681f3Smrg * where the size of the array determines the number of active clip planes.
18097ec681f3Smrg */
18107ec681f3Smrgstatic uint32_t
18117ec681f3Smrgget_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
18127ec681f3Smrg{
18137ec681f3Smrg   assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
18147ec681f3Smrg   const nir_shader *shader = p_stage->nir;
18157ec681f3Smrg   assert(shader);
18167ec681f3Smrg
18177ec681f3Smrg   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
18187ec681f3Smrg      if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
18197ec681f3Smrg         assert(var->data.compact);
18207ec681f3Smrg         return (1 << glsl_get_length(var->type)) - 1;
18217ec681f3Smrg      }
18227ec681f3Smrg   }
18237ec681f3Smrg   return 0;
18247ec681f3Smrg}
18257ec681f3Smrg
/* Returns the NIR for a pipeline stage, either from the NIR cache or by
 * compiling the SPIR-V module. Freshly compiled NIR is uploaded to both the
 * provided cache and the device's default cache. Returns NULL on failure.
 */
static nir_shader *
pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
                       struct v3dv_pipeline *pipeline,
                       struct v3dv_pipeline_cache *cache)
{
   int64_t stage_start = os_time_get_nano();

   nir_shader *nir = NULL;

   nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
                                            &v3dv_nir_options,
                                            p_stage->shader_sha1);

   if (nir) {
      assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));

      /* A NIR cache hit doesn't avoid the large majority of pipeline stage
       * creation so the cache hit is not recorded in the pipeline feedback
       * flags
       */

      p_stage->feedback.duration += os_time_get_nano() - stage_start;

      return nir;
   }

   nir = shader_module_compile_to_nir(pipeline->device, p_stage);

   if (nir) {
      struct v3dv_pipeline_cache *default_cache =
         &pipeline->device->default_pipeline_cache;

      v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
                                     p_stage->shader_sha1);

      /* Ensure that the variant is on the default cache, as cmd_buffer could
       * need to change the current variant
       */
      if (default_cache != cache) {
         v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
                                        p_stage->shader_sha1);
      }

      p_stage->feedback.duration += os_time_get_nano() - stage_start;

      return nir;
   }

   /* FIXME: this shouldn't happen, raise error? */
   return NULL;
}
18777ec681f3Smrg
18787ec681f3Smrgstatic void
18797ec681f3Smrgpipeline_hash_shader(const struct vk_shader_module *module,
18807ec681f3Smrg                     const char *entrypoint,
18817ec681f3Smrg                     gl_shader_stage stage,
18827ec681f3Smrg                     const VkSpecializationInfo *spec_info,
18837ec681f3Smrg                     unsigned char *sha1_out)
18847ec681f3Smrg{
18857ec681f3Smrg   struct mesa_sha1 ctx;
18867ec681f3Smrg   _mesa_sha1_init(&ctx);
18877ec681f3Smrg
18887ec681f3Smrg   _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
18897ec681f3Smrg   _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
18907ec681f3Smrg   _mesa_sha1_update(&ctx, &stage, sizeof(stage));
18917ec681f3Smrg   if (spec_info) {
18927ec681f3Smrg      _mesa_sha1_update(&ctx, spec_info->pMapEntries,
18937ec681f3Smrg                        spec_info->mapEntryCount *
18947ec681f3Smrg                        sizeof(*spec_info->pMapEntries));
18957ec681f3Smrg      _mesa_sha1_update(&ctx, spec_info->pData,
18967ec681f3Smrg                        spec_info->dataSize);
18977ec681f3Smrg   }
18987ec681f3Smrg
18997ec681f3Smrg   _mesa_sha1_final(&ctx, sha1_out);
19007ec681f3Smrg}
19017ec681f3Smrg
19027ec681f3Smrgstatic VkResult
19037ec681f3Smrgpipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
19047ec681f3Smrg                               const VkAllocationCallbacks *pAllocator,
19057ec681f3Smrg                               const VkGraphicsPipelineCreateInfo *pCreateInfo)
19067ec681f3Smrg{
19077ec681f3Smrg   assert(pipeline->vs_bin != NULL);
19087ec681f3Smrg   if (pipeline->vs_bin->nir == NULL) {
19097ec681f3Smrg      assert(pipeline->vs->nir);
19107ec681f3Smrg      pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);
19117ec681f3Smrg   }
19127ec681f3Smrg
19137ec681f3Smrg   VkResult vk_result;
19147ec681f3Smrg   struct v3d_vs_key key;
19157ec681f3Smrg   pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);
19167ec681f3Smrg   pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
19177ec681f3Smrg      pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),
19187ec681f3Smrg                                      pAllocator, &vk_result);
19197ec681f3Smrg   if (vk_result != VK_SUCCESS)
19207ec681f3Smrg      return vk_result;
19217ec681f3Smrg
19227ec681f3Smrg   pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
19237ec681f3Smrg   pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
19247ec681f3Smrg      pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
19257ec681f3Smrg                                      pAllocator, &vk_result);
19267ec681f3Smrg
19277ec681f3Smrg   return vk_result;
19287ec681f3Smrg}
19297ec681f3Smrg
19307ec681f3Smrgstatic VkResult
19317ec681f3Smrgpipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
19327ec681f3Smrg                                 const VkAllocationCallbacks *pAllocator,
19337ec681f3Smrg                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
19347ec681f3Smrg{
19357ec681f3Smrg   assert(pipeline->gs);
19367ec681f3Smrg
19377ec681f3Smrg   assert(pipeline->gs_bin != NULL);
19387ec681f3Smrg   if (pipeline->gs_bin->nir == NULL) {
19397ec681f3Smrg      assert(pipeline->gs->nir);
19407ec681f3Smrg      pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
19417ec681f3Smrg   }
19427ec681f3Smrg
19437ec681f3Smrg   VkResult vk_result;
19447ec681f3Smrg   struct v3d_gs_key key;
19457ec681f3Smrg   pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
19467ec681f3Smrg   pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
19477ec681f3Smrg      pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
19487ec681f3Smrg                                      pAllocator, &vk_result);
19497ec681f3Smrg   if (vk_result != VK_SUCCESS)
19507ec681f3Smrg      return vk_result;
19517ec681f3Smrg
19527ec681f3Smrg   pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
19537ec681f3Smrg   pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
19547ec681f3Smrg      pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
19557ec681f3Smrg                                      pAllocator, &vk_result);
19567ec681f3Smrg
19577ec681f3Smrg   return vk_result;
19587ec681f3Smrg}
19597ec681f3Smrg
19607ec681f3Smrgstatic VkResult
19617ec681f3Smrgpipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
19627ec681f3Smrg                                 const VkAllocationCallbacks *pAllocator,
19637ec681f3Smrg                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
19647ec681f3Smrg{
19657ec681f3Smrg   struct v3dv_pipeline_stage *p_stage = pipeline->vs;
19667ec681f3Smrg
19677ec681f3Smrg   p_stage = pipeline->fs;
19687ec681f3Smrg
19697ec681f3Smrg   struct v3d_fs_key key;
19707ec681f3Smrg
19717ec681f3Smrg   pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
19727ec681f3Smrg                                pipeline->gs != NULL,
19737ec681f3Smrg                                get_ucp_enable_mask(pipeline->vs));
19747ec681f3Smrg
19757ec681f3Smrg   VkResult vk_result;
19767ec681f3Smrg   pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
19777ec681f3Smrg      pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),
19787ec681f3Smrg                                      pAllocator, &vk_result);
19797ec681f3Smrg
19807ec681f3Smrg   return vk_result;
19817ec681f3Smrg}
19827ec681f3Smrg
/* Fills *key with all the graphics pipeline state that affects shader
 * compilation: rasterization/discard, topology, logic ops, multisampling,
 * color attachment formats and vertex attribute formats. The key is later
 * hashed to look up compiled variants in the pipeline cache, so every field
 * written here must be deterministic for identical create infos.
 */
static void
pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
                               struct v3dv_pipeline_key *key,
                               const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   /* Zero first so unused fields hash consistently. */
   memset(key, 0, sizeof(*key));
   key->robust_buffer_access =
      pipeline->device->features.robustBufferAccess;

   /* Several pieces of state below must be ignored when rasterization is
    * discarded, per the Vulkan spec.
    */
   const bool raster_enabled =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;

   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   key->topology = vk_to_pipe_prim_type[ia_info->topology];

   const VkPipelineColorBlendStateCreateInfo *cb_info =
      raster_enabled ? pCreateInfo->pColorBlendState : NULL;

   /* PIPE_LOGICOP_COPY is the "no logic op" default. */
   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
      vk_to_pipe_logicop[cb_info->logicOp] :
      PIPE_LOGICOP_COPY;

   /* Multisample rasterization state must be ignored if rasterization
    * is disabled.
    */
   const VkPipelineMultisampleStateCreateInfo *ms_info =
      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
   if (ms_info) {
      /* The hardware only supports 1x and 4x sampling. */
      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;

      if (key->msaa) {
         /* Sample coverage only matters if the mask excludes some samples. */
         key->sample_coverage =
            pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
      }
   }

   /* Record per-color-attachment format information for the subpass. */
   const struct v3dv_render_pass *pass =
      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
   const struct v3dv_subpass *subpass = pipeline->subpass;
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t att_idx = subpass->color_attachments[i].attachment;
      if (att_idx == VK_ATTACHMENT_UNUSED)
         continue;

      key->cbufs |= 1 << i;

      VkFormat fb_format = pass->attachments[att_idx].desc.format;
      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);

      /* If logic operations are enabled then we might emit color reads and we
       * need to know the color buffer format and swizzle for that
       */
      if (key->logicop_func != PIPE_LOGICOP_COPY) {
         key->color_fmt[i].format = fb_pipe_format;
         key->color_fmt[i].swizzle = v3dv_get_format_swizzle(pipeline->device,
                                                             fb_format);
      }

      const struct util_format_description *desc =
         vk_format_description(fb_format);

      /* Track which render buffers use 32-bit float channels; the compiler
       * needs this to emit correct tile-buffer accesses.
       */
      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
          desc->channel[0].size == 32) {
         key->f32_color_rb |= 1 << i;
      }
   }

   /* BGRA vertex attributes need a red/blue swap in the shader. */
   const VkPipelineVertexInputStateCreateInfo *vi_info =
      pCreateInfo->pVertexInputState;
   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &vi_info->pVertexAttributeDescriptions[i];
      assert(desc->location < MAX_VERTEX_ATTRIBS);
      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
   }

   assert(pipeline->subpass);
   key->has_multiview = pipeline->subpass->view_mask != 0;
}
20687ec681f3Smrg
20697ec681f3Smrgstatic void
20707ec681f3Smrgpipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
20717ec681f3Smrg                              struct v3dv_pipeline_key *key,
20727ec681f3Smrg                              const VkComputePipelineCreateInfo *pCreateInfo)
20737ec681f3Smrg{
20747ec681f3Smrg   /* We use the same pipeline key for graphics and compute, but we don't need
20757ec681f3Smrg    * to add a field to flag compute keys because this key is not used alone
20767ec681f3Smrg    * to search in the cache, we also use the SPIR-V or the serialized NIR for
20777ec681f3Smrg    * example, which already flags compute shaders.
20787ec681f3Smrg    */
20797ec681f3Smrg   memset(key, 0, sizeof(*key));
20807ec681f3Smrg   key->robust_buffer_access =
20817ec681f3Smrg      pipeline->device->features.robustBufferAccess;
20827ec681f3Smrg}
20837ec681f3Smrg
20847ec681f3Smrgstatic struct v3dv_pipeline_shared_data *
20857ec681f3Smrgv3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
20867ec681f3Smrg                                    struct v3dv_pipeline *pipeline,
20877ec681f3Smrg                                    bool is_graphics_pipeline)
20887ec681f3Smrg{
20897ec681f3Smrg   /* We create new_entry using the device alloc. Right now shared_data is ref
20907ec681f3Smrg    * and unref by both the pipeline and the pipeline cache, so we can't
20917ec681f3Smrg    * ensure that the cache or pipeline alloc will be available on the last
20927ec681f3Smrg    * unref.
20937ec681f3Smrg    */
20947ec681f3Smrg   struct v3dv_pipeline_shared_data *new_entry =
20957ec681f3Smrg      vk_zalloc2(&pipeline->device->vk.alloc, NULL,
20967ec681f3Smrg                 sizeof(struct v3dv_pipeline_shared_data), 8,
20977ec681f3Smrg                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
20987ec681f3Smrg
20997ec681f3Smrg   if (new_entry == NULL)
21007ec681f3Smrg      return NULL;
21017ec681f3Smrg
21027ec681f3Smrg   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
21037ec681f3Smrg      /* We don't need specific descriptor maps for binning stages we use the
21047ec681f3Smrg       * map for the render stage.
21057ec681f3Smrg       */
21067ec681f3Smrg      if (broadcom_shader_stage_is_binning(stage))
21077ec681f3Smrg         continue;
21087ec681f3Smrg
21097ec681f3Smrg      if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
21107ec681f3Smrg          (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
21117ec681f3Smrg         continue;
21127ec681f3Smrg      }
21137ec681f3Smrg
21147ec681f3Smrg      if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) {
21157ec681f3Smrg         /* We always inject a custom GS if we have multiview */
21167ec681f3Smrg         if (!pipeline->subpass->view_mask)
21177ec681f3Smrg            continue;
21187ec681f3Smrg      }
21197ec681f3Smrg
21207ec681f3Smrg      struct v3dv_descriptor_maps *new_maps =
21217ec681f3Smrg         vk_zalloc2(&pipeline->device->vk.alloc, NULL,
21227ec681f3Smrg                    sizeof(struct v3dv_descriptor_maps), 8,
21237ec681f3Smrg                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
21247ec681f3Smrg
21257ec681f3Smrg      if (new_maps == NULL)
21267ec681f3Smrg         goto fail;
21277ec681f3Smrg
21287ec681f3Smrg      new_entry->maps[stage] = new_maps;
21297ec681f3Smrg   }
21307ec681f3Smrg
21317ec681f3Smrg   new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
21327ec681f3Smrg      new_entry->maps[BROADCOM_SHADER_VERTEX];
21337ec681f3Smrg
21347ec681f3Smrg   new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
21357ec681f3Smrg      new_entry->maps[BROADCOM_SHADER_GEOMETRY];
21367ec681f3Smrg
21377ec681f3Smrg   new_entry->ref_cnt = 1;
21387ec681f3Smrg   memcpy(new_entry->sha1_key, sha1_key, 20);
21397ec681f3Smrg
21407ec681f3Smrg   return new_entry;
21417ec681f3Smrg
21427ec681f3Smrgfail:
21437ec681f3Smrg   if (new_entry != NULL) {
21447ec681f3Smrg      for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
21457ec681f3Smrg         if (new_entry->maps[stage] != NULL)
21467ec681f3Smrg            vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
21477ec681f3Smrg      }
21487ec681f3Smrg   }
21497ec681f3Smrg
21507ec681f3Smrg   vk_free(&pipeline->device->vk.alloc, new_entry);
21517ec681f3Smrg
21527ec681f3Smrg   return NULL;
21537ec681f3Smrg}
21547ec681f3Smrg
21557ec681f3Smrgstatic void
21567ec681f3Smrgwrite_creation_feedback(struct v3dv_pipeline *pipeline,
21577ec681f3Smrg                        const void *next,
21587ec681f3Smrg                        const VkPipelineCreationFeedbackEXT *pipeline_feedback,
21597ec681f3Smrg                        uint32_t stage_count,
21607ec681f3Smrg                        const VkPipelineShaderStageCreateInfo *stages)
21617ec681f3Smrg{
21627ec681f3Smrg   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
21637ec681f3Smrg      vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
21647ec681f3Smrg
21657ec681f3Smrg   if (create_feedback) {
21667ec681f3Smrg      typed_memcpy(create_feedback->pPipelineCreationFeedback,
21677ec681f3Smrg             pipeline_feedback,
21687ec681f3Smrg             1);
21697ec681f3Smrg
21707ec681f3Smrg      assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount);
21717ec681f3Smrg
21727ec681f3Smrg      for (uint32_t i = 0; i < stage_count; i++) {
21737ec681f3Smrg         gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
21747ec681f3Smrg         switch (s) {
21757ec681f3Smrg         case MESA_SHADER_VERTEX:
21767ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i] =
21777ec681f3Smrg               pipeline->vs->feedback;
21787ec681f3Smrg
21797ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
21807ec681f3Smrg               pipeline->vs_bin->feedback.duration;
21817ec681f3Smrg            break;
21827ec681f3Smrg
21837ec681f3Smrg         case MESA_SHADER_GEOMETRY:
21847ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i] =
21857ec681f3Smrg               pipeline->gs->feedback;
21867ec681f3Smrg
21877ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
21887ec681f3Smrg               pipeline->gs_bin->feedback.duration;
21897ec681f3Smrg            break;
21907ec681f3Smrg
21917ec681f3Smrg         case MESA_SHADER_FRAGMENT:
21927ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i] =
21937ec681f3Smrg               pipeline->fs->feedback;
21947ec681f3Smrg            break;
21957ec681f3Smrg
21967ec681f3Smrg         case MESA_SHADER_COMPUTE:
21977ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i] =
21987ec681f3Smrg               pipeline->cs->feedback;
21997ec681f3Smrg            break;
22007ec681f3Smrg
22017ec681f3Smrg         default:
22027ec681f3Smrg            unreachable("not supported shader stage");
22037ec681f3Smrg         }
22047ec681f3Smrg      }
22057ec681f3Smrg   }
22067ec681f3Smrg}
22077ec681f3Smrg
22087ec681f3Smrgstatic uint32_t
22097ec681f3Smrgmultiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
22107ec681f3Smrg{
22117ec681f3Smrg   switch (pipeline->topology) {
22127ec681f3Smrg   case PIPE_PRIM_POINTS:
22137ec681f3Smrg      return GL_POINTS;
22147ec681f3Smrg   case PIPE_PRIM_LINES:
22157ec681f3Smrg   case PIPE_PRIM_LINE_STRIP:
22167ec681f3Smrg      return GL_LINES;
22177ec681f3Smrg   case PIPE_PRIM_TRIANGLES:
22187ec681f3Smrg   case PIPE_PRIM_TRIANGLE_STRIP:
22197ec681f3Smrg   case PIPE_PRIM_TRIANGLE_FAN:
22207ec681f3Smrg      return GL_TRIANGLES;
22217ec681f3Smrg   default:
22227ec681f3Smrg      /* Since we don't allow GS with multiview, we can only see non-adjacency
22237ec681f3Smrg       * primitives.
22247ec681f3Smrg       */
22257ec681f3Smrg      unreachable("Unexpected pipeline primitive type");
22267ec681f3Smrg   }
22277ec681f3Smrg}
22287ec681f3Smrg
22297ec681f3Smrgstatic uint32_t
22307ec681f3Smrgmultiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
22317ec681f3Smrg{
22327ec681f3Smrg   switch (pipeline->topology) {
22337ec681f3Smrg   case PIPE_PRIM_POINTS:
22347ec681f3Smrg      return GL_POINTS;
22357ec681f3Smrg   case PIPE_PRIM_LINES:
22367ec681f3Smrg   case PIPE_PRIM_LINE_STRIP:
22377ec681f3Smrg      return GL_LINE_STRIP;
22387ec681f3Smrg   case PIPE_PRIM_TRIANGLES:
22397ec681f3Smrg   case PIPE_PRIM_TRIANGLE_STRIP:
22407ec681f3Smrg   case PIPE_PRIM_TRIANGLE_FAN:
22417ec681f3Smrg      return GL_TRIANGLE_STRIP;
22427ec681f3Smrg   default:
22437ec681f3Smrg      /* Since we don't allow GS with multiview, we can only see non-adjacency
22447ec681f3Smrg       * primitives.
22457ec681f3Smrg       */
22467ec681f3Smrg      unreachable("Unexpected pipeline primitive type");
22477ec681f3Smrg   }
22487ec681f3Smrg}
22497ec681f3Smrg
22507ec681f3Smrgstatic bool
22517ec681f3Smrgpipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
22527ec681f3Smrg                          struct v3dv_pipeline_cache *cache,
22537ec681f3Smrg                          const VkAllocationCallbacks *pAllocator)
22547ec681f3Smrg{
22557ec681f3Smrg   /* Create the passthrough GS from the VS output interface */
22567ec681f3Smrg   pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
22577ec681f3Smrg   nir_shader *vs_nir = pipeline->vs->nir;
22587ec681f3Smrg
22597ec681f3Smrg   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
22607ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
22617ec681f3Smrg                                                  "multiview broadcast gs");
22627ec681f3Smrg   nir_shader *nir = b.shader;
22637ec681f3Smrg   nir->info.inputs_read = vs_nir->info.outputs_written;
22647ec681f3Smrg   nir->info.outputs_written = vs_nir->info.outputs_written |
22657ec681f3Smrg                               (1ull << VARYING_SLOT_LAYER);
22667ec681f3Smrg
22677ec681f3Smrg   uint32_t vertex_count = u_vertices_per_prim(pipeline->topology);
22687ec681f3Smrg   nir->info.gs.input_primitive =
22697ec681f3Smrg      multiview_gs_input_primitive_from_pipeline(pipeline);
22707ec681f3Smrg   nir->info.gs.output_primitive =
22717ec681f3Smrg      multiview_gs_output_primitive_from_pipeline(pipeline);
22727ec681f3Smrg   nir->info.gs.vertices_in = vertex_count;
22737ec681f3Smrg   nir->info.gs.vertices_out = nir->info.gs.vertices_in;
22747ec681f3Smrg   nir->info.gs.invocations = 1;
22757ec681f3Smrg   nir->info.gs.active_stream_mask = 0x1;
22767ec681f3Smrg
22777ec681f3Smrg   /* Make a list of GS input/output variables from the VS outputs */
22787ec681f3Smrg   nir_variable *in_vars[100];
22797ec681f3Smrg   nir_variable *out_vars[100];
22807ec681f3Smrg   uint32_t var_count = 0;
22817ec681f3Smrg   nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
22827ec681f3Smrg      char name[8];
22837ec681f3Smrg      snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
22847ec681f3Smrg
22857ec681f3Smrg      in_vars[var_count] =
22867ec681f3Smrg         nir_variable_create(nir, nir_var_shader_in,
22877ec681f3Smrg                             glsl_array_type(out_vs_var->type, vertex_count, 0),
22887ec681f3Smrg                             name);
22897ec681f3Smrg      in_vars[var_count]->data.location = out_vs_var->data.location;
22907ec681f3Smrg      in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
22917ec681f3Smrg      in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
22927ec681f3Smrg
22937ec681f3Smrg      snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
22947ec681f3Smrg      out_vars[var_count] =
22957ec681f3Smrg         nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
22967ec681f3Smrg      out_vars[var_count]->data.location = out_vs_var->data.location;
22977ec681f3Smrg      out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
22987ec681f3Smrg
22997ec681f3Smrg      var_count++;
23007ec681f3Smrg   }
23017ec681f3Smrg
23027ec681f3Smrg   /* Add the gl_Layer output variable */
23037ec681f3Smrg   nir_variable *out_layer =
23047ec681f3Smrg      nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
23057ec681f3Smrg                          "out_Layer");
23067ec681f3Smrg   out_layer->data.location = VARYING_SLOT_LAYER;
23077ec681f3Smrg
23087ec681f3Smrg   /* Get the view index value that we will write to gl_Layer */
23097ec681f3Smrg   nir_ssa_def *layer =
23107ec681f3Smrg      nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
23117ec681f3Smrg
23127ec681f3Smrg   /* Emit all output vertices */
23137ec681f3Smrg   for (uint32_t vi = 0; vi < vertex_count; vi++) {
23147ec681f3Smrg      /* Emit all output varyings */
23157ec681f3Smrg      for (uint32_t i = 0; i < var_count; i++) {
23167ec681f3Smrg         nir_deref_instr *in_value =
23177ec681f3Smrg            nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
23187ec681f3Smrg         nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
23197ec681f3Smrg      }
23207ec681f3Smrg
23217ec681f3Smrg      /* Emit gl_Layer write */
23227ec681f3Smrg      nir_store_var(&b, out_layer, layer, 0x1);
23237ec681f3Smrg
23247ec681f3Smrg      nir_emit_vertex(&b, 0);
23257ec681f3Smrg   }
23267ec681f3Smrg   nir_end_primitive(&b, 0);
23277ec681f3Smrg
23287ec681f3Smrg   /* Make sure we run our pre-process NIR passes so we produce NIR compatible
23297ec681f3Smrg    * with what we expect from SPIR-V modules.
23307ec681f3Smrg    */
23317ec681f3Smrg   preprocess_nir(nir);
23327ec681f3Smrg
23337ec681f3Smrg   /* Attach the geometry shader to the  pipeline */
23347ec681f3Smrg   struct v3dv_device *device = pipeline->device;
23357ec681f3Smrg   struct v3dv_physical_device *physical_device =
23367ec681f3Smrg      &device->instance->physicalDevice;
23377ec681f3Smrg
23387ec681f3Smrg   struct v3dv_pipeline_stage *p_stage =
23397ec681f3Smrg      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
23407ec681f3Smrg                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
23417ec681f3Smrg
23427ec681f3Smrg   if (p_stage == NULL) {
23437ec681f3Smrg      ralloc_free(nir);
23447ec681f3Smrg      return false;
23457ec681f3Smrg   }
23467ec681f3Smrg
23477ec681f3Smrg   p_stage->pipeline = pipeline;
23487ec681f3Smrg   p_stage->stage = BROADCOM_SHADER_GEOMETRY;
23497ec681f3Smrg   p_stage->entrypoint = "main";
23507ec681f3Smrg   p_stage->module = 0;
23517ec681f3Smrg   p_stage->nir = nir;
23527ec681f3Smrg   pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
23537ec681f3Smrg   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
23547ec681f3Smrg
23557ec681f3Smrg   pipeline->has_gs = true;
23567ec681f3Smrg   pipeline->gs = p_stage;
23577ec681f3Smrg   pipeline->active_stages |= MESA_SHADER_GEOMETRY;
23587ec681f3Smrg
23597ec681f3Smrg   pipeline->gs_bin =
23607ec681f3Smrg      pipeline_stage_create_binning(pipeline->gs, pAllocator);
23617ec681f3Smrg      if (pipeline->gs_bin == NULL)
23627ec681f3Smrg         return false;
23637ec681f3Smrg
23647ec681f3Smrg   return true;
23657ec681f3Smrg}
23667ec681f3Smrg
23677ec681f3Smrg/*
23687ec681f3Smrg * It compiles a pipeline. Note that it also allocate internal object, but if
23697ec681f3Smrg * some allocations success, but other fails, the method is not freeing the
23707ec681f3Smrg * successful ones.
23717ec681f3Smrg *
23727ec681f3Smrg * This is done to simplify the code, as what we do in this case is just call
23737ec681f3Smrg * the pipeline destroy method, and this would handle freeing the internal
23747ec681f3Smrg * objects allocated. We just need to be careful setting to NULL the objects
23757ec681f3Smrg * not allocated.
23767ec681f3Smrg */
23777ec681f3Smrgstatic VkResult
23787ec681f3Smrgpipeline_compile_graphics(struct v3dv_pipeline *pipeline,
23797ec681f3Smrg                          struct v3dv_pipeline_cache *cache,
23807ec681f3Smrg                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
23817ec681f3Smrg                          const VkAllocationCallbacks *pAllocator)
23827ec681f3Smrg{
23837ec681f3Smrg   VkPipelineCreationFeedbackEXT pipeline_feedback = {
23847ec681f3Smrg      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
23857ec681f3Smrg   };
23867ec681f3Smrg   int64_t pipeline_start = os_time_get_nano();
23877ec681f3Smrg
23887ec681f3Smrg   struct v3dv_device *device = pipeline->device;
23897ec681f3Smrg   struct v3dv_physical_device *physical_device =
23907ec681f3Smrg      &device->instance->physicalDevice;
23917ec681f3Smrg
23927ec681f3Smrg   /* First pass to get some common info from the shader, and create the
23937ec681f3Smrg    * individual pipeline_stage objects
23947ec681f3Smrg    */
23957ec681f3Smrg   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
23967ec681f3Smrg      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
23977ec681f3Smrg      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
23987ec681f3Smrg
23997ec681f3Smrg      struct v3dv_pipeline_stage *p_stage =
24007ec681f3Smrg         vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
24017ec681f3Smrg                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
24027ec681f3Smrg
24037ec681f3Smrg      if (p_stage == NULL)
24047ec681f3Smrg         return VK_ERROR_OUT_OF_HOST_MEMORY;
24057ec681f3Smrg
24067ec681f3Smrg      /* Note that we are assigning program_id slightly differently that
24077ec681f3Smrg       * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
24087ec681f3Smrg       * would have a different program_id, while v3d would have the same for
24097ec681f3Smrg       * both. For the case of v3dv, it is more natural to have an id this way,
24107ec681f3Smrg       * as right now we are using it for debugging, not for shader-db.
24117ec681f3Smrg       */
24127ec681f3Smrg      p_stage->program_id =
24137ec681f3Smrg         p_atomic_inc_return(&physical_device->next_program_id);
24147ec681f3Smrg
24157ec681f3Smrg      p_stage->pipeline = pipeline;
24167ec681f3Smrg      p_stage->stage = gl_shader_stage_to_broadcom(stage);
24177ec681f3Smrg      p_stage->entrypoint = sinfo->pName;
24187ec681f3Smrg      p_stage->module = vk_shader_module_from_handle(sinfo->module);
24197ec681f3Smrg      p_stage->spec_info = sinfo->pSpecializationInfo;
24207ec681f3Smrg
24217ec681f3Smrg      pipeline_hash_shader(p_stage->module,
24227ec681f3Smrg                           p_stage->entrypoint,
24237ec681f3Smrg                           stage,
24247ec681f3Smrg                           p_stage->spec_info,
24257ec681f3Smrg                           p_stage->shader_sha1);
24267ec681f3Smrg
24277ec681f3Smrg      pipeline->active_stages |= sinfo->stage;
24287ec681f3Smrg
24297ec681f3Smrg      /* We will try to get directly the compiled shader variant, so let's not
24307ec681f3Smrg       * worry about getting the nir shader for now.
24317ec681f3Smrg       */
24327ec681f3Smrg      p_stage->nir = NULL;
24337ec681f3Smrg
24347ec681f3Smrg      switch(stage) {
24357ec681f3Smrg      case MESA_SHADER_VERTEX:
24367ec681f3Smrg         pipeline->vs = p_stage;
24377ec681f3Smrg         pipeline->vs_bin =
24387ec681f3Smrg            pipeline_stage_create_binning(pipeline->vs, pAllocator);
24397ec681f3Smrg         if (pipeline->vs_bin == NULL)
24407ec681f3Smrg            return VK_ERROR_OUT_OF_HOST_MEMORY;
24417ec681f3Smrg         break;
24427ec681f3Smrg
24437ec681f3Smrg      case MESA_SHADER_GEOMETRY:
24447ec681f3Smrg         pipeline->has_gs = true;
24457ec681f3Smrg         pipeline->gs = p_stage;
24467ec681f3Smrg         pipeline->gs_bin =
24477ec681f3Smrg            pipeline_stage_create_binning(pipeline->gs, pAllocator);
24487ec681f3Smrg         if (pipeline->gs_bin == NULL)
24497ec681f3Smrg            return VK_ERROR_OUT_OF_HOST_MEMORY;
24507ec681f3Smrg         break;
24517ec681f3Smrg
24527ec681f3Smrg      case MESA_SHADER_FRAGMENT:
24537ec681f3Smrg         pipeline->fs = p_stage;
24547ec681f3Smrg         break;
24557ec681f3Smrg
24567ec681f3Smrg      default:
24577ec681f3Smrg         unreachable("not supported shader stage");
24587ec681f3Smrg      }
24597ec681f3Smrg   }
24607ec681f3Smrg
24617ec681f3Smrg   /* Add a no-op fragment shader if needed */
24627ec681f3Smrg   if (!pipeline->fs) {
24637ec681f3Smrg      nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
24647ec681f3Smrg                                                     &v3dv_nir_options,
24657ec681f3Smrg                                                     "noop_fs");
24667ec681f3Smrg
24677ec681f3Smrg      struct v3dv_pipeline_stage *p_stage =
24687ec681f3Smrg         vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
24697ec681f3Smrg                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
24707ec681f3Smrg
24717ec681f3Smrg      if (p_stage == NULL)
24727ec681f3Smrg         return VK_ERROR_OUT_OF_HOST_MEMORY;
24737ec681f3Smrg
24747ec681f3Smrg      p_stage->pipeline = pipeline;
24757ec681f3Smrg      p_stage->stage = BROADCOM_SHADER_FRAGMENT;
24767ec681f3Smrg      p_stage->entrypoint = "main";
24777ec681f3Smrg      p_stage->module = 0;
24787ec681f3Smrg      p_stage->nir = b.shader;
24797ec681f3Smrg      pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
24807ec681f3Smrg      p_stage->program_id =
24817ec681f3Smrg         p_atomic_inc_return(&physical_device->next_program_id);
24827ec681f3Smrg
24837ec681f3Smrg      pipeline->fs = p_stage;
24847ec681f3Smrg      pipeline->active_stages |= MESA_SHADER_FRAGMENT;
24857ec681f3Smrg   }
24867ec681f3Smrg
24877ec681f3Smrg   /* If multiview is enabled, we inject a custom passthrough geometry shader
24887ec681f3Smrg    * to broadcast draw calls to the appropriate views.
24897ec681f3Smrg    */
24907ec681f3Smrg   assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs));
24917ec681f3Smrg   if (pipeline->subpass->view_mask) {
24927ec681f3Smrg      if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
24937ec681f3Smrg         return VK_ERROR_OUT_OF_HOST_MEMORY;
24947ec681f3Smrg   }
24957ec681f3Smrg
24967ec681f3Smrg   /* First we try to get the variants from the pipeline cache */
24977ec681f3Smrg   struct v3dv_pipeline_key pipeline_key;
24987ec681f3Smrg   pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
24997ec681f3Smrg   unsigned char pipeline_sha1[20];
25007ec681f3Smrg   pipeline_hash_graphics(pipeline, &pipeline_key, pipeline_sha1);
25017ec681f3Smrg
25027ec681f3Smrg   bool cache_hit = false;
25037ec681f3Smrg
25047ec681f3Smrg   pipeline->shared_data =
25057ec681f3Smrg      v3dv_pipeline_cache_search_for_pipeline(cache,
25067ec681f3Smrg                                              pipeline_sha1,
25077ec681f3Smrg                                              &cache_hit);
25087ec681f3Smrg
25097ec681f3Smrg   if (pipeline->shared_data != NULL) {
25107ec681f3Smrg      /* A correct pipeline must have at least a VS and FS */
25117ec681f3Smrg      assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
25127ec681f3Smrg      assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
25137ec681f3Smrg      assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
25147ec681f3Smrg      assert(!pipeline->gs ||
25157ec681f3Smrg             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
25167ec681f3Smrg      assert(!pipeline->gs ||
25177ec681f3Smrg             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
25187ec681f3Smrg
25197ec681f3Smrg      if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
25207ec681f3Smrg         pipeline_feedback.flags |=
25217ec681f3Smrg            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
25227ec681f3Smrg
25237ec681f3Smrg      goto success;
25247ec681f3Smrg   }
25257ec681f3Smrg
25267ec681f3Smrg   if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
25277ec681f3Smrg      return VK_PIPELINE_COMPILE_REQUIRED_EXT;
25287ec681f3Smrg
25297ec681f3Smrg   /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
25307ec681f3Smrg    * shader or the pipeline cache) and compile.
25317ec681f3Smrg    */
25327ec681f3Smrg   pipeline->shared_data =
25337ec681f3Smrg      v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline, true);
25347ec681f3Smrg
25357ec681f3Smrg   pipeline->vs->feedback.flags |=
25367ec681f3Smrg      VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
25377ec681f3Smrg   if (pipeline->gs)
25387ec681f3Smrg      pipeline->gs->feedback.flags |=
25397ec681f3Smrg         VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
25407ec681f3Smrg   pipeline->fs->feedback.flags |=
25417ec681f3Smrg      VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
25427ec681f3Smrg
25437ec681f3Smrg   if (!pipeline->vs->nir)
25447ec681f3Smrg      pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
25457ec681f3Smrg   if (pipeline->gs && !pipeline->gs->nir)
25467ec681f3Smrg      pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
25477ec681f3Smrg   if (!pipeline->fs->nir)
25487ec681f3Smrg      pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
25497ec681f3Smrg
25507ec681f3Smrg   /* Linking + pipeline lowerings */
25517ec681f3Smrg   if (pipeline->gs) {
25527ec681f3Smrg      link_shaders(pipeline->gs->nir, pipeline->fs->nir);
25537ec681f3Smrg      link_shaders(pipeline->vs->nir, pipeline->gs->nir);
25547ec681f3Smrg   } else {
25557ec681f3Smrg      link_shaders(pipeline->vs->nir, pipeline->fs->nir);
25567ec681f3Smrg   }
25577ec681f3Smrg
25587ec681f3Smrg   pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
25597ec681f3Smrg   lower_fs_io(pipeline->fs->nir);
25607ec681f3Smrg
25617ec681f3Smrg   if (pipeline->gs) {
25627ec681f3Smrg      pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
25637ec681f3Smrg      lower_gs_io(pipeline->gs->nir);
25647ec681f3Smrg   }
25657ec681f3Smrg
25667ec681f3Smrg   pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
25677ec681f3Smrg   lower_vs_io(pipeline->vs->nir);
25687ec681f3Smrg
25697ec681f3Smrg   /* Compiling to vir */
25707ec681f3Smrg   VkResult vk_result;
25717ec681f3Smrg
25727ec681f3Smrg   /* We should have got all the variants or no variants from the cache */
25737ec681f3Smrg   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
25747ec681f3Smrg   vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
25757ec681f3Smrg   if (vk_result != VK_SUCCESS)
25767ec681f3Smrg      return vk_result;
25777ec681f3Smrg
25787ec681f3Smrg   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
25797ec681f3Smrg          !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
25807ec681f3Smrg
25817ec681f3Smrg   if (pipeline->gs) {
25827ec681f3Smrg      vk_result =
25837ec681f3Smrg         pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
25847ec681f3Smrg      if (vk_result != VK_SUCCESS)
25857ec681f3Smrg         return vk_result;
25867ec681f3Smrg   }
25877ec681f3Smrg
25887ec681f3Smrg   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
25897ec681f3Smrg          !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
25907ec681f3Smrg
25917ec681f3Smrg   vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
25927ec681f3Smrg   if (vk_result != VK_SUCCESS)
25937ec681f3Smrg      return vk_result;
25947ec681f3Smrg
25957ec681f3Smrg   if (!upload_assembly(pipeline))
25967ec681f3Smrg      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
25977ec681f3Smrg
25987ec681f3Smrg   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
25997ec681f3Smrg
26007ec681f3Smrg success:
26017ec681f3Smrg
26027ec681f3Smrg   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
26037ec681f3Smrg   write_creation_feedback(pipeline,
26047ec681f3Smrg                           pCreateInfo->pNext,
26057ec681f3Smrg                           &pipeline_feedback,
26067ec681f3Smrg                           pCreateInfo->stageCount,
26077ec681f3Smrg                           pCreateInfo->pStages);
26087ec681f3Smrg
26097ec681f3Smrg   /* Since we have the variants in the pipeline shared data we can now free
26107ec681f3Smrg    * the pipeline stages.
26117ec681f3Smrg    */
26127ec681f3Smrg   pipeline_free_stages(device, pipeline, pAllocator);
26137ec681f3Smrg
26147ec681f3Smrg   pipeline_check_spill_size(pipeline);
26157ec681f3Smrg
26167ec681f3Smrg   return compute_vpm_config(pipeline);
26177ec681f3Smrg}
26187ec681f3Smrg
26197ec681f3Smrgstatic VkResult
26207ec681f3Smrgcompute_vpm_config(struct v3dv_pipeline *pipeline)
26217ec681f3Smrg{
26227ec681f3Smrg   struct v3dv_shader_variant *vs_variant =
26237ec681f3Smrg      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
26247ec681f3Smrg   struct v3dv_shader_variant *vs_bin_variant =
26257ec681f3Smrg      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
26267ec681f3Smrg   struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
26277ec681f3Smrg   struct v3d_vs_prog_data *vs_bin =vs_bin_variant->prog_data.vs;
26287ec681f3Smrg
26297ec681f3Smrg   struct v3d_gs_prog_data *gs = NULL;
26307ec681f3Smrg   struct v3d_gs_prog_data *gs_bin = NULL;
26317ec681f3Smrg   if (pipeline->has_gs) {
26327ec681f3Smrg      struct v3dv_shader_variant *gs_variant =
26337ec681f3Smrg         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
26347ec681f3Smrg      struct v3dv_shader_variant *gs_bin_variant =
26357ec681f3Smrg         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
26367ec681f3Smrg      gs = gs_variant->prog_data.gs;
26377ec681f3Smrg      gs_bin = gs_bin_variant->prog_data.gs;
26387ec681f3Smrg   }
26397ec681f3Smrg
26407ec681f3Smrg   if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
26417ec681f3Smrg                               vs_bin, vs, gs_bin, gs,
26427ec681f3Smrg                               &pipeline->vpm_cfg_bin,
26437ec681f3Smrg                               &pipeline->vpm_cfg)) {
26447ec681f3Smrg      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
26457ec681f3Smrg   }
26467ec681f3Smrg
26477ec681f3Smrg   return VK_SUCCESS;
26487ec681f3Smrg}
26497ec681f3Smrg
26507ec681f3Smrgstatic unsigned
26517ec681f3Smrgv3dv_dynamic_state_mask(VkDynamicState state)
26527ec681f3Smrg{
26537ec681f3Smrg   switch(state) {
26547ec681f3Smrg   case VK_DYNAMIC_STATE_VIEWPORT:
26557ec681f3Smrg      return V3DV_DYNAMIC_VIEWPORT;
26567ec681f3Smrg   case VK_DYNAMIC_STATE_SCISSOR:
26577ec681f3Smrg      return V3DV_DYNAMIC_SCISSOR;
26587ec681f3Smrg   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
26597ec681f3Smrg      return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
26607ec681f3Smrg   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
26617ec681f3Smrg      return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
26627ec681f3Smrg   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
26637ec681f3Smrg      return V3DV_DYNAMIC_STENCIL_REFERENCE;
26647ec681f3Smrg   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
26657ec681f3Smrg      return V3DV_DYNAMIC_BLEND_CONSTANTS;
26667ec681f3Smrg   case VK_DYNAMIC_STATE_DEPTH_BIAS:
26677ec681f3Smrg      return V3DV_DYNAMIC_DEPTH_BIAS;
26687ec681f3Smrg   case VK_DYNAMIC_STATE_LINE_WIDTH:
26697ec681f3Smrg      return V3DV_DYNAMIC_LINE_WIDTH;
26707ec681f3Smrg   case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
26717ec681f3Smrg      return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
26727ec681f3Smrg
26737ec681f3Smrg   /* Depth bounds testing is not available in in V3D 4.2 so here we are just
26747ec681f3Smrg    * ignoring this dynamic state. We are already asserting at pipeline creation
26757ec681f3Smrg    * time that depth bounds testing is not enabled.
26767ec681f3Smrg    */
26777ec681f3Smrg   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
26787ec681f3Smrg      return 0;
26797ec681f3Smrg
26807ec681f3Smrg   default:
26817ec681f3Smrg      unreachable("Unhandled dynamic state");
26827ec681f3Smrg   }
26837ec681f3Smrg}
26847ec681f3Smrg
26857ec681f3Smrgstatic void
26867ec681f3Smrgpipeline_init_dynamic_state(
26877ec681f3Smrg   struct v3dv_pipeline *pipeline,
26887ec681f3Smrg   const VkPipelineDynamicStateCreateInfo *pDynamicState,
26897ec681f3Smrg   const VkPipelineViewportStateCreateInfo *pViewportState,
26907ec681f3Smrg   const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
26917ec681f3Smrg   const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
26927ec681f3Smrg   const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
26937ec681f3Smrg   const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
26947ec681f3Smrg{
26957ec681f3Smrg   pipeline->dynamic_state = default_dynamic_state;
26967ec681f3Smrg   struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
26977ec681f3Smrg
26987ec681f3Smrg   /* Create a mask of enabled dynamic states */
26997ec681f3Smrg   uint32_t dynamic_states = 0;
27007ec681f3Smrg   if (pDynamicState) {
27017ec681f3Smrg      uint32_t count = pDynamicState->dynamicStateCount;
27027ec681f3Smrg      for (uint32_t s = 0; s < count; s++) {
27037ec681f3Smrg         dynamic_states |=
27047ec681f3Smrg            v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
27057ec681f3Smrg      }
27067ec681f3Smrg   }
27077ec681f3Smrg
27087ec681f3Smrg   /* For any pipeline states that are not dynamic, set the dynamic state
27097ec681f3Smrg    * from the static pipeline state.
27107ec681f3Smrg    */
27117ec681f3Smrg   if (pViewportState) {
27127ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
27137ec681f3Smrg         dynamic->viewport.count = pViewportState->viewportCount;
27147ec681f3Smrg         typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
27157ec681f3Smrg                      pViewportState->viewportCount);
27167ec681f3Smrg
27177ec681f3Smrg         for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
27187ec681f3Smrg            v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
27197ec681f3Smrg                                        dynamic->viewport.scale[i],
27207ec681f3Smrg                                        dynamic->viewport.translate[i]);
27217ec681f3Smrg         }
27227ec681f3Smrg      }
27237ec681f3Smrg
27247ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
27257ec681f3Smrg         dynamic->scissor.count = pViewportState->scissorCount;
27267ec681f3Smrg         typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
27277ec681f3Smrg                      pViewportState->scissorCount);
27287ec681f3Smrg      }
27297ec681f3Smrg   }
27307ec681f3Smrg
27317ec681f3Smrg   if (pDepthStencilState) {
27327ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
27337ec681f3Smrg         dynamic->stencil_compare_mask.front =
27347ec681f3Smrg            pDepthStencilState->front.compareMask;
27357ec681f3Smrg         dynamic->stencil_compare_mask.back =
27367ec681f3Smrg            pDepthStencilState->back.compareMask;
27377ec681f3Smrg      }
27387ec681f3Smrg
27397ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
27407ec681f3Smrg         dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
27417ec681f3Smrg         dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
27427ec681f3Smrg      }
27437ec681f3Smrg
27447ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
27457ec681f3Smrg         dynamic->stencil_reference.front = pDepthStencilState->front.reference;
27467ec681f3Smrg         dynamic->stencil_reference.back = pDepthStencilState->back.reference;
27477ec681f3Smrg      }
27487ec681f3Smrg   }
27497ec681f3Smrg
27507ec681f3Smrg   if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
27517ec681f3Smrg      memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
27527ec681f3Smrg             sizeof(dynamic->blend_constants));
27537ec681f3Smrg   }
27547ec681f3Smrg
27557ec681f3Smrg   if (pRasterizationState) {
27567ec681f3Smrg      if (pRasterizationState->depthBiasEnable &&
27577ec681f3Smrg          !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
27587ec681f3Smrg         dynamic->depth_bias.constant_factor =
27597ec681f3Smrg            pRasterizationState->depthBiasConstantFactor;
27607ec681f3Smrg         dynamic->depth_bias.depth_bias_clamp =
27617ec681f3Smrg            pRasterizationState->depthBiasClamp;
27627ec681f3Smrg         dynamic->depth_bias.slope_factor =
27637ec681f3Smrg            pRasterizationState->depthBiasSlopeFactor;
27647ec681f3Smrg      }
27657ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
27667ec681f3Smrg         dynamic->line_width = pRasterizationState->lineWidth;
27677ec681f3Smrg   }
27687ec681f3Smrg
27697ec681f3Smrg   if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
27707ec681f3Smrg      dynamic->color_write_enable = 0;
27717ec681f3Smrg      for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
27727ec681f3Smrg         dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
27737ec681f3Smrg   }
27747ec681f3Smrg
27757ec681f3Smrg   pipeline->dynamic_state.mask = dynamic_states;
27767ec681f3Smrg}
27777ec681f3Smrg
27787ec681f3Smrgstatic bool
27797ec681f3Smrgstencil_op_is_no_op(const VkStencilOpState *stencil)
27807ec681f3Smrg{
27817ec681f3Smrg   return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
27827ec681f3Smrg          stencil->compareOp == VK_COMPARE_OP_ALWAYS;
27837ec681f3Smrg}
27847ec681f3Smrg
27857ec681f3Smrgstatic void
27867ec681f3Smrgenable_depth_bias(struct v3dv_pipeline *pipeline,
27877ec681f3Smrg                  const VkPipelineRasterizationStateCreateInfo *rs_info)
27887ec681f3Smrg{
27897ec681f3Smrg   pipeline->depth_bias.enabled = false;
27907ec681f3Smrg   pipeline->depth_bias.is_z16 = false;
27917ec681f3Smrg
27927ec681f3Smrg   if (!rs_info || !rs_info->depthBiasEnable)
27937ec681f3Smrg      return;
27947ec681f3Smrg
27957ec681f3Smrg   /* Check the depth/stencil attachment description for the subpass used with
27967ec681f3Smrg    * this pipeline.
27977ec681f3Smrg    */
27987ec681f3Smrg   assert(pipeline->pass && pipeline->subpass);
27997ec681f3Smrg   struct v3dv_render_pass *pass = pipeline->pass;
28007ec681f3Smrg   struct v3dv_subpass *subpass = pipeline->subpass;
28017ec681f3Smrg
28027ec681f3Smrg   if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
28037ec681f3Smrg      return;
28047ec681f3Smrg
28057ec681f3Smrg   assert(subpass->ds_attachment.attachment < pass->attachment_count);
28067ec681f3Smrg   struct v3dv_render_pass_attachment *att =
28077ec681f3Smrg      &pass->attachments[subpass->ds_attachment.attachment];
28087ec681f3Smrg
28097ec681f3Smrg   if (att->desc.format == VK_FORMAT_D16_UNORM)
28107ec681f3Smrg      pipeline->depth_bias.is_z16 = true;
28117ec681f3Smrg
28127ec681f3Smrg   pipeline->depth_bias.enabled = true;
28137ec681f3Smrg}
28147ec681f3Smrg
28157ec681f3Smrgstatic void
28167ec681f3Smrgpipeline_set_ez_state(struct v3dv_pipeline *pipeline,
28177ec681f3Smrg                      const VkPipelineDepthStencilStateCreateInfo *ds_info)
28187ec681f3Smrg{
28197ec681f3Smrg   if (!ds_info || !ds_info->depthTestEnable) {
28207ec681f3Smrg      pipeline->ez_state = V3D_EZ_DISABLED;
28217ec681f3Smrg      return;
28227ec681f3Smrg   }
28237ec681f3Smrg
28247ec681f3Smrg   switch (ds_info->depthCompareOp) {
28257ec681f3Smrg   case VK_COMPARE_OP_LESS:
28267ec681f3Smrg   case VK_COMPARE_OP_LESS_OR_EQUAL:
28277ec681f3Smrg      pipeline->ez_state = V3D_EZ_LT_LE;
28287ec681f3Smrg      break;
28297ec681f3Smrg   case VK_COMPARE_OP_GREATER:
28307ec681f3Smrg   case VK_COMPARE_OP_GREATER_OR_EQUAL:
28317ec681f3Smrg      pipeline->ez_state = V3D_EZ_GT_GE;
28327ec681f3Smrg      break;
28337ec681f3Smrg   case VK_COMPARE_OP_NEVER:
28347ec681f3Smrg   case VK_COMPARE_OP_EQUAL:
28357ec681f3Smrg      pipeline->ez_state = V3D_EZ_UNDECIDED;
28367ec681f3Smrg      break;
28377ec681f3Smrg   default:
28387ec681f3Smrg      pipeline->ez_state = V3D_EZ_DISABLED;
28397ec681f3Smrg      break;
28407ec681f3Smrg   }
28417ec681f3Smrg
28427ec681f3Smrg   /* If stencil is enabled and is not a no-op, we need to disable EZ */
28437ec681f3Smrg   if (ds_info->stencilTestEnable &&
28447ec681f3Smrg       (!stencil_op_is_no_op(&ds_info->front) ||
28457ec681f3Smrg        !stencil_op_is_no_op(&ds_info->back))) {
28467ec681f3Smrg         pipeline->ez_state = V3D_EZ_DISABLED;
28477ec681f3Smrg   }
28487ec681f3Smrg}
28497ec681f3Smrg
28507ec681f3Smrgstatic bool
28517ec681f3Smrgpipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
28527ec681f3Smrg{
28537ec681f3Smrg   for (uint8_t i = 0; i < pipeline->va_count; i++) {
28547ec681f3Smrg      if (vk_format_is_int(pipeline->va[i].vk_format))
28557ec681f3Smrg         return true;
28567ec681f3Smrg   }
28577ec681f3Smrg   return false;
28587ec681f3Smrg}
28597ec681f3Smrg
28607ec681f3Smrg/* @pipeline can be NULL. We assume in that case that all the attributes have
28617ec681f3Smrg * a float format (we only create an all-float BO once and we reuse it with
28627ec681f3Smrg * all float pipelines), otherwise we look at the actual type of each
28637ec681f3Smrg * attribute used with the specific pipeline passed in.
28647ec681f3Smrg */
28657ec681f3Smrgstruct v3dv_bo *
28667ec681f3Smrgv3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
28677ec681f3Smrg                                              struct v3dv_pipeline *pipeline)
28687ec681f3Smrg{
28697ec681f3Smrg   uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
28707ec681f3Smrg   struct v3dv_bo *bo;
28717ec681f3Smrg
28727ec681f3Smrg   bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
28737ec681f3Smrg
28747ec681f3Smrg   if (!bo) {
28757ec681f3Smrg      fprintf(stderr, "failed to allocate memory for the default "
28767ec681f3Smrg              "attribute values\n");
28777ec681f3Smrg      return NULL;
28787ec681f3Smrg   }
28797ec681f3Smrg
28807ec681f3Smrg   bool ok = v3dv_bo_map(device, bo, size);
28817ec681f3Smrg   if (!ok) {
28827ec681f3Smrg      fprintf(stderr, "failed to map default attribute values buffer\n");
28837ec681f3Smrg      return false;
28847ec681f3Smrg   }
28857ec681f3Smrg
28867ec681f3Smrg   uint32_t *attrs = bo->map;
28877ec681f3Smrg   uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
28887ec681f3Smrg   for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
28897ec681f3Smrg      attrs[i * 4 + 0] = 0;
28907ec681f3Smrg      attrs[i * 4 + 1] = 0;
28917ec681f3Smrg      attrs[i * 4 + 2] = 0;
28927ec681f3Smrg      VkFormat attr_format =
28937ec681f3Smrg         pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
28947ec681f3Smrg      if (i < va_count && vk_format_is_int(attr_format)) {
28957ec681f3Smrg         attrs[i * 4 + 3] = 1;
28967ec681f3Smrg      } else {
28977ec681f3Smrg         attrs[i * 4 + 3] = fui(1.0);
28987ec681f3Smrg      }
28997ec681f3Smrg   }
29007ec681f3Smrg
29017ec681f3Smrg   v3dv_bo_unmap(device, bo);
29027ec681f3Smrg
29037ec681f3Smrg   return bo;
29047ec681f3Smrg}
29057ec681f3Smrg
29067ec681f3Smrgstatic void
29077ec681f3Smrgpipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
29087ec681f3Smrg                         const VkPipelineMultisampleStateCreateInfo *ms_info)
29097ec681f3Smrg{
29107ec681f3Smrg   pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
29117ec681f3Smrg
29127ec681f3Smrg   /* Ignore pSampleMask if we are not enabling multisampling. The hardware
29137ec681f3Smrg    * requires this to be 0xf or 0x0 if using a single sample.
29147ec681f3Smrg    */
29157ec681f3Smrg   if (ms_info && ms_info->pSampleMask &&
29167ec681f3Smrg       ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
29177ec681f3Smrg      pipeline->sample_mask &= ms_info->pSampleMask[0];
29187ec681f3Smrg   }
29197ec681f3Smrg}
29207ec681f3Smrg
29217ec681f3Smrgstatic void
29227ec681f3Smrgpipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
29237ec681f3Smrg                                 const VkPipelineMultisampleStateCreateInfo *ms_info)
29247ec681f3Smrg{
29257ec681f3Smrg   pipeline->sample_rate_shading =
29267ec681f3Smrg      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
29277ec681f3Smrg      ms_info->sampleShadingEnable;
29287ec681f3Smrg}
29297ec681f3Smrg
29307ec681f3Smrgstatic VkResult
29317ec681f3Smrgpipeline_init(struct v3dv_pipeline *pipeline,
29327ec681f3Smrg              struct v3dv_device *device,
29337ec681f3Smrg              struct v3dv_pipeline_cache *cache,
29347ec681f3Smrg              const VkGraphicsPipelineCreateInfo *pCreateInfo,
29357ec681f3Smrg              const VkAllocationCallbacks *pAllocator)
29367ec681f3Smrg{
29377ec681f3Smrg   VkResult result = VK_SUCCESS;
29387ec681f3Smrg
29397ec681f3Smrg   pipeline->device = device;
29407ec681f3Smrg
29417ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
29427ec681f3Smrg   pipeline->layout = layout;
29437ec681f3Smrg
29447ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
29457ec681f3Smrg   assert(pCreateInfo->subpass < render_pass->subpass_count);
29467ec681f3Smrg   pipeline->pass = render_pass;
29477ec681f3Smrg   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
29487ec681f3Smrg
29497ec681f3Smrg   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
29507ec681f3Smrg      pCreateInfo->pInputAssemblyState;
29517ec681f3Smrg   pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
29527ec681f3Smrg
29537ec681f3Smrg   /* If rasterization is not enabled, various CreateInfo structs must be
29547ec681f3Smrg    * ignored.
29557ec681f3Smrg    */
29567ec681f3Smrg   const bool raster_enabled =
29577ec681f3Smrg      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
29587ec681f3Smrg
29597ec681f3Smrg   const VkPipelineViewportStateCreateInfo *vp_info =
29607ec681f3Smrg      raster_enabled ? pCreateInfo->pViewportState : NULL;
29617ec681f3Smrg
29627ec681f3Smrg   const VkPipelineDepthStencilStateCreateInfo *ds_info =
29637ec681f3Smrg      raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
29647ec681f3Smrg
29657ec681f3Smrg   const VkPipelineRasterizationStateCreateInfo *rs_info =
29667ec681f3Smrg      raster_enabled ? pCreateInfo->pRasterizationState : NULL;
29677ec681f3Smrg
29687ec681f3Smrg   const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
29697ec681f3Smrg      rs_info ? vk_find_struct_const(
29707ec681f3Smrg         rs_info->pNext,
29717ec681f3Smrg         PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
29727ec681f3Smrg            NULL;
29737ec681f3Smrg
29747ec681f3Smrg   const VkPipelineColorBlendStateCreateInfo *cb_info =
29757ec681f3Smrg      raster_enabled ? pCreateInfo->pColorBlendState : NULL;
29767ec681f3Smrg
29777ec681f3Smrg   const VkPipelineMultisampleStateCreateInfo *ms_info =
29787ec681f3Smrg      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
29797ec681f3Smrg
29807ec681f3Smrg   const VkPipelineColorWriteCreateInfoEXT *cw_info =
29817ec681f3Smrg      cb_info ? vk_find_struct_const(cb_info->pNext,
29827ec681f3Smrg                                     PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
29837ec681f3Smrg                NULL;
29847ec681f3Smrg
29857ec681f3Smrg   pipeline_init_dynamic_state(pipeline,
29867ec681f3Smrg                               pCreateInfo->pDynamicState,
29877ec681f3Smrg                               vp_info, ds_info, cb_info, rs_info, cw_info);
29887ec681f3Smrg
29897ec681f3Smrg   /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
29907ec681f3Smrg    * feature and it shouldn't be used by any pipeline.
29917ec681f3Smrg    */
29927ec681f3Smrg   assert(!ds_info || !ds_info->depthBoundsTestEnable);
29937ec681f3Smrg
29947ec681f3Smrg   v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
29957ec681f3Smrg                                       rs_info, pv_info, ms_info);
29967ec681f3Smrg
29977ec681f3Smrg   pipeline_set_ez_state(pipeline, ds_info);
29987ec681f3Smrg   enable_depth_bias(pipeline, rs_info);
29997ec681f3Smrg   pipeline_set_sample_mask(pipeline, ms_info);
30007ec681f3Smrg   pipeline_set_sample_rate_shading(pipeline, ms_info);
30017ec681f3Smrg
30027ec681f3Smrg   pipeline->primitive_restart =
30037ec681f3Smrg      pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
30047ec681f3Smrg
30057ec681f3Smrg   result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
30067ec681f3Smrg
30077ec681f3Smrg   if (result != VK_SUCCESS) {
30087ec681f3Smrg      /* Caller would already destroy the pipeline, and we didn't allocate any
30097ec681f3Smrg       * extra info. We don't need to do anything else.
30107ec681f3Smrg       */
30117ec681f3Smrg      return result;
30127ec681f3Smrg   }
30137ec681f3Smrg
30147ec681f3Smrg   const VkPipelineVertexInputStateCreateInfo *vi_info =
30157ec681f3Smrg      pCreateInfo->pVertexInputState;
30167ec681f3Smrg
30177ec681f3Smrg   const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
30187ec681f3Smrg      vk_find_struct_const(vi_info->pNext,
30197ec681f3Smrg                           PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
30207ec681f3Smrg
30217ec681f3Smrg   v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
30227ec681f3Smrg
30237ec681f3Smrg   if (pipeline_has_integer_vertex_attrib(pipeline)) {
30247ec681f3Smrg      pipeline->default_attribute_values =
30257ec681f3Smrg         v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
30267ec681f3Smrg      if (!pipeline->default_attribute_values)
30277ec681f3Smrg         return VK_ERROR_OUT_OF_DEVICE_MEMORY;
30287ec681f3Smrg   } else {
30297ec681f3Smrg      pipeline->default_attribute_values = NULL;
30307ec681f3Smrg   }
30317ec681f3Smrg
30327ec681f3Smrg   return result;
30337ec681f3Smrg}
30347ec681f3Smrg
30357ec681f3Smrgstatic VkResult
30367ec681f3Smrggraphics_pipeline_create(VkDevice _device,
30377ec681f3Smrg                         VkPipelineCache _cache,
30387ec681f3Smrg                         const VkGraphicsPipelineCreateInfo *pCreateInfo,
30397ec681f3Smrg                         const VkAllocationCallbacks *pAllocator,
30407ec681f3Smrg                         VkPipeline *pPipeline)
30417ec681f3Smrg{
30427ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_device, device, _device);
30437ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
30447ec681f3Smrg
30457ec681f3Smrg   struct v3dv_pipeline *pipeline;
30467ec681f3Smrg   VkResult result;
30477ec681f3Smrg
30487ec681f3Smrg   /* Use the default pipeline cache if none is specified */
30497ec681f3Smrg   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
30507ec681f3Smrg      cache = &device->default_pipeline_cache;
30517ec681f3Smrg
30527ec681f3Smrg   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
30537ec681f3Smrg                               VK_OBJECT_TYPE_PIPELINE);
30547ec681f3Smrg
30557ec681f3Smrg   if (pipeline == NULL)
30567ec681f3Smrg      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
30577ec681f3Smrg
30587ec681f3Smrg   result = pipeline_init(pipeline, device, cache,
30597ec681f3Smrg                          pCreateInfo,
30607ec681f3Smrg                          pAllocator);
30617ec681f3Smrg
30627ec681f3Smrg   if (result != VK_SUCCESS) {
30637ec681f3Smrg      v3dv_destroy_pipeline(pipeline, device, pAllocator);
30647ec681f3Smrg      if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
30657ec681f3Smrg         *pPipeline = VK_NULL_HANDLE;
30667ec681f3Smrg      return result;
30677ec681f3Smrg   }
30687ec681f3Smrg
30697ec681f3Smrg   *pPipeline = v3dv_pipeline_to_handle(pipeline);
30707ec681f3Smrg
30717ec681f3Smrg   return VK_SUCCESS;
30727ec681f3Smrg}
30737ec681f3Smrg
30747ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
30757ec681f3Smrgv3dv_CreateGraphicsPipelines(VkDevice _device,
30767ec681f3Smrg                             VkPipelineCache pipelineCache,
30777ec681f3Smrg                             uint32_t count,
30787ec681f3Smrg                             const VkGraphicsPipelineCreateInfo *pCreateInfos,
30797ec681f3Smrg                             const VkAllocationCallbacks *pAllocator,
30807ec681f3Smrg                             VkPipeline *pPipelines)
30817ec681f3Smrg{
30827ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_device, device, _device);
30837ec681f3Smrg   VkResult result = VK_SUCCESS;
30847ec681f3Smrg
30857ec681f3Smrg   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
30867ec681f3Smrg      mtx_lock(&device->pdevice->mutex);
30877ec681f3Smrg
30887ec681f3Smrg   uint32_t i = 0;
30897ec681f3Smrg   for (; i < count; i++) {
30907ec681f3Smrg      VkResult local_result;
30917ec681f3Smrg
30927ec681f3Smrg      local_result = graphics_pipeline_create(_device,
30937ec681f3Smrg                                              pipelineCache,
30947ec681f3Smrg                                              &pCreateInfos[i],
30957ec681f3Smrg                                              pAllocator,
30967ec681f3Smrg                                              &pPipelines[i]);
30977ec681f3Smrg
30987ec681f3Smrg      if (local_result != VK_SUCCESS) {
30997ec681f3Smrg         result = local_result;
31007ec681f3Smrg         pPipelines[i] = VK_NULL_HANDLE;
31017ec681f3Smrg
31027ec681f3Smrg         if (pCreateInfos[i].flags &
31037ec681f3Smrg             VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
31047ec681f3Smrg            break;
31057ec681f3Smrg      }
31067ec681f3Smrg   }
31077ec681f3Smrg
31087ec681f3Smrg   for (; i < count; i++)
31097ec681f3Smrg      pPipelines[i] = VK_NULL_HANDLE;
31107ec681f3Smrg
31117ec681f3Smrg   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
31127ec681f3Smrg      mtx_unlock(&device->pdevice->mutex);
31137ec681f3Smrg
31147ec681f3Smrg   return result;
31157ec681f3Smrg}
31167ec681f3Smrg
31177ec681f3Smrgstatic void
31187ec681f3Smrgshared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
31197ec681f3Smrg{
31207ec681f3Smrg   assert(glsl_type_is_vector_or_scalar(type));
31217ec681f3Smrg
31227ec681f3Smrg   uint32_t comp_size = glsl_type_is_boolean(type)
31237ec681f3Smrg      ? 4 : glsl_get_bit_size(type) / 8;
31247ec681f3Smrg   unsigned length = glsl_get_vector_elements(type);
31257ec681f3Smrg   *size = comp_size * length,
31267ec681f3Smrg   *align = comp_size * (length == 3 ? 4 : length);
31277ec681f3Smrg}
31287ec681f3Smrg
31297ec681f3Smrgstatic void
31307ec681f3Smrglower_cs_shared(struct nir_shader *nir)
31317ec681f3Smrg{
31327ec681f3Smrg   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
31337ec681f3Smrg              nir_var_mem_shared, shared_type_info);
31347ec681f3Smrg   NIR_PASS_V(nir, nir_lower_explicit_io,
31357ec681f3Smrg              nir_var_mem_shared, nir_address_format_32bit_offset);
31367ec681f3Smrg}
31377ec681f3Smrg
/* Compiles the single compute stage of a compute pipeline.
 *
 * The whole pipeline is first looked up in 'cache' by a SHA1 of the compute
 * pipeline key; on a hit the cached shared data is reused and compilation is
 * skipped.  On a miss the stage's NIR is produced, lowered (pipeline layout
 * and shared memory), compiled into the BROADCOM_SHADER_COMPUTE variant,
 * uploaded to a device BO and added to the cache.
 *
 * Returns VK_SUCCESS, VK_PIPELINE_COMPILE_REQUIRED_EXT (when the app asked
 * to fail rather than compile on a cache miss), or an out-of-memory error.
 */
static VkResult
pipeline_compile_compute(struct v3dv_pipeline *pipeline,
                         struct v3dv_pipeline_cache *cache,
                         const VkComputePipelineCreateInfo *info,
                         const VkAllocationCallbacks *alloc)
{
   /* Creation feedback for VK_EXT_pipeline_creation_feedback, timed from
    * here until the cache lookup / compilation completes.
    */
   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
   };
   int64_t pipeline_start = os_time_get_nano();

   struct v3dv_device *device = pipeline->device;
   struct v3dv_physical_device *physical_device =
      &device->instance->physicalDevice;

   const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
   gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);

   /* Owned by the pipeline via pipeline->cs from the assignment below;
    * released by pipeline_free_stages() at the end of this function.
    */
   struct v3dv_pipeline_stage *p_stage =
      vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!p_stage)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
   p_stage->pipeline = pipeline;
   p_stage->stage = gl_shader_stage_to_broadcom(stage);
   p_stage->entrypoint = sinfo->pName;
   p_stage->module = vk_shader_module_from_handle(sinfo->module);
   p_stage->spec_info = sinfo->pSpecializationInfo;
   p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };

   /* Hash of module + entrypoint + stage + specialization data; feeds the
    * pipeline hash below and identifies this stage in the NIR cache.
    */
   pipeline_hash_shader(p_stage->module,
                        p_stage->entrypoint,
                        stage,
                        p_stage->spec_info,
                        p_stage->shader_sha1);

   /* We try to get directly the compiled variant from the cache first, so
    * NIR is only produced (pipeline_stage_get_nir below) on a cache miss.
    */
   p_stage->nir = NULL;

   pipeline->cs = p_stage;
   pipeline->active_stages |= sinfo->stage;

   struct v3dv_pipeline_key pipeline_key;
   pipeline_populate_compute_key(pipeline, &pipeline_key, info);
   unsigned char pipeline_sha1[20];
   pipeline_hash_compute(pipeline, &pipeline_key, pipeline_sha1);

   bool cache_hit = false;
   pipeline->shared_data =
      v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1, &cache_hit);

   if (pipeline->shared_data != NULL) {
      assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
      /* Only report an application-cache hit when the hit came from the
       * app-provided cache, not from the device's default cache.
       */
      if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
         pipeline_feedback.flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;

      goto success;
   }

   /* Cache miss: the app may have asked us to fail instead of compiling.
    * NOTE(review): this path returns without recording creation feedback;
    * p_stage stays reachable via pipeline->cs and is presumably freed when
    * the caller destroys the pipeline — confirm against
    * v3dv_destroy_pipeline.
    */
   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;

   /* NOTE(review): result is not NULL-checked before the variants[] store
    * below — verify this helper cannot fail or that NULL is handled.
    */
   pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline_sha1,
                                                               pipeline,
                                                               false);

   p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;

   /* If not found on cache, compile it */
   p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
   assert(p_stage->nir);

   st_nir_opts(p_stage->nir);
   pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
   lower_cs_shared(p_stage->nir);

   VkResult result = VK_SUCCESS;

   struct v3d_key key;
   memset(&key, 0, sizeof(key));
   pipeline_populate_v3d_key(&key, p_stage, 0,
                             pipeline->device->features.robustBufferAccess);
   pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
      pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
                                      alloc, &result);

   if (result != VK_SUCCESS)
      return result;

   /* Upload the compiled assembly into a device BO. */
   if (!upload_assembly(pipeline))
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);

success:

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
   write_creation_feedback(pipeline,
                           info->pNext,
                           &pipeline_feedback,
                           1,
                           &info->stage);

   /* As we got the variants in pipeline->shared_data, after compiling we
    * don't need the pipeline_stages
    */
   pipeline_free_stages(device, pipeline, alloc);

   pipeline_check_spill_size(pipeline);

   return VK_SUCCESS;
}
32537ec681f3Smrg
32547ec681f3Smrgstatic VkResult
32557ec681f3Smrgcompute_pipeline_init(struct v3dv_pipeline *pipeline,
32567ec681f3Smrg                      struct v3dv_device *device,
32577ec681f3Smrg                      struct v3dv_pipeline_cache *cache,
32587ec681f3Smrg                      const VkComputePipelineCreateInfo *info,
32597ec681f3Smrg                      const VkAllocationCallbacks *alloc)
32607ec681f3Smrg{
32617ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
32627ec681f3Smrg
32637ec681f3Smrg   pipeline->device = device;
32647ec681f3Smrg   pipeline->layout = layout;
32657ec681f3Smrg
32667ec681f3Smrg   VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
32677ec681f3Smrg
32687ec681f3Smrg   return result;
32697ec681f3Smrg}
32707ec681f3Smrg
32717ec681f3Smrgstatic VkResult
32727ec681f3Smrgcompute_pipeline_create(VkDevice _device,
32737ec681f3Smrg                         VkPipelineCache _cache,
32747ec681f3Smrg                         const VkComputePipelineCreateInfo *pCreateInfo,
32757ec681f3Smrg                         const VkAllocationCallbacks *pAllocator,
32767ec681f3Smrg                         VkPipeline *pPipeline)
32777ec681f3Smrg{
32787ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_device, device, _device);
32797ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
32807ec681f3Smrg
32817ec681f3Smrg   struct v3dv_pipeline *pipeline;
32827ec681f3Smrg   VkResult result;
32837ec681f3Smrg
32847ec681f3Smrg   /* Use the default pipeline cache if none is specified */
32857ec681f3Smrg   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
32867ec681f3Smrg      cache = &device->default_pipeline_cache;
32877ec681f3Smrg
32887ec681f3Smrg   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
32897ec681f3Smrg                               VK_OBJECT_TYPE_PIPELINE);
32907ec681f3Smrg   if (pipeline == NULL)
32917ec681f3Smrg      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
32927ec681f3Smrg
32937ec681f3Smrg   result = compute_pipeline_init(pipeline, device, cache,
32947ec681f3Smrg                                  pCreateInfo, pAllocator);
32957ec681f3Smrg   if (result != VK_SUCCESS) {
32967ec681f3Smrg      v3dv_destroy_pipeline(pipeline, device, pAllocator);
32977ec681f3Smrg      if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
32987ec681f3Smrg         *pPipeline = VK_NULL_HANDLE;
32997ec681f3Smrg      return result;
33007ec681f3Smrg   }
33017ec681f3Smrg
33027ec681f3Smrg   *pPipeline = v3dv_pipeline_to_handle(pipeline);
33037ec681f3Smrg
33047ec681f3Smrg   return VK_SUCCESS;
33057ec681f3Smrg}
33067ec681f3Smrg
33077ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
33087ec681f3Smrgv3dv_CreateComputePipelines(VkDevice _device,
33097ec681f3Smrg                            VkPipelineCache pipelineCache,
33107ec681f3Smrg                            uint32_t createInfoCount,
33117ec681f3Smrg                            const VkComputePipelineCreateInfo *pCreateInfos,
33127ec681f3Smrg                            const VkAllocationCallbacks *pAllocator,
33137ec681f3Smrg                            VkPipeline *pPipelines)
33147ec681f3Smrg{
33157ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_device, device, _device);
33167ec681f3Smrg   VkResult result = VK_SUCCESS;
33177ec681f3Smrg
33187ec681f3Smrg   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
33197ec681f3Smrg      mtx_lock(&device->pdevice->mutex);
33207ec681f3Smrg
33217ec681f3Smrg   uint32_t i = 0;
33227ec681f3Smrg   for (; i < createInfoCount; i++) {
33237ec681f3Smrg      VkResult local_result;
33247ec681f3Smrg      local_result = compute_pipeline_create(_device,
33257ec681f3Smrg                                              pipelineCache,
33267ec681f3Smrg                                              &pCreateInfos[i],
33277ec681f3Smrg                                              pAllocator,
33287ec681f3Smrg                                              &pPipelines[i]);
33297ec681f3Smrg
33307ec681f3Smrg      if (local_result != VK_SUCCESS) {
33317ec681f3Smrg         result = local_result;
33327ec681f3Smrg         pPipelines[i] = VK_NULL_HANDLE;
33337ec681f3Smrg
33347ec681f3Smrg         if (pCreateInfos[i].flags &
33357ec681f3Smrg             VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
33367ec681f3Smrg            break;
33377ec681f3Smrg      }
33387ec681f3Smrg   }
33397ec681f3Smrg
33407ec681f3Smrg   for (; i < createInfoCount; i++)
33417ec681f3Smrg      pPipelines[i] = VK_NULL_HANDLE;
33427ec681f3Smrg
33437ec681f3Smrg   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
33447ec681f3Smrg      mtx_unlock(&device->pdevice->mutex);
33457ec681f3Smrg
33467ec681f3Smrg   return result;
33477ec681f3Smrg}
3348