broadcom/vulkan/v3dv_pipeline.c

7ec681f3Smrg/*
7ec681f3Smrg * Copyright © 2019 Raspberry Pi
7ec681f3Smrg *
7ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
7ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
7ec681f3Smrg * to deal in the Software without restriction, including without limitation
7ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
7ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
7ec681f3Smrg *
7ec681f3Smrg * The above copyright notice and this permission notice (including the next
7ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
7ec681f3Smrg * Software.
7ec681f3Smrg *
7ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
7ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
7ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
7ec681f3Smrg * IN THE SOFTWARE.
7ec681f3Smrg */
7ec681f3Smrg
7ec681f3Smrg#include "vk_util.h"
7ec681f3Smrg
7ec681f3Smrg#include "v3dv_debug.h"
7ec681f3Smrg#include "v3dv_private.h"
7ec681f3Smrg
7ec681f3Smrg#include "vk_format_info.h"
7ec681f3Smrg
7ec681f3Smrg#include "common/v3d_debug.h"
7ec681f3Smrg
7ec681f3Smrg#include "compiler/nir/nir_builder.h"
7ec681f3Smrg#include "nir/nir_serialize.h"
7ec681f3Smrg
7ec681f3Smrg#include "util/u_atomic.h"
7ec681f3Smrg#include "util/u_prim.h"
7ec681f3Smrg#include "util/os_time.h"
7ec681f3Smrg
7ec681f3Smrg#include "vulkan/util/vk_format.h"
7ec681f3Smrg
7ec681f3Smrgstatic VkResult
7ec681f3Smrgcompute_vpm_config(struct v3dv_pipeline *pipeline);
7ec681f3Smrg
7ec681f3Smrgvoid
7ec681f3Smrgv3dv_print_v3d_key(struct v3d_key *key,
7ec681f3Smrg                   uint32_t v3d_key_size)
7ec681f3Smrg{
7ec681f3Smrg   struct mesa_sha1 ctx;
7ec681f3Smrg   unsigned char sha1[20];
7ec681f3Smrg   char sha1buf[41];
7ec681f3Smrg
7ec681f3Smrg   _mesa_sha1_init(&ctx);
7ec681f3Smrg
7ec681f3Smrg   _mesa_sha1_update(&ctx, key, v3d_key_size);
7ec681f3Smrg
7ec681f3Smrg   _mesa_sha1_final(&ctx, sha1);
7ec681f3Smrg   _mesa_sha1_format(sha1buf, sha1);
7ec681f3Smrg
7ec681f3Smrg   fprintf(stderr, "key %p: %s\n", key, sha1buf);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_compute_sha1_from_nir(nir_shader *nir,
7ec681f3Smrg                               unsigned char sha1[20])
7ec681f3Smrg{
7ec681f3Smrg   assert(nir);
7ec681f3Smrg   struct blob blob;
7ec681f3Smrg   blob_init(&blob);
7ec681f3Smrg
7ec681f3Smrg   nir_serialize(&blob, nir, false);
7ec681f3Smrg   if (!blob.out_of_memory)
7ec681f3Smrg      _mesa_sha1_compute(blob.data, blob.size, sha1);
7ec681f3Smrg
7ec681f3Smrg   blob_finish(&blob);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgvoid
7ec681f3Smrgv3dv_shader_module_internal_init(struct v3dv_device *device,
7ec681f3Smrg                                 struct vk_shader_module *module,
7ec681f3Smrg                                 nir_shader *nir)
7ec681f3Smrg{
7ec681f3Smrg   vk_object_base_init(&device->vk, &module->base,
7ec681f3Smrg                       VK_OBJECT_TYPE_SHADER_MODULE);
7ec681f3Smrg   module->nir = nir;
7ec681f3Smrg   module->size = 0;
7ec681f3Smrg
7ec681f3Smrg   pipeline_compute_sha1_from_nir(nir, module->sha1);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgvoid
7ec681f3Smrgv3dv_shader_variant_destroy(struct v3dv_device *device,
7ec681f3Smrg                            struct v3dv_shader_variant *variant)
7ec681f3Smrg{
7ec681f3Smrg   /* The assembly BO is shared by all variants in the pipeline, so it can't
7ec681f3Smrg    * be freed here and should be freed with the pipeline
7ec681f3Smrg    */
7ec681f3Smrg   ralloc_free(variant->prog_data.base);
7ec681f3Smrg   vk_free(&device->vk.alloc, variant);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgdestroy_pipeline_stage(struct v3dv_device *device,
7ec681f3Smrg                       struct v3dv_pipeline_stage *p_stage,
7ec681f3Smrg                       const VkAllocationCallbacks *pAllocator)
7ec681f3Smrg{
7ec681f3Smrg   if (!p_stage)
7ec681f3Smrg      return;
7ec681f3Smrg
7ec681f3Smrg   ralloc_free(p_stage->nir);
7ec681f3Smrg   vk_free2(&device->vk.alloc, pAllocator, p_stage);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_free_stages(struct v3dv_device *device,
7ec681f3Smrg                     struct v3dv_pipeline *pipeline,
7ec681f3Smrg                     const VkAllocationCallbacks *pAllocator)
7ec681f3Smrg{
7ec681f3Smrg   assert(pipeline);
7ec681f3Smrg
7ec681f3Smrg   /* FIXME: we can't just use a loop over mesa stage due the bin, would be
7ec681f3Smrg    * good to find an alternative.
7ec681f3Smrg    */
7ec681f3Smrg   destroy_pipeline_stage(device, pipeline->vs, pAllocator);
7ec681f3Smrg   destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
7ec681f3Smrg   destroy_pipeline_stage(device, pipeline->gs, pAllocator);
7ec681f3Smrg   destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
7ec681f3Smrg   destroy_pipeline_stage(device, pipeline->fs, pAllocator);
7ec681f3Smrg   destroy_pipeline_stage(device, pipeline->cs, pAllocator);
7ec681f3Smrg
7ec681f3Smrg   pipeline->vs = NULL;
7ec681f3Smrg   pipeline->vs_bin = NULL;
7ec681f3Smrg   pipeline->gs = NULL;
7ec681f3Smrg   pipeline->gs_bin = NULL;
7ec681f3Smrg   pipeline->fs = NULL;
7ec681f3Smrg   pipeline->cs = NULL;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgv3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                      struct v3dv_device *device,
7ec681f3Smrg                      const VkAllocationCallbacks *pAllocator)
7ec681f3Smrg{
7ec681f3Smrg   if (!pipeline)
7ec681f3Smrg      return;
7ec681f3Smrg
7ec681f3Smrg   pipeline_free_stages(device, pipeline, pAllocator);
7ec681f3Smrg
7ec681f3Smrg   if (pipeline->shared_data) {
7ec681f3Smrg      v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
7ec681f3Smrg      pipeline->shared_data = NULL;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (pipeline->spill.bo) {
7ec681f3Smrg      assert(pipeline->spill.size_per_thread > 0);
7ec681f3Smrg      v3dv_bo_free(device, pipeline->spill.bo);
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (pipeline->default_attribute_values) {
7ec681f3Smrg      v3dv_bo_free(device, pipeline->default_attribute_values);
7ec681f3Smrg      pipeline->default_attribute_values = NULL;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   vk_object_free(&device->vk, pAllocator, pipeline);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
7ec681f3Smrgv3dv_DestroyPipeline(VkDevice _device,
7ec681f3Smrg                     VkPipeline _pipeline,
7ec681f3Smrg                     const VkAllocationCallbacks *pAllocator)
7ec681f3Smrg{
7ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_device, device, _device);
7ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
7ec681f3Smrg
7ec681f3Smrg   if (!pipeline)
7ec681f3Smrg      return;
7ec681f3Smrg
7ec681f3Smrg   v3dv_destroy_pipeline(pipeline, device, pAllocator);
7ec681f3Smrg}
7ec681f3Smrg
/* Options handed to spirv_to_nir when translating a SPIR-V shader module:
 * the enabled capabilities plus the address formats used for each kind of
 * memory access.
 */
static const struct spirv_to_nir_options default_spirv_options =  {
   .caps = {
      .device_group = true,
      .multiview = true,
      .subgroup_basic = true,
      .variable_pointers = true,
    },
   .ubo_addr_format = nir_address_format_32bit_index_offset,
   .ssbo_addr_format = nir_address_format_32bit_index_offset,
   .phys_ssbo_addr_format = nir_address_format_64bit_global,
   .push_const_addr_format = nir_address_format_logical,
   .shared_addr_format = nir_address_format_32bit_offset,
};
7ec681f3Smrg
/* NIR compiler options used by the driver. They are passed to spirv_to_nir
 * when compiling SPIR-V modules and are also exposed to other code through
 * v3dv_pipeline_get_nir_options().
 */
const nir_shader_compiler_options v3dv_nir_options = {
   .lower_uadd_sat = true,
   .lower_iadd_sat = true,
   .lower_all_io_to_temps = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_bitfield_insert_to_shifts = true,
   .lower_bitfield_extract_to_shifts = true,
   .lower_bitfield_reverse = true,
   .lower_bit_count = true,
   .lower_cs_local_id_from_index = true,
   .lower_ffract = true,
   .lower_fmod = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_pack_half_2x16 = true,
   .lower_unpack_half_2x16 = true,
   /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and
    * get the tests to pass since it might produce slightly better code.
    */
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   /* FIXME: check if we can use multop + umul24 to implement mul2x32_64
    * without lowering.
    */
   .lower_mul_2x32_64 = true,
   .lower_fdiv = true,
   .lower_find_lsb = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_ffma64 = true,
   .lower_flrp32 = true,
   .lower_fpow = true,
   .lower_fsat = true,
   .lower_fsqrt = true,
   .lower_ifind_msb = true,
   .lower_isign = true,
   .lower_ldexp = true,
   .lower_mul_high = true,
   .lower_wpos_pntc = true,
   .lower_rotate = true,
   .lower_to_scalar = true,
   .lower_device_index_to_zero = true,
   .has_fsub = true,
   .has_isub = true,
   .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
                                   * needs to be supported */
   .lower_interpolate_at = true,
   .max_unroll_iterations = 16,
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
   .divergence_analysis_options =
      nir_divergence_multiple_workgroup_per_compute_subgroup
};
7ec681f3Smrg
7ec681f3Smrgconst nir_shader_compiler_options *
7ec681f3Smrgv3dv_pipeline_get_nir_options(void)
7ec681f3Smrg{
7ec681f3Smrg   return &v3dv_nir_options;
7ec681f3Smrg}
7ec681f3Smrg
/* Runs a NIR pass through NIR_PASS on the variable named `nir` that must be
 * in scope at the point of use. If the pass reports progress, the variable
 * named `progress` (also expected in the caller's scope) is set to true.
 * The macro is a statement expression that evaluates to whether this
 * particular pass made progress.
 */
#define OPT(pass, ...) ({                                  \
   bool this_progress = false;                             \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
   if (this_progress)                                      \
      progress = true;                                     \
   this_progress;                                          \
})
7ec681f3Smrg
/* Runs the driver's NIR optimization loop on nir, repeating the whole
 * sequence of passes until none of them reports progress, and then removes
 * dead function-temp variables.
 *
 * allow_copies enables nir_opt_find_array_copies; see the comment inside the
 * loop for when that is appropriate.
 */
static void
nir_optimize(nir_shader *nir, bool allow_copies)
{
   /* Updated by the OPT macro whenever a pass makes progress */
   bool progress;

   do {
      progress = false;
      OPT(nir_split_array_vars, nir_var_function_temp);
      OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
      OPT(nir_opt_deref);
      OPT(nir_lower_vars_to_ssa);
      if (allow_copies) {
         /* Only run this pass in the first call to nir_optimize.  Later calls
          * assume that we've lowered away any copy_deref instructions and we
          * don't want to introduce any more.
          */
         OPT(nir_opt_find_array_copies);
      }
      OPT(nir_opt_copy_prop_vars);
      OPT(nir_opt_dead_write_vars);
      OPT(nir_opt_combine_stores, nir_var_all);

      OPT(nir_lower_alu_to_scalar, NULL, NULL);

      OPT(nir_copy_prop);
      OPT(nir_lower_phis_to_scalar, false);

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_combine_stores, nir_var_all);

      /* Passing 0 to the peephole select pass causes it to convert
       * if-statements that contain only move instructions in the branches
       * regardless of the count.
       *
       * A non-zero limit lets it also convert if-statements that contain up
       * to that many ALU instructions (total) in both branches; the second
       * call below uses a limit of 8.
       *
       * NOTE(review): the previous wording of this comment described a limit
       * of 1 while the code passes 8 -- confirm the intended limit.
       */
      OPT(nir_opt_peephole_select, 0, false, false);
      OPT(nir_opt_peephole_select, 8, false, true);

      OPT(nir_opt_intrinsics);
      OPT(nir_opt_idiv_const, 32);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);

      OPT(nir_opt_dead_cf);

      OPT(nir_opt_if, false);
      OPT(nir_opt_conditional_discard);

      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
      OPT(nir_lower_pack);
   } while (progress);

   /* Final cleanup, run once outside of the progress loop */
   OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
}
7ec681f3Smrg
/* Applies the driver's initial, pipeline-independent lowering and
 * optimization passes to a freshly created NIR shader: functions are inlined
 * and reduced to a single entrypoint named "main", variable initializers and
 * I/O are lowered, and nir_optimize is run twice (first allowing array copy
 * detection, then without it).
 *
 * The order of the passes matters; see the comments next to each group.
 */
static void
preprocess_nir(nir_shader *nir)
{
   /* We have to lower away local variable initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (func->is_entrypoint)
         func->name = ralloc_strdup(func, "main");
      else
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   /* Make sure we lower variable initializers on output variables so that
    * nir_remove_dead_variables below sees the corresponding stores
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the variable initializers.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* Split member structs.  We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   /* Fragment-shader-only lowering: vectorize outputs and lower input
    * attachments.
    */
   if (nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                    .use_fragcoord_sysval = false,
                       });
   }

   /* Push constants are lowered with a plain 32-bit offset format */
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   /* UBOs and SSBOs are lowered with an (index, offset) format */
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in |
              nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant, false);
   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);

   NIR_PASS_V(nir, nir_normalize_cubemap_coords);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);

   /* First optimization round: array copy detection is still allowed */
   nir_optimize(nir, true);

   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);

   /* Lower a bunch of stuff */
   NIR_PASS_V(nir, nir_lower_var_copies);

   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);

   NIR_PASS_V(nir, nir_lower_indirect_derefs,
              nir_var_function_temp, 2);

   NIR_PASS_V(nir, nir_lower_array_deref_of_vec,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_lower_direct_array_deref_of_vec_load);

   NIR_PASS_V(nir, nir_lower_frexp);

   /* Get rid of split copies */
   nir_optimize(nir, false);
}
7ec681f3Smrg
7ec681f3Smrgstatic nir_shader *
7ec681f3Smrgshader_module_compile_to_nir(struct v3dv_device *device,
7ec681f3Smrg                             struct v3dv_pipeline_stage *stage)
7ec681f3Smrg{
7ec681f3Smrg   nir_shader *nir;
7ec681f3Smrg   const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
7ec681f3Smrg
7ec681f3Smrg   if (!stage->module->nir) {
7ec681f3Smrg      uint32_t *spirv = (uint32_t *) stage->module->data;
7ec681f3Smrg      assert(stage->module->size % 4 == 0);
7ec681f3Smrg
7ec681f3Smrg      if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV))
7ec681f3Smrg         v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
7ec681f3Smrg
7ec681f3Smrg      uint32_t num_spec_entries = 0;
7ec681f3Smrg      struct nir_spirv_specialization *spec_entries =
7ec681f3Smrg         vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);
7ec681f3Smrg      const struct spirv_to_nir_options spirv_options = default_spirv_options;
7ec681f3Smrg      nir = spirv_to_nir(spirv, stage->module->size / 4,
7ec681f3Smrg                         spec_entries, num_spec_entries,
7ec681f3Smrg                         broadcom_shader_stage_to_gl(stage->stage),
7ec681f3Smrg                         stage->entrypoint,
7ec681f3Smrg                         &spirv_options, nir_options);
7ec681f3Smrg      assert(nir);
7ec681f3Smrg      nir_validate_shader(nir, "after spirv_to_nir");
7ec681f3Smrg      free(spec_entries);
7ec681f3Smrg   } else {
7ec681f3Smrg      /* For NIR modules created by the driver we can't consume the NIR
7ec681f3Smrg       * directly, we need to clone it first, since ownership of the NIR code
7ec681f3Smrg       * (as with SPIR-V code for SPIR-V shaders), belongs to the creator
7ec681f3Smrg       * of the module and modules can be destroyed immediately after been used
7ec681f3Smrg       * to create pipelines.
7ec681f3Smrg       */
7ec681f3Smrg      nir = nir_shader_clone(NULL, stage->module->nir);
7ec681f3Smrg      nir_validate_shader(nir, "nir module");
7ec681f3Smrg   }
7ec681f3Smrg   assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));
7ec681f3Smrg
7ec681f3Smrg   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
7ec681f3Smrg      .frag_coord = true,
7ec681f3Smrg      .point_coord = true,
7ec681f3Smrg   };
7ec681f3Smrg   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
7ec681f3Smrg
7ec681f3Smrg   if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
7ec681f3Smrg                             v3d_debug_flag_for_shader_stage(
7ec681f3Smrg                                broadcom_shader_stage_to_gl(stage->stage))))) {
7ec681f3Smrg      fprintf(stderr, "Initial form: %s prog %d NIR:\n",
7ec681f3Smrg              broadcom_shader_stage_name(stage->stage),
7ec681f3Smrg              stage->program_id);
7ec681f3Smrg      nir_print_shader(nir, stderr);
7ec681f3Smrg      fprintf(stderr, "\n");
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   preprocess_nir(nir);
7ec681f3Smrg
7ec681f3Smrg   return nir;
7ec681f3Smrg}
7ec681f3Smrg
/* Size callback that measures a GLSL type in attribute slots. The bindless
 * argument is not used.
 */
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   const int slots = glsl_count_attribute_slots(type, false);

   return slots;
}
7ec681f3Smrg
7ec681f3Smrg/* FIXME: the number of parameters for this method is somewhat big. Perhaps
7ec681f3Smrg * rethink.
7ec681f3Smrg */
7ec681f3Smrgstatic unsigned
7ec681f3Smrgdescriptor_map_add(struct v3dv_descriptor_map *map,
7ec681f3Smrg                   int set,
7ec681f3Smrg                   int binding,
7ec681f3Smrg                   int array_index,
7ec681f3Smrg                   int array_size,
7ec681f3Smrg                   uint8_t return_size)
7ec681f3Smrg{
7ec681f3Smrg   assert(array_index < array_size);
7ec681f3Smrg   assert(return_size == 16 || return_size == 32);
7ec681f3Smrg
7ec681f3Smrg   unsigned index = 0;
7ec681f3Smrg   for (unsigned i = 0; i < map->num_desc; i++) {
7ec681f3Smrg      if (set == map->set[i] &&
7ec681f3Smrg          binding == map->binding[i] &&
7ec681f3Smrg          array_index == map->array_index[i]) {
7ec681f3Smrg         assert(array_size == map->array_size[i]);
7ec681f3Smrg         if (return_size != map->return_size[index]) {
7ec681f3Smrg            /* It the return_size is different it means that the same sampler
7ec681f3Smrg             * was used for operations with different precision
7ec681f3Smrg             * requirement. In this case we need to ensure that we use the
7ec681f3Smrg             * larger one.
7ec681f3Smrg             */
7ec681f3Smrg            map->return_size[index] = 32;
7ec681f3Smrg         }
7ec681f3Smrg         return index;
7ec681f3Smrg      }
7ec681f3Smrg      index++;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   assert(index == map->num_desc);
7ec681f3Smrg
7ec681f3Smrg   map->set[map->num_desc] = set;
7ec681f3Smrg   map->binding[map->num_desc] = binding;
7ec681f3Smrg   map->array_index[map->num_desc] = array_index;
7ec681f3Smrg   map->array_size[map->num_desc] = array_size;
7ec681f3Smrg   map->return_size[map->num_desc] = return_size;
7ec681f3Smrg   map->num_desc++;
7ec681f3Smrg
7ec681f3Smrg   return index;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrglower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
7ec681f3Smrg                         struct v3dv_pipeline *pipeline)
7ec681f3Smrg{
7ec681f3Smrg   assert(instr->intrinsic == nir_intrinsic_load_push_constant);
7ec681f3Smrg   instr->intrinsic = nir_intrinsic_load_uniform;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic struct v3dv_descriptor_map*
7ec681f3Smrgpipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                            VkDescriptorType desc_type,
7ec681f3Smrg                            gl_shader_stage gl_stage,
7ec681f3Smrg                            bool is_sampler)
7ec681f3Smrg{
7ec681f3Smrg   enum broadcom_shader_stage broadcom_stage =
7ec681f3Smrg      gl_shader_stage_to_broadcom(gl_stage);
7ec681f3Smrg
7ec681f3Smrg   assert(pipeline->shared_data &&
7ec681f3Smrg          pipeline->shared_data->maps[broadcom_stage]);
7ec681f3Smrg
7ec681f3Smrg   switch(desc_type) {
7ec681f3Smrg   case VK_DESCRIPTOR_TYPE_SAMPLER:
7ec681f3Smrg      return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
7ec681f3Smrg   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
7ec681f3Smrg   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
7ec681f3Smrg   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
7ec681f3Smrg   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
7ec681f3Smrg   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
7ec681f3Smrg      return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
7ec681f3Smrg   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
7ec681f3Smrg      return is_sampler ?
7ec681f3Smrg         &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
7ec681f3Smrg         &pipeline->shared_data->maps[broadcom_stage]->texture_map;
7ec681f3Smrg   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
7ec681f3Smrg      return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
7ec681f3Smrg   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
7ec681f3Smrg      return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
7ec681f3Smrg   default:
7ec681f3Smrg      unreachable("Descriptor type unknown or not having a descriptor map");
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/* Gathers info from the intrinsic (set and binding) and then lowers it so it
7ec681f3Smrg * could be used by the v3d_compiler */
7ec681f3Smrgstatic void
7ec681f3Smrglower_vulkan_resource_index(nir_builder *b,
7ec681f3Smrg                            nir_intrinsic_instr *instr,
7ec681f3Smrg                            nir_shader *shader,
7ec681f3Smrg                            struct v3dv_pipeline *pipeline,
7ec681f3Smrg                            const struct v3dv_pipeline_layout *layout)
7ec681f3Smrg{
7ec681f3Smrg   assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
7ec681f3Smrg
7ec681f3Smrg   nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
7ec681f3Smrg
7ec681f3Smrg   unsigned set = nir_intrinsic_desc_set(instr);
7ec681f3Smrg   unsigned binding = nir_intrinsic_binding(instr);
7ec681f3Smrg   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
7ec681f3Smrg   struct v3dv_descriptor_set_binding_layout *binding_layout =
7ec681f3Smrg      &set_layout->binding[binding];
7ec681f3Smrg   unsigned index = 0;
7ec681f3Smrg   const VkDescriptorType desc_type = nir_intrinsic_desc_type(instr);
7ec681f3Smrg
7ec681f3Smrg   switch (desc_type) {
7ec681f3Smrg   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
7ec681f3Smrg   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
7ec681f3Smrg      struct v3dv_descriptor_map *descriptor_map =
7ec681f3Smrg         pipeline_get_descriptor_map(pipeline, desc_type, shader->info.stage, false);
7ec681f3Smrg
7ec681f3Smrg      if (!const_val)
7ec681f3Smrg         unreachable("non-constant vulkan_resource_index array index");
7ec681f3Smrg
7ec681f3Smrg      index = descriptor_map_add(descriptor_map, set, binding,
7ec681f3Smrg                                 const_val->u32,
7ec681f3Smrg                                 binding_layout->array_size,
7ec681f3Smrg                                 32 /* return_size: doesn't really apply for this case */);
7ec681f3Smrg
7ec681f3Smrg      if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
7ec681f3Smrg         /* skip index 0 which is used for push constants */
7ec681f3Smrg         index++;
7ec681f3Smrg      }
7ec681f3Smrg      break;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   default:
7ec681f3Smrg      unreachable("unsupported desc_type for vulkan_resource_index");
7ec681f3Smrg      break;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   /* Since we use the deref pass, both vulkan_resource_index and
7ec681f3Smrg    * vulkan_load_descriptor return a vec2 providing an index and
7ec681f3Smrg    * offset. Our backend compiler only cares about the index part.
7ec681f3Smrg    */
7ec681f3Smrg   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
7ec681f3Smrg                            nir_imm_ivec2(b, index, 0));
7ec681f3Smrg   nir_instr_remove(&instr->instr);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/* Returns return_size, so it could be used for the case of not having a
7ec681f3Smrg * sampler object
7ec681f3Smrg */
7ec681f3Smrgstatic uint8_t
7ec681f3Smrglower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
7ec681f3Smrg                        nir_shader *shader,
7ec681f3Smrg                        struct v3dv_pipeline *pipeline,
7ec681f3Smrg                        const struct v3dv_pipeline_layout *layout)
7ec681f3Smrg{
7ec681f3Smrg   nir_ssa_def *index = NULL;
7ec681f3Smrg   unsigned base_index = 0;
7ec681f3Smrg   unsigned array_elements = 1;
7ec681f3Smrg   nir_tex_src *src = &instr->src[src_idx];
7ec681f3Smrg   bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
7ec681f3Smrg
7ec681f3Smrg   /* We compute first the offsets */
7ec681f3Smrg   nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
7ec681f3Smrg   while (deref->deref_type != nir_deref_type_var) {
7ec681f3Smrg      assert(deref->parent.is_ssa);
7ec681f3Smrg      nir_deref_instr *parent =
7ec681f3Smrg         nir_instr_as_deref(deref->parent.ssa->parent_instr);
7ec681f3Smrg
7ec681f3Smrg      assert(deref->deref_type == nir_deref_type_array);
7ec681f3Smrg
7ec681f3Smrg      if (nir_src_is_const(deref->arr.index) && index == NULL) {
7ec681f3Smrg         /* We're still building a direct index */
7ec681f3Smrg         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
7ec681f3Smrg      } else {
7ec681f3Smrg         if (index == NULL) {
7ec681f3Smrg            /* We used to be direct but not anymore */
7ec681f3Smrg            index = nir_imm_int(b, base_index);
7ec681f3Smrg            base_index = 0;
7ec681f3Smrg         }
7ec681f3Smrg
7ec681f3Smrg         index = nir_iadd(b, index,
7ec681f3Smrg                          nir_imul(b, nir_imm_int(b, array_elements),
7ec681f3Smrg                                   nir_ssa_for_src(b, deref->arr.index, 1)));
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      array_elements *= glsl_get_length(parent->type);
7ec681f3Smrg
7ec681f3Smrg      deref = parent;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (index)
7ec681f3Smrg      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
7ec681f3Smrg
7ec681f3Smrg   /* We have the offsets, we apply them, rewriting the source or removing
7ec681f3Smrg    * instr if needed
7ec681f3Smrg    */
7ec681f3Smrg   if (index) {
7ec681f3Smrg      nir_instr_rewrite_src(&instr->instr, &src->src,
7ec681f3Smrg                            nir_src_for_ssa(index));
7ec681f3Smrg
7ec681f3Smrg      src->src_type = is_sampler ?
7ec681f3Smrg         nir_tex_src_sampler_offset :
7ec681f3Smrg         nir_tex_src_texture_offset;
7ec681f3Smrg   } else {
7ec681f3Smrg      nir_tex_instr_remove_src(instr, src_idx);
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   uint32_t set = deref->var->data.descriptor_set;
7ec681f3Smrg   uint32_t binding = deref->var->data.binding;
7ec681f3Smrg   /* FIXME: this is a really simplified check for the precision to be used
7ec681f3Smrg    * for the sampling. Right now we are ony checking for the variables used
7ec681f3Smrg    * on the operation itself, but there are other cases that we could use to
7ec681f3Smrg    * infer the precision requirement.
7ec681f3Smrg    */
7ec681f3Smrg   bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
7ec681f3Smrg                            deref->var->data.precision == GLSL_PRECISION_LOW;
7ec681f3Smrg   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
7ec681f3Smrg   struct v3dv_descriptor_set_binding_layout *binding_layout =
7ec681f3Smrg      &set_layout->binding[binding];
7ec681f3Smrg
7ec681f3Smrg   /* For input attachments, the shader includes the attachment_idx. As we are
7ec681f3Smrg    * treating them as a texture, we only want the base_index
7ec681f3Smrg    */
7ec681f3Smrg   uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
7ec681f3Smrg      deref->var->data.index + base_index :
7ec681f3Smrg      base_index;
7ec681f3Smrg
7ec681f3Smrg   uint8_t return_size;
7ec681f3Smrg   if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT))
7ec681f3Smrg      return_size = 16;
7ec681f3Smrg   else  if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT))
7ec681f3Smrg      return_size = 32;
7ec681f3Smrg   else
7ec681f3Smrg      return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_descriptor_map *map =
7ec681f3Smrg      pipeline_get_descriptor_map(pipeline, binding_layout->type,
7ec681f3Smrg                                  shader->info.stage, is_sampler);
7ec681f3Smrg   int desc_index =
7ec681f3Smrg      descriptor_map_add(map,
7ec681f3Smrg                         deref->var->data.descriptor_set,
7ec681f3Smrg                         deref->var->data.binding,
7ec681f3Smrg                         array_index,
7ec681f3Smrg                         binding_layout->array_size,
7ec681f3Smrg                         return_size);
7ec681f3Smrg
7ec681f3Smrg   if (is_sampler)
7ec681f3Smrg      instr->sampler_index = desc_index;
7ec681f3Smrg   else
7ec681f3Smrg      instr->texture_index = desc_index;
7ec681f3Smrg
7ec681f3Smrg   return return_size;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic bool
7ec681f3Smrglower_sampler(nir_builder *b, nir_tex_instr *instr,
7ec681f3Smrg              nir_shader *shader,
7ec681f3Smrg              struct v3dv_pipeline *pipeline,
7ec681f3Smrg              const struct v3dv_pipeline_layout *layout)
7ec681f3Smrg{
7ec681f3Smrg   uint8_t return_size = 0;
7ec681f3Smrg
7ec681f3Smrg   int texture_idx =
7ec681f3Smrg      nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
7ec681f3Smrg
7ec681f3Smrg   if (texture_idx >= 0)
7ec681f3Smrg      return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader,
7ec681f3Smrg                                            pipeline, layout);
7ec681f3Smrg
7ec681f3Smrg   int sampler_idx =
7ec681f3Smrg      nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
7ec681f3Smrg
7ec681f3Smrg   if (sampler_idx >= 0)
7ec681f3Smrg      lower_tex_src_to_offset(b, instr, sampler_idx, shader, pipeline, layout);
7ec681f3Smrg
7ec681f3Smrg   if (texture_idx < 0 && sampler_idx < 0)
7ec681f3Smrg      return false;
7ec681f3Smrg
7ec681f3Smrg   /* If we don't have a sampler, we assign it the idx we reserve for this
7ec681f3Smrg    * case, and we ensure that it is using the correct return size.
7ec681f3Smrg    */
7ec681f3Smrg   if (sampler_idx < 0) {
7ec681f3Smrg      instr->sampler_index = return_size == 16 ?
7ec681f3Smrg         V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   return true;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
7ec681f3Smrgstatic void
7ec681f3Smrglower_image_deref(nir_builder *b,
7ec681f3Smrg                  nir_intrinsic_instr *instr,
7ec681f3Smrg                  nir_shader *shader,
7ec681f3Smrg                  struct v3dv_pipeline *pipeline,
7ec681f3Smrg                  const struct v3dv_pipeline_layout *layout)
7ec681f3Smrg{
7ec681f3Smrg   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
7ec681f3Smrg   nir_ssa_def *index = NULL;
7ec681f3Smrg   unsigned array_elements = 1;
7ec681f3Smrg   unsigned base_index = 0;
7ec681f3Smrg
7ec681f3Smrg   while (deref->deref_type != nir_deref_type_var) {
7ec681f3Smrg      assert(deref->parent.is_ssa);
7ec681f3Smrg      nir_deref_instr *parent =
7ec681f3Smrg         nir_instr_as_deref(deref->parent.ssa->parent_instr);
7ec681f3Smrg
7ec681f3Smrg      assert(deref->deref_type == nir_deref_type_array);
7ec681f3Smrg
7ec681f3Smrg      if (nir_src_is_const(deref->arr.index) && index == NULL) {
7ec681f3Smrg         /* We're still building a direct index */
7ec681f3Smrg         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
7ec681f3Smrg      } else {
7ec681f3Smrg         if (index == NULL) {
7ec681f3Smrg            /* We used to be direct but not anymore */
7ec681f3Smrg            index = nir_imm_int(b, base_index);
7ec681f3Smrg            base_index = 0;
7ec681f3Smrg         }
7ec681f3Smrg
7ec681f3Smrg         index = nir_iadd(b, index,
7ec681f3Smrg                          nir_imul(b, nir_imm_int(b, array_elements),
7ec681f3Smrg                                   nir_ssa_for_src(b, deref->arr.index, 1)));
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      array_elements *= glsl_get_length(parent->type);
7ec681f3Smrg
7ec681f3Smrg      deref = parent;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (index)
7ec681f3Smrg      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
7ec681f3Smrg
7ec681f3Smrg   uint32_t set = deref->var->data.descriptor_set;
7ec681f3Smrg   uint32_t binding = deref->var->data.binding;
7ec681f3Smrg   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
7ec681f3Smrg   struct v3dv_descriptor_set_binding_layout *binding_layout =
7ec681f3Smrg      &set_layout->binding[binding];
7ec681f3Smrg
7ec681f3Smrg   uint32_t array_index = deref->var->data.index + base_index;
7ec681f3Smrg
7ec681f3Smrg   assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
7ec681f3Smrg          binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_descriptor_map *map =
7ec681f3Smrg      pipeline_get_descriptor_map(pipeline, binding_layout->type,
7ec681f3Smrg                                  shader->info.stage, false);
7ec681f3Smrg
7ec681f3Smrg   int desc_index =
7ec681f3Smrg      descriptor_map_add(map,
7ec681f3Smrg                         deref->var->data.descriptor_set,
7ec681f3Smrg                         deref->var->data.binding,
7ec681f3Smrg                         array_index,
7ec681f3Smrg                         binding_layout->array_size,
7ec681f3Smrg                         32 /* return_size: doesn't apply for textures */);
7ec681f3Smrg
7ec681f3Smrg   /* Note: we don't need to do anything here in relation to the precision and
7ec681f3Smrg    * the output size because for images we can infer that info from the image
7ec681f3Smrg    * intrinsic, that includes the image format (see
7ec681f3Smrg    * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
7ec681f3Smrg    */
7ec681f3Smrg
7ec681f3Smrg   index = nir_imm_int(b, desc_index);
7ec681f3Smrg
7ec681f3Smrg   nir_rewrite_image_intrinsic(instr, index, false);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic bool
7ec681f3Smrglower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
7ec681f3Smrg                nir_shader *shader,
7ec681f3Smrg                struct v3dv_pipeline *pipeline,
7ec681f3Smrg                const struct v3dv_pipeline_layout *layout)
7ec681f3Smrg{
7ec681f3Smrg   switch (instr->intrinsic) {
7ec681f3Smrg   case nir_intrinsic_load_layer_id:
7ec681f3Smrg      /* FIXME: if layered rendering gets supported, this would need a real
7ec681f3Smrg       * lowering
7ec681f3Smrg       */
7ec681f3Smrg      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
7ec681f3Smrg                               nir_imm_int(b, 0));
7ec681f3Smrg      nir_instr_remove(&instr->instr);
7ec681f3Smrg      return true;
7ec681f3Smrg
7ec681f3Smrg   case nir_intrinsic_load_push_constant:
7ec681f3Smrg      lower_load_push_constant(b, instr, pipeline);
7ec681f3Smrg      return true;
7ec681f3Smrg
7ec681f3Smrg   case nir_intrinsic_vulkan_resource_index:
7ec681f3Smrg      lower_vulkan_resource_index(b, instr, shader, pipeline, layout);
7ec681f3Smrg      return true;
7ec681f3Smrg
7ec681f3Smrg   case nir_intrinsic_load_vulkan_descriptor: {
7ec681f3Smrg      /* Loading the descriptor happens as part of load/store instructions,
7ec681f3Smrg       * so for us this is a no-op.
7ec681f3Smrg       */
7ec681f3Smrg      nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
7ec681f3Smrg      nir_instr_remove(&instr->instr);
7ec681f3Smrg      return true;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   case nir_intrinsic_image_deref_load:
7ec681f3Smrg   case nir_intrinsic_image_deref_store:
7ec681f3Smrg   case nir_intrinsic_image_deref_atomic_add:
7ec681f3Smrg   case nir_intrinsic_image_deref_atomic_imin:
7ec681f3Smrg   case nir_intrinsic_image_deref_atomic_umin:
7ec681f3Smrg   case nir_intrinsic_image_deref_atomic_imax:
7ec681f3Smrg   case nir_intrinsic_image_deref_atomic_umax:
7ec681f3Smrg   case nir_intrinsic_image_deref_atomic_and:
7ec681f3Smrg   case nir_intrinsic_image_deref_atomic_or:
7ec681f3Smrg   case nir_intrinsic_image_deref_atomic_xor:
7ec681f3Smrg   case nir_intrinsic_image_deref_atomic_exchange:
7ec681f3Smrg   case nir_intrinsic_image_deref_atomic_comp_swap:
7ec681f3Smrg   case nir_intrinsic_image_deref_size:
7ec681f3Smrg   case nir_intrinsic_image_deref_samples:
7ec681f3Smrg      lower_image_deref(b, instr, shader, pipeline, layout);
7ec681f3Smrg      return true;
7ec681f3Smrg
7ec681f3Smrg   default:
7ec681f3Smrg      return false;
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic bool
7ec681f3Smrglower_impl(nir_function_impl *impl,
7ec681f3Smrg           nir_shader *shader,
7ec681f3Smrg           struct v3dv_pipeline *pipeline,
7ec681f3Smrg           const struct v3dv_pipeline_layout *layout)
7ec681f3Smrg{
7ec681f3Smrg   nir_builder b;
7ec681f3Smrg   nir_builder_init(&b, impl);
7ec681f3Smrg   bool progress = false;
7ec681f3Smrg
7ec681f3Smrg   nir_foreach_block(block, impl) {
7ec681f3Smrg      nir_foreach_instr_safe(instr, block) {
7ec681f3Smrg         b.cursor = nir_before_instr(instr);
7ec681f3Smrg         switch (instr->type) {
7ec681f3Smrg         case nir_instr_type_tex:
7ec681f3Smrg            progress |=
7ec681f3Smrg               lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout);
7ec681f3Smrg            break;
7ec681f3Smrg         case nir_instr_type_intrinsic:
7ec681f3Smrg            progress |=
7ec681f3Smrg               lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader,
7ec681f3Smrg                               pipeline, layout);
7ec681f3Smrg            break;
7ec681f3Smrg         default:
7ec681f3Smrg            break;
7ec681f3Smrg         }
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   return progress;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic bool
7ec681f3Smrglower_pipeline_layout_info(nir_shader *shader,
7ec681f3Smrg                           struct v3dv_pipeline *pipeline,
7ec681f3Smrg                           const struct v3dv_pipeline_layout *layout)
7ec681f3Smrg{
7ec681f3Smrg   bool progress = false;
7ec681f3Smrg
7ec681f3Smrg   nir_foreach_function(function, shader) {
7ec681f3Smrg      if (function->impl)
7ec681f3Smrg         progress |= lower_impl(function->impl, shader, pipeline, layout);
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   return progress;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrglower_fs_io(nir_shader *nir)
7ec681f3Smrg{
7ec681f3Smrg   /* Our backend doesn't handle array fragment shader outputs */
7ec681f3Smrg   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
7ec681f3Smrg   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
7ec681f3Smrg
7ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
7ec681f3Smrg                               MESA_SHADER_FRAGMENT);
7ec681f3Smrg
7ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
7ec681f3Smrg                               MESA_SHADER_FRAGMENT);
7ec681f3Smrg
7ec681f3Smrg   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
7ec681f3Smrg              type_size_vec4, 0);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrglower_gs_io(struct nir_shader *nir)
7ec681f3Smrg{
7ec681f3Smrg   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
7ec681f3Smrg
7ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
7ec681f3Smrg                               MESA_SHADER_GEOMETRY);
7ec681f3Smrg
7ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
7ec681f3Smrg                               MESA_SHADER_GEOMETRY);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrglower_vs_io(struct nir_shader *nir)
7ec681f3Smrg{
7ec681f3Smrg   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
7ec681f3Smrg
7ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
7ec681f3Smrg                               MESA_SHADER_VERTEX);
7ec681f3Smrg
7ec681f3Smrg   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
7ec681f3Smrg                               MESA_SHADER_VERTEX);
7ec681f3Smrg
7ec681f3Smrg   /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
7ec681f3Smrg    * overlaps with v3d_nir_lower_io. Need further research though.
7ec681f3Smrg    */
7ec681f3Smrg}
7ec681f3Smrg
/**
 * Debug output callback. Intentionally does nothing with its arguments.
 */
static void
shader_debug_output(const char *message, void *data)
{
   /* FIXME: there is probably nothing extra we want to report from here. The
    * compiler barely uses this callback anyway: it is only an alternative
    * way of printing the shaderdb stats, which are already available through
    * V3D_DEBUG=shaderdb. It might make sense to revisit the v3d compiler and
    * remove the callback altogether.
    */
   (void)message;
   (void)data;
}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_populate_v3d_key(struct v3d_key *key,
7ec681f3Smrg                          const struct v3dv_pipeline_stage *p_stage,
7ec681f3Smrg                          uint32_t ucp_enables,
7ec681f3Smrg                          bool robust_buffer_access)
7ec681f3Smrg{
7ec681f3Smrg   assert(p_stage->pipeline->shared_data &&
7ec681f3Smrg          p_stage->pipeline->shared_data->maps[p_stage->stage]);
7ec681f3Smrg
7ec681f3Smrg   /* The following values are default values used at pipeline create. We use
7ec681f3Smrg    * there 32 bit as default return size.
7ec681f3Smrg    */
7ec681f3Smrg   struct v3dv_descriptor_map *sampler_map =
7ec681f3Smrg      &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
7ec681f3Smrg   struct v3dv_descriptor_map *texture_map =
7ec681f3Smrg      &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
7ec681f3Smrg
7ec681f3Smrg   key->num_tex_used = texture_map->num_desc;
7ec681f3Smrg   assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
7ec681f3Smrg   for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
7ec681f3Smrg      key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
7ec681f3Smrg      key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
7ec681f3Smrg      key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
7ec681f3Smrg      key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   key->num_samplers_used = sampler_map->num_desc;
7ec681f3Smrg   assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
7ec681f3Smrg   for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
7ec681f3Smrg        sampler_idx++) {
7ec681f3Smrg      key->sampler[sampler_idx].return_size =
7ec681f3Smrg         sampler_map->return_size[sampler_idx];
7ec681f3Smrg
7ec681f3Smrg      key->sampler[sampler_idx].return_channels =
7ec681f3Smrg         key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   switch (p_stage->stage) {
7ec681f3Smrg   case BROADCOM_SHADER_VERTEX:
7ec681f3Smrg   case BROADCOM_SHADER_VERTEX_BIN:
7ec681f3Smrg      key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
7ec681f3Smrg      break;
7ec681f3Smrg   case BROADCOM_SHADER_GEOMETRY:
7ec681f3Smrg   case BROADCOM_SHADER_GEOMETRY_BIN:
7ec681f3Smrg      /* FIXME: while we don't implement tessellation shaders */
7ec681f3Smrg      key->is_last_geometry_stage = true;
7ec681f3Smrg      break;
7ec681f3Smrg   case BROADCOM_SHADER_FRAGMENT:
7ec681f3Smrg   case BROADCOM_SHADER_COMPUTE:
7ec681f3Smrg      key->is_last_geometry_stage = false;
7ec681f3Smrg      break;
7ec681f3Smrg   default:
7ec681f3Smrg      unreachable("unsupported shader stage");
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   /* Vulkan doesn't have fixed function state for user clip planes. Instead,
7ec681f3Smrg    * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
7ec681f3Smrg    * takes care of adding a single compact array variable at
7ec681f3Smrg    * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
7ec681f3Smrg    *
7ec681f3Smrg    * The only lowering we are interested is specific to the fragment shader,
7ec681f3Smrg    * where we want to emit discards to honor writes to gl_ClipDistance[] in
7ec681f3Smrg    * previous stages. This is done via nir_lower_clip_fs() so we only set up
7ec681f3Smrg    * the ucp enable mask for that stage.
7ec681f3Smrg    */
7ec681f3Smrg   key->ucp_enables = ucp_enables;
7ec681f3Smrg
7ec681f3Smrg   key->robust_buffer_access = robust_buffer_access;
7ec681f3Smrg
7ec681f3Smrg   key->environment = V3D_ENVIRONMENT_VULKAN;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the
7ec681f3Smrg * same. For not using prim_mode that is the one already used on v3d
7ec681f3Smrg */
7ec681f3Smrgstatic const enum pipe_prim_type vk_to_pipe_prim_type[] = {
7ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
7ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
7ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
7ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
7ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
7ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
7ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
7ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
7ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
7ec681f3Smrg   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
7ec681f3Smrg};
7ec681f3Smrg
7ec681f3Smrgstatic const enum pipe_logicop vk_to_pipe_logicop[] = {
7ec681f3Smrg   [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
7ec681f3Smrg   [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
7ec681f3Smrg   [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
7ec681f3Smrg   [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
7ec681f3Smrg   [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
7ec681f3Smrg   [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
7ec681f3Smrg   [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
7ec681f3Smrg   [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
7ec681f3Smrg   [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
7ec681f3Smrg   [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
7ec681f3Smrg   [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
7ec681f3Smrg   [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
7ec681f3Smrg   [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
7ec681f3Smrg   [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
7ec681f3Smrg   [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
7ec681f3Smrg   [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
7ec681f3Smrg};
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
7ec681f3Smrg                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
7ec681f3Smrg                             const struct v3dv_pipeline_stage *p_stage,
7ec681f3Smrg                             bool has_geometry_shader,
7ec681f3Smrg                             uint32_t ucp_enables)
7ec681f3Smrg{
7ec681f3Smrg   assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
7ec681f3Smrg
7ec681f3Smrg   memset(key, 0, sizeof(*key));
7ec681f3Smrg
7ec681f3Smrg   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
7ec681f3Smrg   pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
7ec681f3Smrg      pCreateInfo->pInputAssemblyState;
7ec681f3Smrg   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
7ec681f3Smrg
7ec681f3Smrg   key->is_points = (topology == PIPE_PRIM_POINTS);
7ec681f3Smrg   key->is_lines = (topology >= PIPE_PRIM_LINES &&
7ec681f3Smrg                    topology <= PIPE_PRIM_LINE_STRIP);
7ec681f3Smrg   key->has_gs = has_geometry_shader;
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineColorBlendStateCreateInfo *cb_info =
7ec681f3Smrg      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
7ec681f3Smrg      pCreateInfo->pColorBlendState : NULL;
7ec681f3Smrg
7ec681f3Smrg   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
7ec681f3Smrg                       vk_to_pipe_logicop[cb_info->logicOp] :
7ec681f3Smrg                       PIPE_LOGICOP_COPY;
7ec681f3Smrg
7ec681f3Smrg   const bool raster_enabled =
7ec681f3Smrg      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
7ec681f3Smrg
7ec681f3Smrg   /* Multisample rasterization state must be ignored if rasterization
7ec681f3Smrg    * is disabled.
7ec681f3Smrg    */
7ec681f3Smrg   const VkPipelineMultisampleStateCreateInfo *ms_info =
7ec681f3Smrg      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
7ec681f3Smrg   if (ms_info) {
7ec681f3Smrg      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
7ec681f3Smrg             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
7ec681f3Smrg      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
7ec681f3Smrg
7ec681f3Smrg      if (key->msaa) {
7ec681f3Smrg         key->sample_coverage =
7ec681f3Smrg            p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
7ec681f3Smrg         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
7ec681f3Smrg         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   /* This is intended for V3D versions before 4.1, otherwise we just use the
7ec681f3Smrg    * tile buffer load/store swap R/B bit.
7ec681f3Smrg    */
7ec681f3Smrg   key->swap_color_rb = 0;
7ec681f3Smrg
7ec681f3Smrg   const struct v3dv_render_pass *pass =
7ec681f3Smrg      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
7ec681f3Smrg   const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
7ec681f3Smrg   for (uint32_t i = 0; i < subpass->color_count; i++) {
7ec681f3Smrg      const uint32_t att_idx = subpass->color_attachments[i].attachment;
7ec681f3Smrg      if (att_idx == VK_ATTACHMENT_UNUSED)
7ec681f3Smrg         continue;
7ec681f3Smrg
7ec681f3Smrg      key->cbufs |= 1 << i;
7ec681f3Smrg
7ec681f3Smrg      VkFormat fb_format = pass->attachments[att_idx].desc.format;
7ec681f3Smrg      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
7ec681f3Smrg
7ec681f3Smrg      /* If logic operations are enabled then we might emit color reads and we
7ec681f3Smrg       * need to know the color buffer format and swizzle for that
7ec681f3Smrg       */
7ec681f3Smrg      if (key->logicop_func != PIPE_LOGICOP_COPY) {
7ec681f3Smrg         key->color_fmt[i].format = fb_pipe_format;
7ec681f3Smrg         key->color_fmt[i].swizzle =
7ec681f3Smrg            v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format);
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      const struct util_format_description *desc =
7ec681f3Smrg         vk_format_description(fb_format);
7ec681f3Smrg
7ec681f3Smrg      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
7ec681f3Smrg          desc->channel[0].size == 32) {
7ec681f3Smrg         key->f32_color_rb |= 1 << i;
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      if (p_stage->nir->info.fs.untyped_color_outputs) {
7ec681f3Smrg         if (util_format_is_pure_uint(fb_pipe_format))
7ec681f3Smrg            key->uint_color_rb |= 1 << i;
7ec681f3Smrg         else if (util_format_is_pure_sint(fb_pipe_format))
7ec681f3Smrg            key->int_color_rb |= 1 << i;
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      if (key->is_points) {
7ec681f3Smrg         /* FIXME: The mask would need to be computed based on the shader
7ec681f3Smrg          * inputs. On gallium it is done at st_atom_rasterizer
7ec681f3Smrg          * (sprite_coord_enable). anv seems (need to confirm) to do that on
7ec681f3Smrg          * genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also
7ec681f3Smrg          * better to have tests to guide filling the mask.
7ec681f3Smrg          */
7ec681f3Smrg         key->point_sprite_mask = 0;
7ec681f3Smrg
7ec681f3Smrg         /* Vulkan mandates upper left. */
7ec681f3Smrg         key->point_coord_upper_left = true;
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
/**
 * Makes the outputs used by a stage match the inputs of the stage that
 * follows it.
 *
 * Stores next_stage_num_inputs in *num_used_outputs and copies
 * size_of_used_output_slots bytes from next_stage_input_slots into
 * used_output_slots.
 */
static void
setup_stage_outputs_from_next_stage_inputs(
   uint8_t next_stage_num_inputs,
   struct v3d_varying_slot *next_stage_input_slots,
   uint8_t *num_used_outputs,
   struct v3d_varying_slot *used_output_slots,
   uint32_t size_of_used_output_slots)
{
   memcpy(used_output_slots, next_stage_input_slots,
          size_of_used_output_slots);
   *num_used_outputs = next_stage_num_inputs;
}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
7ec681f3Smrg                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
7ec681f3Smrg                             const struct v3dv_pipeline_stage *p_stage)
7ec681f3Smrg{
7ec681f3Smrg   assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
7ec681f3Smrg          p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
7ec681f3Smrg
7ec681f3Smrg   memset(key, 0, sizeof(*key));
7ec681f3Smrg
7ec681f3Smrg   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
7ec681f3Smrg   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_pipeline *pipeline = p_stage->pipeline;
7ec681f3Smrg
7ec681f3Smrg   key->per_vertex_point_size =
7ec681f3Smrg      p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
7ec681f3Smrg
7ec681f3Smrg   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
7ec681f3Smrg
7ec681f3Smrg   assert(key->base.is_last_geometry_stage);
7ec681f3Smrg   if (key->is_coord) {
7ec681f3Smrg      /* Output varyings in the last binning shader are only used for transform
7ec681f3Smrg       * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
7ec681f3Smrg       */
7ec681f3Smrg      key->num_used_outputs = 0;
7ec681f3Smrg   } else {
7ec681f3Smrg      struct v3dv_shader_variant *fs_variant =
7ec681f3Smrg         pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
7ec681f3Smrg
7ec681f3Smrg      STATIC_ASSERT(sizeof(key->used_outputs) ==
7ec681f3Smrg                    sizeof(fs_variant->prog_data.fs->input_slots));
7ec681f3Smrg
7ec681f3Smrg      setup_stage_outputs_from_next_stage_inputs(
7ec681f3Smrg         fs_variant->prog_data.fs->num_inputs,
7ec681f3Smrg         fs_variant->prog_data.fs->input_slots,
7ec681f3Smrg         &key->num_used_outputs,
7ec681f3Smrg         key->used_outputs,
7ec681f3Smrg         sizeof(key->used_outputs));
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
7ec681f3Smrg                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
7ec681f3Smrg                             const struct v3dv_pipeline_stage *p_stage)
7ec681f3Smrg{
7ec681f3Smrg   assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
7ec681f3Smrg          p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
7ec681f3Smrg
7ec681f3Smrg   memset(key, 0, sizeof(*key));
7ec681f3Smrg
7ec681f3Smrg   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
7ec681f3Smrg   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_pipeline *pipeline = p_stage->pipeline;
7ec681f3Smrg
7ec681f3Smrg   /* Vulkan specifies a point size per vertex, so true for if the prim are
7ec681f3Smrg    * points, like on ES2)
7ec681f3Smrg    */
7ec681f3Smrg   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
7ec681f3Smrg      pCreateInfo->pInputAssemblyState;
7ec681f3Smrg   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
7ec681f3Smrg
7ec681f3Smrg   /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
7ec681f3Smrg    * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
7ec681f3Smrg   key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
7ec681f3Smrg
7ec681f3Smrg   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
7ec681f3Smrg
7ec681f3Smrg   if (key->is_coord) { /* Binning VS*/
7ec681f3Smrg      if (key->base.is_last_geometry_stage) {
7ec681f3Smrg         /* Output varyings in the last binning shader are only used for
7ec681f3Smrg          * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
7ec681f3Smrg          * supported.
7ec681f3Smrg          */
7ec681f3Smrg         key->num_used_outputs = 0;
7ec681f3Smrg      } else {
7ec681f3Smrg         /* Linking against GS binning program */
7ec681f3Smrg         assert(pipeline->gs);
7ec681f3Smrg         struct v3dv_shader_variant *gs_bin_variant =
7ec681f3Smrg            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
7ec681f3Smrg
7ec681f3Smrg         STATIC_ASSERT(sizeof(key->used_outputs) ==
7ec681f3Smrg                       sizeof(gs_bin_variant->prog_data.gs->input_slots));
7ec681f3Smrg
7ec681f3Smrg         setup_stage_outputs_from_next_stage_inputs(
7ec681f3Smrg            gs_bin_variant->prog_data.gs->num_inputs,
7ec681f3Smrg            gs_bin_variant->prog_data.gs->input_slots,
7ec681f3Smrg            &key->num_used_outputs,
7ec681f3Smrg            key->used_outputs,
7ec681f3Smrg            sizeof(key->used_outputs));
7ec681f3Smrg      }
7ec681f3Smrg   } else { /* Render VS */
7ec681f3Smrg      if (pipeline->gs) {
7ec681f3Smrg         /* Linking against GS render program */
7ec681f3Smrg         struct v3dv_shader_variant *gs_variant =
7ec681f3Smrg            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
7ec681f3Smrg
7ec681f3Smrg         STATIC_ASSERT(sizeof(key->used_outputs) ==
7ec681f3Smrg                       sizeof(gs_variant->prog_data.gs->input_slots));
7ec681f3Smrg
7ec681f3Smrg         setup_stage_outputs_from_next_stage_inputs(
7ec681f3Smrg            gs_variant->prog_data.gs->num_inputs,
7ec681f3Smrg            gs_variant->prog_data.gs->input_slots,
7ec681f3Smrg            &key->num_used_outputs,
7ec681f3Smrg            key->used_outputs,
7ec681f3Smrg            sizeof(key->used_outputs));
7ec681f3Smrg      } else {
7ec681f3Smrg         /* Linking against FS program */
7ec681f3Smrg         struct v3dv_shader_variant *fs_variant =
7ec681f3Smrg            pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
7ec681f3Smrg
7ec681f3Smrg         STATIC_ASSERT(sizeof(key->used_outputs) ==
7ec681f3Smrg                       sizeof(fs_variant->prog_data.fs->input_slots));
7ec681f3Smrg
7ec681f3Smrg         setup_stage_outputs_from_next_stage_inputs(
7ec681f3Smrg            fs_variant->prog_data.fs->num_inputs,
7ec681f3Smrg            fs_variant->prog_data.fs->input_slots,
7ec681f3Smrg            &key->num_used_outputs,
7ec681f3Smrg            key->used_outputs,
7ec681f3Smrg            sizeof(key->used_outputs));
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineVertexInputStateCreateInfo *vi_info =
7ec681f3Smrg      pCreateInfo->pVertexInputState;
7ec681f3Smrg   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
7ec681f3Smrg      const VkVertexInputAttributeDescription *desc =
7ec681f3Smrg         &vi_info->pVertexAttributeDescriptions[i];
7ec681f3Smrg      assert(desc->location < MAX_VERTEX_ATTRIBS);
7ec681f3Smrg      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
7ec681f3Smrg         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/**
7ec681f3Smrg * Creates the initial form of the pipeline stage for a binning shader by
7ec681f3Smrg * cloning the render shader and flagging it as a coordinate shader.
7ec681f3Smrg *
7ec681f3Smrg * Returns NULL if it was not able to allocate the object, so it should be
7ec681f3Smrg * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
7ec681f3Smrg */
7ec681f3Smrgstatic struct v3dv_pipeline_stage *
7ec681f3Smrgpipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
7ec681f3Smrg                              const VkAllocationCallbacks *pAllocator)
7ec681f3Smrg{
7ec681f3Smrg   struct v3dv_device *device = src->pipeline->device;
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_pipeline_stage *p_stage =
7ec681f3Smrg      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
7ec681f3Smrg                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7ec681f3Smrg
7ec681f3Smrg   if (p_stage == NULL)
7ec681f3Smrg      return NULL;
7ec681f3Smrg
7ec681f3Smrg   assert(src->stage == BROADCOM_SHADER_VERTEX ||
7ec681f3Smrg          src->stage == BROADCOM_SHADER_GEOMETRY);
7ec681f3Smrg
7ec681f3Smrg   enum broadcom_shader_stage bin_stage =
7ec681f3Smrg      src->stage == BROADCOM_SHADER_VERTEX ?
7ec681f3Smrg         BROADCOM_SHADER_VERTEX_BIN :
7ec681f3Smrg         BROADCOM_SHADER_GEOMETRY_BIN;
7ec681f3Smrg
7ec681f3Smrg   p_stage->pipeline = src->pipeline;
7ec681f3Smrg   p_stage->stage = bin_stage;
7ec681f3Smrg   p_stage->entrypoint = src->entrypoint;
7ec681f3Smrg   p_stage->module = src->module;
7ec681f3Smrg   /* For binning shaders we will clone the NIR code from the corresponding
7ec681f3Smrg    * render shader later, when we call pipeline_compile_xxx_shader. This way
7ec681f3Smrg    * we only have to run the relevant NIR lowerings once for render shaders
7ec681f3Smrg    */
7ec681f3Smrg   p_stage->nir = NULL;
7ec681f3Smrg   p_stage->spec_info = src->spec_info;
7ec681f3Smrg   p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
7ec681f3Smrg   memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
7ec681f3Smrg
7ec681f3Smrg   return p_stage;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/**
7ec681f3Smrg * Returns false if it was not able to allocate or map the assembly bo memory.
7ec681f3Smrg */
7ec681f3Smrgstatic bool
7ec681f3Smrgupload_assembly(struct v3dv_pipeline *pipeline)
7ec681f3Smrg{
7ec681f3Smrg   uint32_t total_size = 0;
7ec681f3Smrg   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
7ec681f3Smrg      struct v3dv_shader_variant *variant =
7ec681f3Smrg         pipeline->shared_data->variants[stage];
7ec681f3Smrg
7ec681f3Smrg      if (variant != NULL)
7ec681f3Smrg         total_size += variant->qpu_insts_size;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
7ec681f3Smrg                                      "pipeline shader assembly", true);
7ec681f3Smrg   if (!bo) {
7ec681f3Smrg      fprintf(stderr, "failed to allocate memory for shader\n");
7ec681f3Smrg      return false;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
7ec681f3Smrg   if (!ok) {
7ec681f3Smrg      fprintf(stderr, "failed to map source shader buffer\n");
7ec681f3Smrg      return false;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   uint32_t offset = 0;
7ec681f3Smrg   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
7ec681f3Smrg      struct v3dv_shader_variant *variant =
7ec681f3Smrg         pipeline->shared_data->variants[stage];
7ec681f3Smrg
7ec681f3Smrg      if (variant != NULL) {
7ec681f3Smrg         variant->assembly_offset = offset;
7ec681f3Smrg
7ec681f3Smrg         memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
7ec681f3Smrg         offset += variant->qpu_insts_size;
7ec681f3Smrg
7ec681f3Smrg         /* We dont need qpu_insts anymore. */
7ec681f3Smrg         free(variant->qpu_insts);
7ec681f3Smrg         variant->qpu_insts = NULL;
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg   assert(total_size == offset);
7ec681f3Smrg
7ec681f3Smrg   pipeline->shared_data->assembly_bo = bo;
7ec681f3Smrg
7ec681f3Smrg   return true;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
7ec681f3Smrg                       struct v3dv_pipeline_key *key,
7ec681f3Smrg                       unsigned char *sha1_out)
7ec681f3Smrg{
7ec681f3Smrg   struct mesa_sha1 ctx;
7ec681f3Smrg   _mesa_sha1_init(&ctx);
7ec681f3Smrg
7ec681f3Smrg   /* We need to include all shader stages in the sha1 key as linking may modify
7ec681f3Smrg    * the shader code in any stage. An alternative would be to use the
7ec681f3Smrg    * serialized NIR, but that seems like an overkill.
7ec681f3Smrg    */
7ec681f3Smrg   _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
7ec681f3Smrg                     sizeof(pipeline->vs->shader_sha1));
7ec681f3Smrg
7ec681f3Smrg   if (pipeline->gs) {
7ec681f3Smrg      _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
7ec681f3Smrg                        sizeof(pipeline->gs->shader_sha1));
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
7ec681f3Smrg                     sizeof(pipeline->fs->shader_sha1));
7ec681f3Smrg
7ec681f3Smrg   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
7ec681f3Smrg
7ec681f3Smrg   _mesa_sha1_final(&ctx, sha1_out);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_hash_compute(const struct v3dv_pipeline *pipeline,
7ec681f3Smrg                      struct v3dv_pipeline_key *key,
7ec681f3Smrg                      unsigned char *sha1_out)
7ec681f3Smrg{
7ec681f3Smrg   struct mesa_sha1 ctx;
7ec681f3Smrg   _mesa_sha1_init(&ctx);
7ec681f3Smrg
7ec681f3Smrg   _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
7ec681f3Smrg                     sizeof(pipeline->cs->shader_sha1));
7ec681f3Smrg
7ec681f3Smrg   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
7ec681f3Smrg
7ec681f3Smrg   _mesa_sha1_final(&ctx, sha1_out);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/* Checks that the pipeline has enough spill size to use for any of their
7ec681f3Smrg * variants
7ec681f3Smrg */
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_check_spill_size(struct v3dv_pipeline *pipeline)
7ec681f3Smrg{
7ec681f3Smrg   uint32_t max_spill_size = 0;
7ec681f3Smrg
7ec681f3Smrg   for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
7ec681f3Smrg      struct v3dv_shader_variant *variant =
7ec681f3Smrg         pipeline->shared_data->variants[stage];
7ec681f3Smrg
7ec681f3Smrg      if (variant != NULL) {
7ec681f3Smrg         max_spill_size = MAX2(variant->prog_data.base->spill_size,
7ec681f3Smrg                               max_spill_size);
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (max_spill_size > 0) {
7ec681f3Smrg      struct v3dv_device *device = pipeline->device;
7ec681f3Smrg
7ec681f3Smrg      /* The TIDX register we use for choosing the area to access
7ec681f3Smrg       * for scratch space is: (core << 6) | (qpu << 2) | thread.
7ec681f3Smrg       * Even at minimum threadcount in a particular shader, that
7ec681f3Smrg       * means we still multiply by qpus by 4.
7ec681f3Smrg       */
7ec681f3Smrg      const uint32_t total_spill_size =
7ec681f3Smrg         4 * device->devinfo.qpu_count * max_spill_size;
7ec681f3Smrg      if (pipeline->spill.bo) {
7ec681f3Smrg         assert(pipeline->spill.size_per_thread > 0);
7ec681f3Smrg         v3dv_bo_free(device, pipeline->spill.bo);
7ec681f3Smrg      }
7ec681f3Smrg      pipeline->spill.bo =
7ec681f3Smrg         v3dv_bo_alloc(device, total_spill_size, "spill", true);
7ec681f3Smrg      pipeline->spill.size_per_thread = max_spill_size;
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/**
7ec681f3Smrg * Creates a new shader_variant_create. Note that for prog_data is not const,
7ec681f3Smrg * so it is assumed that the caller will prove a pointer that the
7ec681f3Smrg * shader_variant will own.
7ec681f3Smrg *
7ec681f3Smrg * Creation doesn't include allocate a BD to store the content of qpu_insts,
7ec681f3Smrg * as we will try to share the same bo for several shader variants. Also note
7ec681f3Smrg * that qpu_ints being NULL is valid, for example if we are creating the
7ec681f3Smrg * shader_variants from the cache, so we can just upload the assembly of all
7ec681f3Smrg * the shader stages at once.
7ec681f3Smrg */
7ec681f3Smrgstruct v3dv_shader_variant *
7ec681f3Smrgv3dv_shader_variant_create(struct v3dv_device *device,
7ec681f3Smrg                           enum broadcom_shader_stage stage,
7ec681f3Smrg                           struct v3d_prog_data *prog_data,
7ec681f3Smrg                           uint32_t prog_data_size,
7ec681f3Smrg                           uint32_t assembly_offset,
7ec681f3Smrg                           uint64_t *qpu_insts,
7ec681f3Smrg                           uint32_t qpu_insts_size,
7ec681f3Smrg                           VkResult *out_vk_result)
7ec681f3Smrg{
7ec681f3Smrg   struct v3dv_shader_variant *variant =
7ec681f3Smrg      vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
7ec681f3Smrg                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7ec681f3Smrg
7ec681f3Smrg   if (variant == NULL) {
7ec681f3Smrg      *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
7ec681f3Smrg      return NULL;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   variant->stage = stage;
7ec681f3Smrg   variant->prog_data_size = prog_data_size;
7ec681f3Smrg   variant->prog_data.base = prog_data;
7ec681f3Smrg
7ec681f3Smrg   variant->assembly_offset = assembly_offset;
7ec681f3Smrg   variant->qpu_insts_size = qpu_insts_size;
7ec681f3Smrg   variant->qpu_insts = qpu_insts;
7ec681f3Smrg
7ec681f3Smrg   *out_vk_result = VK_SUCCESS;
7ec681f3Smrg
7ec681f3Smrg   return variant;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/* For a given key, it returns the compiled version of the shader.  Returns a
7ec681f3Smrg * new reference to the shader_variant to the caller, or NULL.
7ec681f3Smrg *
7ec681f3Smrg * If the method returns NULL it means that something wrong happened:
7ec681f3Smrg *   * Not enough memory: this is one of the possible outcomes defined by
7ec681f3Smrg *     vkCreateXXXPipelines. out_vk_result will return the proper oom error.
7ec681f3Smrg *   * Compilation error: hypothetically this shouldn't happen, as the spec
7ec681f3Smrg *     states that vkShaderModule needs to be created with a valid SPIR-V, so
7ec681f3Smrg *     any compilation failure is a driver bug. In the practice, something as
7ec681f3Smrg *     common as failing to register allocate can lead to a compilation
7ec681f3Smrg *     failure. In that case the only option (for any driver) is
7ec681f3Smrg *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
7ec681f3Smrg *     error.
7ec681f3Smrg */
7ec681f3Smrgstatic struct v3dv_shader_variant *
7ec681f3Smrgpipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
7ec681f3Smrg                                struct v3d_key *key,
7ec681f3Smrg                                size_t key_size,
7ec681f3Smrg                                const VkAllocationCallbacks *pAllocator,
7ec681f3Smrg                                VkResult *out_vk_result)
7ec681f3Smrg{
7ec681f3Smrg   int64_t stage_start = os_time_get_nano();
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_pipeline *pipeline = p_stage->pipeline;
7ec681f3Smrg   struct v3dv_physical_device *physical_device =
7ec681f3Smrg      &pipeline->device->instance->physicalDevice;
7ec681f3Smrg   const struct v3d_compiler *compiler = physical_device->compiler;
7ec681f3Smrg
7ec681f3Smrg   if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
7ec681f3Smrg                             v3d_debug_flag_for_shader_stage
7ec681f3Smrg                             (broadcom_shader_stage_to_gl(p_stage->stage))))) {
7ec681f3Smrg      fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
7ec681f3Smrg              broadcom_shader_stage_name(p_stage->stage),
7ec681f3Smrg              p_stage->program_id);
7ec681f3Smrg      nir_print_shader(p_stage->nir, stderr);
7ec681f3Smrg      fprintf(stderr, "\n");
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   uint64_t *qpu_insts;
7ec681f3Smrg   uint32_t qpu_insts_size;
7ec681f3Smrg   struct v3d_prog_data *prog_data;
7ec681f3Smrg   uint32_t prog_data_size =
7ec681f3Smrg      v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));
7ec681f3Smrg
7ec681f3Smrg   qpu_insts = v3d_compile(compiler,
7ec681f3Smrg                           key, &prog_data,
7ec681f3Smrg                           p_stage->nir,
7ec681f3Smrg                           shader_debug_output, NULL,
7ec681f3Smrg                           p_stage->program_id, 0,
7ec681f3Smrg                           &qpu_insts_size);
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_shader_variant *variant = NULL;
7ec681f3Smrg
7ec681f3Smrg   if (!qpu_insts) {
7ec681f3Smrg      fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
7ec681f3Smrg              gl_shader_stage_name(p_stage->stage),
7ec681f3Smrg              p_stage->program_id);
7ec681f3Smrg      *out_vk_result = VK_ERROR_UNKNOWN;
7ec681f3Smrg   } else {
7ec681f3Smrg      variant =
7ec681f3Smrg         v3dv_shader_variant_create(pipeline->device, p_stage->stage,
7ec681f3Smrg                                    prog_data, prog_data_size,
7ec681f3Smrg                                    0, /* assembly_offset, no final value yet */
7ec681f3Smrg                                    qpu_insts, qpu_insts_size,
7ec681f3Smrg                                    out_vk_result);
7ec681f3Smrg   }
7ec681f3Smrg   /* At this point we don't need anymore the nir shader, but we are freeing
7ec681f3Smrg    * all the temporary p_stage structs used during the pipeline creation when
7ec681f3Smrg    * we finish it, so let's not worry about freeing the nir here.
7ec681f3Smrg    */
7ec681f3Smrg
7ec681f3Smrg   p_stage->feedback.duration += os_time_get_nano() - stage_start;
7ec681f3Smrg
7ec681f3Smrg   return variant;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/* FIXME: C&P from st, common place? */
7ec681f3Smrgstatic void
7ec681f3Smrgst_nir_opts(nir_shader *nir)
7ec681f3Smrg{
7ec681f3Smrg   bool progress;
7ec681f3Smrg
7ec681f3Smrg   do {
7ec681f3Smrg      progress = false;
7ec681f3Smrg
7ec681f3Smrg      NIR_PASS_V(nir, nir_lower_vars_to_ssa);
7ec681f3Smrg
7ec681f3Smrg      /* Linking deals with unused inputs/outputs, but here we can remove
7ec681f3Smrg       * things local to the shader in the hopes that we can cleanup other
7ec681f3Smrg       * things. This pass will also remove variables with only stores, so we
7ec681f3Smrg       * might be able to make progress after it.
7ec681f3Smrg       */
7ec681f3Smrg      NIR_PASS(progress, nir, nir_remove_dead_variables,
7ec681f3Smrg               (nir_variable_mode)(nir_var_function_temp |
7ec681f3Smrg                                   nir_var_shader_temp |
7ec681f3Smrg                                   nir_var_mem_shared),
7ec681f3Smrg               NULL);
7ec681f3Smrg
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_dead_write_vars);
7ec681f3Smrg
7ec681f3Smrg      if (nir->options->lower_to_scalar) {
7ec681f3Smrg         NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
7ec681f3Smrg         NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      NIR_PASS_V(nir, nir_lower_alu);
7ec681f3Smrg      NIR_PASS_V(nir, nir_lower_pack);
7ec681f3Smrg      NIR_PASS(progress, nir, nir_copy_prop);
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_remove_phis);
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_dce);
7ec681f3Smrg      if (nir_opt_trivial_continues(nir)) {
7ec681f3Smrg         progress = true;
7ec681f3Smrg         NIR_PASS(progress, nir, nir_copy_prop);
7ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_dce);
7ec681f3Smrg      }
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_if, false);
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_dead_cf);
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_cse);
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
7ec681f3Smrg
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_algebraic);
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_constant_folding);
7ec681f3Smrg
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_undef);
7ec681f3Smrg      NIR_PASS(progress, nir, nir_opt_conditional_discard);
7ec681f3Smrg   } while (progress);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrglink_shaders(nir_shader *producer, nir_shader *consumer)
7ec681f3Smrg{
7ec681f3Smrg   assert(producer);
7ec681f3Smrg   assert(consumer);
7ec681f3Smrg
7ec681f3Smrg   if (producer->options->lower_to_scalar) {
7ec681f3Smrg      NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
7ec681f3Smrg      NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   nir_lower_io_arrays_to_elements(producer, consumer);
7ec681f3Smrg
7ec681f3Smrg   st_nir_opts(producer);
7ec681f3Smrg   st_nir_opts(consumer);
7ec681f3Smrg
7ec681f3Smrg   if (nir_link_opt_varyings(producer, consumer))
7ec681f3Smrg      st_nir_opts(consumer);
7ec681f3Smrg
7ec681f3Smrg   NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
7ec681f3Smrg   NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
7ec681f3Smrg
7ec681f3Smrg   if (nir_remove_unused_varyings(producer, consumer)) {
7ec681f3Smrg      NIR_PASS_V(producer, nir_lower_global_vars_to_local);
7ec681f3Smrg      NIR_PASS_V(consumer, nir_lower_global_vars_to_local);
7ec681f3Smrg
7ec681f3Smrg      st_nir_opts(producer);
7ec681f3Smrg      st_nir_opts(consumer);
7ec681f3Smrg
7ec681f3Smrg      /* Optimizations can cause varyings to become unused.
7ec681f3Smrg       * nir_compact_varyings() depends on all dead varyings being removed so
7ec681f3Smrg       * we need to call nir_remove_dead_variables() again here.
7ec681f3Smrg       */
7ec681f3Smrg      NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
7ec681f3Smrg      NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_lower_nir(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                   struct v3dv_pipeline_stage *p_stage,
7ec681f3Smrg                   struct v3dv_pipeline_layout *layout)
7ec681f3Smrg{
7ec681f3Smrg   int64_t stage_start = os_time_get_nano();
7ec681f3Smrg
7ec681f3Smrg   assert(pipeline->shared_data &&
7ec681f3Smrg          pipeline->shared_data->maps[p_stage->stage]);
7ec681f3Smrg
7ec681f3Smrg   nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
7ec681f3Smrg
7ec681f3Smrg   /* We add this because we need a valid sampler for nir_lower_tex to do
7ec681f3Smrg    * unpacking of the texture operation result, even for the case where there
7ec681f3Smrg    * is no sampler state.
7ec681f3Smrg    *
7ec681f3Smrg    * We add two of those, one for the case we need a 16bit return_size, and
7ec681f3Smrg    * another for the case we need a 32bit return size.
7ec681f3Smrg    */
7ec681f3Smrg   UNUSED unsigned index =
7ec681f3Smrg      descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
7ec681f3Smrg                         -1, -1, -1, 0, 16);
7ec681f3Smrg   assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
7ec681f3Smrg
7ec681f3Smrg   index =
7ec681f3Smrg      descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
7ec681f3Smrg                         -2, -2, -2, 0, 32);
7ec681f3Smrg   assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
7ec681f3Smrg
7ec681f3Smrg   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
7ec681f3Smrg   NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);
7ec681f3Smrg
7ec681f3Smrg   p_stage->feedback.duration += os_time_get_nano() - stage_start;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/**
7ec681f3Smrg * The SPIR-V compiler will insert a sized compact array for
7ec681f3Smrg * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
7ec681f3Smrg * where the size of the array determines the number of active clip planes.
7ec681f3Smrg */
7ec681f3Smrgstatic uint32_t
7ec681f3Smrgget_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
7ec681f3Smrg{
7ec681f3Smrg   assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
7ec681f3Smrg   const nir_shader *shader = p_stage->nir;
7ec681f3Smrg   assert(shader);
7ec681f3Smrg
7ec681f3Smrg   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
7ec681f3Smrg      if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
7ec681f3Smrg         assert(var->data.compact);
7ec681f3Smrg         return (1 << glsl_get_length(var->type)) - 1;
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg   return 0;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic nir_shader *
7ec681f3Smrgpipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
7ec681f3Smrg                       struct v3dv_pipeline *pipeline,
7ec681f3Smrg                       struct v3dv_pipeline_cache *cache)
7ec681f3Smrg{
7ec681f3Smrg   int64_t stage_start = os_time_get_nano();
7ec681f3Smrg
7ec681f3Smrg   nir_shader *nir = NULL;
7ec681f3Smrg
7ec681f3Smrg   nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
7ec681f3Smrg                                            &v3dv_nir_options,
7ec681f3Smrg                                            p_stage->shader_sha1);
7ec681f3Smrg
7ec681f3Smrg   if (nir) {
7ec681f3Smrg      assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
7ec681f3Smrg
7ec681f3Smrg      /* A NIR cach hit doesn't avoid the large majority of pipeline stage
7ec681f3Smrg       * creation so the cache hit is not recorded in the pipeline feedback
7ec681f3Smrg       * flags
7ec681f3Smrg       */
7ec681f3Smrg
7ec681f3Smrg      p_stage->feedback.duration += os_time_get_nano() - stage_start;
7ec681f3Smrg
7ec681f3Smrg      return nir;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   nir = shader_module_compile_to_nir(pipeline->device, p_stage);
7ec681f3Smrg
7ec681f3Smrg   if (nir) {
7ec681f3Smrg      struct v3dv_pipeline_cache *default_cache =
7ec681f3Smrg         &pipeline->device->default_pipeline_cache;
7ec681f3Smrg
7ec681f3Smrg      v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
7ec681f3Smrg                                     p_stage->shader_sha1);
7ec681f3Smrg
7ec681f3Smrg      /* Ensure that the variant is on the default cache, as cmd_buffer could
7ec681f3Smrg       * need to change the current variant
7ec681f3Smrg       */
7ec681f3Smrg      if (default_cache != cache) {
7ec681f3Smrg         v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
7ec681f3Smrg                                        p_stage->shader_sha1);
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      p_stage->feedback.duration += os_time_get_nano() - stage_start;
7ec681f3Smrg
7ec681f3Smrg      return nir;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   /* FIXME: this shouldn't happen, raise error? */
7ec681f3Smrg   return NULL;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_hash_shader(const struct vk_shader_module *module,
7ec681f3Smrg                     const char *entrypoint,
7ec681f3Smrg                     gl_shader_stage stage,
7ec681f3Smrg                     const VkSpecializationInfo *spec_info,
7ec681f3Smrg                     unsigned char *sha1_out)
7ec681f3Smrg{
7ec681f3Smrg   struct mesa_sha1 ctx;
7ec681f3Smrg   _mesa_sha1_init(&ctx);
7ec681f3Smrg
7ec681f3Smrg   _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
7ec681f3Smrg   _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
7ec681f3Smrg   _mesa_sha1_update(&ctx, &stage, sizeof(stage));
7ec681f3Smrg   if (spec_info) {
7ec681f3Smrg      _mesa_sha1_update(&ctx, spec_info->pMapEntries,
7ec681f3Smrg                        spec_info->mapEntryCount *
7ec681f3Smrg                        sizeof(*spec_info->pMapEntries));
7ec681f3Smrg      _mesa_sha1_update(&ctx, spec_info->pData,
7ec681f3Smrg                        spec_info->dataSize);
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   _mesa_sha1_final(&ctx, sha1_out);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic VkResult
7ec681f3Smrgpipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                               const VkAllocationCallbacks *pAllocator,
7ec681f3Smrg                               const VkGraphicsPipelineCreateInfo *pCreateInfo)
7ec681f3Smrg{
7ec681f3Smrg   assert(pipeline->vs_bin != NULL);
7ec681f3Smrg   if (pipeline->vs_bin->nir == NULL) {
7ec681f3Smrg      assert(pipeline->vs->nir);
7ec681f3Smrg      pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   VkResult vk_result;
7ec681f3Smrg   struct v3d_vs_key key;
7ec681f3Smrg   pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);
7ec681f3Smrg   pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
7ec681f3Smrg      pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),
7ec681f3Smrg                                      pAllocator, &vk_result);
7ec681f3Smrg   if (vk_result != VK_SUCCESS)
7ec681f3Smrg      return vk_result;
7ec681f3Smrg
7ec681f3Smrg   pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
7ec681f3Smrg   pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
7ec681f3Smrg      pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
7ec681f3Smrg                                      pAllocator, &vk_result);
7ec681f3Smrg
7ec681f3Smrg   return vk_result;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic VkResult
7ec681f3Smrgpipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                                 const VkAllocationCallbacks *pAllocator,
7ec681f3Smrg                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
7ec681f3Smrg{
7ec681f3Smrg   assert(pipeline->gs);
7ec681f3Smrg
7ec681f3Smrg   assert(pipeline->gs_bin != NULL);
7ec681f3Smrg   if (pipeline->gs_bin->nir == NULL) {
7ec681f3Smrg      assert(pipeline->gs->nir);
7ec681f3Smrg      pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   VkResult vk_result;
7ec681f3Smrg   struct v3d_gs_key key;
7ec681f3Smrg   pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
7ec681f3Smrg   pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
7ec681f3Smrg      pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
7ec681f3Smrg                                      pAllocator, &vk_result);
7ec681f3Smrg   if (vk_result != VK_SUCCESS)
7ec681f3Smrg      return vk_result;
7ec681f3Smrg
7ec681f3Smrg   pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
7ec681f3Smrg   pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
7ec681f3Smrg      pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
7ec681f3Smrg                                      pAllocator, &vk_result);
7ec681f3Smrg
7ec681f3Smrg   return vk_result;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic VkResult
7ec681f3Smrgpipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                                 const VkAllocationCallbacks *pAllocator,
7ec681f3Smrg                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
7ec681f3Smrg{
7ec681f3Smrg   struct v3dv_pipeline_stage *p_stage = pipeline->vs;
7ec681f3Smrg
7ec681f3Smrg   p_stage = pipeline->fs;
7ec681f3Smrg
7ec681f3Smrg   struct v3d_fs_key key;
7ec681f3Smrg
7ec681f3Smrg   pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
7ec681f3Smrg                                pipeline->gs != NULL,
7ec681f3Smrg                                get_ucp_enable_mask(pipeline->vs));
7ec681f3Smrg
7ec681f3Smrg   VkResult vk_result;
7ec681f3Smrg   pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
7ec681f3Smrg      pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),
7ec681f3Smrg                                      pAllocator, &vk_result);
7ec681f3Smrg
7ec681f3Smrg   return vk_result;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                               struct v3dv_pipeline_key *key,
7ec681f3Smrg                               const VkGraphicsPipelineCreateInfo *pCreateInfo)
7ec681f3Smrg{
7ec681f3Smrg   memset(key, 0, sizeof(*key));
7ec681f3Smrg   key->robust_buffer_access =
7ec681f3Smrg      pipeline->device->features.robustBufferAccess;
7ec681f3Smrg
7ec681f3Smrg   const bool raster_enabled =
7ec681f3Smrg      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
7ec681f3Smrg      pCreateInfo->pInputAssemblyState;
7ec681f3Smrg   key->topology = vk_to_pipe_prim_type[ia_info->topology];
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineColorBlendStateCreateInfo *cb_info =
7ec681f3Smrg      raster_enabled ? pCreateInfo->pColorBlendState : NULL;
7ec681f3Smrg
7ec681f3Smrg   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
7ec681f3Smrg      vk_to_pipe_logicop[cb_info->logicOp] :
7ec681f3Smrg      PIPE_LOGICOP_COPY;
7ec681f3Smrg
7ec681f3Smrg   /* Multisample rasterization state must be ignored if rasterization
7ec681f3Smrg    * is disabled.
7ec681f3Smrg    */
7ec681f3Smrg   const VkPipelineMultisampleStateCreateInfo *ms_info =
7ec681f3Smrg      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
7ec681f3Smrg   if (ms_info) {
7ec681f3Smrg      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
7ec681f3Smrg             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
7ec681f3Smrg      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
7ec681f3Smrg
7ec681f3Smrg      if (key->msaa) {
7ec681f3Smrg         key->sample_coverage =
7ec681f3Smrg            pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
7ec681f3Smrg         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
7ec681f3Smrg         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   const struct v3dv_render_pass *pass =
7ec681f3Smrg      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
7ec681f3Smrg   const struct v3dv_subpass *subpass = pipeline->subpass;
7ec681f3Smrg   for (uint32_t i = 0; i < subpass->color_count; i++) {
7ec681f3Smrg      const uint32_t att_idx = subpass->color_attachments[i].attachment;
7ec681f3Smrg      if (att_idx == VK_ATTACHMENT_UNUSED)
7ec681f3Smrg         continue;
7ec681f3Smrg
7ec681f3Smrg      key->cbufs |= 1 << i;
7ec681f3Smrg
7ec681f3Smrg      VkFormat fb_format = pass->attachments[att_idx].desc.format;
7ec681f3Smrg      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
7ec681f3Smrg
7ec681f3Smrg      /* If logic operations are enabled then we might emit color reads and we
7ec681f3Smrg       * need to know the color buffer format and swizzle for that
7ec681f3Smrg       */
7ec681f3Smrg      if (key->logicop_func != PIPE_LOGICOP_COPY) {
7ec681f3Smrg         key->color_fmt[i].format = fb_pipe_format;
7ec681f3Smrg         key->color_fmt[i].swizzle = v3dv_get_format_swizzle(pipeline->device,
7ec681f3Smrg                                                             fb_format);
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      const struct util_format_description *desc =
7ec681f3Smrg         vk_format_description(fb_format);
7ec681f3Smrg
7ec681f3Smrg      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
7ec681f3Smrg          desc->channel[0].size == 32) {
7ec681f3Smrg         key->f32_color_rb |= 1 << i;
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineVertexInputStateCreateInfo *vi_info =
7ec681f3Smrg      pCreateInfo->pVertexInputState;
7ec681f3Smrg   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
7ec681f3Smrg      const VkVertexInputAttributeDescription *desc =
7ec681f3Smrg         &vi_info->pVertexAttributeDescriptions[i];
7ec681f3Smrg      assert(desc->location < MAX_VERTEX_ATTRIBS);
7ec681f3Smrg      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
7ec681f3Smrg         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   assert(pipeline->subpass);
7ec681f3Smrg   key->has_multiview = pipeline->subpass->view_mask != 0;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                              struct v3dv_pipeline_key *key,
7ec681f3Smrg                              const VkComputePipelineCreateInfo *pCreateInfo)
7ec681f3Smrg{
7ec681f3Smrg   /* We use the same pipeline key for graphics and compute, but we don't need
7ec681f3Smrg    * to add a field to flag compute keys because this key is not used alone
7ec681f3Smrg    * to search in the cache, we also use the SPIR-V or the serialized NIR for
7ec681f3Smrg    * example, which already flags compute shaders.
7ec681f3Smrg    */
7ec681f3Smrg   memset(key, 0, sizeof(*key));
7ec681f3Smrg   key->robust_buffer_access =
7ec681f3Smrg      pipeline->device->features.robustBufferAccess;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic struct v3dv_pipeline_shared_data *
7ec681f3Smrgv3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
7ec681f3Smrg                                    struct v3dv_pipeline *pipeline,
7ec681f3Smrg                                    bool is_graphics_pipeline)
7ec681f3Smrg{
7ec681f3Smrg   /* We create new_entry using the device alloc. Right now shared_data is ref
7ec681f3Smrg    * and unref by both the pipeline and the pipeline cache, so we can't
7ec681f3Smrg    * ensure that the cache or pipeline alloc will be available on the last
7ec681f3Smrg    * unref.
7ec681f3Smrg    */
7ec681f3Smrg   struct v3dv_pipeline_shared_data *new_entry =
7ec681f3Smrg      vk_zalloc2(&pipeline->device->vk.alloc, NULL,
7ec681f3Smrg                 sizeof(struct v3dv_pipeline_shared_data), 8,
7ec681f3Smrg                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7ec681f3Smrg
7ec681f3Smrg   if (new_entry == NULL)
7ec681f3Smrg      return NULL;
7ec681f3Smrg
7ec681f3Smrg   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
7ec681f3Smrg      /* We don't need specific descriptor maps for binning stages we use the
7ec681f3Smrg       * map for the render stage.
7ec681f3Smrg       */
7ec681f3Smrg      if (broadcom_shader_stage_is_binning(stage))
7ec681f3Smrg         continue;
7ec681f3Smrg
7ec681f3Smrg      if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
7ec681f3Smrg          (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
7ec681f3Smrg         continue;
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) {
7ec681f3Smrg         /* We always inject a custom GS if we have multiview */
7ec681f3Smrg         if (!pipeline->subpass->view_mask)
7ec681f3Smrg            continue;
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      struct v3dv_descriptor_maps *new_maps =
7ec681f3Smrg         vk_zalloc2(&pipeline->device->vk.alloc, NULL,
7ec681f3Smrg                    sizeof(struct v3dv_descriptor_maps), 8,
7ec681f3Smrg                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7ec681f3Smrg
7ec681f3Smrg      if (new_maps == NULL)
7ec681f3Smrg         goto fail;
7ec681f3Smrg
7ec681f3Smrg      new_entry->maps[stage] = new_maps;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
7ec681f3Smrg      new_entry->maps[BROADCOM_SHADER_VERTEX];
7ec681f3Smrg
7ec681f3Smrg   new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
7ec681f3Smrg      new_entry->maps[BROADCOM_SHADER_GEOMETRY];
7ec681f3Smrg
7ec681f3Smrg   new_entry->ref_cnt = 1;
7ec681f3Smrg   memcpy(new_entry->sha1_key, sha1_key, 20);
7ec681f3Smrg
7ec681f3Smrg   return new_entry;
7ec681f3Smrg
7ec681f3Smrgfail:
7ec681f3Smrg   if (new_entry != NULL) {
7ec681f3Smrg      for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
7ec681f3Smrg         if (new_entry->maps[stage] != NULL)
7ec681f3Smrg            vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   vk_free(&pipeline->device->vk.alloc, new_entry);
7ec681f3Smrg
7ec681f3Smrg   return NULL;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgwrite_creation_feedback(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                        const void *next,
7ec681f3Smrg                        const VkPipelineCreationFeedbackEXT *pipeline_feedback,
7ec681f3Smrg                        uint32_t stage_count,
7ec681f3Smrg                        const VkPipelineShaderStageCreateInfo *stages)
7ec681f3Smrg{
7ec681f3Smrg   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
7ec681f3Smrg      vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
7ec681f3Smrg
7ec681f3Smrg   if (create_feedback) {
7ec681f3Smrg      typed_memcpy(create_feedback->pPipelineCreationFeedback,
7ec681f3Smrg             pipeline_feedback,
7ec681f3Smrg             1);
7ec681f3Smrg
7ec681f3Smrg      assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount);
7ec681f3Smrg
7ec681f3Smrg      for (uint32_t i = 0; i < stage_count; i++) {
7ec681f3Smrg         gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
7ec681f3Smrg         switch (s) {
7ec681f3Smrg         case MESA_SHADER_VERTEX:
7ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i] =
7ec681f3Smrg               pipeline->vs->feedback;
7ec681f3Smrg
7ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
7ec681f3Smrg               pipeline->vs_bin->feedback.duration;
7ec681f3Smrg            break;
7ec681f3Smrg
7ec681f3Smrg         case MESA_SHADER_GEOMETRY:
7ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i] =
7ec681f3Smrg               pipeline->gs->feedback;
7ec681f3Smrg
7ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
7ec681f3Smrg               pipeline->gs_bin->feedback.duration;
7ec681f3Smrg            break;
7ec681f3Smrg
7ec681f3Smrg         case MESA_SHADER_FRAGMENT:
7ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i] =
7ec681f3Smrg               pipeline->fs->feedback;
7ec681f3Smrg            break;
7ec681f3Smrg
7ec681f3Smrg         case MESA_SHADER_COMPUTE:
7ec681f3Smrg            create_feedback->pPipelineStageCreationFeedbacks[i] =
7ec681f3Smrg               pipeline->cs->feedback;
7ec681f3Smrg            break;
7ec681f3Smrg
7ec681f3Smrg         default:
7ec681f3Smrg            unreachable("not supported shader stage");
7ec681f3Smrg         }
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic uint32_t
7ec681f3Smrgmultiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
7ec681f3Smrg{
7ec681f3Smrg   switch (pipeline->topology) {
7ec681f3Smrg   case PIPE_PRIM_POINTS:
7ec681f3Smrg      return GL_POINTS;
7ec681f3Smrg   case PIPE_PRIM_LINES:
7ec681f3Smrg   case PIPE_PRIM_LINE_STRIP:
7ec681f3Smrg      return GL_LINES;
7ec681f3Smrg   case PIPE_PRIM_TRIANGLES:
7ec681f3Smrg   case PIPE_PRIM_TRIANGLE_STRIP:
7ec681f3Smrg   case PIPE_PRIM_TRIANGLE_FAN:
7ec681f3Smrg      return GL_TRIANGLES;
7ec681f3Smrg   default:
7ec681f3Smrg      /* Since we don't allow GS with multiview, we can only see non-adjacency
7ec681f3Smrg       * primitives.
7ec681f3Smrg       */
7ec681f3Smrg      unreachable("Unexpected pipeline primitive type");
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic uint32_t
7ec681f3Smrgmultiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
7ec681f3Smrg{
7ec681f3Smrg   switch (pipeline->topology) {
7ec681f3Smrg   case PIPE_PRIM_POINTS:
7ec681f3Smrg      return GL_POINTS;
7ec681f3Smrg   case PIPE_PRIM_LINES:
7ec681f3Smrg   case PIPE_PRIM_LINE_STRIP:
7ec681f3Smrg      return GL_LINE_STRIP;
7ec681f3Smrg   case PIPE_PRIM_TRIANGLES:
7ec681f3Smrg   case PIPE_PRIM_TRIANGLE_STRIP:
7ec681f3Smrg   case PIPE_PRIM_TRIANGLE_FAN:
7ec681f3Smrg      return GL_TRIANGLE_STRIP;
7ec681f3Smrg   default:
7ec681f3Smrg      /* Since we don't allow GS with multiview, we can only see non-adjacency
7ec681f3Smrg       * primitives.
7ec681f3Smrg       */
7ec681f3Smrg      unreachable("Unexpected pipeline primitive type");
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic bool
7ec681f3Smrgpipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                          struct v3dv_pipeline_cache *cache,
7ec681f3Smrg                          const VkAllocationCallbacks *pAllocator)
7ec681f3Smrg{
7ec681f3Smrg   /* Create the passthrough GS from the VS output interface */
7ec681f3Smrg   pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
7ec681f3Smrg   nir_shader *vs_nir = pipeline->vs->nir;
7ec681f3Smrg
7ec681f3Smrg   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
7ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
7ec681f3Smrg                                                  "multiview broadcast gs");
7ec681f3Smrg   nir_shader *nir = b.shader;
7ec681f3Smrg   nir->info.inputs_read = vs_nir->info.outputs_written;
7ec681f3Smrg   nir->info.outputs_written = vs_nir->info.outputs_written |
7ec681f3Smrg                               (1ull << VARYING_SLOT_LAYER);
7ec681f3Smrg
7ec681f3Smrg   uint32_t vertex_count = u_vertices_per_prim(pipeline->topology);
7ec681f3Smrg   nir->info.gs.input_primitive =
7ec681f3Smrg      multiview_gs_input_primitive_from_pipeline(pipeline);
7ec681f3Smrg   nir->info.gs.output_primitive =
7ec681f3Smrg      multiview_gs_output_primitive_from_pipeline(pipeline);
7ec681f3Smrg   nir->info.gs.vertices_in = vertex_count;
7ec681f3Smrg   nir->info.gs.vertices_out = nir->info.gs.vertices_in;
7ec681f3Smrg   nir->info.gs.invocations = 1;
7ec681f3Smrg   nir->info.gs.active_stream_mask = 0x1;
7ec681f3Smrg
7ec681f3Smrg   /* Make a list of GS input/output variables from the VS outputs */
7ec681f3Smrg   nir_variable *in_vars[100];
7ec681f3Smrg   nir_variable *out_vars[100];
7ec681f3Smrg   uint32_t var_count = 0;
7ec681f3Smrg   nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
7ec681f3Smrg      char name[8];
7ec681f3Smrg      snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
7ec681f3Smrg
7ec681f3Smrg      in_vars[var_count] =
7ec681f3Smrg         nir_variable_create(nir, nir_var_shader_in,
7ec681f3Smrg                             glsl_array_type(out_vs_var->type, vertex_count, 0),
7ec681f3Smrg                             name);
7ec681f3Smrg      in_vars[var_count]->data.location = out_vs_var->data.location;
7ec681f3Smrg      in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
7ec681f3Smrg      in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
7ec681f3Smrg
7ec681f3Smrg      snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
7ec681f3Smrg      out_vars[var_count] =
7ec681f3Smrg         nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
7ec681f3Smrg      out_vars[var_count]->data.location = out_vs_var->data.location;
7ec681f3Smrg      out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
7ec681f3Smrg
7ec681f3Smrg      var_count++;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   /* Add the gl_Layer output variable */
7ec681f3Smrg   nir_variable *out_layer =
7ec681f3Smrg      nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
7ec681f3Smrg                          "out_Layer");
7ec681f3Smrg   out_layer->data.location = VARYING_SLOT_LAYER;
7ec681f3Smrg
7ec681f3Smrg   /* Get the view index value that we will write to gl_Layer */
7ec681f3Smrg   nir_ssa_def *layer =
7ec681f3Smrg      nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
7ec681f3Smrg
7ec681f3Smrg   /* Emit all output vertices */
7ec681f3Smrg   for (uint32_t vi = 0; vi < vertex_count; vi++) {
7ec681f3Smrg      /* Emit all output varyings */
7ec681f3Smrg      for (uint32_t i = 0; i < var_count; i++) {
7ec681f3Smrg         nir_deref_instr *in_value =
7ec681f3Smrg            nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
7ec681f3Smrg         nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      /* Emit gl_Layer write */
7ec681f3Smrg      nir_store_var(&b, out_layer, layer, 0x1);
7ec681f3Smrg
7ec681f3Smrg      nir_emit_vertex(&b, 0);
7ec681f3Smrg   }
7ec681f3Smrg   nir_end_primitive(&b, 0);
7ec681f3Smrg
7ec681f3Smrg   /* Make sure we run our pre-process NIR passes so we produce NIR compatible
7ec681f3Smrg    * with what we expect from SPIR-V modules.
7ec681f3Smrg    */
7ec681f3Smrg   preprocess_nir(nir);
7ec681f3Smrg
7ec681f3Smrg   /* Attach the geometry shader to the  pipeline */
7ec681f3Smrg   struct v3dv_device *device = pipeline->device;
7ec681f3Smrg   struct v3dv_physical_device *physical_device =
7ec681f3Smrg      &device->instance->physicalDevice;
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_pipeline_stage *p_stage =
7ec681f3Smrg      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
7ec681f3Smrg                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7ec681f3Smrg
7ec681f3Smrg   if (p_stage == NULL) {
7ec681f3Smrg      ralloc_free(nir);
7ec681f3Smrg      return false;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   p_stage->pipeline = pipeline;
7ec681f3Smrg   p_stage->stage = BROADCOM_SHADER_GEOMETRY;
7ec681f3Smrg   p_stage->entrypoint = "main";
7ec681f3Smrg   p_stage->module = 0;
7ec681f3Smrg   p_stage->nir = nir;
7ec681f3Smrg   pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
7ec681f3Smrg   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
7ec681f3Smrg
7ec681f3Smrg   pipeline->has_gs = true;
7ec681f3Smrg   pipeline->gs = p_stage;
7ec681f3Smrg   pipeline->active_stages |= MESA_SHADER_GEOMETRY;
7ec681f3Smrg
7ec681f3Smrg   pipeline->gs_bin =
7ec681f3Smrg      pipeline_stage_create_binning(pipeline->gs, pAllocator);
7ec681f3Smrg      if (pipeline->gs_bin == NULL)
7ec681f3Smrg         return false;
7ec681f3Smrg
7ec681f3Smrg   return true;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/*
7ec681f3Smrg * It compiles a pipeline. Note that it also allocate internal object, but if
7ec681f3Smrg * some allocations success, but other fails, the method is not freeing the
7ec681f3Smrg * successful ones.
7ec681f3Smrg *
7ec681f3Smrg * This is done to simplify the code, as what we do in this case is just call
7ec681f3Smrg * the pipeline destroy method, and this would handle freeing the internal
7ec681f3Smrg * objects allocated. We just need to be careful setting to NULL the objects
7ec681f3Smrg * not allocated.
7ec681f3Smrg */
7ec681f3Smrgstatic VkResult
7ec681f3Smrgpipeline_compile_graphics(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                          struct v3dv_pipeline_cache *cache,
7ec681f3Smrg                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
7ec681f3Smrg                          const VkAllocationCallbacks *pAllocator)
7ec681f3Smrg{
7ec681f3Smrg   VkPipelineCreationFeedbackEXT pipeline_feedback = {
7ec681f3Smrg      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
7ec681f3Smrg   };
7ec681f3Smrg   int64_t pipeline_start = os_time_get_nano();
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_device *device = pipeline->device;
7ec681f3Smrg   struct v3dv_physical_device *physical_device =
7ec681f3Smrg      &device->instance->physicalDevice;
7ec681f3Smrg
7ec681f3Smrg   /* First pass to get some common info from the shader, and create the
7ec681f3Smrg    * individual pipeline_stage objects
7ec681f3Smrg    */
7ec681f3Smrg   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
7ec681f3Smrg      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
7ec681f3Smrg      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
7ec681f3Smrg
7ec681f3Smrg      struct v3dv_pipeline_stage *p_stage =
7ec681f3Smrg         vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
7ec681f3Smrg                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7ec681f3Smrg
7ec681f3Smrg      if (p_stage == NULL)
7ec681f3Smrg         return VK_ERROR_OUT_OF_HOST_MEMORY;
7ec681f3Smrg
7ec681f3Smrg      /* Note that we are assigning program_id slightly differently that
7ec681f3Smrg       * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
7ec681f3Smrg       * would have a different program_id, while v3d would have the same for
7ec681f3Smrg       * both. For the case of v3dv, it is more natural to have an id this way,
7ec681f3Smrg       * as right now we are using it for debugging, not for shader-db.
7ec681f3Smrg       */
7ec681f3Smrg      p_stage->program_id =
7ec681f3Smrg         p_atomic_inc_return(&physical_device->next_program_id);
7ec681f3Smrg
7ec681f3Smrg      p_stage->pipeline = pipeline;
7ec681f3Smrg      p_stage->stage = gl_shader_stage_to_broadcom(stage);
7ec681f3Smrg      p_stage->entrypoint = sinfo->pName;
7ec681f3Smrg      p_stage->module = vk_shader_module_from_handle(sinfo->module);
7ec681f3Smrg      p_stage->spec_info = sinfo->pSpecializationInfo;
7ec681f3Smrg
7ec681f3Smrg      pipeline_hash_shader(p_stage->module,
7ec681f3Smrg                           p_stage->entrypoint,
7ec681f3Smrg                           stage,
7ec681f3Smrg                           p_stage->spec_info,
7ec681f3Smrg                           p_stage->shader_sha1);
7ec681f3Smrg
7ec681f3Smrg      pipeline->active_stages |= sinfo->stage;
7ec681f3Smrg
7ec681f3Smrg      /* We will try to get directly the compiled shader variant, so let's not
7ec681f3Smrg       * worry about getting the nir shader for now.
7ec681f3Smrg       */
7ec681f3Smrg      p_stage->nir = NULL;
7ec681f3Smrg
7ec681f3Smrg      switch(stage) {
7ec681f3Smrg      case MESA_SHADER_VERTEX:
7ec681f3Smrg         pipeline->vs = p_stage;
7ec681f3Smrg         pipeline->vs_bin =
7ec681f3Smrg            pipeline_stage_create_binning(pipeline->vs, pAllocator);
7ec681f3Smrg         if (pipeline->vs_bin == NULL)
7ec681f3Smrg            return VK_ERROR_OUT_OF_HOST_MEMORY;
7ec681f3Smrg         break;
7ec681f3Smrg
7ec681f3Smrg      case MESA_SHADER_GEOMETRY:
7ec681f3Smrg         pipeline->has_gs = true;
7ec681f3Smrg         pipeline->gs = p_stage;
7ec681f3Smrg         pipeline->gs_bin =
7ec681f3Smrg            pipeline_stage_create_binning(pipeline->gs, pAllocator);
7ec681f3Smrg         if (pipeline->gs_bin == NULL)
7ec681f3Smrg            return VK_ERROR_OUT_OF_HOST_MEMORY;
7ec681f3Smrg         break;
7ec681f3Smrg
7ec681f3Smrg      case MESA_SHADER_FRAGMENT:
7ec681f3Smrg         pipeline->fs = p_stage;
7ec681f3Smrg         break;
7ec681f3Smrg
7ec681f3Smrg      default:
7ec681f3Smrg         unreachable("not supported shader stage");
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   /* Add a no-op fragment shader if needed */
7ec681f3Smrg   if (!pipeline->fs) {
7ec681f3Smrg      nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
7ec681f3Smrg                                                     &v3dv_nir_options,
7ec681f3Smrg                                                     "noop_fs");
7ec681f3Smrg
7ec681f3Smrg      struct v3dv_pipeline_stage *p_stage =
7ec681f3Smrg         vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
7ec681f3Smrg                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7ec681f3Smrg
7ec681f3Smrg      if (p_stage == NULL)
7ec681f3Smrg         return VK_ERROR_OUT_OF_HOST_MEMORY;
7ec681f3Smrg
7ec681f3Smrg      p_stage->pipeline = pipeline;
7ec681f3Smrg      p_stage->stage = BROADCOM_SHADER_FRAGMENT;
7ec681f3Smrg      p_stage->entrypoint = "main";
7ec681f3Smrg      p_stage->module = 0;
7ec681f3Smrg      p_stage->nir = b.shader;
7ec681f3Smrg      pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
7ec681f3Smrg      p_stage->program_id =
7ec681f3Smrg         p_atomic_inc_return(&physical_device->next_program_id);
7ec681f3Smrg
7ec681f3Smrg      pipeline->fs = p_stage;
7ec681f3Smrg      pipeline->active_stages |= MESA_SHADER_FRAGMENT;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   /* If multiview is enabled, we inject a custom passthrough geometry shader
7ec681f3Smrg    * to broadcast draw calls to the appropriate views.
7ec681f3Smrg    */
7ec681f3Smrg   assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs));
7ec681f3Smrg   if (pipeline->subpass->view_mask) {
7ec681f3Smrg      if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
7ec681f3Smrg         return VK_ERROR_OUT_OF_HOST_MEMORY;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   /* First we try to get the variants from the pipeline cache */
7ec681f3Smrg   struct v3dv_pipeline_key pipeline_key;
7ec681f3Smrg   pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
7ec681f3Smrg   unsigned char pipeline_sha1[20];
7ec681f3Smrg   pipeline_hash_graphics(pipeline, &pipeline_key, pipeline_sha1);
7ec681f3Smrg
7ec681f3Smrg   bool cache_hit = false;
7ec681f3Smrg
7ec681f3Smrg   pipeline->shared_data =
7ec681f3Smrg      v3dv_pipeline_cache_search_for_pipeline(cache,
7ec681f3Smrg                                              pipeline_sha1,
7ec681f3Smrg                                              &cache_hit);
7ec681f3Smrg
7ec681f3Smrg   if (pipeline->shared_data != NULL) {
7ec681f3Smrg      /* A correct pipeline must have at least a VS and FS */
7ec681f3Smrg      assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
7ec681f3Smrg      assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
7ec681f3Smrg      assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
7ec681f3Smrg      assert(!pipeline->gs ||
7ec681f3Smrg             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
7ec681f3Smrg      assert(!pipeline->gs ||
7ec681f3Smrg             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
7ec681f3Smrg
7ec681f3Smrg      if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
7ec681f3Smrg         pipeline_feedback.flags |=
7ec681f3Smrg            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
7ec681f3Smrg
7ec681f3Smrg      goto success;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
7ec681f3Smrg      return VK_PIPELINE_COMPILE_REQUIRED_EXT;
7ec681f3Smrg
7ec681f3Smrg   /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
7ec681f3Smrg    * shader or the pipeline cache) and compile.
7ec681f3Smrg    */
7ec681f3Smrg   pipeline->shared_data =
7ec681f3Smrg      v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline, true);
7ec681f3Smrg
7ec681f3Smrg   pipeline->vs->feedback.flags |=
7ec681f3Smrg      VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
7ec681f3Smrg   if (pipeline->gs)
7ec681f3Smrg      pipeline->gs->feedback.flags |=
7ec681f3Smrg         VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
7ec681f3Smrg   pipeline->fs->feedback.flags |=
7ec681f3Smrg      VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
7ec681f3Smrg
7ec681f3Smrg   if (!pipeline->vs->nir)
7ec681f3Smrg      pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
7ec681f3Smrg   if (pipeline->gs && !pipeline->gs->nir)
7ec681f3Smrg      pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
7ec681f3Smrg   if (!pipeline->fs->nir)
7ec681f3Smrg      pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
7ec681f3Smrg
7ec681f3Smrg   /* Linking + pipeline lowerings */
7ec681f3Smrg   if (pipeline->gs) {
7ec681f3Smrg      link_shaders(pipeline->gs->nir, pipeline->fs->nir);
7ec681f3Smrg      link_shaders(pipeline->vs->nir, pipeline->gs->nir);
7ec681f3Smrg   } else {
7ec681f3Smrg      link_shaders(pipeline->vs->nir, pipeline->fs->nir);
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
7ec681f3Smrg   lower_fs_io(pipeline->fs->nir);
7ec681f3Smrg
7ec681f3Smrg   if (pipeline->gs) {
7ec681f3Smrg      pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
7ec681f3Smrg      lower_gs_io(pipeline->gs->nir);
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
7ec681f3Smrg   lower_vs_io(pipeline->vs->nir);
7ec681f3Smrg
7ec681f3Smrg   /* Compiling to vir */
7ec681f3Smrg   VkResult vk_result;
7ec681f3Smrg
7ec681f3Smrg   /* We should have got all the variants or no variants from the cache */
7ec681f3Smrg   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
7ec681f3Smrg   vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
7ec681f3Smrg   if (vk_result != VK_SUCCESS)
7ec681f3Smrg      return vk_result;
7ec681f3Smrg
7ec681f3Smrg   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
7ec681f3Smrg          !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
7ec681f3Smrg
7ec681f3Smrg   if (pipeline->gs) {
7ec681f3Smrg      vk_result =
7ec681f3Smrg         pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
7ec681f3Smrg      if (vk_result != VK_SUCCESS)
7ec681f3Smrg         return vk_result;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
7ec681f3Smrg          !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
7ec681f3Smrg
7ec681f3Smrg   vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
7ec681f3Smrg   if (vk_result != VK_SUCCESS)
7ec681f3Smrg      return vk_result;
7ec681f3Smrg
7ec681f3Smrg   if (!upload_assembly(pipeline))
7ec681f3Smrg      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
7ec681f3Smrg
7ec681f3Smrg   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
7ec681f3Smrg
7ec681f3Smrg success:
7ec681f3Smrg
7ec681f3Smrg   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
7ec681f3Smrg   write_creation_feedback(pipeline,
7ec681f3Smrg                           pCreateInfo->pNext,
7ec681f3Smrg                           &pipeline_feedback,
7ec681f3Smrg                           pCreateInfo->stageCount,
7ec681f3Smrg                           pCreateInfo->pStages);
7ec681f3Smrg
7ec681f3Smrg   /* Since we have the variants in the pipeline shared data we can now free
7ec681f3Smrg    * the pipeline stages.
7ec681f3Smrg    */
7ec681f3Smrg   pipeline_free_stages(device, pipeline, pAllocator);
7ec681f3Smrg
7ec681f3Smrg   pipeline_check_spill_size(pipeline);
7ec681f3Smrg
7ec681f3Smrg   return compute_vpm_config(pipeline);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic VkResult
7ec681f3Smrgcompute_vpm_config(struct v3dv_pipeline *pipeline)
7ec681f3Smrg{
7ec681f3Smrg   struct v3dv_shader_variant *vs_variant =
7ec681f3Smrg      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
7ec681f3Smrg   struct v3dv_shader_variant *vs_bin_variant =
7ec681f3Smrg      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
7ec681f3Smrg   struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
7ec681f3Smrg   struct v3d_vs_prog_data *vs_bin =vs_bin_variant->prog_data.vs;
7ec681f3Smrg
7ec681f3Smrg   struct v3d_gs_prog_data *gs = NULL;
7ec681f3Smrg   struct v3d_gs_prog_data *gs_bin = NULL;
7ec681f3Smrg   if (pipeline->has_gs) {
7ec681f3Smrg      struct v3dv_shader_variant *gs_variant =
7ec681f3Smrg         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
7ec681f3Smrg      struct v3dv_shader_variant *gs_bin_variant =
7ec681f3Smrg         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
7ec681f3Smrg      gs = gs_variant->prog_data.gs;
7ec681f3Smrg      gs_bin = gs_bin_variant->prog_data.gs;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
7ec681f3Smrg                               vs_bin, vs, gs_bin, gs,
7ec681f3Smrg                               &pipeline->vpm_cfg_bin,
7ec681f3Smrg                               &pipeline->vpm_cfg)) {
7ec681f3Smrg      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   return VK_SUCCESS;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic unsigned
7ec681f3Smrgv3dv_dynamic_state_mask(VkDynamicState state)
7ec681f3Smrg{
7ec681f3Smrg   switch(state) {
7ec681f3Smrg   case VK_DYNAMIC_STATE_VIEWPORT:
7ec681f3Smrg      return V3DV_DYNAMIC_VIEWPORT;
7ec681f3Smrg   case VK_DYNAMIC_STATE_SCISSOR:
7ec681f3Smrg      return V3DV_DYNAMIC_SCISSOR;
7ec681f3Smrg   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
7ec681f3Smrg      return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
7ec681f3Smrg   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
7ec681f3Smrg      return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
7ec681f3Smrg   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
7ec681f3Smrg      return V3DV_DYNAMIC_STENCIL_REFERENCE;
7ec681f3Smrg   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
7ec681f3Smrg      return V3DV_DYNAMIC_BLEND_CONSTANTS;
7ec681f3Smrg   case VK_DYNAMIC_STATE_DEPTH_BIAS:
7ec681f3Smrg      return V3DV_DYNAMIC_DEPTH_BIAS;
7ec681f3Smrg   case VK_DYNAMIC_STATE_LINE_WIDTH:
7ec681f3Smrg      return V3DV_DYNAMIC_LINE_WIDTH;
7ec681f3Smrg   case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
7ec681f3Smrg      return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
7ec681f3Smrg
7ec681f3Smrg   /* Depth bounds testing is not available in in V3D 4.2 so here we are just
7ec681f3Smrg    * ignoring this dynamic state. We are already asserting at pipeline creation
7ec681f3Smrg    * time that depth bounds testing is not enabled.
7ec681f3Smrg    */
7ec681f3Smrg   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
7ec681f3Smrg      return 0;
7ec681f3Smrg
7ec681f3Smrg   default:
7ec681f3Smrg      unreachable("Unhandled dynamic state");
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_init_dynamic_state(
7ec681f3Smrg   struct v3dv_pipeline *pipeline,
7ec681f3Smrg   const VkPipelineDynamicStateCreateInfo *pDynamicState,
7ec681f3Smrg   const VkPipelineViewportStateCreateInfo *pViewportState,
7ec681f3Smrg   const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
7ec681f3Smrg   const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
7ec681f3Smrg   const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
7ec681f3Smrg   const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
7ec681f3Smrg{
7ec681f3Smrg   pipeline->dynamic_state = default_dynamic_state;
7ec681f3Smrg   struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
7ec681f3Smrg
7ec681f3Smrg   /* Create a mask of enabled dynamic states */
7ec681f3Smrg   uint32_t dynamic_states = 0;
7ec681f3Smrg   if (pDynamicState) {
7ec681f3Smrg      uint32_t count = pDynamicState->dynamicStateCount;
7ec681f3Smrg      for (uint32_t s = 0; s < count; s++) {
7ec681f3Smrg         dynamic_states |=
7ec681f3Smrg            v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   /* For any pipeline states that are not dynamic, set the dynamic state
7ec681f3Smrg    * from the static pipeline state.
7ec681f3Smrg    */
7ec681f3Smrg   if (pViewportState) {
7ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
7ec681f3Smrg         dynamic->viewport.count = pViewportState->viewportCount;
7ec681f3Smrg         typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
7ec681f3Smrg                      pViewportState->viewportCount);
7ec681f3Smrg
7ec681f3Smrg         for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
7ec681f3Smrg            v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
7ec681f3Smrg                                        dynamic->viewport.scale[i],
7ec681f3Smrg                                        dynamic->viewport.translate[i]);
7ec681f3Smrg         }
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
7ec681f3Smrg         dynamic->scissor.count = pViewportState->scissorCount;
7ec681f3Smrg         typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
7ec681f3Smrg                      pViewportState->scissorCount);
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (pDepthStencilState) {
7ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
7ec681f3Smrg         dynamic->stencil_compare_mask.front =
7ec681f3Smrg            pDepthStencilState->front.compareMask;
7ec681f3Smrg         dynamic->stencil_compare_mask.back =
7ec681f3Smrg            pDepthStencilState->back.compareMask;
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
7ec681f3Smrg         dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
7ec681f3Smrg         dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
7ec681f3Smrg         dynamic->stencil_reference.front = pDepthStencilState->front.reference;
7ec681f3Smrg         dynamic->stencil_reference.back = pDepthStencilState->back.reference;
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
7ec681f3Smrg      memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
7ec681f3Smrg             sizeof(dynamic->blend_constants));
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (pRasterizationState) {
7ec681f3Smrg      if (pRasterizationState->depthBiasEnable &&
7ec681f3Smrg          !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
7ec681f3Smrg         dynamic->depth_bias.constant_factor =
7ec681f3Smrg            pRasterizationState->depthBiasConstantFactor;
7ec681f3Smrg         dynamic->depth_bias.depth_bias_clamp =
7ec681f3Smrg            pRasterizationState->depthBiasClamp;
7ec681f3Smrg         dynamic->depth_bias.slope_factor =
7ec681f3Smrg            pRasterizationState->depthBiasSlopeFactor;
7ec681f3Smrg      }
7ec681f3Smrg      if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
7ec681f3Smrg         dynamic->line_width = pRasterizationState->lineWidth;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
7ec681f3Smrg      dynamic->color_write_enable = 0;
7ec681f3Smrg      for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
7ec681f3Smrg         dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   pipeline->dynamic_state.mask = dynamic_states;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic bool
7ec681f3Smrgstencil_op_is_no_op(const VkStencilOpState *stencil)
7ec681f3Smrg{
7ec681f3Smrg   return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
7ec681f3Smrg          stencil->compareOp == VK_COMPARE_OP_ALWAYS;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgenable_depth_bias(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                  const VkPipelineRasterizationStateCreateInfo *rs_info)
7ec681f3Smrg{
7ec681f3Smrg   pipeline->depth_bias.enabled = false;
7ec681f3Smrg   pipeline->depth_bias.is_z16 = false;
7ec681f3Smrg
7ec681f3Smrg   if (!rs_info || !rs_info->depthBiasEnable)
7ec681f3Smrg      return;
7ec681f3Smrg
7ec681f3Smrg   /* Check the depth/stencil attachment description for the subpass used with
7ec681f3Smrg    * this pipeline.
7ec681f3Smrg    */
7ec681f3Smrg   assert(pipeline->pass && pipeline->subpass);
7ec681f3Smrg   struct v3dv_render_pass *pass = pipeline->pass;
7ec681f3Smrg   struct v3dv_subpass *subpass = pipeline->subpass;
7ec681f3Smrg
7ec681f3Smrg   if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
7ec681f3Smrg      return;
7ec681f3Smrg
7ec681f3Smrg   assert(subpass->ds_attachment.attachment < pass->attachment_count);
7ec681f3Smrg   struct v3dv_render_pass_attachment *att =
7ec681f3Smrg      &pass->attachments[subpass->ds_attachment.attachment];
7ec681f3Smrg
7ec681f3Smrg   if (att->desc.format == VK_FORMAT_D16_UNORM)
7ec681f3Smrg      pipeline->depth_bias.is_z16 = true;
7ec681f3Smrg
7ec681f3Smrg   pipeline->depth_bias.enabled = true;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_set_ez_state(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                      const VkPipelineDepthStencilStateCreateInfo *ds_info)
7ec681f3Smrg{
7ec681f3Smrg   if (!ds_info || !ds_info->depthTestEnable) {
7ec681f3Smrg      pipeline->ez_state = V3D_EZ_DISABLED;
7ec681f3Smrg      return;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   switch (ds_info->depthCompareOp) {
7ec681f3Smrg   case VK_COMPARE_OP_LESS:
7ec681f3Smrg   case VK_COMPARE_OP_LESS_OR_EQUAL:
7ec681f3Smrg      pipeline->ez_state = V3D_EZ_LT_LE;
7ec681f3Smrg      break;
7ec681f3Smrg   case VK_COMPARE_OP_GREATER:
7ec681f3Smrg   case VK_COMPARE_OP_GREATER_OR_EQUAL:
7ec681f3Smrg      pipeline->ez_state = V3D_EZ_GT_GE;
7ec681f3Smrg      break;
7ec681f3Smrg   case VK_COMPARE_OP_NEVER:
7ec681f3Smrg   case VK_COMPARE_OP_EQUAL:
7ec681f3Smrg      pipeline->ez_state = V3D_EZ_UNDECIDED;
7ec681f3Smrg      break;
7ec681f3Smrg   default:
7ec681f3Smrg      pipeline->ez_state = V3D_EZ_DISABLED;
7ec681f3Smrg      break;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   /* If stencil is enabled and is not a no-op, we need to disable EZ */
7ec681f3Smrg   if (ds_info->stencilTestEnable &&
7ec681f3Smrg       (!stencil_op_is_no_op(&ds_info->front) ||
7ec681f3Smrg        !stencil_op_is_no_op(&ds_info->back))) {
7ec681f3Smrg         pipeline->ez_state = V3D_EZ_DISABLED;
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic bool
7ec681f3Smrgpipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
7ec681f3Smrg{
7ec681f3Smrg   for (uint8_t i = 0; i < pipeline->va_count; i++) {
7ec681f3Smrg      if (vk_format_is_int(pipeline->va[i].vk_format))
7ec681f3Smrg         return true;
7ec681f3Smrg   }
7ec681f3Smrg   return false;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrg/* @pipeline can be NULL. We assume in that case that all the attributes have
7ec681f3Smrg * a float format (we only create an all-float BO once and we reuse it with
7ec681f3Smrg * all float pipelines), otherwise we look at the actual type of each
7ec681f3Smrg * attribute used with the specific pipeline passed in.
7ec681f3Smrg */
7ec681f3Smrgstruct v3dv_bo *
7ec681f3Smrgv3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
7ec681f3Smrg                                              struct v3dv_pipeline *pipeline)
7ec681f3Smrg{
7ec681f3Smrg   uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
7ec681f3Smrg   struct v3dv_bo *bo;
7ec681f3Smrg
7ec681f3Smrg   bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
7ec681f3Smrg
7ec681f3Smrg   if (!bo) {
7ec681f3Smrg      fprintf(stderr, "failed to allocate memory for the default "
7ec681f3Smrg              "attribute values\n");
7ec681f3Smrg      return NULL;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   bool ok = v3dv_bo_map(device, bo, size);
7ec681f3Smrg   if (!ok) {
7ec681f3Smrg      fprintf(stderr, "failed to map default attribute values buffer\n");
7ec681f3Smrg      return false;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   uint32_t *attrs = bo->map;
7ec681f3Smrg   uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
7ec681f3Smrg   for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
7ec681f3Smrg      attrs[i * 4 + 0] = 0;
7ec681f3Smrg      attrs[i * 4 + 1] = 0;
7ec681f3Smrg      attrs[i * 4 + 2] = 0;
7ec681f3Smrg      VkFormat attr_format =
7ec681f3Smrg         pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
7ec681f3Smrg      if (i < va_count && vk_format_is_int(attr_format)) {
7ec681f3Smrg         attrs[i * 4 + 3] = 1;
7ec681f3Smrg      } else {
7ec681f3Smrg         attrs[i * 4 + 3] = fui(1.0);
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   v3dv_bo_unmap(device, bo);
7ec681f3Smrg
7ec681f3Smrg   return bo;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                         const VkPipelineMultisampleStateCreateInfo *ms_info)
7ec681f3Smrg{
7ec681f3Smrg   pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
7ec681f3Smrg
7ec681f3Smrg   /* Ignore pSampleMask if we are not enabling multisampling. The hardware
7ec681f3Smrg    * requires this to be 0xf or 0x0 if using a single sample.
7ec681f3Smrg    */
7ec681f3Smrg   if (ms_info && ms_info->pSampleMask &&
7ec681f3Smrg       ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
7ec681f3Smrg      pipeline->sample_mask &= ms_info->pSampleMask[0];
7ec681f3Smrg   }
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrgpipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                                 const VkPipelineMultisampleStateCreateInfo *ms_info)
7ec681f3Smrg{
7ec681f3Smrg   pipeline->sample_rate_shading =
7ec681f3Smrg      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
7ec681f3Smrg      ms_info->sampleShadingEnable;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic VkResult
7ec681f3Smrgpipeline_init(struct v3dv_pipeline *pipeline,
7ec681f3Smrg              struct v3dv_device *device,
7ec681f3Smrg              struct v3dv_pipeline_cache *cache,
7ec681f3Smrg              const VkGraphicsPipelineCreateInfo *pCreateInfo,
7ec681f3Smrg              const VkAllocationCallbacks *pAllocator)
7ec681f3Smrg{
7ec681f3Smrg   VkResult result = VK_SUCCESS;
7ec681f3Smrg
7ec681f3Smrg   pipeline->device = device;
7ec681f3Smrg
7ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
7ec681f3Smrg   pipeline->layout = layout;
7ec681f3Smrg
7ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
7ec681f3Smrg   assert(pCreateInfo->subpass < render_pass->subpass_count);
7ec681f3Smrg   pipeline->pass = render_pass;
7ec681f3Smrg   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
7ec681f3Smrg      pCreateInfo->pInputAssemblyState;
7ec681f3Smrg   pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
7ec681f3Smrg
7ec681f3Smrg   /* If rasterization is not enabled, various CreateInfo structs must be
7ec681f3Smrg    * ignored.
7ec681f3Smrg    */
7ec681f3Smrg   const bool raster_enabled =
7ec681f3Smrg      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineViewportStateCreateInfo *vp_info =
7ec681f3Smrg      raster_enabled ? pCreateInfo->pViewportState : NULL;
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineDepthStencilStateCreateInfo *ds_info =
7ec681f3Smrg      raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineRasterizationStateCreateInfo *rs_info =
7ec681f3Smrg      raster_enabled ? pCreateInfo->pRasterizationState : NULL;
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
7ec681f3Smrg      rs_info ? vk_find_struct_const(
7ec681f3Smrg         rs_info->pNext,
7ec681f3Smrg         PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
7ec681f3Smrg            NULL;
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineColorBlendStateCreateInfo *cb_info =
7ec681f3Smrg      raster_enabled ? pCreateInfo->pColorBlendState : NULL;
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineMultisampleStateCreateInfo *ms_info =
7ec681f3Smrg      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineColorWriteCreateInfoEXT *cw_info =
7ec681f3Smrg      cb_info ? vk_find_struct_const(cb_info->pNext,
7ec681f3Smrg                                     PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
7ec681f3Smrg                NULL;
7ec681f3Smrg
7ec681f3Smrg   pipeline_init_dynamic_state(pipeline,
7ec681f3Smrg                               pCreateInfo->pDynamicState,
7ec681f3Smrg                               vp_info, ds_info, cb_info, rs_info, cw_info);
7ec681f3Smrg
7ec681f3Smrg   /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
7ec681f3Smrg    * feature and it shouldn't be used by any pipeline.
7ec681f3Smrg    */
7ec681f3Smrg   assert(!ds_info || !ds_info->depthBoundsTestEnable);
7ec681f3Smrg
7ec681f3Smrg   v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
7ec681f3Smrg                                       rs_info, pv_info, ms_info);
7ec681f3Smrg
7ec681f3Smrg   pipeline_set_ez_state(pipeline, ds_info);
7ec681f3Smrg   enable_depth_bias(pipeline, rs_info);
7ec681f3Smrg   pipeline_set_sample_mask(pipeline, ms_info);
7ec681f3Smrg   pipeline_set_sample_rate_shading(pipeline, ms_info);
7ec681f3Smrg
7ec681f3Smrg   pipeline->primitive_restart =
7ec681f3Smrg      pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
7ec681f3Smrg
7ec681f3Smrg   result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
7ec681f3Smrg
7ec681f3Smrg   if (result != VK_SUCCESS) {
7ec681f3Smrg      /* Caller would already destroy the pipeline, and we didn't allocate any
7ec681f3Smrg       * extra info. We don't need to do anything else.
7ec681f3Smrg       */
7ec681f3Smrg      return result;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineVertexInputStateCreateInfo *vi_info =
7ec681f3Smrg      pCreateInfo->pVertexInputState;
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
7ec681f3Smrg      vk_find_struct_const(vi_info->pNext,
7ec681f3Smrg                           PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
7ec681f3Smrg
7ec681f3Smrg   v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
7ec681f3Smrg
7ec681f3Smrg   if (pipeline_has_integer_vertex_attrib(pipeline)) {
7ec681f3Smrg      pipeline->default_attribute_values =
7ec681f3Smrg         v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
7ec681f3Smrg      if (!pipeline->default_attribute_values)
7ec681f3Smrg         return VK_ERROR_OUT_OF_DEVICE_MEMORY;
7ec681f3Smrg   } else {
7ec681f3Smrg      pipeline->default_attribute_values = NULL;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   return result;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic VkResult
7ec681f3Smrggraphics_pipeline_create(VkDevice _device,
7ec681f3Smrg                         VkPipelineCache _cache,
7ec681f3Smrg                         const VkGraphicsPipelineCreateInfo *pCreateInfo,
7ec681f3Smrg                         const VkAllocationCallbacks *pAllocator,
7ec681f3Smrg                         VkPipeline *pPipeline)
7ec681f3Smrg{
7ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_device, device, _device);
7ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_pipeline *pipeline;
7ec681f3Smrg   VkResult result;
7ec681f3Smrg
7ec681f3Smrg   /* Use the default pipeline cache if none is specified */
7ec681f3Smrg   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
7ec681f3Smrg      cache = &device->default_pipeline_cache;
7ec681f3Smrg
7ec681f3Smrg   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
7ec681f3Smrg                               VK_OBJECT_TYPE_PIPELINE);
7ec681f3Smrg
7ec681f3Smrg   if (pipeline == NULL)
7ec681f3Smrg      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
7ec681f3Smrg
7ec681f3Smrg   result = pipeline_init(pipeline, device, cache,
7ec681f3Smrg                          pCreateInfo,
7ec681f3Smrg                          pAllocator);
7ec681f3Smrg
7ec681f3Smrg   if (result != VK_SUCCESS) {
7ec681f3Smrg      v3dv_destroy_pipeline(pipeline, device, pAllocator);
7ec681f3Smrg      if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
7ec681f3Smrg         *pPipeline = VK_NULL_HANDLE;
7ec681f3Smrg      return result;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   *pPipeline = v3dv_pipeline_to_handle(pipeline);
7ec681f3Smrg
7ec681f3Smrg   return VK_SUCCESS;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
7ec681f3Smrgv3dv_CreateGraphicsPipelines(VkDevice _device,
7ec681f3Smrg                             VkPipelineCache pipelineCache,
7ec681f3Smrg                             uint32_t count,
7ec681f3Smrg                             const VkGraphicsPipelineCreateInfo *pCreateInfos,
7ec681f3Smrg                             const VkAllocationCallbacks *pAllocator,
7ec681f3Smrg                             VkPipeline *pPipelines)
7ec681f3Smrg{
7ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_device, device, _device);
7ec681f3Smrg   VkResult result = VK_SUCCESS;
7ec681f3Smrg
7ec681f3Smrg   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
7ec681f3Smrg      mtx_lock(&device->pdevice->mutex);
7ec681f3Smrg
7ec681f3Smrg   uint32_t i = 0;
7ec681f3Smrg   for (; i < count; i++) {
7ec681f3Smrg      VkResult local_result;
7ec681f3Smrg
7ec681f3Smrg      local_result = graphics_pipeline_create(_device,
7ec681f3Smrg                                              pipelineCache,
7ec681f3Smrg                                              &pCreateInfos[i],
7ec681f3Smrg                                              pAllocator,
7ec681f3Smrg                                              &pPipelines[i]);
7ec681f3Smrg
7ec681f3Smrg      if (local_result != VK_SUCCESS) {
7ec681f3Smrg         result = local_result;
7ec681f3Smrg         pPipelines[i] = VK_NULL_HANDLE;
7ec681f3Smrg
7ec681f3Smrg         if (pCreateInfos[i].flags &
7ec681f3Smrg             VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
7ec681f3Smrg            break;
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   for (; i < count; i++)
7ec681f3Smrg      pPipelines[i] = VK_NULL_HANDLE;
7ec681f3Smrg
7ec681f3Smrg   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
7ec681f3Smrg      mtx_unlock(&device->pdevice->mutex);
7ec681f3Smrg
7ec681f3Smrg   return result;
7ec681f3Smrg}
7ec681f3Smrg
/* Reports the size and alignment, in bytes, of a scalar or vector type. It is
 * passed as the type-info callback when lowering shared variables to explicit
 * types (see lower_cs_shared).
 *
 * Booleans take 4 bytes per component, every other type takes its bit size
 * divided by 8. The size is component size times component count, and the
 * alignment is the same except that 3-component vectors are aligned like
 * 4-component ones.
 */
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);

   /* Two separate statements: the original joined them with a comma operator,
    * which produced the same values but only by accident.
    */
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}
7ec681f3Smrg
7ec681f3Smrgstatic void
7ec681f3Smrglower_cs_shared(struct nir_shader *nir)
7ec681f3Smrg{
7ec681f3Smrg   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
7ec681f3Smrg              nir_var_mem_shared, shared_type_info);
7ec681f3Smrg   NIR_PASS_V(nir, nir_lower_explicit_io,
7ec681f3Smrg              nir_var_mem_shared, nir_address_format_32bit_offset);
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic VkResult
7ec681f3Smrgpipeline_compile_compute(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                         struct v3dv_pipeline_cache *cache,
7ec681f3Smrg                         const VkComputePipelineCreateInfo *info,
7ec681f3Smrg                         const VkAllocationCallbacks *alloc)
7ec681f3Smrg{
7ec681f3Smrg   VkPipelineCreationFeedbackEXT pipeline_feedback = {
7ec681f3Smrg      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
7ec681f3Smrg   };
7ec681f3Smrg   int64_t pipeline_start = os_time_get_nano();
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_device *device = pipeline->device;
7ec681f3Smrg   struct v3dv_physical_device *physical_device =
7ec681f3Smrg      &device->instance->physicalDevice;
7ec681f3Smrg
7ec681f3Smrg   const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
7ec681f3Smrg   gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_pipeline_stage *p_stage =
7ec681f3Smrg      vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
7ec681f3Smrg                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7ec681f3Smrg   if (!p_stage)
7ec681f3Smrg      return VK_ERROR_OUT_OF_HOST_MEMORY;
7ec681f3Smrg
7ec681f3Smrg   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
7ec681f3Smrg   p_stage->pipeline = pipeline;
7ec681f3Smrg   p_stage->stage = gl_shader_stage_to_broadcom(stage);
7ec681f3Smrg   p_stage->entrypoint = sinfo->pName;
7ec681f3Smrg   p_stage->module = vk_shader_module_from_handle(sinfo->module);
7ec681f3Smrg   p_stage->spec_info = sinfo->pSpecializationInfo;
7ec681f3Smrg   p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
7ec681f3Smrg
7ec681f3Smrg   pipeline_hash_shader(p_stage->module,
7ec681f3Smrg                        p_stage->entrypoint,
7ec681f3Smrg                        stage,
7ec681f3Smrg                        p_stage->spec_info,
7ec681f3Smrg                        p_stage->shader_sha1);
7ec681f3Smrg
7ec681f3Smrg   /* We try to get directly the variant first from the cache */
7ec681f3Smrg   p_stage->nir = NULL;
7ec681f3Smrg
7ec681f3Smrg   pipeline->cs = p_stage;
7ec681f3Smrg   pipeline->active_stages |= sinfo->stage;
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_pipeline_key pipeline_key;
7ec681f3Smrg   pipeline_populate_compute_key(pipeline, &pipeline_key, info);
7ec681f3Smrg   unsigned char pipeline_sha1[20];
7ec681f3Smrg   pipeline_hash_compute(pipeline, &pipeline_key, pipeline_sha1);
7ec681f3Smrg
7ec681f3Smrg   bool cache_hit = false;
7ec681f3Smrg   pipeline->shared_data =
7ec681f3Smrg      v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1, &cache_hit);
7ec681f3Smrg
7ec681f3Smrg   if (pipeline->shared_data != NULL) {
7ec681f3Smrg      assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
7ec681f3Smrg      if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
7ec681f3Smrg         pipeline_feedback.flags |=
7ec681f3Smrg            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
7ec681f3Smrg
7ec681f3Smrg      goto success;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
7ec681f3Smrg      return VK_PIPELINE_COMPILE_REQUIRED_EXT;
7ec681f3Smrg
7ec681f3Smrg   pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline_sha1,
7ec681f3Smrg                                                               pipeline,
7ec681f3Smrg                                                               false);
7ec681f3Smrg
7ec681f3Smrg   p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
7ec681f3Smrg
7ec681f3Smrg   /* If not found on cache, compile it */
7ec681f3Smrg   p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
7ec681f3Smrg   assert(p_stage->nir);
7ec681f3Smrg
7ec681f3Smrg   st_nir_opts(p_stage->nir);
7ec681f3Smrg   pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
7ec681f3Smrg   lower_cs_shared(p_stage->nir);
7ec681f3Smrg
7ec681f3Smrg   VkResult result = VK_SUCCESS;
7ec681f3Smrg
7ec681f3Smrg   struct v3d_key key;
7ec681f3Smrg   memset(&key, 0, sizeof(key));
7ec681f3Smrg   pipeline_populate_v3d_key(&key, p_stage, 0,
7ec681f3Smrg                             pipeline->device->features.robustBufferAccess);
7ec681f3Smrg   pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
7ec681f3Smrg      pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
7ec681f3Smrg                                      alloc, &result);
7ec681f3Smrg
7ec681f3Smrg   if (result != VK_SUCCESS)
7ec681f3Smrg      return result;
7ec681f3Smrg
7ec681f3Smrg   if (!upload_assembly(pipeline))
7ec681f3Smrg      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
7ec681f3Smrg
7ec681f3Smrg   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
7ec681f3Smrg
7ec681f3Smrgsuccess:
7ec681f3Smrg
7ec681f3Smrg   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
7ec681f3Smrg   write_creation_feedback(pipeline,
7ec681f3Smrg                           info->pNext,
7ec681f3Smrg                           &pipeline_feedback,
7ec681f3Smrg                           1,
7ec681f3Smrg                           &info->stage);
7ec681f3Smrg
7ec681f3Smrg   /* As we got the variants in pipeline->shared_data, after compiling we
7ec681f3Smrg    * don't need the pipeline_stages
7ec681f3Smrg    */
7ec681f3Smrg   pipeline_free_stages(device, pipeline, alloc);
7ec681f3Smrg
7ec681f3Smrg   pipeline_check_spill_size(pipeline);
7ec681f3Smrg
7ec681f3Smrg   return VK_SUCCESS;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic VkResult
7ec681f3Smrgcompute_pipeline_init(struct v3dv_pipeline *pipeline,
7ec681f3Smrg                      struct v3dv_device *device,
7ec681f3Smrg                      struct v3dv_pipeline_cache *cache,
7ec681f3Smrg                      const VkComputePipelineCreateInfo *info,
7ec681f3Smrg                      const VkAllocationCallbacks *alloc)
7ec681f3Smrg{
7ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
7ec681f3Smrg
7ec681f3Smrg   pipeline->device = device;
7ec681f3Smrg   pipeline->layout = layout;
7ec681f3Smrg
7ec681f3Smrg   VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
7ec681f3Smrg
7ec681f3Smrg   return result;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3Smrgstatic VkResult
7ec681f3Smrgcompute_pipeline_create(VkDevice _device,
7ec681f3Smrg                         VkPipelineCache _cache,
7ec681f3Smrg                         const VkComputePipelineCreateInfo *pCreateInfo,
7ec681f3Smrg                         const VkAllocationCallbacks *pAllocator,
7ec681f3Smrg                         VkPipeline *pPipeline)
7ec681f3Smrg{
7ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_device, device, _device);
7ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
7ec681f3Smrg
7ec681f3Smrg   struct v3dv_pipeline *pipeline;
7ec681f3Smrg   VkResult result;
7ec681f3Smrg
7ec681f3Smrg   /* Use the default pipeline cache if none is specified */
7ec681f3Smrg   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
7ec681f3Smrg      cache = &device->default_pipeline_cache;
7ec681f3Smrg
7ec681f3Smrg   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
7ec681f3Smrg                               VK_OBJECT_TYPE_PIPELINE);
7ec681f3Smrg   if (pipeline == NULL)
7ec681f3Smrg      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
7ec681f3Smrg
7ec681f3Smrg   result = compute_pipeline_init(pipeline, device, cache,
7ec681f3Smrg                                  pCreateInfo, pAllocator);
7ec681f3Smrg   if (result != VK_SUCCESS) {
7ec681f3Smrg      v3dv_destroy_pipeline(pipeline, device, pAllocator);
7ec681f3Smrg      if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
7ec681f3Smrg         *pPipeline = VK_NULL_HANDLE;
7ec681f3Smrg      return result;
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   *pPipeline = v3dv_pipeline_to_handle(pipeline);
7ec681f3Smrg
7ec681f3Smrg   return VK_SUCCESS;
7ec681f3Smrg}
7ec681f3Smrg
7ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
7ec681f3Smrgv3dv_CreateComputePipelines(VkDevice _device,
7ec681f3Smrg                            VkPipelineCache pipelineCache,
7ec681f3Smrg                            uint32_t createInfoCount,
7ec681f3Smrg                            const VkComputePipelineCreateInfo *pCreateInfos,
7ec681f3Smrg                            const VkAllocationCallbacks *pAllocator,
7ec681f3Smrg                            VkPipeline *pPipelines)
7ec681f3Smrg{
7ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_device, device, _device);
7ec681f3Smrg   VkResult result = VK_SUCCESS;
7ec681f3Smrg
7ec681f3Smrg   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
7ec681f3Smrg      mtx_lock(&device->pdevice->mutex);
7ec681f3Smrg
7ec681f3Smrg   uint32_t i = 0;
7ec681f3Smrg   for (; i < createInfoCount; i++) {
7ec681f3Smrg      VkResult local_result;
7ec681f3Smrg      local_result = compute_pipeline_create(_device,
7ec681f3Smrg                                              pipelineCache,
7ec681f3Smrg                                              &pCreateInfos[i],
7ec681f3Smrg                                              pAllocator,
7ec681f3Smrg                                              &pPipelines[i]);
7ec681f3Smrg
7ec681f3Smrg      if (local_result != VK_SUCCESS) {
7ec681f3Smrg         result = local_result;
7ec681f3Smrg         pPipelines[i] = VK_NULL_HANDLE;
7ec681f3Smrg
7ec681f3Smrg         if (pCreateInfos[i].flags &
7ec681f3Smrg             VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
7ec681f3Smrg            break;
7ec681f3Smrg      }
7ec681f3Smrg   }
7ec681f3Smrg
7ec681f3Smrg   for (; i < createInfoCount; i++)
7ec681f3Smrg      pPipelines[i] = VK_NULL_HANDLE;
7ec681f3Smrg
7ec681f3Smrg   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
7ec681f3Smrg      mtx_unlock(&device->pdevice->mutex);
7ec681f3Smrg
7ec681f3Smrg   return result;
7ec681f3Smrg}