/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include "vk_util.h" 257ec681f3Smrg 267ec681f3Smrg#include "v3dv_debug.h" 277ec681f3Smrg#include "v3dv_private.h" 287ec681f3Smrg 297ec681f3Smrg#include "vk_format_info.h" 307ec681f3Smrg 317ec681f3Smrg#include "common/v3d_debug.h" 327ec681f3Smrg 337ec681f3Smrg#include "compiler/nir/nir_builder.h" 347ec681f3Smrg#include "nir/nir_serialize.h" 357ec681f3Smrg 367ec681f3Smrg#include "util/u_atomic.h" 377ec681f3Smrg#include "util/u_prim.h" 387ec681f3Smrg#include "util/os_time.h" 397ec681f3Smrg 407ec681f3Smrg#include "vulkan/util/vk_format.h" 417ec681f3Smrg 427ec681f3Smrgstatic VkResult 437ec681f3Smrgcompute_vpm_config(struct v3dv_pipeline *pipeline); 447ec681f3Smrg 457ec681f3Smrgvoid 467ec681f3Smrgv3dv_print_v3d_key(struct v3d_key *key, 477ec681f3Smrg uint32_t v3d_key_size) 487ec681f3Smrg{ 497ec681f3Smrg struct mesa_sha1 ctx; 507ec681f3Smrg unsigned char sha1[20]; 517ec681f3Smrg char sha1buf[41]; 527ec681f3Smrg 537ec681f3Smrg _mesa_sha1_init(&ctx); 547ec681f3Smrg 557ec681f3Smrg _mesa_sha1_update(&ctx, key, v3d_key_size); 567ec681f3Smrg 577ec681f3Smrg _mesa_sha1_final(&ctx, sha1); 587ec681f3Smrg _mesa_sha1_format(sha1buf, sha1); 597ec681f3Smrg 607ec681f3Smrg fprintf(stderr, "key %p: %s\n", key, sha1buf); 617ec681f3Smrg} 627ec681f3Smrg 637ec681f3Smrgstatic void 647ec681f3Smrgpipeline_compute_sha1_from_nir(nir_shader *nir, 657ec681f3Smrg unsigned char sha1[20]) 667ec681f3Smrg{ 677ec681f3Smrg assert(nir); 687ec681f3Smrg struct blob blob; 697ec681f3Smrg blob_init(&blob); 707ec681f3Smrg 717ec681f3Smrg nir_serialize(&blob, nir, false); 727ec681f3Smrg if (!blob.out_of_memory) 737ec681f3Smrg _mesa_sha1_compute(blob.data, blob.size, sha1); 747ec681f3Smrg 757ec681f3Smrg blob_finish(&blob); 767ec681f3Smrg} 777ec681f3Smrg 787ec681f3Smrgvoid 797ec681f3Smrgv3dv_shader_module_internal_init(struct v3dv_device *device, 807ec681f3Smrg struct vk_shader_module *module, 817ec681f3Smrg nir_shader *nir) 827ec681f3Smrg{ 837ec681f3Smrg 
vk_object_base_init(&device->vk, &module->base, 847ec681f3Smrg VK_OBJECT_TYPE_SHADER_MODULE); 857ec681f3Smrg module->nir = nir; 867ec681f3Smrg module->size = 0; 877ec681f3Smrg 887ec681f3Smrg pipeline_compute_sha1_from_nir(nir, module->sha1); 897ec681f3Smrg} 907ec681f3Smrg 917ec681f3Smrgvoid 927ec681f3Smrgv3dv_shader_variant_destroy(struct v3dv_device *device, 937ec681f3Smrg struct v3dv_shader_variant *variant) 947ec681f3Smrg{ 957ec681f3Smrg /* The assembly BO is shared by all variants in the pipeline, so it can't 967ec681f3Smrg * be freed here and should be freed with the pipeline 977ec681f3Smrg */ 987ec681f3Smrg ralloc_free(variant->prog_data.base); 997ec681f3Smrg vk_free(&device->vk.alloc, variant); 1007ec681f3Smrg} 1017ec681f3Smrg 1027ec681f3Smrgstatic void 1037ec681f3Smrgdestroy_pipeline_stage(struct v3dv_device *device, 1047ec681f3Smrg struct v3dv_pipeline_stage *p_stage, 1057ec681f3Smrg const VkAllocationCallbacks *pAllocator) 1067ec681f3Smrg{ 1077ec681f3Smrg if (!p_stage) 1087ec681f3Smrg return; 1097ec681f3Smrg 1107ec681f3Smrg ralloc_free(p_stage->nir); 1117ec681f3Smrg vk_free2(&device->vk.alloc, pAllocator, p_stage); 1127ec681f3Smrg} 1137ec681f3Smrg 1147ec681f3Smrgstatic void 1157ec681f3Smrgpipeline_free_stages(struct v3dv_device *device, 1167ec681f3Smrg struct v3dv_pipeline *pipeline, 1177ec681f3Smrg const VkAllocationCallbacks *pAllocator) 1187ec681f3Smrg{ 1197ec681f3Smrg assert(pipeline); 1207ec681f3Smrg 1217ec681f3Smrg /* FIXME: we can't just use a loop over mesa stage due the bin, would be 1227ec681f3Smrg * good to find an alternative. 
1237ec681f3Smrg */ 1247ec681f3Smrg destroy_pipeline_stage(device, pipeline->vs, pAllocator); 1257ec681f3Smrg destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator); 1267ec681f3Smrg destroy_pipeline_stage(device, pipeline->gs, pAllocator); 1277ec681f3Smrg destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator); 1287ec681f3Smrg destroy_pipeline_stage(device, pipeline->fs, pAllocator); 1297ec681f3Smrg destroy_pipeline_stage(device, pipeline->cs, pAllocator); 1307ec681f3Smrg 1317ec681f3Smrg pipeline->vs = NULL; 1327ec681f3Smrg pipeline->vs_bin = NULL; 1337ec681f3Smrg pipeline->gs = NULL; 1347ec681f3Smrg pipeline->gs_bin = NULL; 1357ec681f3Smrg pipeline->fs = NULL; 1367ec681f3Smrg pipeline->cs = NULL; 1377ec681f3Smrg} 1387ec681f3Smrg 1397ec681f3Smrgstatic void 1407ec681f3Smrgv3dv_destroy_pipeline(struct v3dv_pipeline *pipeline, 1417ec681f3Smrg struct v3dv_device *device, 1427ec681f3Smrg const VkAllocationCallbacks *pAllocator) 1437ec681f3Smrg{ 1447ec681f3Smrg if (!pipeline) 1457ec681f3Smrg return; 1467ec681f3Smrg 1477ec681f3Smrg pipeline_free_stages(device, pipeline, pAllocator); 1487ec681f3Smrg 1497ec681f3Smrg if (pipeline->shared_data) { 1507ec681f3Smrg v3dv_pipeline_shared_data_unref(device, pipeline->shared_data); 1517ec681f3Smrg pipeline->shared_data = NULL; 1527ec681f3Smrg } 1537ec681f3Smrg 1547ec681f3Smrg if (pipeline->spill.bo) { 1557ec681f3Smrg assert(pipeline->spill.size_per_thread > 0); 1567ec681f3Smrg v3dv_bo_free(device, pipeline->spill.bo); 1577ec681f3Smrg } 1587ec681f3Smrg 1597ec681f3Smrg if (pipeline->default_attribute_values) { 1607ec681f3Smrg v3dv_bo_free(device, pipeline->default_attribute_values); 1617ec681f3Smrg pipeline->default_attribute_values = NULL; 1627ec681f3Smrg } 1637ec681f3Smrg 1647ec681f3Smrg vk_object_free(&device->vk, pAllocator, pipeline); 1657ec681f3Smrg} 1667ec681f3Smrg 1677ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 1687ec681f3Smrgv3dv_DestroyPipeline(VkDevice _device, 1697ec681f3Smrg VkPipeline _pipeline, 1707ec681f3Smrg const 
VkAllocationCallbacks *pAllocator) 1717ec681f3Smrg{ 1727ec681f3Smrg V3DV_FROM_HANDLE(v3dv_device, device, _device); 1737ec681f3Smrg V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline); 1747ec681f3Smrg 1757ec681f3Smrg if (!pipeline) 1767ec681f3Smrg return; 1777ec681f3Smrg 1787ec681f3Smrg v3dv_destroy_pipeline(pipeline, device, pAllocator); 1797ec681f3Smrg} 1807ec681f3Smrg 1817ec681f3Smrgstatic const struct spirv_to_nir_options default_spirv_options = { 1827ec681f3Smrg .caps = { 1837ec681f3Smrg .device_group = true, 1847ec681f3Smrg .multiview = true, 1857ec681f3Smrg .subgroup_basic = true, 1867ec681f3Smrg .variable_pointers = true, 1877ec681f3Smrg }, 1887ec681f3Smrg .ubo_addr_format = nir_address_format_32bit_index_offset, 1897ec681f3Smrg .ssbo_addr_format = nir_address_format_32bit_index_offset, 1907ec681f3Smrg .phys_ssbo_addr_format = nir_address_format_64bit_global, 1917ec681f3Smrg .push_const_addr_format = nir_address_format_logical, 1927ec681f3Smrg .shared_addr_format = nir_address_format_32bit_offset, 1937ec681f3Smrg}; 1947ec681f3Smrg 1957ec681f3Smrgconst nir_shader_compiler_options v3dv_nir_options = { 1967ec681f3Smrg .lower_uadd_sat = true, 1977ec681f3Smrg .lower_iadd_sat = true, 1987ec681f3Smrg .lower_all_io_to_temps = true, 1997ec681f3Smrg .lower_extract_byte = true, 2007ec681f3Smrg .lower_extract_word = true, 2017ec681f3Smrg .lower_insert_byte = true, 2027ec681f3Smrg .lower_insert_word = true, 2037ec681f3Smrg .lower_bitfield_insert_to_shifts = true, 2047ec681f3Smrg .lower_bitfield_extract_to_shifts = true, 2057ec681f3Smrg .lower_bitfield_reverse = true, 2067ec681f3Smrg .lower_bit_count = true, 2077ec681f3Smrg .lower_cs_local_id_from_index = true, 2087ec681f3Smrg .lower_ffract = true, 2097ec681f3Smrg .lower_fmod = true, 2107ec681f3Smrg .lower_pack_unorm_2x16 = true, 2117ec681f3Smrg .lower_pack_snorm_2x16 = true, 2127ec681f3Smrg .lower_unpack_unorm_2x16 = true, 2137ec681f3Smrg .lower_unpack_snorm_2x16 = true, 2147ec681f3Smrg .lower_pack_unorm_4x8 = true, 
2157ec681f3Smrg .lower_pack_snorm_4x8 = true, 2167ec681f3Smrg .lower_unpack_unorm_4x8 = true, 2177ec681f3Smrg .lower_unpack_snorm_4x8 = true, 2187ec681f3Smrg .lower_pack_half_2x16 = true, 2197ec681f3Smrg .lower_unpack_half_2x16 = true, 2207ec681f3Smrg /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and 2217ec681f3Smrg * get the tests to pass since it might produce slightly better code. 2227ec681f3Smrg */ 2237ec681f3Smrg .lower_uadd_carry = true, 2247ec681f3Smrg .lower_usub_borrow = true, 2257ec681f3Smrg /* FIXME: check if we can use multop + umul24 to implement mul2x32_64 2267ec681f3Smrg * without lowering. 2277ec681f3Smrg */ 2287ec681f3Smrg .lower_mul_2x32_64 = true, 2297ec681f3Smrg .lower_fdiv = true, 2307ec681f3Smrg .lower_find_lsb = true, 2317ec681f3Smrg .lower_ffma16 = true, 2327ec681f3Smrg .lower_ffma32 = true, 2337ec681f3Smrg .lower_ffma64 = true, 2347ec681f3Smrg .lower_flrp32 = true, 2357ec681f3Smrg .lower_fpow = true, 2367ec681f3Smrg .lower_fsat = true, 2377ec681f3Smrg .lower_fsqrt = true, 2387ec681f3Smrg .lower_ifind_msb = true, 2397ec681f3Smrg .lower_isign = true, 2407ec681f3Smrg .lower_ldexp = true, 2417ec681f3Smrg .lower_mul_high = true, 2427ec681f3Smrg .lower_wpos_pntc = true, 2437ec681f3Smrg .lower_rotate = true, 2447ec681f3Smrg .lower_to_scalar = true, 2457ec681f3Smrg .lower_device_index_to_zero = true, 2467ec681f3Smrg .has_fsub = true, 2477ec681f3Smrg .has_isub = true, 2487ec681f3Smrg .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic 2497ec681f3Smrg * needs to be supported */ 2507ec681f3Smrg .lower_interpolate_at = true, 2517ec681f3Smrg .max_unroll_iterations = 16, 2527ec681f3Smrg .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp), 2537ec681f3Smrg .divergence_analysis_options = 2547ec681f3Smrg nir_divergence_multiple_workgroup_per_compute_subgroup 2557ec681f3Smrg}; 2567ec681f3Smrg 2577ec681f3Smrgconst nir_shader_compiler_options * 2587ec681f3Smrgv3dv_pipeline_get_nir_options(void) 
2597ec681f3Smrg{ 2607ec681f3Smrg return &v3dv_nir_options; 2617ec681f3Smrg} 2627ec681f3Smrg 2637ec681f3Smrg#define OPT(pass, ...) ({ \ 2647ec681f3Smrg bool this_progress = false; \ 2657ec681f3Smrg NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ 2667ec681f3Smrg if (this_progress) \ 2677ec681f3Smrg progress = true; \ 2687ec681f3Smrg this_progress; \ 2697ec681f3Smrg}) 2707ec681f3Smrg 2717ec681f3Smrgstatic void 2727ec681f3Smrgnir_optimize(nir_shader *nir, bool allow_copies) 2737ec681f3Smrg{ 2747ec681f3Smrg bool progress; 2757ec681f3Smrg 2767ec681f3Smrg do { 2777ec681f3Smrg progress = false; 2787ec681f3Smrg OPT(nir_split_array_vars, nir_var_function_temp); 2797ec681f3Smrg OPT(nir_shrink_vec_array_vars, nir_var_function_temp); 2807ec681f3Smrg OPT(nir_opt_deref); 2817ec681f3Smrg OPT(nir_lower_vars_to_ssa); 2827ec681f3Smrg if (allow_copies) { 2837ec681f3Smrg /* Only run this pass in the first call to nir_optimize. Later calls 2847ec681f3Smrg * assume that we've lowered away any copy_deref instructions and we 2857ec681f3Smrg * don't want to introduce any more. 2867ec681f3Smrg */ 2877ec681f3Smrg OPT(nir_opt_find_array_copies); 2887ec681f3Smrg } 2897ec681f3Smrg OPT(nir_opt_copy_prop_vars); 2907ec681f3Smrg OPT(nir_opt_dead_write_vars); 2917ec681f3Smrg OPT(nir_opt_combine_stores, nir_var_all); 2927ec681f3Smrg 2937ec681f3Smrg OPT(nir_lower_alu_to_scalar, NULL, NULL); 2947ec681f3Smrg 2957ec681f3Smrg OPT(nir_copy_prop); 2967ec681f3Smrg OPT(nir_lower_phis_to_scalar, false); 2977ec681f3Smrg 2987ec681f3Smrg OPT(nir_copy_prop); 2997ec681f3Smrg OPT(nir_opt_dce); 3007ec681f3Smrg OPT(nir_opt_cse); 3017ec681f3Smrg OPT(nir_opt_combine_stores, nir_var_all); 3027ec681f3Smrg 3037ec681f3Smrg /* Passing 0 to the peephole select pass causes it to convert 3047ec681f3Smrg * if-statements that contain only move instructions in the branches 3057ec681f3Smrg * regardless of the count. 
3067ec681f3Smrg * 3077ec681f3Smrg * Passing 1 to the peephole select pass causes it to convert 3087ec681f3Smrg * if-statements that contain at most a single ALU instruction (total) 3097ec681f3Smrg * in both branches. 3107ec681f3Smrg */ 3117ec681f3Smrg OPT(nir_opt_peephole_select, 0, false, false); 3127ec681f3Smrg OPT(nir_opt_peephole_select, 8, false, true); 3137ec681f3Smrg 3147ec681f3Smrg OPT(nir_opt_intrinsics); 3157ec681f3Smrg OPT(nir_opt_idiv_const, 32); 3167ec681f3Smrg OPT(nir_opt_algebraic); 3177ec681f3Smrg OPT(nir_opt_constant_folding); 3187ec681f3Smrg 3197ec681f3Smrg OPT(nir_opt_dead_cf); 3207ec681f3Smrg 3217ec681f3Smrg OPT(nir_opt_if, false); 3227ec681f3Smrg OPT(nir_opt_conditional_discard); 3237ec681f3Smrg 3247ec681f3Smrg OPT(nir_opt_remove_phis); 3257ec681f3Smrg OPT(nir_opt_undef); 3267ec681f3Smrg OPT(nir_lower_pack); 3277ec681f3Smrg } while (progress); 3287ec681f3Smrg 3297ec681f3Smrg OPT(nir_remove_dead_variables, nir_var_function_temp, NULL); 3307ec681f3Smrg} 3317ec681f3Smrg 3327ec681f3Smrgstatic void 3337ec681f3Smrgpreprocess_nir(nir_shader *nir) 3347ec681f3Smrg{ 3357ec681f3Smrg /* We have to lower away local variable initializers right before we 3367ec681f3Smrg * inline functions. That way they get properly initialized at the top 3377ec681f3Smrg * of the function and not at the top of its caller. 
3387ec681f3Smrg */ 3397ec681f3Smrg NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); 3407ec681f3Smrg NIR_PASS_V(nir, nir_lower_returns); 3417ec681f3Smrg NIR_PASS_V(nir, nir_inline_functions); 3427ec681f3Smrg NIR_PASS_V(nir, nir_opt_deref); 3437ec681f3Smrg 3447ec681f3Smrg /* Pick off the single entrypoint that we want */ 3457ec681f3Smrg foreach_list_typed_safe(nir_function, func, node, &nir->functions) { 3467ec681f3Smrg if (func->is_entrypoint) 3477ec681f3Smrg func->name = ralloc_strdup(func, "main"); 3487ec681f3Smrg else 3497ec681f3Smrg exec_node_remove(&func->node); 3507ec681f3Smrg } 3517ec681f3Smrg assert(exec_list_length(&nir->functions) == 1); 3527ec681f3Smrg 3537ec681f3Smrg /* Vulkan uses the separate-shader linking model */ 3547ec681f3Smrg nir->info.separate_shader = true; 3557ec681f3Smrg 3567ec681f3Smrg /* Make sure we lower variable initializers on output variables so that 3577ec681f3Smrg * nir_remove_dead_variables below sees the corresponding stores 3587ec681f3Smrg */ 3597ec681f3Smrg NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out); 3607ec681f3Smrg 3617ec681f3Smrg /* Now that we've deleted all but the main function, we can go ahead and 3627ec681f3Smrg * lower the rest of the variable initializers. 3637ec681f3Smrg */ 3647ec681f3Smrg NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); 3657ec681f3Smrg 3667ec681f3Smrg /* Split member structs. We do this before lower_io_to_temporaries so that 3677ec681f3Smrg * it doesn't lower system values to temporaries by accident. 
3687ec681f3Smrg */ 3697ec681f3Smrg NIR_PASS_V(nir, nir_split_var_copies); 3707ec681f3Smrg NIR_PASS_V(nir, nir_split_per_member_structs); 3717ec681f3Smrg 3727ec681f3Smrg if (nir->info.stage == MESA_SHADER_FRAGMENT) 3737ec681f3Smrg NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out); 3747ec681f3Smrg if (nir->info.stage == MESA_SHADER_FRAGMENT) { 3757ec681f3Smrg NIR_PASS_V(nir, nir_lower_input_attachments, 3767ec681f3Smrg &(nir_input_attachment_options) { 3777ec681f3Smrg .use_fragcoord_sysval = false, 3787ec681f3Smrg }); 3797ec681f3Smrg } 3807ec681f3Smrg 3817ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, 3827ec681f3Smrg nir_var_mem_push_const, 3837ec681f3Smrg nir_address_format_32bit_offset); 3847ec681f3Smrg 3857ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, 3867ec681f3Smrg nir_var_mem_ubo | nir_var_mem_ssbo, 3877ec681f3Smrg nir_address_format_32bit_index_offset); 3887ec681f3Smrg 3897ec681f3Smrg NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in | 3907ec681f3Smrg nir_var_shader_out | nir_var_system_value | nir_var_mem_shared, 3917ec681f3Smrg NULL); 3927ec681f3Smrg 3937ec681f3Smrg NIR_PASS_V(nir, nir_propagate_invariant, false); 3947ec681f3Smrg NIR_PASS_V(nir, nir_lower_io_to_temporaries, 3957ec681f3Smrg nir_shader_get_entrypoint(nir), true, false); 3967ec681f3Smrg 3977ec681f3Smrg NIR_PASS_V(nir, nir_lower_system_values); 3987ec681f3Smrg NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); 3997ec681f3Smrg 4007ec681f3Smrg NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); 4017ec681f3Smrg 4027ec681f3Smrg NIR_PASS_V(nir, nir_normalize_cubemap_coords); 4037ec681f3Smrg 4047ec681f3Smrg NIR_PASS_V(nir, nir_lower_global_vars_to_local); 4057ec681f3Smrg 4067ec681f3Smrg NIR_PASS_V(nir, nir_split_var_copies); 4077ec681f3Smrg NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp); 4087ec681f3Smrg 4097ec681f3Smrg nir_optimize(nir, true); 4107ec681f3Smrg 4117ec681f3Smrg NIR_PASS_V(nir, nir_lower_load_const_to_scalar); 4127ec681f3Smrg 
4137ec681f3Smrg /* Lower a bunch of stuff */ 4147ec681f3Smrg NIR_PASS_V(nir, nir_lower_var_copies); 4157ec681f3Smrg 4167ec681f3Smrg NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX); 4177ec681f3Smrg 4187ec681f3Smrg NIR_PASS_V(nir, nir_lower_indirect_derefs, 4197ec681f3Smrg nir_var_function_temp, 2); 4207ec681f3Smrg 4217ec681f3Smrg NIR_PASS_V(nir, nir_lower_array_deref_of_vec, 4227ec681f3Smrg nir_var_mem_ubo | nir_var_mem_ssbo, 4237ec681f3Smrg nir_lower_direct_array_deref_of_vec_load); 4247ec681f3Smrg 4257ec681f3Smrg NIR_PASS_V(nir, nir_lower_frexp); 4267ec681f3Smrg 4277ec681f3Smrg /* Get rid of split copies */ 4287ec681f3Smrg nir_optimize(nir, false); 4297ec681f3Smrg} 4307ec681f3Smrg 4317ec681f3Smrgstatic nir_shader * 4327ec681f3Smrgshader_module_compile_to_nir(struct v3dv_device *device, 4337ec681f3Smrg struct v3dv_pipeline_stage *stage) 4347ec681f3Smrg{ 4357ec681f3Smrg nir_shader *nir; 4367ec681f3Smrg const nir_shader_compiler_options *nir_options = &v3dv_nir_options; 4377ec681f3Smrg 4387ec681f3Smrg if (!stage->module->nir) { 4397ec681f3Smrg uint32_t *spirv = (uint32_t *) stage->module->data; 4407ec681f3Smrg assert(stage->module->size % 4 == 0); 4417ec681f3Smrg 4427ec681f3Smrg if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV)) 4437ec681f3Smrg v3dv_print_spirv(stage->module->data, stage->module->size, stderr); 4447ec681f3Smrg 4457ec681f3Smrg uint32_t num_spec_entries = 0; 4467ec681f3Smrg struct nir_spirv_specialization *spec_entries = 4477ec681f3Smrg vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries); 4487ec681f3Smrg const struct spirv_to_nir_options spirv_options = default_spirv_options; 4497ec681f3Smrg nir = spirv_to_nir(spirv, stage->module->size / 4, 4507ec681f3Smrg spec_entries, num_spec_entries, 4517ec681f3Smrg broadcom_shader_stage_to_gl(stage->stage), 4527ec681f3Smrg stage->entrypoint, 4537ec681f3Smrg &spirv_options, nir_options); 4547ec681f3Smrg assert(nir); 4557ec681f3Smrg nir_validate_shader(nir, "after spirv_to_nir"); 
4567ec681f3Smrg free(spec_entries); 4577ec681f3Smrg } else { 4587ec681f3Smrg /* For NIR modules created by the driver we can't consume the NIR 4597ec681f3Smrg * directly, we need to clone it first, since ownership of the NIR code 4607ec681f3Smrg * (as with SPIR-V code for SPIR-V shaders), belongs to the creator 4617ec681f3Smrg * of the module and modules can be destroyed immediately after been used 4627ec681f3Smrg * to create pipelines. 4637ec681f3Smrg */ 4647ec681f3Smrg nir = nir_shader_clone(NULL, stage->module->nir); 4657ec681f3Smrg nir_validate_shader(nir, "nir module"); 4667ec681f3Smrg } 4677ec681f3Smrg assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage)); 4687ec681f3Smrg 4697ec681f3Smrg const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = { 4707ec681f3Smrg .frag_coord = true, 4717ec681f3Smrg .point_coord = true, 4727ec681f3Smrg }; 4737ec681f3Smrg NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings); 4747ec681f3Smrg 4757ec681f3Smrg if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR | 4767ec681f3Smrg v3d_debug_flag_for_shader_stage( 4777ec681f3Smrg broadcom_shader_stage_to_gl(stage->stage))))) { 4787ec681f3Smrg fprintf(stderr, "Initial form: %s prog %d NIR:\n", 4797ec681f3Smrg broadcom_shader_stage_name(stage->stage), 4807ec681f3Smrg stage->program_id); 4817ec681f3Smrg nir_print_shader(nir, stderr); 4827ec681f3Smrg fprintf(stderr, "\n"); 4837ec681f3Smrg } 4847ec681f3Smrg 4857ec681f3Smrg preprocess_nir(nir); 4867ec681f3Smrg 4877ec681f3Smrg return nir; 4887ec681f3Smrg} 4897ec681f3Smrg 4907ec681f3Smrgstatic int 4917ec681f3Smrgtype_size_vec4(const struct glsl_type *type, bool bindless) 4927ec681f3Smrg{ 4937ec681f3Smrg return glsl_count_attribute_slots(type, false); 4947ec681f3Smrg} 4957ec681f3Smrg 4967ec681f3Smrg/* FIXME: the number of parameters for this method is somewhat big. Perhaps 4977ec681f3Smrg * rethink. 
4987ec681f3Smrg */ 4997ec681f3Smrgstatic unsigned 5007ec681f3Smrgdescriptor_map_add(struct v3dv_descriptor_map *map, 5017ec681f3Smrg int set, 5027ec681f3Smrg int binding, 5037ec681f3Smrg int array_index, 5047ec681f3Smrg int array_size, 5057ec681f3Smrg uint8_t return_size) 5067ec681f3Smrg{ 5077ec681f3Smrg assert(array_index < array_size); 5087ec681f3Smrg assert(return_size == 16 || return_size == 32); 5097ec681f3Smrg 5107ec681f3Smrg unsigned index = 0; 5117ec681f3Smrg for (unsigned i = 0; i < map->num_desc; i++) { 5127ec681f3Smrg if (set == map->set[i] && 5137ec681f3Smrg binding == map->binding[i] && 5147ec681f3Smrg array_index == map->array_index[i]) { 5157ec681f3Smrg assert(array_size == map->array_size[i]); 5167ec681f3Smrg if (return_size != map->return_size[index]) { 5177ec681f3Smrg /* It the return_size is different it means that the same sampler 5187ec681f3Smrg * was used for operations with different precision 5197ec681f3Smrg * requirement. In this case we need to ensure that we use the 5207ec681f3Smrg * larger one. 
5217ec681f3Smrg */ 5227ec681f3Smrg map->return_size[index] = 32; 5237ec681f3Smrg } 5247ec681f3Smrg return index; 5257ec681f3Smrg } 5267ec681f3Smrg index++; 5277ec681f3Smrg } 5287ec681f3Smrg 5297ec681f3Smrg assert(index == map->num_desc); 5307ec681f3Smrg 5317ec681f3Smrg map->set[map->num_desc] = set; 5327ec681f3Smrg map->binding[map->num_desc] = binding; 5337ec681f3Smrg map->array_index[map->num_desc] = array_index; 5347ec681f3Smrg map->array_size[map->num_desc] = array_size; 5357ec681f3Smrg map->return_size[map->num_desc] = return_size; 5367ec681f3Smrg map->num_desc++; 5377ec681f3Smrg 5387ec681f3Smrg return index; 5397ec681f3Smrg} 5407ec681f3Smrg 5417ec681f3Smrg 5427ec681f3Smrgstatic void 5437ec681f3Smrglower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr, 5447ec681f3Smrg struct v3dv_pipeline *pipeline) 5457ec681f3Smrg{ 5467ec681f3Smrg assert(instr->intrinsic == nir_intrinsic_load_push_constant); 5477ec681f3Smrg instr->intrinsic = nir_intrinsic_load_uniform; 5487ec681f3Smrg} 5497ec681f3Smrg 5507ec681f3Smrgstatic struct v3dv_descriptor_map* 5517ec681f3Smrgpipeline_get_descriptor_map(struct v3dv_pipeline *pipeline, 5527ec681f3Smrg VkDescriptorType desc_type, 5537ec681f3Smrg gl_shader_stage gl_stage, 5547ec681f3Smrg bool is_sampler) 5557ec681f3Smrg{ 5567ec681f3Smrg enum broadcom_shader_stage broadcom_stage = 5577ec681f3Smrg gl_shader_stage_to_broadcom(gl_stage); 5587ec681f3Smrg 5597ec681f3Smrg assert(pipeline->shared_data && 5607ec681f3Smrg pipeline->shared_data->maps[broadcom_stage]); 5617ec681f3Smrg 5627ec681f3Smrg switch(desc_type) { 5637ec681f3Smrg case VK_DESCRIPTOR_TYPE_SAMPLER: 5647ec681f3Smrg return &pipeline->shared_data->maps[broadcom_stage]->sampler_map; 5657ec681f3Smrg case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: 5667ec681f3Smrg case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: 5677ec681f3Smrg case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 5687ec681f3Smrg case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: 5697ec681f3Smrg case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: 
5707ec681f3Smrg return &pipeline->shared_data->maps[broadcom_stage]->texture_map; 5717ec681f3Smrg case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 5727ec681f3Smrg return is_sampler ? 5737ec681f3Smrg &pipeline->shared_data->maps[broadcom_stage]->sampler_map : 5747ec681f3Smrg &pipeline->shared_data->maps[broadcom_stage]->texture_map; 5757ec681f3Smrg case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 5767ec681f3Smrg return &pipeline->shared_data->maps[broadcom_stage]->ubo_map; 5777ec681f3Smrg case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 5787ec681f3Smrg return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map; 5797ec681f3Smrg default: 5807ec681f3Smrg unreachable("Descriptor type unknown or not having a descriptor map"); 5817ec681f3Smrg } 5827ec681f3Smrg} 5837ec681f3Smrg 5847ec681f3Smrg/* Gathers info from the intrinsic (set and binding) and then lowers it so it 5857ec681f3Smrg * could be used by the v3d_compiler */ 5867ec681f3Smrgstatic void 5877ec681f3Smrglower_vulkan_resource_index(nir_builder *b, 5887ec681f3Smrg nir_intrinsic_instr *instr, 5897ec681f3Smrg nir_shader *shader, 5907ec681f3Smrg struct v3dv_pipeline *pipeline, 5917ec681f3Smrg const struct v3dv_pipeline_layout *layout) 5927ec681f3Smrg{ 5937ec681f3Smrg assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index); 5947ec681f3Smrg 5957ec681f3Smrg nir_const_value *const_val = nir_src_as_const_value(instr->src[0]); 5967ec681f3Smrg 5977ec681f3Smrg unsigned set = nir_intrinsic_desc_set(instr); 5987ec681f3Smrg unsigned binding = nir_intrinsic_binding(instr); 5997ec681f3Smrg struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout; 6007ec681f3Smrg struct v3dv_descriptor_set_binding_layout *binding_layout = 6017ec681f3Smrg &set_layout->binding[binding]; 6027ec681f3Smrg unsigned index = 0; 6037ec681f3Smrg const VkDescriptorType desc_type = nir_intrinsic_desc_type(instr); 6047ec681f3Smrg 6057ec681f3Smrg switch (desc_type) { 6067ec681f3Smrg case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 6077ec681f3Smrg case 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: { 6087ec681f3Smrg struct v3dv_descriptor_map *descriptor_map = 6097ec681f3Smrg pipeline_get_descriptor_map(pipeline, desc_type, shader->info.stage, false); 6107ec681f3Smrg 6117ec681f3Smrg if (!const_val) 6127ec681f3Smrg unreachable("non-constant vulkan_resource_index array index"); 6137ec681f3Smrg 6147ec681f3Smrg index = descriptor_map_add(descriptor_map, set, binding, 6157ec681f3Smrg const_val->u32, 6167ec681f3Smrg binding_layout->array_size, 6177ec681f3Smrg 32 /* return_size: doesn't really apply for this case */); 6187ec681f3Smrg 6197ec681f3Smrg if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { 6207ec681f3Smrg /* skip index 0 which is used for push constants */ 6217ec681f3Smrg index++; 6227ec681f3Smrg } 6237ec681f3Smrg break; 6247ec681f3Smrg } 6257ec681f3Smrg 6267ec681f3Smrg default: 6277ec681f3Smrg unreachable("unsupported desc_type for vulkan_resource_index"); 6287ec681f3Smrg break; 6297ec681f3Smrg } 6307ec681f3Smrg 6317ec681f3Smrg /* Since we use the deref pass, both vulkan_resource_index and 6327ec681f3Smrg * vulkan_load_descriptor return a vec2 providing an index and 6337ec681f3Smrg * offset. Our backend compiler only cares about the index part. 
6347ec681f3Smrg */ 6357ec681f3Smrg nir_ssa_def_rewrite_uses(&instr->dest.ssa, 6367ec681f3Smrg nir_imm_ivec2(b, index, 0)); 6377ec681f3Smrg nir_instr_remove(&instr->instr); 6387ec681f3Smrg} 6397ec681f3Smrg 6407ec681f3Smrg/* Returns return_size, so it could be used for the case of not having a 6417ec681f3Smrg * sampler object 6427ec681f3Smrg */ 6437ec681f3Smrgstatic uint8_t 6447ec681f3Smrglower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx, 6457ec681f3Smrg nir_shader *shader, 6467ec681f3Smrg struct v3dv_pipeline *pipeline, 6477ec681f3Smrg const struct v3dv_pipeline_layout *layout) 6487ec681f3Smrg{ 6497ec681f3Smrg nir_ssa_def *index = NULL; 6507ec681f3Smrg unsigned base_index = 0; 6517ec681f3Smrg unsigned array_elements = 1; 6527ec681f3Smrg nir_tex_src *src = &instr->src[src_idx]; 6537ec681f3Smrg bool is_sampler = src->src_type == nir_tex_src_sampler_deref; 6547ec681f3Smrg 6557ec681f3Smrg /* We compute first the offsets */ 6567ec681f3Smrg nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr); 6577ec681f3Smrg while (deref->deref_type != nir_deref_type_var) { 6587ec681f3Smrg assert(deref->parent.is_ssa); 6597ec681f3Smrg nir_deref_instr *parent = 6607ec681f3Smrg nir_instr_as_deref(deref->parent.ssa->parent_instr); 6617ec681f3Smrg 6627ec681f3Smrg assert(deref->deref_type == nir_deref_type_array); 6637ec681f3Smrg 6647ec681f3Smrg if (nir_src_is_const(deref->arr.index) && index == NULL) { 6657ec681f3Smrg /* We're still building a direct index */ 6667ec681f3Smrg base_index += nir_src_as_uint(deref->arr.index) * array_elements; 6677ec681f3Smrg } else { 6687ec681f3Smrg if (index == NULL) { 6697ec681f3Smrg /* We used to be direct but not anymore */ 6707ec681f3Smrg index = nir_imm_int(b, base_index); 6717ec681f3Smrg base_index = 0; 6727ec681f3Smrg } 6737ec681f3Smrg 6747ec681f3Smrg index = nir_iadd(b, index, 6757ec681f3Smrg nir_imul(b, nir_imm_int(b, array_elements), 6767ec681f3Smrg nir_ssa_for_src(b, deref->arr.index, 1))); 
6777ec681f3Smrg } 6787ec681f3Smrg 6797ec681f3Smrg array_elements *= glsl_get_length(parent->type); 6807ec681f3Smrg 6817ec681f3Smrg deref = parent; 6827ec681f3Smrg } 6837ec681f3Smrg 6847ec681f3Smrg if (index) 6857ec681f3Smrg index = nir_umin(b, index, nir_imm_int(b, array_elements - 1)); 6867ec681f3Smrg 6877ec681f3Smrg /* We have the offsets, we apply them, rewriting the source or removing 6887ec681f3Smrg * instr if needed 6897ec681f3Smrg */ 6907ec681f3Smrg if (index) { 6917ec681f3Smrg nir_instr_rewrite_src(&instr->instr, &src->src, 6927ec681f3Smrg nir_src_for_ssa(index)); 6937ec681f3Smrg 6947ec681f3Smrg src->src_type = is_sampler ? 6957ec681f3Smrg nir_tex_src_sampler_offset : 6967ec681f3Smrg nir_tex_src_texture_offset; 6977ec681f3Smrg } else { 6987ec681f3Smrg nir_tex_instr_remove_src(instr, src_idx); 6997ec681f3Smrg } 7007ec681f3Smrg 7017ec681f3Smrg uint32_t set = deref->var->data.descriptor_set; 7027ec681f3Smrg uint32_t binding = deref->var->data.binding; 7037ec681f3Smrg /* FIXME: this is a really simplified check for the precision to be used 7047ec681f3Smrg * for the sampling. Right now we are ony checking for the variables used 7057ec681f3Smrg * on the operation itself, but there are other cases that we could use to 7067ec681f3Smrg * infer the precision requirement. 7077ec681f3Smrg */ 7087ec681f3Smrg bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM || 7097ec681f3Smrg deref->var->data.precision == GLSL_PRECISION_LOW; 7107ec681f3Smrg struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout; 7117ec681f3Smrg struct v3dv_descriptor_set_binding_layout *binding_layout = 7127ec681f3Smrg &set_layout->binding[binding]; 7137ec681f3Smrg 7147ec681f3Smrg /* For input attachments, the shader includes the attachment_idx. As we are 7157ec681f3Smrg * treating them as a texture, we only want the base_index 7167ec681f3Smrg */ 7177ec681f3Smrg uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ? 
7187ec681f3Smrg deref->var->data.index + base_index : 7197ec681f3Smrg base_index; 7207ec681f3Smrg 7217ec681f3Smrg uint8_t return_size; 7227ec681f3Smrg if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT)) 7237ec681f3Smrg return_size = 16; 7247ec681f3Smrg else if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT)) 7257ec681f3Smrg return_size = 32; 7267ec681f3Smrg else 7277ec681f3Smrg return_size = relaxed_precision || instr->is_shadow ? 16 : 32; 7287ec681f3Smrg 7297ec681f3Smrg struct v3dv_descriptor_map *map = 7307ec681f3Smrg pipeline_get_descriptor_map(pipeline, binding_layout->type, 7317ec681f3Smrg shader->info.stage, is_sampler); 7327ec681f3Smrg int desc_index = 7337ec681f3Smrg descriptor_map_add(map, 7347ec681f3Smrg deref->var->data.descriptor_set, 7357ec681f3Smrg deref->var->data.binding, 7367ec681f3Smrg array_index, 7377ec681f3Smrg binding_layout->array_size, 7387ec681f3Smrg return_size); 7397ec681f3Smrg 7407ec681f3Smrg if (is_sampler) 7417ec681f3Smrg instr->sampler_index = desc_index; 7427ec681f3Smrg else 7437ec681f3Smrg instr->texture_index = desc_index; 7447ec681f3Smrg 7457ec681f3Smrg return return_size; 7467ec681f3Smrg} 7477ec681f3Smrg 7487ec681f3Smrgstatic bool 7497ec681f3Smrglower_sampler(nir_builder *b, nir_tex_instr *instr, 7507ec681f3Smrg nir_shader *shader, 7517ec681f3Smrg struct v3dv_pipeline *pipeline, 7527ec681f3Smrg const struct v3dv_pipeline_layout *layout) 7537ec681f3Smrg{ 7547ec681f3Smrg uint8_t return_size = 0; 7557ec681f3Smrg 7567ec681f3Smrg int texture_idx = 7577ec681f3Smrg nir_tex_instr_src_index(instr, nir_tex_src_texture_deref); 7587ec681f3Smrg 7597ec681f3Smrg if (texture_idx >= 0) 7607ec681f3Smrg return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader, 7617ec681f3Smrg pipeline, layout); 7627ec681f3Smrg 7637ec681f3Smrg int sampler_idx = 7647ec681f3Smrg nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref); 7657ec681f3Smrg 7667ec681f3Smrg if (sampler_idx >= 0) 7677ec681f3Smrg lower_tex_src_to_offset(b, instr, sampler_idx, shader, 
pipeline, layout); 7687ec681f3Smrg 7697ec681f3Smrg if (texture_idx < 0 && sampler_idx < 0) 7707ec681f3Smrg return false; 7717ec681f3Smrg 7727ec681f3Smrg /* If we don't have a sampler, we assign it the idx we reserve for this 7737ec681f3Smrg * case, and we ensure that it is using the correct return size. 7747ec681f3Smrg */ 7757ec681f3Smrg if (sampler_idx < 0) { 7767ec681f3Smrg instr->sampler_index = return_size == 16 ? 7777ec681f3Smrg V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX; 7787ec681f3Smrg } 7797ec681f3Smrg 7807ec681f3Smrg return true; 7817ec681f3Smrg} 7827ec681f3Smrg 7837ec681f3Smrg/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */ 7847ec681f3Smrgstatic void 7857ec681f3Smrglower_image_deref(nir_builder *b, 7867ec681f3Smrg nir_intrinsic_instr *instr, 7877ec681f3Smrg nir_shader *shader, 7887ec681f3Smrg struct v3dv_pipeline *pipeline, 7897ec681f3Smrg const struct v3dv_pipeline_layout *layout) 7907ec681f3Smrg{ 7917ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); 7927ec681f3Smrg nir_ssa_def *index = NULL; 7937ec681f3Smrg unsigned array_elements = 1; 7947ec681f3Smrg unsigned base_index = 0; 7957ec681f3Smrg 7967ec681f3Smrg while (deref->deref_type != nir_deref_type_var) { 7977ec681f3Smrg assert(deref->parent.is_ssa); 7987ec681f3Smrg nir_deref_instr *parent = 7997ec681f3Smrg nir_instr_as_deref(deref->parent.ssa->parent_instr); 8007ec681f3Smrg 8017ec681f3Smrg assert(deref->deref_type == nir_deref_type_array); 8027ec681f3Smrg 8037ec681f3Smrg if (nir_src_is_const(deref->arr.index) && index == NULL) { 8047ec681f3Smrg /* We're still building a direct index */ 8057ec681f3Smrg base_index += nir_src_as_uint(deref->arr.index) * array_elements; 8067ec681f3Smrg } else { 8077ec681f3Smrg if (index == NULL) { 8087ec681f3Smrg /* We used to be direct but not anymore */ 8097ec681f3Smrg index = nir_imm_int(b, base_index); 8107ec681f3Smrg base_index = 0; 8117ec681f3Smrg } 8127ec681f3Smrg 8137ec681f3Smrg index = nir_iadd(b, index, 
8147ec681f3Smrg nir_imul(b, nir_imm_int(b, array_elements), 8157ec681f3Smrg nir_ssa_for_src(b, deref->arr.index, 1))); 8167ec681f3Smrg } 8177ec681f3Smrg 8187ec681f3Smrg array_elements *= glsl_get_length(parent->type); 8197ec681f3Smrg 8207ec681f3Smrg deref = parent; 8217ec681f3Smrg } 8227ec681f3Smrg 8237ec681f3Smrg if (index) 8247ec681f3Smrg index = nir_umin(b, index, nir_imm_int(b, array_elements - 1)); 8257ec681f3Smrg 8267ec681f3Smrg uint32_t set = deref->var->data.descriptor_set; 8277ec681f3Smrg uint32_t binding = deref->var->data.binding; 8287ec681f3Smrg struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout; 8297ec681f3Smrg struct v3dv_descriptor_set_binding_layout *binding_layout = 8307ec681f3Smrg &set_layout->binding[binding]; 8317ec681f3Smrg 8327ec681f3Smrg uint32_t array_index = deref->var->data.index + base_index; 8337ec681f3Smrg 8347ec681f3Smrg assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE || 8357ec681f3Smrg binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); 8367ec681f3Smrg 8377ec681f3Smrg struct v3dv_descriptor_map *map = 8387ec681f3Smrg pipeline_get_descriptor_map(pipeline, binding_layout->type, 8397ec681f3Smrg shader->info.stage, false); 8407ec681f3Smrg 8417ec681f3Smrg int desc_index = 8427ec681f3Smrg descriptor_map_add(map, 8437ec681f3Smrg deref->var->data.descriptor_set, 8447ec681f3Smrg deref->var->data.binding, 8457ec681f3Smrg array_index, 8467ec681f3Smrg binding_layout->array_size, 8477ec681f3Smrg 32 /* return_size: doesn't apply for textures */); 8487ec681f3Smrg 8497ec681f3Smrg /* Note: we don't need to do anything here in relation to the precision and 8507ec681f3Smrg * the output size because for images we can infer that info from the image 8517ec681f3Smrg * intrinsic, that includes the image format (see 8527ec681f3Smrg * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler. 
8537ec681f3Smrg */ 8547ec681f3Smrg 8557ec681f3Smrg index = nir_imm_int(b, desc_index); 8567ec681f3Smrg 8577ec681f3Smrg nir_rewrite_image_intrinsic(instr, index, false); 8587ec681f3Smrg} 8597ec681f3Smrg 8607ec681f3Smrgstatic bool 8617ec681f3Smrglower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, 8627ec681f3Smrg nir_shader *shader, 8637ec681f3Smrg struct v3dv_pipeline *pipeline, 8647ec681f3Smrg const struct v3dv_pipeline_layout *layout) 8657ec681f3Smrg{ 8667ec681f3Smrg switch (instr->intrinsic) { 8677ec681f3Smrg case nir_intrinsic_load_layer_id: 8687ec681f3Smrg /* FIXME: if layered rendering gets supported, this would need a real 8697ec681f3Smrg * lowering 8707ec681f3Smrg */ 8717ec681f3Smrg nir_ssa_def_rewrite_uses(&instr->dest.ssa, 8727ec681f3Smrg nir_imm_int(b, 0)); 8737ec681f3Smrg nir_instr_remove(&instr->instr); 8747ec681f3Smrg return true; 8757ec681f3Smrg 8767ec681f3Smrg case nir_intrinsic_load_push_constant: 8777ec681f3Smrg lower_load_push_constant(b, instr, pipeline); 8787ec681f3Smrg return true; 8797ec681f3Smrg 8807ec681f3Smrg case nir_intrinsic_vulkan_resource_index: 8817ec681f3Smrg lower_vulkan_resource_index(b, instr, shader, pipeline, layout); 8827ec681f3Smrg return true; 8837ec681f3Smrg 8847ec681f3Smrg case nir_intrinsic_load_vulkan_descriptor: { 8857ec681f3Smrg /* Loading the descriptor happens as part of load/store instructions, 8867ec681f3Smrg * so for us this is a no-op. 
8877ec681f3Smrg */ 8887ec681f3Smrg nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa); 8897ec681f3Smrg nir_instr_remove(&instr->instr); 8907ec681f3Smrg return true; 8917ec681f3Smrg } 8927ec681f3Smrg 8937ec681f3Smrg case nir_intrinsic_image_deref_load: 8947ec681f3Smrg case nir_intrinsic_image_deref_store: 8957ec681f3Smrg case nir_intrinsic_image_deref_atomic_add: 8967ec681f3Smrg case nir_intrinsic_image_deref_atomic_imin: 8977ec681f3Smrg case nir_intrinsic_image_deref_atomic_umin: 8987ec681f3Smrg case nir_intrinsic_image_deref_atomic_imax: 8997ec681f3Smrg case nir_intrinsic_image_deref_atomic_umax: 9007ec681f3Smrg case nir_intrinsic_image_deref_atomic_and: 9017ec681f3Smrg case nir_intrinsic_image_deref_atomic_or: 9027ec681f3Smrg case nir_intrinsic_image_deref_atomic_xor: 9037ec681f3Smrg case nir_intrinsic_image_deref_atomic_exchange: 9047ec681f3Smrg case nir_intrinsic_image_deref_atomic_comp_swap: 9057ec681f3Smrg case nir_intrinsic_image_deref_size: 9067ec681f3Smrg case nir_intrinsic_image_deref_samples: 9077ec681f3Smrg lower_image_deref(b, instr, shader, pipeline, layout); 9087ec681f3Smrg return true; 9097ec681f3Smrg 9107ec681f3Smrg default: 9117ec681f3Smrg return false; 9127ec681f3Smrg } 9137ec681f3Smrg} 9147ec681f3Smrg 9157ec681f3Smrgstatic bool 9167ec681f3Smrglower_impl(nir_function_impl *impl, 9177ec681f3Smrg nir_shader *shader, 9187ec681f3Smrg struct v3dv_pipeline *pipeline, 9197ec681f3Smrg const struct v3dv_pipeline_layout *layout) 9207ec681f3Smrg{ 9217ec681f3Smrg nir_builder b; 9227ec681f3Smrg nir_builder_init(&b, impl); 9237ec681f3Smrg bool progress = false; 9247ec681f3Smrg 9257ec681f3Smrg nir_foreach_block(block, impl) { 9267ec681f3Smrg nir_foreach_instr_safe(instr, block) { 9277ec681f3Smrg b.cursor = nir_before_instr(instr); 9287ec681f3Smrg switch (instr->type) { 9297ec681f3Smrg case nir_instr_type_tex: 9307ec681f3Smrg progress |= 9317ec681f3Smrg lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout); 9327ec681f3Smrg break; 
9337ec681f3Smrg case nir_instr_type_intrinsic: 9347ec681f3Smrg progress |= 9357ec681f3Smrg lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, 9367ec681f3Smrg pipeline, layout); 9377ec681f3Smrg break; 9387ec681f3Smrg default: 9397ec681f3Smrg break; 9407ec681f3Smrg } 9417ec681f3Smrg } 9427ec681f3Smrg } 9437ec681f3Smrg 9447ec681f3Smrg return progress; 9457ec681f3Smrg} 9467ec681f3Smrg 9477ec681f3Smrgstatic bool 9487ec681f3Smrglower_pipeline_layout_info(nir_shader *shader, 9497ec681f3Smrg struct v3dv_pipeline *pipeline, 9507ec681f3Smrg const struct v3dv_pipeline_layout *layout) 9517ec681f3Smrg{ 9527ec681f3Smrg bool progress = false; 9537ec681f3Smrg 9547ec681f3Smrg nir_foreach_function(function, shader) { 9557ec681f3Smrg if (function->impl) 9567ec681f3Smrg progress |= lower_impl(function->impl, shader, pipeline, layout); 9577ec681f3Smrg } 9587ec681f3Smrg 9597ec681f3Smrg return progress; 9607ec681f3Smrg} 9617ec681f3Smrg 9627ec681f3Smrg 9637ec681f3Smrgstatic void 9647ec681f3Smrglower_fs_io(nir_shader *nir) 9657ec681f3Smrg{ 9667ec681f3Smrg /* Our backend doesn't handle array fragment shader outputs */ 9677ec681f3Smrg NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 9687ec681f3Smrg NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL); 9697ec681f3Smrg 9707ec681f3Smrg nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, 9717ec681f3Smrg MESA_SHADER_FRAGMENT); 9727ec681f3Smrg 9737ec681f3Smrg nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, 9747ec681f3Smrg MESA_SHADER_FRAGMENT); 9757ec681f3Smrg 9767ec681f3Smrg NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, 9777ec681f3Smrg type_size_vec4, 0); 9787ec681f3Smrg} 9797ec681f3Smrg 9807ec681f3Smrgstatic void 9817ec681f3Smrglower_gs_io(struct nir_shader *nir) 9827ec681f3Smrg{ 9837ec681f3Smrg NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 9847ec681f3Smrg 9857ec681f3Smrg nir_assign_io_var_locations(nir, 
nir_var_shader_in, &nir->num_inputs, 9867ec681f3Smrg MESA_SHADER_GEOMETRY); 9877ec681f3Smrg 9887ec681f3Smrg nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, 9897ec681f3Smrg MESA_SHADER_GEOMETRY); 9907ec681f3Smrg} 9917ec681f3Smrg 9927ec681f3Smrgstatic void 9937ec681f3Smrglower_vs_io(struct nir_shader *nir) 9947ec681f3Smrg{ 9957ec681f3Smrg NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 9967ec681f3Smrg 9977ec681f3Smrg nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, 9987ec681f3Smrg MESA_SHADER_VERTEX); 9997ec681f3Smrg 10007ec681f3Smrg nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, 10017ec681f3Smrg MESA_SHADER_VERTEX); 10027ec681f3Smrg 10037ec681f3Smrg /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it 10047ec681f3Smrg * overlaps with v3d_nir_lower_io. Need further research though. 10057ec681f3Smrg */ 10067ec681f3Smrg} 10077ec681f3Smrg 10087ec681f3Smrgstatic void 10097ec681f3Smrgshader_debug_output(const char *message, void *data) 10107ec681f3Smrg{ 10117ec681f3Smrg /* FIXME: We probably don't want to debug anything extra here, and in fact 10127ec681f3Smrg * the compiler is not using this callback too much, only as an alternative 10137ec681f3Smrg * way to debug out the shaderdb stats, that you can already get using 10147ec681f3Smrg * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d 10157ec681f3Smrg * compiler to remove that callback. 
10167ec681f3Smrg */ 10177ec681f3Smrg} 10187ec681f3Smrg 10197ec681f3Smrgstatic void 10207ec681f3Smrgpipeline_populate_v3d_key(struct v3d_key *key, 10217ec681f3Smrg const struct v3dv_pipeline_stage *p_stage, 10227ec681f3Smrg uint32_t ucp_enables, 10237ec681f3Smrg bool robust_buffer_access) 10247ec681f3Smrg{ 10257ec681f3Smrg assert(p_stage->pipeline->shared_data && 10267ec681f3Smrg p_stage->pipeline->shared_data->maps[p_stage->stage]); 10277ec681f3Smrg 10287ec681f3Smrg /* The following values are default values used at pipeline create. We use 10297ec681f3Smrg * there 32 bit as default return size. 10307ec681f3Smrg */ 10317ec681f3Smrg struct v3dv_descriptor_map *sampler_map = 10327ec681f3Smrg &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map; 10337ec681f3Smrg struct v3dv_descriptor_map *texture_map = 10347ec681f3Smrg &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map; 10357ec681f3Smrg 10367ec681f3Smrg key->num_tex_used = texture_map->num_desc; 10377ec681f3Smrg assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS); 10387ec681f3Smrg for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) { 10397ec681f3Smrg key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X; 10407ec681f3Smrg key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y; 10417ec681f3Smrg key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z; 10427ec681f3Smrg key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W; 10437ec681f3Smrg } 10447ec681f3Smrg 10457ec681f3Smrg key->num_samplers_used = sampler_map->num_desc; 10467ec681f3Smrg assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS); 10477ec681f3Smrg for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc; 10487ec681f3Smrg sampler_idx++) { 10497ec681f3Smrg key->sampler[sampler_idx].return_size = 10507ec681f3Smrg sampler_map->return_size[sampler_idx]; 10517ec681f3Smrg 10527ec681f3Smrg key->sampler[sampler_idx].return_channels = 10537ec681f3Smrg key->sampler[sampler_idx].return_size == 32 ? 
4 : 2; 10547ec681f3Smrg } 10557ec681f3Smrg 10567ec681f3Smrg switch (p_stage->stage) { 10577ec681f3Smrg case BROADCOM_SHADER_VERTEX: 10587ec681f3Smrg case BROADCOM_SHADER_VERTEX_BIN: 10597ec681f3Smrg key->is_last_geometry_stage = p_stage->pipeline->gs == NULL; 10607ec681f3Smrg break; 10617ec681f3Smrg case BROADCOM_SHADER_GEOMETRY: 10627ec681f3Smrg case BROADCOM_SHADER_GEOMETRY_BIN: 10637ec681f3Smrg /* FIXME: while we don't implement tessellation shaders */ 10647ec681f3Smrg key->is_last_geometry_stage = true; 10657ec681f3Smrg break; 10667ec681f3Smrg case BROADCOM_SHADER_FRAGMENT: 10677ec681f3Smrg case BROADCOM_SHADER_COMPUTE: 10687ec681f3Smrg key->is_last_geometry_stage = false; 10697ec681f3Smrg break; 10707ec681f3Smrg default: 10717ec681f3Smrg unreachable("unsupported shader stage"); 10727ec681f3Smrg } 10737ec681f3Smrg 10747ec681f3Smrg /* Vulkan doesn't have fixed function state for user clip planes. Instead, 10757ec681f3Smrg * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler 10767ec681f3Smrg * takes care of adding a single compact array variable at 10777ec681f3Smrg * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering. 10787ec681f3Smrg * 10797ec681f3Smrg * The only lowering we are interested is specific to the fragment shader, 10807ec681f3Smrg * where we want to emit discards to honor writes to gl_ClipDistance[] in 10817ec681f3Smrg * previous stages. This is done via nir_lower_clip_fs() so we only set up 10827ec681f3Smrg * the ucp enable mask for that stage. 10837ec681f3Smrg */ 10847ec681f3Smrg key->ucp_enables = ucp_enables; 10857ec681f3Smrg 10867ec681f3Smrg key->robust_buffer_access = robust_buffer_access; 10877ec681f3Smrg 10887ec681f3Smrg key->environment = V3D_ENVIRONMENT_VULKAN; 10897ec681f3Smrg} 10907ec681f3Smrg 10917ec681f3Smrg/* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the 10927ec681f3Smrg * same. 
For not using prim_mode that is the one already used on v3d 10937ec681f3Smrg */ 10947ec681f3Smrgstatic const enum pipe_prim_type vk_to_pipe_prim_type[] = { 10957ec681f3Smrg [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS, 10967ec681f3Smrg [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES, 10977ec681f3Smrg [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP, 10987ec681f3Smrg [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES, 10997ec681f3Smrg [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP, 11007ec681f3Smrg [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN, 11017ec681f3Smrg [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY, 11027ec681f3Smrg [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY, 11037ec681f3Smrg [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY, 11047ec681f3Smrg [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY, 11057ec681f3Smrg}; 11067ec681f3Smrg 11077ec681f3Smrgstatic const enum pipe_logicop vk_to_pipe_logicop[] = { 11087ec681f3Smrg [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR, 11097ec681f3Smrg [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND, 11107ec681f3Smrg [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE, 11117ec681f3Smrg [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY, 11127ec681f3Smrg [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED, 11137ec681f3Smrg [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP, 11147ec681f3Smrg [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR, 11157ec681f3Smrg [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR, 11167ec681f3Smrg [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR, 11177ec681f3Smrg [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV, 11187ec681f3Smrg [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT, 11197ec681f3Smrg [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE, 11207ec681f3Smrg [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED, 11217ec681f3Smrg [VK_LOGIC_OP_OR_INVERTED] = 
PIPE_LOGICOP_OR_INVERTED, 11227ec681f3Smrg [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND, 11237ec681f3Smrg [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET, 11247ec681f3Smrg}; 11257ec681f3Smrg 11267ec681f3Smrgstatic void 11277ec681f3Smrgpipeline_populate_v3d_fs_key(struct v3d_fs_key *key, 11287ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 11297ec681f3Smrg const struct v3dv_pipeline_stage *p_stage, 11307ec681f3Smrg bool has_geometry_shader, 11317ec681f3Smrg uint32_t ucp_enables) 11327ec681f3Smrg{ 11337ec681f3Smrg assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT); 11347ec681f3Smrg 11357ec681f3Smrg memset(key, 0, sizeof(*key)); 11367ec681f3Smrg 11377ec681f3Smrg const bool rba = p_stage->pipeline->device->features.robustBufferAccess; 11387ec681f3Smrg pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba); 11397ec681f3Smrg 11407ec681f3Smrg const VkPipelineInputAssemblyStateCreateInfo *ia_info = 11417ec681f3Smrg pCreateInfo->pInputAssemblyState; 11427ec681f3Smrg uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; 11437ec681f3Smrg 11447ec681f3Smrg key->is_points = (topology == PIPE_PRIM_POINTS); 11457ec681f3Smrg key->is_lines = (topology >= PIPE_PRIM_LINES && 11467ec681f3Smrg topology <= PIPE_PRIM_LINE_STRIP); 11477ec681f3Smrg key->has_gs = has_geometry_shader; 11487ec681f3Smrg 11497ec681f3Smrg const VkPipelineColorBlendStateCreateInfo *cb_info = 11507ec681f3Smrg !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? 11517ec681f3Smrg pCreateInfo->pColorBlendState : NULL; 11527ec681f3Smrg 11537ec681f3Smrg key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? 11547ec681f3Smrg vk_to_pipe_logicop[cb_info->logicOp] : 11557ec681f3Smrg PIPE_LOGICOP_COPY; 11567ec681f3Smrg 11577ec681f3Smrg const bool raster_enabled = 11587ec681f3Smrg !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; 11597ec681f3Smrg 11607ec681f3Smrg /* Multisample rasterization state must be ignored if rasterization 11617ec681f3Smrg * is disabled. 
11627ec681f3Smrg */ 11637ec681f3Smrg const VkPipelineMultisampleStateCreateInfo *ms_info = 11647ec681f3Smrg raster_enabled ? pCreateInfo->pMultisampleState : NULL; 11657ec681f3Smrg if (ms_info) { 11667ec681f3Smrg assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT || 11677ec681f3Smrg ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT); 11687ec681f3Smrg key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; 11697ec681f3Smrg 11707ec681f3Smrg if (key->msaa) { 11717ec681f3Smrg key->sample_coverage = 11727ec681f3Smrg p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1; 11737ec681f3Smrg key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable; 11747ec681f3Smrg key->sample_alpha_to_one = ms_info->alphaToOneEnable; 11757ec681f3Smrg } 11767ec681f3Smrg } 11777ec681f3Smrg 11787ec681f3Smrg /* This is intended for V3D versions before 4.1, otherwise we just use the 11797ec681f3Smrg * tile buffer load/store swap R/B bit. 11807ec681f3Smrg */ 11817ec681f3Smrg key->swap_color_rb = 0; 11827ec681f3Smrg 11837ec681f3Smrg const struct v3dv_render_pass *pass = 11847ec681f3Smrg v3dv_render_pass_from_handle(pCreateInfo->renderPass); 11857ec681f3Smrg const struct v3dv_subpass *subpass = p_stage->pipeline->subpass; 11867ec681f3Smrg for (uint32_t i = 0; i < subpass->color_count; i++) { 11877ec681f3Smrg const uint32_t att_idx = subpass->color_attachments[i].attachment; 11887ec681f3Smrg if (att_idx == VK_ATTACHMENT_UNUSED) 11897ec681f3Smrg continue; 11907ec681f3Smrg 11917ec681f3Smrg key->cbufs |= 1 << i; 11927ec681f3Smrg 11937ec681f3Smrg VkFormat fb_format = pass->attachments[att_idx].desc.format; 11947ec681f3Smrg enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format); 11957ec681f3Smrg 11967ec681f3Smrg /* If logic operations are enabled then we might emit color reads and we 11977ec681f3Smrg * need to know the color buffer format and swizzle for that 11987ec681f3Smrg */ 11997ec681f3Smrg if (key->logicop_func != PIPE_LOGICOP_COPY) { 
12007ec681f3Smrg key->color_fmt[i].format = fb_pipe_format; 12017ec681f3Smrg key->color_fmt[i].swizzle = 12027ec681f3Smrg v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format); 12037ec681f3Smrg } 12047ec681f3Smrg 12057ec681f3Smrg const struct util_format_description *desc = 12067ec681f3Smrg vk_format_description(fb_format); 12077ec681f3Smrg 12087ec681f3Smrg if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && 12097ec681f3Smrg desc->channel[0].size == 32) { 12107ec681f3Smrg key->f32_color_rb |= 1 << i; 12117ec681f3Smrg } 12127ec681f3Smrg 12137ec681f3Smrg if (p_stage->nir->info.fs.untyped_color_outputs) { 12147ec681f3Smrg if (util_format_is_pure_uint(fb_pipe_format)) 12157ec681f3Smrg key->uint_color_rb |= 1 << i; 12167ec681f3Smrg else if (util_format_is_pure_sint(fb_pipe_format)) 12177ec681f3Smrg key->int_color_rb |= 1 << i; 12187ec681f3Smrg } 12197ec681f3Smrg 12207ec681f3Smrg if (key->is_points) { 12217ec681f3Smrg /* FIXME: The mask would need to be computed based on the shader 12227ec681f3Smrg * inputs. On gallium it is done at st_atom_rasterizer 12237ec681f3Smrg * (sprite_coord_enable). anv seems (need to confirm) to do that on 12247ec681f3Smrg * genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also 12257ec681f3Smrg * better to have tests to guide filling the mask. 12267ec681f3Smrg */ 12277ec681f3Smrg key->point_sprite_mask = 0; 12287ec681f3Smrg 12297ec681f3Smrg /* Vulkan mandates upper left. 
*/ 12307ec681f3Smrg key->point_coord_upper_left = true; 12317ec681f3Smrg } 12327ec681f3Smrg } 12337ec681f3Smrg} 12347ec681f3Smrg 12357ec681f3Smrgstatic void 12367ec681f3Smrgsetup_stage_outputs_from_next_stage_inputs( 12377ec681f3Smrg uint8_t next_stage_num_inputs, 12387ec681f3Smrg struct v3d_varying_slot *next_stage_input_slots, 12397ec681f3Smrg uint8_t *num_used_outputs, 12407ec681f3Smrg struct v3d_varying_slot *used_output_slots, 12417ec681f3Smrg uint32_t size_of_used_output_slots) 12427ec681f3Smrg{ 12437ec681f3Smrg *num_used_outputs = next_stage_num_inputs; 12447ec681f3Smrg memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots); 12457ec681f3Smrg} 12467ec681f3Smrg 12477ec681f3Smrgstatic void 12487ec681f3Smrgpipeline_populate_v3d_gs_key(struct v3d_gs_key *key, 12497ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 12507ec681f3Smrg const struct v3dv_pipeline_stage *p_stage) 12517ec681f3Smrg{ 12527ec681f3Smrg assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY || 12537ec681f3Smrg p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN); 12547ec681f3Smrg 12557ec681f3Smrg memset(key, 0, sizeof(*key)); 12567ec681f3Smrg 12577ec681f3Smrg const bool rba = p_stage->pipeline->device->features.robustBufferAccess; 12587ec681f3Smrg pipeline_populate_v3d_key(&key->base, p_stage, 0, rba); 12597ec681f3Smrg 12607ec681f3Smrg struct v3dv_pipeline *pipeline = p_stage->pipeline; 12617ec681f3Smrg 12627ec681f3Smrg key->per_vertex_point_size = 12637ec681f3Smrg p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ); 12647ec681f3Smrg 12657ec681f3Smrg key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage); 12667ec681f3Smrg 12677ec681f3Smrg assert(key->base.is_last_geometry_stage); 12687ec681f3Smrg if (key->is_coord) { 12697ec681f3Smrg /* Output varyings in the last binning shader are only used for transform 12707ec681f3Smrg * feedback. Set to 0 as VK_EXT_transform_feedback is not supported. 
12717ec681f3Smrg */ 12727ec681f3Smrg key->num_used_outputs = 0; 12737ec681f3Smrg } else { 12747ec681f3Smrg struct v3dv_shader_variant *fs_variant = 12757ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 12767ec681f3Smrg 12777ec681f3Smrg STATIC_ASSERT(sizeof(key->used_outputs) == 12787ec681f3Smrg sizeof(fs_variant->prog_data.fs->input_slots)); 12797ec681f3Smrg 12807ec681f3Smrg setup_stage_outputs_from_next_stage_inputs( 12817ec681f3Smrg fs_variant->prog_data.fs->num_inputs, 12827ec681f3Smrg fs_variant->prog_data.fs->input_slots, 12837ec681f3Smrg &key->num_used_outputs, 12847ec681f3Smrg key->used_outputs, 12857ec681f3Smrg sizeof(key->used_outputs)); 12867ec681f3Smrg } 12877ec681f3Smrg} 12887ec681f3Smrg 12897ec681f3Smrgstatic void 12907ec681f3Smrgpipeline_populate_v3d_vs_key(struct v3d_vs_key *key, 12917ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 12927ec681f3Smrg const struct v3dv_pipeline_stage *p_stage) 12937ec681f3Smrg{ 12947ec681f3Smrg assert(p_stage->stage == BROADCOM_SHADER_VERTEX || 12957ec681f3Smrg p_stage->stage == BROADCOM_SHADER_VERTEX_BIN); 12967ec681f3Smrg 12977ec681f3Smrg memset(key, 0, sizeof(*key)); 12987ec681f3Smrg 12997ec681f3Smrg const bool rba = p_stage->pipeline->device->features.robustBufferAccess; 13007ec681f3Smrg pipeline_populate_v3d_key(&key->base, p_stage, 0, rba); 13017ec681f3Smrg 13027ec681f3Smrg struct v3dv_pipeline *pipeline = p_stage->pipeline; 13037ec681f3Smrg 13047ec681f3Smrg /* Vulkan specifies a point size per vertex, so true for if the prim are 13057ec681f3Smrg * points, like on ES2) 13067ec681f3Smrg */ 13077ec681f3Smrg const VkPipelineInputAssemblyStateCreateInfo *ia_info = 13087ec681f3Smrg pCreateInfo->pInputAssemblyState; 13097ec681f3Smrg uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; 13107ec681f3Smrg 13117ec681f3Smrg /* FIXME: PRIM_POINTS is not enough, in gallium the full check is 13127ec681f3Smrg * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */ 
13137ec681f3Smrg key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS); 13147ec681f3Smrg 13157ec681f3Smrg key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage); 13167ec681f3Smrg 13177ec681f3Smrg if (key->is_coord) { /* Binning VS*/ 13187ec681f3Smrg if (key->base.is_last_geometry_stage) { 13197ec681f3Smrg /* Output varyings in the last binning shader are only used for 13207ec681f3Smrg * transform feedback. Set to 0 as VK_EXT_transform_feedback is not 13217ec681f3Smrg * supported. 13227ec681f3Smrg */ 13237ec681f3Smrg key->num_used_outputs = 0; 13247ec681f3Smrg } else { 13257ec681f3Smrg /* Linking against GS binning program */ 13267ec681f3Smrg assert(pipeline->gs); 13277ec681f3Smrg struct v3dv_shader_variant *gs_bin_variant = 13287ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; 13297ec681f3Smrg 13307ec681f3Smrg STATIC_ASSERT(sizeof(key->used_outputs) == 13317ec681f3Smrg sizeof(gs_bin_variant->prog_data.gs->input_slots)); 13327ec681f3Smrg 13337ec681f3Smrg setup_stage_outputs_from_next_stage_inputs( 13347ec681f3Smrg gs_bin_variant->prog_data.gs->num_inputs, 13357ec681f3Smrg gs_bin_variant->prog_data.gs->input_slots, 13367ec681f3Smrg &key->num_used_outputs, 13377ec681f3Smrg key->used_outputs, 13387ec681f3Smrg sizeof(key->used_outputs)); 13397ec681f3Smrg } 13407ec681f3Smrg } else { /* Render VS */ 13417ec681f3Smrg if (pipeline->gs) { 13427ec681f3Smrg /* Linking against GS render program */ 13437ec681f3Smrg struct v3dv_shader_variant *gs_variant = 13447ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; 13457ec681f3Smrg 13467ec681f3Smrg STATIC_ASSERT(sizeof(key->used_outputs) == 13477ec681f3Smrg sizeof(gs_variant->prog_data.gs->input_slots)); 13487ec681f3Smrg 13497ec681f3Smrg setup_stage_outputs_from_next_stage_inputs( 13507ec681f3Smrg gs_variant->prog_data.gs->num_inputs, 13517ec681f3Smrg gs_variant->prog_data.gs->input_slots, 13527ec681f3Smrg &key->num_used_outputs, 13537ec681f3Smrg key->used_outputs, 
13547ec681f3Smrg sizeof(key->used_outputs)); 13557ec681f3Smrg } else { 13567ec681f3Smrg /* Linking against FS program */ 13577ec681f3Smrg struct v3dv_shader_variant *fs_variant = 13587ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 13597ec681f3Smrg 13607ec681f3Smrg STATIC_ASSERT(sizeof(key->used_outputs) == 13617ec681f3Smrg sizeof(fs_variant->prog_data.fs->input_slots)); 13627ec681f3Smrg 13637ec681f3Smrg setup_stage_outputs_from_next_stage_inputs( 13647ec681f3Smrg fs_variant->prog_data.fs->num_inputs, 13657ec681f3Smrg fs_variant->prog_data.fs->input_slots, 13667ec681f3Smrg &key->num_used_outputs, 13677ec681f3Smrg key->used_outputs, 13687ec681f3Smrg sizeof(key->used_outputs)); 13697ec681f3Smrg } 13707ec681f3Smrg } 13717ec681f3Smrg 13727ec681f3Smrg const VkPipelineVertexInputStateCreateInfo *vi_info = 13737ec681f3Smrg pCreateInfo->pVertexInputState; 13747ec681f3Smrg for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { 13757ec681f3Smrg const VkVertexInputAttributeDescription *desc = 13767ec681f3Smrg &vi_info->pVertexAttributeDescriptions[i]; 13777ec681f3Smrg assert(desc->location < MAX_VERTEX_ATTRIBS); 13787ec681f3Smrg if (desc->format == VK_FORMAT_B8G8R8A8_UNORM) 13797ec681f3Smrg key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location); 13807ec681f3Smrg } 13817ec681f3Smrg} 13827ec681f3Smrg 13837ec681f3Smrg/** 13847ec681f3Smrg * Creates the initial form of the pipeline stage for a binning shader by 13857ec681f3Smrg * cloning the render shader and flagging it as a coordinate shader. 13867ec681f3Smrg * 13877ec681f3Smrg * Returns NULL if it was not able to allocate the object, so it should be 13887ec681f3Smrg * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error. 
13897ec681f3Smrg */ 13907ec681f3Smrgstatic struct v3dv_pipeline_stage * 13917ec681f3Smrgpipeline_stage_create_binning(const struct v3dv_pipeline_stage *src, 13927ec681f3Smrg const VkAllocationCallbacks *pAllocator) 13937ec681f3Smrg{ 13947ec681f3Smrg struct v3dv_device *device = src->pipeline->device; 13957ec681f3Smrg 13967ec681f3Smrg struct v3dv_pipeline_stage *p_stage = 13977ec681f3Smrg vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 13987ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 13997ec681f3Smrg 14007ec681f3Smrg if (p_stage == NULL) 14017ec681f3Smrg return NULL; 14027ec681f3Smrg 14037ec681f3Smrg assert(src->stage == BROADCOM_SHADER_VERTEX || 14047ec681f3Smrg src->stage == BROADCOM_SHADER_GEOMETRY); 14057ec681f3Smrg 14067ec681f3Smrg enum broadcom_shader_stage bin_stage = 14077ec681f3Smrg src->stage == BROADCOM_SHADER_VERTEX ? 14087ec681f3Smrg BROADCOM_SHADER_VERTEX_BIN : 14097ec681f3Smrg BROADCOM_SHADER_GEOMETRY_BIN; 14107ec681f3Smrg 14117ec681f3Smrg p_stage->pipeline = src->pipeline; 14127ec681f3Smrg p_stage->stage = bin_stage; 14137ec681f3Smrg p_stage->entrypoint = src->entrypoint; 14147ec681f3Smrg p_stage->module = src->module; 14157ec681f3Smrg /* For binning shaders we will clone the NIR code from the corresponding 14167ec681f3Smrg * render shader later, when we call pipeline_compile_xxx_shader. This way 14177ec681f3Smrg * we only have to run the relevant NIR lowerings once for render shaders 14187ec681f3Smrg */ 14197ec681f3Smrg p_stage->nir = NULL; 14207ec681f3Smrg p_stage->spec_info = src->spec_info; 14217ec681f3Smrg p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 }; 14227ec681f3Smrg memcpy(p_stage->shader_sha1, src->shader_sha1, 20); 14237ec681f3Smrg 14247ec681f3Smrg return p_stage; 14257ec681f3Smrg} 14267ec681f3Smrg 14277ec681f3Smrg/** 14287ec681f3Smrg * Returns false if it was not able to allocate or map the assembly bo memory. 
14297ec681f3Smrg */ 14307ec681f3Smrgstatic bool 14317ec681f3Smrgupload_assembly(struct v3dv_pipeline *pipeline) 14327ec681f3Smrg{ 14337ec681f3Smrg uint32_t total_size = 0; 14347ec681f3Smrg for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 14357ec681f3Smrg struct v3dv_shader_variant *variant = 14367ec681f3Smrg pipeline->shared_data->variants[stage]; 14377ec681f3Smrg 14387ec681f3Smrg if (variant != NULL) 14397ec681f3Smrg total_size += variant->qpu_insts_size; 14407ec681f3Smrg } 14417ec681f3Smrg 14427ec681f3Smrg struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size, 14437ec681f3Smrg "pipeline shader assembly", true); 14447ec681f3Smrg if (!bo) { 14457ec681f3Smrg fprintf(stderr, "failed to allocate memory for shader\n"); 14467ec681f3Smrg return false; 14477ec681f3Smrg } 14487ec681f3Smrg 14497ec681f3Smrg bool ok = v3dv_bo_map(pipeline->device, bo, total_size); 14507ec681f3Smrg if (!ok) { 14517ec681f3Smrg fprintf(stderr, "failed to map source shader buffer\n"); 14527ec681f3Smrg return false; 14537ec681f3Smrg } 14547ec681f3Smrg 14557ec681f3Smrg uint32_t offset = 0; 14567ec681f3Smrg for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 14577ec681f3Smrg struct v3dv_shader_variant *variant = 14587ec681f3Smrg pipeline->shared_data->variants[stage]; 14597ec681f3Smrg 14607ec681f3Smrg if (variant != NULL) { 14617ec681f3Smrg variant->assembly_offset = offset; 14627ec681f3Smrg 14637ec681f3Smrg memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size); 14647ec681f3Smrg offset += variant->qpu_insts_size; 14657ec681f3Smrg 14667ec681f3Smrg /* We dont need qpu_insts anymore. 
*/ 14677ec681f3Smrg free(variant->qpu_insts); 14687ec681f3Smrg variant->qpu_insts = NULL; 14697ec681f3Smrg } 14707ec681f3Smrg } 14717ec681f3Smrg assert(total_size == offset); 14727ec681f3Smrg 14737ec681f3Smrg pipeline->shared_data->assembly_bo = bo; 14747ec681f3Smrg 14757ec681f3Smrg return true; 14767ec681f3Smrg} 14777ec681f3Smrg 14787ec681f3Smrgstatic void 14797ec681f3Smrgpipeline_hash_graphics(const struct v3dv_pipeline *pipeline, 14807ec681f3Smrg struct v3dv_pipeline_key *key, 14817ec681f3Smrg unsigned char *sha1_out) 14827ec681f3Smrg{ 14837ec681f3Smrg struct mesa_sha1 ctx; 14847ec681f3Smrg _mesa_sha1_init(&ctx); 14857ec681f3Smrg 14867ec681f3Smrg /* We need to include all shader stages in the sha1 key as linking may modify 14877ec681f3Smrg * the shader code in any stage. An alternative would be to use the 14887ec681f3Smrg * serialized NIR, but that seems like an overkill. 14897ec681f3Smrg */ 14907ec681f3Smrg _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1, 14917ec681f3Smrg sizeof(pipeline->vs->shader_sha1)); 14927ec681f3Smrg 14937ec681f3Smrg if (pipeline->gs) { 14947ec681f3Smrg _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1, 14957ec681f3Smrg sizeof(pipeline->gs->shader_sha1)); 14967ec681f3Smrg } 14977ec681f3Smrg 14987ec681f3Smrg _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1, 14997ec681f3Smrg sizeof(pipeline->fs->shader_sha1)); 15007ec681f3Smrg 15017ec681f3Smrg _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key)); 15027ec681f3Smrg 15037ec681f3Smrg _mesa_sha1_final(&ctx, sha1_out); 15047ec681f3Smrg} 15057ec681f3Smrg 15067ec681f3Smrgstatic void 15077ec681f3Smrgpipeline_hash_compute(const struct v3dv_pipeline *pipeline, 15087ec681f3Smrg struct v3dv_pipeline_key *key, 15097ec681f3Smrg unsigned char *sha1_out) 15107ec681f3Smrg{ 15117ec681f3Smrg struct mesa_sha1 ctx; 15127ec681f3Smrg _mesa_sha1_init(&ctx); 15137ec681f3Smrg 15147ec681f3Smrg _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1, 15157ec681f3Smrg sizeof(pipeline->cs->shader_sha1)); 
15167ec681f3Smrg 15177ec681f3Smrg _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key)); 15187ec681f3Smrg 15197ec681f3Smrg _mesa_sha1_final(&ctx, sha1_out); 15207ec681f3Smrg} 15217ec681f3Smrg 15227ec681f3Smrg/* Checks that the pipeline has enough spill size to use for any of their 15237ec681f3Smrg * variants 15247ec681f3Smrg */ 15257ec681f3Smrgstatic void 15267ec681f3Smrgpipeline_check_spill_size(struct v3dv_pipeline *pipeline) 15277ec681f3Smrg{ 15287ec681f3Smrg uint32_t max_spill_size = 0; 15297ec681f3Smrg 15307ec681f3Smrg for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 15317ec681f3Smrg struct v3dv_shader_variant *variant = 15327ec681f3Smrg pipeline->shared_data->variants[stage]; 15337ec681f3Smrg 15347ec681f3Smrg if (variant != NULL) { 15357ec681f3Smrg max_spill_size = MAX2(variant->prog_data.base->spill_size, 15367ec681f3Smrg max_spill_size); 15377ec681f3Smrg } 15387ec681f3Smrg } 15397ec681f3Smrg 15407ec681f3Smrg if (max_spill_size > 0) { 15417ec681f3Smrg struct v3dv_device *device = pipeline->device; 15427ec681f3Smrg 15437ec681f3Smrg /* The TIDX register we use for choosing the area to access 15447ec681f3Smrg * for scratch space is: (core << 6) | (qpu << 2) | thread. 15457ec681f3Smrg * Even at minimum threadcount in a particular shader, that 15467ec681f3Smrg * means we still multiply by qpus by 4. 15477ec681f3Smrg */ 15487ec681f3Smrg const uint32_t total_spill_size = 15497ec681f3Smrg 4 * device->devinfo.qpu_count * max_spill_size; 15507ec681f3Smrg if (pipeline->spill.bo) { 15517ec681f3Smrg assert(pipeline->spill.size_per_thread > 0); 15527ec681f3Smrg v3dv_bo_free(device, pipeline->spill.bo); 15537ec681f3Smrg } 15547ec681f3Smrg pipeline->spill.bo = 15557ec681f3Smrg v3dv_bo_alloc(device, total_spill_size, "spill", true); 15567ec681f3Smrg pipeline->spill.size_per_thread = max_spill_size; 15577ec681f3Smrg } 15587ec681f3Smrg} 15597ec681f3Smrg 15607ec681f3Smrg/** 15617ec681f3Smrg * Creates a new shader_variant_create. 
Note that for prog_data is not const, 15627ec681f3Smrg * so it is assumed that the caller will prove a pointer that the 15637ec681f3Smrg * shader_variant will own. 15647ec681f3Smrg * 15657ec681f3Smrg * Creation doesn't include allocate a BD to store the content of qpu_insts, 15667ec681f3Smrg * as we will try to share the same bo for several shader variants. Also note 15677ec681f3Smrg * that qpu_ints being NULL is valid, for example if we are creating the 15687ec681f3Smrg * shader_variants from the cache, so we can just upload the assembly of all 15697ec681f3Smrg * the shader stages at once. 15707ec681f3Smrg */ 15717ec681f3Smrgstruct v3dv_shader_variant * 15727ec681f3Smrgv3dv_shader_variant_create(struct v3dv_device *device, 15737ec681f3Smrg enum broadcom_shader_stage stage, 15747ec681f3Smrg struct v3d_prog_data *prog_data, 15757ec681f3Smrg uint32_t prog_data_size, 15767ec681f3Smrg uint32_t assembly_offset, 15777ec681f3Smrg uint64_t *qpu_insts, 15787ec681f3Smrg uint32_t qpu_insts_size, 15797ec681f3Smrg VkResult *out_vk_result) 15807ec681f3Smrg{ 15817ec681f3Smrg struct v3dv_shader_variant *variant = 15827ec681f3Smrg vk_zalloc(&device->vk.alloc, sizeof(*variant), 8, 15837ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 15847ec681f3Smrg 15857ec681f3Smrg if (variant == NULL) { 15867ec681f3Smrg *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY; 15877ec681f3Smrg return NULL; 15887ec681f3Smrg } 15897ec681f3Smrg 15907ec681f3Smrg variant->stage = stage; 15917ec681f3Smrg variant->prog_data_size = prog_data_size; 15927ec681f3Smrg variant->prog_data.base = prog_data; 15937ec681f3Smrg 15947ec681f3Smrg variant->assembly_offset = assembly_offset; 15957ec681f3Smrg variant->qpu_insts_size = qpu_insts_size; 15967ec681f3Smrg variant->qpu_insts = qpu_insts; 15977ec681f3Smrg 15987ec681f3Smrg *out_vk_result = VK_SUCCESS; 15997ec681f3Smrg 16007ec681f3Smrg return variant; 16017ec681f3Smrg} 16027ec681f3Smrg 16037ec681f3Smrg/* For a given key, it returns the compiled version of the shader. 
Returns a 16047ec681f3Smrg * new reference to the shader_variant to the caller, or NULL. 16057ec681f3Smrg * 16067ec681f3Smrg * If the method returns NULL it means that something wrong happened: 16077ec681f3Smrg * * Not enough memory: this is one of the possible outcomes defined by 16087ec681f3Smrg * vkCreateXXXPipelines. out_vk_result will return the proper oom error. 16097ec681f3Smrg * * Compilation error: hypothetically this shouldn't happen, as the spec 16107ec681f3Smrg * states that vkShaderModule needs to be created with a valid SPIR-V, so 16117ec681f3Smrg * any compilation failure is a driver bug. In the practice, something as 16127ec681f3Smrg * common as failing to register allocate can lead to a compilation 16137ec681f3Smrg * failure. In that case the only option (for any driver) is 16147ec681f3Smrg * VK_ERROR_UNKNOWN, even if we know that the problem was a compiler 16157ec681f3Smrg * error. 16167ec681f3Smrg */ 16177ec681f3Smrgstatic struct v3dv_shader_variant * 16187ec681f3Smrgpipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage, 16197ec681f3Smrg struct v3d_key *key, 16207ec681f3Smrg size_t key_size, 16217ec681f3Smrg const VkAllocationCallbacks *pAllocator, 16227ec681f3Smrg VkResult *out_vk_result) 16237ec681f3Smrg{ 16247ec681f3Smrg int64_t stage_start = os_time_get_nano(); 16257ec681f3Smrg 16267ec681f3Smrg struct v3dv_pipeline *pipeline = p_stage->pipeline; 16277ec681f3Smrg struct v3dv_physical_device *physical_device = 16287ec681f3Smrg &pipeline->device->instance->physicalDevice; 16297ec681f3Smrg const struct v3d_compiler *compiler = physical_device->compiler; 16307ec681f3Smrg 16317ec681f3Smrg if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR | 16327ec681f3Smrg v3d_debug_flag_for_shader_stage 16337ec681f3Smrg (broadcom_shader_stage_to_gl(p_stage->stage))))) { 16347ec681f3Smrg fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n", 16357ec681f3Smrg broadcom_shader_stage_name(p_stage->stage), 16367ec681f3Smrg p_stage->program_id); 
16377ec681f3Smrg nir_print_shader(p_stage->nir, stderr); 16387ec681f3Smrg fprintf(stderr, "\n"); 16397ec681f3Smrg } 16407ec681f3Smrg 16417ec681f3Smrg uint64_t *qpu_insts; 16427ec681f3Smrg uint32_t qpu_insts_size; 16437ec681f3Smrg struct v3d_prog_data *prog_data; 16447ec681f3Smrg uint32_t prog_data_size = 16457ec681f3Smrg v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage)); 16467ec681f3Smrg 16477ec681f3Smrg qpu_insts = v3d_compile(compiler, 16487ec681f3Smrg key, &prog_data, 16497ec681f3Smrg p_stage->nir, 16507ec681f3Smrg shader_debug_output, NULL, 16517ec681f3Smrg p_stage->program_id, 0, 16527ec681f3Smrg &qpu_insts_size); 16537ec681f3Smrg 16547ec681f3Smrg struct v3dv_shader_variant *variant = NULL; 16557ec681f3Smrg 16567ec681f3Smrg if (!qpu_insts) { 16577ec681f3Smrg fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n", 16587ec681f3Smrg gl_shader_stage_name(p_stage->stage), 16597ec681f3Smrg p_stage->program_id); 16607ec681f3Smrg *out_vk_result = VK_ERROR_UNKNOWN; 16617ec681f3Smrg } else { 16627ec681f3Smrg variant = 16637ec681f3Smrg v3dv_shader_variant_create(pipeline->device, p_stage->stage, 16647ec681f3Smrg prog_data, prog_data_size, 16657ec681f3Smrg 0, /* assembly_offset, no final value yet */ 16667ec681f3Smrg qpu_insts, qpu_insts_size, 16677ec681f3Smrg out_vk_result); 16687ec681f3Smrg } 16697ec681f3Smrg /* At this point we don't need anymore the nir shader, but we are freeing 16707ec681f3Smrg * all the temporary p_stage structs used during the pipeline creation when 16717ec681f3Smrg * we finish it, so let's not worry about freeing the nir here. 16727ec681f3Smrg */ 16737ec681f3Smrg 16747ec681f3Smrg p_stage->feedback.duration += os_time_get_nano() - stage_start; 16757ec681f3Smrg 16767ec681f3Smrg return variant; 16777ec681f3Smrg} 16787ec681f3Smrg 16797ec681f3Smrg/* FIXME: C&P from st, common place? 
*/ 16807ec681f3Smrgstatic void 16817ec681f3Smrgst_nir_opts(nir_shader *nir) 16827ec681f3Smrg{ 16837ec681f3Smrg bool progress; 16847ec681f3Smrg 16857ec681f3Smrg do { 16867ec681f3Smrg progress = false; 16877ec681f3Smrg 16887ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_ssa); 16897ec681f3Smrg 16907ec681f3Smrg /* Linking deals with unused inputs/outputs, but here we can remove 16917ec681f3Smrg * things local to the shader in the hopes that we can cleanup other 16927ec681f3Smrg * things. This pass will also remove variables with only stores, so we 16937ec681f3Smrg * might be able to make progress after it. 16947ec681f3Smrg */ 16957ec681f3Smrg NIR_PASS(progress, nir, nir_remove_dead_variables, 16967ec681f3Smrg (nir_variable_mode)(nir_var_function_temp | 16977ec681f3Smrg nir_var_shader_temp | 16987ec681f3Smrg nir_var_mem_shared), 16997ec681f3Smrg NULL); 17007ec681f3Smrg 17017ec681f3Smrg NIR_PASS(progress, nir, nir_opt_copy_prop_vars); 17027ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dead_write_vars); 17037ec681f3Smrg 17047ec681f3Smrg if (nir->options->lower_to_scalar) { 17057ec681f3Smrg NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); 17067ec681f3Smrg NIR_PASS_V(nir, nir_lower_phis_to_scalar, false); 17077ec681f3Smrg } 17087ec681f3Smrg 17097ec681f3Smrg NIR_PASS_V(nir, nir_lower_alu); 17107ec681f3Smrg NIR_PASS_V(nir, nir_lower_pack); 17117ec681f3Smrg NIR_PASS(progress, nir, nir_copy_prop); 17127ec681f3Smrg NIR_PASS(progress, nir, nir_opt_remove_phis); 17137ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dce); 17147ec681f3Smrg if (nir_opt_trivial_continues(nir)) { 17157ec681f3Smrg progress = true; 17167ec681f3Smrg NIR_PASS(progress, nir, nir_copy_prop); 17177ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dce); 17187ec681f3Smrg } 17197ec681f3Smrg NIR_PASS(progress, nir, nir_opt_if, false); 17207ec681f3Smrg NIR_PASS(progress, nir, nir_opt_dead_cf); 17217ec681f3Smrg NIR_PASS(progress, nir, nir_opt_cse); 17227ec681f3Smrg NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, 
true); 17237ec681f3Smrg 17247ec681f3Smrg NIR_PASS(progress, nir, nir_opt_algebraic); 17257ec681f3Smrg NIR_PASS(progress, nir, nir_opt_constant_folding); 17267ec681f3Smrg 17277ec681f3Smrg NIR_PASS(progress, nir, nir_opt_undef); 17287ec681f3Smrg NIR_PASS(progress, nir, nir_opt_conditional_discard); 17297ec681f3Smrg } while (progress); 17307ec681f3Smrg} 17317ec681f3Smrg 17327ec681f3Smrgstatic void 17337ec681f3Smrglink_shaders(nir_shader *producer, nir_shader *consumer) 17347ec681f3Smrg{ 17357ec681f3Smrg assert(producer); 17367ec681f3Smrg assert(consumer); 17377ec681f3Smrg 17387ec681f3Smrg if (producer->options->lower_to_scalar) { 17397ec681f3Smrg NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out); 17407ec681f3Smrg NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); 17417ec681f3Smrg } 17427ec681f3Smrg 17437ec681f3Smrg nir_lower_io_arrays_to_elements(producer, consumer); 17447ec681f3Smrg 17457ec681f3Smrg st_nir_opts(producer); 17467ec681f3Smrg st_nir_opts(consumer); 17477ec681f3Smrg 17487ec681f3Smrg if (nir_link_opt_varyings(producer, consumer)) 17497ec681f3Smrg st_nir_opts(consumer); 17507ec681f3Smrg 17517ec681f3Smrg NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL); 17527ec681f3Smrg NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); 17537ec681f3Smrg 17547ec681f3Smrg if (nir_remove_unused_varyings(producer, consumer)) { 17557ec681f3Smrg NIR_PASS_V(producer, nir_lower_global_vars_to_local); 17567ec681f3Smrg NIR_PASS_V(consumer, nir_lower_global_vars_to_local); 17577ec681f3Smrg 17587ec681f3Smrg st_nir_opts(producer); 17597ec681f3Smrg st_nir_opts(consumer); 17607ec681f3Smrg 17617ec681f3Smrg /* Optimizations can cause varyings to become unused. 17627ec681f3Smrg * nir_compact_varyings() depends on all dead varyings being removed so 17637ec681f3Smrg * we need to call nir_remove_dead_variables() again here. 
17647ec681f3Smrg */ 17657ec681f3Smrg NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL); 17667ec681f3Smrg NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); 17677ec681f3Smrg } 17687ec681f3Smrg} 17697ec681f3Smrg 17707ec681f3Smrgstatic void 17717ec681f3Smrgpipeline_lower_nir(struct v3dv_pipeline *pipeline, 17727ec681f3Smrg struct v3dv_pipeline_stage *p_stage, 17737ec681f3Smrg struct v3dv_pipeline_layout *layout) 17747ec681f3Smrg{ 17757ec681f3Smrg int64_t stage_start = os_time_get_nano(); 17767ec681f3Smrg 17777ec681f3Smrg assert(pipeline->shared_data && 17787ec681f3Smrg pipeline->shared_data->maps[p_stage->stage]); 17797ec681f3Smrg 17807ec681f3Smrg nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir)); 17817ec681f3Smrg 17827ec681f3Smrg /* We add this because we need a valid sampler for nir_lower_tex to do 17837ec681f3Smrg * unpacking of the texture operation result, even for the case where there 17847ec681f3Smrg * is no sampler state. 17857ec681f3Smrg * 17867ec681f3Smrg * We add two of those, one for the case we need a 16bit return_size, and 17877ec681f3Smrg * another for the case we need a 32bit return size. 
17887ec681f3Smrg */ 17897ec681f3Smrg UNUSED unsigned index = 17907ec681f3Smrg descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map, 17917ec681f3Smrg -1, -1, -1, 0, 16); 17927ec681f3Smrg assert(index == V3DV_NO_SAMPLER_16BIT_IDX); 17937ec681f3Smrg 17947ec681f3Smrg index = 17957ec681f3Smrg descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map, 17967ec681f3Smrg -2, -2, -2, 0, 32); 17977ec681f3Smrg assert(index == V3DV_NO_SAMPLER_32BIT_IDX); 17987ec681f3Smrg 17997ec681f3Smrg /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ 18007ec681f3Smrg NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout); 18017ec681f3Smrg 18027ec681f3Smrg p_stage->feedback.duration += os_time_get_nano() - stage_start; 18037ec681f3Smrg} 18047ec681f3Smrg 18057ec681f3Smrg/** 18067ec681f3Smrg * The SPIR-V compiler will insert a sized compact array for 18077ec681f3Smrg * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[], 18087ec681f3Smrg * where the size of the array determines the number of active clip planes. 
18097ec681f3Smrg */ 18107ec681f3Smrgstatic uint32_t 18117ec681f3Smrgget_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage) 18127ec681f3Smrg{ 18137ec681f3Smrg assert(p_stage->stage == BROADCOM_SHADER_VERTEX); 18147ec681f3Smrg const nir_shader *shader = p_stage->nir; 18157ec681f3Smrg assert(shader); 18167ec681f3Smrg 18177ec681f3Smrg nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) { 18187ec681f3Smrg if (var->data.location == VARYING_SLOT_CLIP_DIST0) { 18197ec681f3Smrg assert(var->data.compact); 18207ec681f3Smrg return (1 << glsl_get_length(var->type)) - 1; 18217ec681f3Smrg } 18227ec681f3Smrg } 18237ec681f3Smrg return 0; 18247ec681f3Smrg} 18257ec681f3Smrg 18267ec681f3Smrgstatic nir_shader * 18277ec681f3Smrgpipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage, 18287ec681f3Smrg struct v3dv_pipeline *pipeline, 18297ec681f3Smrg struct v3dv_pipeline_cache *cache) 18307ec681f3Smrg{ 18317ec681f3Smrg int64_t stage_start = os_time_get_nano(); 18327ec681f3Smrg 18337ec681f3Smrg nir_shader *nir = NULL; 18347ec681f3Smrg 18357ec681f3Smrg nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache, 18367ec681f3Smrg &v3dv_nir_options, 18377ec681f3Smrg p_stage->shader_sha1); 18387ec681f3Smrg 18397ec681f3Smrg if (nir) { 18407ec681f3Smrg assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage)); 18417ec681f3Smrg 18427ec681f3Smrg /* A NIR cach hit doesn't avoid the large majority of pipeline stage 18437ec681f3Smrg * creation so the cache hit is not recorded in the pipeline feedback 18447ec681f3Smrg * flags 18457ec681f3Smrg */ 18467ec681f3Smrg 18477ec681f3Smrg p_stage->feedback.duration += os_time_get_nano() - stage_start; 18487ec681f3Smrg 18497ec681f3Smrg return nir; 18507ec681f3Smrg } 18517ec681f3Smrg 18527ec681f3Smrg nir = shader_module_compile_to_nir(pipeline->device, p_stage); 18537ec681f3Smrg 18547ec681f3Smrg if (nir) { 18557ec681f3Smrg struct v3dv_pipeline_cache *default_cache = 18567ec681f3Smrg &pipeline->device->default_pipeline_cache; 
18577ec681f3Smrg 18587ec681f3Smrg v3dv_pipeline_cache_upload_nir(pipeline, cache, nir, 18597ec681f3Smrg p_stage->shader_sha1); 18607ec681f3Smrg 18617ec681f3Smrg /* Ensure that the variant is on the default cache, as cmd_buffer could 18627ec681f3Smrg * need to change the current variant 18637ec681f3Smrg */ 18647ec681f3Smrg if (default_cache != cache) { 18657ec681f3Smrg v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir, 18667ec681f3Smrg p_stage->shader_sha1); 18677ec681f3Smrg } 18687ec681f3Smrg 18697ec681f3Smrg p_stage->feedback.duration += os_time_get_nano() - stage_start; 18707ec681f3Smrg 18717ec681f3Smrg return nir; 18727ec681f3Smrg } 18737ec681f3Smrg 18747ec681f3Smrg /* FIXME: this shouldn't happen, raise error? */ 18757ec681f3Smrg return NULL; 18767ec681f3Smrg} 18777ec681f3Smrg 18787ec681f3Smrgstatic void 18797ec681f3Smrgpipeline_hash_shader(const struct vk_shader_module *module, 18807ec681f3Smrg const char *entrypoint, 18817ec681f3Smrg gl_shader_stage stage, 18827ec681f3Smrg const VkSpecializationInfo *spec_info, 18837ec681f3Smrg unsigned char *sha1_out) 18847ec681f3Smrg{ 18857ec681f3Smrg struct mesa_sha1 ctx; 18867ec681f3Smrg _mesa_sha1_init(&ctx); 18877ec681f3Smrg 18887ec681f3Smrg _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1)); 18897ec681f3Smrg _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint)); 18907ec681f3Smrg _mesa_sha1_update(&ctx, &stage, sizeof(stage)); 18917ec681f3Smrg if (spec_info) { 18927ec681f3Smrg _mesa_sha1_update(&ctx, spec_info->pMapEntries, 18937ec681f3Smrg spec_info->mapEntryCount * 18947ec681f3Smrg sizeof(*spec_info->pMapEntries)); 18957ec681f3Smrg _mesa_sha1_update(&ctx, spec_info->pData, 18967ec681f3Smrg spec_info->dataSize); 18977ec681f3Smrg } 18987ec681f3Smrg 18997ec681f3Smrg _mesa_sha1_final(&ctx, sha1_out); 19007ec681f3Smrg} 19017ec681f3Smrg 19027ec681f3Smrgstatic VkResult 19037ec681f3Smrgpipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline, 19047ec681f3Smrg const VkAllocationCallbacks *pAllocator, 
19057ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 19067ec681f3Smrg{ 19077ec681f3Smrg assert(pipeline->vs_bin != NULL); 19087ec681f3Smrg if (pipeline->vs_bin->nir == NULL) { 19097ec681f3Smrg assert(pipeline->vs->nir); 19107ec681f3Smrg pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir); 19117ec681f3Smrg } 19127ec681f3Smrg 19137ec681f3Smrg VkResult vk_result; 19147ec681f3Smrg struct v3d_vs_key key; 19157ec681f3Smrg pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs); 19167ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] = 19177ec681f3Smrg pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key), 19187ec681f3Smrg pAllocator, &vk_result); 19197ec681f3Smrg if (vk_result != VK_SUCCESS) 19207ec681f3Smrg return vk_result; 19217ec681f3Smrg 19227ec681f3Smrg pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin); 19237ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] = 19247ec681f3Smrg pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key), 19257ec681f3Smrg pAllocator, &vk_result); 19267ec681f3Smrg 19277ec681f3Smrg return vk_result; 19287ec681f3Smrg} 19297ec681f3Smrg 19307ec681f3Smrgstatic VkResult 19317ec681f3Smrgpipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline, 19327ec681f3Smrg const VkAllocationCallbacks *pAllocator, 19337ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 19347ec681f3Smrg{ 19357ec681f3Smrg assert(pipeline->gs); 19367ec681f3Smrg 19377ec681f3Smrg assert(pipeline->gs_bin != NULL); 19387ec681f3Smrg if (pipeline->gs_bin->nir == NULL) { 19397ec681f3Smrg assert(pipeline->gs->nir); 19407ec681f3Smrg pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir); 19417ec681f3Smrg } 19427ec681f3Smrg 19437ec681f3Smrg VkResult vk_result; 19447ec681f3Smrg struct v3d_gs_key key; 19457ec681f3Smrg pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs); 19467ec681f3Smrg 
pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] = 19477ec681f3Smrg pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key), 19487ec681f3Smrg pAllocator, &vk_result); 19497ec681f3Smrg if (vk_result != VK_SUCCESS) 19507ec681f3Smrg return vk_result; 19517ec681f3Smrg 19527ec681f3Smrg pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin); 19537ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] = 19547ec681f3Smrg pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key), 19557ec681f3Smrg pAllocator, &vk_result); 19567ec681f3Smrg 19577ec681f3Smrg return vk_result; 19587ec681f3Smrg} 19597ec681f3Smrg 19607ec681f3Smrgstatic VkResult 19617ec681f3Smrgpipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline, 19627ec681f3Smrg const VkAllocationCallbacks *pAllocator, 19637ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 19647ec681f3Smrg{ 19657ec681f3Smrg struct v3dv_pipeline_stage *p_stage = pipeline->vs; 19667ec681f3Smrg 19677ec681f3Smrg p_stage = pipeline->fs; 19687ec681f3Smrg 19697ec681f3Smrg struct v3d_fs_key key; 19707ec681f3Smrg 19717ec681f3Smrg pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage, 19727ec681f3Smrg pipeline->gs != NULL, 19737ec681f3Smrg get_ucp_enable_mask(pipeline->vs)); 19747ec681f3Smrg 19757ec681f3Smrg VkResult vk_result; 19767ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] = 19777ec681f3Smrg pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key), 19787ec681f3Smrg pAllocator, &vk_result); 19797ec681f3Smrg 19807ec681f3Smrg return vk_result; 19817ec681f3Smrg} 19827ec681f3Smrg 19837ec681f3Smrgstatic void 19847ec681f3Smrgpipeline_populate_graphics_key(struct v3dv_pipeline *pipeline, 19857ec681f3Smrg struct v3dv_pipeline_key *key, 19867ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 19877ec681f3Smrg{ 19887ec681f3Smrg memset(key, 0, sizeof(*key)); 19897ec681f3Smrg key->robust_buffer_access = 19907ec681f3Smrg 
pipeline->device->features.robustBufferAccess; 19917ec681f3Smrg 19927ec681f3Smrg const bool raster_enabled = 19937ec681f3Smrg !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; 19947ec681f3Smrg 19957ec681f3Smrg const VkPipelineInputAssemblyStateCreateInfo *ia_info = 19967ec681f3Smrg pCreateInfo->pInputAssemblyState; 19977ec681f3Smrg key->topology = vk_to_pipe_prim_type[ia_info->topology]; 19987ec681f3Smrg 19997ec681f3Smrg const VkPipelineColorBlendStateCreateInfo *cb_info = 20007ec681f3Smrg raster_enabled ? pCreateInfo->pColorBlendState : NULL; 20017ec681f3Smrg 20027ec681f3Smrg key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? 20037ec681f3Smrg vk_to_pipe_logicop[cb_info->logicOp] : 20047ec681f3Smrg PIPE_LOGICOP_COPY; 20057ec681f3Smrg 20067ec681f3Smrg /* Multisample rasterization state must be ignored if rasterization 20077ec681f3Smrg * is disabled. 20087ec681f3Smrg */ 20097ec681f3Smrg const VkPipelineMultisampleStateCreateInfo *ms_info = 20107ec681f3Smrg raster_enabled ? 
pCreateInfo->pMultisampleState : NULL; 20117ec681f3Smrg if (ms_info) { 20127ec681f3Smrg assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT || 20137ec681f3Smrg ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT); 20147ec681f3Smrg key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; 20157ec681f3Smrg 20167ec681f3Smrg if (key->msaa) { 20177ec681f3Smrg key->sample_coverage = 20187ec681f3Smrg pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1; 20197ec681f3Smrg key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable; 20207ec681f3Smrg key->sample_alpha_to_one = ms_info->alphaToOneEnable; 20217ec681f3Smrg } 20227ec681f3Smrg } 20237ec681f3Smrg 20247ec681f3Smrg const struct v3dv_render_pass *pass = 20257ec681f3Smrg v3dv_render_pass_from_handle(pCreateInfo->renderPass); 20267ec681f3Smrg const struct v3dv_subpass *subpass = pipeline->subpass; 20277ec681f3Smrg for (uint32_t i = 0; i < subpass->color_count; i++) { 20287ec681f3Smrg const uint32_t att_idx = subpass->color_attachments[i].attachment; 20297ec681f3Smrg if (att_idx == VK_ATTACHMENT_UNUSED) 20307ec681f3Smrg continue; 20317ec681f3Smrg 20327ec681f3Smrg key->cbufs |= 1 << i; 20337ec681f3Smrg 20347ec681f3Smrg VkFormat fb_format = pass->attachments[att_idx].desc.format; 20357ec681f3Smrg enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format); 20367ec681f3Smrg 20377ec681f3Smrg /* If logic operations are enabled then we might emit color reads and we 20387ec681f3Smrg * need to know the color buffer format and swizzle for that 20397ec681f3Smrg */ 20407ec681f3Smrg if (key->logicop_func != PIPE_LOGICOP_COPY) { 20417ec681f3Smrg key->color_fmt[i].format = fb_pipe_format; 20427ec681f3Smrg key->color_fmt[i].swizzle = v3dv_get_format_swizzle(pipeline->device, 20437ec681f3Smrg fb_format); 20447ec681f3Smrg } 20457ec681f3Smrg 20467ec681f3Smrg const struct util_format_description *desc = 20477ec681f3Smrg vk_format_description(fb_format); 20487ec681f3Smrg 20497ec681f3Smrg if 
(desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && 20507ec681f3Smrg desc->channel[0].size == 32) { 20517ec681f3Smrg key->f32_color_rb |= 1 << i; 20527ec681f3Smrg } 20537ec681f3Smrg } 20547ec681f3Smrg 20557ec681f3Smrg const VkPipelineVertexInputStateCreateInfo *vi_info = 20567ec681f3Smrg pCreateInfo->pVertexInputState; 20577ec681f3Smrg for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { 20587ec681f3Smrg const VkVertexInputAttributeDescription *desc = 20597ec681f3Smrg &vi_info->pVertexAttributeDescriptions[i]; 20607ec681f3Smrg assert(desc->location < MAX_VERTEX_ATTRIBS); 20617ec681f3Smrg if (desc->format == VK_FORMAT_B8G8R8A8_UNORM) 20627ec681f3Smrg key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location); 20637ec681f3Smrg } 20647ec681f3Smrg 20657ec681f3Smrg assert(pipeline->subpass); 20667ec681f3Smrg key->has_multiview = pipeline->subpass->view_mask != 0; 20677ec681f3Smrg} 20687ec681f3Smrg 20697ec681f3Smrgstatic void 20707ec681f3Smrgpipeline_populate_compute_key(struct v3dv_pipeline *pipeline, 20717ec681f3Smrg struct v3dv_pipeline_key *key, 20727ec681f3Smrg const VkComputePipelineCreateInfo *pCreateInfo) 20737ec681f3Smrg{ 20747ec681f3Smrg /* We use the same pipeline key for graphics and compute, but we don't need 20757ec681f3Smrg * to add a field to flag compute keys because this key is not used alone 20767ec681f3Smrg * to search in the cache, we also use the SPIR-V or the serialized NIR for 20777ec681f3Smrg * example, which already flags compute shaders. 
20787ec681f3Smrg */ 20797ec681f3Smrg memset(key, 0, sizeof(*key)); 20807ec681f3Smrg key->robust_buffer_access = 20817ec681f3Smrg pipeline->device->features.robustBufferAccess; 20827ec681f3Smrg} 20837ec681f3Smrg 20847ec681f3Smrgstatic struct v3dv_pipeline_shared_data * 20857ec681f3Smrgv3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20], 20867ec681f3Smrg struct v3dv_pipeline *pipeline, 20877ec681f3Smrg bool is_graphics_pipeline) 20887ec681f3Smrg{ 20897ec681f3Smrg /* We create new_entry using the device alloc. Right now shared_data is ref 20907ec681f3Smrg * and unref by both the pipeline and the pipeline cache, so we can't 20917ec681f3Smrg * ensure that the cache or pipeline alloc will be available on the last 20927ec681f3Smrg * unref. 20937ec681f3Smrg */ 20947ec681f3Smrg struct v3dv_pipeline_shared_data *new_entry = 20957ec681f3Smrg vk_zalloc2(&pipeline->device->vk.alloc, NULL, 20967ec681f3Smrg sizeof(struct v3dv_pipeline_shared_data), 8, 20977ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 20987ec681f3Smrg 20997ec681f3Smrg if (new_entry == NULL) 21007ec681f3Smrg return NULL; 21017ec681f3Smrg 21027ec681f3Smrg for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 21037ec681f3Smrg /* We don't need specific descriptor maps for binning stages we use the 21047ec681f3Smrg * map for the render stage. 
21057ec681f3Smrg */ 21067ec681f3Smrg if (broadcom_shader_stage_is_binning(stage)) 21077ec681f3Smrg continue; 21087ec681f3Smrg 21097ec681f3Smrg if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) || 21107ec681f3Smrg (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) { 21117ec681f3Smrg continue; 21127ec681f3Smrg } 21137ec681f3Smrg 21147ec681f3Smrg if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) { 21157ec681f3Smrg /* We always inject a custom GS if we have multiview */ 21167ec681f3Smrg if (!pipeline->subpass->view_mask) 21177ec681f3Smrg continue; 21187ec681f3Smrg } 21197ec681f3Smrg 21207ec681f3Smrg struct v3dv_descriptor_maps *new_maps = 21217ec681f3Smrg vk_zalloc2(&pipeline->device->vk.alloc, NULL, 21227ec681f3Smrg sizeof(struct v3dv_descriptor_maps), 8, 21237ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 21247ec681f3Smrg 21257ec681f3Smrg if (new_maps == NULL) 21267ec681f3Smrg goto fail; 21277ec681f3Smrg 21287ec681f3Smrg new_entry->maps[stage] = new_maps; 21297ec681f3Smrg } 21307ec681f3Smrg 21317ec681f3Smrg new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] = 21327ec681f3Smrg new_entry->maps[BROADCOM_SHADER_VERTEX]; 21337ec681f3Smrg 21347ec681f3Smrg new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] = 21357ec681f3Smrg new_entry->maps[BROADCOM_SHADER_GEOMETRY]; 21367ec681f3Smrg 21377ec681f3Smrg new_entry->ref_cnt = 1; 21387ec681f3Smrg memcpy(new_entry->sha1_key, sha1_key, 20); 21397ec681f3Smrg 21407ec681f3Smrg return new_entry; 21417ec681f3Smrg 21427ec681f3Smrgfail: 21437ec681f3Smrg if (new_entry != NULL) { 21447ec681f3Smrg for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 21457ec681f3Smrg if (new_entry->maps[stage] != NULL) 21467ec681f3Smrg vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]); 21477ec681f3Smrg } 21487ec681f3Smrg } 21497ec681f3Smrg 21507ec681f3Smrg vk_free(&pipeline->device->vk.alloc, new_entry); 21517ec681f3Smrg 21527ec681f3Smrg return NULL; 21537ec681f3Smrg} 21547ec681f3Smrg 21557ec681f3Smrgstatic void 
21567ec681f3Smrgwrite_creation_feedback(struct v3dv_pipeline *pipeline, 21577ec681f3Smrg const void *next, 21587ec681f3Smrg const VkPipelineCreationFeedbackEXT *pipeline_feedback, 21597ec681f3Smrg uint32_t stage_count, 21607ec681f3Smrg const VkPipelineShaderStageCreateInfo *stages) 21617ec681f3Smrg{ 21627ec681f3Smrg const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback = 21637ec681f3Smrg vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT); 21647ec681f3Smrg 21657ec681f3Smrg if (create_feedback) { 21667ec681f3Smrg typed_memcpy(create_feedback->pPipelineCreationFeedback, 21677ec681f3Smrg pipeline_feedback, 21687ec681f3Smrg 1); 21697ec681f3Smrg 21707ec681f3Smrg assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount); 21717ec681f3Smrg 21727ec681f3Smrg for (uint32_t i = 0; i < stage_count; i++) { 21737ec681f3Smrg gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage); 21747ec681f3Smrg switch (s) { 21757ec681f3Smrg case MESA_SHADER_VERTEX: 21767ec681f3Smrg create_feedback->pPipelineStageCreationFeedbacks[i] = 21777ec681f3Smrg pipeline->vs->feedback; 21787ec681f3Smrg 21797ec681f3Smrg create_feedback->pPipelineStageCreationFeedbacks[i].duration += 21807ec681f3Smrg pipeline->vs_bin->feedback.duration; 21817ec681f3Smrg break; 21827ec681f3Smrg 21837ec681f3Smrg case MESA_SHADER_GEOMETRY: 21847ec681f3Smrg create_feedback->pPipelineStageCreationFeedbacks[i] = 21857ec681f3Smrg pipeline->gs->feedback; 21867ec681f3Smrg 21877ec681f3Smrg create_feedback->pPipelineStageCreationFeedbacks[i].duration += 21887ec681f3Smrg pipeline->gs_bin->feedback.duration; 21897ec681f3Smrg break; 21907ec681f3Smrg 21917ec681f3Smrg case MESA_SHADER_FRAGMENT: 21927ec681f3Smrg create_feedback->pPipelineStageCreationFeedbacks[i] = 21937ec681f3Smrg pipeline->fs->feedback; 21947ec681f3Smrg break; 21957ec681f3Smrg 21967ec681f3Smrg case MESA_SHADER_COMPUTE: 21977ec681f3Smrg create_feedback->pPipelineStageCreationFeedbacks[i] = 21987ec681f3Smrg 
pipeline->cs->feedback; 21997ec681f3Smrg break; 22007ec681f3Smrg 22017ec681f3Smrg default: 22027ec681f3Smrg unreachable("not supported shader stage"); 22037ec681f3Smrg } 22047ec681f3Smrg } 22057ec681f3Smrg } 22067ec681f3Smrg} 22077ec681f3Smrg 22087ec681f3Smrgstatic uint32_t 22097ec681f3Smrgmultiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline) 22107ec681f3Smrg{ 22117ec681f3Smrg switch (pipeline->topology) { 22127ec681f3Smrg case PIPE_PRIM_POINTS: 22137ec681f3Smrg return GL_POINTS; 22147ec681f3Smrg case PIPE_PRIM_LINES: 22157ec681f3Smrg case PIPE_PRIM_LINE_STRIP: 22167ec681f3Smrg return GL_LINES; 22177ec681f3Smrg case PIPE_PRIM_TRIANGLES: 22187ec681f3Smrg case PIPE_PRIM_TRIANGLE_STRIP: 22197ec681f3Smrg case PIPE_PRIM_TRIANGLE_FAN: 22207ec681f3Smrg return GL_TRIANGLES; 22217ec681f3Smrg default: 22227ec681f3Smrg /* Since we don't allow GS with multiview, we can only see non-adjacency 22237ec681f3Smrg * primitives. 22247ec681f3Smrg */ 22257ec681f3Smrg unreachable("Unexpected pipeline primitive type"); 22267ec681f3Smrg } 22277ec681f3Smrg} 22287ec681f3Smrg 22297ec681f3Smrgstatic uint32_t 22307ec681f3Smrgmultiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline) 22317ec681f3Smrg{ 22327ec681f3Smrg switch (pipeline->topology) { 22337ec681f3Smrg case PIPE_PRIM_POINTS: 22347ec681f3Smrg return GL_POINTS; 22357ec681f3Smrg case PIPE_PRIM_LINES: 22367ec681f3Smrg case PIPE_PRIM_LINE_STRIP: 22377ec681f3Smrg return GL_LINE_STRIP; 22387ec681f3Smrg case PIPE_PRIM_TRIANGLES: 22397ec681f3Smrg case PIPE_PRIM_TRIANGLE_STRIP: 22407ec681f3Smrg case PIPE_PRIM_TRIANGLE_FAN: 22417ec681f3Smrg return GL_TRIANGLE_STRIP; 22427ec681f3Smrg default: 22437ec681f3Smrg /* Since we don't allow GS with multiview, we can only see non-adjacency 22447ec681f3Smrg * primitives. 
22457ec681f3Smrg */ 22467ec681f3Smrg unreachable("Unexpected pipeline primitive type"); 22477ec681f3Smrg } 22487ec681f3Smrg} 22497ec681f3Smrg 22507ec681f3Smrgstatic bool 22517ec681f3Smrgpipeline_add_multiview_gs(struct v3dv_pipeline *pipeline, 22527ec681f3Smrg struct v3dv_pipeline_cache *cache, 22537ec681f3Smrg const VkAllocationCallbacks *pAllocator) 22547ec681f3Smrg{ 22557ec681f3Smrg /* Create the passthrough GS from the VS output interface */ 22567ec681f3Smrg pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache); 22577ec681f3Smrg nir_shader *vs_nir = pipeline->vs->nir; 22587ec681f3Smrg 22597ec681f3Smrg const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); 22607ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options, 22617ec681f3Smrg "multiview broadcast gs"); 22627ec681f3Smrg nir_shader *nir = b.shader; 22637ec681f3Smrg nir->info.inputs_read = vs_nir->info.outputs_written; 22647ec681f3Smrg nir->info.outputs_written = vs_nir->info.outputs_written | 22657ec681f3Smrg (1ull << VARYING_SLOT_LAYER); 22667ec681f3Smrg 22677ec681f3Smrg uint32_t vertex_count = u_vertices_per_prim(pipeline->topology); 22687ec681f3Smrg nir->info.gs.input_primitive = 22697ec681f3Smrg multiview_gs_input_primitive_from_pipeline(pipeline); 22707ec681f3Smrg nir->info.gs.output_primitive = 22717ec681f3Smrg multiview_gs_output_primitive_from_pipeline(pipeline); 22727ec681f3Smrg nir->info.gs.vertices_in = vertex_count; 22737ec681f3Smrg nir->info.gs.vertices_out = nir->info.gs.vertices_in; 22747ec681f3Smrg nir->info.gs.invocations = 1; 22757ec681f3Smrg nir->info.gs.active_stream_mask = 0x1; 22767ec681f3Smrg 22777ec681f3Smrg /* Make a list of GS input/output variables from the VS outputs */ 22787ec681f3Smrg nir_variable *in_vars[100]; 22797ec681f3Smrg nir_variable *out_vars[100]; 22807ec681f3Smrg uint32_t var_count = 0; 22817ec681f3Smrg nir_foreach_shader_out_variable(out_vs_var, vs_nir) { 22827ec681f3Smrg char name[8]; 
22837ec681f3Smrg snprintf(name, ARRAY_SIZE(name), "in_%d", var_count); 22847ec681f3Smrg 22857ec681f3Smrg in_vars[var_count] = 22867ec681f3Smrg nir_variable_create(nir, nir_var_shader_in, 22877ec681f3Smrg glsl_array_type(out_vs_var->type, vertex_count, 0), 22887ec681f3Smrg name); 22897ec681f3Smrg in_vars[var_count]->data.location = out_vs_var->data.location; 22907ec681f3Smrg in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac; 22917ec681f3Smrg in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation; 22927ec681f3Smrg 22937ec681f3Smrg snprintf(name, ARRAY_SIZE(name), "out_%d", var_count); 22947ec681f3Smrg out_vars[var_count] = 22957ec681f3Smrg nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name); 22967ec681f3Smrg out_vars[var_count]->data.location = out_vs_var->data.location; 22977ec681f3Smrg out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation; 22987ec681f3Smrg 22997ec681f3Smrg var_count++; 23007ec681f3Smrg } 23017ec681f3Smrg 23027ec681f3Smrg /* Add the gl_Layer output variable */ 23037ec681f3Smrg nir_variable *out_layer = 23047ec681f3Smrg nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), 23057ec681f3Smrg "out_Layer"); 23067ec681f3Smrg out_layer->data.location = VARYING_SLOT_LAYER; 23077ec681f3Smrg 23087ec681f3Smrg /* Get the view index value that we will write to gl_Layer */ 23097ec681f3Smrg nir_ssa_def *layer = 23107ec681f3Smrg nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32); 23117ec681f3Smrg 23127ec681f3Smrg /* Emit all output vertices */ 23137ec681f3Smrg for (uint32_t vi = 0; vi < vertex_count; vi++) { 23147ec681f3Smrg /* Emit all output varyings */ 23157ec681f3Smrg for (uint32_t i = 0; i < var_count; i++) { 23167ec681f3Smrg nir_deref_instr *in_value = 23177ec681f3Smrg nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi); 23187ec681f3Smrg nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value); 23197ec681f3Smrg } 23207ec681f3Smrg 
23217ec681f3Smrg /* Emit gl_Layer write */ 23227ec681f3Smrg nir_store_var(&b, out_layer, layer, 0x1); 23237ec681f3Smrg 23247ec681f3Smrg nir_emit_vertex(&b, 0); 23257ec681f3Smrg } 23267ec681f3Smrg nir_end_primitive(&b, 0); 23277ec681f3Smrg 23287ec681f3Smrg /* Make sure we run our pre-process NIR passes so we produce NIR compatible 23297ec681f3Smrg * with what we expect from SPIR-V modules. 23307ec681f3Smrg */ 23317ec681f3Smrg preprocess_nir(nir); 23327ec681f3Smrg 23337ec681f3Smrg /* Attach the geometry shader to the pipeline */ 23347ec681f3Smrg struct v3dv_device *device = pipeline->device; 23357ec681f3Smrg struct v3dv_physical_device *physical_device = 23367ec681f3Smrg &device->instance->physicalDevice; 23377ec681f3Smrg 23387ec681f3Smrg struct v3dv_pipeline_stage *p_stage = 23397ec681f3Smrg vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 23407ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 23417ec681f3Smrg 23427ec681f3Smrg if (p_stage == NULL) { 23437ec681f3Smrg ralloc_free(nir); 23447ec681f3Smrg return false; 23457ec681f3Smrg } 23467ec681f3Smrg 23477ec681f3Smrg p_stage->pipeline = pipeline; 23487ec681f3Smrg p_stage->stage = BROADCOM_SHADER_GEOMETRY; 23497ec681f3Smrg p_stage->entrypoint = "main"; 23507ec681f3Smrg p_stage->module = 0; 23517ec681f3Smrg p_stage->nir = nir; 23527ec681f3Smrg pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1); 23537ec681f3Smrg p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id); 23547ec681f3Smrg 23557ec681f3Smrg pipeline->has_gs = true; 23567ec681f3Smrg pipeline->gs = p_stage; 23577ec681f3Smrg pipeline->active_stages |= MESA_SHADER_GEOMETRY; 23587ec681f3Smrg 23597ec681f3Smrg pipeline->gs_bin = 23607ec681f3Smrg pipeline_stage_create_binning(pipeline->gs, pAllocator); 23617ec681f3Smrg if (pipeline->gs_bin == NULL) 23627ec681f3Smrg return false; 23637ec681f3Smrg 23647ec681f3Smrg return true; 23657ec681f3Smrg} 23667ec681f3Smrg 23677ec681f3Smrg/* 23687ec681f3Smrg * It compiles a 
pipeline. Note that it also allocate internal object, but if 23697ec681f3Smrg * some allocations success, but other fails, the method is not freeing the 23707ec681f3Smrg * successful ones. 23717ec681f3Smrg * 23727ec681f3Smrg * This is done to simplify the code, as what we do in this case is just call 23737ec681f3Smrg * the pipeline destroy method, and this would handle freeing the internal 23747ec681f3Smrg * objects allocated. We just need to be careful setting to NULL the objects 23757ec681f3Smrg * not allocated. 23767ec681f3Smrg */ 23777ec681f3Smrgstatic VkResult 23787ec681f3Smrgpipeline_compile_graphics(struct v3dv_pipeline *pipeline, 23797ec681f3Smrg struct v3dv_pipeline_cache *cache, 23807ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 23817ec681f3Smrg const VkAllocationCallbacks *pAllocator) 23827ec681f3Smrg{ 23837ec681f3Smrg VkPipelineCreationFeedbackEXT pipeline_feedback = { 23847ec681f3Smrg .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT, 23857ec681f3Smrg }; 23867ec681f3Smrg int64_t pipeline_start = os_time_get_nano(); 23877ec681f3Smrg 23887ec681f3Smrg struct v3dv_device *device = pipeline->device; 23897ec681f3Smrg struct v3dv_physical_device *physical_device = 23907ec681f3Smrg &device->instance->physicalDevice; 23917ec681f3Smrg 23927ec681f3Smrg /* First pass to get some common info from the shader, and create the 23937ec681f3Smrg * individual pipeline_stage objects 23947ec681f3Smrg */ 23957ec681f3Smrg for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { 23967ec681f3Smrg const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i]; 23977ec681f3Smrg gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); 23987ec681f3Smrg 23997ec681f3Smrg struct v3dv_pipeline_stage *p_stage = 24007ec681f3Smrg vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 24017ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 24027ec681f3Smrg 24037ec681f3Smrg if (p_stage == NULL) 24047ec681f3Smrg return VK_ERROR_OUT_OF_HOST_MEMORY; 
24057ec681f3Smrg 24067ec681f3Smrg /* Note that we are assigning program_id slightly differently that 24077ec681f3Smrg * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin 24087ec681f3Smrg * would have a different program_id, while v3d would have the same for 24097ec681f3Smrg * both. For the case of v3dv, it is more natural to have an id this way, 24107ec681f3Smrg * as right now we are using it for debugging, not for shader-db. 24117ec681f3Smrg */ 24127ec681f3Smrg p_stage->program_id = 24137ec681f3Smrg p_atomic_inc_return(&physical_device->next_program_id); 24147ec681f3Smrg 24157ec681f3Smrg p_stage->pipeline = pipeline; 24167ec681f3Smrg p_stage->stage = gl_shader_stage_to_broadcom(stage); 24177ec681f3Smrg p_stage->entrypoint = sinfo->pName; 24187ec681f3Smrg p_stage->module = vk_shader_module_from_handle(sinfo->module); 24197ec681f3Smrg p_stage->spec_info = sinfo->pSpecializationInfo; 24207ec681f3Smrg 24217ec681f3Smrg pipeline_hash_shader(p_stage->module, 24227ec681f3Smrg p_stage->entrypoint, 24237ec681f3Smrg stage, 24247ec681f3Smrg p_stage->spec_info, 24257ec681f3Smrg p_stage->shader_sha1); 24267ec681f3Smrg 24277ec681f3Smrg pipeline->active_stages |= sinfo->stage; 24287ec681f3Smrg 24297ec681f3Smrg /* We will try to get directly the compiled shader variant, so let's not 24307ec681f3Smrg * worry about getting the nir shader for now. 
24317ec681f3Smrg */ 24327ec681f3Smrg p_stage->nir = NULL; 24337ec681f3Smrg 24347ec681f3Smrg switch(stage) { 24357ec681f3Smrg case MESA_SHADER_VERTEX: 24367ec681f3Smrg pipeline->vs = p_stage; 24377ec681f3Smrg pipeline->vs_bin = 24387ec681f3Smrg pipeline_stage_create_binning(pipeline->vs, pAllocator); 24397ec681f3Smrg if (pipeline->vs_bin == NULL) 24407ec681f3Smrg return VK_ERROR_OUT_OF_HOST_MEMORY; 24417ec681f3Smrg break; 24427ec681f3Smrg 24437ec681f3Smrg case MESA_SHADER_GEOMETRY: 24447ec681f3Smrg pipeline->has_gs = true; 24457ec681f3Smrg pipeline->gs = p_stage; 24467ec681f3Smrg pipeline->gs_bin = 24477ec681f3Smrg pipeline_stage_create_binning(pipeline->gs, pAllocator); 24487ec681f3Smrg if (pipeline->gs_bin == NULL) 24497ec681f3Smrg return VK_ERROR_OUT_OF_HOST_MEMORY; 24507ec681f3Smrg break; 24517ec681f3Smrg 24527ec681f3Smrg case MESA_SHADER_FRAGMENT: 24537ec681f3Smrg pipeline->fs = p_stage; 24547ec681f3Smrg break; 24557ec681f3Smrg 24567ec681f3Smrg default: 24577ec681f3Smrg unreachable("not supported shader stage"); 24587ec681f3Smrg } 24597ec681f3Smrg } 24607ec681f3Smrg 24617ec681f3Smrg /* Add a no-op fragment shader if needed */ 24627ec681f3Smrg if (!pipeline->fs) { 24637ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 24647ec681f3Smrg &v3dv_nir_options, 24657ec681f3Smrg "noop_fs"); 24667ec681f3Smrg 24677ec681f3Smrg struct v3dv_pipeline_stage *p_stage = 24687ec681f3Smrg vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 24697ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 24707ec681f3Smrg 24717ec681f3Smrg if (p_stage == NULL) 24727ec681f3Smrg return VK_ERROR_OUT_OF_HOST_MEMORY; 24737ec681f3Smrg 24747ec681f3Smrg p_stage->pipeline = pipeline; 24757ec681f3Smrg p_stage->stage = BROADCOM_SHADER_FRAGMENT; 24767ec681f3Smrg p_stage->entrypoint = "main"; 24777ec681f3Smrg p_stage->module = 0; 24787ec681f3Smrg p_stage->nir = b.shader; 24797ec681f3Smrg pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1); 
24807ec681f3Smrg p_stage->program_id = 24817ec681f3Smrg p_atomic_inc_return(&physical_device->next_program_id); 24827ec681f3Smrg 24837ec681f3Smrg pipeline->fs = p_stage; 24847ec681f3Smrg pipeline->active_stages |= MESA_SHADER_FRAGMENT; 24857ec681f3Smrg } 24867ec681f3Smrg 24877ec681f3Smrg /* If multiview is enabled, we inject a custom passthrough geometry shader 24887ec681f3Smrg * to broadcast draw calls to the appropriate views. 24897ec681f3Smrg */ 24907ec681f3Smrg assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs)); 24917ec681f3Smrg if (pipeline->subpass->view_mask) { 24927ec681f3Smrg if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator)) 24937ec681f3Smrg return VK_ERROR_OUT_OF_HOST_MEMORY; 24947ec681f3Smrg } 24957ec681f3Smrg 24967ec681f3Smrg /* First we try to get the variants from the pipeline cache */ 24977ec681f3Smrg struct v3dv_pipeline_key pipeline_key; 24987ec681f3Smrg pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo); 24997ec681f3Smrg unsigned char pipeline_sha1[20]; 25007ec681f3Smrg pipeline_hash_graphics(pipeline, &pipeline_key, pipeline_sha1); 25017ec681f3Smrg 25027ec681f3Smrg bool cache_hit = false; 25037ec681f3Smrg 25047ec681f3Smrg pipeline->shared_data = 25057ec681f3Smrg v3dv_pipeline_cache_search_for_pipeline(cache, 25067ec681f3Smrg pipeline_sha1, 25077ec681f3Smrg &cache_hit); 25087ec681f3Smrg 25097ec681f3Smrg if (pipeline->shared_data != NULL) { 25107ec681f3Smrg /* A correct pipeline must have at least a VS and FS */ 25117ec681f3Smrg assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]); 25127ec681f3Smrg assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]); 25137ec681f3Smrg assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); 25147ec681f3Smrg assert(!pipeline->gs || 25157ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]); 25167ec681f3Smrg assert(!pipeline->gs || 25177ec681f3Smrg 
pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]); 25187ec681f3Smrg 25197ec681f3Smrg if (cache_hit && cache != &pipeline->device->default_pipeline_cache) 25207ec681f3Smrg pipeline_feedback.flags |= 25217ec681f3Smrg VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT; 25227ec681f3Smrg 25237ec681f3Smrg goto success; 25247ec681f3Smrg } 25257ec681f3Smrg 25267ec681f3Smrg if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) 25277ec681f3Smrg return VK_PIPELINE_COMPILE_REQUIRED_EXT; 25287ec681f3Smrg 25297ec681f3Smrg /* Otherwise we try to get the NIR shaders (either from the original SPIR-V 25307ec681f3Smrg * shader or the pipeline cache) and compile. 25317ec681f3Smrg */ 25327ec681f3Smrg pipeline->shared_data = 25337ec681f3Smrg v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline, true); 25347ec681f3Smrg 25357ec681f3Smrg pipeline->vs->feedback.flags |= 25367ec681f3Smrg VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT; 25377ec681f3Smrg if (pipeline->gs) 25387ec681f3Smrg pipeline->gs->feedback.flags |= 25397ec681f3Smrg VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT; 25407ec681f3Smrg pipeline->fs->feedback.flags |= 25417ec681f3Smrg VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT; 25427ec681f3Smrg 25437ec681f3Smrg if (!pipeline->vs->nir) 25447ec681f3Smrg pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache); 25457ec681f3Smrg if (pipeline->gs && !pipeline->gs->nir) 25467ec681f3Smrg pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache); 25477ec681f3Smrg if (!pipeline->fs->nir) 25487ec681f3Smrg pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache); 25497ec681f3Smrg 25507ec681f3Smrg /* Linking + pipeline lowerings */ 25517ec681f3Smrg if (pipeline->gs) { 25527ec681f3Smrg link_shaders(pipeline->gs->nir, pipeline->fs->nir); 25537ec681f3Smrg link_shaders(pipeline->vs->nir, pipeline->gs->nir); 25547ec681f3Smrg } else { 25557ec681f3Smrg link_shaders(pipeline->vs->nir, 
pipeline->fs->nir); 25567ec681f3Smrg } 25577ec681f3Smrg 25587ec681f3Smrg pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout); 25597ec681f3Smrg lower_fs_io(pipeline->fs->nir); 25607ec681f3Smrg 25617ec681f3Smrg if (pipeline->gs) { 25627ec681f3Smrg pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout); 25637ec681f3Smrg lower_gs_io(pipeline->gs->nir); 25647ec681f3Smrg } 25657ec681f3Smrg 25667ec681f3Smrg pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout); 25677ec681f3Smrg lower_vs_io(pipeline->vs->nir); 25687ec681f3Smrg 25697ec681f3Smrg /* Compiling to vir */ 25707ec681f3Smrg VkResult vk_result; 25717ec681f3Smrg 25727ec681f3Smrg /* We should have got all the variants or no variants from the cache */ 25737ec681f3Smrg assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); 25747ec681f3Smrg vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo); 25757ec681f3Smrg if (vk_result != VK_SUCCESS) 25767ec681f3Smrg return vk_result; 25777ec681f3Smrg 25787ec681f3Smrg assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] && 25797ec681f3Smrg !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]); 25807ec681f3Smrg 25817ec681f3Smrg if (pipeline->gs) { 25827ec681f3Smrg vk_result = 25837ec681f3Smrg pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo); 25847ec681f3Smrg if (vk_result != VK_SUCCESS) 25857ec681f3Smrg return vk_result; 25867ec681f3Smrg } 25877ec681f3Smrg 25887ec681f3Smrg assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] && 25897ec681f3Smrg !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]); 25907ec681f3Smrg 25917ec681f3Smrg vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo); 25927ec681f3Smrg if (vk_result != VK_SUCCESS) 25937ec681f3Smrg return vk_result; 25947ec681f3Smrg 25957ec681f3Smrg if (!upload_assembly(pipeline)) 25967ec681f3Smrg return VK_ERROR_OUT_OF_DEVICE_MEMORY; 25977ec681f3Smrg 25987ec681f3Smrg 
v3dv_pipeline_cache_upload_pipeline(pipeline, cache); 25997ec681f3Smrg 26007ec681f3Smrg success: 26017ec681f3Smrg 26027ec681f3Smrg pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 26037ec681f3Smrg write_creation_feedback(pipeline, 26047ec681f3Smrg pCreateInfo->pNext, 26057ec681f3Smrg &pipeline_feedback, 26067ec681f3Smrg pCreateInfo->stageCount, 26077ec681f3Smrg pCreateInfo->pStages); 26087ec681f3Smrg 26097ec681f3Smrg /* Since we have the variants in the pipeline shared data we can now free 26107ec681f3Smrg * the pipeline stages. 26117ec681f3Smrg */ 26127ec681f3Smrg pipeline_free_stages(device, pipeline, pAllocator); 26137ec681f3Smrg 26147ec681f3Smrg pipeline_check_spill_size(pipeline); 26157ec681f3Smrg 26167ec681f3Smrg return compute_vpm_config(pipeline); 26177ec681f3Smrg} 26187ec681f3Smrg 26197ec681f3Smrgstatic VkResult 26207ec681f3Smrgcompute_vpm_config(struct v3dv_pipeline *pipeline) 26217ec681f3Smrg{ 26227ec681f3Smrg struct v3dv_shader_variant *vs_variant = 26237ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; 26247ec681f3Smrg struct v3dv_shader_variant *vs_bin_variant = 26257ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; 26267ec681f3Smrg struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs; 26277ec681f3Smrg struct v3d_vs_prog_data *vs_bin =vs_bin_variant->prog_data.vs; 26287ec681f3Smrg 26297ec681f3Smrg struct v3d_gs_prog_data *gs = NULL; 26307ec681f3Smrg struct v3d_gs_prog_data *gs_bin = NULL; 26317ec681f3Smrg if (pipeline->has_gs) { 26327ec681f3Smrg struct v3dv_shader_variant *gs_variant = 26337ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; 26347ec681f3Smrg struct v3dv_shader_variant *gs_bin_variant = 26357ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; 26367ec681f3Smrg gs = gs_variant->prog_data.gs; 26377ec681f3Smrg gs_bin = gs_bin_variant->prog_data.gs; 26387ec681f3Smrg } 26397ec681f3Smrg 26407ec681f3Smrg if 
(!v3d_compute_vpm_config(&pipeline->device->devinfo, 26417ec681f3Smrg vs_bin, vs, gs_bin, gs, 26427ec681f3Smrg &pipeline->vpm_cfg_bin, 26437ec681f3Smrg &pipeline->vpm_cfg)) { 26447ec681f3Smrg return VK_ERROR_OUT_OF_DEVICE_MEMORY; 26457ec681f3Smrg } 26467ec681f3Smrg 26477ec681f3Smrg return VK_SUCCESS; 26487ec681f3Smrg} 26497ec681f3Smrg 26507ec681f3Smrgstatic unsigned 26517ec681f3Smrgv3dv_dynamic_state_mask(VkDynamicState state) 26527ec681f3Smrg{ 26537ec681f3Smrg switch(state) { 26547ec681f3Smrg case VK_DYNAMIC_STATE_VIEWPORT: 26557ec681f3Smrg return V3DV_DYNAMIC_VIEWPORT; 26567ec681f3Smrg case VK_DYNAMIC_STATE_SCISSOR: 26577ec681f3Smrg return V3DV_DYNAMIC_SCISSOR; 26587ec681f3Smrg case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: 26597ec681f3Smrg return V3DV_DYNAMIC_STENCIL_COMPARE_MASK; 26607ec681f3Smrg case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: 26617ec681f3Smrg return V3DV_DYNAMIC_STENCIL_WRITE_MASK; 26627ec681f3Smrg case VK_DYNAMIC_STATE_STENCIL_REFERENCE: 26637ec681f3Smrg return V3DV_DYNAMIC_STENCIL_REFERENCE; 26647ec681f3Smrg case VK_DYNAMIC_STATE_BLEND_CONSTANTS: 26657ec681f3Smrg return V3DV_DYNAMIC_BLEND_CONSTANTS; 26667ec681f3Smrg case VK_DYNAMIC_STATE_DEPTH_BIAS: 26677ec681f3Smrg return V3DV_DYNAMIC_DEPTH_BIAS; 26687ec681f3Smrg case VK_DYNAMIC_STATE_LINE_WIDTH: 26697ec681f3Smrg return V3DV_DYNAMIC_LINE_WIDTH; 26707ec681f3Smrg case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: 26717ec681f3Smrg return V3DV_DYNAMIC_COLOR_WRITE_ENABLE; 26727ec681f3Smrg 26737ec681f3Smrg /* Depth bounds testing is not available in in V3D 4.2 so here we are just 26747ec681f3Smrg * ignoring this dynamic state. We are already asserting at pipeline creation 26757ec681f3Smrg * time that depth bounds testing is not enabled. 
26767ec681f3Smrg */ 26777ec681f3Smrg case VK_DYNAMIC_STATE_DEPTH_BOUNDS: 26787ec681f3Smrg return 0; 26797ec681f3Smrg 26807ec681f3Smrg default: 26817ec681f3Smrg unreachable("Unhandled dynamic state"); 26827ec681f3Smrg } 26837ec681f3Smrg} 26847ec681f3Smrg 26857ec681f3Smrgstatic void 26867ec681f3Smrgpipeline_init_dynamic_state( 26877ec681f3Smrg struct v3dv_pipeline *pipeline, 26887ec681f3Smrg const VkPipelineDynamicStateCreateInfo *pDynamicState, 26897ec681f3Smrg const VkPipelineViewportStateCreateInfo *pViewportState, 26907ec681f3Smrg const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState, 26917ec681f3Smrg const VkPipelineColorBlendStateCreateInfo *pColorBlendState, 26927ec681f3Smrg const VkPipelineRasterizationStateCreateInfo *pRasterizationState, 26937ec681f3Smrg const VkPipelineColorWriteCreateInfoEXT *pColorWriteState) 26947ec681f3Smrg{ 26957ec681f3Smrg pipeline->dynamic_state = default_dynamic_state; 26967ec681f3Smrg struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state; 26977ec681f3Smrg 26987ec681f3Smrg /* Create a mask of enabled dynamic states */ 26997ec681f3Smrg uint32_t dynamic_states = 0; 27007ec681f3Smrg if (pDynamicState) { 27017ec681f3Smrg uint32_t count = pDynamicState->dynamicStateCount; 27027ec681f3Smrg for (uint32_t s = 0; s < count; s++) { 27037ec681f3Smrg dynamic_states |= 27047ec681f3Smrg v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]); 27057ec681f3Smrg } 27067ec681f3Smrg } 27077ec681f3Smrg 27087ec681f3Smrg /* For any pipeline states that are not dynamic, set the dynamic state 27097ec681f3Smrg * from the static pipeline state. 
27107ec681f3Smrg */ 27117ec681f3Smrg if (pViewportState) { 27127ec681f3Smrg if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) { 27137ec681f3Smrg dynamic->viewport.count = pViewportState->viewportCount; 27147ec681f3Smrg typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports, 27157ec681f3Smrg pViewportState->viewportCount); 27167ec681f3Smrg 27177ec681f3Smrg for (uint32_t i = 0; i < dynamic->viewport.count; i++) { 27187ec681f3Smrg v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i], 27197ec681f3Smrg dynamic->viewport.scale[i], 27207ec681f3Smrg dynamic->viewport.translate[i]); 27217ec681f3Smrg } 27227ec681f3Smrg } 27237ec681f3Smrg 27247ec681f3Smrg if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) { 27257ec681f3Smrg dynamic->scissor.count = pViewportState->scissorCount; 27267ec681f3Smrg typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors, 27277ec681f3Smrg pViewportState->scissorCount); 27287ec681f3Smrg } 27297ec681f3Smrg } 27307ec681f3Smrg 27317ec681f3Smrg if (pDepthStencilState) { 27327ec681f3Smrg if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) { 27337ec681f3Smrg dynamic->stencil_compare_mask.front = 27347ec681f3Smrg pDepthStencilState->front.compareMask; 27357ec681f3Smrg dynamic->stencil_compare_mask.back = 27367ec681f3Smrg pDepthStencilState->back.compareMask; 27377ec681f3Smrg } 27387ec681f3Smrg 27397ec681f3Smrg if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) { 27407ec681f3Smrg dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask; 27417ec681f3Smrg dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask; 27427ec681f3Smrg } 27437ec681f3Smrg 27447ec681f3Smrg if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) { 27457ec681f3Smrg dynamic->stencil_reference.front = pDepthStencilState->front.reference; 27467ec681f3Smrg dynamic->stencil_reference.back = pDepthStencilState->back.reference; 27477ec681f3Smrg } 27487ec681f3Smrg } 27497ec681f3Smrg 27507ec681f3Smrg if (pColorBlendState && 
!(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) { 27517ec681f3Smrg memcpy(dynamic->blend_constants, pColorBlendState->blendConstants, 27527ec681f3Smrg sizeof(dynamic->blend_constants)); 27537ec681f3Smrg } 27547ec681f3Smrg 27557ec681f3Smrg if (pRasterizationState) { 27567ec681f3Smrg if (pRasterizationState->depthBiasEnable && 27577ec681f3Smrg !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) { 27587ec681f3Smrg dynamic->depth_bias.constant_factor = 27597ec681f3Smrg pRasterizationState->depthBiasConstantFactor; 27607ec681f3Smrg dynamic->depth_bias.depth_bias_clamp = 27617ec681f3Smrg pRasterizationState->depthBiasClamp; 27627ec681f3Smrg dynamic->depth_bias.slope_factor = 27637ec681f3Smrg pRasterizationState->depthBiasSlopeFactor; 27647ec681f3Smrg } 27657ec681f3Smrg if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH)) 27667ec681f3Smrg dynamic->line_width = pRasterizationState->lineWidth; 27677ec681f3Smrg } 27687ec681f3Smrg 27697ec681f3Smrg if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) { 27707ec681f3Smrg dynamic->color_write_enable = 0; 27717ec681f3Smrg for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++) 27727ec681f3Smrg dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? 
(0xfu << (i * 4)) : 0; 27737ec681f3Smrg } 27747ec681f3Smrg 27757ec681f3Smrg pipeline->dynamic_state.mask = dynamic_states; 27767ec681f3Smrg} 27777ec681f3Smrg 27787ec681f3Smrgstatic bool 27797ec681f3Smrgstencil_op_is_no_op(const VkStencilOpState *stencil) 27807ec681f3Smrg{ 27817ec681f3Smrg return stencil->depthFailOp == VK_STENCIL_OP_KEEP && 27827ec681f3Smrg stencil->compareOp == VK_COMPARE_OP_ALWAYS; 27837ec681f3Smrg} 27847ec681f3Smrg 27857ec681f3Smrgstatic void 27867ec681f3Smrgenable_depth_bias(struct v3dv_pipeline *pipeline, 27877ec681f3Smrg const VkPipelineRasterizationStateCreateInfo *rs_info) 27887ec681f3Smrg{ 27897ec681f3Smrg pipeline->depth_bias.enabled = false; 27907ec681f3Smrg pipeline->depth_bias.is_z16 = false; 27917ec681f3Smrg 27927ec681f3Smrg if (!rs_info || !rs_info->depthBiasEnable) 27937ec681f3Smrg return; 27947ec681f3Smrg 27957ec681f3Smrg /* Check the depth/stencil attachment description for the subpass used with 27967ec681f3Smrg * this pipeline. 27977ec681f3Smrg */ 27987ec681f3Smrg assert(pipeline->pass && pipeline->subpass); 27997ec681f3Smrg struct v3dv_render_pass *pass = pipeline->pass; 28007ec681f3Smrg struct v3dv_subpass *subpass = pipeline->subpass; 28017ec681f3Smrg 28027ec681f3Smrg if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) 28037ec681f3Smrg return; 28047ec681f3Smrg 28057ec681f3Smrg assert(subpass->ds_attachment.attachment < pass->attachment_count); 28067ec681f3Smrg struct v3dv_render_pass_attachment *att = 28077ec681f3Smrg &pass->attachments[subpass->ds_attachment.attachment]; 28087ec681f3Smrg 28097ec681f3Smrg if (att->desc.format == VK_FORMAT_D16_UNORM) 28107ec681f3Smrg pipeline->depth_bias.is_z16 = true; 28117ec681f3Smrg 28127ec681f3Smrg pipeline->depth_bias.enabled = true; 28137ec681f3Smrg} 28147ec681f3Smrg 28157ec681f3Smrgstatic void 28167ec681f3Smrgpipeline_set_ez_state(struct v3dv_pipeline *pipeline, 28177ec681f3Smrg const VkPipelineDepthStencilStateCreateInfo *ds_info) 28187ec681f3Smrg{ 28197ec681f3Smrg if 
(!ds_info || !ds_info->depthTestEnable) { 28207ec681f3Smrg pipeline->ez_state = V3D_EZ_DISABLED; 28217ec681f3Smrg return; 28227ec681f3Smrg } 28237ec681f3Smrg 28247ec681f3Smrg switch (ds_info->depthCompareOp) { 28257ec681f3Smrg case VK_COMPARE_OP_LESS: 28267ec681f3Smrg case VK_COMPARE_OP_LESS_OR_EQUAL: 28277ec681f3Smrg pipeline->ez_state = V3D_EZ_LT_LE; 28287ec681f3Smrg break; 28297ec681f3Smrg case VK_COMPARE_OP_GREATER: 28307ec681f3Smrg case VK_COMPARE_OP_GREATER_OR_EQUAL: 28317ec681f3Smrg pipeline->ez_state = V3D_EZ_GT_GE; 28327ec681f3Smrg break; 28337ec681f3Smrg case VK_COMPARE_OP_NEVER: 28347ec681f3Smrg case VK_COMPARE_OP_EQUAL: 28357ec681f3Smrg pipeline->ez_state = V3D_EZ_UNDECIDED; 28367ec681f3Smrg break; 28377ec681f3Smrg default: 28387ec681f3Smrg pipeline->ez_state = V3D_EZ_DISABLED; 28397ec681f3Smrg break; 28407ec681f3Smrg } 28417ec681f3Smrg 28427ec681f3Smrg /* If stencil is enabled and is not a no-op, we need to disable EZ */ 28437ec681f3Smrg if (ds_info->stencilTestEnable && 28447ec681f3Smrg (!stencil_op_is_no_op(&ds_info->front) || 28457ec681f3Smrg !stencil_op_is_no_op(&ds_info->back))) { 28467ec681f3Smrg pipeline->ez_state = V3D_EZ_DISABLED; 28477ec681f3Smrg } 28487ec681f3Smrg} 28497ec681f3Smrg 28507ec681f3Smrgstatic bool 28517ec681f3Smrgpipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline) 28527ec681f3Smrg{ 28537ec681f3Smrg for (uint8_t i = 0; i < pipeline->va_count; i++) { 28547ec681f3Smrg if (vk_format_is_int(pipeline->va[i].vk_format)) 28557ec681f3Smrg return true; 28567ec681f3Smrg } 28577ec681f3Smrg return false; 28587ec681f3Smrg} 28597ec681f3Smrg 28607ec681f3Smrg/* @pipeline can be NULL. We assume in that case that all the attributes have 28617ec681f3Smrg * a float format (we only create an all-float BO once and we reuse it with 28627ec681f3Smrg * all float pipelines), otherwise we look at the actual type of each 28637ec681f3Smrg * attribute used with the specific pipeline passed in. 
28647ec681f3Smrg */ 28657ec681f3Smrgstruct v3dv_bo * 28667ec681f3Smrgv3dv_pipeline_create_default_attribute_values(struct v3dv_device *device, 28677ec681f3Smrg struct v3dv_pipeline *pipeline) 28687ec681f3Smrg{ 28697ec681f3Smrg uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4; 28707ec681f3Smrg struct v3dv_bo *bo; 28717ec681f3Smrg 28727ec681f3Smrg bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true); 28737ec681f3Smrg 28747ec681f3Smrg if (!bo) { 28757ec681f3Smrg fprintf(stderr, "failed to allocate memory for the default " 28767ec681f3Smrg "attribute values\n"); 28777ec681f3Smrg return NULL; 28787ec681f3Smrg } 28797ec681f3Smrg 28807ec681f3Smrg bool ok = v3dv_bo_map(device, bo, size); 28817ec681f3Smrg if (!ok) { 28827ec681f3Smrg fprintf(stderr, "failed to map default attribute values buffer\n"); 28837ec681f3Smrg return false; 28847ec681f3Smrg } 28857ec681f3Smrg 28867ec681f3Smrg uint32_t *attrs = bo->map; 28877ec681f3Smrg uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0; 28887ec681f3Smrg for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) { 28897ec681f3Smrg attrs[i * 4 + 0] = 0; 28907ec681f3Smrg attrs[i * 4 + 1] = 0; 28917ec681f3Smrg attrs[i * 4 + 2] = 0; 28927ec681f3Smrg VkFormat attr_format = 28937ec681f3Smrg pipeline != NULL ? 
pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED; 28947ec681f3Smrg if (i < va_count && vk_format_is_int(attr_format)) { 28957ec681f3Smrg attrs[i * 4 + 3] = 1; 28967ec681f3Smrg } else { 28977ec681f3Smrg attrs[i * 4 + 3] = fui(1.0); 28987ec681f3Smrg } 28997ec681f3Smrg } 29007ec681f3Smrg 29017ec681f3Smrg v3dv_bo_unmap(device, bo); 29027ec681f3Smrg 29037ec681f3Smrg return bo; 29047ec681f3Smrg} 29057ec681f3Smrg 29067ec681f3Smrgstatic void 29077ec681f3Smrgpipeline_set_sample_mask(struct v3dv_pipeline *pipeline, 29087ec681f3Smrg const VkPipelineMultisampleStateCreateInfo *ms_info) 29097ec681f3Smrg{ 29107ec681f3Smrg pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1; 29117ec681f3Smrg 29127ec681f3Smrg /* Ignore pSampleMask if we are not enabling multisampling. The hardware 29137ec681f3Smrg * requires this to be 0xf or 0x0 if using a single sample. 29147ec681f3Smrg */ 29157ec681f3Smrg if (ms_info && ms_info->pSampleMask && 29167ec681f3Smrg ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) { 29177ec681f3Smrg pipeline->sample_mask &= ms_info->pSampleMask[0]; 29187ec681f3Smrg } 29197ec681f3Smrg} 29207ec681f3Smrg 29217ec681f3Smrgstatic void 29227ec681f3Smrgpipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline, 29237ec681f3Smrg const VkPipelineMultisampleStateCreateInfo *ms_info) 29247ec681f3Smrg{ 29257ec681f3Smrg pipeline->sample_rate_shading = 29267ec681f3Smrg ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT && 29277ec681f3Smrg ms_info->sampleShadingEnable; 29287ec681f3Smrg} 29297ec681f3Smrg 29307ec681f3Smrgstatic VkResult 29317ec681f3Smrgpipeline_init(struct v3dv_pipeline *pipeline, 29327ec681f3Smrg struct v3dv_device *device, 29337ec681f3Smrg struct v3dv_pipeline_cache *cache, 29347ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 29357ec681f3Smrg const VkAllocationCallbacks *pAllocator) 29367ec681f3Smrg{ 29377ec681f3Smrg VkResult result = VK_SUCCESS; 29387ec681f3Smrg 29397ec681f3Smrg pipeline->device = device; 29407ec681f3Smrg 
29417ec681f3Smrg V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout); 29427ec681f3Smrg pipeline->layout = layout; 29437ec681f3Smrg 29447ec681f3Smrg V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass); 29457ec681f3Smrg assert(pCreateInfo->subpass < render_pass->subpass_count); 29467ec681f3Smrg pipeline->pass = render_pass; 29477ec681f3Smrg pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass]; 29487ec681f3Smrg 29497ec681f3Smrg const VkPipelineInputAssemblyStateCreateInfo *ia_info = 29507ec681f3Smrg pCreateInfo->pInputAssemblyState; 29517ec681f3Smrg pipeline->topology = vk_to_pipe_prim_type[ia_info->topology]; 29527ec681f3Smrg 29537ec681f3Smrg /* If rasterization is not enabled, various CreateInfo structs must be 29547ec681f3Smrg * ignored. 29557ec681f3Smrg */ 29567ec681f3Smrg const bool raster_enabled = 29577ec681f3Smrg !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; 29587ec681f3Smrg 29597ec681f3Smrg const VkPipelineViewportStateCreateInfo *vp_info = 29607ec681f3Smrg raster_enabled ? pCreateInfo->pViewportState : NULL; 29617ec681f3Smrg 29627ec681f3Smrg const VkPipelineDepthStencilStateCreateInfo *ds_info = 29637ec681f3Smrg raster_enabled ? pCreateInfo->pDepthStencilState : NULL; 29647ec681f3Smrg 29657ec681f3Smrg const VkPipelineRasterizationStateCreateInfo *rs_info = 29667ec681f3Smrg raster_enabled ? pCreateInfo->pRasterizationState : NULL; 29677ec681f3Smrg 29687ec681f3Smrg const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info = 29697ec681f3Smrg rs_info ? vk_find_struct_const( 29707ec681f3Smrg rs_info->pNext, 29717ec681f3Smrg PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) : 29727ec681f3Smrg NULL; 29737ec681f3Smrg 29747ec681f3Smrg const VkPipelineColorBlendStateCreateInfo *cb_info = 29757ec681f3Smrg raster_enabled ? pCreateInfo->pColorBlendState : NULL; 29767ec681f3Smrg 29777ec681f3Smrg const VkPipelineMultisampleStateCreateInfo *ms_info = 29787ec681f3Smrg raster_enabled ? 
pCreateInfo->pMultisampleState : NULL; 29797ec681f3Smrg 29807ec681f3Smrg const VkPipelineColorWriteCreateInfoEXT *cw_info = 29817ec681f3Smrg cb_info ? vk_find_struct_const(cb_info->pNext, 29827ec681f3Smrg PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) : 29837ec681f3Smrg NULL; 29847ec681f3Smrg 29857ec681f3Smrg pipeline_init_dynamic_state(pipeline, 29867ec681f3Smrg pCreateInfo->pDynamicState, 29877ec681f3Smrg vp_info, ds_info, cb_info, rs_info, cw_info); 29887ec681f3Smrg 29897ec681f3Smrg /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that 29907ec681f3Smrg * feature and it shouldn't be used by any pipeline. 29917ec681f3Smrg */ 29927ec681f3Smrg assert(!ds_info || !ds_info->depthBoundsTestEnable); 29937ec681f3Smrg 29947ec681f3Smrg v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info, 29957ec681f3Smrg rs_info, pv_info, ms_info); 29967ec681f3Smrg 29977ec681f3Smrg pipeline_set_ez_state(pipeline, ds_info); 29987ec681f3Smrg enable_depth_bias(pipeline, rs_info); 29997ec681f3Smrg pipeline_set_sample_mask(pipeline, ms_info); 30007ec681f3Smrg pipeline_set_sample_rate_shading(pipeline, ms_info); 30017ec681f3Smrg 30027ec681f3Smrg pipeline->primitive_restart = 30037ec681f3Smrg pCreateInfo->pInputAssemblyState->primitiveRestartEnable; 30047ec681f3Smrg 30057ec681f3Smrg result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator); 30067ec681f3Smrg 30077ec681f3Smrg if (result != VK_SUCCESS) { 30087ec681f3Smrg /* Caller would already destroy the pipeline, and we didn't allocate any 30097ec681f3Smrg * extra info. We don't need to do anything else. 
30107ec681f3Smrg */ 30117ec681f3Smrg return result; 30127ec681f3Smrg } 30137ec681f3Smrg 30147ec681f3Smrg const VkPipelineVertexInputStateCreateInfo *vi_info = 30157ec681f3Smrg pCreateInfo->pVertexInputState; 30167ec681f3Smrg 30177ec681f3Smrg const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info = 30187ec681f3Smrg vk_find_struct_const(vi_info->pNext, 30197ec681f3Smrg PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); 30207ec681f3Smrg 30217ec681f3Smrg v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info); 30227ec681f3Smrg 30237ec681f3Smrg if (pipeline_has_integer_vertex_attrib(pipeline)) { 30247ec681f3Smrg pipeline->default_attribute_values = 30257ec681f3Smrg v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline); 30267ec681f3Smrg if (!pipeline->default_attribute_values) 30277ec681f3Smrg return VK_ERROR_OUT_OF_DEVICE_MEMORY; 30287ec681f3Smrg } else { 30297ec681f3Smrg pipeline->default_attribute_values = NULL; 30307ec681f3Smrg } 30317ec681f3Smrg 30327ec681f3Smrg return result; 30337ec681f3Smrg} 30347ec681f3Smrg 30357ec681f3Smrgstatic VkResult 30367ec681f3Smrggraphics_pipeline_create(VkDevice _device, 30377ec681f3Smrg VkPipelineCache _cache, 30387ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 30397ec681f3Smrg const VkAllocationCallbacks *pAllocator, 30407ec681f3Smrg VkPipeline *pPipeline) 30417ec681f3Smrg{ 30427ec681f3Smrg V3DV_FROM_HANDLE(v3dv_device, device, _device); 30437ec681f3Smrg V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); 30447ec681f3Smrg 30457ec681f3Smrg struct v3dv_pipeline *pipeline; 30467ec681f3Smrg VkResult result; 30477ec681f3Smrg 30487ec681f3Smrg /* Use the default pipeline cache if none is specified */ 30497ec681f3Smrg if (cache == NULL && device->instance->default_pipeline_cache_enabled) 30507ec681f3Smrg cache = &device->default_pipeline_cache; 30517ec681f3Smrg 30527ec681f3Smrg pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline), 30537ec681f3Smrg 
VK_OBJECT_TYPE_PIPELINE); 30547ec681f3Smrg 30557ec681f3Smrg if (pipeline == NULL) 30567ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 30577ec681f3Smrg 30587ec681f3Smrg result = pipeline_init(pipeline, device, cache, 30597ec681f3Smrg pCreateInfo, 30607ec681f3Smrg pAllocator); 30617ec681f3Smrg 30627ec681f3Smrg if (result != VK_SUCCESS) { 30637ec681f3Smrg v3dv_destroy_pipeline(pipeline, device, pAllocator); 30647ec681f3Smrg if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT) 30657ec681f3Smrg *pPipeline = VK_NULL_HANDLE; 30667ec681f3Smrg return result; 30677ec681f3Smrg } 30687ec681f3Smrg 30697ec681f3Smrg *pPipeline = v3dv_pipeline_to_handle(pipeline); 30707ec681f3Smrg 30717ec681f3Smrg return VK_SUCCESS; 30727ec681f3Smrg} 30737ec681f3Smrg 30747ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL 30757ec681f3Smrgv3dv_CreateGraphicsPipelines(VkDevice _device, 30767ec681f3Smrg VkPipelineCache pipelineCache, 30777ec681f3Smrg uint32_t count, 30787ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfos, 30797ec681f3Smrg const VkAllocationCallbacks *pAllocator, 30807ec681f3Smrg VkPipeline *pPipelines) 30817ec681f3Smrg{ 30827ec681f3Smrg V3DV_FROM_HANDLE(v3dv_device, device, _device); 30837ec681f3Smrg VkResult result = VK_SUCCESS; 30847ec681f3Smrg 30857ec681f3Smrg if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 30867ec681f3Smrg mtx_lock(&device->pdevice->mutex); 30877ec681f3Smrg 30887ec681f3Smrg uint32_t i = 0; 30897ec681f3Smrg for (; i < count; i++) { 30907ec681f3Smrg VkResult local_result; 30917ec681f3Smrg 30927ec681f3Smrg local_result = graphics_pipeline_create(_device, 30937ec681f3Smrg pipelineCache, 30947ec681f3Smrg &pCreateInfos[i], 30957ec681f3Smrg pAllocator, 30967ec681f3Smrg &pPipelines[i]); 30977ec681f3Smrg 30987ec681f3Smrg if (local_result != VK_SUCCESS) { 30997ec681f3Smrg result = local_result; 31007ec681f3Smrg pPipelines[i] = VK_NULL_HANDLE; 31017ec681f3Smrg 31027ec681f3Smrg if (pCreateInfos[i].flags & 31037ec681f3Smrg 
VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) 31047ec681f3Smrg break; 31057ec681f3Smrg } 31067ec681f3Smrg } 31077ec681f3Smrg 31087ec681f3Smrg for (; i < count; i++) 31097ec681f3Smrg pPipelines[i] = VK_NULL_HANDLE; 31107ec681f3Smrg 31117ec681f3Smrg if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 31127ec681f3Smrg mtx_unlock(&device->pdevice->mutex); 31137ec681f3Smrg 31147ec681f3Smrg return result; 31157ec681f3Smrg} 31167ec681f3Smrg 31177ec681f3Smrgstatic void 31187ec681f3Smrgshared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) 31197ec681f3Smrg{ 31207ec681f3Smrg assert(glsl_type_is_vector_or_scalar(type)); 31217ec681f3Smrg 31227ec681f3Smrg uint32_t comp_size = glsl_type_is_boolean(type) 31237ec681f3Smrg ? 4 : glsl_get_bit_size(type) / 8; 31247ec681f3Smrg unsigned length = glsl_get_vector_elements(type); 31257ec681f3Smrg *size = comp_size * length, 31267ec681f3Smrg *align = comp_size * (length == 3 ? 4 : length); 31277ec681f3Smrg} 31287ec681f3Smrg 31297ec681f3Smrgstatic void 31307ec681f3Smrglower_cs_shared(struct nir_shader *nir) 31317ec681f3Smrg{ 31327ec681f3Smrg NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, 31337ec681f3Smrg nir_var_mem_shared, shared_type_info); 31347ec681f3Smrg NIR_PASS_V(nir, nir_lower_explicit_io, 31357ec681f3Smrg nir_var_mem_shared, nir_address_format_32bit_offset); 31367ec681f3Smrg} 31377ec681f3Smrg 31387ec681f3Smrgstatic VkResult 31397ec681f3Smrgpipeline_compile_compute(struct v3dv_pipeline *pipeline, 31407ec681f3Smrg struct v3dv_pipeline_cache *cache, 31417ec681f3Smrg const VkComputePipelineCreateInfo *info, 31427ec681f3Smrg const VkAllocationCallbacks *alloc) 31437ec681f3Smrg{ 31447ec681f3Smrg VkPipelineCreationFeedbackEXT pipeline_feedback = { 31457ec681f3Smrg .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT, 31467ec681f3Smrg }; 31477ec681f3Smrg int64_t pipeline_start = os_time_get_nano(); 31487ec681f3Smrg 31497ec681f3Smrg struct v3dv_device *device = pipeline->device; 31507ec681f3Smrg struct 
v3dv_physical_device *physical_device = 31517ec681f3Smrg &device->instance->physicalDevice; 31527ec681f3Smrg 31537ec681f3Smrg const VkPipelineShaderStageCreateInfo *sinfo = &info->stage; 31547ec681f3Smrg gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); 31557ec681f3Smrg 31567ec681f3Smrg struct v3dv_pipeline_stage *p_stage = 31577ec681f3Smrg vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8, 31587ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 31597ec681f3Smrg if (!p_stage) 31607ec681f3Smrg return VK_ERROR_OUT_OF_HOST_MEMORY; 31617ec681f3Smrg 31627ec681f3Smrg p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id); 31637ec681f3Smrg p_stage->pipeline = pipeline; 31647ec681f3Smrg p_stage->stage = gl_shader_stage_to_broadcom(stage); 31657ec681f3Smrg p_stage->entrypoint = sinfo->pName; 31667ec681f3Smrg p_stage->module = vk_shader_module_from_handle(sinfo->module); 31677ec681f3Smrg p_stage->spec_info = sinfo->pSpecializationInfo; 31687ec681f3Smrg p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 }; 31697ec681f3Smrg 31707ec681f3Smrg pipeline_hash_shader(p_stage->module, 31717ec681f3Smrg p_stage->entrypoint, 31727ec681f3Smrg stage, 31737ec681f3Smrg p_stage->spec_info, 31747ec681f3Smrg p_stage->shader_sha1); 31757ec681f3Smrg 31767ec681f3Smrg /* We try to get directly the variant first from the cache */ 31777ec681f3Smrg p_stage->nir = NULL; 31787ec681f3Smrg 31797ec681f3Smrg pipeline->cs = p_stage; 31807ec681f3Smrg pipeline->active_stages |= sinfo->stage; 31817ec681f3Smrg 31827ec681f3Smrg struct v3dv_pipeline_key pipeline_key; 31837ec681f3Smrg pipeline_populate_compute_key(pipeline, &pipeline_key, info); 31847ec681f3Smrg unsigned char pipeline_sha1[20]; 31857ec681f3Smrg pipeline_hash_compute(pipeline, &pipeline_key, pipeline_sha1); 31867ec681f3Smrg 31877ec681f3Smrg bool cache_hit = false; 31887ec681f3Smrg pipeline->shared_data = 31897ec681f3Smrg v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1, &cache_hit); 
31907ec681f3Smrg 31917ec681f3Smrg if (pipeline->shared_data != NULL) { 31927ec681f3Smrg assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]); 31937ec681f3Smrg if (cache_hit && cache != &pipeline->device->default_pipeline_cache) 31947ec681f3Smrg pipeline_feedback.flags |= 31957ec681f3Smrg VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT; 31967ec681f3Smrg 31977ec681f3Smrg goto success; 31987ec681f3Smrg } 31997ec681f3Smrg 32007ec681f3Smrg if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) 32017ec681f3Smrg return VK_PIPELINE_COMPILE_REQUIRED_EXT; 32027ec681f3Smrg 32037ec681f3Smrg pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline_sha1, 32047ec681f3Smrg pipeline, 32057ec681f3Smrg false); 32067ec681f3Smrg 32077ec681f3Smrg p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT; 32087ec681f3Smrg 32097ec681f3Smrg /* If not found on cache, compile it */ 32107ec681f3Smrg p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache); 32117ec681f3Smrg assert(p_stage->nir); 32127ec681f3Smrg 32137ec681f3Smrg st_nir_opts(p_stage->nir); 32147ec681f3Smrg pipeline_lower_nir(pipeline, p_stage, pipeline->layout); 32157ec681f3Smrg lower_cs_shared(p_stage->nir); 32167ec681f3Smrg 32177ec681f3Smrg VkResult result = VK_SUCCESS; 32187ec681f3Smrg 32197ec681f3Smrg struct v3d_key key; 32207ec681f3Smrg memset(&key, 0, sizeof(key)); 32217ec681f3Smrg pipeline_populate_v3d_key(&key, p_stage, 0, 32227ec681f3Smrg pipeline->device->features.robustBufferAccess); 32237ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] = 32247ec681f3Smrg pipeline_compile_shader_variant(p_stage, &key, sizeof(key), 32257ec681f3Smrg alloc, &result); 32267ec681f3Smrg 32277ec681f3Smrg if (result != VK_SUCCESS) 32287ec681f3Smrg return result; 32297ec681f3Smrg 32307ec681f3Smrg if (!upload_assembly(pipeline)) 32317ec681f3Smrg return VK_ERROR_OUT_OF_DEVICE_MEMORY; 32327ec681f3Smrg 32337ec681f3Smrg 
v3dv_pipeline_cache_upload_pipeline(pipeline, cache); 32347ec681f3Smrg 32357ec681f3Smrgsuccess: 32367ec681f3Smrg 32377ec681f3Smrg pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 32387ec681f3Smrg write_creation_feedback(pipeline, 32397ec681f3Smrg info->pNext, 32407ec681f3Smrg &pipeline_feedback, 32417ec681f3Smrg 1, 32427ec681f3Smrg &info->stage); 32437ec681f3Smrg 32447ec681f3Smrg /* As we got the variants in pipeline->shared_data, after compiling we 32457ec681f3Smrg * don't need the pipeline_stages 32467ec681f3Smrg */ 32477ec681f3Smrg pipeline_free_stages(device, pipeline, alloc); 32487ec681f3Smrg 32497ec681f3Smrg pipeline_check_spill_size(pipeline); 32507ec681f3Smrg 32517ec681f3Smrg return VK_SUCCESS; 32527ec681f3Smrg} 32537ec681f3Smrg 32547ec681f3Smrgstatic VkResult 32557ec681f3Smrgcompute_pipeline_init(struct v3dv_pipeline *pipeline, 32567ec681f3Smrg struct v3dv_device *device, 32577ec681f3Smrg struct v3dv_pipeline_cache *cache, 32587ec681f3Smrg const VkComputePipelineCreateInfo *info, 32597ec681f3Smrg const VkAllocationCallbacks *alloc) 32607ec681f3Smrg{ 32617ec681f3Smrg V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout); 32627ec681f3Smrg 32637ec681f3Smrg pipeline->device = device; 32647ec681f3Smrg pipeline->layout = layout; 32657ec681f3Smrg 32667ec681f3Smrg VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc); 32677ec681f3Smrg 32687ec681f3Smrg return result; 32697ec681f3Smrg} 32707ec681f3Smrg 32717ec681f3Smrgstatic VkResult 32727ec681f3Smrgcompute_pipeline_create(VkDevice _device, 32737ec681f3Smrg VkPipelineCache _cache, 32747ec681f3Smrg const VkComputePipelineCreateInfo *pCreateInfo, 32757ec681f3Smrg const VkAllocationCallbacks *pAllocator, 32767ec681f3Smrg VkPipeline *pPipeline) 32777ec681f3Smrg{ 32787ec681f3Smrg V3DV_FROM_HANDLE(v3dv_device, device, _device); 32797ec681f3Smrg V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); 32807ec681f3Smrg 32817ec681f3Smrg struct v3dv_pipeline *pipeline; 32827ec681f3Smrg 
VkResult result; 32837ec681f3Smrg 32847ec681f3Smrg /* Use the default pipeline cache if none is specified */ 32857ec681f3Smrg if (cache == NULL && device->instance->default_pipeline_cache_enabled) 32867ec681f3Smrg cache = &device->default_pipeline_cache; 32877ec681f3Smrg 32887ec681f3Smrg pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline), 32897ec681f3Smrg VK_OBJECT_TYPE_PIPELINE); 32907ec681f3Smrg if (pipeline == NULL) 32917ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 32927ec681f3Smrg 32937ec681f3Smrg result = compute_pipeline_init(pipeline, device, cache, 32947ec681f3Smrg pCreateInfo, pAllocator); 32957ec681f3Smrg if (result != VK_SUCCESS) { 32967ec681f3Smrg v3dv_destroy_pipeline(pipeline, device, pAllocator); 32977ec681f3Smrg if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT) 32987ec681f3Smrg *pPipeline = VK_NULL_HANDLE; 32997ec681f3Smrg return result; 33007ec681f3Smrg } 33017ec681f3Smrg 33027ec681f3Smrg *pPipeline = v3dv_pipeline_to_handle(pipeline); 33037ec681f3Smrg 33047ec681f3Smrg return VK_SUCCESS; 33057ec681f3Smrg} 33067ec681f3Smrg 33077ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL 33087ec681f3Smrgv3dv_CreateComputePipelines(VkDevice _device, 33097ec681f3Smrg VkPipelineCache pipelineCache, 33107ec681f3Smrg uint32_t createInfoCount, 33117ec681f3Smrg const VkComputePipelineCreateInfo *pCreateInfos, 33127ec681f3Smrg const VkAllocationCallbacks *pAllocator, 33137ec681f3Smrg VkPipeline *pPipelines) 33147ec681f3Smrg{ 33157ec681f3Smrg V3DV_FROM_HANDLE(v3dv_device, device, _device); 33167ec681f3Smrg VkResult result = VK_SUCCESS; 33177ec681f3Smrg 33187ec681f3Smrg if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 33197ec681f3Smrg mtx_lock(&device->pdevice->mutex); 33207ec681f3Smrg 33217ec681f3Smrg uint32_t i = 0; 33227ec681f3Smrg for (; i < createInfoCount; i++) { 33237ec681f3Smrg VkResult local_result; 33247ec681f3Smrg local_result = compute_pipeline_create(_device, 33257ec681f3Smrg pipelineCache, 33267ec681f3Smrg &pCreateInfos[i], 
33277ec681f3Smrg pAllocator, 33287ec681f3Smrg &pPipelines[i]); 33297ec681f3Smrg 33307ec681f3Smrg if (local_result != VK_SUCCESS) { 33317ec681f3Smrg result = local_result; 33327ec681f3Smrg pPipelines[i] = VK_NULL_HANDLE; 33337ec681f3Smrg 33347ec681f3Smrg if (pCreateInfos[i].flags & 33357ec681f3Smrg VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) 33367ec681f3Smrg break; 33377ec681f3Smrg } 33387ec681f3Smrg } 33397ec681f3Smrg 33407ec681f3Smrg for (; i < createInfoCount; i++) 33417ec681f3Smrg pPipelines[i] = VK_NULL_HANDLE; 33427ec681f3Smrg 33437ec681f3Smrg if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 33447ec681f3Smrg mtx_unlock(&device->pdevice->mutex); 33457ec681f3Smrg 33467ec681f3Smrg return result; 33477ec681f3Smrg} 3348