101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2016 Red Hat. 301e04c3fSmrg * Copyright © 2016 Bas Nieuwenhuizen 401e04c3fSmrg * 501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 601e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 701e04c3fSmrg * to deal in the Software without restriction, including without limitation 801e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 901e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 1001e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1101e04c3fSmrg * 1201e04c3fSmrg * The above copyright notice and this permission notice (including the next 1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1401e04c3fSmrg * Software. 1501e04c3fSmrg * 1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1901e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2001e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2101e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2201e04c3fSmrg * IN THE SOFTWARE. 2301e04c3fSmrg */ 2401e04c3fSmrg#include "nir/nir_builder.h" 257ec681f3Smrg#include "radv_meta.h" 2601e04c3fSmrg 2701e04c3fSmrg/* 2801e04c3fSmrg * GFX queue: Compute shader implementation of image->buffer copy 2901e04c3fSmrg * Compute queue: implementation also of buffer->image, image->image, and image clear. 3001e04c3fSmrg */ 3101e04c3fSmrg 3201e04c3fSmrg/* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options 3301e04c3fSmrg * for that. 3401e04c3fSmrg */ 3501e04c3fSmrgstatic nir_shader * 3601e04c3fSmrgbuild_nir_itob_compute_shader(struct radv_device *dev, bool is_3d) 3701e04c3fSmrg{ 387ec681f3Smrg enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D; 397ec681f3Smrg const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT); 407ec681f3Smrg const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT); 417ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, 427ec681f3Smrg is_3d ? "meta_itob_cs_3d" : "meta_itob_cs"); 437ec681f3Smrg b.shader->info.workgroup_size[0] = 8; 447ec681f3Smrg b.shader->info.workgroup_size[1] = 8; 457ec681f3Smrg b.shader->info.workgroup_size[2] = 1; 467ec681f3Smrg nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); 477ec681f3Smrg input_img->data.descriptor_set = 0; 487ec681f3Smrg input_img->data.binding = 0; 497ec681f3Smrg 507ec681f3Smrg nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 517ec681f3Smrg output_img->data.descriptor_set = 0; 527ec681f3Smrg output_img->data.binding = 1; 537ec681f3Smrg 547ec681f3Smrg nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2); 557ec681f3Smrg 567ec681f3Smrg nir_ssa_def *offset = 577ec681f3Smrg nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16); 587ec681f3Smrg nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16); 597ec681f3Smrg 607ec681f3Smrg nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset); 617ec681f3Smrg nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; 627ec681f3Smrg 637ec681f3Smrg nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 647ec681f3Smrg tex->sampler_dim = dim; 657ec681f3Smrg tex->op = nir_texop_txf; 667ec681f3Smrg tex->src[0].src_type = nir_tex_src_coord; 677ec681f3Smrg tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3)); 687ec681f3Smrg tex->src[1].src_type = nir_tex_src_lod; 697ec681f3Smrg tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 707ec681f3Smrg tex->src[2].src_type = nir_tex_src_texture_deref; 717ec681f3Smrg tex->src[2].src = nir_src_for_ssa(input_img_deref); 727ec681f3Smrg tex->dest_type = nir_type_float32; 737ec681f3Smrg tex->is_array = false; 747ec681f3Smrg tex->coord_components = is_3d ? 3 : 2; 757ec681f3Smrg 767ec681f3Smrg nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 777ec681f3Smrg nir_builder_instr_insert(&b, &tex->instr); 787ec681f3Smrg 797ec681f3Smrg nir_ssa_def *pos_x = nir_channel(&b, global_id, 0); 807ec681f3Smrg nir_ssa_def *pos_y = nir_channel(&b, global_id, 1); 817ec681f3Smrg 827ec681f3Smrg nir_ssa_def *tmp = nir_imul(&b, pos_y, stride); 837ec681f3Smrg tmp = nir_iadd(&b, tmp, pos_x); 847ec681f3Smrg 857ec681f3Smrg nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp); 867ec681f3Smrg 877ec681f3Smrg nir_ssa_def *outval = &tex->dest.ssa; 887ec681f3Smrg nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, 897ec681f3Smrg nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), 907ec681f3Smrg .image_dim = GLSL_SAMPLER_DIM_BUF); 917ec681f3Smrg 927ec681f3Smrg return b.shader; 9301e04c3fSmrg} 9401e04c3fSmrg 9501e04c3fSmrg/* Image to buffer - don't write use image accessors */ 9601e04c3fSmrgstatic VkResult 9701e04c3fSmrgradv_device_init_meta_itob_state(struct radv_device *device) 9801e04c3fSmrg{ 997ec681f3Smrg VkResult result; 1007ec681f3Smrg nir_shader *cs = build_nir_itob_compute_shader(device, false); 1017ec681f3Smrg nir_shader *cs_3d = NULL; 1027ec681f3Smrg 1037ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) 1047ec681f3Smrg cs_3d = build_nir_itob_compute_shader(device, true); 1057ec681f3Smrg 1067ec681f3Smrg /* 1077ec681f3Smrg * two descriptors one for the image being sampled 1087ec681f3Smrg * one for the buffer being written. 1097ec681f3Smrg */ 1107ec681f3Smrg VkDescriptorSetLayoutCreateInfo ds_create_info = { 1117ec681f3Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 1127ec681f3Smrg .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 1137ec681f3Smrg .bindingCount = 2, 1147ec681f3Smrg .pBindings = (VkDescriptorSetLayoutBinding[]){ 1157ec681f3Smrg {.binding = 0, 1167ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1177ec681f3Smrg .descriptorCount = 1, 1187ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 1197ec681f3Smrg .pImmutableSamplers = NULL}, 1207ec681f3Smrg {.binding = 1, 1217ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1227ec681f3Smrg .descriptorCount = 1, 1237ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 1247ec681f3Smrg .pImmutableSamplers = NULL}, 1257ec681f3Smrg }}; 1267ec681f3Smrg 1277ec681f3Smrg result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 1287ec681f3Smrg &device->meta_state.alloc, 1297ec681f3Smrg &device->meta_state.itob.img_ds_layout); 1307ec681f3Smrg if (result != VK_SUCCESS) 1317ec681f3Smrg goto fail; 1327ec681f3Smrg 1337ec681f3Smrg VkPipelineLayoutCreateInfo pl_create_info = { 1347ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 1357ec681f3Smrg .setLayoutCount = 1, 1367ec681f3Smrg .pSetLayouts = &device->meta_state.itob.img_ds_layout, 1377ec681f3Smrg .pushConstantRangeCount = 1, 1387ec681f3Smrg .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, 1397ec681f3Smrg }; 1407ec681f3Smrg 1417ec681f3Smrg result = 1427ec681f3Smrg radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 1437ec681f3Smrg &device->meta_state.alloc, &device->meta_state.itob.img_p_layout); 1447ec681f3Smrg if (result != VK_SUCCESS) 1457ec681f3Smrg goto fail; 1467ec681f3Smrg 1477ec681f3Smrg /* compute shader */ 1487ec681f3Smrg 1497ec681f3Smrg VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 1507ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 1517ec681f3Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 1527ec681f3Smrg .module = vk_shader_module_handle_from_nir(cs), 1537ec681f3Smrg .pName = "main", 1547ec681f3Smrg .pSpecializationInfo = NULL, 1557ec681f3Smrg }; 1567ec681f3Smrg 1577ec681f3Smrg VkComputePipelineCreateInfo vk_pipeline_info = { 1587ec681f3Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 1597ec681f3Smrg .stage = pipeline_shader_stage, 1607ec681f3Smrg .flags = 0, 1617ec681f3Smrg .layout = device->meta_state.itob.img_p_layout, 1627ec681f3Smrg }; 1637ec681f3Smrg 1647ec681f3Smrg result = radv_CreateComputePipelines(radv_device_to_handle(device), 1657ec681f3Smrg radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 1667ec681f3Smrg &vk_pipeline_info, NULL, &device->meta_state.itob.pipeline); 1677ec681f3Smrg if (result != VK_SUCCESS) 1687ec681f3Smrg goto fail; 1697ec681f3Smrg 1707ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) { 1717ec681f3Smrg VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = { 1727ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 1737ec681f3Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 1747ec681f3Smrg .module = vk_shader_module_handle_from_nir(cs_3d), 1757ec681f3Smrg .pName = "main", 1767ec681f3Smrg .pSpecializationInfo = NULL, 1777ec681f3Smrg }; 1787ec681f3Smrg 1797ec681f3Smrg VkComputePipelineCreateInfo vk_pipeline_info_3d = { 1807ec681f3Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 1817ec681f3Smrg .stage = pipeline_shader_stage_3d, 1827ec681f3Smrg .flags = 0, 1837ec681f3Smrg .layout = device->meta_state.itob.img_p_layout, 1847ec681f3Smrg }; 1857ec681f3Smrg 1867ec681f3Smrg result = radv_CreateComputePipelines( 1877ec681f3Smrg radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 1887ec681f3Smrg &vk_pipeline_info_3d, NULL, &device->meta_state.itob.pipeline_3d); 1897ec681f3Smrg if (result != VK_SUCCESS) 1907ec681f3Smrg goto fail; 1917ec681f3Smrg ralloc_free(cs_3d); 1927ec681f3Smrg } 1937ec681f3Smrg ralloc_free(cs); 1947ec681f3Smrg 1957ec681f3Smrg return VK_SUCCESS; 19601e04c3fSmrgfail: 1977ec681f3Smrg ralloc_free(cs); 1987ec681f3Smrg ralloc_free(cs_3d); 1997ec681f3Smrg return result; 20001e04c3fSmrg} 20101e04c3fSmrg 20201e04c3fSmrgstatic void 20301e04c3fSmrgradv_device_finish_meta_itob_state(struct radv_device *device) 20401e04c3fSmrg{ 2057ec681f3Smrg struct radv_meta_state *state = &device->meta_state; 2067ec681f3Smrg 2077ec681f3Smrg radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout, 2087ec681f3Smrg &state->alloc); 2097ec681f3Smrg radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout, 2107ec681f3Smrg &state->alloc); 2117ec681f3Smrg radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc); 2127ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) 2137ec681f3Smrg radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc); 21401e04c3fSmrg} 21501e04c3fSmrg 21601e04c3fSmrgstatic nir_shader * 21701e04c3fSmrgbuild_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d) 21801e04c3fSmrg{ 2197ec681f3Smrg enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D; 2207ec681f3Smrg const struct glsl_type *buf_type = 2217ec681f3Smrg glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); 2227ec681f3Smrg const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); 2237ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, 2247ec681f3Smrg is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs"); 2257ec681f3Smrg b.shader->info.workgroup_size[0] = 8; 2267ec681f3Smrg b.shader->info.workgroup_size[1] = 8; 2277ec681f3Smrg b.shader->info.workgroup_size[2] = 1; 2287ec681f3Smrg nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex"); 2297ec681f3Smrg input_img->data.descriptor_set = 0; 2307ec681f3Smrg input_img->data.binding = 0; 2317ec681f3Smrg 2327ec681f3Smrg nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 2337ec681f3Smrg output_img->data.descriptor_set = 0; 2347ec681f3Smrg output_img->data.binding = 1; 2357ec681f3Smrg 2367ec681f3Smrg nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2); 2377ec681f3Smrg 2387ec681f3Smrg nir_ssa_def *offset = 2397ec681f3Smrg nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16); 2407ec681f3Smrg nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16); 2417ec681f3Smrg 2427ec681f3Smrg nir_ssa_def *pos_x = nir_channel(&b, global_id, 0); 2437ec681f3Smrg nir_ssa_def *pos_y = nir_channel(&b, global_id, 1); 2447ec681f3Smrg 2457ec681f3Smrg nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride); 2467ec681f3Smrg buf_coord = nir_iadd(&b, buf_coord, pos_x); 2477ec681f3Smrg 2487ec681f3Smrg nir_ssa_def *coord = nir_iadd(&b, global_id, offset); 2497ec681f3Smrg nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; 2507ec681f3Smrg 2517ec681f3Smrg nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 2527ec681f3Smrg tex->sampler_dim = GLSL_SAMPLER_DIM_BUF; 2537ec681f3Smrg tex->op = nir_texop_txf; 2547ec681f3Smrg tex->src[0].src_type = nir_tex_src_coord; 2557ec681f3Smrg tex->src[0].src = nir_src_for_ssa(buf_coord); 2567ec681f3Smrg tex->src[1].src_type = nir_tex_src_lod; 2577ec681f3Smrg tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 2587ec681f3Smrg tex->src[2].src_type = nir_tex_src_texture_deref; 2597ec681f3Smrg tex->src[2].src = nir_src_for_ssa(input_img_deref); 2607ec681f3Smrg tex->dest_type = nir_type_float32; 2617ec681f3Smrg tex->is_array = false; 2627ec681f3Smrg tex->coord_components = 1; 2637ec681f3Smrg 2647ec681f3Smrg nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 2657ec681f3Smrg nir_builder_instr_insert(&b, &tex->instr); 2667ec681f3Smrg 2677ec681f3Smrg nir_ssa_def *outval = &tex->dest.ssa; 2687ec681f3Smrg 2697ec681f3Smrg nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), 2707ec681f3Smrg nir_channel(&b, coord, 1), 2717ec681f3Smrg is_3d ? nir_channel(&b, coord, 2) : nir_ssa_undef(&b, 1, 32), 2727ec681f3Smrg nir_ssa_undef(&b, 1, 32)); 2737ec681f3Smrg 2747ec681f3Smrg nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, 2757ec681f3Smrg nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), .image_dim = dim); 2767ec681f3Smrg 2777ec681f3Smrg return b.shader; 27801e04c3fSmrg} 27901e04c3fSmrg 28001e04c3fSmrg/* Buffer to image - don't write use image accessors */ 28101e04c3fSmrgstatic VkResult 28201e04c3fSmrgradv_device_init_meta_btoi_state(struct radv_device *device) 28301e04c3fSmrg{ 2847ec681f3Smrg VkResult result; 2857ec681f3Smrg nir_shader *cs = build_nir_btoi_compute_shader(device, false); 2867ec681f3Smrg nir_shader *cs_3d = NULL; 2877ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) 2887ec681f3Smrg cs_3d = build_nir_btoi_compute_shader(device, true); 2897ec681f3Smrg /* 2907ec681f3Smrg * two descriptors one for the image being sampled 2917ec681f3Smrg * one for the buffer being written. 2927ec681f3Smrg */ 2937ec681f3Smrg VkDescriptorSetLayoutCreateInfo ds_create_info = { 2947ec681f3Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 2957ec681f3Smrg .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 2967ec681f3Smrg .bindingCount = 2, 2977ec681f3Smrg .pBindings = (VkDescriptorSetLayoutBinding[]){ 2987ec681f3Smrg {.binding = 0, 2997ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 3007ec681f3Smrg .descriptorCount = 1, 3017ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 3027ec681f3Smrg .pImmutableSamplers = NULL}, 3037ec681f3Smrg {.binding = 1, 3047ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3057ec681f3Smrg .descriptorCount = 1, 3067ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 3077ec681f3Smrg .pImmutableSamplers = NULL}, 3087ec681f3Smrg }}; 3097ec681f3Smrg 3107ec681f3Smrg result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 3117ec681f3Smrg &device->meta_state.alloc, 3127ec681f3Smrg &device->meta_state.btoi.img_ds_layout); 3137ec681f3Smrg if (result != VK_SUCCESS) 3147ec681f3Smrg goto fail; 3157ec681f3Smrg 3167ec681f3Smrg VkPipelineLayoutCreateInfo pl_create_info = { 3177ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 3187ec681f3Smrg .setLayoutCount = 1, 3197ec681f3Smrg .pSetLayouts = &device->meta_state.btoi.img_ds_layout, 3207ec681f3Smrg .pushConstantRangeCount = 1, 3217ec681f3Smrg .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, 3227ec681f3Smrg }; 3237ec681f3Smrg 3247ec681f3Smrg result = 3257ec681f3Smrg radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 3267ec681f3Smrg &device->meta_state.alloc, &device->meta_state.btoi.img_p_layout); 3277ec681f3Smrg if (result != VK_SUCCESS) 3287ec681f3Smrg goto fail; 3297ec681f3Smrg 3307ec681f3Smrg /* compute shader */ 3317ec681f3Smrg 3327ec681f3Smrg VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 3337ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 3347ec681f3Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 3357ec681f3Smrg .module = vk_shader_module_handle_from_nir(cs), 3367ec681f3Smrg .pName = "main", 3377ec681f3Smrg .pSpecializationInfo = NULL, 3387ec681f3Smrg }; 3397ec681f3Smrg 3407ec681f3Smrg VkComputePipelineCreateInfo vk_pipeline_info = { 3417ec681f3Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 3427ec681f3Smrg .stage = pipeline_shader_stage, 3437ec681f3Smrg .flags = 0, 3447ec681f3Smrg .layout = device->meta_state.btoi.img_p_layout, 3457ec681f3Smrg }; 3467ec681f3Smrg 3477ec681f3Smrg result = radv_CreateComputePipelines(radv_device_to_handle(device), 3487ec681f3Smrg radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 3497ec681f3Smrg &vk_pipeline_info, NULL, &device->meta_state.btoi.pipeline); 3507ec681f3Smrg if (result != VK_SUCCESS) 3517ec681f3Smrg goto fail; 3527ec681f3Smrg 3537ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) { 3547ec681f3Smrg VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = { 3557ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 3567ec681f3Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 3577ec681f3Smrg .module = vk_shader_module_handle_from_nir(cs_3d), 3587ec681f3Smrg .pName = "main", 3597ec681f3Smrg .pSpecializationInfo = NULL, 3607ec681f3Smrg }; 3617ec681f3Smrg 3627ec681f3Smrg VkComputePipelineCreateInfo vk_pipeline_info_3d = { 3637ec681f3Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 3647ec681f3Smrg .stage = pipeline_shader_stage_3d, 3657ec681f3Smrg .flags = 0, 3667ec681f3Smrg .layout = device->meta_state.btoi.img_p_layout, 3677ec681f3Smrg }; 3687ec681f3Smrg 3697ec681f3Smrg result = radv_CreateComputePipelines( 3707ec681f3Smrg radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 3717ec681f3Smrg &vk_pipeline_info_3d, NULL, &device->meta_state.btoi.pipeline_3d); 3727ec681f3Smrg ralloc_free(cs_3d); 3737ec681f3Smrg } 3747ec681f3Smrg ralloc_free(cs); 3757ec681f3Smrg 3767ec681f3Smrg return VK_SUCCESS; 37701e04c3fSmrgfail: 3787ec681f3Smrg ralloc_free(cs_3d); 3797ec681f3Smrg ralloc_free(cs); 3807ec681f3Smrg return result; 38101e04c3fSmrg} 38201e04c3fSmrg 38301e04c3fSmrgstatic void 38401e04c3fSmrgradv_device_finish_meta_btoi_state(struct radv_device *device) 38501e04c3fSmrg{ 3867ec681f3Smrg struct radv_meta_state *state = &device->meta_state; 3877ec681f3Smrg 3887ec681f3Smrg radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout, 3897ec681f3Smrg &state->alloc); 3907ec681f3Smrg radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout, 3917ec681f3Smrg &state->alloc); 3927ec681f3Smrg radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc); 3937ec681f3Smrg radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc); 39401e04c3fSmrg} 39501e04c3fSmrg 39601e04c3fSmrg/* Buffer to image - special path for R32G32B32 */ 39701e04c3fSmrgstatic nir_shader * 39801e04c3fSmrgbuild_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev) 39901e04c3fSmrg{ 4007ec681f3Smrg const struct glsl_type *buf_type = 4017ec681f3Smrg glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); 4027ec681f3Smrg const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT); 4037ec681f3Smrg nir_builder b = 4047ec681f3Smrg nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_btoi_r32g32b32_cs"); 4057ec681f3Smrg b.shader->info.workgroup_size[0] = 8; 4067ec681f3Smrg b.shader->info.workgroup_size[1] = 8; 4077ec681f3Smrg b.shader->info.workgroup_size[2] = 1; 4087ec681f3Smrg nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex"); 4097ec681f3Smrg input_img->data.descriptor_set = 0; 4107ec681f3Smrg input_img->data.binding = 0; 4117ec681f3Smrg 4127ec681f3Smrg nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 4137ec681f3Smrg output_img->data.descriptor_set = 0; 4147ec681f3Smrg output_img->data.binding = 1; 4157ec681f3Smrg 4167ec681f3Smrg nir_ssa_def *global_id = get_global_ids(&b, 2); 4177ec681f3Smrg 4187ec681f3Smrg nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16); 4197ec681f3Smrg nir_ssa_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 16); 4207ec681f3Smrg nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16); 4217ec681f3Smrg 4227ec681f3Smrg nir_ssa_def *pos_x = nir_channel(&b, global_id, 0); 4237ec681f3Smrg nir_ssa_def *pos_y = nir_channel(&b, global_id, 1); 4247ec681f3Smrg 4257ec681f3Smrg nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride); 4267ec681f3Smrg buf_coord = nir_iadd(&b, buf_coord, pos_x); 4277ec681f3Smrg 4287ec681f3Smrg nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset); 4297ec681f3Smrg 4307ec681f3Smrg nir_ssa_def *global_pos = 4317ec681f3Smrg nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch), 4327ec681f3Smrg nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3))); 4337ec681f3Smrg 4347ec681f3Smrg nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; 4357ec681f3Smrg 4367ec681f3Smrg nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 4377ec681f3Smrg tex->sampler_dim = GLSL_SAMPLER_DIM_BUF; 4387ec681f3Smrg tex->op = nir_texop_txf; 4397ec681f3Smrg tex->src[0].src_type = nir_tex_src_coord; 4407ec681f3Smrg tex->src[0].src = nir_src_for_ssa(buf_coord); 4417ec681f3Smrg tex->src[1].src_type = nir_tex_src_lod; 4427ec681f3Smrg tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 4437ec681f3Smrg tex->src[2].src_type = nir_tex_src_texture_deref; 4447ec681f3Smrg tex->src[2].src = nir_src_for_ssa(input_img_deref); 4457ec681f3Smrg tex->dest_type = nir_type_float32; 4467ec681f3Smrg tex->is_array = false; 4477ec681f3Smrg tex->coord_components = 1; 4487ec681f3Smrg nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 4497ec681f3Smrg nir_builder_instr_insert(&b, &tex->instr); 4507ec681f3Smrg 4517ec681f3Smrg nir_ssa_def *outval = &tex->dest.ssa; 4527ec681f3Smrg 4537ec681f3Smrg for (int chan = 0; chan < 3; chan++) { 4547ec681f3Smrg nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan)); 4557ec681f3Smrg 4567ec681f3Smrg nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos); 4577ec681f3Smrg 4587ec681f3Smrg nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, 4597ec681f3Smrg nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, chan), 4607ec681f3Smrg nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF); 4617ec681f3Smrg } 4627ec681f3Smrg 4637ec681f3Smrg return b.shader; 46401e04c3fSmrg} 46501e04c3fSmrg 46601e04c3fSmrgstatic VkResult 46701e04c3fSmrgradv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device) 46801e04c3fSmrg{ 4697ec681f3Smrg VkResult result; 4707ec681f3Smrg nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device); 4717ec681f3Smrg 4727ec681f3Smrg VkDescriptorSetLayoutCreateInfo ds_create_info = { 4737ec681f3Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 4747ec681f3Smrg .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 4757ec681f3Smrg .bindingCount = 2, 4767ec681f3Smrg .pBindings = (VkDescriptorSetLayoutBinding[]){ 4777ec681f3Smrg {.binding = 0, 4787ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 4797ec681f3Smrg .descriptorCount = 1, 4807ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 4817ec681f3Smrg .pImmutableSamplers = NULL}, 4827ec681f3Smrg {.binding = 1, 4837ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 4847ec681f3Smrg .descriptorCount = 1, 4857ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 4867ec681f3Smrg .pImmutableSamplers = NULL}, 4877ec681f3Smrg }}; 4887ec681f3Smrg 4897ec681f3Smrg result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 4907ec681f3Smrg &device->meta_state.alloc, 4917ec681f3Smrg &device->meta_state.btoi_r32g32b32.img_ds_layout); 4927ec681f3Smrg if (result != VK_SUCCESS) 4937ec681f3Smrg goto fail; 4947ec681f3Smrg 4957ec681f3Smrg VkPipelineLayoutCreateInfo pl_create_info = { 4967ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 4977ec681f3Smrg .setLayoutCount = 1, 4987ec681f3Smrg .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout, 4997ec681f3Smrg .pushConstantRangeCount = 1, 5007ec681f3Smrg .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, 5017ec681f3Smrg }; 5027ec681f3Smrg 5037ec681f3Smrg result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 5047ec681f3Smrg &device->meta_state.alloc, 5057ec681f3Smrg &device->meta_state.btoi_r32g32b32.img_p_layout); 5067ec681f3Smrg if (result != VK_SUCCESS) 5077ec681f3Smrg goto fail; 5087ec681f3Smrg 5097ec681f3Smrg /* compute shader */ 5107ec681f3Smrg 5117ec681f3Smrg VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 5127ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 5137ec681f3Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 5147ec681f3Smrg .module = vk_shader_module_handle_from_nir(cs), 5157ec681f3Smrg .pName = "main", 5167ec681f3Smrg .pSpecializationInfo = NULL, 5177ec681f3Smrg }; 5187ec681f3Smrg 5197ec681f3Smrg VkComputePipelineCreateInfo vk_pipeline_info = { 5207ec681f3Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 5217ec681f3Smrg .stage = pipeline_shader_stage, 5227ec681f3Smrg .flags = 0, 5237ec681f3Smrg .layout = device->meta_state.btoi_r32g32b32.img_p_layout, 5247ec681f3Smrg }; 5257ec681f3Smrg 5267ec681f3Smrg result = radv_CreateComputePipelines( 5277ec681f3Smrg radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 5287ec681f3Smrg &vk_pipeline_info, NULL, &device->meta_state.btoi_r32g32b32.pipeline); 52901e04c3fSmrg 53001e04c3fSmrgfail: 5317ec681f3Smrg ralloc_free(cs); 5327ec681f3Smrg return result; 53301e04c3fSmrg} 53401e04c3fSmrg 53501e04c3fSmrgstatic void 53601e04c3fSmrgradv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device) 53701e04c3fSmrg{ 5387ec681f3Smrg struct radv_meta_state *state = &device->meta_state; 5397ec681f3Smrg 5407ec681f3Smrg radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout, 5417ec681f3Smrg &state->alloc); 5427ec681f3Smrg radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), 5437ec681f3Smrg state->btoi_r32g32b32.img_ds_layout, &state->alloc); 5447ec681f3Smrg radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline, 5457ec681f3Smrg &state->alloc); 54601e04c3fSmrg} 54701e04c3fSmrg 54801e04c3fSmrgstatic nir_shader * 5497ec681f3Smrgbuild_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples) 5507ec681f3Smrg{ 5517ec681f3Smrg bool is_multisampled = samples > 1; 5527ec681f3Smrg enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D 5537ec681f3Smrg : is_multisampled ? GLSL_SAMPLER_DIM_MS 5547ec681f3Smrg : GLSL_SAMPLER_DIM_2D; 5557ec681f3Smrg const struct glsl_type *buf_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT); 5567ec681f3Smrg const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); 5577ec681f3Smrg nir_builder b = nir_builder_init_simple_shader( 5587ec681f3Smrg MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples); 5597ec681f3Smrg b.shader->info.workgroup_size[0] = 8; 5607ec681f3Smrg b.shader->info.workgroup_size[1] = 8; 5617ec681f3Smrg b.shader->info.workgroup_size[2] = 1; 5627ec681f3Smrg nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex"); 5637ec681f3Smrg input_img->data.descriptor_set = 0; 5647ec681f3Smrg input_img->data.binding = 0; 5657ec681f3Smrg 5667ec681f3Smrg nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 5677ec681f3Smrg output_img->data.descriptor_set = 0; 5687ec681f3Smrg output_img->data.binding = 1; 5697ec681f3Smrg 5707ec681f3Smrg nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2); 5717ec681f3Smrg 5727ec681f3Smrg nir_ssa_def *src_offset = 5737ec681f3Smrg nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 24); 5747ec681f3Smrg nir_ssa_def *dst_offset = 5757ec681f3Smrg nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = 24); 5767ec681f3Smrg 5777ec681f3Smrg nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset); 5787ec681f3Smrg nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; 5797ec681f3Smrg 5807ec681f3Smrg nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset); 5817ec681f3Smrg 5827ec681f3Smrg nir_tex_instr *tex_instr[8]; 5837ec681f3Smrg for (uint32_t i = 0; i < samples; i++) { 5847ec681f3Smrg tex_instr[i] = nir_tex_instr_create(b.shader, is_multisampled ? 4 : 3); 5857ec681f3Smrg 5867ec681f3Smrg nir_tex_instr *tex = tex_instr[i]; 5877ec681f3Smrg tex->sampler_dim = dim; 5887ec681f3Smrg tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf; 5897ec681f3Smrg tex->src[0].src_type = nir_tex_src_coord; 5907ec681f3Smrg tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3)); 5917ec681f3Smrg tex->src[1].src_type = nir_tex_src_lod; 5927ec681f3Smrg tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 5937ec681f3Smrg tex->src[2].src_type = nir_tex_src_texture_deref; 5947ec681f3Smrg tex->src[2].src = nir_src_for_ssa(input_img_deref); 5957ec681f3Smrg if (is_multisampled) { 5967ec681f3Smrg tex->src[3].src_type = nir_tex_src_ms_index; 5977ec681f3Smrg tex->src[3].src = nir_src_for_ssa(nir_imm_int(&b, i)); 5987ec681f3Smrg } 5997ec681f3Smrg tex->dest_type = nir_type_float32; 6007ec681f3Smrg tex->is_array = false; 6017ec681f3Smrg tex->coord_components = is_3d ? 3 : 2; 6027ec681f3Smrg 6037ec681f3Smrg nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 6047ec681f3Smrg nir_builder_instr_insert(&b, &tex->instr); 6057ec681f3Smrg } 6067ec681f3Smrg 6077ec681f3Smrg nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), 6087ec681f3Smrg nir_channel(&b, dst_coord, 1), 6097ec681f3Smrg is_3d ? nir_channel(&b, dst_coord, 2) : nir_ssa_undef(&b, 1, 32), 6107ec681f3Smrg nir_ssa_undef(&b, 1, 32)); 6117ec681f3Smrg 6127ec681f3Smrg for (uint32_t i = 0; i < samples; i++) { 6137ec681f3Smrg nir_ssa_def *outval = &tex_instr[i]->dest.ssa; 6147ec681f3Smrg nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, 6157ec681f3Smrg nir_imm_int(&b, i), outval, nir_imm_int(&b, 0), .image_dim = dim); 6167ec681f3Smrg } 6177ec681f3Smrg 6187ec681f3Smrg return b.shader; 6197ec681f3Smrg} 6207ec681f3Smrg 6217ec681f3Smrgstatic VkResult 6227ec681f3Smrgcreate_itoi_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline) 62301e04c3fSmrg{ 6247ec681f3Smrg struct radv_meta_state *state = &device->meta_state; 6257ec681f3Smrg nir_shader *cs = build_nir_itoi_compute_shader(device, false, samples); 6267ec681f3Smrg VkResult result; 6277ec681f3Smrg 6287ec681f3Smrg VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 6297ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 6307ec681f3Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 6317ec681f3Smrg .module = vk_shader_module_handle_from_nir(cs), 6327ec681f3Smrg .pName = "main", 6337ec681f3Smrg .pSpecializationInfo = NULL, 6347ec681f3Smrg }; 6357ec681f3Smrg 6367ec681f3Smrg VkComputePipelineCreateInfo vk_pipeline_info = { 6377ec681f3Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 6387ec681f3Smrg .stage = pipeline_shader_stage, 6397ec681f3Smrg .flags = 0, 6407ec681f3Smrg .layout = state->itoi.img_p_layout, 6417ec681f3Smrg }; 6427ec681f3Smrg 6437ec681f3Smrg result = radv_CreateComputePipelines(radv_device_to_handle(device), 6447ec681f3Smrg radv_pipeline_cache_to_handle(&state->cache), 1, 6457ec681f3Smrg &vk_pipeline_info, NULL, pipeline); 6467ec681f3Smrg ralloc_free(cs); 6477ec681f3Smrg return result; 64801e04c3fSmrg} 64901e04c3fSmrg 65001e04c3fSmrg/* image to image - don't write use image accessors */ 65101e04c3fSmrgstatic VkResult 65201e04c3fSmrgradv_device_init_meta_itoi_state(struct radv_device *device) 65301e04c3fSmrg{ 6547ec681f3Smrg VkResult result; 6557ec681f3Smrg 6567ec681f3Smrg /* 6577ec681f3Smrg * two descriptors one for the image being sampled 6587ec681f3Smrg * one for the buffer being written. 6597ec681f3Smrg */ 6607ec681f3Smrg VkDescriptorSetLayoutCreateInfo ds_create_info = { 6617ec681f3Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 6627ec681f3Smrg .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 6637ec681f3Smrg .bindingCount = 2, 6647ec681f3Smrg .pBindings = (VkDescriptorSetLayoutBinding[]){ 6657ec681f3Smrg {.binding = 0, 6667ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 6677ec681f3Smrg .descriptorCount = 1, 6687ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 6697ec681f3Smrg .pImmutableSamplers = NULL}, 6707ec681f3Smrg {.binding = 1, 6717ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 6727ec681f3Smrg .descriptorCount = 1, 6737ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 6747ec681f3Smrg .pImmutableSamplers = NULL}, 6757ec681f3Smrg }}; 6767ec681f3Smrg 6777ec681f3Smrg result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 6787ec681f3Smrg &device->meta_state.alloc, 6797ec681f3Smrg &device->meta_state.itoi.img_ds_layout); 6807ec681f3Smrg if (result != VK_SUCCESS) 6817ec681f3Smrg goto fail; 6827ec681f3Smrg 6837ec681f3Smrg VkPipelineLayoutCreateInfo pl_create_info = { 6847ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 6857ec681f3Smrg .setLayoutCount = 1, 6867ec681f3Smrg .pSetLayouts = &device->meta_state.itoi.img_ds_layout, 6877ec681f3Smrg .pushConstantRangeCount = 1, 6887ec681f3Smrg .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24}, 6897ec681f3Smrg }; 6907ec681f3Smrg 6917ec681f3Smrg result = 6927ec681f3Smrg radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 6937ec681f3Smrg &device->meta_state.alloc, &device->meta_state.itoi.img_p_layout); 6947ec681f3Smrg if (result != VK_SUCCESS) 6957ec681f3Smrg goto fail; 6967ec681f3Smrg 6977ec681f3Smrg for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) { 6987ec681f3Smrg uint32_t samples = 1 << i; 6997ec681f3Smrg result = create_itoi_pipeline(device, samples, &device->meta_state.itoi.pipeline[i]); 7007ec681f3Smrg if (result != VK_SUCCESS) 7017ec681f3Smrg goto fail; 7027ec681f3Smrg } 7037ec681f3Smrg 7047ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) { 7057ec681f3Smrg nir_shader *cs_3d = build_nir_itoi_compute_shader(device, true, 1); 7067ec681f3Smrg 7077ec681f3Smrg VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = { 7087ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 7097ec681f3Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 7107ec681f3Smrg .module = vk_shader_module_handle_from_nir(cs_3d), 7117ec681f3Smrg .pName = "main", 7127ec681f3Smrg .pSpecializationInfo = NULL, 7137ec681f3Smrg }; 7147ec681f3Smrg 7157ec681f3Smrg VkComputePipelineCreateInfo vk_pipeline_info_3d = { 7167ec681f3Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 7177ec681f3Smrg .stage = pipeline_shader_stage_3d, 7187ec681f3Smrg .flags = 0, 7197ec681f3Smrg .layout = device->meta_state.itoi.img_p_layout, 7207ec681f3Smrg }; 7217ec681f3Smrg 7227ec681f3Smrg result = radv_CreateComputePipelines( 7237ec681f3Smrg radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 7247ec681f3Smrg &vk_pipeline_info_3d, NULL, &device->meta_state.itoi.pipeline_3d); 7257ec681f3Smrg ralloc_free(cs_3d); 7267ec681f3Smrg } 7277ec681f3Smrg 7287ec681f3Smrg return VK_SUCCESS; 72901e04c3fSmrgfail: 7307ec681f3Smrg return result; 73101e04c3fSmrg} 73201e04c3fSmrg 73301e04c3fSmrgstatic void 73401e04c3fSmrgradv_device_finish_meta_itoi_state(struct radv_device *device) 73501e04c3fSmrg{ 7367ec681f3Smrg struct radv_meta_state *state = &device->meta_state; 7377ec681f3Smrg 7387ec681f3Smrg radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout, 7397ec681f3Smrg &state->alloc); 7407ec681f3Smrg radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout, 7417ec681f3Smrg &state->alloc); 7427ec681f3Smrg 7437ec681f3Smrg for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { 7447ec681f3Smrg radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc); 7457ec681f3Smrg } 7467ec681f3Smrg 7477ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) 7487ec681f3Smrg radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d, &state->alloc); 74901e04c3fSmrg} 75001e04c3fSmrg 75101e04c3fSmrgstatic nir_shader * 75201e04c3fSmrgbuild_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev) 75301e04c3fSmrg{ 7547ec681f3Smrg const struct glsl_type *type = 7557ec681f3Smrg glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); 7567ec681f3Smrg const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT); 7577ec681f3Smrg nir_builder b = 7587ec681f3Smrg nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_itoi_r32g32b32_cs"); 7597ec681f3Smrg b.shader->info.workgroup_size[0] = 8; 7607ec681f3Smrg b.shader->info.workgroup_size[1] = 8; 7617ec681f3Smrg b.shader->info.workgroup_size[2] = 1; 7627ec681f3Smrg nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "input_img"); 7637ec681f3Smrg input_img->data.descriptor_set = 0; 7647ec681f3Smrg input_img->data.binding = 0; 7657ec681f3Smrg 7667ec681f3Smrg nir_variable *output_img = 7677ec681f3Smrg nir_variable_create(b.shader, nir_var_uniform, img_type, "output_img"); 7687ec681f3Smrg output_img->data.descriptor_set = 0; 7697ec681f3Smrg output_img->data.binding = 1; 7707ec681f3Smrg 7717ec681f3Smrg nir_ssa_def *global_id = get_global_ids(&b, 2); 7727ec681f3Smrg 7737ec681f3Smrg nir_ssa_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 24); 7747ec681f3Smrg nir_ssa_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24); 7757ec681f3Smrg 7767ec681f3Smrg nir_ssa_def *src_stride = nir_channel(&b, src_offset, 2); 7777ec681f3Smrg nir_ssa_def *dst_stride = nir_channel(&b, dst_offset, 2); 7787ec681f3Smrg 7797ec681f3Smrg nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset); 7807ec681f3Smrg nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset); 7817ec681f3Smrg 7827ec681f3Smrg nir_ssa_def *src_global_pos = 7837ec681f3Smrg nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride), 7847ec681f3Smrg nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3))); 7857ec681f3Smrg 7867ec681f3Smrg nir_ssa_def *dst_global_pos = 7877ec681f3Smrg nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride), 7887ec681f3Smrg nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3))); 7897ec681f3Smrg 7907ec681f3Smrg for (int chan = 0; chan < 3; chan++) { 7917ec681f3Smrg /* src */ 7927ec681f3Smrg nir_ssa_def *src_local_pos = nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan)); 7937ec681f3Smrg nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; 7947ec681f3Smrg 7957ec681f3Smrg nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 7967ec681f3Smrg tex->sampler_dim = GLSL_SAMPLER_DIM_BUF; 7977ec681f3Smrg tex->op = nir_texop_txf; 7987ec681f3Smrg tex->src[0].src_type = nir_tex_src_coord; 7997ec681f3Smrg tex->src[0].src = nir_src_for_ssa(src_local_pos); 8007ec681f3Smrg tex->src[1].src_type = nir_tex_src_lod; 8017ec681f3Smrg tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 8027ec681f3Smrg tex->src[2].src_type = nir_tex_src_texture_deref; 8037ec681f3Smrg tex->src[2].src = nir_src_for_ssa(input_img_deref); 8047ec681f3Smrg tex->dest_type = nir_type_float32; 8057ec681f3Smrg tex->is_array = false; 8067ec681f3Smrg tex->coord_components = 1; 8077ec681f3Smrg nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 8087ec681f3Smrg nir_builder_instr_insert(&b, &tex->instr); 8097ec681f3Smrg 8107ec681f3Smrg nir_ssa_def *outval = &tex->dest.ssa; 8117ec681f3Smrg 8127ec681f3Smrg /* dst */ 8137ec681f3Smrg nir_ssa_def *dst_local_pos = nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan)); 8147ec681f3Smrg 8157ec681f3Smrg nir_ssa_def *dst_coord = 8167ec681f3Smrg nir_vec4(&b, dst_local_pos, dst_local_pos, dst_local_pos, dst_local_pos); 8177ec681f3Smrg 8187ec681f3Smrg nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, 8197ec681f3Smrg nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, 0), 8207ec681f3Smrg nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF); 8217ec681f3Smrg } 8227ec681f3Smrg 8237ec681f3Smrg return b.shader; 82401e04c3fSmrg} 82501e04c3fSmrg 82601e04c3fSmrg/* Image to image - special path for R32G32B32 */ 82701e04c3fSmrgstatic VkResult 82801e04c3fSmrgradv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device) 82901e04c3fSmrg{ 8307ec681f3Smrg VkResult result; 8317ec681f3Smrg nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device); 8327ec681f3Smrg 8337ec681f3Smrg VkDescriptorSetLayoutCreateInfo ds_create_info = { 8347ec681f3Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 8357ec681f3Smrg .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 8367ec681f3Smrg .bindingCount = 2, 8377ec681f3Smrg .pBindings = (VkDescriptorSetLayoutBinding[]){ 8387ec681f3Smrg {.binding = 0, 8397ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 8407ec681f3Smrg .descriptorCount = 1, 8417ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 8427ec681f3Smrg .pImmutableSamplers = NULL}, 8437ec681f3Smrg {.binding = 1, 8447ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 8457ec681f3Smrg .descriptorCount = 1, 8467ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 8477ec681f3Smrg .pImmutableSamplers = NULL}, 8487ec681f3Smrg }}; 8497ec681f3Smrg 8507ec681f3Smrg result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 8517ec681f3Smrg &device->meta_state.alloc, 8527ec681f3Smrg &device->meta_state.itoi_r32g32b32.img_ds_layout); 8537ec681f3Smrg if (result != VK_SUCCESS) 8547ec681f3Smrg goto fail; 8557ec681f3Smrg 8567ec681f3Smrg VkPipelineLayoutCreateInfo pl_create_info = { 8577ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 8587ec681f3Smrg .setLayoutCount = 1, 8597ec681f3Smrg .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout, 8607ec681f3Smrg .pushConstantRangeCount = 1, 8617ec681f3Smrg .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24}, 8627ec681f3Smrg }; 8637ec681f3Smrg 8647ec681f3Smrg result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 8657ec681f3Smrg &device->meta_state.alloc, 8667ec681f3Smrg &device->meta_state.itoi_r32g32b32.img_p_layout); 8677ec681f3Smrg if (result != VK_SUCCESS) 8687ec681f3Smrg goto fail; 8697ec681f3Smrg 8707ec681f3Smrg /* compute shader */ 8717ec681f3Smrg 8727ec681f3Smrg VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 8737ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 8747ec681f3Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 8757ec681f3Smrg .module = vk_shader_module_handle_from_nir(cs), 8767ec681f3Smrg .pName = "main", 8777ec681f3Smrg .pSpecializationInfo = NULL, 8787ec681f3Smrg }; 8797ec681f3Smrg 8807ec681f3Smrg VkComputePipelineCreateInfo vk_pipeline_info = { 8817ec681f3Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 8827ec681f3Smrg .stage = pipeline_shader_stage, 8837ec681f3Smrg .flags = 0, 8847ec681f3Smrg .layout = device->meta_state.itoi_r32g32b32.img_p_layout, 8857ec681f3Smrg }; 8867ec681f3Smrg 8877ec681f3Smrg result = radv_CreateComputePipelines( 8887ec681f3Smrg radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 8897ec681f3Smrg &vk_pipeline_info, NULL, &device->meta_state.itoi_r32g32b32.pipeline); 89001e04c3fSmrg 89101e04c3fSmrgfail: 8927ec681f3Smrg ralloc_free(cs); 8937ec681f3Smrg return result; 89401e04c3fSmrg} 89501e04c3fSmrg 89601e04c3fSmrgstatic void 89701e04c3fSmrgradv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device) 89801e04c3fSmrg{ 8997ec681f3Smrg struct radv_meta_state *state = &device->meta_state; 9007ec681f3Smrg 9017ec681f3Smrg radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout, 9027ec681f3Smrg &state->alloc); 9037ec681f3Smrg radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), 9047ec681f3Smrg state->itoi_r32g32b32.img_ds_layout, &state->alloc); 9057ec681f3Smrg radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline, 9067ec681f3Smrg &state->alloc); 90701e04c3fSmrg} 90801e04c3fSmrg 90901e04c3fSmrgstatic nir_shader * 9107ec681f3Smrgbuild_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples) 9117ec681f3Smrg{ 9127ec681f3Smrg bool is_multisampled = samples > 1; 9137ec681f3Smrg enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D 9147ec681f3Smrg : is_multisampled ? GLSL_SAMPLER_DIM_MS 9157ec681f3Smrg : GLSL_SAMPLER_DIM_2D; 9167ec681f3Smrg const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); 9177ec681f3Smrg nir_builder b = nir_builder_init_simple_shader( 9187ec681f3Smrg MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples); 9197ec681f3Smrg b.shader->info.workgroup_size[0] = 8; 9207ec681f3Smrg b.shader->info.workgroup_size[1] = 8; 9217ec681f3Smrg b.shader->info.workgroup_size[2] = 1; 9227ec681f3Smrg 9237ec681f3Smrg nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 9247ec681f3Smrg output_img->data.descriptor_set = 0; 9257ec681f3Smrg output_img->data.binding = 0; 9267ec681f3Smrg 9277ec681f3Smrg nir_ssa_def *global_id = get_global_ids(&b, 2); 9287ec681f3Smrg 9297ec681f3Smrg nir_ssa_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 20); 9307ec681f3Smrg nir_ssa_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20); 9317ec681f3Smrg 9327ec681f3Smrg nir_ssa_def *comps[4]; 9337ec681f3Smrg comps[0] = nir_channel(&b, global_id, 0); 9347ec681f3Smrg comps[1] = nir_channel(&b, global_id, 1); 9357ec681f3Smrg comps[2] = layer; 9367ec681f3Smrg comps[3] = nir_ssa_undef(&b, 1, 32); 9377ec681f3Smrg global_id = nir_vec(&b, comps, 4); 9387ec681f3Smrg 9397ec681f3Smrg for (uint32_t i = 0; i < samples; i++) { 9407ec681f3Smrg nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, 9417ec681f3Smrg nir_imm_int(&b, i), clear_val, nir_imm_int(&b, 0), .image_dim = dim); 9427ec681f3Smrg } 9437ec681f3Smrg 9447ec681f3Smrg return b.shader; 9457ec681f3Smrg} 9467ec681f3Smrg 9477ec681f3Smrgstatic VkResult 9487ec681f3Smrgcreate_cleari_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline) 94901e04c3fSmrg{ 9507ec681f3Smrg nir_shader *cs = build_nir_cleari_compute_shader(device, false, samples); 9517ec681f3Smrg VkResult result; 9527ec681f3Smrg 9537ec681f3Smrg VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 9547ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 9557ec681f3Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 9567ec681f3Smrg .module = vk_shader_module_handle_from_nir(cs), 9577ec681f3Smrg .pName = "main", 9587ec681f3Smrg .pSpecializationInfo = NULL, 9597ec681f3Smrg }; 9607ec681f3Smrg 9617ec681f3Smrg VkComputePipelineCreateInfo vk_pipeline_info = { 9627ec681f3Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 9637ec681f3Smrg .stage = pipeline_shader_stage, 9647ec681f3Smrg .flags = 0, 9657ec681f3Smrg .layout = device->meta_state.cleari.img_p_layout, 9667ec681f3Smrg }; 9677ec681f3Smrg 9687ec681f3Smrg result = radv_CreateComputePipelines(radv_device_to_handle(device), 9697ec681f3Smrg radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 9707ec681f3Smrg &vk_pipeline_info, NULL, pipeline); 9717ec681f3Smrg ralloc_free(cs); 9727ec681f3Smrg return result; 97301e04c3fSmrg} 97401e04c3fSmrg 97501e04c3fSmrgstatic VkResult 97601e04c3fSmrgradv_device_init_meta_cleari_state(struct radv_device *device) 97701e04c3fSmrg{ 9787ec681f3Smrg VkResult result; 9797ec681f3Smrg 9807ec681f3Smrg /* 9817ec681f3Smrg * two descriptors one for the image being sampled 9827ec681f3Smrg * one for the buffer being written. 9837ec681f3Smrg */ 9847ec681f3Smrg VkDescriptorSetLayoutCreateInfo ds_create_info = { 9857ec681f3Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 9867ec681f3Smrg .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 9877ec681f3Smrg .bindingCount = 1, 9887ec681f3Smrg .pBindings = (VkDescriptorSetLayoutBinding[]){ 9897ec681f3Smrg {.binding = 0, 9907ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 9917ec681f3Smrg .descriptorCount = 1, 9927ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 9937ec681f3Smrg .pImmutableSamplers = NULL}, 9947ec681f3Smrg }}; 9957ec681f3Smrg 9967ec681f3Smrg result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 9977ec681f3Smrg &device->meta_state.alloc, 9987ec681f3Smrg &device->meta_state.cleari.img_ds_layout); 9997ec681f3Smrg if (result != VK_SUCCESS) 10007ec681f3Smrg goto fail; 10017ec681f3Smrg 10027ec681f3Smrg VkPipelineLayoutCreateInfo pl_create_info = { 10037ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 10047ec681f3Smrg .setLayoutCount = 1, 10057ec681f3Smrg .pSetLayouts = &device->meta_state.cleari.img_ds_layout, 10067ec681f3Smrg .pushConstantRangeCount = 1, 10077ec681f3Smrg .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20}, 10087ec681f3Smrg }; 10097ec681f3Smrg 10107ec681f3Smrg result = 10117ec681f3Smrg radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 10127ec681f3Smrg &device->meta_state.alloc, &device->meta_state.cleari.img_p_layout); 10137ec681f3Smrg if (result != VK_SUCCESS) 10147ec681f3Smrg goto fail; 10157ec681f3Smrg 10167ec681f3Smrg for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) { 10177ec681f3Smrg uint32_t samples = 1 << i; 10187ec681f3Smrg result = create_cleari_pipeline(device, samples, &device->meta_state.cleari.pipeline[i]); 10197ec681f3Smrg if (result != VK_SUCCESS) 10207ec681f3Smrg goto fail; 10217ec681f3Smrg } 10227ec681f3Smrg 10237ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) { 10247ec681f3Smrg nir_shader *cs_3d = build_nir_cleari_compute_shader(device, true, 1); 10257ec681f3Smrg 10267ec681f3Smrg /* compute shader */ 10277ec681f3Smrg VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = { 10287ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 10297ec681f3Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 10307ec681f3Smrg .module = vk_shader_module_handle_from_nir(cs_3d), 10317ec681f3Smrg .pName = "main", 10327ec681f3Smrg .pSpecializationInfo = NULL, 10337ec681f3Smrg }; 10347ec681f3Smrg 10357ec681f3Smrg VkComputePipelineCreateInfo vk_pipeline_info_3d = { 10367ec681f3Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 10377ec681f3Smrg .stage = pipeline_shader_stage_3d, 10387ec681f3Smrg .flags = 0, 10397ec681f3Smrg .layout = device->meta_state.cleari.img_p_layout, 10407ec681f3Smrg }; 10417ec681f3Smrg 10427ec681f3Smrg result = radv_CreateComputePipelines( 10437ec681f3Smrg radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 10447ec681f3Smrg &vk_pipeline_info_3d, NULL, &device->meta_state.cleari.pipeline_3d); 10457ec681f3Smrg ralloc_free(cs_3d); 10467ec681f3Smrg } 10477ec681f3Smrg 10487ec681f3Smrg return VK_SUCCESS; 104901e04c3fSmrgfail: 10507ec681f3Smrg return result; 105101e04c3fSmrg} 105201e04c3fSmrg 105301e04c3fSmrgstatic void 105401e04c3fSmrgradv_device_finish_meta_cleari_state(struct radv_device *device) 105501e04c3fSmrg{ 10567ec681f3Smrg struct radv_meta_state *state = &device->meta_state; 10577ec681f3Smrg 10587ec681f3Smrg radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout, 10597ec681f3Smrg &state->alloc); 10607ec681f3Smrg radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout, 10617ec681f3Smrg &state->alloc); 10627ec681f3Smrg 10637ec681f3Smrg for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { 10647ec681f3Smrg radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc); 10657ec681f3Smrg } 10667ec681f3Smrg 10677ec681f3Smrg radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline_3d, &state->alloc); 106801e04c3fSmrg} 106901e04c3fSmrg 107001e04c3fSmrg/* Special path for clearing R32G32B32 images using a compute shader. */ 107101e04c3fSmrgstatic nir_shader * 107201e04c3fSmrgbuild_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev) 107301e04c3fSmrg{ 10747ec681f3Smrg const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT); 10757ec681f3Smrg nir_builder b = 10767ec681f3Smrg nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_cleari_r32g32b32_cs"); 10777ec681f3Smrg b.shader->info.workgroup_size[0] = 8; 10787ec681f3Smrg b.shader->info.workgroup_size[1] = 8; 10797ec681f3Smrg b.shader->info.workgroup_size[2] = 1; 10807ec681f3Smrg 10817ec681f3Smrg nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 10827ec681f3Smrg output_img->data.descriptor_set = 0; 10837ec681f3Smrg output_img->data.binding = 0; 10847ec681f3Smrg 10857ec681f3Smrg nir_ssa_def *global_id = get_global_ids(&b, 2); 10867ec681f3Smrg 10877ec681f3Smrg nir_ssa_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 16); 10887ec681f3Smrg nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16); 10897ec681f3Smrg 10907ec681f3Smrg nir_ssa_def *global_x = nir_channel(&b, global_id, 0); 10917ec681f3Smrg nir_ssa_def *global_y = nir_channel(&b, global_id, 1); 10927ec681f3Smrg 10937ec681f3Smrg nir_ssa_def *global_pos = 10947ec681f3Smrg nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul(&b, global_x, nir_imm_int(&b, 3))); 10957ec681f3Smrg 10967ec681f3Smrg for (unsigned chan = 0; chan < 3; chan++) { 10977ec681f3Smrg nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan)); 10987ec681f3Smrg 10997ec681f3Smrg nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos); 11007ec681f3Smrg 11017ec681f3Smrg nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, 11027ec681f3Smrg nir_ssa_undef(&b, 1, 32), nir_channel(&b, clear_val, chan), 11037ec681f3Smrg nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF); 11047ec681f3Smrg } 11057ec681f3Smrg 11067ec681f3Smrg return b.shader; 110701e04c3fSmrg} 110801e04c3fSmrg 110901e04c3fSmrgstatic VkResult 111001e04c3fSmrgradv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device) 111101e04c3fSmrg{ 11127ec681f3Smrg VkResult result; 11137ec681f3Smrg nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device); 11147ec681f3Smrg 11157ec681f3Smrg VkDescriptorSetLayoutCreateInfo ds_create_info = { 11167ec681f3Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 11177ec681f3Smrg .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 11187ec681f3Smrg .bindingCount = 1, 11197ec681f3Smrg .pBindings = (VkDescriptorSetLayoutBinding[]){ 11207ec681f3Smrg {.binding = 0, 11217ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 11227ec681f3Smrg .descriptorCount = 1, 11237ec681f3Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 11247ec681f3Smrg .pImmutableSamplers = NULL}, 11257ec681f3Smrg }}; 11267ec681f3Smrg 11277ec681f3Smrg result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 11287ec681f3Smrg &device->meta_state.alloc, 11297ec681f3Smrg &device->meta_state.cleari_r32g32b32.img_ds_layout); 11307ec681f3Smrg if (result != VK_SUCCESS) 11317ec681f3Smrg goto fail; 11327ec681f3Smrg 11337ec681f3Smrg VkPipelineLayoutCreateInfo pl_create_info = { 11347ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 11357ec681f3Smrg .setLayoutCount = 1, 11367ec681f3Smrg .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout, 11377ec681f3Smrg .pushConstantRangeCount = 1, 11387ec681f3Smrg .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, 11397ec681f3Smrg }; 11407ec681f3Smrg 11417ec681f3Smrg result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 11427ec681f3Smrg &device->meta_state.alloc, 11437ec681f3Smrg &device->meta_state.cleari_r32g32b32.img_p_layout); 11447ec681f3Smrg if (result != VK_SUCCESS) 11457ec681f3Smrg goto fail; 11467ec681f3Smrg 11477ec681f3Smrg /* compute shader */ 11487ec681f3Smrg VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 11497ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 11507ec681f3Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 11517ec681f3Smrg .module = vk_shader_module_handle_from_nir(cs), 11527ec681f3Smrg .pName = "main", 11537ec681f3Smrg .pSpecializationInfo = NULL, 11547ec681f3Smrg }; 11557ec681f3Smrg 11567ec681f3Smrg VkComputePipelineCreateInfo vk_pipeline_info = { 11577ec681f3Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 11587ec681f3Smrg .stage = pipeline_shader_stage, 11597ec681f3Smrg .flags = 0, 11607ec681f3Smrg .layout = device->meta_state.cleari_r32g32b32.img_p_layout, 11617ec681f3Smrg }; 11627ec681f3Smrg 11637ec681f3Smrg result = radv_CreateComputePipelines( 11647ec681f3Smrg radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 11657ec681f3Smrg &vk_pipeline_info, NULL, &device->meta_state.cleari_r32g32b32.pipeline); 116601e04c3fSmrg 116701e04c3fSmrgfail: 11687ec681f3Smrg ralloc_free(cs); 11697ec681f3Smrg return result; 117001e04c3fSmrg} 117101e04c3fSmrg 117201e04c3fSmrgstatic void 117301e04c3fSmrgradv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device) 117401e04c3fSmrg{ 11757ec681f3Smrg struct radv_meta_state *state = &device->meta_state; 11767ec681f3Smrg 11777ec681f3Smrg radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout, 11787ec681f3Smrg &state->alloc); 11797ec681f3Smrg radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), 11807ec681f3Smrg state->cleari_r32g32b32.img_ds_layout, &state->alloc); 11817ec681f3Smrg radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline, 11827ec681f3Smrg &state->alloc); 118301e04c3fSmrg} 118401e04c3fSmrg 118501e04c3fSmrgvoid 118601e04c3fSmrgradv_device_finish_meta_bufimage_state(struct radv_device *device) 118701e04c3fSmrg{ 11887ec681f3Smrg radv_device_finish_meta_itob_state(device); 11897ec681f3Smrg radv_device_finish_meta_btoi_state(device); 11907ec681f3Smrg radv_device_finish_meta_btoi_r32g32b32_state(device); 11917ec681f3Smrg radv_device_finish_meta_itoi_state(device); 11927ec681f3Smrg radv_device_finish_meta_itoi_r32g32b32_state(device); 11937ec681f3Smrg radv_device_finish_meta_cleari_state(device); 11947ec681f3Smrg radv_device_finish_meta_cleari_r32g32b32_state(device); 119501e04c3fSmrg} 119601e04c3fSmrg 119701e04c3fSmrgVkResult 119801e04c3fSmrgradv_device_init_meta_bufimage_state(struct radv_device *device) 119901e04c3fSmrg{ 12007ec681f3Smrg VkResult result; 120101e04c3fSmrg 12027ec681f3Smrg result = radv_device_init_meta_itob_state(device); 12037ec681f3Smrg if (result != VK_SUCCESS) 12047ec681f3Smrg goto fail_itob; 120501e04c3fSmrg 12067ec681f3Smrg result = radv_device_init_meta_btoi_state(device); 12077ec681f3Smrg if (result != VK_SUCCESS) 12087ec681f3Smrg goto fail_btoi; 120901e04c3fSmrg 12107ec681f3Smrg result = radv_device_init_meta_btoi_r32g32b32_state(device); 12117ec681f3Smrg if (result != VK_SUCCESS) 12127ec681f3Smrg goto fail_btoi_r32g32b32; 121301e04c3fSmrg 12147ec681f3Smrg result = radv_device_init_meta_itoi_state(device); 12157ec681f3Smrg if (result != VK_SUCCESS) 12167ec681f3Smrg goto fail_itoi; 121701e04c3fSmrg 12187ec681f3Smrg result = radv_device_init_meta_itoi_r32g32b32_state(device); 12197ec681f3Smrg if (result != VK_SUCCESS) 12207ec681f3Smrg goto fail_itoi_r32g32b32; 122101e04c3fSmrg 12227ec681f3Smrg result = radv_device_init_meta_cleari_state(device); 12237ec681f3Smrg if (result != VK_SUCCESS) 12247ec681f3Smrg goto fail_cleari; 122501e04c3fSmrg 12267ec681f3Smrg result = radv_device_init_meta_cleari_r32g32b32_state(device); 12277ec681f3Smrg if (result != VK_SUCCESS) 12287ec681f3Smrg goto fail_cleari_r32g32b32; 122901e04c3fSmrg 12307ec681f3Smrg return VK_SUCCESS; 123101e04c3fSmrgfail_cleari_r32g32b32: 12327ec681f3Smrg radv_device_finish_meta_cleari_r32g32b32_state(device); 123301e04c3fSmrgfail_cleari: 12347ec681f3Smrg radv_device_finish_meta_cleari_state(device); 123501e04c3fSmrgfail_itoi_r32g32b32: 12367ec681f3Smrg radv_device_finish_meta_itoi_r32g32b32_state(device); 123701e04c3fSmrgfail_itoi: 12387ec681f3Smrg radv_device_finish_meta_itoi_state(device); 123901e04c3fSmrgfail_btoi_r32g32b32: 12407ec681f3Smrg radv_device_finish_meta_btoi_r32g32b32_state(device); 124101e04c3fSmrgfail_btoi: 12427ec681f3Smrg radv_device_finish_meta_btoi_state(device); 124301e04c3fSmrgfail_itob: 12447ec681f3Smrg radv_device_finish_meta_itob_state(device); 12457ec681f3Smrg return result; 124601e04c3fSmrg} 124701e04c3fSmrg 124801e04c3fSmrgstatic void 12497ec681f3Smrgcreate_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, 12507ec681f3Smrg struct radv_image_view *iview, VkFormat format, VkImageAspectFlagBits aspects) 125101e04c3fSmrg{ 12527ec681f3Smrg VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 12537ec681f3Smrg ? VK_IMAGE_VIEW_TYPE_2D 12547ec681f3Smrg : radv_meta_get_view_type(surf->image); 12557ec681f3Smrg 12567ec681f3Smrg if (format == VK_FORMAT_UNDEFINED) 12577ec681f3Smrg format = surf->format; 12587ec681f3Smrg 12597ec681f3Smrg radv_image_view_init(iview, cmd_buffer->device, 12607ec681f3Smrg &(VkImageViewCreateInfo){ 12617ec681f3Smrg .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 12627ec681f3Smrg .image = radv_image_to_handle(surf->image), 12637ec681f3Smrg .viewType = view_type, 12647ec681f3Smrg .format = format, 12657ec681f3Smrg .subresourceRange = {.aspectMask = aspects, 12667ec681f3Smrg .baseMipLevel = surf->level, 12677ec681f3Smrg .levelCount = 1, 12687ec681f3Smrg .baseArrayLayer = surf->layer, 12697ec681f3Smrg .layerCount = 1}, 12707ec681f3Smrg }, 12717ec681f3Smrg &(struct radv_image_view_extra_create_info){ 12727ec681f3Smrg .disable_compression = surf->disable_compression, 12737ec681f3Smrg }); 127401e04c3fSmrg} 127501e04c3fSmrg 127601e04c3fSmrgstatic void 12777ec681f3Smrgcreate_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset, 12787ec681f3Smrg VkFormat format, struct radv_buffer_view *bview) 127901e04c3fSmrg{ 12807ec681f3Smrg radv_buffer_view_init(bview, cmd_buffer->device, 12817ec681f3Smrg &(VkBufferViewCreateInfo){ 12827ec681f3Smrg .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, 12837ec681f3Smrg .flags = 0, 12847ec681f3Smrg .buffer = radv_buffer_to_handle(buffer), 12857ec681f3Smrg .format = format, 12867ec681f3Smrg .offset = offset, 12877ec681f3Smrg .range = VK_WHOLE_SIZE, 12887ec681f3Smrg }); 128901e04c3fSmrg} 129001e04c3fSmrg 129101e04c3fSmrgstatic void 12927ec681f3Smrgcreate_buffer_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, 12937ec681f3Smrg VkBufferUsageFlagBits usage, VkBuffer *buffer) 129401e04c3fSmrg{ 12957ec681f3Smrg struct radv_device *device = cmd_buffer->device; 12967ec681f3Smrg struct radv_device_memory mem; 12977ec681f3Smrg 12987ec681f3Smrg radv_device_memory_init(&mem, device, surf->image->bo); 12997ec681f3Smrg 13007ec681f3Smrg radv_CreateBuffer(radv_device_to_handle(device), 13017ec681f3Smrg &(VkBufferCreateInfo){ 13027ec681f3Smrg .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 13037ec681f3Smrg .flags = 0, 13047ec681f3Smrg .size = surf->image->size, 13057ec681f3Smrg .usage = usage, 13067ec681f3Smrg .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 13077ec681f3Smrg }, 13087ec681f3Smrg NULL, buffer); 13097ec681f3Smrg 13107ec681f3Smrg radv_BindBufferMemory2(radv_device_to_handle(device), 1, 13117ec681f3Smrg (VkBindBufferMemoryInfo[]){{ 13127ec681f3Smrg .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, 13137ec681f3Smrg .buffer = *buffer, 13147ec681f3Smrg .memory = radv_device_memory_to_handle(&mem), 13157ec681f3Smrg .memoryOffset = surf->image->offset, 13167ec681f3Smrg }}); 13177ec681f3Smrg 13187ec681f3Smrg radv_device_memory_finish(&mem); 131901e04c3fSmrg} 132001e04c3fSmrg 132101e04c3fSmrgstatic void 13227ec681f3Smrgcreate_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, 13237ec681f3Smrg unsigned offset, VkFormat src_format, struct radv_buffer_view *bview) 132401e04c3fSmrg{ 13257ec681f3Smrg VkFormat format; 13267ec681f3Smrg 13277ec681f3Smrg switch (src_format) { 13287ec681f3Smrg case VK_FORMAT_R32G32B32_UINT: 13297ec681f3Smrg format = VK_FORMAT_R32_UINT; 13307ec681f3Smrg break; 13317ec681f3Smrg case VK_FORMAT_R32G32B32_SINT: 13327ec681f3Smrg format = VK_FORMAT_R32_SINT; 13337ec681f3Smrg break; 13347ec681f3Smrg case VK_FORMAT_R32G32B32_SFLOAT: 13357ec681f3Smrg format = VK_FORMAT_R32_SFLOAT; 13367ec681f3Smrg break; 13377ec681f3Smrg default: 13387ec681f3Smrg unreachable("invalid R32G32B32 format"); 13397ec681f3Smrg } 13407ec681f3Smrg 13417ec681f3Smrg radv_buffer_view_init(bview, cmd_buffer->device, 13427ec681f3Smrg &(VkBufferViewCreateInfo){ 13437ec681f3Smrg .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, 13447ec681f3Smrg .flags = 0, 13457ec681f3Smrg .buffer = radv_buffer_to_handle(buffer), 13467ec681f3Smrg .format = format, 13477ec681f3Smrg .offset = offset, 13487ec681f3Smrg .range = VK_WHOLE_SIZE, 13497ec681f3Smrg }); 135001e04c3fSmrg} 135101e04c3fSmrg 135201e04c3fSmrgstatic unsigned 135301e04c3fSmrgget_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, 13547ec681f3Smrg struct radv_meta_blit2d_surf *surf) 135501e04c3fSmrg{ 13567ec681f3Smrg unsigned stride; 135701e04c3fSmrg 13587ec681f3Smrg if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { 13597ec681f3Smrg stride = surf->image->planes[0].surface.u.gfx9.surf_pitch; 13607ec681f3Smrg } else { 13617ec681f3Smrg stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3; 13627ec681f3Smrg } 136301e04c3fSmrg 13647ec681f3Smrg return stride; 136501e04c3fSmrg} 136601e04c3fSmrg 136701e04c3fSmrgstatic void 13687ec681f3Smrgitob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, 13697ec681f3Smrg struct radv_buffer_view *dst) 137001e04c3fSmrg{ 13717ec681f3Smrg struct radv_device *device = cmd_buffer->device; 13727ec681f3Smrg 13737ec681f3Smrg radv_meta_push_descriptor_set( 13747ec681f3Smrg cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, /* set */ 13757ec681f3Smrg 2, /* descriptorWriteCount */ 13767ec681f3Smrg (VkWriteDescriptorSet[]){ 13777ec681f3Smrg {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 13787ec681f3Smrg .dstBinding = 0, 13797ec681f3Smrg .dstArrayElement = 0, 13807ec681f3Smrg .descriptorCount = 1, 13817ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 13827ec681f3Smrg .pImageInfo = 13837ec681f3Smrg (VkDescriptorImageInfo[]){ 13847ec681f3Smrg { 13857ec681f3Smrg .sampler = VK_NULL_HANDLE, 13867ec681f3Smrg .imageView = radv_image_view_to_handle(src), 13877ec681f3Smrg .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 13887ec681f3Smrg }, 13897ec681f3Smrg }}, 13907ec681f3Smrg { 13917ec681f3Smrg .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 13927ec681f3Smrg .dstBinding = 1, 13937ec681f3Smrg .dstArrayElement = 0, 13947ec681f3Smrg .descriptorCount = 1, 13957ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 13967ec681f3Smrg .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)}, 13977ec681f3Smrg }}); 139801e04c3fSmrg} 139901e04c3fSmrg 140001e04c3fSmrgvoid 14017ec681f3Smrgradv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src, 14027ec681f3Smrg struct radv_meta_blit2d_buffer *dst, unsigned num_rects, 14037ec681f3Smrg struct radv_meta_blit2d_rect *rects) 140401e04c3fSmrg{ 14057ec681f3Smrg VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline; 14067ec681f3Smrg struct radv_device *device = cmd_buffer->device; 14077ec681f3Smrg struct radv_image_view src_view; 14087ec681f3Smrg struct radv_buffer_view dst_view; 14097ec681f3Smrg 14107ec681f3Smrg create_iview(cmd_buffer, src, &src_view, VK_FORMAT_UNDEFINED, src->aspect_mask); 14117ec681f3Smrg create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view); 14127ec681f3Smrg itob_bind_descriptors(cmd_buffer, &src_view, &dst_view); 14137ec681f3Smrg 14147ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9 && src->image->type == VK_IMAGE_TYPE_3D) 14157ec681f3Smrg pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d; 14167ec681f3Smrg 14177ec681f3Smrg radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 14187ec681f3Smrg pipeline); 14197ec681f3Smrg 14207ec681f3Smrg for (unsigned r = 0; r < num_rects; ++r) { 14217ec681f3Smrg unsigned push_constants[4] = {rects[r].src_x, rects[r].src_y, src->layer, dst->pitch}; 14227ec681f3Smrg radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 14237ec681f3Smrg device->meta_state.itob.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 14247ec681f3Smrg 16, push_constants); 14257ec681f3Smrg 14267ec681f3Smrg radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); 14277ec681f3Smrg } 14287ec681f3Smrg 14297ec681f3Smrg radv_image_view_finish(&src_view); 14307ec681f3Smrg radv_buffer_view_finish(&dst_view); 143101e04c3fSmrg} 143201e04c3fSmrg 143301e04c3fSmrgstatic void 14347ec681f3Smrgbtoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src, 14357ec681f3Smrg struct radv_buffer_view *dst) 143601e04c3fSmrg{ 14377ec681f3Smrg struct radv_device *device = cmd_buffer->device; 14387ec681f3Smrg 14397ec681f3Smrg radv_meta_push_descriptor_set( 14407ec681f3Smrg cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout, 14417ec681f3Smrg 0, /* set */ 14427ec681f3Smrg 2, /* descriptorWriteCount */ 14437ec681f3Smrg (VkWriteDescriptorSet[]){ 14447ec681f3Smrg { 14457ec681f3Smrg .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 14467ec681f3Smrg .dstBinding = 0, 14477ec681f3Smrg .dstArrayElement = 0, 14487ec681f3Smrg .descriptorCount = 1, 14497ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 14507ec681f3Smrg .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)}, 14517ec681f3Smrg }, 14527ec681f3Smrg { 14537ec681f3Smrg .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 14547ec681f3Smrg .dstBinding = 1, 14557ec681f3Smrg .dstArrayElement = 0, 14567ec681f3Smrg .descriptorCount = 1, 14577ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 14587ec681f3Smrg .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)}, 14597ec681f3Smrg }}); 146001e04c3fSmrg} 146101e04c3fSmrg 146201e04c3fSmrgstatic void 146301e04c3fSmrgradv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, 14647ec681f3Smrg struct radv_meta_blit2d_buffer *src, 14657ec681f3Smrg struct radv_meta_blit2d_surf *dst, unsigned num_rects, 14667ec681f3Smrg struct radv_meta_blit2d_rect *rects) 146701e04c3fSmrg{ 14687ec681f3Smrg VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline; 14697ec681f3Smrg struct radv_device *device = cmd_buffer->device; 14707ec681f3Smrg struct radv_buffer_view src_view, dst_view; 14717ec681f3Smrg unsigned dst_offset = 0; 14727ec681f3Smrg unsigned stride; 14737ec681f3Smrg VkBuffer buffer; 14747ec681f3Smrg 14757ec681f3Smrg /* This special btoi path for R32G32B32 formats will write the linear 14767ec681f3Smrg * image as a buffer with the same underlying memory. The compute 14777ec681f3Smrg * shader will copy all components separately using a R32 format. 14787ec681f3Smrg */ 14797ec681f3Smrg create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer); 14807ec681f3Smrg 14817ec681f3Smrg create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view); 14827ec681f3Smrg create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format, 14837ec681f3Smrg &dst_view); 14847ec681f3Smrg btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view); 14857ec681f3Smrg 14867ec681f3Smrg radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 14877ec681f3Smrg pipeline); 14887ec681f3Smrg 14897ec681f3Smrg stride = get_image_stride_for_r32g32b32(cmd_buffer, dst); 14907ec681f3Smrg 14917ec681f3Smrg for (unsigned r = 0; r < num_rects; ++r) { 14927ec681f3Smrg unsigned push_constants[4] = { 14937ec681f3Smrg rects[r].dst_x, 14947ec681f3Smrg rects[r].dst_y, 14957ec681f3Smrg stride, 14967ec681f3Smrg src->pitch, 14977ec681f3Smrg }; 14987ec681f3Smrg 14997ec681f3Smrg radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 15007ec681f3Smrg device->meta_state.btoi_r32g32b32.img_p_layout, 15017ec681f3Smrg VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants); 15027ec681f3Smrg 15037ec681f3Smrg radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); 15047ec681f3Smrg } 15057ec681f3Smrg 15067ec681f3Smrg radv_buffer_view_finish(&src_view); 15077ec681f3Smrg radv_buffer_view_finish(&dst_view); 15087ec681f3Smrg radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL); 150901e04c3fSmrg} 151001e04c3fSmrg 151101e04c3fSmrgstatic void 15127ec681f3Smrgbtoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src, 15137ec681f3Smrg struct radv_image_view *dst) 151401e04c3fSmrg{ 15157ec681f3Smrg struct radv_device *device = cmd_buffer->device; 15167ec681f3Smrg 15177ec681f3Smrg radv_meta_push_descriptor_set( 15187ec681f3Smrg cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, /* set */ 15197ec681f3Smrg 2, /* descriptorWriteCount */ 15207ec681f3Smrg (VkWriteDescriptorSet[]){ 15217ec681f3Smrg { 15227ec681f3Smrg .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 15237ec681f3Smrg .dstBinding = 0, 15247ec681f3Smrg .dstArrayElement = 0, 15257ec681f3Smrg .descriptorCount = 1, 15267ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 15277ec681f3Smrg .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)}, 15287ec681f3Smrg }, 15297ec681f3Smrg {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 15307ec681f3Smrg .dstBinding = 1, 15317ec681f3Smrg .dstArrayElement = 0, 15327ec681f3Smrg .descriptorCount = 1, 15337ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 15347ec681f3Smrg .pImageInfo = (VkDescriptorImageInfo[]){ 15357ec681f3Smrg { 15367ec681f3Smrg .sampler = VK_NULL_HANDLE, 15377ec681f3Smrg .imageView = radv_image_view_to_handle(dst), 15387ec681f3Smrg .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 15397ec681f3Smrg }, 15407ec681f3Smrg }}}); 154101e04c3fSmrg} 154201e04c3fSmrg 154301e04c3fSmrgvoid 154401e04c3fSmrgradv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, 15457ec681f3Smrg struct radv_meta_blit2d_buffer *src, struct radv_meta_blit2d_surf *dst, 15467ec681f3Smrg unsigned num_rects, struct radv_meta_blit2d_rect *rects) 154701e04c3fSmrg{ 15487ec681f3Smrg VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline; 15497ec681f3Smrg struct radv_device *device = cmd_buffer->device; 15507ec681f3Smrg struct radv_buffer_view src_view; 15517ec681f3Smrg struct radv_image_view dst_view; 15527ec681f3Smrg 15537ec681f3Smrg if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT || 15547ec681f3Smrg dst->image->vk_format == VK_FORMAT_R32G32B32_SINT || 15557ec681f3Smrg dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) { 15567ec681f3Smrg radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects); 15577ec681f3Smrg return; 15587ec681f3Smrg } 15597ec681f3Smrg 15607ec681f3Smrg create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view); 15617ec681f3Smrg create_iview(cmd_buffer, dst, &dst_view, VK_FORMAT_UNDEFINED, dst->aspect_mask); 15627ec681f3Smrg btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view); 15637ec681f3Smrg 15647ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D) 15657ec681f3Smrg pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d; 15667ec681f3Smrg radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 15677ec681f3Smrg pipeline); 15687ec681f3Smrg 15697ec681f3Smrg for (unsigned r = 0; r < num_rects; ++r) { 15707ec681f3Smrg unsigned push_constants[4] = { 15717ec681f3Smrg rects[r].dst_x, 15727ec681f3Smrg rects[r].dst_y, 15737ec681f3Smrg dst->layer, 15747ec681f3Smrg src->pitch, 15757ec681f3Smrg }; 15767ec681f3Smrg radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 15777ec681f3Smrg device->meta_state.btoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 15787ec681f3Smrg 16, push_constants); 15797ec681f3Smrg 15807ec681f3Smrg radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); 15817ec681f3Smrg } 15827ec681f3Smrg 15837ec681f3Smrg radv_image_view_finish(&dst_view); 15847ec681f3Smrg radv_buffer_view_finish(&src_view); 158501e04c3fSmrg} 158601e04c3fSmrg 158701e04c3fSmrgstatic void 15887ec681f3Smrgitoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src, 15897ec681f3Smrg struct radv_buffer_view *dst) 159001e04c3fSmrg{ 15917ec681f3Smrg struct radv_device *device = cmd_buffer->device; 15927ec681f3Smrg 15937ec681f3Smrg radv_meta_push_descriptor_set( 15947ec681f3Smrg cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout, 15957ec681f3Smrg 0, /* set */ 15967ec681f3Smrg 2, /* descriptorWriteCount */ 15977ec681f3Smrg (VkWriteDescriptorSet[]){ 15987ec681f3Smrg { 15997ec681f3Smrg .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 16007ec681f3Smrg .dstBinding = 0, 16017ec681f3Smrg .dstArrayElement = 0, 16027ec681f3Smrg .descriptorCount = 1, 16037ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 16047ec681f3Smrg .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)}, 16057ec681f3Smrg }, 16067ec681f3Smrg { 16077ec681f3Smrg .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 16087ec681f3Smrg .dstBinding = 1, 16097ec681f3Smrg .dstArrayElement = 0, 16107ec681f3Smrg .descriptorCount = 1, 16117ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 16127ec681f3Smrg .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)}, 16137ec681f3Smrg }}); 161401e04c3fSmrg} 161501e04c3fSmrg 161601e04c3fSmrgstatic void 161701e04c3fSmrgradv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, 16187ec681f3Smrg struct radv_meta_blit2d_surf *src, 16197ec681f3Smrg struct radv_meta_blit2d_surf *dst, unsigned num_rects, 16207ec681f3Smrg struct radv_meta_blit2d_rect *rects) 162101e04c3fSmrg{ 16227ec681f3Smrg VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline; 16237ec681f3Smrg struct radv_device *device = cmd_buffer->device; 16247ec681f3Smrg struct radv_buffer_view src_view, dst_view; 16257ec681f3Smrg unsigned src_offset = 0, dst_offset = 0; 16267ec681f3Smrg unsigned src_stride, dst_stride; 16277ec681f3Smrg VkBuffer src_buffer, dst_buffer; 16287ec681f3Smrg 16297ec681f3Smrg /* 96-bit formats are only compatible to themselves. */ 16307ec681f3Smrg assert(dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT || 16317ec681f3Smrg dst->format == VK_FORMAT_R32G32B32_SFLOAT); 16327ec681f3Smrg 16337ec681f3Smrg /* This special itoi path for R32G32B32 formats will write the linear 16347ec681f3Smrg * image as a buffer with the same underlying memory. The compute 16357ec681f3Smrg * shader will copy all components separately using a R32 format. 16367ec681f3Smrg */ 16377ec681f3Smrg create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, &src_buffer); 16387ec681f3Smrg create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &dst_buffer); 16397ec681f3Smrg 16407ec681f3Smrg create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset, 16417ec681f3Smrg src->format, &src_view); 16427ec681f3Smrg create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset, 16437ec681f3Smrg dst->format, &dst_view); 16447ec681f3Smrg itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view); 16457ec681f3Smrg 16467ec681f3Smrg radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 16477ec681f3Smrg pipeline); 16487ec681f3Smrg 16497ec681f3Smrg src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src); 16507ec681f3Smrg dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst); 16517ec681f3Smrg 16527ec681f3Smrg for (unsigned r = 0; r < num_rects; ++r) { 16537ec681f3Smrg unsigned push_constants[6] = { 16547ec681f3Smrg rects[r].src_x, rects[r].src_y, src_stride, rects[r].dst_x, rects[r].dst_y, dst_stride, 16557ec681f3Smrg }; 16567ec681f3Smrg radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 16577ec681f3Smrg device->meta_state.itoi_r32g32b32.img_p_layout, 16587ec681f3Smrg VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants); 16597ec681f3Smrg 16607ec681f3Smrg radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); 16617ec681f3Smrg } 16627ec681f3Smrg 16637ec681f3Smrg radv_buffer_view_finish(&src_view); 16647ec681f3Smrg radv_buffer_view_finish(&dst_view); 16657ec681f3Smrg radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL); 16667ec681f3Smrg radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL); 166701e04c3fSmrg} 166801e04c3fSmrg 166901e04c3fSmrgstatic void 16707ec681f3Smrgitoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, 16717ec681f3Smrg struct radv_image_view *dst) 167201e04c3fSmrg{ 16737ec681f3Smrg struct radv_device *device = cmd_buffer->device; 16747ec681f3Smrg 16757ec681f3Smrg radv_meta_push_descriptor_set( 16767ec681f3Smrg cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, 0, /* set */ 16777ec681f3Smrg 2, /* descriptorWriteCount */ 16787ec681f3Smrg (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 16797ec681f3Smrg .dstBinding = 0, 16807ec681f3Smrg .dstArrayElement = 0, 16817ec681f3Smrg .descriptorCount = 1, 16827ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 16837ec681f3Smrg .pImageInfo = 16847ec681f3Smrg (VkDescriptorImageInfo[]){ 16857ec681f3Smrg { 16867ec681f3Smrg .sampler = VK_NULL_HANDLE, 16877ec681f3Smrg .imageView = radv_image_view_to_handle(src), 16887ec681f3Smrg .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 16897ec681f3Smrg }, 16907ec681f3Smrg }}, 16917ec681f3Smrg {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 16927ec681f3Smrg .dstBinding = 1, 16937ec681f3Smrg .dstArrayElement = 0, 16947ec681f3Smrg .descriptorCount = 1, 16957ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 16967ec681f3Smrg .pImageInfo = (VkDescriptorImageInfo[]){ 16977ec681f3Smrg { 16987ec681f3Smrg .sampler = VK_NULL_HANDLE, 16997ec681f3Smrg .imageView = radv_image_view_to_handle(dst), 17007ec681f3Smrg .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 17017ec681f3Smrg }, 17027ec681f3Smrg }}}); 170301e04c3fSmrg} 170401e04c3fSmrg 170501e04c3fSmrgvoid 17067ec681f3Smrgradv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src, 17077ec681f3Smrg struct radv_meta_blit2d_surf *dst, unsigned num_rects, 17087ec681f3Smrg struct radv_meta_blit2d_rect *rects) 170901e04c3fSmrg{ 17107ec681f3Smrg struct radv_device *device = cmd_buffer->device; 17117ec681f3Smrg struct radv_image_view src_view, dst_view; 17127ec681f3Smrg uint32_t samples = src->image->info.samples; 17137ec681f3Smrg uint32_t samples_log2 = ffs(samples) - 1; 17147ec681f3Smrg 17157ec681f3Smrg if (src->format == VK_FORMAT_R32G32B32_UINT || src->format == VK_FORMAT_R32G32B32_SINT || 17167ec681f3Smrg src->format == VK_FORMAT_R32G32B32_SFLOAT) { 17177ec681f3Smrg radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects); 17187ec681f3Smrg return; 17197ec681f3Smrg } 17207ec681f3Smrg 17217ec681f3Smrg u_foreach_bit(i, dst->aspect_mask) { 17227ec681f3Smrg unsigned aspect_mask = 1u << i; 17237ec681f3Smrg VkFormat depth_format = 0; 17247ec681f3Smrg if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) 17257ec681f3Smrg depth_format = vk_format_stencil_only(dst->image->vk_format); 17267ec681f3Smrg else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) 17277ec681f3Smrg depth_format = vk_format_depth_only(dst->image->vk_format); 17287ec681f3Smrg 17297ec681f3Smrg create_iview(cmd_buffer, src, &src_view, depth_format, aspect_mask); 17307ec681f3Smrg create_iview(cmd_buffer, dst, &dst_view, depth_format, aspect_mask); 17317ec681f3Smrg 17327ec681f3Smrg itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view); 17337ec681f3Smrg 17347ec681f3Smrg VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2]; 17357ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9 && 17367ec681f3Smrg (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D)) 17377ec681f3Smrg pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d; 17387ec681f3Smrg radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 17397ec681f3Smrg pipeline); 17407ec681f3Smrg 17417ec681f3Smrg for (unsigned r = 0; r < num_rects; ++r) { 17427ec681f3Smrg unsigned push_constants[6] = { 17437ec681f3Smrg rects[r].src_x, rects[r].src_y, src->layer, rects[r].dst_x, rects[r].dst_y, dst->layer, 17447ec681f3Smrg }; 17457ec681f3Smrg radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 17467ec681f3Smrg device->meta_state.itoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 17477ec681f3Smrg 24, push_constants); 17487ec681f3Smrg 17497ec681f3Smrg radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); 17507ec681f3Smrg } 17517ec681f3Smrg 17527ec681f3Smrg radv_image_view_finish(&src_view); 17537ec681f3Smrg radv_image_view_finish(&dst_view); 17547ec681f3Smrg } 175501e04c3fSmrg} 175601e04c3fSmrg 175701e04c3fSmrgstatic void 17587ec681f3Smrgcleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *view) 175901e04c3fSmrg{ 17607ec681f3Smrg struct radv_device *device = cmd_buffer->device; 17617ec681f3Smrg 17627ec681f3Smrg radv_meta_push_descriptor_set( 17637ec681f3Smrg cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari_r32g32b32.img_p_layout, 17647ec681f3Smrg 0, /* set */ 17657ec681f3Smrg 1, /* descriptorWriteCount */ 17667ec681f3Smrg (VkWriteDescriptorSet[]){{ 17677ec681f3Smrg .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 17687ec681f3Smrg .dstBinding = 0, 17697ec681f3Smrg .dstArrayElement = 0, 17707ec681f3Smrg .descriptorCount = 1, 17717ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 17727ec681f3Smrg .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)}, 17737ec681f3Smrg }}); 177401e04c3fSmrg} 177501e04c3fSmrg 177601e04c3fSmrgstatic void 177701e04c3fSmrgradv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, 17787ec681f3Smrg struct radv_meta_blit2d_surf *dst, 17797ec681f3Smrg const VkClearColorValue *clear_color) 178001e04c3fSmrg{ 17817ec681f3Smrg VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline; 17827ec681f3Smrg struct radv_device *device = cmd_buffer->device; 17837ec681f3Smrg struct radv_buffer_view dst_view; 17847ec681f3Smrg unsigned stride; 17857ec681f3Smrg VkBuffer buffer; 17867ec681f3Smrg 17877ec681f3Smrg /* This special clear path for R32G32B32 formats will write the linear 17887ec681f3Smrg * image as a buffer with the same underlying memory. The compute 17897ec681f3Smrg * shader will clear all components separately using a R32 format. 17907ec681f3Smrg */ 17917ec681f3Smrg create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer); 17927ec681f3Smrg 17937ec681f3Smrg create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format, 17947ec681f3Smrg &dst_view); 17957ec681f3Smrg cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view); 17967ec681f3Smrg 17977ec681f3Smrg radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 17987ec681f3Smrg pipeline); 17997ec681f3Smrg 18007ec681f3Smrg stride = get_image_stride_for_r32g32b32(cmd_buffer, dst); 18017ec681f3Smrg 18027ec681f3Smrg unsigned push_constants[4] = { 18037ec681f3Smrg clear_color->uint32[0], 18047ec681f3Smrg clear_color->uint32[1], 18057ec681f3Smrg clear_color->uint32[2], 18067ec681f3Smrg stride, 18077ec681f3Smrg }; 18087ec681f3Smrg 18097ec681f3Smrg radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 18107ec681f3Smrg device->meta_state.cleari_r32g32b32.img_p_layout, 18117ec681f3Smrg VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants); 18127ec681f3Smrg 18137ec681f3Smrg radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1); 18147ec681f3Smrg 18157ec681f3Smrg radv_buffer_view_finish(&dst_view); 18167ec681f3Smrg radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL); 181701e04c3fSmrg} 181801e04c3fSmrg 181901e04c3fSmrgstatic void 18207ec681f3Smrgcleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *dst_iview) 182101e04c3fSmrg{ 18227ec681f3Smrg struct radv_device *device = cmd_buffer->device; 18237ec681f3Smrg 18247ec681f3Smrg radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, 18257ec681f3Smrg device->meta_state.cleari.img_p_layout, 0, /* set */ 18267ec681f3Smrg 1, /* descriptorWriteCount */ 18277ec681f3Smrg (VkWriteDescriptorSet[]){ 18287ec681f3Smrg {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 18297ec681f3Smrg .dstBinding = 0, 18307ec681f3Smrg .dstArrayElement = 0, 18317ec681f3Smrg .descriptorCount = 1, 18327ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 18337ec681f3Smrg .pImageInfo = 18347ec681f3Smrg (VkDescriptorImageInfo[]){ 18357ec681f3Smrg { 18367ec681f3Smrg .sampler = VK_NULL_HANDLE, 18377ec681f3Smrg .imageView = radv_image_view_to_handle(dst_iview), 18387ec681f3Smrg .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 18397ec681f3Smrg }, 18407ec681f3Smrg }}, 18417ec681f3Smrg }); 184201e04c3fSmrg} 184301e04c3fSmrg 184401e04c3fSmrgvoid 18457ec681f3Smrgradv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst, 18467ec681f3Smrg const VkClearColorValue *clear_color) 184701e04c3fSmrg{ 18487ec681f3Smrg struct radv_device *device = cmd_buffer->device; 18497ec681f3Smrg struct radv_image_view dst_iview; 18507ec681f3Smrg uint32_t samples = dst->image->info.samples; 18517ec681f3Smrg uint32_t samples_log2 = ffs(samples) - 1; 18527ec681f3Smrg 18537ec681f3Smrg if (dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT || 18547ec681f3Smrg dst->format == VK_FORMAT_R32G32B32_SFLOAT) { 18557ec681f3Smrg radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color); 18567ec681f3Smrg return; 18577ec681f3Smrg } 18587ec681f3Smrg 18597ec681f3Smrg create_iview(cmd_buffer, dst, &dst_iview, VK_FORMAT_UNDEFINED, dst->aspect_mask); 18607ec681f3Smrg cleari_bind_descriptors(cmd_buffer, &dst_iview); 18617ec681f3Smrg 18627ec681f3Smrg VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline[samples_log2]; 18637ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D) 18647ec681f3Smrg pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d; 18657ec681f3Smrg 18667ec681f3Smrg radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 18677ec681f3Smrg pipeline); 18687ec681f3Smrg 18697ec681f3Smrg unsigned push_constants[5] = { 18707ec681f3Smrg clear_color->uint32[0], 18717ec681f3Smrg clear_color->uint32[1], 18727ec681f3Smrg clear_color->uint32[2], 18737ec681f3Smrg clear_color->uint32[3], 18747ec681f3Smrg dst->layer, 18757ec681f3Smrg }; 18767ec681f3Smrg 18777ec681f3Smrg radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 18787ec681f3Smrg device->meta_state.cleari.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 20, 18797ec681f3Smrg push_constants); 18807ec681f3Smrg 18817ec681f3Smrg radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1); 18827ec681f3Smrg 18837ec681f3Smrg radv_image_view_finish(&dst_iview); 188401e04c3fSmrg} 1885