1b8e80941Smrg#include "radv_meta.h" 2b8e80941Smrg#include "nir/nir_builder.h" 3b8e80941Smrg 4b8e80941Smrg#include "sid.h" 5b8e80941Smrg#include "radv_cs.h" 6b8e80941Smrg 7b8e80941Smrgstatic nir_shader * 8b8e80941Smrgbuild_buffer_fill_shader(struct radv_device *dev) 9b8e80941Smrg{ 10b8e80941Smrg nir_builder b; 11b8e80941Smrg 12b8e80941Smrg nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); 13b8e80941Smrg b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill"); 14b8e80941Smrg b.shader->info.cs.local_size[0] = 64; 15b8e80941Smrg b.shader->info.cs.local_size[1] = 1; 16b8e80941Smrg b.shader->info.cs.local_size[2] = 1; 17b8e80941Smrg 18b8e80941Smrg nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); 19b8e80941Smrg nir_ssa_def *wg_id = nir_load_work_group_id(&b); 20b8e80941Smrg nir_ssa_def *block_size = nir_imm_ivec4(&b, 21b8e80941Smrg b.shader->info.cs.local_size[0], 22b8e80941Smrg b.shader->info.cs.local_size[1], 23b8e80941Smrg b.shader->info.cs.local_size[2], 0); 24b8e80941Smrg 25b8e80941Smrg nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); 26b8e80941Smrg 27b8e80941Smrg nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16)); 28b8e80941Smrg offset = nir_channel(&b, offset, 0); 29b8e80941Smrg 30b8e80941Smrg nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader, 31b8e80941Smrg nir_intrinsic_vulkan_resource_index); 32b8e80941Smrg dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); 33b8e80941Smrg dst_buf->num_components = 1; 34b8e80941Smrg nir_intrinsic_set_desc_set(dst_buf, 0); 35b8e80941Smrg nir_intrinsic_set_binding(dst_buf, 0); 36b8e80941Smrg nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, dst_buf->num_components, 32, NULL); 37b8e80941Smrg nir_builder_instr_insert(&b, &dst_buf->instr); 38b8e80941Smrg 39b8e80941Smrg nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); 40b8e80941Smrg nir_intrinsic_set_base(load, 0); 41b8e80941Smrg nir_intrinsic_set_range(load, 4); 42b8e80941Smrg load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); 43b8e80941Smrg load->num_components = 1; 44b8e80941Smrg nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value"); 45b8e80941Smrg nir_builder_instr_insert(&b, &load->instr); 46b8e80941Smrg 47b8e80941Smrg nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4, false); 48b8e80941Smrg 49b8e80941Smrg nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); 50b8e80941Smrg store->src[0] = nir_src_for_ssa(swizzled_load); 51b8e80941Smrg store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); 52b8e80941Smrg store->src[2] = nir_src_for_ssa(offset); 53b8e80941Smrg nir_intrinsic_set_write_mask(store, 0xf); 54b8e80941Smrg nir_intrinsic_set_access(store, ACCESS_NON_READABLE); 55b8e80941Smrg store->num_components = 4; 56b8e80941Smrg nir_builder_instr_insert(&b, &store->instr); 57b8e80941Smrg 58b8e80941Smrg return b.shader; 59b8e80941Smrg} 60b8e80941Smrg 61b8e80941Smrgstatic nir_shader * 62b8e80941Smrgbuild_buffer_copy_shader(struct radv_device *dev) 63b8e80941Smrg{ 64b8e80941Smrg nir_builder b; 65b8e80941Smrg 66b8e80941Smrg nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); 67b8e80941Smrg b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy"); 68b8e80941Smrg b.shader->info.cs.local_size[0] = 64; 69b8e80941Smrg b.shader->info.cs.local_size[1] = 1; 70b8e80941Smrg b.shader->info.cs.local_size[2] = 1; 71b8e80941Smrg 72b8e80941Smrg nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); 73b8e80941Smrg nir_ssa_def *wg_id = nir_load_work_group_id(&b); 74b8e80941Smrg nir_ssa_def *block_size = nir_imm_ivec4(&b, 75b8e80941Smrg b.shader->info.cs.local_size[0], 76b8e80941Smrg b.shader->info.cs.local_size[1], 77b8e80941Smrg b.shader->info.cs.local_size[2], 0); 78b8e80941Smrg 79b8e80941Smrg nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); 80b8e80941Smrg 81b8e80941Smrg nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16)); 82b8e80941Smrg offset = nir_channel(&b, offset, 0); 83b8e80941Smrg 84b8e80941Smrg nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader, 85b8e80941Smrg nir_intrinsic_vulkan_resource_index); 86b8e80941Smrg dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); 87b8e80941Smrg dst_buf->num_components = 1; 88b8e80941Smrg nir_intrinsic_set_desc_set(dst_buf, 0); 89b8e80941Smrg nir_intrinsic_set_binding(dst_buf, 0); 90b8e80941Smrg nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, dst_buf->num_components, 32, NULL); 91b8e80941Smrg nir_builder_instr_insert(&b, &dst_buf->instr); 92b8e80941Smrg 93b8e80941Smrg nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader, 94b8e80941Smrg nir_intrinsic_vulkan_resource_index); 95b8e80941Smrg src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); 96b8e80941Smrg src_buf->num_components = 1; 97b8e80941Smrg nir_intrinsic_set_desc_set(src_buf, 0); 98b8e80941Smrg nir_intrinsic_set_binding(src_buf, 1); 99b8e80941Smrg nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, src_buf->num_components, 32, NULL); 100b8e80941Smrg nir_builder_instr_insert(&b, &src_buf->instr); 101b8e80941Smrg 102b8e80941Smrg nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); 103b8e80941Smrg load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa); 104b8e80941Smrg load->src[1] = nir_src_for_ssa(offset); 105b8e80941Smrg nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); 106b8e80941Smrg load->num_components = 4; 107b8e80941Smrg nir_builder_instr_insert(&b, &load->instr); 108b8e80941Smrg 109b8e80941Smrg nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); 110b8e80941Smrg store->src[0] = nir_src_for_ssa(&load->dest.ssa); 111b8e80941Smrg store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); 112b8e80941Smrg store->src[2] = nir_src_for_ssa(offset); 113b8e80941Smrg nir_intrinsic_set_write_mask(store, 0xf); 114b8e80941Smrg nir_intrinsic_set_access(store, ACCESS_NON_READABLE); 115b8e80941Smrg store->num_components = 4; 116b8e80941Smrg nir_builder_instr_insert(&b, &store->instr); 117b8e80941Smrg 118b8e80941Smrg return b.shader; 119b8e80941Smrg} 120b8e80941Smrg 121b8e80941Smrg 122b8e80941Smrg 123b8e80941SmrgVkResult radv_device_init_meta_buffer_state(struct radv_device *device) 124b8e80941Smrg{ 125b8e80941Smrg VkResult result; 126b8e80941Smrg struct radv_shader_module fill_cs = { .nir = NULL }; 127b8e80941Smrg struct radv_shader_module copy_cs = { .nir = NULL }; 128b8e80941Smrg 129b8e80941Smrg fill_cs.nir = build_buffer_fill_shader(device); 130b8e80941Smrg copy_cs.nir = build_buffer_copy_shader(device); 131b8e80941Smrg 132b8e80941Smrg VkDescriptorSetLayoutCreateInfo fill_ds_create_info = { 133b8e80941Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 134b8e80941Smrg .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 135b8e80941Smrg .bindingCount = 1, 136b8e80941Smrg .pBindings = (VkDescriptorSetLayoutBinding[]) { 137b8e80941Smrg { 138b8e80941Smrg .binding = 0, 139b8e80941Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 140b8e80941Smrg .descriptorCount = 1, 141b8e80941Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 142b8e80941Smrg .pImmutableSamplers = NULL 143b8e80941Smrg }, 144b8e80941Smrg } 145b8e80941Smrg }; 146b8e80941Smrg 147b8e80941Smrg result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), 148b8e80941Smrg &fill_ds_create_info, 149b8e80941Smrg &device->meta_state.alloc, 150b8e80941Smrg &device->meta_state.buffer.fill_ds_layout); 151b8e80941Smrg if (result != VK_SUCCESS) 152b8e80941Smrg goto fail; 153b8e80941Smrg 154b8e80941Smrg VkDescriptorSetLayoutCreateInfo copy_ds_create_info = { 155b8e80941Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 156b8e80941Smrg .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 157b8e80941Smrg .bindingCount = 2, 158b8e80941Smrg .pBindings = (VkDescriptorSetLayoutBinding[]) { 159b8e80941Smrg { 160b8e80941Smrg .binding = 0, 161b8e80941Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 162b8e80941Smrg .descriptorCount = 1, 163b8e80941Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 164b8e80941Smrg .pImmutableSamplers = NULL 165b8e80941Smrg }, 166b8e80941Smrg { 167b8e80941Smrg .binding = 1, 168b8e80941Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 169b8e80941Smrg .descriptorCount = 1, 170b8e80941Smrg .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 171b8e80941Smrg .pImmutableSamplers = NULL 172b8e80941Smrg }, 173b8e80941Smrg } 174b8e80941Smrg }; 175b8e80941Smrg 176b8e80941Smrg result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), 177b8e80941Smrg ©_ds_create_info, 178b8e80941Smrg &device->meta_state.alloc, 179b8e80941Smrg &device->meta_state.buffer.copy_ds_layout); 180b8e80941Smrg if (result != VK_SUCCESS) 181b8e80941Smrg goto fail; 182b8e80941Smrg 183b8e80941Smrg 184b8e80941Smrg VkPipelineLayoutCreateInfo fill_pl_create_info = { 185b8e80941Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 186b8e80941Smrg .setLayoutCount = 1, 187b8e80941Smrg .pSetLayouts = &device->meta_state.buffer.fill_ds_layout, 188b8e80941Smrg .pushConstantRangeCount = 1, 189b8e80941Smrg .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4}, 190b8e80941Smrg }; 191b8e80941Smrg 192b8e80941Smrg result = radv_CreatePipelineLayout(radv_device_to_handle(device), 193b8e80941Smrg &fill_pl_create_info, 194b8e80941Smrg &device->meta_state.alloc, 195b8e80941Smrg &device->meta_state.buffer.fill_p_layout); 196b8e80941Smrg if (result != VK_SUCCESS) 197b8e80941Smrg goto fail; 198b8e80941Smrg 199b8e80941Smrg VkPipelineLayoutCreateInfo copy_pl_create_info = { 200b8e80941Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 201b8e80941Smrg .setLayoutCount = 1, 202b8e80941Smrg .pSetLayouts = &device->meta_state.buffer.copy_ds_layout, 203b8e80941Smrg .pushConstantRangeCount = 0, 204b8e80941Smrg }; 205b8e80941Smrg 206b8e80941Smrg result = radv_CreatePipelineLayout(radv_device_to_handle(device), 207b8e80941Smrg ©_pl_create_info, 208b8e80941Smrg &device->meta_state.alloc, 209b8e80941Smrg &device->meta_state.buffer.copy_p_layout); 210b8e80941Smrg if (result != VK_SUCCESS) 211b8e80941Smrg goto fail; 212b8e80941Smrg 213b8e80941Smrg VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = { 214b8e80941Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 215b8e80941Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 216b8e80941Smrg .module = radv_shader_module_to_handle(&fill_cs), 217b8e80941Smrg .pName = "main", 218b8e80941Smrg .pSpecializationInfo = NULL, 219b8e80941Smrg }; 220b8e80941Smrg 221b8e80941Smrg VkComputePipelineCreateInfo fill_vk_pipeline_info = { 222b8e80941Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 223b8e80941Smrg .stage = fill_pipeline_shader_stage, 224b8e80941Smrg .flags = 0, 225b8e80941Smrg .layout = device->meta_state.buffer.fill_p_layout, 226b8e80941Smrg }; 227b8e80941Smrg 228b8e80941Smrg result = radv_CreateComputePipelines(radv_device_to_handle(device), 229b8e80941Smrg radv_pipeline_cache_to_handle(&device->meta_state.cache), 230b8e80941Smrg 1, &fill_vk_pipeline_info, NULL, 231b8e80941Smrg &device->meta_state.buffer.fill_pipeline); 232b8e80941Smrg if (result != VK_SUCCESS) 233b8e80941Smrg goto fail; 234b8e80941Smrg 235b8e80941Smrg VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = { 236b8e80941Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 237b8e80941Smrg .stage = VK_SHADER_STAGE_COMPUTE_BIT, 238b8e80941Smrg .module = radv_shader_module_to_handle(©_cs), 239b8e80941Smrg .pName = "main", 240b8e80941Smrg .pSpecializationInfo = NULL, 241b8e80941Smrg }; 242b8e80941Smrg 243b8e80941Smrg VkComputePipelineCreateInfo copy_vk_pipeline_info = { 244b8e80941Smrg .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 245b8e80941Smrg .stage = copy_pipeline_shader_stage, 246b8e80941Smrg .flags = 0, 247b8e80941Smrg .layout = device->meta_state.buffer.copy_p_layout, 248b8e80941Smrg }; 249b8e80941Smrg 250b8e80941Smrg result = radv_CreateComputePipelines(radv_device_to_handle(device), 251b8e80941Smrg radv_pipeline_cache_to_handle(&device->meta_state.cache), 252b8e80941Smrg 1, ©_vk_pipeline_info, NULL, 253b8e80941Smrg &device->meta_state.buffer.copy_pipeline); 254b8e80941Smrg if (result != VK_SUCCESS) 255b8e80941Smrg goto fail; 256b8e80941Smrg 257b8e80941Smrg ralloc_free(fill_cs.nir); 258b8e80941Smrg ralloc_free(copy_cs.nir); 259b8e80941Smrg return VK_SUCCESS; 260b8e80941Smrgfail: 261b8e80941Smrg radv_device_finish_meta_buffer_state(device); 262b8e80941Smrg ralloc_free(fill_cs.nir); 263b8e80941Smrg ralloc_free(copy_cs.nir); 264b8e80941Smrg return result; 265b8e80941Smrg} 266b8e80941Smrg 267b8e80941Smrgvoid radv_device_finish_meta_buffer_state(struct radv_device *device) 268b8e80941Smrg{ 269b8e80941Smrg struct radv_meta_state *state = &device->meta_state; 270b8e80941Smrg 271b8e80941Smrg radv_DestroyPipeline(radv_device_to_handle(device), 272b8e80941Smrg state->buffer.copy_pipeline, &state->alloc); 273b8e80941Smrg radv_DestroyPipeline(radv_device_to_handle(device), 274b8e80941Smrg state->buffer.fill_pipeline, &state->alloc); 275b8e80941Smrg radv_DestroyPipelineLayout(radv_device_to_handle(device), 276b8e80941Smrg state->buffer.copy_p_layout, &state->alloc); 277b8e80941Smrg radv_DestroyPipelineLayout(radv_device_to_handle(device), 278b8e80941Smrg state->buffer.fill_p_layout, &state->alloc); 279b8e80941Smrg radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), 280b8e80941Smrg state->buffer.copy_ds_layout, 281b8e80941Smrg &state->alloc); 282b8e80941Smrg radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), 283b8e80941Smrg state->buffer.fill_ds_layout, 284b8e80941Smrg &state->alloc); 285b8e80941Smrg} 286b8e80941Smrg 287b8e80941Smrgstatic void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer, 288b8e80941Smrg struct radeon_winsys_bo *bo, 289b8e80941Smrg uint64_t offset, uint64_t size, uint32_t value) 290b8e80941Smrg{ 291b8e80941Smrg struct radv_device *device = cmd_buffer->device; 292b8e80941Smrg uint64_t block_count = round_up_u64(size, 1024); 293b8e80941Smrg struct radv_meta_saved_state saved_state; 294b8e80941Smrg 295b8e80941Smrg radv_meta_save(&saved_state, cmd_buffer, 296b8e80941Smrg RADV_META_SAVE_COMPUTE_PIPELINE | 297b8e80941Smrg RADV_META_SAVE_CONSTANTS | 298b8e80941Smrg RADV_META_SAVE_DESCRIPTORS); 299b8e80941Smrg 300b8e80941Smrg struct radv_buffer dst_buffer = { 301b8e80941Smrg .bo = bo, 302b8e80941Smrg .offset = offset, 303b8e80941Smrg .size = size 304b8e80941Smrg }; 305b8e80941Smrg 306b8e80941Smrg radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), 307b8e80941Smrg VK_PIPELINE_BIND_POINT_COMPUTE, 308b8e80941Smrg device->meta_state.buffer.fill_pipeline); 309b8e80941Smrg 310b8e80941Smrg radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, 311b8e80941Smrg device->meta_state.buffer.fill_p_layout, 312b8e80941Smrg 0, /* set */ 313b8e80941Smrg 1, /* descriptorWriteCount */ 314b8e80941Smrg (VkWriteDescriptorSet[]) { 315b8e80941Smrg { 316b8e80941Smrg .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 317b8e80941Smrg .dstBinding = 0, 318b8e80941Smrg .dstArrayElement = 0, 319b8e80941Smrg .descriptorCount = 1, 320b8e80941Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 321b8e80941Smrg .pBufferInfo = &(VkDescriptorBufferInfo) { 322b8e80941Smrg .buffer = radv_buffer_to_handle(&dst_buffer), 323b8e80941Smrg .offset = 0, 324b8e80941Smrg .range = size 325b8e80941Smrg } 326b8e80941Smrg } 327b8e80941Smrg }); 328b8e80941Smrg 329b8e80941Smrg radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 330b8e80941Smrg device->meta_state.buffer.fill_p_layout, 331b8e80941Smrg VK_SHADER_STAGE_COMPUTE_BIT, 0, 4, 332b8e80941Smrg &value); 333b8e80941Smrg 334b8e80941Smrg radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1); 335b8e80941Smrg 336b8e80941Smrg radv_meta_restore(&saved_state, cmd_buffer); 337b8e80941Smrg} 338b8e80941Smrg 339b8e80941Smrgstatic void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, 340b8e80941Smrg struct radeon_winsys_bo *src_bo, 341b8e80941Smrg struct radeon_winsys_bo *dst_bo, 342b8e80941Smrg uint64_t src_offset, uint64_t dst_offset, 343b8e80941Smrg uint64_t size) 344b8e80941Smrg{ 345b8e80941Smrg struct radv_device *device = cmd_buffer->device; 346b8e80941Smrg uint64_t block_count = round_up_u64(size, 1024); 347b8e80941Smrg struct radv_meta_saved_state saved_state; 348b8e80941Smrg 349b8e80941Smrg radv_meta_save(&saved_state, cmd_buffer, 350b8e80941Smrg RADV_META_SAVE_COMPUTE_PIPELINE | 351b8e80941Smrg RADV_META_SAVE_DESCRIPTORS); 352b8e80941Smrg 353b8e80941Smrg struct radv_buffer dst_buffer = { 354b8e80941Smrg .bo = dst_bo, 355b8e80941Smrg .offset = dst_offset, 356b8e80941Smrg .size = size 357b8e80941Smrg }; 358b8e80941Smrg 359b8e80941Smrg struct radv_buffer src_buffer = { 360b8e80941Smrg .bo = src_bo, 361b8e80941Smrg .offset = src_offset, 362b8e80941Smrg .size = size 363b8e80941Smrg }; 364b8e80941Smrg 365b8e80941Smrg radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), 366b8e80941Smrg VK_PIPELINE_BIND_POINT_COMPUTE, 367b8e80941Smrg device->meta_state.buffer.copy_pipeline); 368b8e80941Smrg 369b8e80941Smrg radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, 370b8e80941Smrg device->meta_state.buffer.copy_p_layout, 371b8e80941Smrg 0, /* set */ 372b8e80941Smrg 2, /* descriptorWriteCount */ 373b8e80941Smrg (VkWriteDescriptorSet[]) { 374b8e80941Smrg { 375b8e80941Smrg .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 376b8e80941Smrg .dstBinding = 0, 377b8e80941Smrg .dstArrayElement = 0, 378b8e80941Smrg .descriptorCount = 1, 379b8e80941Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 380b8e80941Smrg .pBufferInfo = &(VkDescriptorBufferInfo) { 381b8e80941Smrg .buffer = radv_buffer_to_handle(&dst_buffer), 382b8e80941Smrg .offset = 0, 383b8e80941Smrg .range = size 384b8e80941Smrg } 385b8e80941Smrg }, 386b8e80941Smrg { 387b8e80941Smrg .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 388b8e80941Smrg .dstBinding = 1, 389b8e80941Smrg .dstArrayElement = 0, 390b8e80941Smrg .descriptorCount = 1, 391b8e80941Smrg .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 392b8e80941Smrg .pBufferInfo = &(VkDescriptorBufferInfo) { 393b8e80941Smrg .buffer = radv_buffer_to_handle(&src_buffer), 394b8e80941Smrg .offset = 0, 395b8e80941Smrg .range = size 396b8e80941Smrg } 397b8e80941Smrg } 398b8e80941Smrg }); 399b8e80941Smrg 400b8e80941Smrg radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1); 401b8e80941Smrg 402b8e80941Smrg radv_meta_restore(&saved_state, cmd_buffer); 403b8e80941Smrg} 404b8e80941Smrg 405b8e80941Smrg 406b8e80941Smrguint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, 407b8e80941Smrg struct radeon_winsys_bo *bo, 408b8e80941Smrg uint64_t offset, uint64_t size, uint32_t value) 409b8e80941Smrg{ 410b8e80941Smrg uint32_t flush_bits = 0; 411b8e80941Smrg 412b8e80941Smrg assert(!(offset & 3)); 413b8e80941Smrg assert(!(size & 3)); 414b8e80941Smrg 415b8e80941Smrg if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) { 416b8e80941Smrg fill_buffer_shader(cmd_buffer, bo, offset, size, value); 417b8e80941Smrg flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH | 418b8e80941Smrg RADV_CMD_FLAG_INV_VMEM_L1 | 419b8e80941Smrg RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; 420b8e80941Smrg } else if (size) { 421b8e80941Smrg uint64_t va = radv_buffer_get_va(bo); 422b8e80941Smrg va += offset; 423b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo); 424b8e80941Smrg si_cp_dma_clear_buffer(cmd_buffer, va, size, value); 425b8e80941Smrg } 426b8e80941Smrg 427b8e80941Smrg return flush_bits; 428b8e80941Smrg} 429b8e80941Smrg 430b8e80941Smrgstatic 431b8e80941Smrgvoid radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, 432b8e80941Smrg struct radeon_winsys_bo *src_bo, 433b8e80941Smrg struct radeon_winsys_bo *dst_bo, 434b8e80941Smrg uint64_t src_offset, uint64_t dst_offset, 435b8e80941Smrg uint64_t size) 436b8e80941Smrg{ 437b8e80941Smrg if (size >= RADV_BUFFER_OPS_CS_THRESHOLD && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3)) 438b8e80941Smrg copy_buffer_shader(cmd_buffer, src_bo, dst_bo, 439b8e80941Smrg src_offset, dst_offset, size); 440b8e80941Smrg else if (size) { 441b8e80941Smrg uint64_t src_va = radv_buffer_get_va(src_bo); 442b8e80941Smrg uint64_t dst_va = radv_buffer_get_va(dst_bo); 443b8e80941Smrg src_va += src_offset; 444b8e80941Smrg dst_va += dst_offset; 445b8e80941Smrg 446b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo); 447b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo); 448b8e80941Smrg 449b8e80941Smrg si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size); 450b8e80941Smrg } 451b8e80941Smrg} 452b8e80941Smrg 453b8e80941Smrgvoid radv_CmdFillBuffer( 454b8e80941Smrg VkCommandBuffer commandBuffer, 455b8e80941Smrg VkBuffer dstBuffer, 456b8e80941Smrg VkDeviceSize dstOffset, 457b8e80941Smrg VkDeviceSize fillSize, 458b8e80941Smrg uint32_t data) 459b8e80941Smrg{ 460b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 461b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); 462b8e80941Smrg 463b8e80941Smrg if (fillSize == VK_WHOLE_SIZE) 464b8e80941Smrg fillSize = (dst_buffer->size - dstOffset) & ~3ull; 465b8e80941Smrg 466b8e80941Smrg radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset, 467b8e80941Smrg fillSize, data); 468b8e80941Smrg} 469b8e80941Smrg 470b8e80941Smrgvoid radv_CmdCopyBuffer( 471b8e80941Smrg VkCommandBuffer commandBuffer, 472b8e80941Smrg VkBuffer srcBuffer, 473b8e80941Smrg VkBuffer destBuffer, 474b8e80941Smrg uint32_t regionCount, 475b8e80941Smrg const VkBufferCopy* pRegions) 476b8e80941Smrg{ 477b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 478b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer); 479b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer); 480b8e80941Smrg bool old_predicating; 481b8e80941Smrg 482b8e80941Smrg /* VK_EXT_conditional_rendering says that copy commands should not be 483b8e80941Smrg * affected by conditional rendering. 484b8e80941Smrg */ 485b8e80941Smrg old_predicating = cmd_buffer->state.predicating; 486b8e80941Smrg cmd_buffer->state.predicating = false; 487b8e80941Smrg 488b8e80941Smrg for (unsigned r = 0; r < regionCount; r++) { 489b8e80941Smrg uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; 490b8e80941Smrg uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; 491b8e80941Smrg uint64_t copy_size = pRegions[r].size; 492b8e80941Smrg 493b8e80941Smrg radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo, 494b8e80941Smrg src_offset, dest_offset, copy_size); 495b8e80941Smrg } 496b8e80941Smrg 497b8e80941Smrg /* Restore conditional rendering. */ 498b8e80941Smrg cmd_buffer->state.predicating = old_predicating; 499b8e80941Smrg} 500b8e80941Smrg 501b8e80941Smrgvoid radv_CmdUpdateBuffer( 502b8e80941Smrg VkCommandBuffer commandBuffer, 503b8e80941Smrg VkBuffer dstBuffer, 504b8e80941Smrg VkDeviceSize dstOffset, 505b8e80941Smrg VkDeviceSize dataSize, 506b8e80941Smrg const void* pData) 507b8e80941Smrg{ 508b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 509b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); 510b8e80941Smrg bool mec = radv_cmd_buffer_uses_mec(cmd_buffer); 511b8e80941Smrg uint64_t words = dataSize / 4; 512b8e80941Smrg uint64_t va = radv_buffer_get_va(dst_buffer->bo); 513b8e80941Smrg va += dstOffset + dst_buffer->offset; 514b8e80941Smrg 515b8e80941Smrg assert(!(dataSize & 3)); 516b8e80941Smrg assert(!(va & 3)); 517b8e80941Smrg 518b8e80941Smrg if (!dataSize) 519b8e80941Smrg return; 520b8e80941Smrg 521b8e80941Smrg if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) { 522b8e80941Smrg si_emit_cache_flush(cmd_buffer); 523b8e80941Smrg 524b8e80941Smrg radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo); 525b8e80941Smrg 526b8e80941Smrg radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4); 527b8e80941Smrg 528b8e80941Smrg radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0)); 529b8e80941Smrg radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ? 530b8e80941Smrg V_370_MEM : V_370_MEM_GRBM) | 531b8e80941Smrg S_370_WR_CONFIRM(1) | 532b8e80941Smrg S_370_ENGINE_SEL(V_370_ME)); 533b8e80941Smrg radeon_emit(cmd_buffer->cs, va); 534b8e80941Smrg radeon_emit(cmd_buffer->cs, va >> 32); 535b8e80941Smrg radeon_emit_array(cmd_buffer->cs, pData, words); 536b8e80941Smrg 537b8e80941Smrg if (unlikely(cmd_buffer->device->trace_bo)) 538b8e80941Smrg radv_cmd_buffer_trace_emit(cmd_buffer); 539b8e80941Smrg } else { 540b8e80941Smrg uint32_t buf_offset; 541b8e80941Smrg radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset); 542b8e80941Smrg radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo, 543b8e80941Smrg buf_offset, dstOffset + dst_buffer->offset, dataSize); 544b8e80941Smrg } 545b8e80941Smrg} 546