radv_meta_bufimage.c revision 7ec681f3
1/* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24#include "nir/nir_builder.h" 25#include "radv_meta.h" 26 27/* 28 * GFX queue: Compute shader implementation of image->buffer copy 29 * Compute queue: implementation also of buffer->image, image->image, and image clear. 30 */ 31 32/* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options 33 * for that. 34 */ 35static nir_shader * 36build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d) 37{ 38 enum glsl_sampler_dim dim = is_3d ? 
GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D; 39 const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT); 40 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT); 41 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, 42 is_3d ? "meta_itob_cs_3d" : "meta_itob_cs"); 43 b.shader->info.workgroup_size[0] = 8; 44 b.shader->info.workgroup_size[1] = 8; 45 b.shader->info.workgroup_size[2] = 1; 46 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); 47 input_img->data.descriptor_set = 0; 48 input_img->data.binding = 0; 49 50 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 51 output_img->data.descriptor_set = 0; 52 output_img->data.binding = 1; 53 54 nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2); 55 56 nir_ssa_def *offset = 57 nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16); 58 nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16); 59 60 nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset); 61 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; 62 63 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 64 tex->sampler_dim = dim; 65 tex->op = nir_texop_txf; 66 tex->src[0].src_type = nir_tex_src_coord; 67 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3)); 68 tex->src[1].src_type = nir_tex_src_lod; 69 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 70 tex->src[2].src_type = nir_tex_src_texture_deref; 71 tex->src[2].src = nir_src_for_ssa(input_img_deref); 72 tex->dest_type = nir_type_float32; 73 tex->is_array = false; 74 tex->coord_components = is_3d ? 
3 : 2; 75 76 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 77 nir_builder_instr_insert(&b, &tex->instr); 78 79 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0); 80 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1); 81 82 nir_ssa_def *tmp = nir_imul(&b, pos_y, stride); 83 tmp = nir_iadd(&b, tmp, pos_x); 84 85 nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp); 86 87 nir_ssa_def *outval = &tex->dest.ssa; 88 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, 89 nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), 90 .image_dim = GLSL_SAMPLER_DIM_BUF); 91 92 return b.shader; 93} 94 95/* Image to buffer - don't write use image accessors */ 96static VkResult 97radv_device_init_meta_itob_state(struct radv_device *device) 98{ 99 VkResult result; 100 nir_shader *cs = build_nir_itob_compute_shader(device, false); 101 nir_shader *cs_3d = NULL; 102 103 if (device->physical_device->rad_info.chip_class >= GFX9) 104 cs_3d = build_nir_itob_compute_shader(device, true); 105 106 /* 107 * two descriptors one for the image being sampled 108 * one for the buffer being written. 
109 */ 110 VkDescriptorSetLayoutCreateInfo ds_create_info = { 111 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 112 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 113 .bindingCount = 2, 114 .pBindings = (VkDescriptorSetLayoutBinding[]){ 115 {.binding = 0, 116 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 117 .descriptorCount = 1, 118 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 119 .pImmutableSamplers = NULL}, 120 {.binding = 1, 121 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 122 .descriptorCount = 1, 123 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 124 .pImmutableSamplers = NULL}, 125 }}; 126 127 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 128 &device->meta_state.alloc, 129 &device->meta_state.itob.img_ds_layout); 130 if (result != VK_SUCCESS) 131 goto fail; 132 133 VkPipelineLayoutCreateInfo pl_create_info = { 134 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 135 .setLayoutCount = 1, 136 .pSetLayouts = &device->meta_state.itob.img_ds_layout, 137 .pushConstantRangeCount = 1, 138 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, 139 }; 140 141 result = 142 radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 143 &device->meta_state.alloc, &device->meta_state.itob.img_p_layout); 144 if (result != VK_SUCCESS) 145 goto fail; 146 147 /* compute shader */ 148 149 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 150 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 151 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 152 .module = vk_shader_module_handle_from_nir(cs), 153 .pName = "main", 154 .pSpecializationInfo = NULL, 155 }; 156 157 VkComputePipelineCreateInfo vk_pipeline_info = { 158 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 159 .stage = pipeline_shader_stage, 160 .flags = 0, 161 .layout = device->meta_state.itob.img_p_layout, 162 }; 163 164 result = 
radv_CreateComputePipelines(radv_device_to_handle(device), 165 radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 166 &vk_pipeline_info, NULL, &device->meta_state.itob.pipeline); 167 if (result != VK_SUCCESS) 168 goto fail; 169 170 if (device->physical_device->rad_info.chip_class >= GFX9) { 171 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = { 172 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 173 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 174 .module = vk_shader_module_handle_from_nir(cs_3d), 175 .pName = "main", 176 .pSpecializationInfo = NULL, 177 }; 178 179 VkComputePipelineCreateInfo vk_pipeline_info_3d = { 180 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 181 .stage = pipeline_shader_stage_3d, 182 .flags = 0, 183 .layout = device->meta_state.itob.img_p_layout, 184 }; 185 186 result = radv_CreateComputePipelines( 187 radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 188 &vk_pipeline_info_3d, NULL, &device->meta_state.itob.pipeline_3d); 189 if (result != VK_SUCCESS) 190 goto fail; 191 ralloc_free(cs_3d); 192 } 193 ralloc_free(cs); 194 195 return VK_SUCCESS; 196fail: 197 ralloc_free(cs); 198 ralloc_free(cs_3d); 199 return result; 200} 201 202static void 203radv_device_finish_meta_itob_state(struct radv_device *device) 204{ 205 struct radv_meta_state *state = &device->meta_state; 206 207 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout, 208 &state->alloc); 209 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout, 210 &state->alloc); 211 radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc); 212 if (device->physical_device->rad_info.chip_class >= GFX9) 213 radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc); 214} 215 216static nir_shader * 217build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d) 218{ 219 enum glsl_sampler_dim 
dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D; 220 const struct glsl_type *buf_type = 221 glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); 222 const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); 223 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, 224 is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs"); 225 b.shader->info.workgroup_size[0] = 8; 226 b.shader->info.workgroup_size[1] = 8; 227 b.shader->info.workgroup_size[2] = 1; 228 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex"); 229 input_img->data.descriptor_set = 0; 230 input_img->data.binding = 0; 231 232 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 233 output_img->data.descriptor_set = 0; 234 output_img->data.binding = 1; 235 236 nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2); 237 238 nir_ssa_def *offset = 239 nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16); 240 nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16); 241 242 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0); 243 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1); 244 245 nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride); 246 buf_coord = nir_iadd(&b, buf_coord, pos_x); 247 248 nir_ssa_def *coord = nir_iadd(&b, global_id, offset); 249 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; 250 251 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 252 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF; 253 tex->op = nir_texop_txf; 254 tex->src[0].src_type = nir_tex_src_coord; 255 tex->src[0].src = nir_src_for_ssa(buf_coord); 256 tex->src[1].src_type = nir_tex_src_lod; 257 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 258 tex->src[2].src_type = nir_tex_src_texture_deref; 259 tex->src[2].src = nir_src_for_ssa(input_img_deref); 260 tex->dest_type = nir_type_float32; 261 
tex->is_array = false; 262 tex->coord_components = 1; 263 264 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 265 nir_builder_instr_insert(&b, &tex->instr); 266 267 nir_ssa_def *outval = &tex->dest.ssa; 268 269 nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), 270 nir_channel(&b, coord, 1), 271 is_3d ? nir_channel(&b, coord, 2) : nir_ssa_undef(&b, 1, 32), 272 nir_ssa_undef(&b, 1, 32)); 273 274 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, 275 nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), .image_dim = dim); 276 277 return b.shader; 278} 279 280/* Buffer to image - don't write use image accessors */ 281static VkResult 282radv_device_init_meta_btoi_state(struct radv_device *device) 283{ 284 VkResult result; 285 nir_shader *cs = build_nir_btoi_compute_shader(device, false); 286 nir_shader *cs_3d = NULL; 287 if (device->physical_device->rad_info.chip_class >= GFX9) 288 cs_3d = build_nir_btoi_compute_shader(device, true); 289 /* 290 * two descriptors one for the image being sampled 291 * one for the buffer being written. 
292 */ 293 VkDescriptorSetLayoutCreateInfo ds_create_info = { 294 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 295 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 296 .bindingCount = 2, 297 .pBindings = (VkDescriptorSetLayoutBinding[]){ 298 {.binding = 0, 299 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 300 .descriptorCount = 1, 301 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 302 .pImmutableSamplers = NULL}, 303 {.binding = 1, 304 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 305 .descriptorCount = 1, 306 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 307 .pImmutableSamplers = NULL}, 308 }}; 309 310 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 311 &device->meta_state.alloc, 312 &device->meta_state.btoi.img_ds_layout); 313 if (result != VK_SUCCESS) 314 goto fail; 315 316 VkPipelineLayoutCreateInfo pl_create_info = { 317 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 318 .setLayoutCount = 1, 319 .pSetLayouts = &device->meta_state.btoi.img_ds_layout, 320 .pushConstantRangeCount = 1, 321 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, 322 }; 323 324 result = 325 radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 326 &device->meta_state.alloc, &device->meta_state.btoi.img_p_layout); 327 if (result != VK_SUCCESS) 328 goto fail; 329 330 /* compute shader */ 331 332 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 333 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 334 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 335 .module = vk_shader_module_handle_from_nir(cs), 336 .pName = "main", 337 .pSpecializationInfo = NULL, 338 }; 339 340 VkComputePipelineCreateInfo vk_pipeline_info = { 341 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 342 .stage = pipeline_shader_stage, 343 .flags = 0, 344 .layout = device->meta_state.btoi.img_p_layout, 345 }; 346 347 result = 
radv_CreateComputePipelines(radv_device_to_handle(device), 348 radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 349 &vk_pipeline_info, NULL, &device->meta_state.btoi.pipeline); 350 if (result != VK_SUCCESS) 351 goto fail; 352 353 if (device->physical_device->rad_info.chip_class >= GFX9) { 354 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = { 355 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 356 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 357 .module = vk_shader_module_handle_from_nir(cs_3d), 358 .pName = "main", 359 .pSpecializationInfo = NULL, 360 }; 361 362 VkComputePipelineCreateInfo vk_pipeline_info_3d = { 363 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 364 .stage = pipeline_shader_stage_3d, 365 .flags = 0, 366 .layout = device->meta_state.btoi.img_p_layout, 367 }; 368 369 result = radv_CreateComputePipelines( 370 radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 371 &vk_pipeline_info_3d, NULL, &device->meta_state.btoi.pipeline_3d); 372 ralloc_free(cs_3d); 373 } 374 ralloc_free(cs); 375 376 return VK_SUCCESS; 377fail: 378 ralloc_free(cs_3d); 379 ralloc_free(cs); 380 return result; 381} 382 383static void 384radv_device_finish_meta_btoi_state(struct radv_device *device) 385{ 386 struct radv_meta_state *state = &device->meta_state; 387 388 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout, 389 &state->alloc); 390 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout, 391 &state->alloc); 392 radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc); 393 radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc); 394} 395 396/* Buffer to image - special path for R32G32B32 */ 397static nir_shader * 398build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev) 399{ 400 const struct glsl_type *buf_type = 401 
glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); 402 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT); 403 nir_builder b = 404 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_btoi_r32g32b32_cs"); 405 b.shader->info.workgroup_size[0] = 8; 406 b.shader->info.workgroup_size[1] = 8; 407 b.shader->info.workgroup_size[2] = 1; 408 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex"); 409 input_img->data.descriptor_set = 0; 410 input_img->data.binding = 0; 411 412 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 413 output_img->data.descriptor_set = 0; 414 output_img->data.binding = 1; 415 416 nir_ssa_def *global_id = get_global_ids(&b, 2); 417 418 nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16); 419 nir_ssa_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 16); 420 nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16); 421 422 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0); 423 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1); 424 425 nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride); 426 buf_coord = nir_iadd(&b, buf_coord, pos_x); 427 428 nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset); 429 430 nir_ssa_def *global_pos = 431 nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch), 432 nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3))); 433 434 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; 435 436 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 437 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF; 438 tex->op = nir_texop_txf; 439 tex->src[0].src_type = nir_tex_src_coord; 440 tex->src[0].src = nir_src_for_ssa(buf_coord); 441 tex->src[1].src_type = nir_tex_src_lod; 442 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 443 
tex->src[2].src_type = nir_tex_src_texture_deref; 444 tex->src[2].src = nir_src_for_ssa(input_img_deref); 445 tex->dest_type = nir_type_float32; 446 tex->is_array = false; 447 tex->coord_components = 1; 448 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 449 nir_builder_instr_insert(&b, &tex->instr); 450 451 nir_ssa_def *outval = &tex->dest.ssa; 452 453 for (int chan = 0; chan < 3; chan++) { 454 nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan)); 455 456 nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos); 457 458 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, 459 nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, chan), 460 nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF); 461 } 462 463 return b.shader; 464} 465 466static VkResult 467radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device) 468{ 469 VkResult result; 470 nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device); 471 472 VkDescriptorSetLayoutCreateInfo ds_create_info = { 473 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 474 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 475 .bindingCount = 2, 476 .pBindings = (VkDescriptorSetLayoutBinding[]){ 477 {.binding = 0, 478 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 479 .descriptorCount = 1, 480 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 481 .pImmutableSamplers = NULL}, 482 {.binding = 1, 483 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 484 .descriptorCount = 1, 485 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 486 .pImmutableSamplers = NULL}, 487 }}; 488 489 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 490 &device->meta_state.alloc, 491 &device->meta_state.btoi_r32g32b32.img_ds_layout); 492 if (result != VK_SUCCESS) 493 goto fail; 494 495 VkPipelineLayoutCreateInfo pl_create_info = { 496 .sType = 
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 497 .setLayoutCount = 1, 498 .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout, 499 .pushConstantRangeCount = 1, 500 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, 501 }; 502 503 result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 504 &device->meta_state.alloc, 505 &device->meta_state.btoi_r32g32b32.img_p_layout); 506 if (result != VK_SUCCESS) 507 goto fail; 508 509 /* compute shader */ 510 511 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 512 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 513 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 514 .module = vk_shader_module_handle_from_nir(cs), 515 .pName = "main", 516 .pSpecializationInfo = NULL, 517 }; 518 519 VkComputePipelineCreateInfo vk_pipeline_info = { 520 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 521 .stage = pipeline_shader_stage, 522 .flags = 0, 523 .layout = device->meta_state.btoi_r32g32b32.img_p_layout, 524 }; 525 526 result = radv_CreateComputePipelines( 527 radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 528 &vk_pipeline_info, NULL, &device->meta_state.btoi_r32g32b32.pipeline); 529 530fail: 531 ralloc_free(cs); 532 return result; 533} 534 535static void 536radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device) 537{ 538 struct radv_meta_state *state = &device->meta_state; 539 540 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout, 541 &state->alloc); 542 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), 543 state->btoi_r32g32b32.img_ds_layout, &state->alloc); 544 radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline, 545 &state->alloc); 546} 547 548static nir_shader * 549build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples) 550{ 551 bool is_multisampled = samples > 1; 552 enum 
glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D 553 : is_multisampled ? GLSL_SAMPLER_DIM_MS 554 : GLSL_SAMPLER_DIM_2D; 555 const struct glsl_type *buf_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT); 556 const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); 557 nir_builder b = nir_builder_init_simple_shader( 558 MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples); 559 b.shader->info.workgroup_size[0] = 8; 560 b.shader->info.workgroup_size[1] = 8; 561 b.shader->info.workgroup_size[2] = 1; 562 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex"); 563 input_img->data.descriptor_set = 0; 564 input_img->data.binding = 0; 565 566 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 567 output_img->data.descriptor_set = 0; 568 output_img->data.binding = 1; 569 570 nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2); 571 572 nir_ssa_def *src_offset = 573 nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 24); 574 nir_ssa_def *dst_offset = 575 nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = 24); 576 577 nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset); 578 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; 579 580 nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset); 581 582 nir_tex_instr *tex_instr[8]; 583 for (uint32_t i = 0; i < samples; i++) { 584 tex_instr[i] = nir_tex_instr_create(b.shader, is_multisampled ? 4 : 3); 585 586 nir_tex_instr *tex = tex_instr[i]; 587 tex->sampler_dim = dim; 588 tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf; 589 tex->src[0].src_type = nir_tex_src_coord; 590 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 
0x7 : 0x3)); 591 tex->src[1].src_type = nir_tex_src_lod; 592 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 593 tex->src[2].src_type = nir_tex_src_texture_deref; 594 tex->src[2].src = nir_src_for_ssa(input_img_deref); 595 if (is_multisampled) { 596 tex->src[3].src_type = nir_tex_src_ms_index; 597 tex->src[3].src = nir_src_for_ssa(nir_imm_int(&b, i)); 598 } 599 tex->dest_type = nir_type_float32; 600 tex->is_array = false; 601 tex->coord_components = is_3d ? 3 : 2; 602 603 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 604 nir_builder_instr_insert(&b, &tex->instr); 605 } 606 607 nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), 608 nir_channel(&b, dst_coord, 1), 609 is_3d ? nir_channel(&b, dst_coord, 2) : nir_ssa_undef(&b, 1, 32), 610 nir_ssa_undef(&b, 1, 32)); 611 612 for (uint32_t i = 0; i < samples; i++) { 613 nir_ssa_def *outval = &tex_instr[i]->dest.ssa; 614 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, 615 nir_imm_int(&b, i), outval, nir_imm_int(&b, 0), .image_dim = dim); 616 } 617 618 return b.shader; 619} 620 621static VkResult 622create_itoi_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline) 623{ 624 struct radv_meta_state *state = &device->meta_state; 625 nir_shader *cs = build_nir_itoi_compute_shader(device, false, samples); 626 VkResult result; 627 628 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 629 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 630 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 631 .module = vk_shader_module_handle_from_nir(cs), 632 .pName = "main", 633 .pSpecializationInfo = NULL, 634 }; 635 636 VkComputePipelineCreateInfo vk_pipeline_info = { 637 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 638 .stage = pipeline_shader_stage, 639 .flags = 0, 640 .layout = state->itoi.img_p_layout, 641 }; 642 643 result = radv_CreateComputePipelines(radv_device_to_handle(device), 644 
radv_pipeline_cache_to_handle(&state->cache), 1, 645 &vk_pipeline_info, NULL, pipeline); 646 ralloc_free(cs); 647 return result; 648} 649 650/* image to image - don't write use image accessors */ 651static VkResult 652radv_device_init_meta_itoi_state(struct radv_device *device) 653{ 654 VkResult result; 655 656 /* 657 * two descriptors one for the image being sampled 658 * one for the buffer being written. 659 */ 660 VkDescriptorSetLayoutCreateInfo ds_create_info = { 661 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 662 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 663 .bindingCount = 2, 664 .pBindings = (VkDescriptorSetLayoutBinding[]){ 665 {.binding = 0, 666 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 667 .descriptorCount = 1, 668 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 669 .pImmutableSamplers = NULL}, 670 {.binding = 1, 671 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 672 .descriptorCount = 1, 673 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 674 .pImmutableSamplers = NULL}, 675 }}; 676 677 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 678 &device->meta_state.alloc, 679 &device->meta_state.itoi.img_ds_layout); 680 if (result != VK_SUCCESS) 681 goto fail; 682 683 VkPipelineLayoutCreateInfo pl_create_info = { 684 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 685 .setLayoutCount = 1, 686 .pSetLayouts = &device->meta_state.itoi.img_ds_layout, 687 .pushConstantRangeCount = 1, 688 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24}, 689 }; 690 691 result = 692 radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 693 &device->meta_state.alloc, &device->meta_state.itoi.img_p_layout); 694 if (result != VK_SUCCESS) 695 goto fail; 696 697 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) { 698 uint32_t samples = 1 << i; 699 result = create_itoi_pipeline(device, samples, &device->meta_state.itoi.pipeline[i]); 700 if (result 
!= VK_SUCCESS) 701 goto fail; 702 } 703 704 if (device->physical_device->rad_info.chip_class >= GFX9) { 705 nir_shader *cs_3d = build_nir_itoi_compute_shader(device, true, 1); 706 707 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = { 708 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 709 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 710 .module = vk_shader_module_handle_from_nir(cs_3d), 711 .pName = "main", 712 .pSpecializationInfo = NULL, 713 }; 714 715 VkComputePipelineCreateInfo vk_pipeline_info_3d = { 716 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 717 .stage = pipeline_shader_stage_3d, 718 .flags = 0, 719 .layout = device->meta_state.itoi.img_p_layout, 720 }; 721 722 result = radv_CreateComputePipelines( 723 radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 724 &vk_pipeline_info_3d, NULL, &device->meta_state.itoi.pipeline_3d); 725 ralloc_free(cs_3d); 726 } 727 728 return VK_SUCCESS; 729fail: 730 return result; 731} 732 733static void 734radv_device_finish_meta_itoi_state(struct radv_device *device) 735{ 736 struct radv_meta_state *state = &device->meta_state; 737 738 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout, 739 &state->alloc); 740 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout, 741 &state->alloc); 742 743 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { 744 radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc); 745 } 746 747 if (device->physical_device->rad_info.chip_class >= GFX9) 748 radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d, &state->alloc); 749} 750 751static nir_shader * 752build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev) 753{ 754 const struct glsl_type *type = 755 glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); 756 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, 
                                                       false, GLSL_TYPE_FLOAT);
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_itoi_r32g32b32_cs");
   /* One workgroup covers an 8x8 tile of texels. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "input_img");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img =
      nir_variable_create(b.shader, nir_var_uniform, img_type, "output_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *global_id = get_global_ids(&b, 2);

   /* Push constants (24 bytes): src (x, y, stride) then dst (x, y, stride).
    * The third channel of each vec3 doubles as the row stride below.
    */
   nir_ssa_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 24);
   nir_ssa_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);

   nir_ssa_def *src_stride = nir_channel(&b, src_offset, 2);
   nir_ssa_def *dst_stride = nir_channel(&b, dst_offset, 2);

   nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
   nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);

   /* Linear buffer index: y * stride + x * 3 — each R32G32B32 texel occupies
    * three consecutive R32 words in the aliased buffer view.
    */
   nir_ssa_def *src_global_pos =
      nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
               nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));

   nir_ssa_def *dst_global_pos =
      nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
               nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));

   /* Copy the three 32-bit components one at a time through the R32 views. */
   for (int chan = 0; chan < 3; chan++) {
      /* src */
      nir_ssa_def *src_local_pos = nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));
      nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

      nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
      tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
      tex->op = nir_texop_txf;
      tex->src[0].src_type = nir_tex_src_coord;
      tex->src[0].src = nir_src_for_ssa(src_local_pos);
      tex->src[1].src_type = nir_tex_src_lod;
      tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
      tex->src[2].src_type = nir_tex_src_texture_deref;
      tex->src[2].src = nir_src_for_ssa(input_img_deref);
      tex->dest_type = nir_type_float32;
      tex->is_array = false;
      tex->coord_components = 1;
      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
      nir_builder_instr_insert(&b, &tex->instr);

      nir_ssa_def *outval = &tex->dest.ssa;

      /* dst */
      nir_ssa_def *dst_local_pos = nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));

      nir_ssa_def *dst_coord =
         nir_vec4(&b, dst_local_pos, dst_local_pos, dst_local_pos, dst_local_pos);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
                            nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, 0),
                            nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
   }

   return b.shader;
}

/* Image to image - special path for R32G32B32 */
static VkResult
radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
{
   VkResult result;
   nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);

   /* Two bindings: the source R32 uniform texel buffer and the destination
    * R32 storage texel buffer, both aliasing linear image memory.
    */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
                                           &device->meta_state.alloc,
                                           &device->meta_state.itoi_r32g32b32.img_ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* 24 bytes of push constants: src x/y/stride + dst x/y/stride. */
   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.itoi_r32g32b32.img_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &vk_pipeline_info, NULL, &device->meta_state.itoi_r32g32b32.pipeline);

fail:
   /* The NIR shader is always owned here; partially created Vulkan objects
    * are cleaned up by the caller through the _finish_ path.
    */
   ralloc_free(cs);
   return result;
}

/* Destroy the itoi R32G32B32 pipeline, pipeline layout and descriptor layout. */
static void
radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout,
                              &state->alloc);
   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                   state->itoi_r32g32b32.img_ds_layout, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline,
                        &state->alloc);
}
908 909static nir_shader * 910build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples) 911{ 912 bool is_multisampled = samples > 1; 913 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D 914 : is_multisampled ? GLSL_SAMPLER_DIM_MS 915 : GLSL_SAMPLER_DIM_2D; 916 const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); 917 nir_builder b = nir_builder_init_simple_shader( 918 MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples); 919 b.shader->info.workgroup_size[0] = 8; 920 b.shader->info.workgroup_size[1] = 8; 921 b.shader->info.workgroup_size[2] = 1; 922 923 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 924 output_img->data.descriptor_set = 0; 925 output_img->data.binding = 0; 926 927 nir_ssa_def *global_id = get_global_ids(&b, 2); 928 929 nir_ssa_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 20); 930 nir_ssa_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20); 931 932 nir_ssa_def *comps[4]; 933 comps[0] = nir_channel(&b, global_id, 0); 934 comps[1] = nir_channel(&b, global_id, 1); 935 comps[2] = layer; 936 comps[3] = nir_ssa_undef(&b, 1, 32); 937 global_id = nir_vec(&b, comps, 4); 938 939 for (uint32_t i = 0; i < samples; i++) { 940 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, 941 nir_imm_int(&b, i), clear_val, nir_imm_int(&b, 0), .image_dim = dim); 942 } 943 944 return b.shader; 945} 946 947static VkResult 948create_cleari_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline) 949{ 950 nir_shader *cs = build_nir_cleari_compute_shader(device, false, samples); 951 VkResult result; 952 953 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 954 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 955 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 956 .module = vk_shader_module_handle_from_nir(cs), 957 
.pName = "main", 958 .pSpecializationInfo = NULL, 959 }; 960 961 VkComputePipelineCreateInfo vk_pipeline_info = { 962 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 963 .stage = pipeline_shader_stage, 964 .flags = 0, 965 .layout = device->meta_state.cleari.img_p_layout, 966 }; 967 968 result = radv_CreateComputePipelines(radv_device_to_handle(device), 969 radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 970 &vk_pipeline_info, NULL, pipeline); 971 ralloc_free(cs); 972 return result; 973} 974 975static VkResult 976radv_device_init_meta_cleari_state(struct radv_device *device) 977{ 978 VkResult result; 979 980 /* 981 * two descriptors one for the image being sampled 982 * one for the buffer being written. 983 */ 984 VkDescriptorSetLayoutCreateInfo ds_create_info = { 985 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 986 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 987 .bindingCount = 1, 988 .pBindings = (VkDescriptorSetLayoutBinding[]){ 989 {.binding = 0, 990 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 991 .descriptorCount = 1, 992 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 993 .pImmutableSamplers = NULL}, 994 }}; 995 996 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 997 &device->meta_state.alloc, 998 &device->meta_state.cleari.img_ds_layout); 999 if (result != VK_SUCCESS) 1000 goto fail; 1001 1002 VkPipelineLayoutCreateInfo pl_create_info = { 1003 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 1004 .setLayoutCount = 1, 1005 .pSetLayouts = &device->meta_state.cleari.img_ds_layout, 1006 .pushConstantRangeCount = 1, 1007 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20}, 1008 }; 1009 1010 result = 1011 radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 1012 &device->meta_state.alloc, &device->meta_state.cleari.img_p_layout); 1013 if (result != VK_SUCCESS) 1014 goto fail; 1015 1016 for (uint32_t i = 0; 
i < MAX_SAMPLES_LOG2; i++) { 1017 uint32_t samples = 1 << i; 1018 result = create_cleari_pipeline(device, samples, &device->meta_state.cleari.pipeline[i]); 1019 if (result != VK_SUCCESS) 1020 goto fail; 1021 } 1022 1023 if (device->physical_device->rad_info.chip_class >= GFX9) { 1024 nir_shader *cs_3d = build_nir_cleari_compute_shader(device, true, 1); 1025 1026 /* compute shader */ 1027 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = { 1028 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 1029 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 1030 .module = vk_shader_module_handle_from_nir(cs_3d), 1031 .pName = "main", 1032 .pSpecializationInfo = NULL, 1033 }; 1034 1035 VkComputePipelineCreateInfo vk_pipeline_info_3d = { 1036 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 1037 .stage = pipeline_shader_stage_3d, 1038 .flags = 0, 1039 .layout = device->meta_state.cleari.img_p_layout, 1040 }; 1041 1042 result = radv_CreateComputePipelines( 1043 radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 1044 &vk_pipeline_info_3d, NULL, &device->meta_state.cleari.pipeline_3d); 1045 ralloc_free(cs_3d); 1046 } 1047 1048 return VK_SUCCESS; 1049fail: 1050 return result; 1051} 1052 1053static void 1054radv_device_finish_meta_cleari_state(struct radv_device *device) 1055{ 1056 struct radv_meta_state *state = &device->meta_state; 1057 1058 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout, 1059 &state->alloc); 1060 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout, 1061 &state->alloc); 1062 1063 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { 1064 radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc); 1065 } 1066 1067 radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline_3d, &state->alloc); 1068} 1069 1070/* Special path for clearing R32G32B32 images using a compute shader. 
 */
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_cleari_r32g32b32_cs");
   /* One workgroup covers an 8x8 tile of texels. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 0;

   nir_ssa_def *global_id = get_global_ids(&b, 2);

   /* Push constants (16 bytes): three clear color words + the row stride. */
   nir_ssa_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 16);
   nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
   nir_ssa_def *global_y = nir_channel(&b, global_id, 1);

   /* Linear buffer index: y * stride + x * 3 — each R32G32B32 texel occupies
    * three consecutive R32 words in the aliased buffer view.
    */
   nir_ssa_def *global_pos =
      nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul(&b, global_x, nir_imm_int(&b, 3)));

   /* Store the three components one at a time through the R32 view. */
   for (unsigned chan = 0; chan < 3; chan++) {
      nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

      nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
                            nir_ssa_undef(&b, 1, 32), nir_channel(&b, clear_val, chan),
                            nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
   }

   return b.shader;
}

/* Create the layouts and pipeline for the R32G32B32 clear path. */
static VkResult
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
{
   VkResult result;
   nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);

   /* Single binding: the R32 storage texel buffer aliasing the image memory. */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 1,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
                                           &device->meta_state.alloc,
                                           &device->meta_state.cleari_r32g32b32.img_ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* 16 bytes of push constants: 3 clear color words + row stride. */
   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.cleari_r32g32b32.img_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* compute shader */
   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &vk_pipeline_info, NULL, &device->meta_state.cleari_r32g32b32.pipeline);

fail:
   /* The NIR shader is always owned here; partially created Vulkan objects
    * are cleaned up by the caller through the _finish_ path.
    */
   ralloc_free(cs);
   return result;
}

/* Destroy the R32G32B32 clear pipeline, pipeline layout and descriptor layout. */
static void
radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout,
                              &state->alloc);
   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                   state->cleari_r32g32b32.img_ds_layout, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline,
                        &state->alloc);
}

/* Tear down all buffer<->image meta states, in creation order. */
void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
   radv_device_finish_meta_itob_state(device);
   radv_device_finish_meta_btoi_state(device);
   radv_device_finish_meta_btoi_r32g32b32_state(device);
   radv_device_finish_meta_itoi_state(device);
   radv_device_finish_meta_itoi_r32g32b32_state(device);
   radv_device_finish_meta_cleari_state(device);
   radv_device_finish_meta_cleari_r32g32b32_state(device);
}

/* Initialize every buffer<->image meta state. Each fail label finishes the
 * sub-state whose init failed (partial cleanup) and then falls through to
 * undo all earlier, fully-created states.
 */
VkResult
radv_device_init_meta_bufimage_state(struct radv_device *device)
{
   VkResult result;

   result = radv_device_init_meta_itob_state(device);
   if (result != VK_SUCCESS)
      goto fail_itob;

   result = radv_device_init_meta_btoi_state(device);
   if (result != VK_SUCCESS)
      goto fail_btoi;

   result = radv_device_init_meta_btoi_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      goto fail_btoi_r32g32b32;

   result = radv_device_init_meta_itoi_state(device);
   if (result != VK_SUCCESS)
      goto fail_itoi;

   result = radv_device_init_meta_itoi_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      goto fail_itoi_r32g32b32;

   result = radv_device_init_meta_cleari_state(device);
   if (result != VK_SUCCESS)
      goto fail_cleari;

   result = radv_device_init_meta_cleari_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      goto fail_cleari_r32g32b32;

   return VK_SUCCESS;
fail_cleari_r32g32b32:
   radv_device_finish_meta_cleari_r32g32b32_state(device);
fail_cleari:
   radv_device_finish_meta_cleari_state(device);
fail_itoi_r32g32b32:
   radv_device_finish_meta_itoi_r32g32b32_state(device);
fail_itoi:
   radv_device_finish_meta_itoi_state(device);
fail_btoi_r32g32b32:
   radv_device_finish_meta_btoi_r32g32b32_state(device);
fail_btoi:
   radv_device_finish_meta_btoi_state(device);
fail_itob:
   radv_device_finish_meta_itob_state(device);
   return result;
}

/* Initialize an image view for one mip level/layer of the given surface.
 * VK_FORMAT_UNDEFINED selects the surface's own format. Pre-GFX9 always
 * uses a 2D view; GFX9+ derives the view type from the image.
 */
static void
create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
             struct radv_image_view *iview, VkFormat format, VkImageAspectFlagBits aspects)
{
   VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9
                                  ? VK_IMAGE_VIEW_TYPE_2D
                                  : radv_meta_get_view_type(surf->image);

   if (format == VK_FORMAT_UNDEFINED)
      format = surf->format;

   radv_image_view_init(iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(surf->image),
                           .viewType = view_type,
                           .format = format,
                           .subresourceRange = {.aspectMask = aspects,
                                                .baseMipLevel = surf->level,
                                                .levelCount = 1,
                                                .baseArrayLayer = surf->layer,
                                                .layerCount = 1},
                        },
                        &(struct radv_image_view_extra_create_info){
                           .disable_compression = surf->disable_compression,
                        });
}

/* Initialize a whole-range buffer view at the given offset/format. */
static void
create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset,
             VkFormat format, struct radv_buffer_view *bview)
{
   radv_buffer_view_init(bview, cmd_buffer->device,
                         &(VkBufferViewCreateInfo){
                            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
                            .flags = 0,
                            .buffer = radv_buffer_to_handle(buffer),
                            .format = format,
                            .offset = offset,
                            .range = VK_WHOLE_SIZE,
                         });
}

/* Create a VkBuffer aliasing the image's backing memory so a compute shader
 * can access the linear image data through a texel buffer view. The caller
 * owns the returned buffer and must destroy it.
 * NOTE(review): the radv_CreateBuffer result is not checked — presumably
 * acceptable for a host-allocation-only path, but worth confirming.
 */
static void
create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
                         VkBufferUsageFlagBits usage, VkBuffer *buffer)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_device_memory mem;

   radv_device_memory_init(&mem, device, surf->image->bo);

   radv_CreateBuffer(radv_device_to_handle(device),
                     &(VkBufferCreateInfo){
                        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                        .flags = 0,
                        .size = surf->image->size,
                        .usage = usage,
                        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
                     },
                     NULL, buffer);

   radv_BindBufferMemory2(radv_device_to_handle(device), 1,
                          (VkBindBufferMemoryInfo[]){{
                             .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
                             .buffer = *buffer,
                             .memory = radv_device_memory_to_handle(&mem),
                             .memoryOffset = surf->image->offset,
                          }});

   radv_device_memory_finish(&mem);
}

/* Create a single-component R32 view over an R32G32B32 buffer so the
 * compute shaders can read/write the 96-bit texels one word at a time.
 */
static void
create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
                           unsigned offset, VkFormat src_format, struct radv_buffer_view *bview)
{
   VkFormat format;

   switch (src_format) {
   case VK_FORMAT_R32G32B32_UINT:
      format = VK_FORMAT_R32_UINT;
      break;
   case VK_FORMAT_R32G32B32_SINT:
      format = VK_FORMAT_R32_SINT;
      break;
   case VK_FORMAT_R32G32B32_SFLOAT:
      format = VK_FORMAT_R32_SFLOAT;
      break;
   default:
      unreachable("invalid R32G32B32 format");
   }

   radv_buffer_view_init(bview, cmd_buffer->device,
                         &(VkBufferViewCreateInfo){
                            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
                            .flags = 0,
                            .buffer = radv_buffer_to_handle(buffer),
                            .format = format,
                            .offset = offset,
                            .range = VK_WHOLE_SIZE,
                         });
}

/* Row stride of the linear image, in R32 words (3 words per texel). */
static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
                               struct radv_meta_blit2d_surf *surf)
{
   unsigned stride;

   if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
      stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
   } else {
      stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
   }

   return stride;
}

/* Push the sampled-image + storage-texel-buffer descriptors for itob. */
static void
itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
                      struct radv_buffer_view *dst)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){
         {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
          .dstBinding = 0,
          .dstArrayElement = 0,
          .descriptorCount = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
          .pImageInfo =
             (VkDescriptorImageInfo[]){
                {
                   .sampler = VK_NULL_HANDLE,
                   .imageView = radv_image_view_to_handle(src),
                   .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                },
             }},
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = 1,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
         }});
}

/* Copy image rects to a buffer with the itob compute pipeline. */
void
radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                          struct radv_meta_blit2d_buffer *dst, unsigned num_rects,
                          struct radv_meta_blit2d_rect *rects)
{
   VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
   struct radv_device *device = cmd_buffer->device;
   struct radv_image_view src_view;
   struct radv_buffer_view dst_view;

   create_iview(cmd_buffer, src, &src_view, VK_FORMAT_UNDEFINED, src->aspect_mask);
   create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);

   itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   /* GFX9 3D images need the 3D sampler variant of the shader. */
   if (device->physical_device->rad_info.chip_class >= GFX9 && src->image->type == VK_IMAGE_TYPE_3D)
      pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        pipeline);

   for (unsigned r = 0; r < num_rects; ++r) {
      /* Push constants: src x, src y, layer, dst buffer pitch. */
      unsigned push_constants[4] = {rects[r].src_x, rects[r].src_y, src->layer, dst->pitch};
      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                            device->meta_state.itob.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
                            16, push_constants);

      radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
   }

   radv_image_view_finish(&src_view);
   radv_buffer_view_finish(&dst_view);
}

/* Push the uniform/storage texel buffer descriptors for the R32G32B32 btoi path. */
static void
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
                                struct radv_buffer_view *dst)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
         },
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = 1,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
         }});
}

/* Buffer-to-image copy for 96-bit R32G32B32 formats, done via R32 views. */
static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
                                       struct radv_meta_blit2d_buffer *src,
                                       struct radv_meta_blit2d_surf *dst, unsigned num_rects,
                                       struct radv_meta_blit2d_rect *rects)
{
   VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
   struct radv_device *device = cmd_buffer->device;
   struct radv_buffer_view src_view, dst_view;
   unsigned dst_offset = 0;
   unsigned stride;
   VkBuffer buffer;

   /* This special btoi path for R32G32B32 formats will write the linear
    * image as a buffer with the same underlying memory. The compute
    * shader will copy all components separately using a R32 format.
    */
   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);

   create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format,
                              &dst_view);
   btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        pipeline);

   stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

   for (unsigned r = 0; r < num_rects; ++r) {
      /* Push constants: dst x, dst y, image stride, src buffer pitch. */
      unsigned push_constants[4] = {
         rects[r].dst_x,
         rects[r].dst_y,
         stride,
         src->pitch,
      };

      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                            device->meta_state.btoi_r32g32b32.img_p_layout,
                            VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);

      radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
   }

   radv_buffer_view_finish(&src_view);
   radv_buffer_view_finish(&dst_view);
   radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}

/* Push the storage-texel-buffer + storage-image descriptors for btoi. */
static void
btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
                      struct radv_image_view *dst)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
         },
         {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
          .dstBinding = 1,
          .dstArrayElement = 0,
          .descriptorCount = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .pImageInfo = (VkDescriptorImageInfo[]){
             {
                .sampler = VK_NULL_HANDLE,
                .imageView = radv_image_view_to_handle(dst),
                .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
             },
          }}});
}

/* Copy buffer rects into an image with the btoi compute pipeline; 96-bit
 * R32G32B32 destinations are routed to the special R32 path.
 */
void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
                             struct radv_meta_blit2d_buffer *src, struct radv_meta_blit2d_surf *dst,
                             unsigned num_rects, struct radv_meta_blit2d_rect *rects)
{
   VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
   struct radv_device *device = cmd_buffer->device;
   struct radv_buffer_view src_view;
   struct radv_image_view dst_view;

   if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
       dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
       dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
      radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
      return;
   }

   create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
   create_iview(cmd_buffer, dst, &dst_view, VK_FORMAT_UNDEFINED, dst->aspect_mask);
   btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   /* GFX9 3D images need the 3D variant of the shader. */
   if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
      pipeline =
         cmd_buffer->device->meta_state.btoi.pipeline_3d;
   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        pipeline);

   for (unsigned r = 0; r < num_rects; ++r) {
      /* Push constants: dst x, dst y, layer, src buffer pitch. */
      unsigned push_constants[4] = {
         rects[r].dst_x,
         rects[r].dst_y,
         dst->layer,
         src->pitch,
      };
      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                            device->meta_state.btoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
                            16, push_constants);

      radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
   }

   radv_image_view_finish(&dst_view);
   radv_buffer_view_finish(&src_view);
}

/* Push the uniform/storage texel buffer descriptors for the R32G32B32 itoi path. */
static void
itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
                                struct radv_buffer_view *dst)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
         },
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = 1,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
         }});
}

/* Image-to-image copy for 96-bit R32G32B32 formats, done via R32 buffer views. */
static void
radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
                                      struct radv_meta_blit2d_surf *src,
                                      struct radv_meta_blit2d_surf *dst, unsigned num_rects,
                                      struct radv_meta_blit2d_rect *rects)
{
   VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
   struct radv_device *device = cmd_buffer->device;
   struct radv_buffer_view src_view, dst_view;
   unsigned src_offset = 0, dst_offset = 0;
   unsigned src_stride, dst_stride;
   VkBuffer src_buffer, dst_buffer;

   /* 96-bit formats are only compatible to themselves. */
   assert(dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
          dst->format == VK_FORMAT_R32G32B32_SFLOAT);

   /* This special itoi path for R32G32B32 formats will write the linear
    * image as a buffer with the same underlying memory. The compute
    * shader will copy all components separately using a R32 format.
    */
   create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, &src_buffer);
   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &dst_buffer);

   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset,
                              src->format, &src_view);
   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset,
                              dst->format, &dst_view);
   itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        pipeline);

   src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
   dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

   for (unsigned r = 0; r < num_rects; ++r) {
      /* Push constants: src x/y/stride + dst x/y/stride (24 bytes). */
      unsigned push_constants[6] = {
         rects[r].src_x, rects[r].src_y, src_stride, rects[r].dst_x, rects[r].dst_y, dst_stride,
      };
      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                            device->meta_state.itoi_r32g32b32.img_p_layout,
                            VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);

      radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
   }

   radv_buffer_view_finish(&src_view);
   radv_buffer_view_finish(&dst_view);
   radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
   radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
}

/* Push the sampled-image + storage-image descriptors for itoi. */
static void
itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
                      struct radv_image_view *dst)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, 0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {
                                         .sampler = VK_NULL_HANDLE,
                                         .imageView = radv_image_view_to_handle(src),
                                         .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                      },
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dst),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});
}

/* Copy image rects to another image with the itoi compute pipeline, one
 * aspect at a time; 96-bit R32G32B32 sources take the special R32 path.
 */
void
radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                            struct radv_meta_blit2d_surf *dst, unsigned num_rects,
                            struct radv_meta_blit2d_rect *rects)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_image_view src_view, dst_view;
   uint32_t samples = src->image->info.samples;
   uint32_t samples_log2 = ffs(samples) - 1;

   if (src->format == VK_FORMAT_R32G32B32_UINT || src->format == VK_FORMAT_R32G32B32_SINT ||
       src->format == VK_FORMAT_R32G32B32_SFLOAT) {
radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects); 1718 return; 1719 } 1720 1721 u_foreach_bit(i, dst->aspect_mask) { 1722 unsigned aspect_mask = 1u << i; 1723 VkFormat depth_format = 0; 1724 if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) 1725 depth_format = vk_format_stencil_only(dst->image->vk_format); 1726 else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) 1727 depth_format = vk_format_depth_only(dst->image->vk_format); 1728 1729 create_iview(cmd_buffer, src, &src_view, depth_format, aspect_mask); 1730 create_iview(cmd_buffer, dst, &dst_view, depth_format, aspect_mask); 1731 1732 itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view); 1733 1734 VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2]; 1735 if (device->physical_device->rad_info.chip_class >= GFX9 && 1736 (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D)) 1737 pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d; 1738 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 1739 pipeline); 1740 1741 for (unsigned r = 0; r < num_rects; ++r) { 1742 unsigned push_constants[6] = { 1743 rects[r].src_x, rects[r].src_y, src->layer, rects[r].dst_x, rects[r].dst_y, dst->layer, 1744 }; 1745 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 1746 device->meta_state.itoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 1747 24, push_constants); 1748 1749 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); 1750 } 1751 1752 radv_image_view_finish(&src_view); 1753 radv_image_view_finish(&dst_view); 1754 } 1755} 1756 1757static void 1758cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *view) 1759{ 1760 struct radv_device *device = cmd_buffer->device; 1761 1762 radv_meta_push_descriptor_set( 1763 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari_r32g32b32.img_p_layout, 1764 0, /* set */ 1765 1, /* 
descriptorWriteCount */ 1766 (VkWriteDescriptorSet[]){{ 1767 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 1768 .dstBinding = 0, 1769 .dstArrayElement = 0, 1770 .descriptorCount = 1, 1771 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1772 .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)}, 1773 }}); 1774} 1775 1776static void 1777radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, 1778 struct radv_meta_blit2d_surf *dst, 1779 const VkClearColorValue *clear_color) 1780{ 1781 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline; 1782 struct radv_device *device = cmd_buffer->device; 1783 struct radv_buffer_view dst_view; 1784 unsigned stride; 1785 VkBuffer buffer; 1786 1787 /* This special clear path for R32G32B32 formats will write the linear 1788 * image as a buffer with the same underlying memory. The compute 1789 * shader will clear all components separately using a R32 format. 1790 */ 1791 create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer); 1792 1793 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format, 1794 &dst_view); 1795 cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view); 1796 1797 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 1798 pipeline); 1799 1800 stride = get_image_stride_for_r32g32b32(cmd_buffer, dst); 1801 1802 unsigned push_constants[4] = { 1803 clear_color->uint32[0], 1804 clear_color->uint32[1], 1805 clear_color->uint32[2], 1806 stride, 1807 }; 1808 1809 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 1810 device->meta_state.cleari_r32g32b32.img_p_layout, 1811 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants); 1812 1813 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1); 1814 1815 radv_buffer_view_finish(&dst_view); 1816 radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL); 1817} 

/* Push the single-descriptor set for the generic compute clear:
 * binding 0 = the destination image view as a storage image in
 * VK_IMAGE_LAYOUT_GENERAL. */
static void
cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *dst_iview)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                 device->meta_state.cleari.img_p_layout, 0, /* set */
                                 1, /* descriptorWriteCount */
                                 (VkWriteDescriptorSet[]){
                                    {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                     .dstBinding = 0,
                                     .dstArrayElement = 0,
                                     .descriptorCount = 1,
                                     .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                     .pImageInfo =
                                        (VkDescriptorImageInfo[]){
                                           {
                                              .sampler = VK_NULL_HANDLE,
                                              .imageView = radv_image_view_to_handle(dst_iview),
                                              .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                           },
                                        }},
                                 });
}

/* Clear a color image with a compute shader: create a storage-image view of
 * the destination, bind the sample-count-specific clear pipeline, and issue
 * one dispatch covering the whole image extent. R32G32B32 formats are
 * delegated to their dedicated texel-buffer path. */
void
radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
                         const VkClearColorValue *clear_color)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_image_view dst_iview;
   uint32_t samples = dst->image->info.samples;
   /* samples is a power of two, so ffs(samples) - 1 == log2(samples). */
   uint32_t samples_log2 = ffs(samples) - 1;

   /* 96-bit R32G32B32 formats cannot be cleared through a storage image;
    * use the buffer-view path instead. */
   if (dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
       dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
      radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
      return;
   }

   create_iview(cmd_buffer, dst, &dst_iview, VK_FORMAT_UNDEFINED, dst->aspect_mask);
   cleari_bind_descriptors(cmd_buffer, &dst_iview);

   VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline[samples_log2];
   /* GFX9+ must use a 3D sampler to access 3D resources (see file header),
    * so select the 3D pipeline variant for 3D images. */
   if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
      pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        pipeline);

   /* Push constants: RGBA clear value plus the destination layer;
    * 5 dwords = 20 bytes, matching the size passed below. */
   unsigned push_constants[5] = {
      clear_color->uint32[0],
      clear_color->uint32[1],
      clear_color->uint32[2],
      clear_color->uint32[3],
      dst->layer,
   };

   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                         device->meta_state.cleari.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
                         push_constants);

   radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);

   radv_image_view_finish(&dst_iview);
}