/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "radv_meta.h"
#include "nir/nir_builder.h"

/*
 * GFX queue: Compute shader implementation of image->buffer copy
 * Compute queue: implementation also of buffer->image, image->image, and image clear.
 */

/* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options
 * for that.
 */
/* Build the image->buffer (itob) copy compute shader.
 *
 * One thread per texel in a 16x16 workgroup: each thread fetches a texel
 * from the source image (binding 0, sampled image) at global_id + offset
 * and writes it to the destination texel buffer (binding 1) at
 * pos_y * stride + pos_x.
 *
 * Push-constant layout (16 bytes): the source offset starts at byte 0
 * (2 or 3 components), the destination row stride at byte 12.
 */
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	/* On GFX9 3D images must be accessed through a 3D sampler dim. */
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *sampler_type = glsl_sampler_type(dim,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Binding 0: source image to sample from. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	/* Binding 1: destination texel buffer. */
	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						      img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = wg_id * local_size + invoc_id: one thread per texel. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);



	/* Source texel offset, read from push-constant byte 0. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	/* Destination buffer row stride (texels), push-constant byte 12. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	/* txf (integer-coordinate fetch, lod 0) from the source image. */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = dim;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = is_3d ? 3 : 2;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Linearize the buffer coordinate: pos_y * stride + pos_x. */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	/* Buffer images take a 1D coordinate; the scalar is replicated to
	 * fill the vec4 the store source expects. */
	nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(outval);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}

/* Image to buffer - don't write use image accessors */
/* Create the itob descriptor-set layout, pipeline layout and compute
 * pipeline(s).  On GFX9+ a second pipeline is built for 3D sources.
 * Returns VK_SUCCESS or the first failing radv_Create* result; partially
 * created objects are left for the finish function to destroy.
 */
static VkResult
radv_device_init_meta_itob_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };
	struct radv_shader_module cs_3d = { .nir = NULL };

	cs.nir = build_nir_itob_compute_shader(device, false);
	if (device->physical_device->rad_info.chip_class >= GFX9)
		cs_3d.nir = build_nir_itob_compute_shader(device, true);

	/*
	 * two descriptors one for the image being sampled
	 * one for the buffer being written.
	 */
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.itob.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	/* 16 push-constant bytes: offset (12) + stride (4), see the shader. */
	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.itob.img_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &pl_create_info,
					  &device->meta_state.alloc,
					  &device->meta_state.itob.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */

	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.itob.img_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     &device->meta_state.itob.pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	/* GFX9+ also needs a 3D variant of the shader. */
	if (device->physical_device->rad_info.chip_class >= GFX9) {
		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
			.module = radv_shader_module_to_handle(&cs_3d),
			.pName = "main",
			.pSpecializationInfo = NULL,
		};

		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
			.stage = pipeline_shader_stage_3d,
			.flags = 0,
			.layout = device->meta_state.itob.img_p_layout,
		};

		result = radv_CreateComputePipelines(radv_device_to_handle(device),
						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
						     1, &vk_pipeline_info_3d, NULL,
						     &device->meta_state.itob.pipeline_3d);
		if (result != VK_SUCCESS)
			goto fail;
		ralloc_free(cs_3d.nir);
	}
	ralloc_free(cs.nir);

	return VK_SUCCESS;
fail:
	ralloc_free(cs.nir);
	ralloc_free(cs_3d.nir);
	return result;
}

/* Destroy all itob objects created by radv_device_init_meta_itob_state. */
static void
radv_device_finish_meta_itob_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;

	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->itob.img_p_layout, &state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->itob.img_ds_layout,
					&state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->itob.pipeline, &state->alloc);
	if (device->physical_device->rad_info.chip_class >= GFX9)
		radv_DestroyPipeline(radv_device_to_handle(device),
				     state->itob.pipeline_3d, &state->alloc);
}

/* Build the buffer->image (btoi) copy compute shader: fetch from a texel
 * buffer at pos_y * stride + pos_x and store to the destination image at
 * global_id + offset.  Push-constant layout matches itob (offset at byte 0,
 * stride at byte 12). */
static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	/* GFX9 accesses 3D destinations with a 3D dim. */
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Binding 0: source texel buffer. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	/* Binding 1: destination storage image. */
	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						      img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = wg_id * local_size + invoc_id: one thread per texel. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Destination image offset, push-constant byte 0. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	/* Source buffer row stride (texels), push-constant byte 12. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	/* Linear source coordinate: pos_y * stride + pos_x. */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	/* txf from the source texel buffer (1 coord component). */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 1;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Store the fetched texel to the destination image. */
	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(img_coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(outval);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}

/* Buffer to image - don't write use image accessors */
/* Create the btoi descriptor-set layout, pipeline layout and compute
 * pipeline(s); a 3D variant is added on GFX9+.  Mirrors the itob init. */
static VkResult
radv_device_init_meta_btoi_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };
	struct radv_shader_module cs_3d = { .nir = NULL };
	cs.nir = build_nir_btoi_compute_shader(device, false);
	if (device->physical_device->rad_info.chip_class >= GFX9)
		cs_3d.nir = build_nir_btoi_compute_shader(device, true);
	/*
	 * two descriptors one for the image being sampled
	 * one for the buffer being written.
	 */
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.btoi.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.btoi.img_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &pl_create_info,
&device->meta_state.alloc, 411 &device->meta_state.btoi.img_p_layout); 412 if (result != VK_SUCCESS) 413 goto fail; 414 415 /* compute shader */ 416 417 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 418 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 419 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 420 .module = radv_shader_module_to_handle(&cs), 421 .pName = "main", 422 .pSpecializationInfo = NULL, 423 }; 424 425 VkComputePipelineCreateInfo vk_pipeline_info = { 426 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 427 .stage = pipeline_shader_stage, 428 .flags = 0, 429 .layout = device->meta_state.btoi.img_p_layout, 430 }; 431 432 result = radv_CreateComputePipelines(radv_device_to_handle(device), 433 radv_pipeline_cache_to_handle(&device->meta_state.cache), 434 1, &vk_pipeline_info, NULL, 435 &device->meta_state.btoi.pipeline); 436 if (result != VK_SUCCESS) 437 goto fail; 438 439 if (device->physical_device->rad_info.chip_class >= GFX9) { 440 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = { 441 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 442 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 443 .module = radv_shader_module_to_handle(&cs_3d), 444 .pName = "main", 445 .pSpecializationInfo = NULL, 446 }; 447 448 VkComputePipelineCreateInfo vk_pipeline_info_3d = { 449 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 450 .stage = pipeline_shader_stage_3d, 451 .flags = 0, 452 .layout = device->meta_state.btoi.img_p_layout, 453 }; 454 455 result = radv_CreateComputePipelines(radv_device_to_handle(device), 456 radv_pipeline_cache_to_handle(&device->meta_state.cache), 457 1, &vk_pipeline_info_3d, NULL, 458 &device->meta_state.btoi.pipeline_3d); 459 ralloc_free(cs_3d.nir); 460 } 461 ralloc_free(cs.nir); 462 463 return VK_SUCCESS; 464fail: 465 ralloc_free(cs_3d.nir); 466 ralloc_free(cs.nir); 467 return result; 468} 469 470static void 471radv_device_finish_meta_btoi_state(struct radv_device *device) 472{ 473 struct 
	radv_meta_state *state = &device->meta_state;

	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->btoi.img_p_layout, &state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->btoi.img_ds_layout,
					&state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->btoi.pipeline, &state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->btoi.pipeline_3d, &state->alloc);
}

/* Buffer to image - special path for R32G32B32 */
/* R32G32B32 is not a renderable/storable image format on this hardware, so
 * the destination is addressed as a raw texel buffer instead: each texel's
 * three channels are stored individually at
 * y * pitch + x * 3 + chan.  Push constants: offset at byte 0 (2 comps),
 * destination pitch at byte 8, source stride at byte 12. */
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Binding 0: source texel buffer. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	/* Binding 1: destination, viewed as a texel buffer (see above). */
	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						      img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Destination offset, push-constant byte 0. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	/* Destination row pitch, push-constant byte 8. */
	nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(pitch, 0);
	nir_intrinsic_set_range(pitch, 16);
	pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
	pitch->num_components = 1;
	nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
	nir_builder_instr_insert(&b, &pitch->instr);

	/* Source row stride, push-constant byte 12. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	/* Linear source coordinate: pos_y * stride + pos_x. */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);

	/* Destination element index of the first channel:
	 * y * pitch + x * 3 (three 32-bit channels per texel). */
	nir_ssa_def *global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, img_coord, 1), &pitch->dest.ssa),
			 nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));

	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 1;
	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	nir_ssa_def *outval = &tex->dest.ssa;

	/* One single-component store per channel (R, G, B). */
	for (int chan = 0; chan < 3; chan++) {
		nir_ssa_def *local_pos =
			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *coord =
			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}

/* Create the R32G32B32 btoi layouts and single compute pipeline. */
static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };

	cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);

	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
				.descriptorCount
				= 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.btoi_r32g32b32.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &pl_create_info,
					  &device->meta_state.alloc,
					  &device->meta_state.btoi_r32g32b32.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */

	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.btoi_r32g32b32.img_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     &device->meta_state.btoi_r32g32b32.pipeline);

	/* Deliberate fall-through: both the success and failure paths free
	 * the shader and return the last result. */
fail:
	ralloc_free(cs.nir);
	return result;
}

/* Destroy all R32G32B32 btoi objects. */
static void
radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;

	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->btoi_r32g32b32.img_p_layout,
				   &state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->btoi_r32g32b32.img_ds_layout,
					&state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->btoi_r32g32b32.pipeline, &state->alloc);
}

/* Build the image->image (itoi) copy compute shader: fetch from the source
 * image at global_id + src_offset and store to the destination image at
 * global_id + dst_offset.  Push constants (24 bytes): src_offset at byte 0,
 * dst_offset at byte 12. */
static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	/* GFX9 accesses 3D images with a 3D dim. */
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *buf_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Binding 0: source image. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	/* Binding 1: destination storage image. */
	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						      img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = wg_id * local_size + invoc_id: one thread per texel. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Source offset, push-constant byte 0 (24-byte range). */
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 24);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = is_3d ?
	3 : 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	/* Destination offset, push-constant byte 12. */
	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 24);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	dst_offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);

	/* txf (integer-coordinate fetch, lod 0) from the source image. */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = dim;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = is_3d ? 3 : 2;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Store the fetched texel to the destination image. */
	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(dst_coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(outval);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}

/* image to image - don't write use image accessors */
/* Create the itoi descriptor-set layout, pipeline layout and compute
 * pipeline(s); a 3D variant is added on GFX9+.  Mirrors the itob init
 * (24 push-constant bytes here: src_offset + dst_offset). */
static VkResult
radv_device_init_meta_itoi_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };
	struct radv_shader_module cs_3d = { .nir = NULL };
	cs.nir = build_nir_itoi_compute_shader(device, false);
	if (device->physical_device->rad_info.chip_class >= GFX9)
		cs_3d.nir = build_nir_itoi_compute_shader(device, true);
	/*
	 * two descriptors one for the image being sampled
	 * one for the buffer being written.
	 */
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.itoi.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.itoi.img_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &pl_create_info,
					  &device->meta_state.alloc,
					  &device->meta_state.itoi.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */

	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.itoi.img_p_layout,
	};

	result =
radv_CreateComputePipelines(radv_device_to_handle(device), 857 radv_pipeline_cache_to_handle(&device->meta_state.cache), 858 1, &vk_pipeline_info, NULL, 859 &device->meta_state.itoi.pipeline); 860 if (result != VK_SUCCESS) 861 goto fail; 862 863 if (device->physical_device->rad_info.chip_class >= GFX9) { 864 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = { 865 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 866.stage = VK_SHADER_STAGE_COMPUTE_BIT, 867 .module = radv_shader_module_to_handle(&cs_3d), 868 .pName = "main", 869 .pSpecializationInfo = NULL, 870 }; 871 872 VkComputePipelineCreateInfo vk_pipeline_info_3d = { 873 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 874 .stage = pipeline_shader_stage_3d, 875 .flags = 0, 876 .layout = device->meta_state.itoi.img_p_layout, 877 }; 878 879 result = radv_CreateComputePipelines(radv_device_to_handle(device), 880 radv_pipeline_cache_to_handle(&device->meta_state.cache), 881 1, &vk_pipeline_info_3d, NULL, 882 &device->meta_state.itoi.pipeline_3d); 883 884 ralloc_free(cs_3d.nir); 885 } 886 ralloc_free(cs.nir); 887 888 return VK_SUCCESS; 889fail: 890 ralloc_free(cs.nir); 891 ralloc_free(cs_3d.nir); 892 return result; 893} 894 895static void 896radv_device_finish_meta_itoi_state(struct radv_device *device) 897{ 898 struct radv_meta_state *state = &device->meta_state; 899 900 radv_DestroyPipelineLayout(radv_device_to_handle(device), 901 state->itoi.img_p_layout, &state->alloc); 902 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), 903 state->itoi.img_ds_layout, 904 &state->alloc); 905 radv_DestroyPipeline(radv_device_to_handle(device), 906 state->itoi.pipeline, &state->alloc); 907 if (device->physical_device->rad_info.chip_class >= GFX9) 908 radv_DestroyPipeline(radv_device_to_handle(device), 909 state->itoi.pipeline_3d, &state->alloc); 910} 911 912static nir_shader * 913build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev) 914{ 915 nir_builder b; 916 const struct 
glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
						    false,
						    false,
						    GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_r32g32b32_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Both sides of the copy are viewed as R32 texel buffers; the three
	 * components of each R32G32B32 texel are copied one at a time.
	 */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      type, "input_img");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       type, "output_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants, 24 bytes total: src x/y + stride at offset 0,
	 * dst x/y + stride at offset 12.
	 */
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 24);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 3;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 3, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	/* Third push-constant component is the row stride in R32 texels. */
	nir_ssa_def *src_stride = nir_channel(&b, &src_offset->dest.ssa, 2);

	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 24);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	dst_offset->num_components = 3;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 3, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *dst_stride = nir_channel(&b, &dst_offset->dest.ssa, 2);

	nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
	nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);

	/* Linearize (x, y) into an R32 texel index: y * stride + x * 3
	 * (each source texel occupies three R32 texels).
	 */
	nir_ssa_def *src_global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
			 nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));

	nir_ssa_def *dst_global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
			 nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));

	/* Copy the three 32-bit components one at a time. */
	for (int chan = 0; chan < 3; chan++) {
		/* src */
		nir_ssa_def *src_local_pos =
			nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *src_coord =
			nir_vec4(&b, src_local_pos, src_local_pos,
				 src_local_pos, src_local_pos);

		nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

		/* txf fetch of a single R32 texel from the source buffer view. */
		nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
		tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
		tex->op = nir_texop_txf;
		tex->src[0].src_type = nir_tex_src_coord;
		tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
		tex->src[1].src_type = nir_tex_src_lod;
		tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
		tex->src[2].src_type = nir_tex_src_texture_deref;
		tex->src[2].src = nir_src_for_ssa(input_img_deref);
		tex->dest_type = nir_type_float;
		tex->is_array = false;
		tex->coord_components = 1;
		nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
		nir_builder_instr_insert(&b, &tex->instr);

		nir_ssa_def *outval = &tex->dest.ssa;

		/* dst */
		nir_ssa_def *dst_local_pos =
			nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *dst_coord =
			nir_vec4(&b, dst_local_pos, dst_local_pos,
				 dst_local_pos, dst_local_pos);

		/* Single-component store into the destination buffer view. */
		nir_intrinsic_instr *store =
			nir_intrinsic_instr_create(b.shader,
						   nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(dst_coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, 0));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}

/* Image to image - special path for R32G32B32 (linear-only, non-renderable
 * formats handled through texel-buffer views).
 */
static VkResult
radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };

	cs.nir = build_nir_itoi_r32g32b32_compute_shader(device);

	/* Two texel-buffer descriptors: source (uniform) and dest (storage). */
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.itoi_r32g32b32.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
.pushConstantRangeCount = 1,
		/* 24 bytes: src x/y/stride + dst x/y/stride. */
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &pl_create_info,
					  &device->meta_state.alloc,
					  &device->meta_state.itoi_r32g32b32.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */

	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.itoi_r32g32b32.img_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     &device->meta_state.itoi_r32g32b32.pipeline);

	/* Fall through: "fail" also serves as the success exit, returning
	 * whatever result the last call produced after freeing the shader.
	 */
fail:
	ralloc_free(cs.nir);
	return result;
}

/* Destroys all itoi r32g32b32 meta objects. */
static void
radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;

	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->itoi_r32g32b32.img_p_layout, &state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->itoi_r32g32b32.img_ds_layout,
					&state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->itoi_r32g32b32.pipeline, &state->alloc);
}

/* Builds the image-clear compute shader: each invocation writes the clear
 * value (from push constants) to one texel. The 3D variant is needed on
 * GFX9 where 3D resources must be accessed as 3D images.
 */
static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 0;

	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants, 20 bytes: 16-byte clear value then the layer. */
	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(clear_val, 0);
	nir_intrinsic_set_range(clear_val, 20);
	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	clear_val->num_components = 4;
	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
	nir_builder_instr_insert(&b, &clear_val->instr);

	nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(layer, 0);
	nir_intrinsic_set_range(layer, 20);
	layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
	layer->num_components = 1;
	nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
	nir_builder_instr_insert(&b, &layer->instr);

	/* The z coordinate is the invocation's z offset by the base layer. */
	nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);

	nir_ssa_def *comps[4];
	comps[0] = nir_channel(&b, global_id, 0);
	comps[1] = nir_channel(&b, global_id, 1);
	comps[2] = global_z;
	comps[3] = nir_imm_int(&b, 0);
	global_id = nir_vec(&b, comps, 4);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(global_id);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(&clear_val->dest.ssa);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}

/* Creates descriptor set layout, pipeline layout and compute pipeline(s)
 * for the image-clear meta operation (plus a 3D variant on GFX9+).
 */
static VkResult
radv_device_init_meta_cleari_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };
	struct radv_shader_module cs_3d = { .nir = NULL };
	cs.nir = build_nir_cleari_compute_shader(device, false);
	if (device->physical_device->rad_info.chip_class >= GFX9)
		cs_3d.nir = build_nir_cleari_compute_shader(device, true);

	/*
	 * a single descriptor: the storage image being cleared (the clear
	 * value and base layer arrive via push constants).
*/
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.cleari.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	/* 20 bytes of push constants: vec4 clear value + base layer. */
	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.cleari.img_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &pl_create_info,
					  &device->meta_state.alloc,
					  &device->meta_state.cleari.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */

	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.cleari.img_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     &device->meta_state.cleari.pipeline);
	if (result != VK_SUCCESS)
		goto fail;


	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* compute shader */
		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
			.module = radv_shader_module_to_handle(&cs_3d),
			.pName = "main",
			.pSpecializationInfo = NULL,
		};

		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
			.stage = pipeline_shader_stage_3d,
			.flags = 0,
			.layout = device->meta_state.cleari.img_p_layout,
		};

		result = radv_CreateComputePipelines(radv_device_to_handle(device),
						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
						     1, &vk_pipeline_info_3d, NULL,
						     &device->meta_state.cleari.pipeline_3d);
		if (result != VK_SUCCESS)
			goto fail;

		ralloc_free(cs_3d.nir);
	}
	ralloc_free(cs.nir);
	return VK_SUCCESS;
fail:
	ralloc_free(cs.nir);
	ralloc_free(cs_3d.nir);
	return result;
}

/* Destroys all cleari meta objects. pipeline_3d is destroyed
 * unconditionally: on pre-GFX9 it was never created and destroying a
 * VK_NULL_HANDLE pipeline is a no-op.
 */
static void
radv_device_finish_meta_cleari_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;

	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->cleari.img_p_layout, &state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->cleari.img_ds_layout,
					&state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->cleari.pipeline, &state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->cleari.pipeline_3d, &state->alloc);
}

/* Special path for clearing R32G32B32 images using a compute shader.
*/
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	/* The image is accessed as an R32 texel buffer; each R32G32B32
	 * texel is written as three separate R32 stores.
	 */
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_r32g32b32_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 0;

	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants, 16 bytes: 12-byte RGB clear value + row stride. */
	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(clear_val, 0);
	nir_intrinsic_set_range(clear_val, 16);
	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	clear_val->num_components = 3;
	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 3, 32, "clear_value");
	nir_builder_instr_insert(&b, &clear_val->instr);

	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *global_y = nir_channel(&b, global_id, 1);

	/* Linear R32 texel index: y * stride + x * 3. */
	nir_ssa_def *global_pos =
		nir_iadd(&b,
			 nir_imul(&b, global_y, &stride->dest.ssa),
			 nir_imul(&b, global_x, nir_imm_int(&b, 3)));

	/* Write each of the three components with a single-channel store. */
	for (unsigned chan = 0; chan < 3; chan++) {
		nir_ssa_def *local_pos =
			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *coord =
			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, &clear_val->dest.ssa, chan));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}

/* Creates descriptor set layout, pipeline layout and compute pipeline for
 * the R32G32B32 image-clear meta operation.
 */
static VkResult
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };

	cs.nir = build_nir_cleari_r32g32b32_compute_shader(device);

	/* One descriptor: the storage texel buffer aliasing the image. */
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.cleari_r32g32b32.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineLayoutCreateInfo
pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
		.pushConstantRangeCount = 1,
		/* 16 bytes: 12-byte RGB clear value + row stride. */
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &pl_create_info,
					  &device->meta_state.alloc,
					  &device->meta_state.cleari_r32g32b32.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */
	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.cleari_r32g32b32.img_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     &device->meta_state.cleari_r32g32b32.pipeline);

	/* Fall through: "fail" also serves as the success exit. */
fail:
	ralloc_free(cs.nir);
	return result;
}

/* Destroys all cleari r32g32b32 meta objects. */
static void
radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;

	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->cleari_r32g32b32.img_p_layout,
				   &state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->cleari_r32g32b32.img_ds_layout,
					&state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->cleari_r32g32b32.pipeline, &state->alloc);
}

/* Tears down every bufimage meta state (the destroy calls tolerate
 * VK_NULL_HANDLE, so partially-initialized state is safe to pass here).
 */
void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
	radv_device_finish_meta_itob_state(device);
	radv_device_finish_meta_btoi_state(device);
	radv_device_finish_meta_btoi_r32g32b32_state(device);
	radv_device_finish_meta_itoi_state(device);
	radv_device_finish_meta_itoi_r32g32b32_state(device);
	radv_device_finish_meta_cleari_state(device);
	radv_device_finish_meta_cleari_r32g32b32_state(device);
}

/* Initializes all bufimage meta states in order; on failure, unwinds the
 * already-initialized states in reverse via the cascading labels below.
 */
VkResult
radv_device_init_meta_bufimage_state(struct radv_device *device)
{
	VkResult result;

	result = radv_device_init_meta_itob_state(device);
	if (result != VK_SUCCESS)
		goto fail_itob;

	result = radv_device_init_meta_btoi_state(device);
	if (result != VK_SUCCESS)
		goto fail_btoi;

	result = radv_device_init_meta_btoi_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_btoi_r32g32b32;

	result = radv_device_init_meta_itoi_state(device);
	if (result != VK_SUCCESS)
		goto fail_itoi;

	result = radv_device_init_meta_itoi_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_itoi_r32g32b32;

	result = radv_device_init_meta_cleari_state(device);
	if (result != VK_SUCCESS)
		goto fail_cleari;

	result = radv_device_init_meta_cleari_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_cleari_r32g32b32;

	return VK_SUCCESS;
fail_cleari_r32g32b32:
	radv_device_finish_meta_cleari_r32g32b32_state(device);
fail_cleari:
	radv_device_finish_meta_cleari_state(device);
fail_itoi_r32g32b32:
	radv_device_finish_meta_itoi_r32g32b32_state(device);
fail_itoi:
	radv_device_finish_meta_itoi_state(device);
fail_btoi_r32g32b32:
	radv_device_finish_meta_btoi_r32g32b32_state(device);
fail_btoi:
	radv_device_finish_meta_btoi_state(device);
fail_itob:
	radv_device_finish_meta_itob_state(device);
	return result;
}

/* Creates a single-level, single-layer image view for the given meta
 * surface. Pre-GFX9 always uses a 2D view; GFX9+ uses the image's own
 * view type (3D images must be accessed as 3D there).
 */
static void
create_iview(struct radv_cmd_buffer *cmd_buffer,
	     struct radv_meta_blit2d_surf *surf,
	     struct radv_image_view *iview)
{
	VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
		radv_meta_get_view_type(surf->image);
	radv_image_view_init(iview, cmd_buffer->device,
			     &(VkImageViewCreateInfo) {
				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
				     .image = radv_image_to_handle(surf->image),
				     .viewType = view_type,
				     .format = surf->format,
				     .subresourceRange = {
					     .aspectMask = surf->aspect_mask,
					     .baseMipLevel = surf->level,
					     .levelCount = 1,
					     .baseArrayLayer = surf->layer,
					     .layerCount = 1
				     },
			     });
}

/* Creates a whole-range texel buffer view at the given byte offset. */
static void
create_bview(struct radv_cmd_buffer *cmd_buffer,
	     struct radv_buffer *buffer,
	     unsigned offset,
	     VkFormat format,
	     struct radv_buffer_view *bview)
{
	radv_buffer_view_init(bview, cmd_buffer->device,
			      &(VkBufferViewCreateInfo) {
				      .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
				      .flags = 0,
				      .buffer = radv_buffer_to_handle(buffer),
				      .format = format,
				      .offset = offset,
				      .range = VK_WHOLE_SIZE,
			      });

}

/* Creates a VkBuffer aliasing the image's backing memory, so linear image
 * data can be accessed through texel-buffer views. The caller owns the
 * returned buffer and must destroy it.
 */
static void
create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer,
			 struct radv_meta_blit2d_surf *surf,
			 VkBufferUsageFlagBits usage,
			 VkBuffer *buffer)
{
	struct radv_device *device = cmd_buffer->device;
	/* Temporary stack-allocated memory object wrapping the image's BO;
	 * only used for the duration of the bind call below.
	 */
	struct radv_device_memory mem = { .bo = surf->image->bo };

	radv_CreateBuffer(radv_device_to_handle(device),
			  &(VkBufferCreateInfo) {
				  .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
				  .flags = 0,
				  .size = surf->image->size,
				  .usage = usage,
				  .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
			  }, NULL, buffer);

	radv_BindBufferMemory2(radv_device_to_handle(device), 1,
			       (VkBindBufferMemoryInfo[]) {
				       {
					       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
					       .buffer = *buffer,
					       .memory = radv_device_memory_to_handle(&mem),
					       .memoryOffset = surf->image->offset,
				       }
			       });
}

/* Like create_bview but maps an R32G32B32 format to the matching
 * single-channel R32 format, since R32G32B32 is accessed one component at
 * a time by the r32g32b32 compute paths.
 */
static void
create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
			   struct radv_buffer *buffer,
			   unsigned offset,
			   VkFormat src_format,
			   struct radv_buffer_view *bview)
{
	VkFormat format;

	switch (src_format) {
	case VK_FORMAT_R32G32B32_UINT:
		format = VK_FORMAT_R32_UINT;
		break;
	case VK_FORMAT_R32G32B32_SINT:
		format = VK_FORMAT_R32_SINT;
		break;
	case VK_FORMAT_R32G32B32_SFLOAT:
		format = VK_FORMAT_R32_SFLOAT;
		break;
	default:
		unreachable("invalid R32G32B32 format");
	}

	radv_buffer_view_init(bview, cmd_buffer->device,
			      &(VkBufferViewCreateInfo) {
				      .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
				      .flags = 0,
				      .buffer = radv_buffer_to_handle(buffer),
				      .format = format,
				      .offset = offset,
				      .range = VK_WHOLE_SIZE,
			      });
}

/* Returns the row stride (in R32 texels) of the linear image, matching
 * the "x * 3" addressing used by the r32g32b32 shaders.
 */
static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
			       struct radv_meta_blit2d_surf *surf)
{
	unsigned stride;

	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
		stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
	} else {
		stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
	}

	return stride;
}

/* Pushes the two itob descriptors: sampled source image (binding 0) and
 * destination storage texel buffer (binding 1).
 */
static void
itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
		      struct radv_image_view *src,
		      struct radv_buffer_view *dst)
{
	struct radv_device *device = cmd_buffer->device;

	radv_meta_push_descriptor_set(cmd_buffer,
				      VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.itob.img_p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
						      .pImageInfo = (VkDescriptorImageInfo[]) {
							      {
								      .sampler = VK_NULL_HANDLE,
								      .imageView = radv_image_view_to_handle(src),
								      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
							      },
						      }
					      },
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 1,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
						      .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
					      }
				      });
}

/* Copies image regions into a buffer with the itob compute pipeline.
 * One dispatch per rect; push constants carry src x/y, src layer and the
 * destination pitch (16 bytes, matching the itob pipeline layout).
 * Uses the 3D pipeline for 3D images on GFX9+.
 */
void
radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
			  struct radv_meta_blit2d_surf *src,
			  struct radv_meta_blit2d_buffer *dst,
			  unsigned num_rects,
			  struct radv_meta_blit2d_rect *rects)
{
	VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
	struct radv_device *device = cmd_buffer->device;
	struct radv_image_view src_view;
	struct radv_buffer_view dst_view;

	create_iview(cmd_buffer, src, &src_view);
	create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
	itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);

	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    src->image->type == VK_IMAGE_TYPE_3D)
		pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

	for (unsigned r = 0; r < num_rects; ++r) {
		unsigned push_constants[4] = {
			rects[r].src_x,
			rects[r].src_y,
			src->layer,
			dst->pitch
		};
		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
				      device->meta_state.itob.img_p_layout,
				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
				      push_constants);

		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
	}
}

/* Pushes the two btoi r32g32b32 descriptors: source uniform texel buffer
 * (binding 0) and destination storage texel buffer (binding 1).
 */
static void
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
				struct radv_buffer_view *src,
				struct radv_buffer_view *dst)
{
	struct radv_device *device = cmd_buffer->device;

	radv_meta_push_descriptor_set(cmd_buffer,
				      VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.btoi_r32g32b32.img_p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 0,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
						      .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
					      },
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 1,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
						      .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
					      }
				      });
}

/* Copies buffer regions into an R32G32B32 image via the dedicated compute
 * path. The destination image must be linear (assumed by the buffer-alias
 * trick below — the image's memory is addressed as a flat texel buffer).
 */
static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
				       struct radv_meta_blit2d_buffer *src,
				       struct radv_meta_blit2d_surf *dst,
				       unsigned num_rects,
				       struct radv_meta_blit2d_rect *rects)
{
	VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
	struct radv_device *device = cmd_buffer->device;
	struct radv_buffer_view src_view, dst_view;
	unsigned dst_offset = 0;
	unsigned stride;
	VkBuffer buffer;

	/* This special btoi path for R32G32B32 formats will write the linear
	 * image as a buffer with the same underlying memory. The compute
	 * shader will copy all components separately using a R32 format.
1781 */ 1782 create_buffer_from_image(cmd_buffer, dst, 1783 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, 1784 &buffer); 1785 1786 create_bview(cmd_buffer, src->buffer, src->offset, 1787 src->format, &src_view); 1788 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 1789 dst_offset, dst->format, &dst_view); 1790 btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view); 1791 1792 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), 1793 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 1794 1795 stride = get_image_stride_for_r32g32b32(cmd_buffer, dst); 1796 1797 for (unsigned r = 0; r < num_rects; ++r) { 1798 unsigned push_constants[4] = { 1799 rects[r].dst_x, 1800 rects[r].dst_y, 1801 stride, 1802 src->pitch, 1803 }; 1804 1805 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 1806 device->meta_state.btoi_r32g32b32.img_p_layout, 1807 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, 1808 push_constants); 1809 1810 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); 1811 } 1812 1813 radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL); 1814} 1815 1816static void 1817btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, 1818 struct radv_buffer_view *src, 1819 struct radv_image_view *dst) 1820{ 1821 struct radv_device *device = cmd_buffer->device; 1822 1823 radv_meta_push_descriptor_set(cmd_buffer, 1824 VK_PIPELINE_BIND_POINT_COMPUTE, 1825 device->meta_state.btoi.img_p_layout, 1826 0, /* set */ 1827 2, /* descriptorWriteCount */ 1828 (VkWriteDescriptorSet[]) { 1829 { 1830 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 1831 .dstBinding = 0, 1832 .dstArrayElement = 0, 1833 .descriptorCount = 1, 1834 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1835 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) }, 1836 }, 1837 { 1838 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 1839 .dstBinding = 1, 1840 .dstArrayElement = 0, 1841 .descriptorCount = 1, 1842 .descriptorType = 
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1843 .pImageInfo = (VkDescriptorImageInfo[]) { 1844 { 1845 .sampler = VK_NULL_HANDLE, 1846 .imageView = radv_image_view_to_handle(dst), 1847 .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 1848 }, 1849 } 1850 } 1851 }); 1852} 1853 1854void 1855radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, 1856 struct radv_meta_blit2d_buffer *src, 1857 struct radv_meta_blit2d_surf *dst, 1858 unsigned num_rects, 1859 struct radv_meta_blit2d_rect *rects) 1860{ 1861 VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline; 1862 struct radv_device *device = cmd_buffer->device; 1863 struct radv_buffer_view src_view; 1864 struct radv_image_view dst_view; 1865 1866 if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT || 1867 dst->image->vk_format == VK_FORMAT_R32G32B32_SINT || 1868 dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) { 1869 radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, 1870 num_rects, rects); 1871 return; 1872 } 1873 1874 create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view); 1875 create_iview(cmd_buffer, dst, &dst_view); 1876 btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view); 1877 1878 if (device->physical_device->rad_info.chip_class >= GFX9 && 1879 dst->image->type == VK_IMAGE_TYPE_3D) 1880 pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d; 1881 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), 1882 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 1883 1884 for (unsigned r = 0; r < num_rects; ++r) { 1885 unsigned push_constants[4] = { 1886 rects[r].dst_x, 1887 rects[r].dst_y, 1888 dst->layer, 1889 src->pitch, 1890 }; 1891 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 1892 device->meta_state.btoi.img_p_layout, 1893 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, 1894 push_constants); 1895 1896 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); 1897 } 1898} 1899 1900static void 1901itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer 
*cmd_buffer, 1902 struct radv_buffer_view *src, 1903 struct radv_buffer_view *dst) 1904{ 1905 struct radv_device *device = cmd_buffer->device; 1906 1907 radv_meta_push_descriptor_set(cmd_buffer, 1908 VK_PIPELINE_BIND_POINT_COMPUTE, 1909 device->meta_state.itoi_r32g32b32.img_p_layout, 1910 0, /* set */ 1911 2, /* descriptorWriteCount */ 1912 (VkWriteDescriptorSet[]) { 1913 { 1914 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 1915 .dstBinding = 0, 1916 .dstArrayElement = 0, 1917 .descriptorCount = 1, 1918 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1919 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) }, 1920 }, 1921 { 1922 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 1923 .dstBinding = 1, 1924 .dstArrayElement = 0, 1925 .descriptorCount = 1, 1926 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1927 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) }, 1928 } 1929 }); 1930} 1931 1932static void 1933radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, 1934 struct radv_meta_blit2d_surf *src, 1935 struct radv_meta_blit2d_surf *dst, 1936 unsigned num_rects, 1937 struct radv_meta_blit2d_rect *rects) 1938{ 1939 VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline; 1940 struct radv_device *device = cmd_buffer->device; 1941 struct radv_buffer_view src_view, dst_view; 1942 unsigned src_offset = 0, dst_offset = 0; 1943 unsigned src_stride, dst_stride; 1944 VkBuffer src_buffer, dst_buffer; 1945 1946 /* 96-bit formats are only compatible to themselves. */ 1947 assert(dst->format == VK_FORMAT_R32G32B32_UINT || 1948 dst->format == VK_FORMAT_R32G32B32_SINT || 1949 dst->format == VK_FORMAT_R32G32B32_SFLOAT); 1950 1951 /* This special itoi path for R32G32B32 formats will write the linear 1952 * image as a buffer with the same underlying memory. The compute 1953 * shader will copy all components separately using a R32 format. 
1954 */ 1955 create_buffer_from_image(cmd_buffer, src, 1956 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 1957 &src_buffer); 1958 create_buffer_from_image(cmd_buffer, dst, 1959 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, 1960 &dst_buffer); 1961 1962 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), 1963 src_offset, src->format, &src_view); 1964 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), 1965 dst_offset, dst->format, &dst_view); 1966 itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view); 1967 1968 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), 1969 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 1970 1971 src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src); 1972 dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst); 1973 1974 for (unsigned r = 0; r < num_rects; ++r) { 1975 unsigned push_constants[6] = { 1976 rects[r].src_x, 1977 rects[r].src_y, 1978 src_stride, 1979 rects[r].dst_x, 1980 rects[r].dst_y, 1981 dst_stride, 1982 }; 1983 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 1984 device->meta_state.itoi_r32g32b32.img_p_layout, 1985 VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, 1986 push_constants); 1987 1988 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); 1989 } 1990 1991 radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL); 1992 radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL); 1993} 1994 1995static void 1996itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, 1997 struct radv_image_view *src, 1998 struct radv_image_view *dst) 1999{ 2000 struct radv_device *device = cmd_buffer->device; 2001 2002 radv_meta_push_descriptor_set(cmd_buffer, 2003 VK_PIPELINE_BIND_POINT_COMPUTE, 2004 device->meta_state.itoi.img_p_layout, 2005 0, /* set */ 2006 2, /* descriptorWriteCount */ 2007 (VkWriteDescriptorSet[]) { 2008 { 2009 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 2010 .dstBinding = 0, 2011 .dstArrayElement = 0, 
2012 .descriptorCount = 1, 2013 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 2014 .pImageInfo = (VkDescriptorImageInfo[]) { 2015 { 2016 .sampler = VK_NULL_HANDLE, 2017 .imageView = radv_image_view_to_handle(src), 2018 .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 2019 }, 2020 } 2021 }, 2022 { 2023 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 2024 .dstBinding = 1, 2025 .dstArrayElement = 0, 2026 .descriptorCount = 1, 2027 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2028 .pImageInfo = (VkDescriptorImageInfo[]) { 2029 { 2030 .sampler = VK_NULL_HANDLE, 2031 .imageView = radv_image_view_to_handle(dst), 2032 .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 2033 }, 2034 } 2035 } 2036 }); 2037} 2038 2039void 2040radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, 2041 struct radv_meta_blit2d_surf *src, 2042 struct radv_meta_blit2d_surf *dst, 2043 unsigned num_rects, 2044 struct radv_meta_blit2d_rect *rects) 2045{ 2046 VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline; 2047 struct radv_device *device = cmd_buffer->device; 2048 struct radv_image_view src_view, dst_view; 2049 2050 if (src->format == VK_FORMAT_R32G32B32_UINT || 2051 src->format == VK_FORMAT_R32G32B32_SINT || 2052 src->format == VK_FORMAT_R32G32B32_SFLOAT) { 2053 radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst, 2054 num_rects, rects); 2055 return; 2056 } 2057 2058 create_iview(cmd_buffer, src, &src_view); 2059 create_iview(cmd_buffer, dst, &dst_view); 2060 2061 itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view); 2062 2063 if (device->physical_device->rad_info.chip_class >= GFX9 && 2064 (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D)) 2065 pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d; 2066 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), 2067 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 2068 2069 for (unsigned r = 0; r < num_rects; ++r) { 2070 unsigned push_constants[6] = { 2071 rects[r].src_x, 2072 
rects[r].src_y, 2073 src->layer, 2074 rects[r].dst_x, 2075 rects[r].dst_y, 2076 dst->layer, 2077 }; 2078 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 2079 device->meta_state.itoi.img_p_layout, 2080 VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, 2081 push_constants); 2082 2083 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); 2084 } 2085} 2086 2087static void 2088cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, 2089 struct radv_buffer_view *view) 2090{ 2091 struct radv_device *device = cmd_buffer->device; 2092 2093 radv_meta_push_descriptor_set(cmd_buffer, 2094 VK_PIPELINE_BIND_POINT_COMPUTE, 2095 device->meta_state.cleari_r32g32b32.img_p_layout, 2096 0, /* set */ 2097 1, /* descriptorWriteCount */ 2098 (VkWriteDescriptorSet[]) { 2099 { 2100 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 2101 .dstBinding = 0, 2102 .dstArrayElement = 0, 2103 .descriptorCount = 1, 2104 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 2105 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(view) }, 2106 } 2107 }); 2108} 2109 2110static void 2111radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, 2112 struct radv_meta_blit2d_surf *dst, 2113 const VkClearColorValue *clear_color) 2114{ 2115 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline; 2116 struct radv_device *device = cmd_buffer->device; 2117 struct radv_buffer_view dst_view; 2118 unsigned stride; 2119 VkBuffer buffer; 2120 2121 /* This special clear path for R32G32B32 formats will write the linear 2122 * image as a buffer with the same underlying memory. The compute 2123 * shader will clear all components separately using a R32 format. 
2124 */ 2125 create_buffer_from_image(cmd_buffer, dst, 2126 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, 2127 &buffer); 2128 2129 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 2130 0, dst->format, &dst_view); 2131 cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view); 2132 2133 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), 2134 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 2135 2136 stride = get_image_stride_for_r32g32b32(cmd_buffer, dst); 2137 2138 unsigned push_constants[4] = { 2139 clear_color->uint32[0], 2140 clear_color->uint32[1], 2141 clear_color->uint32[2], 2142 stride, 2143 }; 2144 2145 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 2146 device->meta_state.cleari_r32g32b32.img_p_layout, 2147 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, 2148 push_constants); 2149 2150 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, 2151 dst->image->info.height, 1); 2152 2153 radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL); 2154} 2155 2156static void 2157cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, 2158 struct radv_image_view *dst_iview) 2159{ 2160 struct radv_device *device = cmd_buffer->device; 2161 2162 radv_meta_push_descriptor_set(cmd_buffer, 2163 VK_PIPELINE_BIND_POINT_COMPUTE, 2164 device->meta_state.cleari.img_p_layout, 2165 0, /* set */ 2166 1, /* descriptorWriteCount */ 2167 (VkWriteDescriptorSet[]) { 2168 { 2169 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 2170 .dstBinding = 0, 2171 .dstArrayElement = 0, 2172 .descriptorCount = 1, 2173 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2174 .pImageInfo = (VkDescriptorImageInfo[]) { 2175 { 2176 .sampler = VK_NULL_HANDLE, 2177 .imageView = radv_image_view_to_handle(dst_iview), 2178 .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 2179 }, 2180 } 2181 }, 2182 }); 2183} 2184 2185void 2186radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, 2187 struct radv_meta_blit2d_surf *dst, 2188 const VkClearColorValue *clear_color) 2189{ 2190 
VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline; 2191 struct radv_device *device = cmd_buffer->device; 2192 struct radv_image_view dst_iview; 2193 2194 if (dst->format == VK_FORMAT_R32G32B32_UINT || 2195 dst->format == VK_FORMAT_R32G32B32_SINT || 2196 dst->format == VK_FORMAT_R32G32B32_SFLOAT) { 2197 radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color); 2198 return; 2199 } 2200 2201 create_iview(cmd_buffer, dst, &dst_iview); 2202 cleari_bind_descriptors(cmd_buffer, &dst_iview); 2203 2204 if (device->physical_device->rad_info.chip_class >= GFX9 && 2205 dst->image->type == VK_IMAGE_TYPE_3D) 2206 pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d; 2207 2208 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), 2209 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 2210 2211 unsigned push_constants[5] = { 2212 clear_color->uint32[0], 2213 clear_color->uint32[1], 2214 clear_color->uint32[2], 2215 clear_color->uint32[3], 2216 dst->layer, 2217 }; 2218 2219 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 2220 device->meta_state.cleari.img_p_layout, 2221 VK_SHADER_STAGE_COMPUTE_BIT, 0, 20, 2222 push_constants); 2223 2224 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1); 2225} 2226