1/* 2 * Copyright © 2021 Google 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#define AC_SURFACE_INCLUDE_NIR 25#include "ac_surface.h" 26 27#include "radv_meta.h" 28#include "radv_private.h" 29 30static nir_shader * 31build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf) 32{ 33 enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_BUF; 34 const struct glsl_type *buf_type = glsl_image_type(dim, false, GLSL_TYPE_UINT); 35 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_retile_compute"); 36 37 b.shader->info.workgroup_size[0] = 8; 38 b.shader->info.workgroup_size[1] = 8; 39 b.shader->info.workgroup_size[2] = 1; 40 41 nir_ssa_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8); 42 nir_ssa_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1); 43 nir_ssa_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2); 44 45 nir_ssa_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8); 46 nir_ssa_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1); 47 nir_ssa_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2); 48 nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in"); 49 input_dcc->data.descriptor_set = 0; 50 input_dcc->data.binding = 0; 51 nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_out"); 52 output_dcc->data.descriptor_set = 0; 53 output_dcc->data.binding = 1; 54 55 nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa; 56 nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa; 57 58 nir_ssa_def *coord = get_global_ids(&b, 2); 59 nir_ssa_def *zero = nir_imm_int(&b, 0); 60 coord = nir_imul( 61 &b, coord, 62 nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height)); 63 64 nir_ssa_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->rad_info, surf->bpe, 65 &surf->u.gfx9.color.dcc_equation, src_dcc_pitch, 66 src_dcc_height, zero, nir_channel(&b, coord, 0), 67 nir_channel(&b, coord, 1), zero, zero, zero); 68 nir_ssa_def *dst = ac_nir_dcc_addr_from_coord( 69 &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, 70 dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), 71 zero, zero, zero); 72 73 nir_ssa_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, 74 nir_vec4(&b, src, src, src, src), 75 nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), 76 .image_dim = dim); 77 78 nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), 79 nir_ssa_undef(&b, 1, 32), dcc_val, nir_imm_int(&b, 0), .image_dim = dim); 80 81 return b.shader; 82} 83 84void 85radv_device_finish_meta_dcc_retile_state(struct radv_device *device) 86{ 87 struct radv_meta_state *state = &device->meta_state; 88 89 for (unsigned i = 0; i < ARRAY_SIZE(state->dcc_retile.pipeline); i++) { 90 radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline[i], 91 &state->alloc); 92 } 93 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout, 94 &state->alloc); 95 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->dcc_retile.ds_layout, 96 &state->alloc); 97 98 /* Reset for next finish. */ 99 memset(&state->dcc_retile, 0, sizeof(state->dcc_retile)); 100} 101 102/* 103 * This take a surface, but the only things used are: 104 * - BPE 105 * - DCC equations 106 * - DCC block size 107 * 108 * BPE is always 4 at the moment and the rest is derived from the tilemode. 109 */ 110static VkResult 111radv_device_init_meta_dcc_retile_state(struct radv_device *device, struct radeon_surf *surf) 112{ 113 VkResult result = VK_SUCCESS; 114 nir_shader *cs = build_dcc_retile_compute_shader(device, surf); 115 116 VkDescriptorSetLayoutCreateInfo ds_create_info = { 117 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 118 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 119 .bindingCount = 2, 120 .pBindings = (VkDescriptorSetLayoutBinding[]){ 121 {.binding = 0, 122 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 123 .descriptorCount = 1, 124 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 125 .pImmutableSamplers = NULL}, 126 {.binding = 1, 127 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 128 .descriptorCount = 1, 129 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 130 .pImmutableSamplers = NULL}, 131 }}; 132 133 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 134 &device->meta_state.alloc, 135 &device->meta_state.dcc_retile.ds_layout); 136 if (result != VK_SUCCESS) 137 goto cleanup; 138 139 VkPipelineLayoutCreateInfo pl_create_info = { 140 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 141 .setLayoutCount = 1, 142 .pSetLayouts = &device->meta_state.dcc_retile.ds_layout, 143 .pushConstantRangeCount = 1, 144 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, 145 }; 146 147 result = 148 radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 149 &device->meta_state.alloc, &device->meta_state.dcc_retile.p_layout); 150 if (result != VK_SUCCESS) 151 goto cleanup; 152 153 /* compute shader */ 154 155 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 156 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 157 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 158 .module = vk_shader_module_handle_from_nir(cs), 159 .pName = "main", 160 .pSpecializationInfo = NULL, 161 }; 162 163 VkComputePipelineCreateInfo vk_pipeline_info = { 164 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 165 .stage = pipeline_shader_stage, 166 .flags = 0, 167 .layout = device->meta_state.dcc_retile.p_layout, 168 }; 169 170 result = radv_CreateComputePipelines( 171 radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 172 &vk_pipeline_info, NULL, &device->meta_state.dcc_retile.pipeline[surf->u.gfx9.swizzle_mode]); 173 if (result != VK_SUCCESS) 174 goto cleanup; 175 176cleanup: 177 if (result != VK_SUCCESS) 178 radv_device_finish_meta_dcc_retile_state(device); 179 ralloc_free(cs); 180 return result; 181} 182 183void 184radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image) 185{ 186 struct radv_meta_saved_state saved_state; 187 struct radv_device *device = cmd_buffer->device; 188 struct radv_buffer buffer; 189 190 assert(image->type == VK_IMAGE_TYPE_2D); 191 assert(image->info.array_size == 1 && image->info.levels == 1); 192 193 struct radv_cmd_state *state = &cmd_buffer->state; 194 195 state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, image) | 196 radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image); 197 198 unsigned swizzle_mode = image->planes[0].surface.u.gfx9.swizzle_mode; 199 200 /* Compile pipelines if not already done so. */ 201 if (!cmd_buffer->device->meta_state.dcc_retile.pipeline[swizzle_mode]) { 202 VkResult ret = 203 radv_device_init_meta_dcc_retile_state(cmd_buffer->device, &image->planes[0].surface); 204 if (ret != VK_SUCCESS) { 205 cmd_buffer->record_result = ret; 206 return; 207 } 208 } 209 210 radv_meta_save( 211 &saved_state, cmd_buffer, 212 RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS); 213 214 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 215 device->meta_state.dcc_retile.pipeline[swizzle_mode]); 216 217 radv_buffer_init(&buffer, device, image->bo, image->size, image->offset); 218 219 struct radv_buffer_view views[2]; 220 VkBufferView view_handles[2]; 221 radv_buffer_view_init(views, cmd_buffer->device, 222 &(VkBufferViewCreateInfo){ 223 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, 224 .buffer = radv_buffer_to_handle(&buffer), 225 .offset = image->planes[0].surface.meta_offset, 226 .range = image->planes[0].surface.meta_size, 227 .format = VK_FORMAT_R8_UINT, 228 }); 229 radv_buffer_view_init(views + 1, cmd_buffer->device, 230 &(VkBufferViewCreateInfo){ 231 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, 232 .buffer = radv_buffer_to_handle(&buffer), 233 .offset = image->planes[0].surface.display_dcc_offset, 234 .range = image->planes[0].surface.u.gfx9.color.display_dcc_size, 235 .format = VK_FORMAT_R8_UINT, 236 }); 237 for (unsigned i = 0; i < 2; ++i) 238 view_handles[i] = radv_buffer_view_to_handle(&views[i]); 239 240 radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, 241 device->meta_state.dcc_retile.p_layout, 0, /* set */ 242 2, /* descriptorWriteCount */ 243 (VkWriteDescriptorSet[]){ 244 { 245 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 246 .dstBinding = 0, 247 .dstArrayElement = 0, 248 .descriptorCount = 1, 249 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 250 .pTexelBufferView = &view_handles[0], 251 }, 252 { 253 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 254 .dstBinding = 1, 255 .dstArrayElement = 0, 256 .descriptorCount = 1, 257 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 258 .pTexelBufferView = &view_handles[1], 259 }, 260 }); 261 262 unsigned width = DIV_ROUND_UP(image->info.width, vk_format_get_blockwidth(image->vk_format)); 263 unsigned height = DIV_ROUND_UP(image->info.height, vk_format_get_blockheight(image->vk_format)); 264 265 unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width); 266 unsigned dcc_height = 267 DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height); 268 269 uint32_t constants[] = { 270 image->planes[0].surface.u.gfx9.color.dcc_pitch_max + 1, 271 image->planes[0].surface.u.gfx9.color.dcc_height, 272 image->planes[0].surface.u.gfx9.color.display_dcc_pitch_max + 1, 273 image->planes[0].surface.u.gfx9.color.display_dcc_height, 274 }; 275 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 276 device->meta_state.dcc_retile.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, 277 constants); 278 279 radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, 1); 280 281 radv_buffer_view_finish(views); 282 radv_buffer_view_finish(views + 1); 283 radv_buffer_finish(&buffer); 284 285 radv_meta_restore(&saved_state, cmd_buffer); 286 287 state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | 288 radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image); 289} 290