v3dvx_meta_common.c revision 7ec681f3
1/* 2 * Copyright © 2021 Raspberry Pi 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
 */

#include "v3dv_private.h"
#include "v3dv_meta_common.h"

#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/compiler/v3d_compiler.h"

#include "vk_format_info.h"

/* Describes a clear to be baked into the RCL prologue: the clear value,
 * the image being cleared (NULL when clearing for a buffer fill), the
 * aspects to clear and the mip level targeted.
 */
struct rcl_clear_info {
   const union v3dv_clear_value *clear_value;
   struct v3dv_image *image;
   VkImageAspectFlags aspects;
   uint32_t level;
};

/* Emits the common prologue of the job's render control list: common tile
 * rendering config, optional clear-color packets (when clear_info requests
 * a color clear), render target config, Z/S clear values and the initial
 * tile list block size.
 *
 * Returns the job's RCL on success, or NULL if the command buffer has been
 * flagged out-of-memory.
 */
static struct v3dv_cl *
emit_rcl_prologue(struct v3dv_job *job,
                  struct v3dv_meta_framebuffer *fb,
                  const struct rcl_clear_info *clear_info)
{
   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;
   v3dv_cl_ensure_space_with_branch(rcl, 200 +
                                    tiling->layers * 256 *
                                    cl_packet_length(SUPERTILE_COORDINATES));
   if (job->cmd_buffer->state.oom)
      return NULL;

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
      config.early_z_disable = true;
      config.image_width_pixels = tiling->width;
      config.image_height_pixels = tiling->height;
      config.number_of_render_targets = 1;
      config.multisample_mode_4x = tiling->msaa;
      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
      config.internal_depth_type = fb->internal_depth_type;
   }

   if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
      /* Compute the explicit UIF padding to program in the clear-color
       * packets when the slice's padding exceeds what the HW derives
       * implicitly from the frame height.
       */
      uint32_t clear_pad = 0;
      if (clear_info->image) {
         const struct v3dv_image *image = clear_info->image;
         const struct v3d_resource_slice *slice =
            &image->slices[clear_info->level];
         if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
             slice->tiling == V3D_TILING_UIF_XOR) {
            int uif_block_height = v3d_utile_height(image->cpp) * 2;

            uint32_t implicit_padded_height =
               align(tiling->height, uif_block_height) / uif_block_height;

            /* NOTE(review): the >= 15 threshold presumably matches the
             * range of the implicit padding the HW can represent --
             * confirm against the packet documentation.
             */
            if (slice->padded_height_of_output_image_in_uif_blocks -
                implicit_padded_height >= 15) {
               clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
            }
         }
      }

      const uint32_t *color = &clear_info->clear_value->color[0];

      cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
         clear.clear_color_low_32_bits = color[0];
         clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
         clear.render_target_number = 0;
      };

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
            clear.clear_color_mid_low_32_bits =
               ((color[1] >> 24) | (color[2] << 8));
            clear.clear_color_mid_high_24_bits =
               ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
            clear.render_target_number = 0;
         };
      }

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
            clear.uif_padded_height_in_uif_blocks = clear_pad;
            clear.clear_color_high_16_bits = color[3] >> 16;
            clear.render_target_number = 0;
         };
      }
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
      rt.render_target_0_internal_bpp = tiling->internal_bpp;
      rt.render_target_0_internal_type = fb->internal_type;
      rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
   }

   /* Default Z/S clear values (1.0 / 0) when no clear was requested. */
   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
      clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
      clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
   };

   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
      init.use_auto_chained_tile_lists = true;
      init.size_of_first_block_in_chained_tile_lists =
         TILE_ALLOCATION_BLOCK_SIZE_64B;
   }

   return rcl;
}

/* Emits per-frame RCL setup: the tile-list allocation base for min_layer,
 * the supertile configuration, the GFXH-1742 workaround tiles (which also
 * perform the tile-buffer clear when clear_value is non-NULL), and a VCD
 * cache flush.
 */
static void
emit_frame_setup(struct v3dv_job *job,
                 uint32_t min_layer,
                 const union v3dv_clear_value *clear_value)
{
   v3dv_return_if_oom(NULL, job);

   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;

   /* 64 bytes of tile allocation per draw tile, skipping min_layer layers. */
   const uint32_t tile_alloc_offset =
      64 * min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
   }

   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
      config.number_of_bin_tile_lists = 1;
      config.total_frame_width_in_tiles = tiling->draw_tiles_x;
      config.total_frame_height_in_tiles = tiling->draw_tiles_y;

      config.supertile_width_in_tiles = tiling->supertile_width;
      config.supertile_height_in_tiles = tiling->supertile_height;

      config.total_frame_width_in_supertiles =
         tiling->frame_width_in_supertiles;
      config.total_frame_height_in_supertiles =
         tiling->frame_height_in_supertiles;
   }

   /* Implement GFXH-1742 workaround. Also, if we are clearing we have to do
    * it here.
    */
   for (int i = 0; i < 2; i++) {
      cl_emit(rcl, TILE_COORDINATES, coords);
      cl_emit(rcl, END_OF_LOADS, end);
      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
         store.buffer_to_store = NONE;
      }
      /* The clear is only emitted on the first workaround tile. */
      if (clear_value && i == 0) {
         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
            clear.clear_z_stencil_buffer = true;
            clear.clear_all_render_targets = true;
         }
      }
      cl_emit(rcl, END_OF_TILE_MARKER, end);
   }

   cl_emit(rcl, FLUSH_VCD_CACHE, flush);
}

/* Emits a SUPERTILE_COORDINATES packet for every supertile in the
 * framebuffer's [min, max] supertile range, row by row.
 */
static void
emit_supertile_coordinates(struct v3dv_job *job,
                           struct v3dv_meta_framebuffer *framebuffer)
{
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl *rcl = &job->rcl;

   const uint32_t min_y = framebuffer->min_y_supertile;
   const uint32_t max_y = framebuffer->max_y_supertile;
   const uint32_t min_x = framebuffer->min_x_supertile;
   const uint32_t max_x = framebuffer->max_x_supertile;

   for (int y = min_y; y <= max_y; y++) {
      for (int x = min_x; x <= max_x; x++) {
         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
            coords.column_number_in_supertiles = x;
            coords.row_number_in_supertiles = y;
         }
      }
   }
}

/* Emits a raster-order (linear) load from a BO into the given tile buffer. */
static void
emit_linear_load(struct v3dv_cl *cl,
                 uint32_t buffer,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 uint32_t stride,
                 uint32_t format)
{
   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = buffer;
      load.address = v3dv_cl_address(bo, offset);
      load.input_image_format = format;
      load.memory_format = V3D_TILING_RASTER;
      load.height_in_ub_or_stride = stride;
      load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

/* Emits a raster-order (linear) store from the color tile buffer into a BO. */
static void
emit_linear_store(struct v3dv_cl *cl,
                  uint32_t buffer,
                  struct v3dv_bo *bo,
                  uint32_t offset,
                  uint32_t stride,
                  bool msaa,
                  uint32_t format)
{
   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      /* NOTE(review): the 'buffer' parameter is ignored and the store always
       * targets RENDER_TARGET_0; every caller in this file passes
       * RENDER_TARGET_0, but consider either honoring or dropping the
       * parameter.
       */
      store.buffer_to_store = RENDER_TARGET_0;
      store.address = v3dv_cl_address(bo, offset);
store.clear_buffer_being_stored = false; 236 store.output_image_format = format; 237 store.memory_format = V3D_TILING_RASTER; 238 store.height_in_ub_or_stride = stride; 239 store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES : 240 V3D_DECIMATE_MODE_SAMPLE_0; 241 } 242} 243 244/* This chooses a tile buffer format that is appropriate for the copy operation. 245 * Typically, this is the image render target type, however, if we are copying 246 * depth/stencil to/from a buffer the hardware can't do raster loads/stores, so 247 * we need to load and store to/from a tile color buffer using a compatible 248 * color format. 249 */ 250static uint32_t 251choose_tlb_format(struct v3dv_meta_framebuffer *framebuffer, 252 VkImageAspectFlags aspect, 253 bool for_store, 254 bool is_copy_to_buffer, 255 bool is_copy_from_buffer) 256{ 257 if (is_copy_to_buffer || is_copy_from_buffer) { 258 switch (framebuffer->vk_format) { 259 case VK_FORMAT_D16_UNORM: 260 return V3D_OUTPUT_IMAGE_FORMAT_R16UI; 261 case VK_FORMAT_D32_SFLOAT: 262 return V3D_OUTPUT_IMAGE_FORMAT_R32F; 263 case VK_FORMAT_X8_D24_UNORM_PACK32: 264 return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; 265 case VK_FORMAT_D24_UNORM_S8_UINT: 266 /* When storing the stencil aspect of a combined depth/stencil image 267 * to a buffer, the Vulkan spec states that the output buffer must 268 * have packed stencil values, so we choose an R8UI format for our 269 * store outputs. For the load input we still want RGBA8UI since the 270 * source image contains 4 channels (including the 3 channels 271 * containing the 24-bit depth value). 272 * 273 * When loading the stencil aspect of a combined depth/stencil image 274 * from a buffer, we read packed 8-bit stencil values from the buffer 275 * that we need to put into the LSB of the 32-bit format (the R 276 * channel), so we use R8UI. For the store, if we used R8UI then we 277 * would write 8-bit stencil values consecutively over depth channels, 278 * so we need to use RGBA8UI. 
This will write each stencil value in 279 * its correct position, but will overwrite depth values (channels G 280 * B,A) with undefined values. To fix this, we will have to restore 281 * the depth aspect from the Z tile buffer, which we should pre-load 282 * from the image before the store). 283 */ 284 if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) { 285 return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; 286 } else { 287 assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT); 288 if (is_copy_to_buffer) { 289 return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI : 290 V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; 291 } else { 292 assert(is_copy_from_buffer); 293 return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI : 294 V3D_OUTPUT_IMAGE_FORMAT_R8UI; 295 } 296 } 297 default: /* Color formats */ 298 return framebuffer->format->rt_type; 299 break; 300 } 301 } else { 302 return framebuffer->format->rt_type; 303 } 304} 305 306static inline bool 307format_needs_rb_swap(struct v3dv_device *device, 308 VkFormat format) 309{ 310 const uint8_t *swizzle = v3dv_get_format_swizzle(device, format); 311 return swizzle[0] == PIPE_SWIZZLE_Z; 312} 313 314static void 315emit_image_load(struct v3dv_device *device, 316 struct v3dv_cl *cl, 317 struct v3dv_meta_framebuffer *framebuffer, 318 struct v3dv_image *image, 319 VkImageAspectFlags aspect, 320 uint32_t layer, 321 uint32_t mip_level, 322 bool is_copy_to_buffer, 323 bool is_copy_from_buffer) 324{ 325 uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); 326 327 /* For image to/from buffer copies we always load to and store from RT0, 328 * even for depth/stencil aspects, because the hardware can't do raster 329 * stores or loads from/to the depth/stencil tile buffers. 330 */ 331 bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer || 332 aspect == VK_IMAGE_ASPECT_COLOR_BIT; 333 334 const struct v3d_resource_slice *slice = &image->slices[mip_level]; 335 cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { 336 load.buffer_to_load = load_to_color_tlb ? 
337 RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect); 338 339 load.address = v3dv_cl_address(image->mem->bo, layer_offset); 340 341 load.input_image_format = choose_tlb_format(framebuffer, aspect, false, 342 is_copy_to_buffer, 343 is_copy_from_buffer); 344 load.memory_format = slice->tiling; 345 346 /* When copying depth/stencil images to a buffer, for D24 formats Vulkan 347 * expects the depth value in the LSB bits of each 32-bit pixel. 348 * Unfortunately, the hardware seems to put the S8/X8 bits there and the 349 * depth bits on the MSB. To work around that we can reverse the channel 350 * order and then swap the R/B channels to get what we want. 351 * 352 * NOTE: reversing and swapping only gets us the behavior we want if the 353 * operations happen in that exact order, which seems to be the case when 354 * done on the tile buffer load operations. On the store, it seems the 355 * order is not the same. The order on the store is probably reversed so 356 * that reversing and swapping on both the load and the store preserves 357 * the original order of the channels in memory. 358 * 359 * Notice that we only need to do this when copying to a buffer, where 360 * depth and stencil aspects are copied as separate regions and 361 * the spec expects them to be tightly packed. 362 */ 363 bool needs_rb_swap = false; 364 bool needs_chan_reverse = false; 365 if (is_copy_to_buffer && 366 (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 || 367 (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && 368 (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) { 369 needs_rb_swap = true; 370 needs_chan_reverse = true; 371 } else if (!is_copy_from_buffer && !is_copy_to_buffer && 372 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) { 373 /* This is not a raw data copy (i.e. we are clearing the image), 374 * so we need to make sure we respect the format swizzle. 
375 */ 376 needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format); 377 } 378 379 load.r_b_swap = needs_rb_swap; 380 load.channel_reverse = needs_chan_reverse; 381 382 if (slice->tiling == V3D_TILING_UIF_NO_XOR || 383 slice->tiling == V3D_TILING_UIF_XOR) { 384 load.height_in_ub_or_stride = 385 slice->padded_height_of_output_image_in_uif_blocks; 386 } else if (slice->tiling == V3D_TILING_RASTER) { 387 load.height_in_ub_or_stride = slice->stride; 388 } 389 390 if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) 391 load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; 392 else 393 load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 394 } 395} 396 397static void 398emit_image_store(struct v3dv_device *device, 399 struct v3dv_cl *cl, 400 struct v3dv_meta_framebuffer *framebuffer, 401 struct v3dv_image *image, 402 VkImageAspectFlags aspect, 403 uint32_t layer, 404 uint32_t mip_level, 405 bool is_copy_to_buffer, 406 bool is_copy_from_buffer, 407 bool is_multisample_resolve) 408{ 409 uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); 410 411 bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer || 412 aspect == VK_IMAGE_ASPECT_COLOR_BIT; 413 414 const struct v3d_resource_slice *slice = &image->slices[mip_level]; 415 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 416 store.buffer_to_store = store_from_color_tlb ? 
         RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);

      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
      store.clear_buffer_being_stored = false;

      /* See rationale in emit_image_load() */
      bool needs_rb_swap = false;
      bool needs_chan_reverse = false;
      if (is_copy_from_buffer &&
          (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
           (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
            (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
         needs_rb_swap = true;
         needs_chan_reverse = true;
      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
         needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
      }

      store.r_b_swap = needs_rb_swap;
      store.channel_reverse = needs_chan_reverse;

      store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
                                                    is_copy_to_buffer,
                                                    is_copy_from_buffer);
      store.memory_format = slice->tiling;
      /* UIF tilings take a padded height in UIF blocks; raster takes a
       * byte stride.
       */
      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         store.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == V3D_TILING_RASTER) {
         store.height_in_ub_or_stride = slice->stride;
      }

      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else if (is_multisample_resolve)
         store.decimate_mode = V3D_DECIMATE_MODE_4X;
      else
         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

/* Emits the per-tile generic list for copying one layer of an image into a
 * buffer: loads the image into the TLB, then stores the tile linearly at
 * the layer's offset within the buffer.
 */
static void
emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
                                        struct v3dv_meta_framebuffer *framebuffer,
                                        struct v3dv_buffer *buffer,
                                        struct v3dv_image *image,
                                        uint32_t layer_offset,
                                        const VkBufferImageCopy2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   /* Load image to TLB */
   assert((image->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->imageSubresource.layerCount) ||
          layer_offset < image->vk.extent.depth);

   /* For 3D images the "layer" is a depth slice starting at imageOffset.z. */
   const uint32_t image_layer = image->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->imageSubresource.baseArrayLayer + layer_offset :
      region->imageOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, image,
                   region->imageSubresource.aspectMask,
                   image_layer,
                   region->imageSubresource.mipLevel,
                   true, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store TLB to buffer */
   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   /* Handle copy from compressed format */
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));

   /* If we are storing stencil from a combined depth/stencil format the
    * Vulkan spec states that the output buffer must have packed stencil
    * values, where each stencil value is 1 byte.
    */
   uint32_t cpp =
      region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
         1 : image->cpp;
   uint32_t buffer_stride = width * cpp;
   /* Each layer occupies height * stride bytes in the destination buffer. */
   uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
                            height * buffer_stride * layer_offset;

   uint32_t format = choose_tlb_format(framebuffer,
                                       region->imageSubresource.aspectMask,
                                       true, true, false);
   bool msaa = image->vk.samples > VK_SAMPLE_COUNT_1_BIT;

   emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
                     buffer_offset, buffer_stride, msaa, format);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

/* Emits the per-tile list plus the supertile coordinates that execute it
 * for one layer of an image-to-buffer copy.
 */
static void
emit_copy_layer_to_buffer(struct v3dv_job *job,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          struct v3dv_meta_framebuffer *framebuffer,
                          uint32_t layer,
                          const VkBufferImageCopy2KHR *region)
{
   emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
                                           image, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

/* Builds the full RCL for copying all layers of an image to a buffer. */
void
v3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job,
                                         struct v3dv_buffer *buffer,
                                         struct v3dv_image *image,
                                         struct v3dv_meta_framebuffer *framebuffer,
                                         const VkBufferImageCopy2KHR *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

/* Emits the per-tile generic list that resolves one layer of a multisampled
 * src image into dst (load all samples, store with 4x decimation).
 */
static void
emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
                                       struct v3dv_meta_framebuffer *framebuffer,
                                       struct v3dv_image *dst,
                                       struct v3dv_image *src,
                                       uint32_t layer_offset,
                                       const VkImageResolve2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->srcSubresource.layerCount) ||
          layer_offset < src->vk.extent.depth);

   /* For 3D images the "layer" is a depth slice starting at srcOffset.z. */
   const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer + layer_offset :
      region->srcOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, src,
                   region->srcSubresource.aspectMask,
                   src_layer,
                   region->srcSubresource.mipLevel,
                   false, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->dstSubresource.layerCount) ||
          layer_offset < dst->vk.extent.depth);

   const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer + layer_offset :
      region->dstOffset.z + layer_offset;

   /* is_multisample_resolve = true: store with 4x decimation. */
   emit_image_store(job->device, cl, framebuffer, dst,
                    region->dstSubresource.aspectMask,
                    dst_layer,
                    region->dstSubresource.mipLevel,
                    false, false, true);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

/* Emits the per-tile list plus the supertile coordinates that execute it
 * for one layer of a multisample resolve.
 */
static void
emit_resolve_image_layer(struct v3dv_job *job,
                         struct v3dv_image *dst,
                         struct v3dv_image *src,
                         struct v3dv_meta_framebuffer *framebuffer,
                         uint32_t layer,
                         const VkImageResolve2KHR *region)
{
   emit_resolve_image_layer_per_tile_list(job, framebuffer,
                                          dst, src, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

/* Builds the full RCL for resolving all layers of src into dst. */
void
v3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job,
                                  struct v3dv_image *dst,
                                  struct v3dv_image *src,
                                  struct v3dv_meta_framebuffer *framebuffer,
                                  const VkImageResolve2KHR *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

/* Emits the per-tile generic list for a buffer-to-buffer copy: linear load
 * from src, linear store to dst, both through RT0.
 */
static void
emit_copy_buffer_per_tile_list(struct v3dv_job *job,
                               struct v3dv_bo *dst,
                               struct v3dv_bo *src,
                               uint32_t dst_offset,
                               uint32_t src_offset,
                               uint32_t stride,
                               uint32_t format)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_linear_store(cl, RENDER_TARGET_0,
                     dst, dst_offset, stride, false, format);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

/* Emits the tile list and supertile coordinates for a buffer copy of
 * item_size bytes per pixel across the job's frame width.
 */
void
v3dX(meta_emit_copy_buffer)(struct v3dv_job *job,
                            struct v3dv_bo *dst,
                            struct v3dv_bo *src,
                            uint32_t dst_offset,
                            uint32_t src_offset,
                            struct v3dv_meta_framebuffer *framebuffer,
                            uint32_t format,
                            uint32_t item_size)
{
   const uint32_t stride = job->frame_tiling.width * item_size;
   emit_copy_buffer_per_tile_list(job, dst, src,
                                  dst_offset, src_offset,
                                  stride, format);
   emit_supertile_coordinates(job, framebuffer);
}

/* Builds the full RCL for a buffer-to-buffer copy. */
void
v3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job,
                                struct v3dv_bo *dst,
                                struct v3dv_bo *src,
                                uint32_t dst_offset,
                                uint32_t src_offset,
                                struct v3dv_meta_framebuffer *framebuffer,
                                uint32_t format,
                                uint32_t item_size)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);

   v3dX(meta_emit_copy_buffer)(job, dst, src, dst_offset, src_offset,
                               framebuffer, format, item_size);

   cl_emit(rcl, END_OF_RENDERING, end);
}

/* Emits the per-tile generic list for copying one layer of src into dst
 * through the TLB (load src tile, store dst tile).
 */
static void
emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
                                    struct v3dv_meta_framebuffer *framebuffer,
                                    struct v3dv_image *dst,
                                    struct v3dv_image *src,
                                    uint32_t layer_offset,
                                    const VkImageCopy2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->srcSubresource.layerCount) ||
          layer_offset < src->vk.extent.depth);

   /* For 3D images the "layer" is a depth slice starting at srcOffset.z. */
   const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer + layer_offset :
      region->srcOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, src,
                   region->srcSubresource.aspectMask,
                   src_layer,
                   region->srcSubresource.mipLevel,
                   false, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->dstSubresource.layerCount) ||
          layer_offset < dst->vk.extent.depth);

   const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer + layer_offset :
      region->dstOffset.z + layer_offset;

   emit_image_store(job->device, cl, framebuffer, dst,
                    region->dstSubresource.aspectMask,
                    dst_layer,
                    region->dstSubresource.mipLevel,
                    false, false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

/* Emits the per-tile list plus the supertile coordinates that execute it
 * for one layer of an image-to-image copy.
 */
static void
emit_copy_image_layer(struct v3dv_job *job,
                      struct v3dv_image *dst,
                      struct v3dv_image *src,
                      struct v3dv_meta_framebuffer *framebuffer,
                      uint32_t layer,
                      const VkImageCopy2KHR *region)
{
   emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

/* Builds the full RCL for copying all layers of src into dst. */
void
v3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job,
                               struct v3dv_image *dst,
                               struct v3dv_image *src,
                               struct v3dv_meta_framebuffer *framebuffer,
                               const VkImageCopy2KHR *region)
{
   struct v3dv_cl *rcl =
      emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

/* Builds and queues a TFU (Texture Formatting Unit) job that copies one
 * layer/mip of src into one layer/mip of dst, programming the input/output
 * addresses, tiling formats, strides and padding for the transfer.
 */
void
v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_image *dst,
                        uint32_t dst_mip_level,
                        uint32_t dst_layer,
                        struct v3dv_image *src,
                        uint32_t src_mip_level,
                        uint32_t src_layer,
                        uint32_t width,
                        uint32_t height,
                        const struct v3dv_format *format)
{
   const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
   const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];

   assert(dst->mem && dst->mem->bo);
   const struct v3dv_bo *dst_bo = dst->mem->bo;

   assert(src->mem && src->mem->bo);
   const struct v3dv_bo *src_bo = src->mem->bo;

   struct drm_v3d_submit_tfu tfu = {
      .ios = (height << 16) | width,
      .bo_handles = {
         dst_bo->handle,
         /* Only list the src BO when it differs from dst. */
         src_bo->handle != dst_bo->handle ? src_bo->handle : 0
      },
   };

   const uint32_t src_offset =
      src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
   tfu.iia |= src_offset;

   /* Input format: raster, or the lineartile-relative tiling mode. */
   uint32_t icfg;
   if (src_slice->tiling == V3D_TILING_RASTER) {
      icfg = V3D_TFU_ICFG_FORMAT_RASTER;
   } else {
      icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE +
             (src_slice->tiling - V3D_TILING_LINEARTILE);
   }
   tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT;

   const uint32_t dst_offset =
      dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
   tfu.ioa |= dst_offset;

   tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
               (dst_slice->tiling - V3D_TILING_LINEARTILE)) <<
                V3D_TFU_IOA_FORMAT_SHIFT;
   tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;

   /* Input image stride: UIF tilings use the padded height in UIF blocks,
    * raster uses the stride in pixels.
    */
   switch (src_slice->tiling) {
   case V3D_TILING_UIF_NO_XOR:
   case V3D_TILING_UIF_XOR:
      tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp));
      break;
   case V3D_TILING_RASTER:
      tfu.iis |= src_slice->stride / src->cpp;
      break;
   default:
      break;
   }

   /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
    * OPAD field for the destination (how many extra UIF blocks beyond
    * those necessary to cover the height).
    */
   if (dst_slice->tiling == V3D_TILING_UIF_NO_XOR ||
       dst_slice->tiling == V3D_TILING_UIF_XOR) {
      uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp);
      uint32_t implicit_padded_height = align(height, uif_block_h);
      uint32_t icfg =
         (dst_slice->padded_height - implicit_padded_height) / uif_block_h;
      tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
   }

   v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
}

/* Emits the per-tile generic list for clearing one layer/level of an image:
 * no loads, just a store of the (cleared) tile buffer contents.
 */
static void
emit_clear_image_layer_per_tile_list(struct v3dv_job *job,
                                     struct v3dv_meta_framebuffer *framebuffer,
                                     struct v3dv_image *image,
                                     VkImageAspectFlags aspects,
                                     uint32_t layer,
                                     uint32_t level)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_image_store(job->device, cl, framebuffer, image, aspects,
                    layer, level, false, false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

/* Emits clear tile lists for each layer in [min_layer, max_layer). */
static void
emit_clear_image_layers(struct v3dv_job *job,
                        struct v3dv_image *image,
                        struct v3dv_meta_framebuffer *framebuffer,
                        VkImageAspectFlags aspects,
                        uint32_t min_layer,
                        uint32_t max_layer,
                        uint32_t level)
{
   for (uint32_t layer = min_layer; layer < max_layer; layer++) {
      emit_clear_image_layer_per_tile_list(job, framebuffer, image, aspects,
                                           layer, level);
      emit_supertile_coordinates(job, framebuffer);
   }
}

/* Builds the full RCL for clearing a range of layers of one mip level. */
void
v3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job,
                                struct v3dv_image *image,
                                struct v3dv_meta_framebuffer *framebuffer,
                                const union v3dv_clear_value *clear_value,
                                VkImageAspectFlags aspects,
                                uint32_t min_layer,
                                uint32_t max_layer,
                                uint32_t level)
{
   const struct rcl_clear_info clear_info = {
      .clear_value = clear_value,
      .image = image,
      .aspects = aspects,
      .level = level,
   };

   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, clear_value);
   emit_clear_image_layers(job, image, framebuffer, aspects,
                           min_layer, max_layer, level);
   cl_emit(rcl, END_OF_RENDERING, end);
}

/* Emits the per-tile generic list for a buffer fill: no loads, linear store
 * of the cleared tile buffer as RGBA8UI.
 */
static void
emit_fill_buffer_per_tile_list(struct v3dv_job *job,
                               struct v3dv_bo *bo,
                               uint32_t offset,
                               uint32_t stride)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
                     V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

/* Emits the fill tile list and its supertile coordinates; each "pixel" is
 * 4 bytes of fill data.
 */
static void
emit_fill_buffer(struct v3dv_job *job,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 struct v3dv_meta_framebuffer *framebuffer)
{
   const uint32_t stride = job->frame_tiling.width * 4;
   emit_fill_buffer_per_tile_list(job, bo, offset, stride);
   emit_supertile_coordinates(job, framebuffer);
}

/* Builds the full RCL for filling a buffer with a 32-bit pattern: the
 * pattern is programmed as the clear color and stored out linearly.
 */
void
v3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job,
                                struct v3dv_bo *bo,
                                uint32_t offset,
                                struct v3dv_meta_framebuffer *framebuffer,
                                uint32_t data)
{
   const union v3dv_clear_value clear_value = {
      .color = { data, 0, 0, 0 },
   };

   const struct rcl_clear_info clear_info = {
      .clear_value = &clear_value,
      .image = NULL,
      .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
      .level = 0,
   };

   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, &clear_value);
   emit_fill_buffer(job, bo, offset, framebuffer);
   cl_emit(rcl, END_OF_RENDERING, end);
}


/* Emits the per-tile generic list for copying a buffer into one layer of
 * an image (buffer-to-image direction).
 */
static void
emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
                                        struct v3dv_meta_framebuffer *framebuffer,
                                        struct v3dv_image *image,
                                        struct v3dv_buffer *buffer,
                                        uint32_t layer,
                                        const VkBufferImageCopy2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
   assert((image->vk.image_type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
          layer < image->vk.extent.depth);

   /* Load TLB from buffer */
   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   /* Handle copy to compressed format using a compatible format */
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));

   uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
1076 1 : image->cpp; 1077 uint32_t buffer_stride = width * cpp; 1078 uint32_t buffer_offset = 1079 buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer; 1080 1081 uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask, 1082 false, false, true); 1083 1084 emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo, 1085 buffer_offset, buffer_stride, format); 1086 1087 /* Because we can't do raster loads/stores of Z/S formats we need to 1088 * use a color tile buffer with a compatible RGBA color format instead. 1089 * However, when we are uploading a single aspect to a combined 1090 * depth/stencil image we have the problem that our tile buffer stores don't 1091 * allow us to mask out the other aspect, so we always write all four RGBA 1092 * channels to the image and we end up overwriting that other aspect with 1093 * undefined values. To work around that, we first load the aspect we are 1094 * not copying from the image memory into a proper Z/S tile buffer. Then we 1095 * do our store from the color buffer for the aspect we are copying, and 1096 * after that, we do another store from the Z/S tile buffer to restore the 1097 * other aspect to its original value. 
1098 */ 1099 if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { 1100 if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { 1101 emit_image_load(job->device, cl, framebuffer, image, 1102 VK_IMAGE_ASPECT_STENCIL_BIT, 1103 imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, 1104 false, false); 1105 } else { 1106 assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT); 1107 emit_image_load(job->device, cl, framebuffer, image, 1108 VK_IMAGE_ASPECT_DEPTH_BIT, 1109 imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, 1110 false, false); 1111 } 1112 } 1113 1114 cl_emit(cl, END_OF_LOADS, end); 1115 1116 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); 1117 1118 /* Store TLB to image */ 1119 emit_image_store(job->device, cl, framebuffer, image, imgrsc->aspectMask, 1120 imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, 1121 false, true, false); 1122 1123 if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { 1124 if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { 1125 emit_image_store(job->device, cl, framebuffer, image, 1126 VK_IMAGE_ASPECT_STENCIL_BIT, 1127 imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, 1128 false, false, false); 1129 } else { 1130 assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT); 1131 emit_image_store(job->device, cl, framebuffer, image, 1132 VK_IMAGE_ASPECT_DEPTH_BIT, 1133 imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, 1134 false, false, false); 1135 } 1136 } 1137 1138 cl_emit(cl, END_OF_TILE_MARKER, end); 1139 1140 cl_emit(cl, RETURN_FROM_SUB_LIST, ret); 1141 1142 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { 1143 branch.start = tile_list_start; 1144 branch.end = v3dv_cl_get_address(cl); 1145 } 1146} 1147 1148static void 1149emit_copy_buffer_to_layer(struct v3dv_job *job, 1150 struct v3dv_image *image, 1151 struct v3dv_buffer *buffer, 1152 struct v3dv_meta_framebuffer *framebuffer, 1153 uint32_t layer, 1154 const VkBufferImageCopy2KHR *region) 1155{ 1156 emit_copy_buffer_to_layer_per_tile_list(job, 
framebuffer, image, buffer, 1157 layer, region); 1158 emit_supertile_coordinates(job, framebuffer); 1159} 1160 1161void 1162v3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job, 1163 struct v3dv_image *image, 1164 struct v3dv_buffer *buffer, 1165 struct v3dv_meta_framebuffer *framebuffer, 1166 const VkBufferImageCopy2KHR *region) 1167{ 1168 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); 1169 v3dv_return_if_oom(NULL, job); 1170 1171 emit_frame_setup(job, 0, NULL); 1172 for (int layer = 0; layer < job->frame_tiling.layers; layer++) 1173 emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region); 1174 cl_emit(rcl, END_OF_RENDERING, end); 1175} 1176 1177/* Figure out a TLB size configuration for a number of pixels to process. 1178 * Beware that we can't "render" more than 4096x4096 pixels in a single job, 1179 * if the pixel count is larger than this, the caller might need to split 1180 * the job and call this function multiple times. 1181 */ 1182static void 1183framebuffer_size_for_pixel_count(uint32_t num_pixels, 1184 uint32_t *width, 1185 uint32_t *height) 1186{ 1187 assert(num_pixels > 0); 1188 1189 const uint32_t max_dim_pixels = 4096; 1190 const uint32_t max_pixels = max_dim_pixels * max_dim_pixels; 1191 1192 uint32_t w, h; 1193 if (num_pixels > max_pixels) { 1194 w = max_dim_pixels; 1195 h = max_dim_pixels; 1196 } else { 1197 w = num_pixels; 1198 h = 1; 1199 while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) { 1200 w >>= 1; 1201 h <<= 1; 1202 } 1203 } 1204 assert(w <= max_dim_pixels && h <= max_dim_pixels); 1205 assert(w * h <= num_pixels); 1206 assert(w > 0 && h > 0); 1207 1208 *width = w; 1209 *height = h; 1210} 1211 1212struct v3dv_job * 1213v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer, 1214 struct v3dv_bo *dst, 1215 uint32_t dst_offset, 1216 struct v3dv_bo *src, 1217 uint32_t src_offset, 1218 const VkBufferCopy2KHR *region) 1219{ 1220 const uint32_t internal_bpp = V3D_INTERNAL_BPP_32; 1221 const 
uint32_t internal_type = V3D_INTERNAL_TYPE_8UI; 1222 1223 /* Select appropriate pixel format for the copy operation based on the 1224 * size to copy and the alignment of the source and destination offsets. 1225 */ 1226 src_offset += region->srcOffset; 1227 dst_offset += region->dstOffset; 1228 uint32_t item_size = 4; 1229 while (item_size > 1 && 1230 (src_offset % item_size != 0 || dst_offset % item_size != 0)) { 1231 item_size /= 2; 1232 } 1233 1234 while (item_size > 1 && region->size % item_size != 0) 1235 item_size /= 2; 1236 1237 assert(region->size % item_size == 0); 1238 uint32_t num_items = region->size / item_size; 1239 assert(num_items > 0); 1240 1241 uint32_t format; 1242 VkFormat vk_format; 1243 switch (item_size) { 1244 case 4: 1245 format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; 1246 vk_format = VK_FORMAT_R8G8B8A8_UINT; 1247 break; 1248 case 2: 1249 format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI; 1250 vk_format = VK_FORMAT_R8G8_UINT; 1251 break; 1252 default: 1253 format = V3D_OUTPUT_IMAGE_FORMAT_R8UI; 1254 vk_format = VK_FORMAT_R8_UINT; 1255 break; 1256 } 1257 1258 struct v3dv_job *job = NULL; 1259 while (num_items > 0) { 1260 job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); 1261 if (!job) 1262 return NULL; 1263 1264 uint32_t width, height; 1265 framebuffer_size_for_pixel_count(num_items, &width, &height); 1266 1267 v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false); 1268 1269 struct v3dv_meta_framebuffer framebuffer; 1270 v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type, 1271 &job->frame_tiling); 1272 1273 v3dX(job_emit_binning_flush)(job); 1274 1275 v3dX(meta_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset, 1276 &framebuffer, format, item_size); 1277 1278 v3dv_cmd_buffer_finish_job(cmd_buffer); 1279 1280 const uint32_t items_copied = width * height; 1281 const uint32_t bytes_copied = items_copied * item_size; 1282 num_items -= items_copied; 1283 src_offset += bytes_copied; 1284 dst_offset 
+= bytes_copied; 1285 } 1286 1287 return job; 1288} 1289 1290void 1291v3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer, 1292 struct v3dv_bo *bo, 1293 uint32_t offset, 1294 uint32_t size, 1295 uint32_t data) 1296{ 1297 assert(size > 0 && size % 4 == 0); 1298 assert(offset + size <= bo->size); 1299 1300 const uint32_t internal_bpp = V3D_INTERNAL_BPP_32; 1301 const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI; 1302 uint32_t num_items = size / 4; 1303 1304 while (num_items > 0) { 1305 struct v3dv_job *job = 1306 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); 1307 if (!job) 1308 return; 1309 1310 uint32_t width, height; 1311 framebuffer_size_for_pixel_count(num_items, &width, &height); 1312 1313 v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false); 1314 1315 struct v3dv_meta_framebuffer framebuffer; 1316 v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT, 1317 internal_type, &job->frame_tiling); 1318 1319 v3dX(job_emit_binning_flush)(job); 1320 1321 v3dX(meta_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data); 1322 1323 v3dv_cmd_buffer_finish_job(cmd_buffer); 1324 1325 const uint32_t items_copied = width * height; 1326 const uint32_t bytes_copied = items_copied * 4; 1327 num_items -= items_copied; 1328 offset += bytes_copied; 1329 } 1330} 1331 1332void 1333v3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb, 1334 VkFormat vk_format, 1335 uint32_t internal_type, 1336 const struct v3dv_frame_tiling *tiling) 1337{ 1338 fb->internal_type = internal_type; 1339 1340 /* Supertile coverage always starts at 0,0 */ 1341 uint32_t supertile_w_in_pixels = 1342 tiling->tile_width * tiling->supertile_width; 1343 uint32_t supertile_h_in_pixels = 1344 tiling->tile_height * tiling->supertile_height; 1345 1346 fb->min_x_supertile = 0; 1347 fb->min_y_supertile = 0; 1348 fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels; 1349 fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels; 
1350 1351 fb->vk_format = vk_format; 1352 fb->format = v3dX(get_format)(vk_format); 1353 1354 fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F; 1355 if (vk_format_is_depth_or_stencil(vk_format)) 1356 fb->internal_depth_type = v3dX(get_internal_depth_type)(vk_format); 1357} 1358