17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2021 Raspberry Pi 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include "v3dv_private.h" 257ec681f3Smrg#include "v3dv_meta_common.h" 267ec681f3Smrg 277ec681f3Smrg#include "broadcom/common/v3d_macros.h" 287ec681f3Smrg#include "broadcom/cle/v3dx_pack.h" 297ec681f3Smrg#include "broadcom/compiler/v3d_compiler.h" 307ec681f3Smrg 317ec681f3Smrg#include "vk_format_info.h" 327ec681f3Smrg 337ec681f3Smrgstruct rcl_clear_info { 347ec681f3Smrg const union v3dv_clear_value *clear_value; 357ec681f3Smrg struct v3dv_image *image; 367ec681f3Smrg VkImageAspectFlags aspects; 377ec681f3Smrg uint32_t level; 387ec681f3Smrg}; 397ec681f3Smrg 407ec681f3Smrgstatic struct v3dv_cl * 417ec681f3Smrgemit_rcl_prologue(struct v3dv_job *job, 427ec681f3Smrg struct v3dv_meta_framebuffer *fb, 437ec681f3Smrg const struct rcl_clear_info *clear_info) 447ec681f3Smrg{ 457ec681f3Smrg const struct v3dv_frame_tiling *tiling = &job->frame_tiling; 467ec681f3Smrg 477ec681f3Smrg struct v3dv_cl *rcl = &job->rcl; 487ec681f3Smrg v3dv_cl_ensure_space_with_branch(rcl, 200 + 497ec681f3Smrg tiling->layers * 256 * 507ec681f3Smrg cl_packet_length(SUPERTILE_COORDINATES)); 517ec681f3Smrg if (job->cmd_buffer->state.oom) 527ec681f3Smrg return NULL; 537ec681f3Smrg 547ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { 557ec681f3Smrg config.early_z_disable = true; 567ec681f3Smrg config.image_width_pixels = tiling->width; 577ec681f3Smrg config.image_height_pixels = tiling->height; 587ec681f3Smrg config.number_of_render_targets = 1; 597ec681f3Smrg config.multisample_mode_4x = tiling->msaa; 607ec681f3Smrg config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; 617ec681f3Smrg config.internal_depth_type = fb->internal_depth_type; 627ec681f3Smrg } 637ec681f3Smrg 647ec681f3Smrg if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) { 657ec681f3Smrg uint32_t clear_pad = 0; 667ec681f3Smrg if (clear_info->image) { 677ec681f3Smrg const struct v3dv_image *image = clear_info->image; 687ec681f3Smrg const struct v3d_resource_slice *slice = 697ec681f3Smrg &image->slices[clear_info->level]; 707ec681f3Smrg if (slice->tiling == V3D_TILING_UIF_NO_XOR || 717ec681f3Smrg slice->tiling == V3D_TILING_UIF_XOR) { 727ec681f3Smrg int uif_block_height = v3d_utile_height(image->cpp) * 2; 737ec681f3Smrg 747ec681f3Smrg uint32_t implicit_padded_height = 757ec681f3Smrg align(tiling->height, uif_block_height) / uif_block_height; 767ec681f3Smrg 777ec681f3Smrg if (slice->padded_height_of_output_image_in_uif_blocks - 787ec681f3Smrg implicit_padded_height >= 15) { 797ec681f3Smrg clear_pad = slice->padded_height_of_output_image_in_uif_blocks; 807ec681f3Smrg } 817ec681f3Smrg } 827ec681f3Smrg } 837ec681f3Smrg 847ec681f3Smrg const uint32_t *color = &clear_info->clear_value->color[0]; 857ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) { 867ec681f3Smrg clear.clear_color_low_32_bits = color[0]; 877ec681f3Smrg clear.clear_color_next_24_bits = color[1] & 0x00ffffff; 887ec681f3Smrg clear.render_target_number = 0; 897ec681f3Smrg }; 907ec681f3Smrg 917ec681f3Smrg if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) { 927ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { 937ec681f3Smrg clear.clear_color_mid_low_32_bits = 947ec681f3Smrg ((color[1] >> 24) | (color[2] << 8)); 957ec681f3Smrg clear.clear_color_mid_high_24_bits = 967ec681f3Smrg ((color[2] >> 24) | ((color[3] & 0xffff) << 8)); 977ec681f3Smrg clear.render_target_number = 0; 987ec681f3Smrg }; 997ec681f3Smrg } 1007ec681f3Smrg 1017ec681f3Smrg if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { 1027ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { 1037ec681f3Smrg clear.uif_padded_height_in_uif_blocks = clear_pad; 1047ec681f3Smrg clear.clear_color_high_16_bits = color[3] >> 16; 1057ec681f3Smrg clear.render_target_number = 0; 1067ec681f3Smrg }; 1077ec681f3Smrg } 1087ec681f3Smrg } 1097ec681f3Smrg 1107ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { 1117ec681f3Smrg rt.render_target_0_internal_bpp = tiling->internal_bpp; 1127ec681f3Smrg rt.render_target_0_internal_type = fb->internal_type; 1137ec681f3Smrg rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE; 1147ec681f3Smrg } 1157ec681f3Smrg 1167ec681f3Smrg cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { 1177ec681f3Smrg clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f; 1187ec681f3Smrg clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0; 1197ec681f3Smrg }; 1207ec681f3Smrg 1217ec681f3Smrg cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { 1227ec681f3Smrg init.use_auto_chained_tile_lists = true; 1237ec681f3Smrg init.size_of_first_block_in_chained_tile_lists = 1247ec681f3Smrg TILE_ALLOCATION_BLOCK_SIZE_64B; 1257ec681f3Smrg } 1267ec681f3Smrg 1277ec681f3Smrg return rcl; 1287ec681f3Smrg} 1297ec681f3Smrg 1307ec681f3Smrgstatic void 1317ec681f3Smrgemit_frame_setup(struct v3dv_job *job, 1327ec681f3Smrg uint32_t min_layer, 1337ec681f3Smrg const union v3dv_clear_value *clear_value) 1347ec681f3Smrg{ 1357ec681f3Smrg v3dv_return_if_oom(NULL, job); 1367ec681f3Smrg 1377ec681f3Smrg const struct v3dv_frame_tiling *tiling = &job->frame_tiling; 1387ec681f3Smrg 1397ec681f3Smrg struct v3dv_cl *rcl = &job->rcl; 1407ec681f3Smrg 1417ec681f3Smrg const uint32_t tile_alloc_offset = 1427ec681f3Smrg 64 * min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y; 1437ec681f3Smrg cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { 1447ec681f3Smrg list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset); 1457ec681f3Smrg } 1467ec681f3Smrg 1477ec681f3Smrg cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) { 1487ec681f3Smrg config.number_of_bin_tile_lists = 1; 1497ec681f3Smrg config.total_frame_width_in_tiles = tiling->draw_tiles_x; 1507ec681f3Smrg config.total_frame_height_in_tiles = tiling->draw_tiles_y; 1517ec681f3Smrg 1527ec681f3Smrg config.supertile_width_in_tiles = tiling->supertile_width; 1537ec681f3Smrg config.supertile_height_in_tiles = tiling->supertile_height; 1547ec681f3Smrg 1557ec681f3Smrg config.total_frame_width_in_supertiles = 1567ec681f3Smrg tiling->frame_width_in_supertiles; 1577ec681f3Smrg config.total_frame_height_in_supertiles = 1587ec681f3Smrg tiling->frame_height_in_supertiles; 1597ec681f3Smrg } 1607ec681f3Smrg 1617ec681f3Smrg /* Implement GFXH-1742 workaround. Also, if we are clearing we have to do 1627ec681f3Smrg * it here. 1637ec681f3Smrg */ 1647ec681f3Smrg for (int i = 0; i < 2; i++) { 1657ec681f3Smrg cl_emit(rcl, TILE_COORDINATES, coords); 1667ec681f3Smrg cl_emit(rcl, END_OF_LOADS, end); 1677ec681f3Smrg cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) { 1687ec681f3Smrg store.buffer_to_store = NONE; 1697ec681f3Smrg } 1707ec681f3Smrg if (clear_value && i == 0) { 1717ec681f3Smrg cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) { 1727ec681f3Smrg clear.clear_z_stencil_buffer = true; 1737ec681f3Smrg clear.clear_all_render_targets = true; 1747ec681f3Smrg } 1757ec681f3Smrg } 1767ec681f3Smrg cl_emit(rcl, END_OF_TILE_MARKER, end); 1777ec681f3Smrg } 1787ec681f3Smrg 1797ec681f3Smrg cl_emit(rcl, FLUSH_VCD_CACHE, flush); 1807ec681f3Smrg} 1817ec681f3Smrg 1827ec681f3Smrgstatic void 1837ec681f3Smrgemit_supertile_coordinates(struct v3dv_job *job, 1847ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer) 1857ec681f3Smrg{ 1867ec681f3Smrg v3dv_return_if_oom(NULL, job); 1877ec681f3Smrg 1887ec681f3Smrg struct v3dv_cl *rcl = &job->rcl; 1897ec681f3Smrg 1907ec681f3Smrg const uint32_t min_y = framebuffer->min_y_supertile; 1917ec681f3Smrg const uint32_t max_y = framebuffer->max_y_supertile; 1927ec681f3Smrg const uint32_t min_x = framebuffer->min_x_supertile; 1937ec681f3Smrg const uint32_t max_x = framebuffer->max_x_supertile; 1947ec681f3Smrg 1957ec681f3Smrg for (int y = min_y; y <= max_y; y++) { 1967ec681f3Smrg for (int x = min_x; x <= max_x; x++) { 1977ec681f3Smrg cl_emit(rcl, SUPERTILE_COORDINATES, coords) { 1987ec681f3Smrg coords.column_number_in_supertiles = x; 1997ec681f3Smrg coords.row_number_in_supertiles = y; 2007ec681f3Smrg } 2017ec681f3Smrg } 2027ec681f3Smrg } 2037ec681f3Smrg} 2047ec681f3Smrg 2057ec681f3Smrgstatic void 2067ec681f3Smrgemit_linear_load(struct v3dv_cl *cl, 2077ec681f3Smrg uint32_t buffer, 2087ec681f3Smrg struct v3dv_bo *bo, 2097ec681f3Smrg uint32_t offset, 2107ec681f3Smrg uint32_t stride, 2117ec681f3Smrg uint32_t format) 2127ec681f3Smrg{ 2137ec681f3Smrg cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { 2147ec681f3Smrg load.buffer_to_load = buffer; 2157ec681f3Smrg load.address = v3dv_cl_address(bo, offset); 2167ec681f3Smrg load.input_image_format = format; 2177ec681f3Smrg load.memory_format = V3D_TILING_RASTER; 2187ec681f3Smrg load.height_in_ub_or_stride = stride; 2197ec681f3Smrg load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 2207ec681f3Smrg } 2217ec681f3Smrg} 2227ec681f3Smrg 2237ec681f3Smrgstatic void 2247ec681f3Smrgemit_linear_store(struct v3dv_cl *cl, 2257ec681f3Smrg uint32_t buffer, 2267ec681f3Smrg struct v3dv_bo *bo, 2277ec681f3Smrg uint32_t offset, 2287ec681f3Smrg uint32_t stride, 2297ec681f3Smrg bool msaa, 2307ec681f3Smrg uint32_t format) 2317ec681f3Smrg{ 2327ec681f3Smrg cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 2337ec681f3Smrg store.buffer_to_store = RENDER_TARGET_0; 2347ec681f3Smrg store.address = v3dv_cl_address(bo, offset); 2357ec681f3Smrg store.clear_buffer_being_stored = false; 2367ec681f3Smrg store.output_image_format = format; 2377ec681f3Smrg store.memory_format = V3D_TILING_RASTER; 2387ec681f3Smrg store.height_in_ub_or_stride = stride; 2397ec681f3Smrg store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES : 2407ec681f3Smrg V3D_DECIMATE_MODE_SAMPLE_0; 2417ec681f3Smrg } 2427ec681f3Smrg} 2437ec681f3Smrg 2447ec681f3Smrg/* This chooses a tile buffer format that is appropriate for the copy operation. 2457ec681f3Smrg * Typically, this is the image render target type, however, if we are copying 2467ec681f3Smrg * depth/stencil to/from a buffer the hardware can't do raster loads/stores, so 2477ec681f3Smrg * we need to load and store to/from a tile color buffer using a compatible 2487ec681f3Smrg * color format. 2497ec681f3Smrg */ 2507ec681f3Smrgstatic uint32_t 2517ec681f3Smrgchoose_tlb_format(struct v3dv_meta_framebuffer *framebuffer, 2527ec681f3Smrg VkImageAspectFlags aspect, 2537ec681f3Smrg bool for_store, 2547ec681f3Smrg bool is_copy_to_buffer, 2557ec681f3Smrg bool is_copy_from_buffer) 2567ec681f3Smrg{ 2577ec681f3Smrg if (is_copy_to_buffer || is_copy_from_buffer) { 2587ec681f3Smrg switch (framebuffer->vk_format) { 2597ec681f3Smrg case VK_FORMAT_D16_UNORM: 2607ec681f3Smrg return V3D_OUTPUT_IMAGE_FORMAT_R16UI; 2617ec681f3Smrg case VK_FORMAT_D32_SFLOAT: 2627ec681f3Smrg return V3D_OUTPUT_IMAGE_FORMAT_R32F; 2637ec681f3Smrg case VK_FORMAT_X8_D24_UNORM_PACK32: 2647ec681f3Smrg return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; 2657ec681f3Smrg case VK_FORMAT_D24_UNORM_S8_UINT: 2667ec681f3Smrg /* When storing the stencil aspect of a combined depth/stencil image 2677ec681f3Smrg * to a buffer, the Vulkan spec states that the output buffer must 2687ec681f3Smrg * have packed stencil values, so we choose an R8UI format for our 2697ec681f3Smrg * store outputs. For the load input we still want RGBA8UI since the 2707ec681f3Smrg * source image contains 4 channels (including the 3 channels 2717ec681f3Smrg * containing the 24-bit depth value). 2727ec681f3Smrg * 2737ec681f3Smrg * When loading the stencil aspect of a combined depth/stencil image 2747ec681f3Smrg * from a buffer, we read packed 8-bit stencil values from the buffer 2757ec681f3Smrg * that we need to put into the LSB of the 32-bit format (the R 2767ec681f3Smrg * channel), so we use R8UI. For the store, if we used R8UI then we 2777ec681f3Smrg * would write 8-bit stencil values consecutively over depth channels, 2787ec681f3Smrg * so we need to use RGBA8UI. This will write each stencil value in 2797ec681f3Smrg * its correct position, but will overwrite depth values (channels G 2807ec681f3Smrg * B,A) with undefined values. To fix this, we will have to restore 2817ec681f3Smrg * the depth aspect from the Z tile buffer, which we should pre-load 2827ec681f3Smrg * from the image before the store). 2837ec681f3Smrg */ 2847ec681f3Smrg if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) { 2857ec681f3Smrg return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; 2867ec681f3Smrg } else { 2877ec681f3Smrg assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT); 2887ec681f3Smrg if (is_copy_to_buffer) { 2897ec681f3Smrg return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI : 2907ec681f3Smrg V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; 2917ec681f3Smrg } else { 2927ec681f3Smrg assert(is_copy_from_buffer); 2937ec681f3Smrg return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI : 2947ec681f3Smrg V3D_OUTPUT_IMAGE_FORMAT_R8UI; 2957ec681f3Smrg } 2967ec681f3Smrg } 2977ec681f3Smrg default: /* Color formats */ 2987ec681f3Smrg return framebuffer->format->rt_type; 2997ec681f3Smrg break; 3007ec681f3Smrg } 3017ec681f3Smrg } else { 3027ec681f3Smrg return framebuffer->format->rt_type; 3037ec681f3Smrg } 3047ec681f3Smrg} 3057ec681f3Smrg 3067ec681f3Smrgstatic inline bool 3077ec681f3Smrgformat_needs_rb_swap(struct v3dv_device *device, 3087ec681f3Smrg VkFormat format) 3097ec681f3Smrg{ 3107ec681f3Smrg const uint8_t *swizzle = v3dv_get_format_swizzle(device, format); 3117ec681f3Smrg return swizzle[0] == PIPE_SWIZZLE_Z; 3127ec681f3Smrg} 3137ec681f3Smrg 3147ec681f3Smrgstatic void 3157ec681f3Smrgemit_image_load(struct v3dv_device *device, 3167ec681f3Smrg struct v3dv_cl *cl, 3177ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 3187ec681f3Smrg struct v3dv_image *image, 3197ec681f3Smrg VkImageAspectFlags aspect, 3207ec681f3Smrg uint32_t layer, 3217ec681f3Smrg uint32_t mip_level, 3227ec681f3Smrg bool is_copy_to_buffer, 3237ec681f3Smrg bool is_copy_from_buffer) 3247ec681f3Smrg{ 3257ec681f3Smrg uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); 3267ec681f3Smrg 3277ec681f3Smrg /* For image to/from buffer copies we always load to and store from RT0, 3287ec681f3Smrg * even for depth/stencil aspects, because the hardware can't do raster 3297ec681f3Smrg * stores or loads from/to the depth/stencil tile buffers. 3307ec681f3Smrg */ 3317ec681f3Smrg bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer || 3327ec681f3Smrg aspect == VK_IMAGE_ASPECT_COLOR_BIT; 3337ec681f3Smrg 3347ec681f3Smrg const struct v3d_resource_slice *slice = &image->slices[mip_level]; 3357ec681f3Smrg cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { 3367ec681f3Smrg load.buffer_to_load = load_to_color_tlb ? 3377ec681f3Smrg RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect); 3387ec681f3Smrg 3397ec681f3Smrg load.address = v3dv_cl_address(image->mem->bo, layer_offset); 3407ec681f3Smrg 3417ec681f3Smrg load.input_image_format = choose_tlb_format(framebuffer, aspect, false, 3427ec681f3Smrg is_copy_to_buffer, 3437ec681f3Smrg is_copy_from_buffer); 3447ec681f3Smrg load.memory_format = slice->tiling; 3457ec681f3Smrg 3467ec681f3Smrg /* When copying depth/stencil images to a buffer, for D24 formats Vulkan 3477ec681f3Smrg * expects the depth value in the LSB bits of each 32-bit pixel. 3487ec681f3Smrg * Unfortunately, the hardware seems to put the S8/X8 bits there and the 3497ec681f3Smrg * depth bits on the MSB. To work around that we can reverse the channel 3507ec681f3Smrg * order and then swap the R/B channels to get what we want. 3517ec681f3Smrg * 3527ec681f3Smrg * NOTE: reversing and swapping only gets us the behavior we want if the 3537ec681f3Smrg * operations happen in that exact order, which seems to be the case when 3547ec681f3Smrg * done on the tile buffer load operations. On the store, it seems the 3557ec681f3Smrg * order is not the same. The order on the store is probably reversed so 3567ec681f3Smrg * that reversing and swapping on both the load and the store preserves 3577ec681f3Smrg * the original order of the channels in memory. 3587ec681f3Smrg * 3597ec681f3Smrg * Notice that we only need to do this when copying to a buffer, where 3607ec681f3Smrg * depth and stencil aspects are copied as separate regions and 3617ec681f3Smrg * the spec expects them to be tightly packed. 3627ec681f3Smrg */ 3637ec681f3Smrg bool needs_rb_swap = false; 3647ec681f3Smrg bool needs_chan_reverse = false; 3657ec681f3Smrg if (is_copy_to_buffer && 3667ec681f3Smrg (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 || 3677ec681f3Smrg (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && 3687ec681f3Smrg (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) { 3697ec681f3Smrg needs_rb_swap = true; 3707ec681f3Smrg needs_chan_reverse = true; 3717ec681f3Smrg } else if (!is_copy_from_buffer && !is_copy_to_buffer && 3727ec681f3Smrg (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) { 3737ec681f3Smrg /* This is not a raw data copy (i.e. we are clearing the image), 3747ec681f3Smrg * so we need to make sure we respect the format swizzle. 3757ec681f3Smrg */ 3767ec681f3Smrg needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format); 3777ec681f3Smrg } 3787ec681f3Smrg 3797ec681f3Smrg load.r_b_swap = needs_rb_swap; 3807ec681f3Smrg load.channel_reverse = needs_chan_reverse; 3817ec681f3Smrg 3827ec681f3Smrg if (slice->tiling == V3D_TILING_UIF_NO_XOR || 3837ec681f3Smrg slice->tiling == V3D_TILING_UIF_XOR) { 3847ec681f3Smrg load.height_in_ub_or_stride = 3857ec681f3Smrg slice->padded_height_of_output_image_in_uif_blocks; 3867ec681f3Smrg } else if (slice->tiling == V3D_TILING_RASTER) { 3877ec681f3Smrg load.height_in_ub_or_stride = slice->stride; 3887ec681f3Smrg } 3897ec681f3Smrg 3907ec681f3Smrg if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) 3917ec681f3Smrg load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; 3927ec681f3Smrg else 3937ec681f3Smrg load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 3947ec681f3Smrg } 3957ec681f3Smrg} 3967ec681f3Smrg 3977ec681f3Smrgstatic void 3987ec681f3Smrgemit_image_store(struct v3dv_device *device, 3997ec681f3Smrg struct v3dv_cl *cl, 4007ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 4017ec681f3Smrg struct v3dv_image *image, 4027ec681f3Smrg VkImageAspectFlags aspect, 4037ec681f3Smrg uint32_t layer, 4047ec681f3Smrg uint32_t mip_level, 4057ec681f3Smrg bool is_copy_to_buffer, 4067ec681f3Smrg bool is_copy_from_buffer, 4077ec681f3Smrg bool is_multisample_resolve) 4087ec681f3Smrg{ 4097ec681f3Smrg uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); 4107ec681f3Smrg 4117ec681f3Smrg bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer || 4127ec681f3Smrg aspect == VK_IMAGE_ASPECT_COLOR_BIT; 4137ec681f3Smrg 4147ec681f3Smrg const struct v3d_resource_slice *slice = &image->slices[mip_level]; 4157ec681f3Smrg cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 4167ec681f3Smrg store.buffer_to_store = store_from_color_tlb ? 4177ec681f3Smrg RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect); 4187ec681f3Smrg 4197ec681f3Smrg store.address = v3dv_cl_address(image->mem->bo, layer_offset); 4207ec681f3Smrg store.clear_buffer_being_stored = false; 4217ec681f3Smrg 4227ec681f3Smrg /* See rationale in emit_image_load() */ 4237ec681f3Smrg bool needs_rb_swap = false; 4247ec681f3Smrg bool needs_chan_reverse = false; 4257ec681f3Smrg if (is_copy_from_buffer && 4267ec681f3Smrg (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 || 4277ec681f3Smrg (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && 4287ec681f3Smrg (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) { 4297ec681f3Smrg needs_rb_swap = true; 4307ec681f3Smrg needs_chan_reverse = true; 4317ec681f3Smrg } else if (!is_copy_from_buffer && !is_copy_to_buffer && 4327ec681f3Smrg (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) { 4337ec681f3Smrg needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format); 4347ec681f3Smrg } 4357ec681f3Smrg 4367ec681f3Smrg store.r_b_swap = needs_rb_swap; 4377ec681f3Smrg store.channel_reverse = needs_chan_reverse; 4387ec681f3Smrg 4397ec681f3Smrg store.output_image_format = choose_tlb_format(framebuffer, aspect, true, 4407ec681f3Smrg is_copy_to_buffer, 4417ec681f3Smrg is_copy_from_buffer); 4427ec681f3Smrg store.memory_format = slice->tiling; 4437ec681f3Smrg if (slice->tiling == V3D_TILING_UIF_NO_XOR || 4447ec681f3Smrg slice->tiling == V3D_TILING_UIF_XOR) { 4457ec681f3Smrg store.height_in_ub_or_stride = 4467ec681f3Smrg slice->padded_height_of_output_image_in_uif_blocks; 4477ec681f3Smrg } else if (slice->tiling == V3D_TILING_RASTER) { 4487ec681f3Smrg store.height_in_ub_or_stride = slice->stride; 4497ec681f3Smrg } 4507ec681f3Smrg 4517ec681f3Smrg if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) 4527ec681f3Smrg store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; 4537ec681f3Smrg else if (is_multisample_resolve) 4547ec681f3Smrg store.decimate_mode = V3D_DECIMATE_MODE_4X; 4557ec681f3Smrg else 4567ec681f3Smrg store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 4577ec681f3Smrg } 4587ec681f3Smrg} 4597ec681f3Smrg 4607ec681f3Smrgstatic void 4617ec681f3Smrgemit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job, 4627ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 4637ec681f3Smrg struct v3dv_buffer *buffer, 4647ec681f3Smrg struct v3dv_image *image, 4657ec681f3Smrg uint32_t layer_offset, 4667ec681f3Smrg const VkBufferImageCopy2KHR *region) 4677ec681f3Smrg{ 4687ec681f3Smrg struct v3dv_cl *cl = &job->indirect; 4697ec681f3Smrg v3dv_cl_ensure_space(cl, 200, 1); 4707ec681f3Smrg v3dv_return_if_oom(NULL, job); 4717ec681f3Smrg 4727ec681f3Smrg struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); 4737ec681f3Smrg 4747ec681f3Smrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 4757ec681f3Smrg 4767ec681f3Smrg /* Load image to TLB */ 4777ec681f3Smrg assert((image->vk.image_type != VK_IMAGE_TYPE_3D && 4787ec681f3Smrg layer_offset < region->imageSubresource.layerCount) || 4797ec681f3Smrg layer_offset < image->vk.extent.depth); 4807ec681f3Smrg 4817ec681f3Smrg const uint32_t image_layer = image->vk.image_type != VK_IMAGE_TYPE_3D ? 4827ec681f3Smrg region->imageSubresource.baseArrayLayer + layer_offset : 4837ec681f3Smrg region->imageOffset.z + layer_offset; 4847ec681f3Smrg 4857ec681f3Smrg emit_image_load(job->device, cl, framebuffer, image, 4867ec681f3Smrg region->imageSubresource.aspectMask, 4877ec681f3Smrg image_layer, 4887ec681f3Smrg region->imageSubresource.mipLevel, 4897ec681f3Smrg true, false); 4907ec681f3Smrg 4917ec681f3Smrg cl_emit(cl, END_OF_LOADS, end); 4927ec681f3Smrg 4937ec681f3Smrg cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); 4947ec681f3Smrg 4957ec681f3Smrg /* Store TLB to buffer */ 4967ec681f3Smrg uint32_t width, height; 4977ec681f3Smrg if (region->bufferRowLength == 0) 4987ec681f3Smrg width = region->imageExtent.width; 4997ec681f3Smrg else 5007ec681f3Smrg width = region->bufferRowLength; 5017ec681f3Smrg 5027ec681f3Smrg if (region->bufferImageHeight == 0) 5037ec681f3Smrg height = region->imageExtent.height; 5047ec681f3Smrg else 5057ec681f3Smrg height = region->bufferImageHeight; 5067ec681f3Smrg 5077ec681f3Smrg /* Handle copy from compressed format */ 5087ec681f3Smrg width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format)); 5097ec681f3Smrg height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format)); 5107ec681f3Smrg 5117ec681f3Smrg /* If we are storing stencil from a combined depth/stencil format the 5127ec681f3Smrg * Vulkan spec states that the output buffer must have packed stencil 5137ec681f3Smrg * values, where each stencil value is 1 byte. 5147ec681f3Smrg */ 5157ec681f3Smrg uint32_t cpp = 5167ec681f3Smrg region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? 5177ec681f3Smrg 1 : image->cpp; 5187ec681f3Smrg uint32_t buffer_stride = width * cpp; 5197ec681f3Smrg uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset + 5207ec681f3Smrg height * buffer_stride * layer_offset; 5217ec681f3Smrg 5227ec681f3Smrg uint32_t format = choose_tlb_format(framebuffer, 5237ec681f3Smrg region->imageSubresource.aspectMask, 5247ec681f3Smrg true, true, false); 5257ec681f3Smrg bool msaa = image->vk.samples > VK_SAMPLE_COUNT_1_BIT; 5267ec681f3Smrg 5277ec681f3Smrg emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo, 5287ec681f3Smrg buffer_offset, buffer_stride, msaa, format); 5297ec681f3Smrg 5307ec681f3Smrg cl_emit(cl, END_OF_TILE_MARKER, end); 5317ec681f3Smrg 5327ec681f3Smrg cl_emit(cl, RETURN_FROM_SUB_LIST, ret); 5337ec681f3Smrg 5347ec681f3Smrg cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { 5357ec681f3Smrg branch.start = tile_list_start; 5367ec681f3Smrg branch.end = v3dv_cl_get_address(cl); 5377ec681f3Smrg } 5387ec681f3Smrg} 5397ec681f3Smrg 5407ec681f3Smrgstatic void 5417ec681f3Smrgemit_copy_layer_to_buffer(struct v3dv_job *job, 5427ec681f3Smrg struct v3dv_buffer *buffer, 5437ec681f3Smrg struct v3dv_image *image, 5447ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 5457ec681f3Smrg uint32_t layer, 5467ec681f3Smrg const VkBufferImageCopy2KHR *region) 5477ec681f3Smrg{ 5487ec681f3Smrg emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer, 5497ec681f3Smrg image, layer, region); 5507ec681f3Smrg emit_supertile_coordinates(job, framebuffer); 5517ec681f3Smrg} 5527ec681f3Smrg 5537ec681f3Smrgvoid 5547ec681f3Smrgv3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job, 5557ec681f3Smrg struct v3dv_buffer *buffer, 5567ec681f3Smrg struct v3dv_image *image, 5577ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 5587ec681f3Smrg const VkBufferImageCopy2KHR *region) 5597ec681f3Smrg{ 5607ec681f3Smrg struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); 5617ec681f3Smrg v3dv_return_if_oom(NULL, job); 5627ec681f3Smrg 5637ec681f3Smrg emit_frame_setup(job, 0, NULL); 5647ec681f3Smrg for (int layer = 0; layer < job->frame_tiling.layers; layer++) 5657ec681f3Smrg emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region); 5667ec681f3Smrg cl_emit(rcl, END_OF_RENDERING, end); 5677ec681f3Smrg} 5687ec681f3Smrg 5697ec681f3Smrgstatic void 5707ec681f3Smrgemit_resolve_image_layer_per_tile_list(struct v3dv_job *job, 5717ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 5727ec681f3Smrg struct v3dv_image *dst, 5737ec681f3Smrg struct v3dv_image *src, 5747ec681f3Smrg uint32_t layer_offset, 5757ec681f3Smrg const VkImageResolve2KHR *region) 5767ec681f3Smrg{ 5777ec681f3Smrg struct v3dv_cl *cl = &job->indirect; 5787ec681f3Smrg v3dv_cl_ensure_space(cl, 200, 1); 5797ec681f3Smrg v3dv_return_if_oom(NULL, job); 5807ec681f3Smrg 5817ec681f3Smrg struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); 5827ec681f3Smrg 5837ec681f3Smrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 5847ec681f3Smrg 5857ec681f3Smrg assert((src->vk.image_type != VK_IMAGE_TYPE_3D && 5867ec681f3Smrg layer_offset < region->srcSubresource.layerCount) || 5877ec681f3Smrg layer_offset < src->vk.extent.depth); 5887ec681f3Smrg 5897ec681f3Smrg const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ? 5907ec681f3Smrg region->srcSubresource.baseArrayLayer + layer_offset : 5917ec681f3Smrg region->srcOffset.z + layer_offset; 5927ec681f3Smrg 5937ec681f3Smrg emit_image_load(job->device, cl, framebuffer, src, 5947ec681f3Smrg region->srcSubresource.aspectMask, 5957ec681f3Smrg src_layer, 5967ec681f3Smrg region->srcSubresource.mipLevel, 5977ec681f3Smrg false, false); 5987ec681f3Smrg 5997ec681f3Smrg cl_emit(cl, END_OF_LOADS, end); 6007ec681f3Smrg 6017ec681f3Smrg cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); 6027ec681f3Smrg 6037ec681f3Smrg assert((dst->vk.image_type != VK_IMAGE_TYPE_3D && 6047ec681f3Smrg layer_offset < region->dstSubresource.layerCount) || 6057ec681f3Smrg layer_offset < dst->vk.extent.depth); 6067ec681f3Smrg 6077ec681f3Smrg const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ? 6087ec681f3Smrg region->dstSubresource.baseArrayLayer + layer_offset : 6097ec681f3Smrg region->dstOffset.z + layer_offset; 6107ec681f3Smrg 6117ec681f3Smrg emit_image_store(job->device, cl, framebuffer, dst, 6127ec681f3Smrg region->dstSubresource.aspectMask, 6137ec681f3Smrg dst_layer, 6147ec681f3Smrg region->dstSubresource.mipLevel, 6157ec681f3Smrg false, false, true); 6167ec681f3Smrg 6177ec681f3Smrg cl_emit(cl, END_OF_TILE_MARKER, end); 6187ec681f3Smrg 6197ec681f3Smrg cl_emit(cl, RETURN_FROM_SUB_LIST, ret); 6207ec681f3Smrg 6217ec681f3Smrg cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { 6227ec681f3Smrg branch.start = tile_list_start; 6237ec681f3Smrg branch.end = v3dv_cl_get_address(cl); 6247ec681f3Smrg } 6257ec681f3Smrg} 6267ec681f3Smrg 6277ec681f3Smrgstatic void 6287ec681f3Smrgemit_resolve_image_layer(struct v3dv_job *job, 6297ec681f3Smrg struct v3dv_image *dst, 6307ec681f3Smrg struct v3dv_image *src, 6317ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 6327ec681f3Smrg uint32_t layer, 6337ec681f3Smrg const VkImageResolve2KHR *region) 6347ec681f3Smrg{ 6357ec681f3Smrg emit_resolve_image_layer_per_tile_list(job, framebuffer, 6367ec681f3Smrg dst, src, layer, region); 6377ec681f3Smrg emit_supertile_coordinates(job, framebuffer); 6387ec681f3Smrg} 6397ec681f3Smrg 6407ec681f3Smrgvoid 6417ec681f3Smrgv3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job, 6427ec681f3Smrg struct v3dv_image *dst, 6437ec681f3Smrg struct v3dv_image *src, 6447ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 6457ec681f3Smrg const VkImageResolve2KHR *region) 6467ec681f3Smrg{ 6477ec681f3Smrg struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); 6487ec681f3Smrg v3dv_return_if_oom(NULL, job); 6497ec681f3Smrg 6507ec681f3Smrg emit_frame_setup(job, 0, NULL); 6517ec681f3Smrg for (int layer = 0; layer < job->frame_tiling.layers; layer++) 6527ec681f3Smrg emit_resolve_image_layer(job, dst, src, framebuffer, layer, region); 6537ec681f3Smrg cl_emit(rcl, END_OF_RENDERING, end); 6547ec681f3Smrg} 6557ec681f3Smrg 6567ec681f3Smrgstatic void 6577ec681f3Smrgemit_copy_buffer_per_tile_list(struct v3dv_job *job, 6587ec681f3Smrg struct v3dv_bo *dst, 6597ec681f3Smrg struct v3dv_bo *src, 6607ec681f3Smrg uint32_t dst_offset, 6617ec681f3Smrg uint32_t src_offset, 6627ec681f3Smrg uint32_t stride, 6637ec681f3Smrg uint32_t format) 6647ec681f3Smrg{ 6657ec681f3Smrg struct v3dv_cl *cl = &job->indirect; 6667ec681f3Smrg v3dv_cl_ensure_space(cl, 200, 1); 6677ec681f3Smrg v3dv_return_if_oom(NULL, job); 6687ec681f3Smrg 6697ec681f3Smrg struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); 6707ec681f3Smrg 6717ec681f3Smrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 6727ec681f3Smrg 6737ec681f3Smrg emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format); 6747ec681f3Smrg 6757ec681f3Smrg cl_emit(cl, END_OF_LOADS, end); 6767ec681f3Smrg 6777ec681f3Smrg cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); 6787ec681f3Smrg 6797ec681f3Smrg emit_linear_store(cl, RENDER_TARGET_0, 6807ec681f3Smrg dst, dst_offset, stride, false, format); 6817ec681f3Smrg 6827ec681f3Smrg cl_emit(cl, END_OF_TILE_MARKER, end); 6837ec681f3Smrg 6847ec681f3Smrg cl_emit(cl, RETURN_FROM_SUB_LIST, ret); 6857ec681f3Smrg 6867ec681f3Smrg cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { 6877ec681f3Smrg branch.start = tile_list_start; 6887ec681f3Smrg branch.end = v3dv_cl_get_address(cl); 6897ec681f3Smrg } 6907ec681f3Smrg} 6917ec681f3Smrg 6927ec681f3Smrgvoid 6937ec681f3Smrgv3dX(meta_emit_copy_buffer)(struct v3dv_job *job, 6947ec681f3Smrg struct v3dv_bo *dst, 6957ec681f3Smrg struct v3dv_bo *src, 6967ec681f3Smrg uint32_t dst_offset, 6977ec681f3Smrg uint32_t src_offset, 6987ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 6997ec681f3Smrg uint32_t format, 7007ec681f3Smrg uint32_t item_size) 7017ec681f3Smrg{ 7027ec681f3Smrg const uint32_t stride = job->frame_tiling.width * item_size; 7037ec681f3Smrg emit_copy_buffer_per_tile_list(job, dst, src, 7047ec681f3Smrg dst_offset, src_offset, 7057ec681f3Smrg stride, format); 7067ec681f3Smrg emit_supertile_coordinates(job, framebuffer); 7077ec681f3Smrg} 7087ec681f3Smrg 7097ec681f3Smrgvoid 7107ec681f3Smrgv3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job, 7117ec681f3Smrg struct v3dv_bo *dst, 7127ec681f3Smrg struct v3dv_bo *src, 7137ec681f3Smrg uint32_t dst_offset, 7147ec681f3Smrg uint32_t src_offset, 7157ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 7167ec681f3Smrg uint32_t format, 7177ec681f3Smrg uint32_t item_size) 7187ec681f3Smrg{ 7197ec681f3Smrg struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); 7207ec681f3Smrg v3dv_return_if_oom(NULL, job); 7217ec681f3Smrg 7227ec681f3Smrg emit_frame_setup(job, 0, NULL); 7237ec681f3Smrg 7247ec681f3Smrg v3dX(meta_emit_copy_buffer)(job, dst, src, dst_offset, src_offset, 7257ec681f3Smrg framebuffer, format, item_size); 7267ec681f3Smrg 7277ec681f3Smrg cl_emit(rcl, END_OF_RENDERING, end); 7287ec681f3Smrg} 7297ec681f3Smrg 7307ec681f3Smrgstatic void 7317ec681f3Smrgemit_copy_image_layer_per_tile_list(struct v3dv_job *job, 7327ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 7337ec681f3Smrg struct v3dv_image *dst, 7347ec681f3Smrg struct v3dv_image *src, 7357ec681f3Smrg uint32_t layer_offset, 7367ec681f3Smrg const VkImageCopy2KHR *region) 7377ec681f3Smrg{ 7387ec681f3Smrg struct v3dv_cl *cl = &job->indirect; 7397ec681f3Smrg v3dv_cl_ensure_space(cl, 200, 1); 7407ec681f3Smrg v3dv_return_if_oom(NULL, job); 7417ec681f3Smrg 7427ec681f3Smrg struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); 7437ec681f3Smrg 7447ec681f3Smrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 7457ec681f3Smrg 7467ec681f3Smrg assert((src->vk.image_type != VK_IMAGE_TYPE_3D && 7477ec681f3Smrg layer_offset < region->srcSubresource.layerCount) || 7487ec681f3Smrg layer_offset < src->vk.extent.depth); 7497ec681f3Smrg 7507ec681f3Smrg const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ? 7517ec681f3Smrg region->srcSubresource.baseArrayLayer + layer_offset : 7527ec681f3Smrg region->srcOffset.z + layer_offset; 7537ec681f3Smrg 7547ec681f3Smrg emit_image_load(job->device, cl, framebuffer, src, 7557ec681f3Smrg region->srcSubresource.aspectMask, 7567ec681f3Smrg src_layer, 7577ec681f3Smrg region->srcSubresource.mipLevel, 7587ec681f3Smrg false, false); 7597ec681f3Smrg 7607ec681f3Smrg cl_emit(cl, END_OF_LOADS, end); 7617ec681f3Smrg 7627ec681f3Smrg cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); 7637ec681f3Smrg 7647ec681f3Smrg assert((dst->vk.image_type != VK_IMAGE_TYPE_3D && 7657ec681f3Smrg layer_offset < region->dstSubresource.layerCount) || 7667ec681f3Smrg layer_offset < dst->vk.extent.depth); 7677ec681f3Smrg 7687ec681f3Smrg const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ? 7697ec681f3Smrg region->dstSubresource.baseArrayLayer + layer_offset : 7707ec681f3Smrg region->dstOffset.z + layer_offset; 7717ec681f3Smrg 7727ec681f3Smrg emit_image_store(job->device, cl, framebuffer, dst, 7737ec681f3Smrg region->dstSubresource.aspectMask, 7747ec681f3Smrg dst_layer, 7757ec681f3Smrg region->dstSubresource.mipLevel, 7767ec681f3Smrg false, false, false); 7777ec681f3Smrg 7787ec681f3Smrg cl_emit(cl, END_OF_TILE_MARKER, end); 7797ec681f3Smrg 7807ec681f3Smrg cl_emit(cl, RETURN_FROM_SUB_LIST, ret); 7817ec681f3Smrg 7827ec681f3Smrg cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { 7837ec681f3Smrg branch.start = tile_list_start; 7847ec681f3Smrg branch.end = v3dv_cl_get_address(cl); 7857ec681f3Smrg } 7867ec681f3Smrg} 7877ec681f3Smrg 7887ec681f3Smrgstatic void 7897ec681f3Smrgemit_copy_image_layer(struct v3dv_job *job, 7907ec681f3Smrg struct v3dv_image *dst, 7917ec681f3Smrg struct v3dv_image *src, 7927ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 7937ec681f3Smrg uint32_t layer, 7947ec681f3Smrg const VkImageCopy2KHR *region) 7957ec681f3Smrg{ 7967ec681f3Smrg emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region); 7977ec681f3Smrg emit_supertile_coordinates(job, framebuffer); 7987ec681f3Smrg} 7997ec681f3Smrg 8007ec681f3Smrgvoid 8017ec681f3Smrgv3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job, 8027ec681f3Smrg struct v3dv_image *dst, 8037ec681f3Smrg struct v3dv_image *src, 8047ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 8057ec681f3Smrg const VkImageCopy2KHR *region) 8067ec681f3Smrg{ 8077ec681f3Smrg struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); 8087ec681f3Smrg v3dv_return_if_oom(NULL, job); 8097ec681f3Smrg 8107ec681f3Smrg emit_frame_setup(job, 0, NULL); 8117ec681f3Smrg for (int layer = 0; layer < job->frame_tiling.layers; layer++) 8127ec681f3Smrg emit_copy_image_layer(job, dst, src, framebuffer, layer, region); 8137ec681f3Smrg cl_emit(rcl, END_OF_RENDERING, end); 8147ec681f3Smrg} 8157ec681f3Smrg 8167ec681f3Smrgvoid 8177ec681f3Smrgv3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer, 8187ec681f3Smrg struct v3dv_image *dst, 8197ec681f3Smrg uint32_t dst_mip_level, 8207ec681f3Smrg uint32_t dst_layer, 8217ec681f3Smrg struct v3dv_image *src, 8227ec681f3Smrg uint32_t src_mip_level, 8237ec681f3Smrg uint32_t src_layer, 8247ec681f3Smrg uint32_t width, 8257ec681f3Smrg uint32_t height, 8267ec681f3Smrg const struct v3dv_format *format) 8277ec681f3Smrg{ 8287ec681f3Smrg const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level]; 8297ec681f3Smrg const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level]; 8307ec681f3Smrg 8317ec681f3Smrg assert(dst->mem && dst->mem->bo); 8327ec681f3Smrg const struct v3dv_bo *dst_bo = dst->mem->bo; 8337ec681f3Smrg 8347ec681f3Smrg assert(src->mem && src->mem->bo); 8357ec681f3Smrg const struct v3dv_bo *src_bo = src->mem->bo; 8367ec681f3Smrg 8377ec681f3Smrg struct drm_v3d_submit_tfu tfu = { 8387ec681f3Smrg .ios = (height << 16) | width, 8397ec681f3Smrg .bo_handles = { 8407ec681f3Smrg dst_bo->handle, 8417ec681f3Smrg src_bo->handle != dst_bo->handle ? src_bo->handle : 0 8427ec681f3Smrg }, 8437ec681f3Smrg }; 8447ec681f3Smrg 8457ec681f3Smrg const uint32_t src_offset = 8467ec681f3Smrg src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer); 8477ec681f3Smrg tfu.iia |= src_offset; 8487ec681f3Smrg 8497ec681f3Smrg uint32_t icfg; 8507ec681f3Smrg if (src_slice->tiling == V3D_TILING_RASTER) { 8517ec681f3Smrg icfg = V3D_TFU_ICFG_FORMAT_RASTER; 8527ec681f3Smrg } else { 8537ec681f3Smrg icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE + 8547ec681f3Smrg (src_slice->tiling - V3D_TILING_LINEARTILE); 8557ec681f3Smrg } 8567ec681f3Smrg tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT; 8577ec681f3Smrg 8587ec681f3Smrg const uint32_t dst_offset = 8597ec681f3Smrg dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer); 8607ec681f3Smrg tfu.ioa |= dst_offset; 8617ec681f3Smrg 8627ec681f3Smrg tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE + 8637ec681f3Smrg (dst_slice->tiling - V3D_TILING_LINEARTILE)) << 8647ec681f3Smrg V3D_TFU_IOA_FORMAT_SHIFT; 8657ec681f3Smrg tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT; 8667ec681f3Smrg 8677ec681f3Smrg switch (src_slice->tiling) { 8687ec681f3Smrg case V3D_TILING_UIF_NO_XOR: 8697ec681f3Smrg case V3D_TILING_UIF_XOR: 8707ec681f3Smrg tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp)); 8717ec681f3Smrg break; 8727ec681f3Smrg case V3D_TILING_RASTER: 8737ec681f3Smrg tfu.iis |= src_slice->stride / src->cpp; 8747ec681f3Smrg break; 8757ec681f3Smrg default: 8767ec681f3Smrg break; 8777ec681f3Smrg } 8787ec681f3Smrg 8797ec681f3Smrg /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the 8807ec681f3Smrg * OPAD field for the destination (how many extra UIF blocks beyond 8817ec681f3Smrg * those necessary to cover the height). 8827ec681f3Smrg */ 8837ec681f3Smrg if (dst_slice->tiling == V3D_TILING_UIF_NO_XOR || 8847ec681f3Smrg dst_slice->tiling == V3D_TILING_UIF_XOR) { 8857ec681f3Smrg uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp); 8867ec681f3Smrg uint32_t implicit_padded_height = align(height, uif_block_h); 8877ec681f3Smrg uint32_t icfg = 8887ec681f3Smrg (dst_slice->padded_height - implicit_padded_height) / uif_block_h; 8897ec681f3Smrg tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT; 8907ec681f3Smrg } 8917ec681f3Smrg 8927ec681f3Smrg v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu); 8937ec681f3Smrg} 8947ec681f3Smrg 8957ec681f3Smrgstatic void 8967ec681f3Smrgemit_clear_image_layer_per_tile_list(struct v3dv_job *job, 8977ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 8987ec681f3Smrg struct v3dv_image *image, 8997ec681f3Smrg VkImageAspectFlags aspects, 9007ec681f3Smrg uint32_t layer, 9017ec681f3Smrg uint32_t level) 9027ec681f3Smrg{ 9037ec681f3Smrg struct v3dv_cl *cl = &job->indirect; 9047ec681f3Smrg v3dv_cl_ensure_space(cl, 200, 1); 9057ec681f3Smrg v3dv_return_if_oom(NULL, job); 9067ec681f3Smrg 9077ec681f3Smrg struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); 9087ec681f3Smrg 9097ec681f3Smrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 9107ec681f3Smrg 9117ec681f3Smrg cl_emit(cl, END_OF_LOADS, end); 9127ec681f3Smrg 9137ec681f3Smrg cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); 9147ec681f3Smrg 9157ec681f3Smrg emit_image_store(job->device, cl, framebuffer, image, aspects, 9167ec681f3Smrg layer, level, false, false, false); 9177ec681f3Smrg 9187ec681f3Smrg cl_emit(cl, END_OF_TILE_MARKER, end); 9197ec681f3Smrg 9207ec681f3Smrg cl_emit(cl, RETURN_FROM_SUB_LIST, ret); 9217ec681f3Smrg 9227ec681f3Smrg cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { 9237ec681f3Smrg branch.start = tile_list_start; 9247ec681f3Smrg branch.end = v3dv_cl_get_address(cl); 9257ec681f3Smrg } 9267ec681f3Smrg} 9277ec681f3Smrg 9287ec681f3Smrgstatic void 9297ec681f3Smrgemit_clear_image_layers(struct v3dv_job *job, 9307ec681f3Smrg struct v3dv_image *image, 9317ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 9327ec681f3Smrg VkImageAspectFlags aspects, 9337ec681f3Smrg uint32_t min_layer, 9347ec681f3Smrg uint32_t max_layer, 9357ec681f3Smrg uint32_t level) 9367ec681f3Smrg{ 9377ec681f3Smrg for (uint32_t layer = min_layer; layer < max_layer; layer++) { 9387ec681f3Smrg emit_clear_image_layer_per_tile_list(job, framebuffer, image, aspects, 9397ec681f3Smrg layer, level); 9407ec681f3Smrg emit_supertile_coordinates(job, framebuffer); 9417ec681f3Smrg } 9427ec681f3Smrg} 9437ec681f3Smrg 9447ec681f3Smrgvoid 9457ec681f3Smrgv3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job, 9467ec681f3Smrg struct v3dv_image *image, 9477ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 9487ec681f3Smrg const union v3dv_clear_value *clear_value, 9497ec681f3Smrg VkImageAspectFlags aspects, 9507ec681f3Smrg uint32_t min_layer, 9517ec681f3Smrg uint32_t max_layer, 9527ec681f3Smrg uint32_t level) 9537ec681f3Smrg{ 9547ec681f3Smrg const struct rcl_clear_info clear_info = { 9557ec681f3Smrg .clear_value = clear_value, 9567ec681f3Smrg .image = image, 9577ec681f3Smrg .aspects = aspects, 9587ec681f3Smrg .level = level, 9597ec681f3Smrg }; 9607ec681f3Smrg 9617ec681f3Smrg struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info); 9627ec681f3Smrg v3dv_return_if_oom(NULL, job); 9637ec681f3Smrg 9647ec681f3Smrg emit_frame_setup(job, 0, clear_value); 9657ec681f3Smrg emit_clear_image_layers(job, image, framebuffer, aspects, 9667ec681f3Smrg min_layer, max_layer, level); 9677ec681f3Smrg cl_emit(rcl, END_OF_RENDERING, end); 9687ec681f3Smrg} 9697ec681f3Smrg 9707ec681f3Smrgstatic void 9717ec681f3Smrgemit_fill_buffer_per_tile_list(struct v3dv_job *job, 9727ec681f3Smrg struct v3dv_bo *bo, 9737ec681f3Smrg uint32_t offset, 9747ec681f3Smrg uint32_t stride) 9757ec681f3Smrg{ 9767ec681f3Smrg struct v3dv_cl *cl = &job->indirect; 9777ec681f3Smrg v3dv_cl_ensure_space(cl, 200, 1); 9787ec681f3Smrg v3dv_return_if_oom(NULL, job); 9797ec681f3Smrg 9807ec681f3Smrg struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); 9817ec681f3Smrg 9827ec681f3Smrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 9837ec681f3Smrg 9847ec681f3Smrg cl_emit(cl, END_OF_LOADS, end); 9857ec681f3Smrg 9867ec681f3Smrg cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); 9877ec681f3Smrg 9887ec681f3Smrg emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false, 9897ec681f3Smrg V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI); 9907ec681f3Smrg 9917ec681f3Smrg cl_emit(cl, END_OF_TILE_MARKER, end); 9927ec681f3Smrg 9937ec681f3Smrg cl_emit(cl, RETURN_FROM_SUB_LIST, ret); 9947ec681f3Smrg 9957ec681f3Smrg cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { 9967ec681f3Smrg branch.start = tile_list_start; 9977ec681f3Smrg branch.end = v3dv_cl_get_address(cl); 9987ec681f3Smrg } 9997ec681f3Smrg} 10007ec681f3Smrg 10017ec681f3Smrgstatic void 10027ec681f3Smrgemit_fill_buffer(struct v3dv_job *job, 10037ec681f3Smrg struct v3dv_bo *bo, 10047ec681f3Smrg uint32_t offset, 10057ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer) 10067ec681f3Smrg{ 10077ec681f3Smrg const uint32_t stride = job->frame_tiling.width * 4; 10087ec681f3Smrg emit_fill_buffer_per_tile_list(job, bo, offset, stride); 10097ec681f3Smrg emit_supertile_coordinates(job, framebuffer); 10107ec681f3Smrg} 10117ec681f3Smrg 10127ec681f3Smrgvoid 10137ec681f3Smrgv3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job, 10147ec681f3Smrg struct v3dv_bo *bo, 10157ec681f3Smrg uint32_t offset, 10167ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 10177ec681f3Smrg uint32_t data) 10187ec681f3Smrg{ 10197ec681f3Smrg const union v3dv_clear_value clear_value = { 10207ec681f3Smrg .color = { data, 0, 0, 0 }, 10217ec681f3Smrg }; 10227ec681f3Smrg 10237ec681f3Smrg const struct rcl_clear_info clear_info = { 10247ec681f3Smrg .clear_value = &clear_value, 10257ec681f3Smrg .image = NULL, 10267ec681f3Smrg .aspects = VK_IMAGE_ASPECT_COLOR_BIT, 10277ec681f3Smrg .level = 0, 10287ec681f3Smrg }; 10297ec681f3Smrg 10307ec681f3Smrg struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info); 10317ec681f3Smrg v3dv_return_if_oom(NULL, job); 10327ec681f3Smrg 10337ec681f3Smrg emit_frame_setup(job, 0, &clear_value); 10347ec681f3Smrg emit_fill_buffer(job, bo, offset, framebuffer); 10357ec681f3Smrg cl_emit(rcl, END_OF_RENDERING, end); 10367ec681f3Smrg} 10377ec681f3Smrg 10387ec681f3Smrg 10397ec681f3Smrgstatic void 10407ec681f3Smrgemit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job, 10417ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 10427ec681f3Smrg struct v3dv_image *image, 10437ec681f3Smrg struct v3dv_buffer *buffer, 10447ec681f3Smrg uint32_t layer, 10457ec681f3Smrg const VkBufferImageCopy2KHR *region) 10467ec681f3Smrg{ 10477ec681f3Smrg struct v3dv_cl *cl = &job->indirect; 10487ec681f3Smrg v3dv_cl_ensure_space(cl, 200, 1); 10497ec681f3Smrg v3dv_return_if_oom(NULL, job); 10507ec681f3Smrg 10517ec681f3Smrg struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); 10527ec681f3Smrg 10537ec681f3Smrg cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); 10547ec681f3Smrg 10557ec681f3Smrg const VkImageSubresourceLayers *imgrsc = ®ion->imageSubresource; 10567ec681f3Smrg assert((image->vk.image_type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) || 10577ec681f3Smrg layer < image->vk.extent.depth); 10587ec681f3Smrg 10597ec681f3Smrg /* Load TLB from buffer */ 10607ec681f3Smrg uint32_t width, height; 10617ec681f3Smrg if (region->bufferRowLength == 0) 10627ec681f3Smrg width = region->imageExtent.width; 10637ec681f3Smrg else 10647ec681f3Smrg width = region->bufferRowLength; 10657ec681f3Smrg 10667ec681f3Smrg if (region->bufferImageHeight == 0) 10677ec681f3Smrg height = region->imageExtent.height; 10687ec681f3Smrg else 10697ec681f3Smrg height = region->bufferImageHeight; 10707ec681f3Smrg 10717ec681f3Smrg /* Handle copy to compressed format using a compatible format */ 10727ec681f3Smrg width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format)); 10737ec681f3Smrg height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format)); 10747ec681f3Smrg 10757ec681f3Smrg uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? 10767ec681f3Smrg 1 : image->cpp; 10777ec681f3Smrg uint32_t buffer_stride = width * cpp; 10787ec681f3Smrg uint32_t buffer_offset = 10797ec681f3Smrg buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer; 10807ec681f3Smrg 10817ec681f3Smrg uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask, 10827ec681f3Smrg false, false, true); 10837ec681f3Smrg 10847ec681f3Smrg emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo, 10857ec681f3Smrg buffer_offset, buffer_stride, format); 10867ec681f3Smrg 10877ec681f3Smrg /* Because we can't do raster loads/stores of Z/S formats we need to 10887ec681f3Smrg * use a color tile buffer with a compatible RGBA color format instead. 10897ec681f3Smrg * However, when we are uploading a single aspect to a combined 10907ec681f3Smrg * depth/stencil image we have the problem that our tile buffer stores don't 10917ec681f3Smrg * allow us to mask out the other aspect, so we always write all four RGBA 10927ec681f3Smrg * channels to the image and we end up overwriting that other aspect with 10937ec681f3Smrg * undefined values. To work around that, we first load the aspect we are 10947ec681f3Smrg * not copying from the image memory into a proper Z/S tile buffer. Then we 10957ec681f3Smrg * do our store from the color buffer for the aspect we are copying, and 10967ec681f3Smrg * after that, we do another store from the Z/S tile buffer to restore the 10977ec681f3Smrg * other aspect to its original value. 10987ec681f3Smrg */ 10997ec681f3Smrg if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { 11007ec681f3Smrg if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { 11017ec681f3Smrg emit_image_load(job->device, cl, framebuffer, image, 11027ec681f3Smrg VK_IMAGE_ASPECT_STENCIL_BIT, 11037ec681f3Smrg imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, 11047ec681f3Smrg false, false); 11057ec681f3Smrg } else { 11067ec681f3Smrg assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT); 11077ec681f3Smrg emit_image_load(job->device, cl, framebuffer, image, 11087ec681f3Smrg VK_IMAGE_ASPECT_DEPTH_BIT, 11097ec681f3Smrg imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, 11107ec681f3Smrg false, false); 11117ec681f3Smrg } 11127ec681f3Smrg } 11137ec681f3Smrg 11147ec681f3Smrg cl_emit(cl, END_OF_LOADS, end); 11157ec681f3Smrg 11167ec681f3Smrg cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); 11177ec681f3Smrg 11187ec681f3Smrg /* Store TLB to image */ 11197ec681f3Smrg emit_image_store(job->device, cl, framebuffer, image, imgrsc->aspectMask, 11207ec681f3Smrg imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, 11217ec681f3Smrg false, true, false); 11227ec681f3Smrg 11237ec681f3Smrg if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { 11247ec681f3Smrg if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { 11257ec681f3Smrg emit_image_store(job->device, cl, framebuffer, image, 11267ec681f3Smrg VK_IMAGE_ASPECT_STENCIL_BIT, 11277ec681f3Smrg imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, 11287ec681f3Smrg false, false, false); 11297ec681f3Smrg } else { 11307ec681f3Smrg assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT); 11317ec681f3Smrg emit_image_store(job->device, cl, framebuffer, image, 11327ec681f3Smrg VK_IMAGE_ASPECT_DEPTH_BIT, 11337ec681f3Smrg imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, 11347ec681f3Smrg false, false, false); 11357ec681f3Smrg } 11367ec681f3Smrg } 11377ec681f3Smrg 11387ec681f3Smrg cl_emit(cl, END_OF_TILE_MARKER, end); 11397ec681f3Smrg 11407ec681f3Smrg cl_emit(cl, RETURN_FROM_SUB_LIST, ret); 11417ec681f3Smrg 11427ec681f3Smrg cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { 11437ec681f3Smrg branch.start = tile_list_start; 11447ec681f3Smrg branch.end = v3dv_cl_get_address(cl); 11457ec681f3Smrg } 11467ec681f3Smrg} 11477ec681f3Smrg 11487ec681f3Smrgstatic void 11497ec681f3Smrgemit_copy_buffer_to_layer(struct v3dv_job *job, 11507ec681f3Smrg struct v3dv_image *image, 11517ec681f3Smrg struct v3dv_buffer *buffer, 11527ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 11537ec681f3Smrg uint32_t layer, 11547ec681f3Smrg const VkBufferImageCopy2KHR *region) 11557ec681f3Smrg{ 11567ec681f3Smrg emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer, 11577ec681f3Smrg layer, region); 11587ec681f3Smrg emit_supertile_coordinates(job, framebuffer); 11597ec681f3Smrg} 11607ec681f3Smrg 11617ec681f3Smrgvoid 11627ec681f3Smrgv3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job, 11637ec681f3Smrg struct v3dv_image *image, 11647ec681f3Smrg struct v3dv_buffer *buffer, 11657ec681f3Smrg struct v3dv_meta_framebuffer *framebuffer, 11667ec681f3Smrg const VkBufferImageCopy2KHR *region) 11677ec681f3Smrg{ 11687ec681f3Smrg struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); 11697ec681f3Smrg v3dv_return_if_oom(NULL, job); 11707ec681f3Smrg 11717ec681f3Smrg emit_frame_setup(job, 0, NULL); 11727ec681f3Smrg for (int layer = 0; layer < job->frame_tiling.layers; layer++) 11737ec681f3Smrg emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region); 11747ec681f3Smrg cl_emit(rcl, END_OF_RENDERING, end); 11757ec681f3Smrg} 11767ec681f3Smrg 11777ec681f3Smrg/* Figure out a TLB size configuration for a number of pixels to process. 11787ec681f3Smrg * Beware that we can't "render" more than 4096x4096 pixels in a single job, 11797ec681f3Smrg * if the pixel count is larger than this, the caller might need to split 11807ec681f3Smrg * the job and call this function multiple times. 11817ec681f3Smrg */ 11827ec681f3Smrgstatic void 11837ec681f3Smrgframebuffer_size_for_pixel_count(uint32_t num_pixels, 11847ec681f3Smrg uint32_t *width, 11857ec681f3Smrg uint32_t *height) 11867ec681f3Smrg{ 11877ec681f3Smrg assert(num_pixels > 0); 11887ec681f3Smrg 11897ec681f3Smrg const uint32_t max_dim_pixels = 4096; 11907ec681f3Smrg const uint32_t max_pixels = max_dim_pixels * max_dim_pixels; 11917ec681f3Smrg 11927ec681f3Smrg uint32_t w, h; 11937ec681f3Smrg if (num_pixels > max_pixels) { 11947ec681f3Smrg w = max_dim_pixels; 11957ec681f3Smrg h = max_dim_pixels; 11967ec681f3Smrg } else { 11977ec681f3Smrg w = num_pixels; 11987ec681f3Smrg h = 1; 11997ec681f3Smrg while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) { 12007ec681f3Smrg w >>= 1; 12017ec681f3Smrg h <<= 1; 12027ec681f3Smrg } 12037ec681f3Smrg } 12047ec681f3Smrg assert(w <= max_dim_pixels && h <= max_dim_pixels); 12057ec681f3Smrg assert(w * h <= num_pixels); 12067ec681f3Smrg assert(w > 0 && h > 0); 12077ec681f3Smrg 12087ec681f3Smrg *width = w; 12097ec681f3Smrg *height = h; 12107ec681f3Smrg} 12117ec681f3Smrg 12127ec681f3Smrgstruct v3dv_job * 12137ec681f3Smrgv3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer, 12147ec681f3Smrg struct v3dv_bo *dst, 12157ec681f3Smrg uint32_t dst_offset, 12167ec681f3Smrg struct v3dv_bo *src, 12177ec681f3Smrg uint32_t src_offset, 12187ec681f3Smrg const VkBufferCopy2KHR *region) 12197ec681f3Smrg{ 12207ec681f3Smrg const uint32_t internal_bpp = V3D_INTERNAL_BPP_32; 12217ec681f3Smrg const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI; 12227ec681f3Smrg 12237ec681f3Smrg /* Select appropriate pixel format for the copy operation based on the 12247ec681f3Smrg * size to copy and the alignment of the source and destination offsets. 12257ec681f3Smrg */ 12267ec681f3Smrg src_offset += region->srcOffset; 12277ec681f3Smrg dst_offset += region->dstOffset; 12287ec681f3Smrg uint32_t item_size = 4; 12297ec681f3Smrg while (item_size > 1 && 12307ec681f3Smrg (src_offset % item_size != 0 || dst_offset % item_size != 0)) { 12317ec681f3Smrg item_size /= 2; 12327ec681f3Smrg } 12337ec681f3Smrg 12347ec681f3Smrg while (item_size > 1 && region->size % item_size != 0) 12357ec681f3Smrg item_size /= 2; 12367ec681f3Smrg 12377ec681f3Smrg assert(region->size % item_size == 0); 12387ec681f3Smrg uint32_t num_items = region->size / item_size; 12397ec681f3Smrg assert(num_items > 0); 12407ec681f3Smrg 12417ec681f3Smrg uint32_t format; 12427ec681f3Smrg VkFormat vk_format; 12437ec681f3Smrg switch (item_size) { 12447ec681f3Smrg case 4: 12457ec681f3Smrg format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; 12467ec681f3Smrg vk_format = VK_FORMAT_R8G8B8A8_UINT; 12477ec681f3Smrg break; 12487ec681f3Smrg case 2: 12497ec681f3Smrg format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI; 12507ec681f3Smrg vk_format = VK_FORMAT_R8G8_UINT; 12517ec681f3Smrg break; 12527ec681f3Smrg default: 12537ec681f3Smrg format = V3D_OUTPUT_IMAGE_FORMAT_R8UI; 12547ec681f3Smrg vk_format = VK_FORMAT_R8_UINT; 12557ec681f3Smrg break; 12567ec681f3Smrg } 12577ec681f3Smrg 12587ec681f3Smrg struct v3dv_job *job = NULL; 12597ec681f3Smrg while (num_items > 0) { 12607ec681f3Smrg job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); 12617ec681f3Smrg if (!job) 12627ec681f3Smrg return NULL; 12637ec681f3Smrg 12647ec681f3Smrg uint32_t width, height; 12657ec681f3Smrg framebuffer_size_for_pixel_count(num_items, &width, &height); 12667ec681f3Smrg 12677ec681f3Smrg v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false); 12687ec681f3Smrg 12697ec681f3Smrg struct v3dv_meta_framebuffer framebuffer; 12707ec681f3Smrg v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type, 12717ec681f3Smrg &job->frame_tiling); 12727ec681f3Smrg 12737ec681f3Smrg v3dX(job_emit_binning_flush)(job); 12747ec681f3Smrg 12757ec681f3Smrg v3dX(meta_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset, 12767ec681f3Smrg &framebuffer, format, item_size); 12777ec681f3Smrg 12787ec681f3Smrg v3dv_cmd_buffer_finish_job(cmd_buffer); 12797ec681f3Smrg 12807ec681f3Smrg const uint32_t items_copied = width * height; 12817ec681f3Smrg const uint32_t bytes_copied = items_copied * item_size; 12827ec681f3Smrg num_items -= items_copied; 12837ec681f3Smrg src_offset += bytes_copied; 12847ec681f3Smrg dst_offset += bytes_copied; 12857ec681f3Smrg } 12867ec681f3Smrg 12877ec681f3Smrg return job; 12887ec681f3Smrg} 12897ec681f3Smrg 12907ec681f3Smrgvoid 12917ec681f3Smrgv3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer, 12927ec681f3Smrg struct v3dv_bo *bo, 12937ec681f3Smrg uint32_t offset, 12947ec681f3Smrg uint32_t size, 12957ec681f3Smrg uint32_t data) 12967ec681f3Smrg{ 12977ec681f3Smrg assert(size > 0 && size % 4 == 0); 12987ec681f3Smrg assert(offset + size <= bo->size); 12997ec681f3Smrg 13007ec681f3Smrg const uint32_t internal_bpp = V3D_INTERNAL_BPP_32; 13017ec681f3Smrg const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI; 13027ec681f3Smrg uint32_t num_items = size / 4; 13037ec681f3Smrg 13047ec681f3Smrg while (num_items > 0) { 13057ec681f3Smrg struct v3dv_job *job = 13067ec681f3Smrg v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); 13077ec681f3Smrg if (!job) 13087ec681f3Smrg return; 13097ec681f3Smrg 13107ec681f3Smrg uint32_t width, height; 13117ec681f3Smrg framebuffer_size_for_pixel_count(num_items, &width, &height); 13127ec681f3Smrg 13137ec681f3Smrg v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false); 13147ec681f3Smrg 13157ec681f3Smrg struct v3dv_meta_framebuffer framebuffer; 13167ec681f3Smrg v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT, 13177ec681f3Smrg internal_type, &job->frame_tiling); 13187ec681f3Smrg 13197ec681f3Smrg v3dX(job_emit_binning_flush)(job); 13207ec681f3Smrg 13217ec681f3Smrg v3dX(meta_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data); 13227ec681f3Smrg 13237ec681f3Smrg v3dv_cmd_buffer_finish_job(cmd_buffer); 13247ec681f3Smrg 13257ec681f3Smrg const uint32_t items_copied = width * height; 13267ec681f3Smrg const uint32_t bytes_copied = items_copied * 4; 13277ec681f3Smrg num_items -= items_copied; 13287ec681f3Smrg offset += bytes_copied; 13297ec681f3Smrg } 13307ec681f3Smrg} 13317ec681f3Smrg 13327ec681f3Smrgvoid 13337ec681f3Smrgv3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb, 13347ec681f3Smrg VkFormat vk_format, 13357ec681f3Smrg uint32_t internal_type, 13367ec681f3Smrg const struct v3dv_frame_tiling *tiling) 13377ec681f3Smrg{ 13387ec681f3Smrg fb->internal_type = internal_type; 13397ec681f3Smrg 13407ec681f3Smrg /* Supertile coverage always starts at 0,0 */ 13417ec681f3Smrg uint32_t supertile_w_in_pixels = 13427ec681f3Smrg tiling->tile_width * tiling->supertile_width; 13437ec681f3Smrg uint32_t supertile_h_in_pixels = 13447ec681f3Smrg tiling->tile_height * tiling->supertile_height; 13457ec681f3Smrg 13467ec681f3Smrg fb->min_x_supertile = 0; 13477ec681f3Smrg fb->min_y_supertile = 0; 13487ec681f3Smrg fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels; 13497ec681f3Smrg fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels; 13507ec681f3Smrg 13517ec681f3Smrg fb->vk_format = vk_format; 13527ec681f3Smrg fb->format = v3dX(get_format)(vk_format); 13537ec681f3Smrg 13547ec681f3Smrg fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F; 13557ec681f3Smrg if (vk_format_is_depth_or_stencil(vk_format)) 13567ec681f3Smrg fb->internal_depth_type = v3dX(get_internal_depth_type)(vk_format); 13577ec681f3Smrg} 1358