17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2021 Raspberry Pi
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include "v3dv_private.h"
257ec681f3Smrg#include "v3dv_meta_common.h"
267ec681f3Smrg
277ec681f3Smrg#include "broadcom/common/v3d_macros.h"
287ec681f3Smrg#include "broadcom/cle/v3dx_pack.h"
297ec681f3Smrg#include "broadcom/compiler/v3d_compiler.h"
307ec681f3Smrg
317ec681f3Smrg#include "vk_format_info.h"
327ec681f3Smrg
337ec681f3Smrgstruct rcl_clear_info {
347ec681f3Smrg   const union v3dv_clear_value *clear_value;
357ec681f3Smrg   struct v3dv_image *image;
367ec681f3Smrg   VkImageAspectFlags aspects;
377ec681f3Smrg   uint32_t level;
387ec681f3Smrg};
397ec681f3Smrg
407ec681f3Smrgstatic struct v3dv_cl *
417ec681f3Smrgemit_rcl_prologue(struct v3dv_job *job,
427ec681f3Smrg                  struct v3dv_meta_framebuffer *fb,
437ec681f3Smrg                  const struct rcl_clear_info *clear_info)
447ec681f3Smrg{
457ec681f3Smrg   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
467ec681f3Smrg
477ec681f3Smrg   struct v3dv_cl *rcl = &job->rcl;
487ec681f3Smrg   v3dv_cl_ensure_space_with_branch(rcl, 200 +
497ec681f3Smrg                                    tiling->layers * 256 *
507ec681f3Smrg                                    cl_packet_length(SUPERTILE_COORDINATES));
517ec681f3Smrg   if (job->cmd_buffer->state.oom)
527ec681f3Smrg      return NULL;
537ec681f3Smrg
547ec681f3Smrg   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
557ec681f3Smrg      config.early_z_disable = true;
567ec681f3Smrg      config.image_width_pixels = tiling->width;
577ec681f3Smrg      config.image_height_pixels = tiling->height;
587ec681f3Smrg      config.number_of_render_targets = 1;
597ec681f3Smrg      config.multisample_mode_4x = tiling->msaa;
607ec681f3Smrg      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
617ec681f3Smrg      config.internal_depth_type = fb->internal_depth_type;
627ec681f3Smrg   }
637ec681f3Smrg
647ec681f3Smrg   if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
657ec681f3Smrg      uint32_t clear_pad = 0;
667ec681f3Smrg      if (clear_info->image) {
677ec681f3Smrg         const struct v3dv_image *image = clear_info->image;
687ec681f3Smrg         const struct v3d_resource_slice *slice =
697ec681f3Smrg            &image->slices[clear_info->level];
707ec681f3Smrg         if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
717ec681f3Smrg             slice->tiling == V3D_TILING_UIF_XOR) {
727ec681f3Smrg            int uif_block_height = v3d_utile_height(image->cpp) * 2;
737ec681f3Smrg
747ec681f3Smrg            uint32_t implicit_padded_height =
757ec681f3Smrg               align(tiling->height, uif_block_height) / uif_block_height;
767ec681f3Smrg
777ec681f3Smrg            if (slice->padded_height_of_output_image_in_uif_blocks -
787ec681f3Smrg                implicit_padded_height >= 15) {
797ec681f3Smrg               clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
807ec681f3Smrg            }
817ec681f3Smrg         }
827ec681f3Smrg      }
837ec681f3Smrg
847ec681f3Smrg      const uint32_t *color = &clear_info->clear_value->color[0];
857ec681f3Smrg      cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
867ec681f3Smrg         clear.clear_color_low_32_bits = color[0];
877ec681f3Smrg         clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
887ec681f3Smrg         clear.render_target_number = 0;
897ec681f3Smrg      };
907ec681f3Smrg
917ec681f3Smrg      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
927ec681f3Smrg         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
937ec681f3Smrg            clear.clear_color_mid_low_32_bits =
947ec681f3Smrg              ((color[1] >> 24) | (color[2] << 8));
957ec681f3Smrg            clear.clear_color_mid_high_24_bits =
967ec681f3Smrg              ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
977ec681f3Smrg            clear.render_target_number = 0;
987ec681f3Smrg         };
997ec681f3Smrg      }
1007ec681f3Smrg
1017ec681f3Smrg      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
1027ec681f3Smrg         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
1037ec681f3Smrg            clear.uif_padded_height_in_uif_blocks = clear_pad;
1047ec681f3Smrg            clear.clear_color_high_16_bits = color[3] >> 16;
1057ec681f3Smrg            clear.render_target_number = 0;
1067ec681f3Smrg         };
1077ec681f3Smrg      }
1087ec681f3Smrg   }
1097ec681f3Smrg
1107ec681f3Smrg   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
1117ec681f3Smrg      rt.render_target_0_internal_bpp = tiling->internal_bpp;
1127ec681f3Smrg      rt.render_target_0_internal_type = fb->internal_type;
1137ec681f3Smrg      rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
1147ec681f3Smrg   }
1157ec681f3Smrg
1167ec681f3Smrg   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
1177ec681f3Smrg      clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
1187ec681f3Smrg      clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
1197ec681f3Smrg   };
1207ec681f3Smrg
1217ec681f3Smrg   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
1227ec681f3Smrg      init.use_auto_chained_tile_lists = true;
1237ec681f3Smrg      init.size_of_first_block_in_chained_tile_lists =
1247ec681f3Smrg         TILE_ALLOCATION_BLOCK_SIZE_64B;
1257ec681f3Smrg   }
1267ec681f3Smrg
1277ec681f3Smrg   return rcl;
1287ec681f3Smrg}
1297ec681f3Smrg
1307ec681f3Smrgstatic void
1317ec681f3Smrgemit_frame_setup(struct v3dv_job *job,
1327ec681f3Smrg                 uint32_t min_layer,
1337ec681f3Smrg                 const union v3dv_clear_value *clear_value)
1347ec681f3Smrg{
1357ec681f3Smrg   v3dv_return_if_oom(NULL, job);
1367ec681f3Smrg
1377ec681f3Smrg   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
1387ec681f3Smrg
1397ec681f3Smrg   struct v3dv_cl *rcl = &job->rcl;
1407ec681f3Smrg
1417ec681f3Smrg   const uint32_t tile_alloc_offset =
1427ec681f3Smrg      64 * min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
1437ec681f3Smrg   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
1447ec681f3Smrg      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
1457ec681f3Smrg   }
1467ec681f3Smrg
1477ec681f3Smrg   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
1487ec681f3Smrg      config.number_of_bin_tile_lists = 1;
1497ec681f3Smrg      config.total_frame_width_in_tiles = tiling->draw_tiles_x;
1507ec681f3Smrg      config.total_frame_height_in_tiles = tiling->draw_tiles_y;
1517ec681f3Smrg
1527ec681f3Smrg      config.supertile_width_in_tiles = tiling->supertile_width;
1537ec681f3Smrg      config.supertile_height_in_tiles = tiling->supertile_height;
1547ec681f3Smrg
1557ec681f3Smrg      config.total_frame_width_in_supertiles =
1567ec681f3Smrg         tiling->frame_width_in_supertiles;
1577ec681f3Smrg      config.total_frame_height_in_supertiles =
1587ec681f3Smrg         tiling->frame_height_in_supertiles;
1597ec681f3Smrg   }
1607ec681f3Smrg
1617ec681f3Smrg   /* Implement GFXH-1742 workaround. Also, if we are clearing we have to do
1627ec681f3Smrg    * it here.
1637ec681f3Smrg    */
1647ec681f3Smrg   for (int i = 0; i < 2; i++) {
1657ec681f3Smrg      cl_emit(rcl, TILE_COORDINATES, coords);
1667ec681f3Smrg      cl_emit(rcl, END_OF_LOADS, end);
1677ec681f3Smrg      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
1687ec681f3Smrg         store.buffer_to_store = NONE;
1697ec681f3Smrg      }
1707ec681f3Smrg      if (clear_value && i == 0) {
1717ec681f3Smrg         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
1727ec681f3Smrg            clear.clear_z_stencil_buffer = true;
1737ec681f3Smrg            clear.clear_all_render_targets = true;
1747ec681f3Smrg         }
1757ec681f3Smrg      }
1767ec681f3Smrg      cl_emit(rcl, END_OF_TILE_MARKER, end);
1777ec681f3Smrg   }
1787ec681f3Smrg
1797ec681f3Smrg   cl_emit(rcl, FLUSH_VCD_CACHE, flush);
1807ec681f3Smrg}
1817ec681f3Smrg
1827ec681f3Smrgstatic void
1837ec681f3Smrgemit_supertile_coordinates(struct v3dv_job *job,
1847ec681f3Smrg                           struct v3dv_meta_framebuffer *framebuffer)
1857ec681f3Smrg{
1867ec681f3Smrg   v3dv_return_if_oom(NULL, job);
1877ec681f3Smrg
1887ec681f3Smrg   struct v3dv_cl *rcl = &job->rcl;
1897ec681f3Smrg
1907ec681f3Smrg   const uint32_t min_y = framebuffer->min_y_supertile;
1917ec681f3Smrg   const uint32_t max_y = framebuffer->max_y_supertile;
1927ec681f3Smrg   const uint32_t min_x = framebuffer->min_x_supertile;
1937ec681f3Smrg   const uint32_t max_x = framebuffer->max_x_supertile;
1947ec681f3Smrg
1957ec681f3Smrg   for (int y = min_y; y <= max_y; y++) {
1967ec681f3Smrg      for (int x = min_x; x <= max_x; x++) {
1977ec681f3Smrg         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
1987ec681f3Smrg            coords.column_number_in_supertiles = x;
1997ec681f3Smrg            coords.row_number_in_supertiles = y;
2007ec681f3Smrg         }
2017ec681f3Smrg      }
2027ec681f3Smrg   }
2037ec681f3Smrg}
2047ec681f3Smrg
2057ec681f3Smrgstatic void
2067ec681f3Smrgemit_linear_load(struct v3dv_cl *cl,
2077ec681f3Smrg                 uint32_t buffer,
2087ec681f3Smrg                 struct v3dv_bo *bo,
2097ec681f3Smrg                 uint32_t offset,
2107ec681f3Smrg                 uint32_t stride,
2117ec681f3Smrg                 uint32_t format)
2127ec681f3Smrg{
2137ec681f3Smrg   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
2147ec681f3Smrg      load.buffer_to_load = buffer;
2157ec681f3Smrg      load.address = v3dv_cl_address(bo, offset);
2167ec681f3Smrg      load.input_image_format = format;
2177ec681f3Smrg      load.memory_format = V3D_TILING_RASTER;
2187ec681f3Smrg      load.height_in_ub_or_stride = stride;
2197ec681f3Smrg      load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
2207ec681f3Smrg   }
2217ec681f3Smrg}
2227ec681f3Smrg
2237ec681f3Smrgstatic void
2247ec681f3Smrgemit_linear_store(struct v3dv_cl *cl,
2257ec681f3Smrg                  uint32_t buffer,
2267ec681f3Smrg                  struct v3dv_bo *bo,
2277ec681f3Smrg                  uint32_t offset,
2287ec681f3Smrg                  uint32_t stride,
2297ec681f3Smrg                  bool msaa,
2307ec681f3Smrg                  uint32_t format)
2317ec681f3Smrg{
2327ec681f3Smrg   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
2337ec681f3Smrg      store.buffer_to_store = RENDER_TARGET_0;
2347ec681f3Smrg      store.address = v3dv_cl_address(bo, offset);
2357ec681f3Smrg      store.clear_buffer_being_stored = false;
2367ec681f3Smrg      store.output_image_format = format;
2377ec681f3Smrg      store.memory_format = V3D_TILING_RASTER;
2387ec681f3Smrg      store.height_in_ub_or_stride = stride;
2397ec681f3Smrg      store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
2407ec681f3Smrg                                   V3D_DECIMATE_MODE_SAMPLE_0;
2417ec681f3Smrg   }
2427ec681f3Smrg}
2437ec681f3Smrg
2447ec681f3Smrg/* This chooses a tile buffer format that is appropriate for the copy operation.
2457ec681f3Smrg * Typically, this is the image render target type, however, if we are copying
2467ec681f3Smrg * depth/stencil to/from a buffer the hardware can't do raster loads/stores, so
2477ec681f3Smrg * we need to load and store to/from a tile color buffer using a compatible
2487ec681f3Smrg * color format.
2497ec681f3Smrg */
2507ec681f3Smrgstatic uint32_t
2517ec681f3Smrgchoose_tlb_format(struct v3dv_meta_framebuffer *framebuffer,
2527ec681f3Smrg                  VkImageAspectFlags aspect,
2537ec681f3Smrg                  bool for_store,
2547ec681f3Smrg                  bool is_copy_to_buffer,
2557ec681f3Smrg                  bool is_copy_from_buffer)
2567ec681f3Smrg{
2577ec681f3Smrg   if (is_copy_to_buffer || is_copy_from_buffer) {
2587ec681f3Smrg      switch (framebuffer->vk_format) {
2597ec681f3Smrg      case VK_FORMAT_D16_UNORM:
2607ec681f3Smrg         return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
2617ec681f3Smrg      case VK_FORMAT_D32_SFLOAT:
2627ec681f3Smrg         return V3D_OUTPUT_IMAGE_FORMAT_R32F;
2637ec681f3Smrg      case VK_FORMAT_X8_D24_UNORM_PACK32:
2647ec681f3Smrg         return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
2657ec681f3Smrg      case VK_FORMAT_D24_UNORM_S8_UINT:
2667ec681f3Smrg         /* When storing the stencil aspect of a combined depth/stencil image
2677ec681f3Smrg          * to a buffer, the Vulkan spec states that the output buffer must
2687ec681f3Smrg          * have packed stencil values, so we choose an R8UI format for our
2697ec681f3Smrg          * store outputs. For the load input we still want RGBA8UI since the
2707ec681f3Smrg          * source image contains 4 channels (including the 3 channels
2717ec681f3Smrg          * containing the 24-bit depth value).
2727ec681f3Smrg          *
2737ec681f3Smrg          * When loading the stencil aspect of a combined depth/stencil image
2747ec681f3Smrg          * from a buffer, we read packed 8-bit stencil values from the buffer
2757ec681f3Smrg          * that we need to put into the LSB of the 32-bit format (the R
2767ec681f3Smrg          * channel), so we use R8UI. For the store, if we used R8UI then we
2777ec681f3Smrg          * would write 8-bit stencil values consecutively over depth channels,
2787ec681f3Smrg          * so we need to use RGBA8UI. This will write each stencil value in
2797ec681f3Smrg          * its correct position, but will overwrite depth values (channels G
2807ec681f3Smrg          * B,A) with undefined values. To fix this,  we will have to restore
2817ec681f3Smrg          * the depth aspect from the Z tile buffer, which we should pre-load
2827ec681f3Smrg          * from the image before the store).
2837ec681f3Smrg          */
2847ec681f3Smrg         if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
2857ec681f3Smrg            return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
2867ec681f3Smrg         } else {
2877ec681f3Smrg            assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
2887ec681f3Smrg            if (is_copy_to_buffer) {
2897ec681f3Smrg               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
2907ec681f3Smrg                                  V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
2917ec681f3Smrg            } else {
2927ec681f3Smrg               assert(is_copy_from_buffer);
2937ec681f3Smrg               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
2947ec681f3Smrg                                  V3D_OUTPUT_IMAGE_FORMAT_R8UI;
2957ec681f3Smrg            }
2967ec681f3Smrg         }
2977ec681f3Smrg      default: /* Color formats */
2987ec681f3Smrg         return framebuffer->format->rt_type;
2997ec681f3Smrg         break;
3007ec681f3Smrg      }
3017ec681f3Smrg   } else {
3027ec681f3Smrg      return framebuffer->format->rt_type;
3037ec681f3Smrg   }
3047ec681f3Smrg}
3057ec681f3Smrg
3067ec681f3Smrgstatic inline bool
3077ec681f3Smrgformat_needs_rb_swap(struct v3dv_device *device,
3087ec681f3Smrg                     VkFormat format)
3097ec681f3Smrg{
3107ec681f3Smrg   const uint8_t *swizzle = v3dv_get_format_swizzle(device, format);
3117ec681f3Smrg   return swizzle[0] == PIPE_SWIZZLE_Z;
3127ec681f3Smrg}
3137ec681f3Smrg
3147ec681f3Smrgstatic void
3157ec681f3Smrgemit_image_load(struct v3dv_device *device,
3167ec681f3Smrg                struct v3dv_cl *cl,
3177ec681f3Smrg                struct v3dv_meta_framebuffer *framebuffer,
3187ec681f3Smrg                struct v3dv_image *image,
3197ec681f3Smrg                VkImageAspectFlags aspect,
3207ec681f3Smrg                uint32_t layer,
3217ec681f3Smrg                uint32_t mip_level,
3227ec681f3Smrg                bool is_copy_to_buffer,
3237ec681f3Smrg                bool is_copy_from_buffer)
3247ec681f3Smrg{
3257ec681f3Smrg   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
3267ec681f3Smrg
3277ec681f3Smrg   /* For image to/from buffer copies we always load to and store from RT0,
3287ec681f3Smrg    * even for depth/stencil aspects, because the hardware can't do raster
3297ec681f3Smrg    * stores or loads from/to the depth/stencil tile buffers.
3307ec681f3Smrg    */
3317ec681f3Smrg   bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
3327ec681f3Smrg                            aspect == VK_IMAGE_ASPECT_COLOR_BIT;
3337ec681f3Smrg
3347ec681f3Smrg   const struct v3d_resource_slice *slice = &image->slices[mip_level];
3357ec681f3Smrg   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
3367ec681f3Smrg      load.buffer_to_load = load_to_color_tlb ?
3377ec681f3Smrg         RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);
3387ec681f3Smrg
3397ec681f3Smrg      load.address = v3dv_cl_address(image->mem->bo, layer_offset);
3407ec681f3Smrg
3417ec681f3Smrg      load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
3427ec681f3Smrg                                                  is_copy_to_buffer,
3437ec681f3Smrg                                                  is_copy_from_buffer);
3447ec681f3Smrg      load.memory_format = slice->tiling;
3457ec681f3Smrg
3467ec681f3Smrg      /* When copying depth/stencil images to a buffer, for D24 formats Vulkan
3477ec681f3Smrg       * expects the depth value in the LSB bits of each 32-bit pixel.
3487ec681f3Smrg       * Unfortunately, the hardware seems to put the S8/X8 bits there and the
3497ec681f3Smrg       * depth bits on the MSB. To work around that we can reverse the channel
3507ec681f3Smrg       * order and then swap the R/B channels to get what we want.
3517ec681f3Smrg       *
3527ec681f3Smrg       * NOTE: reversing and swapping only gets us the behavior we want if the
3537ec681f3Smrg       * operations happen in that exact order, which seems to be the case when
3547ec681f3Smrg       * done on the tile buffer load operations. On the store, it seems the
3557ec681f3Smrg       * order is not the same. The order on the store is probably reversed so
3567ec681f3Smrg       * that reversing and swapping on both the load and the store preserves
3577ec681f3Smrg       * the original order of the channels in memory.
3587ec681f3Smrg       *
3597ec681f3Smrg       * Notice that we only need to do this when copying to a buffer, where
3607ec681f3Smrg       * depth and stencil aspects are copied as separate regions and
3617ec681f3Smrg       * the spec expects them to be tightly packed.
3627ec681f3Smrg       */
3637ec681f3Smrg      bool needs_rb_swap = false;
3647ec681f3Smrg      bool needs_chan_reverse = false;
3657ec681f3Smrg      if (is_copy_to_buffer &&
3667ec681f3Smrg         (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
3677ec681f3Smrg          (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
3687ec681f3Smrg           (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
3697ec681f3Smrg         needs_rb_swap = true;
3707ec681f3Smrg         needs_chan_reverse = true;
3717ec681f3Smrg      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
3727ec681f3Smrg                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
3737ec681f3Smrg         /* This is not a raw data copy (i.e. we are clearing the image),
3747ec681f3Smrg          * so we need to make sure we respect the format swizzle.
3757ec681f3Smrg          */
3767ec681f3Smrg         needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
3777ec681f3Smrg      }
3787ec681f3Smrg
3797ec681f3Smrg      load.r_b_swap = needs_rb_swap;
3807ec681f3Smrg      load.channel_reverse = needs_chan_reverse;
3817ec681f3Smrg
3827ec681f3Smrg      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
3837ec681f3Smrg          slice->tiling == V3D_TILING_UIF_XOR) {
3847ec681f3Smrg         load.height_in_ub_or_stride =
3857ec681f3Smrg            slice->padded_height_of_output_image_in_uif_blocks;
3867ec681f3Smrg      } else if (slice->tiling == V3D_TILING_RASTER) {
3877ec681f3Smrg         load.height_in_ub_or_stride = slice->stride;
3887ec681f3Smrg      }
3897ec681f3Smrg
3907ec681f3Smrg      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
3917ec681f3Smrg         load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
3927ec681f3Smrg      else
3937ec681f3Smrg         load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
3947ec681f3Smrg   }
3957ec681f3Smrg}
3967ec681f3Smrg
3977ec681f3Smrgstatic void
3987ec681f3Smrgemit_image_store(struct v3dv_device *device,
3997ec681f3Smrg                 struct v3dv_cl *cl,
4007ec681f3Smrg                 struct v3dv_meta_framebuffer *framebuffer,
4017ec681f3Smrg                 struct v3dv_image *image,
4027ec681f3Smrg                 VkImageAspectFlags aspect,
4037ec681f3Smrg                 uint32_t layer,
4047ec681f3Smrg                 uint32_t mip_level,
4057ec681f3Smrg                 bool is_copy_to_buffer,
4067ec681f3Smrg                 bool is_copy_from_buffer,
4077ec681f3Smrg                 bool is_multisample_resolve)
4087ec681f3Smrg{
4097ec681f3Smrg   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
4107ec681f3Smrg
4117ec681f3Smrg   bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
4127ec681f3Smrg                               aspect == VK_IMAGE_ASPECT_COLOR_BIT;
4137ec681f3Smrg
4147ec681f3Smrg   const struct v3d_resource_slice *slice = &image->slices[mip_level];
4157ec681f3Smrg   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
4167ec681f3Smrg      store.buffer_to_store = store_from_color_tlb ?
4177ec681f3Smrg         RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);
4187ec681f3Smrg
4197ec681f3Smrg      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
4207ec681f3Smrg      store.clear_buffer_being_stored = false;
4217ec681f3Smrg
4227ec681f3Smrg      /* See rationale in emit_image_load() */
4237ec681f3Smrg      bool needs_rb_swap = false;
4247ec681f3Smrg      bool needs_chan_reverse = false;
4257ec681f3Smrg      if (is_copy_from_buffer &&
4267ec681f3Smrg         (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
4277ec681f3Smrg          (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
4287ec681f3Smrg           (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
4297ec681f3Smrg         needs_rb_swap = true;
4307ec681f3Smrg         needs_chan_reverse = true;
4317ec681f3Smrg      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
4327ec681f3Smrg                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
4337ec681f3Smrg         needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
4347ec681f3Smrg      }
4357ec681f3Smrg
4367ec681f3Smrg      store.r_b_swap = needs_rb_swap;
4377ec681f3Smrg      store.channel_reverse = needs_chan_reverse;
4387ec681f3Smrg
4397ec681f3Smrg      store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
4407ec681f3Smrg                                                    is_copy_to_buffer,
4417ec681f3Smrg                                                    is_copy_from_buffer);
4427ec681f3Smrg      store.memory_format = slice->tiling;
4437ec681f3Smrg      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
4447ec681f3Smrg          slice->tiling == V3D_TILING_UIF_XOR) {
4457ec681f3Smrg         store.height_in_ub_or_stride =
4467ec681f3Smrg            slice->padded_height_of_output_image_in_uif_blocks;
4477ec681f3Smrg      } else if (slice->tiling == V3D_TILING_RASTER) {
4487ec681f3Smrg         store.height_in_ub_or_stride = slice->stride;
4497ec681f3Smrg      }
4507ec681f3Smrg
4517ec681f3Smrg      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
4527ec681f3Smrg         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
4537ec681f3Smrg      else if (is_multisample_resolve)
4547ec681f3Smrg         store.decimate_mode = V3D_DECIMATE_MODE_4X;
4557ec681f3Smrg      else
4567ec681f3Smrg         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
4577ec681f3Smrg   }
4587ec681f3Smrg}
4597ec681f3Smrg
4607ec681f3Smrgstatic void
4617ec681f3Smrgemit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
4627ec681f3Smrg                                        struct v3dv_meta_framebuffer *framebuffer,
4637ec681f3Smrg                                        struct v3dv_buffer *buffer,
4647ec681f3Smrg                                        struct v3dv_image *image,
4657ec681f3Smrg                                        uint32_t layer_offset,
4667ec681f3Smrg                                        const VkBufferImageCopy2KHR *region)
4677ec681f3Smrg{
4687ec681f3Smrg   struct v3dv_cl *cl = &job->indirect;
4697ec681f3Smrg   v3dv_cl_ensure_space(cl, 200, 1);
4707ec681f3Smrg   v3dv_return_if_oom(NULL, job);
4717ec681f3Smrg
4727ec681f3Smrg   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
4737ec681f3Smrg
4747ec681f3Smrg   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
4757ec681f3Smrg
4767ec681f3Smrg   /* Load image to TLB */
4777ec681f3Smrg   assert((image->vk.image_type != VK_IMAGE_TYPE_3D &&
4787ec681f3Smrg           layer_offset < region->imageSubresource.layerCount) ||
4797ec681f3Smrg          layer_offset < image->vk.extent.depth);
4807ec681f3Smrg
4817ec681f3Smrg   const uint32_t image_layer = image->vk.image_type != VK_IMAGE_TYPE_3D ?
4827ec681f3Smrg      region->imageSubresource.baseArrayLayer + layer_offset :
4837ec681f3Smrg      region->imageOffset.z + layer_offset;
4847ec681f3Smrg
4857ec681f3Smrg   emit_image_load(job->device, cl, framebuffer, image,
4867ec681f3Smrg                   region->imageSubresource.aspectMask,
4877ec681f3Smrg                   image_layer,
4887ec681f3Smrg                   region->imageSubresource.mipLevel,
4897ec681f3Smrg                   true, false);
4907ec681f3Smrg
4917ec681f3Smrg   cl_emit(cl, END_OF_LOADS, end);
4927ec681f3Smrg
4937ec681f3Smrg   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
4947ec681f3Smrg
4957ec681f3Smrg   /* Store TLB to buffer */
4967ec681f3Smrg   uint32_t width, height;
4977ec681f3Smrg   if (region->bufferRowLength == 0)
4987ec681f3Smrg      width = region->imageExtent.width;
4997ec681f3Smrg   else
5007ec681f3Smrg      width = region->bufferRowLength;
5017ec681f3Smrg
5027ec681f3Smrg   if (region->bufferImageHeight == 0)
5037ec681f3Smrg      height = region->imageExtent.height;
5047ec681f3Smrg   else
5057ec681f3Smrg      height = region->bufferImageHeight;
5067ec681f3Smrg
5077ec681f3Smrg   /* Handle copy from compressed format */
5087ec681f3Smrg   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
5097ec681f3Smrg   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));
5107ec681f3Smrg
5117ec681f3Smrg   /* If we are storing stencil from a combined depth/stencil format the
5127ec681f3Smrg    * Vulkan spec states that the output buffer must have packed stencil
5137ec681f3Smrg    * values, where each stencil value is 1 byte.
5147ec681f3Smrg    */
5157ec681f3Smrg   uint32_t cpp =
5167ec681f3Smrg      region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
5177ec681f3Smrg         1 : image->cpp;
5187ec681f3Smrg   uint32_t buffer_stride = width * cpp;
5197ec681f3Smrg   uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
5207ec681f3Smrg                            height * buffer_stride * layer_offset;
5217ec681f3Smrg
5227ec681f3Smrg   uint32_t format = choose_tlb_format(framebuffer,
5237ec681f3Smrg                                       region->imageSubresource.aspectMask,
5247ec681f3Smrg                                       true, true, false);
5257ec681f3Smrg   bool msaa = image->vk.samples > VK_SAMPLE_COUNT_1_BIT;
5267ec681f3Smrg
5277ec681f3Smrg   emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
5287ec681f3Smrg                     buffer_offset, buffer_stride, msaa, format);
5297ec681f3Smrg
5307ec681f3Smrg   cl_emit(cl, END_OF_TILE_MARKER, end);
5317ec681f3Smrg
5327ec681f3Smrg   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
5337ec681f3Smrg
5347ec681f3Smrg   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
5357ec681f3Smrg      branch.start = tile_list_start;
5367ec681f3Smrg      branch.end = v3dv_cl_get_address(cl);
5377ec681f3Smrg   }
5387ec681f3Smrg}
5397ec681f3Smrg
5407ec681f3Smrgstatic void
5417ec681f3Smrgemit_copy_layer_to_buffer(struct v3dv_job *job,
5427ec681f3Smrg                          struct v3dv_buffer *buffer,
5437ec681f3Smrg                          struct v3dv_image *image,
5447ec681f3Smrg                          struct v3dv_meta_framebuffer *framebuffer,
5457ec681f3Smrg                          uint32_t layer,
5467ec681f3Smrg                          const VkBufferImageCopy2KHR *region)
5477ec681f3Smrg{
5487ec681f3Smrg   emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
5497ec681f3Smrg                                           image, layer, region);
5507ec681f3Smrg   emit_supertile_coordinates(job, framebuffer);
5517ec681f3Smrg}
5527ec681f3Smrg
5537ec681f3Smrgvoid
5547ec681f3Smrgv3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job,
5557ec681f3Smrg                                         struct v3dv_buffer *buffer,
5567ec681f3Smrg                                         struct v3dv_image *image,
5577ec681f3Smrg                                         struct v3dv_meta_framebuffer *framebuffer,
5587ec681f3Smrg                                         const VkBufferImageCopy2KHR *region)
5597ec681f3Smrg{
5607ec681f3Smrg   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
5617ec681f3Smrg   v3dv_return_if_oom(NULL, job);
5627ec681f3Smrg
5637ec681f3Smrg   emit_frame_setup(job, 0, NULL);
5647ec681f3Smrg   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
5657ec681f3Smrg      emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
5667ec681f3Smrg   cl_emit(rcl, END_OF_RENDERING, end);
5677ec681f3Smrg}
5687ec681f3Smrg
5697ec681f3Smrgstatic void
5707ec681f3Smrgemit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
5717ec681f3Smrg                                       struct v3dv_meta_framebuffer *framebuffer,
5727ec681f3Smrg                                       struct v3dv_image *dst,
5737ec681f3Smrg                                       struct v3dv_image *src,
5747ec681f3Smrg                                       uint32_t layer_offset,
5757ec681f3Smrg                                       const VkImageResolve2KHR *region)
5767ec681f3Smrg{
5777ec681f3Smrg   struct v3dv_cl *cl = &job->indirect;
5787ec681f3Smrg   v3dv_cl_ensure_space(cl, 200, 1);
5797ec681f3Smrg   v3dv_return_if_oom(NULL, job);
5807ec681f3Smrg
5817ec681f3Smrg   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
5827ec681f3Smrg
5837ec681f3Smrg   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
5847ec681f3Smrg
5857ec681f3Smrg   assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
5867ec681f3Smrg           layer_offset < region->srcSubresource.layerCount) ||
5877ec681f3Smrg          layer_offset < src->vk.extent.depth);
5887ec681f3Smrg
5897ec681f3Smrg   const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
5907ec681f3Smrg      region->srcSubresource.baseArrayLayer + layer_offset :
5917ec681f3Smrg      region->srcOffset.z + layer_offset;
5927ec681f3Smrg
5937ec681f3Smrg   emit_image_load(job->device, cl, framebuffer, src,
5947ec681f3Smrg                   region->srcSubresource.aspectMask,
5957ec681f3Smrg                   src_layer,
5967ec681f3Smrg                   region->srcSubresource.mipLevel,
5977ec681f3Smrg                   false, false);
5987ec681f3Smrg
5997ec681f3Smrg   cl_emit(cl, END_OF_LOADS, end);
6007ec681f3Smrg
6017ec681f3Smrg   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
6027ec681f3Smrg
6037ec681f3Smrg   assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
6047ec681f3Smrg           layer_offset < region->dstSubresource.layerCount) ||
6057ec681f3Smrg          layer_offset < dst->vk.extent.depth);
6067ec681f3Smrg
6077ec681f3Smrg   const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
6087ec681f3Smrg      region->dstSubresource.baseArrayLayer + layer_offset :
6097ec681f3Smrg      region->dstOffset.z + layer_offset;
6107ec681f3Smrg
6117ec681f3Smrg   emit_image_store(job->device, cl, framebuffer, dst,
6127ec681f3Smrg                    region->dstSubresource.aspectMask,
6137ec681f3Smrg                    dst_layer,
6147ec681f3Smrg                    region->dstSubresource.mipLevel,
6157ec681f3Smrg                    false, false, true);
6167ec681f3Smrg
6177ec681f3Smrg   cl_emit(cl, END_OF_TILE_MARKER, end);
6187ec681f3Smrg
6197ec681f3Smrg   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
6207ec681f3Smrg
6217ec681f3Smrg   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
6227ec681f3Smrg      branch.start = tile_list_start;
6237ec681f3Smrg      branch.end = v3dv_cl_get_address(cl);
6247ec681f3Smrg   }
6257ec681f3Smrg}
6267ec681f3Smrg
6277ec681f3Smrgstatic void
6287ec681f3Smrgemit_resolve_image_layer(struct v3dv_job *job,
6297ec681f3Smrg                         struct v3dv_image *dst,
6307ec681f3Smrg                         struct v3dv_image *src,
6317ec681f3Smrg                         struct v3dv_meta_framebuffer *framebuffer,
6327ec681f3Smrg                         uint32_t layer,
6337ec681f3Smrg                         const VkImageResolve2KHR *region)
6347ec681f3Smrg{
6357ec681f3Smrg   emit_resolve_image_layer_per_tile_list(job, framebuffer,
6367ec681f3Smrg                                          dst, src, layer, region);
6377ec681f3Smrg   emit_supertile_coordinates(job, framebuffer);
6387ec681f3Smrg}
6397ec681f3Smrg
6407ec681f3Smrgvoid
6417ec681f3Smrgv3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job,
6427ec681f3Smrg                                  struct v3dv_image *dst,
6437ec681f3Smrg                                  struct v3dv_image *src,
6447ec681f3Smrg                                  struct v3dv_meta_framebuffer *framebuffer,
6457ec681f3Smrg                                  const VkImageResolve2KHR *region)
6467ec681f3Smrg{
6477ec681f3Smrg   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
6487ec681f3Smrg   v3dv_return_if_oom(NULL, job);
6497ec681f3Smrg
6507ec681f3Smrg   emit_frame_setup(job, 0, NULL);
6517ec681f3Smrg   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
6527ec681f3Smrg      emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
6537ec681f3Smrg   cl_emit(rcl, END_OF_RENDERING, end);
6547ec681f3Smrg}
6557ec681f3Smrg
6567ec681f3Smrgstatic void
6577ec681f3Smrgemit_copy_buffer_per_tile_list(struct v3dv_job *job,
6587ec681f3Smrg                               struct v3dv_bo *dst,
6597ec681f3Smrg                               struct v3dv_bo *src,
6607ec681f3Smrg                               uint32_t dst_offset,
6617ec681f3Smrg                               uint32_t src_offset,
6627ec681f3Smrg                               uint32_t stride,
6637ec681f3Smrg                               uint32_t format)
6647ec681f3Smrg{
6657ec681f3Smrg   struct v3dv_cl *cl = &job->indirect;
6667ec681f3Smrg   v3dv_cl_ensure_space(cl, 200, 1);
6677ec681f3Smrg   v3dv_return_if_oom(NULL, job);
6687ec681f3Smrg
6697ec681f3Smrg   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
6707ec681f3Smrg
6717ec681f3Smrg   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
6727ec681f3Smrg
6737ec681f3Smrg   emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);
6747ec681f3Smrg
6757ec681f3Smrg   cl_emit(cl, END_OF_LOADS, end);
6767ec681f3Smrg
6777ec681f3Smrg   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
6787ec681f3Smrg
6797ec681f3Smrg   emit_linear_store(cl, RENDER_TARGET_0,
6807ec681f3Smrg                     dst, dst_offset, stride, false, format);
6817ec681f3Smrg
6827ec681f3Smrg   cl_emit(cl, END_OF_TILE_MARKER, end);
6837ec681f3Smrg
6847ec681f3Smrg   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
6857ec681f3Smrg
6867ec681f3Smrg   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
6877ec681f3Smrg      branch.start = tile_list_start;
6887ec681f3Smrg      branch.end = v3dv_cl_get_address(cl);
6897ec681f3Smrg   }
6907ec681f3Smrg}
6917ec681f3Smrg
6927ec681f3Smrgvoid
6937ec681f3Smrgv3dX(meta_emit_copy_buffer)(struct v3dv_job *job,
6947ec681f3Smrg                            struct v3dv_bo *dst,
6957ec681f3Smrg                            struct v3dv_bo *src,
6967ec681f3Smrg                            uint32_t dst_offset,
6977ec681f3Smrg                            uint32_t src_offset,
6987ec681f3Smrg                            struct v3dv_meta_framebuffer *framebuffer,
6997ec681f3Smrg                            uint32_t format,
7007ec681f3Smrg                            uint32_t item_size)
7017ec681f3Smrg{
7027ec681f3Smrg   const uint32_t stride = job->frame_tiling.width * item_size;
7037ec681f3Smrg   emit_copy_buffer_per_tile_list(job, dst, src,
7047ec681f3Smrg                                  dst_offset, src_offset,
7057ec681f3Smrg                                  stride, format);
7067ec681f3Smrg   emit_supertile_coordinates(job, framebuffer);
7077ec681f3Smrg}
7087ec681f3Smrg
7097ec681f3Smrgvoid
7107ec681f3Smrgv3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job,
7117ec681f3Smrg                                struct v3dv_bo *dst,
7127ec681f3Smrg                                struct v3dv_bo *src,
7137ec681f3Smrg                                uint32_t dst_offset,
7147ec681f3Smrg                                uint32_t src_offset,
7157ec681f3Smrg                                struct v3dv_meta_framebuffer *framebuffer,
7167ec681f3Smrg                                uint32_t format,
7177ec681f3Smrg                                uint32_t item_size)
7187ec681f3Smrg{
7197ec681f3Smrg   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
7207ec681f3Smrg   v3dv_return_if_oom(NULL, job);
7217ec681f3Smrg
7227ec681f3Smrg   emit_frame_setup(job, 0, NULL);
7237ec681f3Smrg
7247ec681f3Smrg   v3dX(meta_emit_copy_buffer)(job, dst, src, dst_offset, src_offset,
7257ec681f3Smrg                               framebuffer, format, item_size);
7267ec681f3Smrg
7277ec681f3Smrg   cl_emit(rcl, END_OF_RENDERING, end);
7287ec681f3Smrg}
7297ec681f3Smrg
7307ec681f3Smrgstatic void
7317ec681f3Smrgemit_copy_image_layer_per_tile_list(struct v3dv_job *job,
7327ec681f3Smrg                                    struct v3dv_meta_framebuffer *framebuffer,
7337ec681f3Smrg                                    struct v3dv_image *dst,
7347ec681f3Smrg                                    struct v3dv_image *src,
7357ec681f3Smrg                                    uint32_t layer_offset,
7367ec681f3Smrg                                    const VkImageCopy2KHR *region)
7377ec681f3Smrg{
7387ec681f3Smrg   struct v3dv_cl *cl = &job->indirect;
7397ec681f3Smrg   v3dv_cl_ensure_space(cl, 200, 1);
7407ec681f3Smrg   v3dv_return_if_oom(NULL, job);
7417ec681f3Smrg
7427ec681f3Smrg   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
7437ec681f3Smrg
7447ec681f3Smrg   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
7457ec681f3Smrg
7467ec681f3Smrg   assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
7477ec681f3Smrg           layer_offset < region->srcSubresource.layerCount) ||
7487ec681f3Smrg          layer_offset < src->vk.extent.depth);
7497ec681f3Smrg
7507ec681f3Smrg   const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
7517ec681f3Smrg      region->srcSubresource.baseArrayLayer + layer_offset :
7527ec681f3Smrg      region->srcOffset.z + layer_offset;
7537ec681f3Smrg
7547ec681f3Smrg   emit_image_load(job->device, cl, framebuffer, src,
7557ec681f3Smrg                   region->srcSubresource.aspectMask,
7567ec681f3Smrg                   src_layer,
7577ec681f3Smrg                   region->srcSubresource.mipLevel,
7587ec681f3Smrg                   false, false);
7597ec681f3Smrg
7607ec681f3Smrg   cl_emit(cl, END_OF_LOADS, end);
7617ec681f3Smrg
7627ec681f3Smrg   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
7637ec681f3Smrg
7647ec681f3Smrg   assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
7657ec681f3Smrg           layer_offset < region->dstSubresource.layerCount) ||
7667ec681f3Smrg          layer_offset < dst->vk.extent.depth);
7677ec681f3Smrg
7687ec681f3Smrg   const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
7697ec681f3Smrg      region->dstSubresource.baseArrayLayer + layer_offset :
7707ec681f3Smrg      region->dstOffset.z + layer_offset;
7717ec681f3Smrg
7727ec681f3Smrg   emit_image_store(job->device, cl, framebuffer, dst,
7737ec681f3Smrg                    region->dstSubresource.aspectMask,
7747ec681f3Smrg                    dst_layer,
7757ec681f3Smrg                    region->dstSubresource.mipLevel,
7767ec681f3Smrg                    false, false, false);
7777ec681f3Smrg
7787ec681f3Smrg   cl_emit(cl, END_OF_TILE_MARKER, end);
7797ec681f3Smrg
7807ec681f3Smrg   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
7817ec681f3Smrg
7827ec681f3Smrg   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
7837ec681f3Smrg      branch.start = tile_list_start;
7847ec681f3Smrg      branch.end = v3dv_cl_get_address(cl);
7857ec681f3Smrg   }
7867ec681f3Smrg}
7877ec681f3Smrg
7887ec681f3Smrgstatic void
7897ec681f3Smrgemit_copy_image_layer(struct v3dv_job *job,
7907ec681f3Smrg                      struct v3dv_image *dst,
7917ec681f3Smrg                      struct v3dv_image *src,
7927ec681f3Smrg                      struct v3dv_meta_framebuffer *framebuffer,
7937ec681f3Smrg                      uint32_t layer,
7947ec681f3Smrg                      const VkImageCopy2KHR *region)
7957ec681f3Smrg{
7967ec681f3Smrg   emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
7977ec681f3Smrg   emit_supertile_coordinates(job, framebuffer);
7987ec681f3Smrg}
7997ec681f3Smrg
8007ec681f3Smrgvoid
8017ec681f3Smrgv3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job,
8027ec681f3Smrg                               struct v3dv_image *dst,
8037ec681f3Smrg                               struct v3dv_image *src,
8047ec681f3Smrg                               struct v3dv_meta_framebuffer *framebuffer,
8057ec681f3Smrg                               const VkImageCopy2KHR *region)
8067ec681f3Smrg{
8077ec681f3Smrg   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
8087ec681f3Smrg   v3dv_return_if_oom(NULL, job);
8097ec681f3Smrg
8107ec681f3Smrg   emit_frame_setup(job, 0, NULL);
8117ec681f3Smrg   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
8127ec681f3Smrg      emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
8137ec681f3Smrg   cl_emit(rcl, END_OF_RENDERING, end);
8147ec681f3Smrg}
8157ec681f3Smrg
8167ec681f3Smrgvoid
8177ec681f3Smrgv3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
8187ec681f3Smrg                        struct v3dv_image *dst,
8197ec681f3Smrg                        uint32_t dst_mip_level,
8207ec681f3Smrg                        uint32_t dst_layer,
8217ec681f3Smrg                        struct v3dv_image *src,
8227ec681f3Smrg                        uint32_t src_mip_level,
8237ec681f3Smrg                        uint32_t src_layer,
8247ec681f3Smrg                        uint32_t width,
8257ec681f3Smrg                        uint32_t height,
8267ec681f3Smrg                        const struct v3dv_format *format)
8277ec681f3Smrg{
8287ec681f3Smrg   const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
8297ec681f3Smrg   const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
8307ec681f3Smrg
8317ec681f3Smrg   assert(dst->mem && dst->mem->bo);
8327ec681f3Smrg   const struct v3dv_bo *dst_bo = dst->mem->bo;
8337ec681f3Smrg
8347ec681f3Smrg   assert(src->mem && src->mem->bo);
8357ec681f3Smrg   const struct v3dv_bo *src_bo = src->mem->bo;
8367ec681f3Smrg
8377ec681f3Smrg   struct drm_v3d_submit_tfu tfu = {
8387ec681f3Smrg      .ios = (height << 16) | width,
8397ec681f3Smrg      .bo_handles = {
8407ec681f3Smrg         dst_bo->handle,
8417ec681f3Smrg         src_bo->handle != dst_bo->handle ? src_bo->handle : 0
8427ec681f3Smrg      },
8437ec681f3Smrg   };
8447ec681f3Smrg
8457ec681f3Smrg   const uint32_t src_offset =
8467ec681f3Smrg      src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
8477ec681f3Smrg   tfu.iia |= src_offset;
8487ec681f3Smrg
8497ec681f3Smrg   uint32_t icfg;
8507ec681f3Smrg   if (src_slice->tiling == V3D_TILING_RASTER) {
8517ec681f3Smrg      icfg = V3D_TFU_ICFG_FORMAT_RASTER;
8527ec681f3Smrg   } else {
8537ec681f3Smrg      icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE +
8547ec681f3Smrg             (src_slice->tiling - V3D_TILING_LINEARTILE);
8557ec681f3Smrg   }
8567ec681f3Smrg   tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT;
8577ec681f3Smrg
8587ec681f3Smrg   const uint32_t dst_offset =
8597ec681f3Smrg      dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
8607ec681f3Smrg   tfu.ioa |= dst_offset;
8617ec681f3Smrg
8627ec681f3Smrg   tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
8637ec681f3Smrg               (dst_slice->tiling - V3D_TILING_LINEARTILE)) <<
8647ec681f3Smrg                V3D_TFU_IOA_FORMAT_SHIFT;
8657ec681f3Smrg   tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
8667ec681f3Smrg
8677ec681f3Smrg   switch (src_slice->tiling) {
8687ec681f3Smrg   case V3D_TILING_UIF_NO_XOR:
8697ec681f3Smrg   case V3D_TILING_UIF_XOR:
8707ec681f3Smrg      tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp));
8717ec681f3Smrg      break;
8727ec681f3Smrg   case V3D_TILING_RASTER:
8737ec681f3Smrg      tfu.iis |= src_slice->stride / src->cpp;
8747ec681f3Smrg      break;
8757ec681f3Smrg   default:
8767ec681f3Smrg      break;
8777ec681f3Smrg   }
8787ec681f3Smrg
8797ec681f3Smrg   /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
8807ec681f3Smrg    * OPAD field for the destination (how many extra UIF blocks beyond
8817ec681f3Smrg    * those necessary to cover the height).
8827ec681f3Smrg    */
8837ec681f3Smrg   if (dst_slice->tiling == V3D_TILING_UIF_NO_XOR ||
8847ec681f3Smrg       dst_slice->tiling == V3D_TILING_UIF_XOR) {
8857ec681f3Smrg      uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp);
8867ec681f3Smrg      uint32_t implicit_padded_height = align(height, uif_block_h);
8877ec681f3Smrg      uint32_t icfg =
8887ec681f3Smrg         (dst_slice->padded_height - implicit_padded_height) / uif_block_h;
8897ec681f3Smrg      tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
8907ec681f3Smrg   }
8917ec681f3Smrg
8927ec681f3Smrg   v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
8937ec681f3Smrg}
8947ec681f3Smrg
8957ec681f3Smrgstatic void
8967ec681f3Smrgemit_clear_image_layer_per_tile_list(struct v3dv_job *job,
8977ec681f3Smrg                                     struct v3dv_meta_framebuffer *framebuffer,
8987ec681f3Smrg                                     struct v3dv_image *image,
8997ec681f3Smrg                                     VkImageAspectFlags aspects,
9007ec681f3Smrg                                     uint32_t layer,
9017ec681f3Smrg                                     uint32_t level)
9027ec681f3Smrg{
9037ec681f3Smrg   struct v3dv_cl *cl = &job->indirect;
9047ec681f3Smrg   v3dv_cl_ensure_space(cl, 200, 1);
9057ec681f3Smrg   v3dv_return_if_oom(NULL, job);
9067ec681f3Smrg
9077ec681f3Smrg   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
9087ec681f3Smrg
9097ec681f3Smrg   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
9107ec681f3Smrg
9117ec681f3Smrg   cl_emit(cl, END_OF_LOADS, end);
9127ec681f3Smrg
9137ec681f3Smrg   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
9147ec681f3Smrg
9157ec681f3Smrg   emit_image_store(job->device, cl, framebuffer, image, aspects,
9167ec681f3Smrg                    layer, level, false, false, false);
9177ec681f3Smrg
9187ec681f3Smrg   cl_emit(cl, END_OF_TILE_MARKER, end);
9197ec681f3Smrg
9207ec681f3Smrg   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
9217ec681f3Smrg
9227ec681f3Smrg   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
9237ec681f3Smrg      branch.start = tile_list_start;
9247ec681f3Smrg      branch.end = v3dv_cl_get_address(cl);
9257ec681f3Smrg   }
9267ec681f3Smrg}
9277ec681f3Smrg
9287ec681f3Smrgstatic void
9297ec681f3Smrgemit_clear_image_layers(struct v3dv_job *job,
9307ec681f3Smrg                 struct v3dv_image *image,
9317ec681f3Smrg                 struct v3dv_meta_framebuffer *framebuffer,
9327ec681f3Smrg                 VkImageAspectFlags aspects,
9337ec681f3Smrg                 uint32_t min_layer,
9347ec681f3Smrg                 uint32_t max_layer,
9357ec681f3Smrg                 uint32_t level)
9367ec681f3Smrg{
9377ec681f3Smrg   for (uint32_t layer = min_layer; layer < max_layer; layer++) {
9387ec681f3Smrg      emit_clear_image_layer_per_tile_list(job, framebuffer, image, aspects,
9397ec681f3Smrg                                           layer, level);
9407ec681f3Smrg      emit_supertile_coordinates(job, framebuffer);
9417ec681f3Smrg   }
9427ec681f3Smrg}
9437ec681f3Smrg
9447ec681f3Smrgvoid
9457ec681f3Smrgv3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job,
9467ec681f3Smrg                                struct v3dv_image *image,
9477ec681f3Smrg                                struct v3dv_meta_framebuffer *framebuffer,
9487ec681f3Smrg                                const union v3dv_clear_value *clear_value,
9497ec681f3Smrg                                VkImageAspectFlags aspects,
9507ec681f3Smrg                                uint32_t min_layer,
9517ec681f3Smrg                                uint32_t max_layer,
9527ec681f3Smrg                                uint32_t level)
9537ec681f3Smrg{
9547ec681f3Smrg   const struct rcl_clear_info clear_info = {
9557ec681f3Smrg      .clear_value = clear_value,
9567ec681f3Smrg      .image = image,
9577ec681f3Smrg      .aspects = aspects,
9587ec681f3Smrg      .level = level,
9597ec681f3Smrg   };
9607ec681f3Smrg
9617ec681f3Smrg   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
9627ec681f3Smrg   v3dv_return_if_oom(NULL, job);
9637ec681f3Smrg
9647ec681f3Smrg   emit_frame_setup(job, 0, clear_value);
9657ec681f3Smrg   emit_clear_image_layers(job, image, framebuffer, aspects,
9667ec681f3Smrg                           min_layer, max_layer, level);
9677ec681f3Smrg   cl_emit(rcl, END_OF_RENDERING, end);
9687ec681f3Smrg}
9697ec681f3Smrg
9707ec681f3Smrgstatic void
9717ec681f3Smrgemit_fill_buffer_per_tile_list(struct v3dv_job *job,
9727ec681f3Smrg                               struct v3dv_bo *bo,
9737ec681f3Smrg                               uint32_t offset,
9747ec681f3Smrg                               uint32_t stride)
9757ec681f3Smrg{
9767ec681f3Smrg   struct v3dv_cl *cl = &job->indirect;
9777ec681f3Smrg   v3dv_cl_ensure_space(cl, 200, 1);
9787ec681f3Smrg   v3dv_return_if_oom(NULL, job);
9797ec681f3Smrg
9807ec681f3Smrg   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
9817ec681f3Smrg
9827ec681f3Smrg   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
9837ec681f3Smrg
9847ec681f3Smrg   cl_emit(cl, END_OF_LOADS, end);
9857ec681f3Smrg
9867ec681f3Smrg   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
9877ec681f3Smrg
9887ec681f3Smrg   emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
9897ec681f3Smrg                     V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);
9907ec681f3Smrg
9917ec681f3Smrg   cl_emit(cl, END_OF_TILE_MARKER, end);
9927ec681f3Smrg
9937ec681f3Smrg   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
9947ec681f3Smrg
9957ec681f3Smrg   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
9967ec681f3Smrg      branch.start = tile_list_start;
9977ec681f3Smrg      branch.end = v3dv_cl_get_address(cl);
9987ec681f3Smrg   }
9997ec681f3Smrg}
10007ec681f3Smrg
10017ec681f3Smrgstatic void
10027ec681f3Smrgemit_fill_buffer(struct v3dv_job *job,
10037ec681f3Smrg                 struct v3dv_bo *bo,
10047ec681f3Smrg                 uint32_t offset,
10057ec681f3Smrg                 struct v3dv_meta_framebuffer *framebuffer)
10067ec681f3Smrg{
10077ec681f3Smrg   const uint32_t stride = job->frame_tiling.width * 4;
10087ec681f3Smrg   emit_fill_buffer_per_tile_list(job, bo, offset, stride);
10097ec681f3Smrg   emit_supertile_coordinates(job, framebuffer);
10107ec681f3Smrg}
10117ec681f3Smrg
10127ec681f3Smrgvoid
10137ec681f3Smrgv3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job,
10147ec681f3Smrg                                struct v3dv_bo *bo,
10157ec681f3Smrg                                uint32_t offset,
10167ec681f3Smrg                                struct v3dv_meta_framebuffer *framebuffer,
10177ec681f3Smrg                                uint32_t data)
10187ec681f3Smrg{
10197ec681f3Smrg   const union v3dv_clear_value clear_value = {
10207ec681f3Smrg       .color = { data, 0, 0, 0 },
10217ec681f3Smrg   };
10227ec681f3Smrg
10237ec681f3Smrg   const struct rcl_clear_info clear_info = {
10247ec681f3Smrg      .clear_value = &clear_value,
10257ec681f3Smrg      .image = NULL,
10267ec681f3Smrg      .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
10277ec681f3Smrg      .level = 0,
10287ec681f3Smrg   };
10297ec681f3Smrg
10307ec681f3Smrg   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
10317ec681f3Smrg   v3dv_return_if_oom(NULL, job);
10327ec681f3Smrg
10337ec681f3Smrg   emit_frame_setup(job, 0, &clear_value);
10347ec681f3Smrg   emit_fill_buffer(job, bo, offset, framebuffer);
10357ec681f3Smrg   cl_emit(rcl, END_OF_RENDERING, end);
10367ec681f3Smrg}
10377ec681f3Smrg
10387ec681f3Smrg
10397ec681f3Smrgstatic void
10407ec681f3Smrgemit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
10417ec681f3Smrg                                        struct v3dv_meta_framebuffer *framebuffer,
10427ec681f3Smrg                                        struct v3dv_image *image,
10437ec681f3Smrg                                        struct v3dv_buffer *buffer,
10447ec681f3Smrg                                        uint32_t layer,
10457ec681f3Smrg                                        const VkBufferImageCopy2KHR *region)
10467ec681f3Smrg{
10477ec681f3Smrg   struct v3dv_cl *cl = &job->indirect;
10487ec681f3Smrg   v3dv_cl_ensure_space(cl, 200, 1);
10497ec681f3Smrg   v3dv_return_if_oom(NULL, job);
10507ec681f3Smrg
10517ec681f3Smrg   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
10527ec681f3Smrg
10537ec681f3Smrg   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
10547ec681f3Smrg
10557ec681f3Smrg   const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
10567ec681f3Smrg   assert((image->vk.image_type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
10577ec681f3Smrg          layer < image->vk.extent.depth);
10587ec681f3Smrg
10597ec681f3Smrg   /* Load TLB from buffer */
10607ec681f3Smrg   uint32_t width, height;
10617ec681f3Smrg   if (region->bufferRowLength == 0)
10627ec681f3Smrg      width = region->imageExtent.width;
10637ec681f3Smrg   else
10647ec681f3Smrg      width = region->bufferRowLength;
10657ec681f3Smrg
10667ec681f3Smrg   if (region->bufferImageHeight == 0)
10677ec681f3Smrg      height = region->imageExtent.height;
10687ec681f3Smrg   else
10697ec681f3Smrg      height = region->bufferImageHeight;
10707ec681f3Smrg
10717ec681f3Smrg   /* Handle copy to compressed format using a compatible format */
10727ec681f3Smrg   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
10737ec681f3Smrg   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));
10747ec681f3Smrg
10757ec681f3Smrg   uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
10767ec681f3Smrg                  1 : image->cpp;
10777ec681f3Smrg   uint32_t buffer_stride = width * cpp;
10787ec681f3Smrg   uint32_t buffer_offset =
10797ec681f3Smrg      buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;
10807ec681f3Smrg
10817ec681f3Smrg   uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
10827ec681f3Smrg                                       false, false, true);
10837ec681f3Smrg
10847ec681f3Smrg   emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
10857ec681f3Smrg                    buffer_offset, buffer_stride, format);
10867ec681f3Smrg
10877ec681f3Smrg   /* Because we can't do raster loads/stores of Z/S formats we need to
10887ec681f3Smrg    * use a color tile buffer with a compatible RGBA color format instead.
10897ec681f3Smrg    * However, when we are uploading a single aspect to a combined
10907ec681f3Smrg    * depth/stencil image we have the problem that our tile buffer stores don't
10917ec681f3Smrg    * allow us to mask out the other aspect, so we always write all four RGBA
10927ec681f3Smrg    * channels to the image and we end up overwriting that other aspect with
10937ec681f3Smrg    * undefined values. To work around that, we first load the aspect we are
10947ec681f3Smrg    * not copying from the image memory into a proper Z/S tile buffer. Then we
10957ec681f3Smrg    * do our store from the color buffer for the aspect we are copying, and
10967ec681f3Smrg    * after that, we do another store from the Z/S tile buffer to restore the
10977ec681f3Smrg    * other aspect to its original value.
10987ec681f3Smrg    */
10997ec681f3Smrg   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
11007ec681f3Smrg      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
11017ec681f3Smrg         emit_image_load(job->device, cl, framebuffer, image,
11027ec681f3Smrg                         VK_IMAGE_ASPECT_STENCIL_BIT,
11037ec681f3Smrg                         imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
11047ec681f3Smrg                         false, false);
11057ec681f3Smrg      } else {
11067ec681f3Smrg         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
11077ec681f3Smrg         emit_image_load(job->device, cl, framebuffer, image,
11087ec681f3Smrg                         VK_IMAGE_ASPECT_DEPTH_BIT,
11097ec681f3Smrg                         imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
11107ec681f3Smrg                         false, false);
11117ec681f3Smrg      }
11127ec681f3Smrg   }
11137ec681f3Smrg
11147ec681f3Smrg   cl_emit(cl, END_OF_LOADS, end);
11157ec681f3Smrg
11167ec681f3Smrg   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
11177ec681f3Smrg
11187ec681f3Smrg   /* Store TLB to image */
11197ec681f3Smrg   emit_image_store(job->device, cl, framebuffer, image, imgrsc->aspectMask,
11207ec681f3Smrg                    imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
11217ec681f3Smrg                    false, true, false);
11227ec681f3Smrg
11237ec681f3Smrg   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
11247ec681f3Smrg      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
11257ec681f3Smrg         emit_image_store(job->device, cl, framebuffer, image,
11267ec681f3Smrg                          VK_IMAGE_ASPECT_STENCIL_BIT,
11277ec681f3Smrg                          imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
11287ec681f3Smrg                          false, false, false);
11297ec681f3Smrg      } else {
11307ec681f3Smrg         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
11317ec681f3Smrg         emit_image_store(job->device, cl, framebuffer, image,
11327ec681f3Smrg                          VK_IMAGE_ASPECT_DEPTH_BIT,
11337ec681f3Smrg                          imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
11347ec681f3Smrg                          false, false, false);
11357ec681f3Smrg      }
11367ec681f3Smrg   }
11377ec681f3Smrg
11387ec681f3Smrg   cl_emit(cl, END_OF_TILE_MARKER, end);
11397ec681f3Smrg
11407ec681f3Smrg   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
11417ec681f3Smrg
11427ec681f3Smrg   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
11437ec681f3Smrg      branch.start = tile_list_start;
11447ec681f3Smrg      branch.end = v3dv_cl_get_address(cl);
11457ec681f3Smrg   }
11467ec681f3Smrg}
11477ec681f3Smrg
11487ec681f3Smrgstatic void
11497ec681f3Smrgemit_copy_buffer_to_layer(struct v3dv_job *job,
11507ec681f3Smrg                          struct v3dv_image *image,
11517ec681f3Smrg                          struct v3dv_buffer *buffer,
11527ec681f3Smrg                          struct v3dv_meta_framebuffer *framebuffer,
11537ec681f3Smrg                          uint32_t layer,
11547ec681f3Smrg                          const VkBufferImageCopy2KHR *region)
11557ec681f3Smrg{
11567ec681f3Smrg   emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer,
11577ec681f3Smrg                                           layer, region);
11587ec681f3Smrg   emit_supertile_coordinates(job, framebuffer);
11597ec681f3Smrg}
11607ec681f3Smrg
11617ec681f3Smrgvoid
11627ec681f3Smrgv3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job,
11637ec681f3Smrg                                         struct v3dv_image *image,
11647ec681f3Smrg                                         struct v3dv_buffer *buffer,
11657ec681f3Smrg                                         struct v3dv_meta_framebuffer *framebuffer,
11667ec681f3Smrg                                         const VkBufferImageCopy2KHR *region)
11677ec681f3Smrg{
11687ec681f3Smrg   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
11697ec681f3Smrg   v3dv_return_if_oom(NULL, job);
11707ec681f3Smrg
11717ec681f3Smrg   emit_frame_setup(job, 0, NULL);
11727ec681f3Smrg   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
11737ec681f3Smrg      emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region);
11747ec681f3Smrg   cl_emit(rcl, END_OF_RENDERING, end);
11757ec681f3Smrg}
11767ec681f3Smrg
/* Pick a TLB framebuffer size (width x height) to process a number of
 * pixels.
 *
 * Beware that we can't "render" more than 4096x4096 pixels in a single job:
 * for larger pixel counts the result is clamped to 4096x4096 and the caller
 * might need to split the job and call this function multiple times.
 *
 * For smaller counts we start from a num_pixels x 1 framebuffer and keep
 * halving the width and doubling the height for as long as the width
 * exceeds the limit, or is even and more than twice the height. Halving an
 * odd width rounds down, so the returned size may cover fewer pixels than
 * requested, but never more.
 */
static void
framebuffer_size_for_pixel_count(uint32_t num_pixels,
                                 uint32_t *width,
                                 uint32_t *height)
{
   assert(num_pixels > 0);

   const uint32_t dim_limit = 4096;
   const uint32_t pixel_limit = dim_limit * dim_limit;

   uint32_t fb_w = dim_limit;
   uint32_t fb_h = dim_limit;
   if (num_pixels <= pixel_limit) {
      fb_w = num_pixels;
      for (fb_h = 1;
           fb_w > dim_limit || ((fb_w & 1) == 0 && fb_w > 2 * fb_h);
           fb_h *= 2) {
         fb_w /= 2;
      }
   }

   assert(fb_w <= dim_limit && fb_h <= dim_limit);
   assert(fb_w * fb_h <= num_pixels);
   assert(fb_w > 0 && fb_h > 0);

   *width = fb_w;
   *height = fb_h;
}
12117ec681f3Smrg
12127ec681f3Smrgstruct v3dv_job *
12137ec681f3Smrgv3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
12147ec681f3Smrg                       struct v3dv_bo *dst,
12157ec681f3Smrg                       uint32_t dst_offset,
12167ec681f3Smrg                       struct v3dv_bo *src,
12177ec681f3Smrg                       uint32_t src_offset,
12187ec681f3Smrg                       const VkBufferCopy2KHR *region)
12197ec681f3Smrg{
12207ec681f3Smrg   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
12217ec681f3Smrg   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
12227ec681f3Smrg
12237ec681f3Smrg   /* Select appropriate pixel format for the copy operation based on the
12247ec681f3Smrg    * size to copy and the alignment of the source and destination offsets.
12257ec681f3Smrg    */
12267ec681f3Smrg   src_offset += region->srcOffset;
12277ec681f3Smrg   dst_offset += region->dstOffset;
12287ec681f3Smrg   uint32_t item_size = 4;
12297ec681f3Smrg   while (item_size > 1 &&
12307ec681f3Smrg          (src_offset % item_size != 0 || dst_offset % item_size != 0)) {
12317ec681f3Smrg      item_size /= 2;
12327ec681f3Smrg   }
12337ec681f3Smrg
12347ec681f3Smrg   while (item_size > 1 && region->size % item_size != 0)
12357ec681f3Smrg      item_size /= 2;
12367ec681f3Smrg
12377ec681f3Smrg   assert(region->size % item_size == 0);
12387ec681f3Smrg   uint32_t num_items = region->size / item_size;
12397ec681f3Smrg   assert(num_items > 0);
12407ec681f3Smrg
12417ec681f3Smrg   uint32_t format;
12427ec681f3Smrg   VkFormat vk_format;
12437ec681f3Smrg   switch (item_size) {
12447ec681f3Smrg   case 4:
12457ec681f3Smrg      format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
12467ec681f3Smrg      vk_format = VK_FORMAT_R8G8B8A8_UINT;
12477ec681f3Smrg      break;
12487ec681f3Smrg   case 2:
12497ec681f3Smrg      format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
12507ec681f3Smrg      vk_format = VK_FORMAT_R8G8_UINT;
12517ec681f3Smrg      break;
12527ec681f3Smrg   default:
12537ec681f3Smrg      format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
12547ec681f3Smrg      vk_format = VK_FORMAT_R8_UINT;
12557ec681f3Smrg      break;
12567ec681f3Smrg   }
12577ec681f3Smrg
12587ec681f3Smrg   struct v3dv_job *job = NULL;
12597ec681f3Smrg   while (num_items > 0) {
12607ec681f3Smrg      job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
12617ec681f3Smrg      if (!job)
12627ec681f3Smrg         return NULL;
12637ec681f3Smrg
12647ec681f3Smrg      uint32_t width, height;
12657ec681f3Smrg      framebuffer_size_for_pixel_count(num_items, &width, &height);
12667ec681f3Smrg
12677ec681f3Smrg      v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false);
12687ec681f3Smrg
12697ec681f3Smrg      struct v3dv_meta_framebuffer framebuffer;
12707ec681f3Smrg      v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type,
12717ec681f3Smrg                                  &job->frame_tiling);
12727ec681f3Smrg
12737ec681f3Smrg      v3dX(job_emit_binning_flush)(job);
12747ec681f3Smrg
12757ec681f3Smrg      v3dX(meta_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset,
12767ec681f3Smrg                                      &framebuffer, format, item_size);
12777ec681f3Smrg
12787ec681f3Smrg      v3dv_cmd_buffer_finish_job(cmd_buffer);
12797ec681f3Smrg
12807ec681f3Smrg      const uint32_t items_copied = width * height;
12817ec681f3Smrg      const uint32_t bytes_copied = items_copied * item_size;
12827ec681f3Smrg      num_items -= items_copied;
12837ec681f3Smrg      src_offset += bytes_copied;
12847ec681f3Smrg      dst_offset += bytes_copied;
12857ec681f3Smrg   }
12867ec681f3Smrg
12877ec681f3Smrg   return job;
12887ec681f3Smrg}
12897ec681f3Smrg
12907ec681f3Smrgvoid
12917ec681f3Smrgv3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
12927ec681f3Smrg                       struct v3dv_bo *bo,
12937ec681f3Smrg                       uint32_t offset,
12947ec681f3Smrg                       uint32_t size,
12957ec681f3Smrg                       uint32_t data)
12967ec681f3Smrg{
12977ec681f3Smrg   assert(size > 0 && size % 4 == 0);
12987ec681f3Smrg   assert(offset + size <= bo->size);
12997ec681f3Smrg
13007ec681f3Smrg   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
13017ec681f3Smrg   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
13027ec681f3Smrg   uint32_t num_items = size / 4;
13037ec681f3Smrg
13047ec681f3Smrg   while (num_items > 0) {
13057ec681f3Smrg      struct v3dv_job *job =
13067ec681f3Smrg         v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
13077ec681f3Smrg      if (!job)
13087ec681f3Smrg         return;
13097ec681f3Smrg
13107ec681f3Smrg      uint32_t width, height;
13117ec681f3Smrg      framebuffer_size_for_pixel_count(num_items, &width, &height);
13127ec681f3Smrg
13137ec681f3Smrg      v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false);
13147ec681f3Smrg
13157ec681f3Smrg      struct v3dv_meta_framebuffer framebuffer;
13167ec681f3Smrg      v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
13177ec681f3Smrg                                  internal_type, &job->frame_tiling);
13187ec681f3Smrg
13197ec681f3Smrg      v3dX(job_emit_binning_flush)(job);
13207ec681f3Smrg
13217ec681f3Smrg      v3dX(meta_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data);
13227ec681f3Smrg
13237ec681f3Smrg      v3dv_cmd_buffer_finish_job(cmd_buffer);
13247ec681f3Smrg
13257ec681f3Smrg      const uint32_t items_copied = width * height;
13267ec681f3Smrg      const uint32_t bytes_copied = items_copied * 4;
13277ec681f3Smrg      num_items -= items_copied;
13287ec681f3Smrg      offset += bytes_copied;
13297ec681f3Smrg   }
13307ec681f3Smrg}
13317ec681f3Smrg
13327ec681f3Smrgvoid
13337ec681f3Smrgv3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb,
13347ec681f3Smrg                            VkFormat vk_format,
13357ec681f3Smrg                            uint32_t internal_type,
13367ec681f3Smrg                            const struct v3dv_frame_tiling *tiling)
13377ec681f3Smrg{
13387ec681f3Smrg   fb->internal_type = internal_type;
13397ec681f3Smrg
13407ec681f3Smrg   /* Supertile coverage always starts at 0,0  */
13417ec681f3Smrg   uint32_t supertile_w_in_pixels =
13427ec681f3Smrg      tiling->tile_width * tiling->supertile_width;
13437ec681f3Smrg   uint32_t supertile_h_in_pixels =
13447ec681f3Smrg      tiling->tile_height * tiling->supertile_height;
13457ec681f3Smrg
13467ec681f3Smrg   fb->min_x_supertile = 0;
13477ec681f3Smrg   fb->min_y_supertile = 0;
13487ec681f3Smrg   fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
13497ec681f3Smrg   fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;
13507ec681f3Smrg
13517ec681f3Smrg   fb->vk_format = vk_format;
13527ec681f3Smrg   fb->format = v3dX(get_format)(vk_format);
13537ec681f3Smrg
13547ec681f3Smrg   fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
13557ec681f3Smrg   if (vk_format_is_depth_or_stencil(vk_format))
13567ec681f3Smrg      fb->internal_depth_type = v3dX(get_internal_depth_type)(vk_format);
13577ec681f3Smrg}
1358