/*
 * Copyright © 2021 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/compiler/v3d_compiler.h"

#include "util/half_float.h"
#include "vulkan/util/vk_format.h"
#include "util/u_pack_color.h"

#include "vk_format_info.h"

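/* The helpers in this file emit packets into a job's binning command list
 * (BCL) or render command list (RCL) using the cl_emit() macros generated
 * from v3dx_pack.h. Each emission is preceded by an explicit space check
 * plus an OOM bail-out so we never overflow the underlying CL BO.
 */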
void
v3dX(job_emit_binning_flush)(struct v3dv_job *job)
{
   assert(job);

   v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(FLUSH));
   v3dv_return_if_oom(NULL, job);

   cl_emit(&job->bcl, FLUSH, flush);
}

void
v3dX(job_emit_binning_prolog)(struct v3dv_job *job,
                              const struct v3dv_frame_tiling *tiling,
                              uint32_t layers)
{
   /* This must go before the binning mode configuration. It is
    * required for layered framebuffers to work.
    */
   cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) {
      config.number_of_layers = layers;
   }

   cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
      config.width_in_pixels = tiling->width;
      config.height_in_pixels = tiling->height;
      config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
      config.multisample_mode_4x = tiling->msaa;
      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
   }

   /* There's definitely nothing in the VCD cache we want. */
   cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);

   /* "Binning mode lists must have a Start Tile Binning item (6) after
    *  any prefix state data before the binning list proper starts."
    */
   cl_emit(&job->bcl, START_TILE_BINNING, bin);
}

void
v3dX(cmd_buffer_end_render_pass_secondary)(struct v3dv_cmd_buffer *cmd_buffer)
{
   assert(cmd_buffer->state.job);
   v3dv_cl_ensure_space_with_branch(&cmd_buffer->state.job->bcl,
                                    cl_packet_length(RETURN_FROM_SUB_LIST));
   v3dv_return_if_oom(cmd_buffer, NULL);
   cl_emit(&cmd_buffer->state.job->bcl, RETURN_FROM_SUB_LIST, ret);
}

void
v3dX(job_emit_clip_window)(struct v3dv_job *job, const VkRect2D *rect)
{
   assert(job);

   v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CLIP_WINDOW));
   v3dv_return_if_oom(NULL, job);

   cl_emit(&job->bcl, CLIP_WINDOW, clip) {
      clip.clip_window_left_pixel_coordinate = rect->offset.x;
      clip.clip_window_bottom_pixel_coordinate = rect->offset.y;
      clip.clip_window_width_in_pixels = rect->extent.width;
      clip.clip_window_height_in_pixels = rect->extent.height;
   }
}

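/* Emits a LOAD_TILE_BUFFER_GENERAL packet that loads one layer of an
 * attachment into the tile buffer at the start of each tile. 'buffer'
 * selects the tile buffer target (RENDER_TARGET_0 + n, Z, STENCIL or
 * ZSTENCIL).
 */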
static void
cmd_buffer_render_pass_emit_load(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct v3dv_cl *cl,
                                 struct v3dv_image_view *iview,
                                 uint32_t layer,
                                 uint32_t buffer)
{
   const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image;
   const struct v3d_resource_slice *slice =
      &image->slices[iview->vk.base_mip_level];
   uint32_t layer_offset =
      v3dv_layer_offset(image, iview->vk.base_mip_level,
                        iview->vk.base_array_layer + layer);

   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = buffer;
      load.address = v3dv_cl_address(image->mem->bo, layer_offset);

      load.input_image_format = iview->format->rt_type;
      load.r_b_swap = iview->swap_rb;
      load.memory_format = slice->tiling;

      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         load.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == V3D_TILING_RASTER) {
         load.height_in_ub_or_stride = slice->stride;
      }

      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
         load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else
         load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

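/* Decides whether an attachment aspect must be loaded into the tile buffer.
 * Note that even with loadOp != LOAD we may still be forced to load: when
 * this job is not the first to use the attachment, when it continues a
 * subpass started by another job, or when the render area is not
 * tile-aligned.
 */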
static bool
check_needs_load(const struct v3dv_cmd_buffer_state *state,
                 VkImageAspectFlags aspect,
                 uint32_t first_subpass_idx,
                 VkAttachmentLoadOp load_op)
{
   /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are
    * testing does not exist in the image.
    */
   if (!aspect)
      return false;

   /* Attachment (or view) load operations only apply on the first subpass
    * that uses the attachment (or view); in any other subpass we always
    * need to load.
    */
   if (state->job->first_subpass > first_subpass_idx)
      return true;

   /* If the job is continuing a subpass started in another job, we always
    * need to load.
    */
   if (state->job->is_subpass_continue)
      return true;

   /* If the area is not aligned to tile boundaries, we always need to load */
   if (!state->tile_aligned_render_area)
      return true;

   /* The attachment load operation must be LOAD */
   return load_op == VK_ATTACHMENT_LOAD_OP_LOAD;
}

static inline uint32_t
v3dv_zs_buffer(bool depth, bool stencil)
{
   if (depth && stencil)
      return ZSTENCIL;
   else if (depth)
      return Z;
   else if (stencil)
      return STENCIL;
   return NONE;
}

static void
cmd_buffer_render_pass_emit_loads(struct v3dv_cmd_buffer *cmd_buffer,
                                  struct v3dv_cl *cl,
                                  uint32_t layer)
{
   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   const struct v3dv_framebuffer *framebuffer = state->framebuffer;
   const struct v3dv_render_pass *pass = state->pass;
   const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];

   assert(!pass->multiview_enabled || layer < MAX_MULTIVIEW_VIEW_COUNT);

   for (uint32_t i = 0; i < subpass->color_count; i++) {
      uint32_t attachment_idx = subpass->color_attachments[i].attachment;

      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      const struct v3dv_render_pass_attachment *attachment =
         &state->pass->attachments[attachment_idx];

      /* According to the Vulkan spec:
       *
       *    "The load operation for each sample in an attachment happens before
       *     any recorded command which accesses the sample in the first subpass
       *     where the attachment is used."
       *
       * If the load operation is CLEAR, we must only clear once on the first
       * subpass that uses the attachment (and in that case we don't LOAD).
       * After that, we always want to load so we don't lose any rendering done
       * by a previous subpass to the same attachment. We also want to load
       * if the current job is continuing subpass work started by a previous
       * job, for the same reason.
       *
       * If the render area is not aligned to tile boundaries then we have
       * tiles which are partially covered by it. In this case, we need to
       * load the tiles so we can preserve the pixels that are outside the
       * render area for any such tiles.
       */
      uint32_t first_subpass = !pass->multiview_enabled ?
         attachment->first_subpass :
         attachment->views[layer].first_subpass;

      bool needs_load = check_needs_load(state,
                                         VK_IMAGE_ASPECT_COLOR_BIT,
                                         first_subpass,
                                         attachment->desc.loadOp);
      if (needs_load) {
         struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
         cmd_buffer_render_pass_emit_load(cmd_buffer, cl, iview,
                                          layer, RENDER_TARGET_0 + i);
      }
   }

   uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
   if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
      const struct v3dv_render_pass_attachment *ds_attachment =
         &state->pass->attachments[ds_attachment_idx];

      const VkImageAspectFlags ds_aspects =
         vk_format_aspects(ds_attachment->desc.format);

      uint32_t ds_first_subpass = !pass->multiview_enabled ?
         ds_attachment->first_subpass :
         ds_attachment->views[layer].first_subpass;

      const bool needs_depth_load =
         check_needs_load(state,
                          ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                          ds_first_subpass,
                          ds_attachment->desc.loadOp);

      const bool needs_stencil_load =
         check_needs_load(state,
                          ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                          ds_first_subpass,
                          ds_attachment->desc.stencilLoadOp);

      if (needs_depth_load || needs_stencil_load) {
         struct v3dv_image_view *iview =
            framebuffer->attachments[ds_attachment_idx];
         /* From the Vulkan spec:
          *
          *   "When an image view of a depth/stencil image is used as a
          *   depth/stencil framebuffer attachment, the aspectMask is ignored
          *   and both depth and stencil image subresources are used."
          *
          * So we ignore the aspects from the subresource range of the image
          * view for the depth/stencil attachment, but we still need to
          * restrict to the aspects compatible with the render pass and the
          * image.
          */
         const uint32_t zs_buffer =
            v3dv_zs_buffer(needs_depth_load, needs_stencil_load);
         cmd_buffer_render_pass_emit_load(cmd_buffer, cl,
                                          iview, layer, zs_buffer);
      }
   }

   cl_emit(cl, END_OF_LOADS, end);
}

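/* Emits a STORE_TILE_BUFFER_GENERAL packet that writes one tile buffer back
 * to one layer of an attachment at the end of each tile, optionally clearing
 * the tile buffer afterwards. For multisample resolves we store with
 * V3D_DECIMATE_MODE_4X so the MSAA samples are resolved as part of the
 * store.
 */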
static void
cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer,
                                  struct v3dv_cl *cl,
                                  uint32_t attachment_idx,
                                  uint32_t layer,
                                  uint32_t buffer,
                                  bool clear,
                                  bool is_multisample_resolve)
{
   const struct v3dv_image_view *iview =
      cmd_buffer->state.framebuffer->attachments[attachment_idx];
   const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image;
   const struct v3d_resource_slice *slice =
      &image->slices[iview->vk.base_mip_level];
   uint32_t layer_offset = v3dv_layer_offset(image,
                                             iview->vk.base_mip_level,
                                             iview->vk.base_array_layer + layer);

   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = buffer;
      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
      store.clear_buffer_being_stored = clear;

      store.output_image_format = iview->format->rt_type;
      store.r_b_swap = iview->swap_rb;
      store.memory_format = slice->tiling;

      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         store.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == V3D_TILING_RASTER) {
         store.height_in_ub_or_stride = slice->stride;
      }

      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else if (is_multisample_resolve)
         store.decimate_mode = V3D_DECIMATE_MODE_4X;
      else
         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

static bool
check_needs_clear(const struct v3dv_cmd_buffer_state *state,
                  VkImageAspectFlags aspect,
                  uint32_t first_subpass_idx,
                  VkAttachmentLoadOp load_op,
                  bool do_clear_with_draw)
{
   /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are
    * testing does not exist in the image.
    */
   if (!aspect)
      return false;

   /* If the aspect needs to be cleared with a draw call then we won't emit
    * the clear here.
    */
   if (do_clear_with_draw)
      return false;

   /* If this is resuming a subpass started with another job, then attachment
    * load operations don't apply.
    */
   if (state->job->is_subpass_continue)
      return false;

   /* If the render area is not aligned to tile boundaries we can't use the
    * TLB for a clear.
    */
   if (!state->tile_aligned_render_area)
      return false;

   /* If this job is running in a subpass other than the first subpass in
    * which this attachment (or view) is used then attachment load operations
    * don't apply.
    */
   if (state->job->first_subpass != first_subpass_idx)
      return false;

   /* The attachment load operation must be CLEAR */
   return load_op == VK_ATTACHMENT_LOAD_OP_CLEAR;
}

static bool
check_needs_store(const struct v3dv_cmd_buffer_state *state,
                  VkImageAspectFlags aspect,
                  uint32_t last_subpass_idx,
                  VkAttachmentStoreOp store_op)
{
   /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are
    * testing does not exist in the image.
    */
   if (!aspect)
      return false;

   /* Attachment (or view) store operations only apply on the last subpass
    * where the attachment (or view) is used; in any other subpass we always
    * need to store.
    */
   if (state->subpass_idx < last_subpass_idx)
      return true;

   /* Attachment store operations only apply on the last job we emit on the
    * last subpass where the attachment is used; otherwise we always need to
    * store.
    */
   if (!state->job->is_subpass_finish)
      return true;

   /* The attachment store operation must be STORE */
   return store_op == VK_ATTACHMENT_STORE_OP_STORE;
}

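/* Emits the store sequence for the current tile: the depth/stencil store
 * first (working around GFXH-1689 for clears), then one store per color
 * attachment, including any TLB resolve stores. If nothing needs storing we
 * still emit a dummy NONE store, and any per-buffer clears we could not
 * emit are folded into a trailing CLEAR_TILE_BUFFERS packet.
 */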
static void
cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
                                   struct v3dv_cl *cl,
                                   uint32_t layer)
{
   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   struct v3dv_render_pass *pass = state->pass;
   const struct v3dv_subpass *subpass =
      &pass->subpasses[state->subpass_idx];

   bool has_stores = false;
   bool use_global_zs_clear = false;
   bool use_global_rt_clear = false;

   assert(!pass->multiview_enabled || layer < MAX_MULTIVIEW_VIEW_COUNT);

   /* FIXME: separate stencil */
   uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
   if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
      const struct v3dv_render_pass_attachment *ds_attachment =
         &state->pass->attachments[ds_attachment_idx];

      assert(state->job->first_subpass >= ds_attachment->first_subpass);
      assert(state->subpass_idx >= ds_attachment->first_subpass);
      assert(state->subpass_idx <= ds_attachment->last_subpass);

      /* From the Vulkan spec, VkImageSubresourceRange:
       *
       *   "When an image view of a depth/stencil image is used as a
       *   depth/stencil framebuffer attachment, the aspectMask is ignored
       *   and both depth and stencil image subresources are used."
       *
       * So we ignore the aspects from the subresource range of the image
       * view for the depth/stencil attachment, but we still need to
       * restrict to the aspects compatible with the render pass and the
       * image.
       */
      const VkImageAspectFlags aspects =
         vk_format_aspects(ds_attachment->desc.format);

      /* Only clear once on the first subpass that uses the attachment */
      uint32_t ds_first_subpass = !state->pass->multiview_enabled ?
         ds_attachment->first_subpass :
         ds_attachment->views[layer].first_subpass;

      bool needs_depth_clear =
         check_needs_clear(state,
                           aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                           ds_first_subpass,
                           ds_attachment->desc.loadOp,
                           subpass->do_depth_clear_with_draw);

      bool needs_stencil_clear =
         check_needs_clear(state,
                           aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                           ds_first_subpass,
                           ds_attachment->desc.stencilLoadOp,
                           subpass->do_stencil_clear_with_draw);

      /* Skip the last store if it is not required */
      uint32_t ds_last_subpass = !pass->multiview_enabled ?
         ds_attachment->last_subpass :
         ds_attachment->views[layer].last_subpass;

      bool needs_depth_store =
         check_needs_store(state,
                           aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                           ds_last_subpass,
                           ds_attachment->desc.storeOp);

      bool needs_stencil_store =
         check_needs_store(state,
                           aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                           ds_last_subpass,
                           ds_attachment->desc.stencilStoreOp);

      /* GFXH-1689: The per-buffer store command's clear buffer bit is broken
       * for depth/stencil.
       *
       * There used to be some confusion regarding the Clear Tile Buffers
       * Z/S bit also being broken, but we confirmed with Broadcom that this
       * is not the case, it was just that some other hardware bugs (that we
       * need to work around, such as GFXH-1461) could cause this bit to behave
       * incorrectly.
       *
       * There used to be another issue where the RTs bit in the Clear Tile
       * Buffers packet also cleared Z/S, but Broadcom confirmed this is
       * fixed since V3D 4.1.
       *
       * So if we have to emit a clear of depth or stencil we don't use
       * the per-buffer store clear bit, even if we need to store the buffers,
       * instead we always have to use the Clear Tile Buffers Z/S bit.
       * If we have configured the job to do early Z/S clearing, then we
       * don't want to emit any Clear Tile Buffers command at all here.
       *
       * Note that GFXH-1689 is not reproduced in the simulator, where
       * using the clear buffer bit in depth/stencil stores works fine.
       */
      use_global_zs_clear = !state->job->early_zs_clear &&
         (needs_depth_clear || needs_stencil_clear);
      if (needs_depth_store || needs_stencil_store) {
         const uint32_t zs_buffer =
            v3dv_zs_buffer(needs_depth_store, needs_stencil_store);
         cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
                                           ds_attachment_idx, layer,
                                           zs_buffer, false, false);
         has_stores = true;
      }
   }

   for (uint32_t i = 0; i < subpass->color_count; i++) {
      uint32_t attachment_idx = subpass->color_attachments[i].attachment;

      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      const struct v3dv_render_pass_attachment *attachment =
         &state->pass->attachments[attachment_idx];

      assert(state->job->first_subpass >= attachment->first_subpass);
      assert(state->subpass_idx >= attachment->first_subpass);
      assert(state->subpass_idx <= attachment->last_subpass);

      /* Only clear once on the first subpass that uses the attachment */
      uint32_t first_subpass = !pass->multiview_enabled ?
         attachment->first_subpass :
         attachment->views[layer].first_subpass;

      bool needs_clear =
         check_needs_clear(state,
                           VK_IMAGE_ASPECT_COLOR_BIT,
                           first_subpass,
                           attachment->desc.loadOp,
                           false);

      /* Skip the last store if it is not required */
      uint32_t last_subpass = !pass->multiview_enabled ?
         attachment->last_subpass :
         attachment->views[layer].last_subpass;

      bool needs_store =
         check_needs_store(state,
                           VK_IMAGE_ASPECT_COLOR_BIT,
                           last_subpass,
                           attachment->desc.storeOp);

      /* If we need to resolve this attachment emit that store first. Notice
       * that we must not request a tile buffer clear here in that case, since
       * that would clear the tile buffer before we get to emit the actual
       * color attachment store below, since the clear happens after the
       * store is completed.
       *
       * If the attachment doesn't support TLB resolves then we will have to
       * fall back to doing the resolve in a shader separately after this
       * job, so we will need to store the multisampled attachment even if
       * that wasn't requested by the client.
       */
      const bool needs_resolve =
         subpass->resolve_attachments &&
         subpass->resolve_attachments[i].attachment != VK_ATTACHMENT_UNUSED;
      if (needs_resolve && attachment->use_tlb_resolve) {
         const uint32_t resolve_attachment_idx =
            subpass->resolve_attachments[i].attachment;
         cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
                                           resolve_attachment_idx, layer,
                                           RENDER_TARGET_0 + i,
                                           false, true);
         has_stores = true;
      } else if (needs_resolve) {
         needs_store = true;
      }

      /* Emit the color attachment store if needed */
      if (needs_store) {
         cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
                                           attachment_idx, layer,
                                           RENDER_TARGET_0 + i,
                                           needs_clear && !use_global_rt_clear,
                                           false);
         has_stores = true;
      } else if (needs_clear) {
         use_global_rt_clear = true;
      }
   }

   /* We always need to emit at least one dummy store */
   if (!has_stores) {
      cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
         store.buffer_to_store = NONE;
      }
   }

   /* If we have any depth/stencil clears, or color clears that we could not
    * emit with the per-buffer clear bit, emit a single clear of the relevant
    * tile buffers here.
    */
   if (use_global_zs_clear || use_global_rt_clear) {
      cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
         clear.clear_z_stencil_buffer = use_global_zs_clear;
         clear.clear_all_render_targets = use_global_rt_clear;
      }
   }
}

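/* Builds the generic tile list that the RCL branches to for every tile:
 * implicit tile coordinates, attachment loads, a branch to the binned
 * geometry lists, and finally the attachment stores.
 */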
static void
cmd_buffer_render_pass_emit_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer,
                                         uint32_t layer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   /* Emit the generic list in our indirect state -- the rcl will just
    * have pointers into it.
    */
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(cmd_buffer, NULL);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   cmd_buffer_render_pass_emit_loads(cmd_buffer, cl, layer);

   /* The binner starts out writing tiles assuming that the initial mode
    * is triangles, so make sure that's the case.
    */
   cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
      fmt.primitive_type = LIST_TRIANGLES;
   }

   /* PTB assumes that value to be 0, but hw will not set it. */
   cl_emit(cl, SET_INSTANCEID, set) {
      set.instance_id = 0;
   }

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   cmd_buffer_render_pass_emit_stores(cmd_buffer, cl, layer);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

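/* Emits the RCL commands for a single layer: points the hardware at the
 * layer's tile allocation, emits the per-tile generic list, and then walks
 * every supertile that intersects the render area.
 */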
static void
cmd_buffer_emit_render_pass_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
                                      uint32_t layer)
{
   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;

   struct v3dv_job *job = cmd_buffer->state.job;
   struct v3dv_cl *rcl = &job->rcl;

   /* If doing multicore binning, we would need to initialize each
    * core's tile list here.
    */
   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
   const uint32_t tile_alloc_offset =
      64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
   }

   cmd_buffer_render_pass_emit_per_tile_rcl(cmd_buffer, layer);

   uint32_t supertile_w_in_pixels =
      tiling->tile_width * tiling->supertile_width;
   uint32_t supertile_h_in_pixels =
      tiling->tile_height * tiling->supertile_height;
   const uint32_t min_x_supertile =
      state->render_area.offset.x / supertile_w_in_pixels;
   const uint32_t min_y_supertile =
      state->render_area.offset.y / supertile_h_in_pixels;

   uint32_t max_render_x = state->render_area.offset.x;
   if (state->render_area.extent.width > 0)
      max_render_x += state->render_area.extent.width - 1;
   uint32_t max_render_y = state->render_area.offset.y;
   if (state->render_area.extent.height > 0)
      max_render_y += state->render_area.extent.height - 1;
   const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels;
   const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels;

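   /* Illustrative example (hypothetical numbers): with 64x64-pixel
    * supertiles and a render area at offset (0, 0) with extent 800x600,
    * the inclusive bounds are max_render = (799, 599), so we emit
    * supertile coordinates for columns 0..12 and rows 0..9.
    */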
   for (int y = min_y_supertile; y <= max_y_supertile; y++) {
      for (int x = min_x_supertile; x <= max_x_supertile; x++) {
         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
            coords.column_number_in_supertiles = x;
            coords.row_number_in_supertiles = y;
         }
      }
   }
}

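/* Picks the early-Z configuration for the RCL common config based on the
 * job's tracked depth-compare direction: LT/LE or GT/GE enable early-Z with
 * the matching test direction, while V3D_EZ_DISABLED (or a job with no
 * draws) disables it.
 */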
static void
set_rcl_early_z_config(struct v3dv_job *job,
                       bool *early_z_disable,
                       uint32_t *early_z_test_and_update_direction)
{
   /* If this is true then we have not emitted any draw calls in this job
    * and we don't get any benefits from early Z.
    */
   if (!job->decided_global_ez_enable) {
      assert(job->draw_count == 0);
      *early_z_disable = true;
      return;
   }

   switch (job->first_ez_state) {
   case V3D_EZ_UNDECIDED:
   case V3D_EZ_LT_LE:
      *early_z_disable = false;
      *early_z_test_and_update_direction = EARLY_Z_DIRECTION_LT_LE;
      break;
   case V3D_EZ_GT_GE:
      *early_z_disable = false;
      *early_z_test_and_update_direction = EARLY_Z_DIRECTION_GT_GE;
      break;
   case V3D_EZ_DISABLED:
      *early_z_disable = true;
      break;
   }
}

void
v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   const struct v3dv_framebuffer *framebuffer = state->framebuffer;

   /* We can't emit the RCL until we have a framebuffer, which we may not have
    * if we are recording a secondary command buffer. In that case, we will
    * have to wait until vkCmdExecuteCommands is called from a primary command
    * buffer.
    */
   if (!framebuffer) {
      assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
      return;
   }

   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   const uint32_t fb_layers = job->frame_tiling.layers;

   v3dv_cl_ensure_space_with_branch(&job->rcl, 200 +
                                    MAX2(fb_layers, 1) * 256 *
                                    cl_packet_length(SUPERTILE_COORDINATES));
   v3dv_return_if_oom(cmd_buffer, NULL);

   assert(state->subpass_idx < state->pass->subpass_count);
   const struct v3dv_render_pass *pass = state->pass;
   const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
   struct v3dv_cl *rcl = &job->rcl;

   /* Common config must be the first TILE_RENDERING_MODE_CFG and
    * Z_STENCIL_CLEAR_VALUES must be last. The ones in between are optional
    * updates to the previous HW state.
    */
   bool do_early_zs_clear = false;
   const uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
      config.image_width_pixels = framebuffer->width;
      config.image_height_pixels = framebuffer->height;
      config.number_of_render_targets = MAX2(subpass->color_count, 1);
      config.multisample_mode_4x = tiling->msaa;
      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;

      if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
         const struct v3dv_image_view *iview =
            framebuffer->attachments[ds_attachment_idx];
         config.internal_depth_type = iview->internal_type;

         set_rcl_early_z_config(job,
                                &config.early_z_disable,
                                &config.early_z_test_and_update_direction);

         /* Early-Z/S clear can be enabled if the job is clearing and not
          * storing (or loading) depth. If a stencil aspect is also present
          * we have the same requirements for it, however, in this case we
          * can accept stencil loadOp DONT_CARE as well, so instead of
          * checking that stencil is cleared we check that it is not loaded.
          *
          * Early-Z/S clearing is independent of Early Z/S testing, so it is
          * possible to enable one but not the other so long as their
          * respective requirements are met.
          */
         struct v3dv_render_pass_attachment *ds_attachment =
            &pass->attachments[ds_attachment_idx];

         const VkImageAspectFlags ds_aspects =
            vk_format_aspects(ds_attachment->desc.format);

         bool needs_depth_clear =
            check_needs_clear(state,
                              ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                              ds_attachment->first_subpass,
                              ds_attachment->desc.loadOp,
                              subpass->do_depth_clear_with_draw);

         bool needs_depth_store =
            check_needs_store(state,
                              ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                              ds_attachment->last_subpass,
                              ds_attachment->desc.storeOp);

         do_early_zs_clear = needs_depth_clear && !needs_depth_store;
         if (do_early_zs_clear &&
             vk_format_has_stencil(ds_attachment->desc.format)) {
            bool needs_stencil_load =
               check_needs_load(state,
                                ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                                ds_attachment->first_subpass,
                                ds_attachment->desc.stencilLoadOp);

            bool needs_stencil_store =
               check_needs_store(state,
                                 ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                                 ds_attachment->last_subpass,
                                 ds_attachment->desc.stencilStoreOp);

            do_early_zs_clear = !needs_stencil_load && !needs_stencil_store;
         }

         config.early_depth_stencil_clear = do_early_zs_clear;
      } else {
         config.early_z_disable = true;
      }
   }

   /* If we enabled early Z/S clear, then we can't emit any "Clear Tile Buffers"
    * commands with the Z/S bit set, so keep track of whether we enabled this
    * in the job so we can skip these later.
    */
   job->early_zs_clear = do_early_zs_clear;

   for (uint32_t i = 0; i < subpass->color_count; i++) {
      uint32_t attachment_idx = subpass->color_attachments[i].attachment;
      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      struct v3dv_image_view *iview =
         state->framebuffer->attachments[attachment_idx];

      const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image;
      const struct v3d_resource_slice *slice =
         &image->slices[iview->vk.base_mip_level];

      const uint32_t *clear_color =
         &state->attachments[attachment_idx].clear_value.color[0];

      uint32_t clear_pad = 0;
      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         int uif_block_height = v3d_utile_height(image->cpp) * 2;

         uint32_t implicit_padded_height =
            align(framebuffer->height, uif_block_height) / uif_block_height;

         if (slice->padded_height_of_output_image_in_uif_blocks -
             implicit_padded_height >= 15) {
            clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
         }
      }

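      /* The 128-bit clear color is split across up to three packets:
       * PART1 carries the low 56 bits, PART2 (for 64 bpp and up) the next
       * 56 bits, and PART3 (for 128 bpp, or whenever a UIF clear pad is
       * needed) the high 16 bits plus the padded height.
       */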
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
         clear.clear_color_low_32_bits = clear_color[0];
         clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
         clear.render_target_number = i;
      };

      if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
            clear.clear_color_mid_low_32_bits =
               ((clear_color[1] >> 24) | (clear_color[2] << 8));
            clear.clear_color_mid_high_24_bits =
               ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
            clear.render_target_number = i;
         };
      }

      if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
            clear.uif_padded_height_in_uif_blocks = clear_pad;
            clear.clear_color_high_16_bits = clear_color[3] >> 16;
            clear.render_target_number = i;
         };
      }
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
      v3dX(cmd_buffer_render_pass_setup_render_target)
         (cmd_buffer, 0, &rt.render_target_0_internal_bpp,
          &rt.render_target_0_internal_type, &rt.render_target_0_clamp);
      v3dX(cmd_buffer_render_pass_setup_render_target)
         (cmd_buffer, 1, &rt.render_target_1_internal_bpp,
          &rt.render_target_1_internal_type, &rt.render_target_1_clamp);
      v3dX(cmd_buffer_render_pass_setup_render_target)
         (cmd_buffer, 2, &rt.render_target_2_internal_bpp,
          &rt.render_target_2_internal_type, &rt.render_target_2_clamp);
      v3dX(cmd_buffer_render_pass_setup_render_target)
         (cmd_buffer, 3, &rt.render_target_3_internal_bpp,
          &rt.render_target_3_internal_type, &rt.render_target_3_clamp);
   }

   /* Ends rendering mode config. */
   if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
         clear.z_clear_value =
            state->attachments[ds_attachment_idx].clear_value.z;
         clear.stencil_clear_value =
            state->attachments[ds_attachment_idx].clear_value.s;
      };
   } else {
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
         clear.z_clear_value = 1.0f;
         clear.stencil_clear_value = 0;
      };
   }

   /* Always set initial block size before the first branch, which needs
    * to match the value from binning mode config.
    */
   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
      init.use_auto_chained_tile_lists = true;
      init.size_of_first_block_in_chained_tile_lists =
         TILE_ALLOCATION_BLOCK_SIZE_64B;
   }

   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
      config.number_of_bin_tile_lists = 1;
      config.total_frame_width_in_tiles = tiling->draw_tiles_x;
      config.total_frame_height_in_tiles = tiling->draw_tiles_y;

      config.supertile_width_in_tiles = tiling->supertile_width;
      config.supertile_height_in_tiles = tiling->supertile_height;

      config.total_frame_width_in_supertiles =
         tiling->frame_width_in_supertiles;
      config.total_frame_height_in_supertiles =
         tiling->frame_height_in_supertiles;
   }

   /* Start by clearing the tile buffer. */
   cl_emit(rcl, TILE_COORDINATES, coords) {
      coords.tile_column_number = 0;
      coords.tile_row_number = 0;
   }

   /* Emit an initial clear of the tile buffers. This is necessary
    * for any buffers that should be cleared (since clearing
    * normally happens at the *end* of the generic tile list), but
    * it's also nice to clear everything so the first tile doesn't
    * inherit any contents from some previous frame.
    *
    * Also, implement the GFXH-1742 workaround. There's a race in
    * the HW between the RCL updating the TLB's internal type/size
    * and the spawning of the QPU instances using the TLB's current
    * internal type/size. To make sure the QPUs get the right
    * state, we need 1 dummy store in between internal type/size
    * changes on V3D 3.x, and 2 dummy stores on 4.x.
    */
   for (int i = 0; i < 2; i++) {
      if (i > 0)
         cl_emit(rcl, TILE_COORDINATES, coords);
      cl_emit(rcl, END_OF_LOADS, end);
      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
         store.buffer_to_store = NONE;
      }
      if (i == 0 && cmd_buffer->state.tile_aligned_render_area) {
         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
            clear.clear_z_stencil_buffer = !job->early_zs_clear;
            clear.clear_all_render_targets = true;
         }
      }
      cl_emit(rcl, END_OF_TILE_MARKER, end);
   }

   cl_emit(rcl, FLUSH_VCD_CACHE, flush);

   for (int layer = 0; layer < MAX2(1, fb_layers); layer++) {
      if (subpass->view_mask == 0 || (subpass->view_mask & (1u << layer)))
         cmd_buffer_emit_render_pass_layer_rcl(cmd_buffer, layer);
   }

   cl_emit(rcl, END_OF_RENDERING, end);
}

void
v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
   /* FIXME: right now we only support one viewport. viewports[0] would
    * work now, but would need to change if we allow multiple viewports.
    */
   float *vptranslate = dynamic->viewport.translate[0];
   float *vpscale = dynamic->viewport.scale[0];

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   const uint32_t required_cl_size =
      cl_packet_length(CLIPPER_XY_SCALING) +
      cl_packet_length(CLIPPER_Z_SCALE_AND_OFFSET) +
      cl_packet_length(CLIPPER_Z_MIN_MAX_CLIPPING_PLANES) +
      cl_packet_length(VIEWPORT_OFFSET);
   v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size);
   v3dv_return_if_oom(cmd_buffer, NULL);

   cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
      clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f;
      clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f;
   }

   cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
      clip.viewport_z_offset_zc_to_zs = vptranslate[2];
      clip.viewport_z_scale_zc_to_zs = vpscale[2];
   }
   cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
      /* Vulkan's Z NDC is [0..1], unlike OpenGL which is [-1, 1] */
      float z1 = vptranslate[2];
      float z2 = vptranslate[2] + vpscale[2];
      clip.minimum_zw = MIN2(z1, z2);
      clip.maximum_zw = MAX2(z1, z2);
   }
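   /* For example (assuming the usual viewport transform where the Z
    * translate is minDepth and the Z scale is maxDepth - minDepth): a
    * viewport with minDepth = 0.2 and maxDepth = 0.8 yields z1 = 0.2 and
    * z2 = 0.8, so the zw clipping range is [0.2, 0.8].
    */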
10297ec681f3Smrg
10307ec681f3Smrg   cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
10317ec681f3Smrg      vp.viewport_centre_x_coordinate = vptranslate[0];
10327ec681f3Smrg      vp.viewport_centre_y_coordinate = vptranslate[1];
10337ec681f3Smrg   }
10347ec681f3Smrg
10357ec681f3Smrg   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEWPORT;
10367ec681f3Smrg}
10377ec681f3Smrg
10387ec681f3Smrgvoid
10397ec681f3Smrgv3dX(cmd_buffer_emit_stencil)(struct v3dv_cmd_buffer *cmd_buffer)
10407ec681f3Smrg{
10417ec681f3Smrg   struct v3dv_job *job = cmd_buffer->state.job;
10427ec681f3Smrg   assert(job);
10437ec681f3Smrg
10447ec681f3Smrg   struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
10457ec681f3Smrg   struct v3dv_dynamic_state *dynamic_state = &cmd_buffer->state.dynamic;
10467ec681f3Smrg
10477ec681f3Smrg   const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
10487ec681f3Smrg      V3DV_DYNAMIC_STENCIL_WRITE_MASK |
10497ec681f3Smrg      V3DV_DYNAMIC_STENCIL_REFERENCE;
10507ec681f3Smrg
10517ec681f3Smrg   v3dv_cl_ensure_space_with_branch(&job->bcl,
10527ec681f3Smrg                                    2 * cl_packet_length(STENCIL_CFG));
10537ec681f3Smrg   v3dv_return_if_oom(cmd_buffer, NULL);
10547ec681f3Smrg
   bool emitted_stencil = false;
   for (uint32_t i = 0; i < 2; i++) {
      if (pipeline->emit_stencil_cfg[i]) {
         if (dynamic_state->mask & dynamic_stencil_states) {
            cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
                                   pipeline->stencil_cfg[i], config) {
               if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK) {
                  config.stencil_test_mask =
                     i == 0 ? dynamic_state->stencil_compare_mask.front :
                     dynamic_state->stencil_compare_mask.back;
               }
               if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK) {
                  config.stencil_write_mask =
                     i == 0 ? dynamic_state->stencil_write_mask.front :
                     dynamic_state->stencil_write_mask.back;
               }
               if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_REFERENCE) {
                  config.stencil_ref_value =
                     i == 0 ? dynamic_state->stencil_reference.front :
                     dynamic_state->stencil_reference.back;
               }
            }
         } else {
            cl_emit_prepacked(&job->bcl, &pipeline->stencil_cfg[i]);
         }

         emitted_stencil = true;
      }
   }

   if (emitted_stencil) {
      const uint32_t dynamic_stencil_dirty_flags =
         V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK |
         V3DV_CMD_DIRTY_STENCIL_WRITE_MASK |
         V3DV_CMD_DIRTY_STENCIL_REFERENCE;
      cmd_buffer->state.dirty &= ~dynamic_stencil_dirty_flags;
   }
}

void
v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   assert(pipeline);

   if (!pipeline->depth_bias.enabled)
      return;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(DEPTH_OFFSET));
   v3dv_return_if_oom(cmd_buffer, NULL);

   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
   cl_emit(&job->bcl, DEPTH_OFFSET, bias) {
      bias.depth_offset_factor = dynamic->depth_bias.slope_factor;
      bias.depth_offset_units = dynamic->depth_bias.constant_factor;
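      /* The 256x factor below is presumably 2^(24-16): the HW depth
       * offset unit appears to be scaled for a 24-bit depth buffer, so
       * for 16-bit depth formats we scale it up so one unit still matches
       * one LSB of the attachment (the exact HW unit is an assumption
       * here).
       */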
      if (pipeline->depth_bias.is_z16)
         bias.depth_offset_units *= 256.0f;
      bias.limit = dynamic->depth_bias.depth_bias_clamp;
   }

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_DEPTH_BIAS;
}

void
v3dX(cmd_buffer_emit_line_width)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(LINE_WIDTH));
   v3dv_return_if_oom(cmd_buffer, NULL);

   cl_emit(&job->bcl, LINE_WIDTH, line) {
      line.line_width = cmd_buffer->state.dynamic.line_width;
   }

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_LINE_WIDTH;
}

void
v3dX(cmd_buffer_emit_sample_state)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   assert(pipeline);

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(SAMPLE_STATE));
   v3dv_return_if_oom(cmd_buffer, NULL);

   cl_emit(&job->bcl, SAMPLE_STATE, state) {
      state.coverage = 1.0f;
      state.mask = pipeline->sample_mask;
   }
}

void
v3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   assert(pipeline);

   const uint32_t blend_packets_size =
      cl_packet_length(BLEND_ENABLES) +
      cl_packet_length(BLEND_CONSTANT_COLOR) +
      cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS;

   v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size);
   v3dv_return_if_oom(cmd_buffer, NULL);

   if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) {
      if (pipeline->blend.enables) {
         cl_emit(&job->bcl, BLEND_ENABLES, enables) {
            enables.mask = pipeline->blend.enables;
         }
      }

      for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
         if (pipeline->blend.enables & (1 << i))
            cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]);
      }
   }

   if (pipeline->blend.needs_color_constants &&
       cmd_buffer->state.dirty & V3DV_CMD_DIRTY_BLEND_CONSTANTS) {
      struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
      cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
         color.red_f16 = _mesa_float_to_half(dynamic->blend_constants[0]);
         color.green_f16 = _mesa_float_to_half(dynamic->blend_constants[1]);
         color.blue_f16 = _mesa_float_to_half(dynamic->blend_constants[2]);
         color.alpha_f16 = _mesa_float_to_half(dynamic->blend_constants[3]);
      }
      cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_BLEND_CONSTANTS;
   }
}

void
v3dX(cmd_buffer_emit_color_write_mask)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(COLOR_WRITE_MASKS));
   v3dv_return_if_oom(cmd_buffer, NULL);

   struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
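   /* The 16-bit mask is assumed to hold one disable bit per channel, 4
    * bits (RGBA) for each of 4 render targets: a set bit masks the write.
    * color_write_enable has bits set for enabled channels, hence the
    * negation, and the pipeline's color_write_masks are expected to be
    * stored already inverted (as disable bits).
    */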
   cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
      mask.mask = (~dynamic->color_write_enable |
                   pipeline->blend.color_write_masks) & 0xffff;
   }

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE;
}

static void
emit_flat_shade_flags(struct v3dv_job *job,
                      int varying_offset,
                      uint32_t varyings,
                      enum V3DX(Varying_Flags_Action) lower,
                      enum V3DX(Varying_Flags_Action) higher)
{
   v3dv_cl_ensure_space_with_branch(&job->bcl,
                                    cl_packet_length(FLAT_SHADE_FLAGS));
   v3dv_return_if_oom(NULL, job);

   cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
      flags.varying_offset_v0 = varying_offset;
      flags.flat_shade_flags_for_varyings_v024 = varyings;
      flags.action_for_flat_shade_flags_of_lower_numbered_varyings = lower;
      flags.action_for_flat_shade_flags_of_higher_numbered_varyings = higher;
   }
}

static void
emit_noperspective_flags(struct v3dv_job *job,
                         int varying_offset,
                         uint32_t varyings,
                         enum V3DX(Varying_Flags_Action) lower,
                         enum V3DX(Varying_Flags_Action) higher)
{
   v3dv_cl_ensure_space_with_branch(&job->bcl,
                                    cl_packet_length(NON_PERSPECTIVE_FLAGS));
   v3dv_return_if_oom(NULL, job);

   cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
      flags.varying_offset_v0 = varying_offset;
      flags.non_perspective_flags_for_varyings_v024 = varyings;
      flags.action_for_non_perspective_flags_of_lower_numbered_varyings = lower;
      flags.action_for_non_perspective_flags_of_higher_numbered_varyings = higher;
   }
}

static void
emit_centroid_flags(struct v3dv_job *job,
                    int varying_offset,
                    uint32_t varyings,
                    enum V3DX(Varying_Flags_Action) lower,
                    enum V3DX(Varying_Flags_Action) higher)
{
   v3dv_cl_ensure_space_with_branch(&job->bcl,
                                    cl_packet_length(CENTROID_FLAGS));
   v3dv_return_if_oom(NULL, job);

   cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
      flags.varying_offset_v0 = varying_offset;
      flags.centroid_flags_for_varyings_v024 = varyings;
      flags.action_for_centroid_flags_of_lower_numbered_varyings = lower;
      flags.action_for_centroid_flags_of_higher_numbered_varyings = higher;
   }
}

static bool
emit_varying_flags(struct v3dv_job *job,
                   uint32_t num_flags,
                   const uint32_t *flags,
                   void (*flag_emit_callback)(struct v3dv_job *job,
                                              int varying_offset,
                                              uint32_t flags,
                                              enum V3DX(Varying_Flags_Action) lower,
                                              enum V3DX(Varying_Flags_Action) higher))
{
   bool emitted_any = false;
   for (int i = 0; i < num_flags; i++) {
      if (!flags[i])
         continue;

      if (emitted_any) {
         flag_emit_callback(job, i, flags[i],
                            V3D_VARYING_FLAGS_ACTION_UNCHANGED,
                            V3D_VARYING_FLAGS_ACTION_UNCHANGED);
      } else if (i == 0) {
         flag_emit_callback(job, i, flags[i],
                            V3D_VARYING_FLAGS_ACTION_UNCHANGED,
                            V3D_VARYING_FLAGS_ACTION_ZEROED);
      } else {
         flag_emit_callback(job, i, flags[i],
                            V3D_VARYING_FLAGS_ACTION_ZEROED,
                            V3D_VARYING_FLAGS_ACTION_ZEROED);
      }

      emitted_any = true;
   }

   return emitted_any;
}
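
/* Worked example of the pattern above: with hypothetical flag words
 * { 0x3, 0x0, 0x10, 0x0 }, the first emitted packet (varying offset 0)
 * writes word 0 and zeroes all higher-numbered words, and the second
 * packet (offset 2) writes word 2 leaving the others unchanged, so every
 * word that has no packet of its own still ends up zeroed.
 */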

void
v3dX(cmd_buffer_emit_varyings_state)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;

   struct v3d_fs_prog_data *prog_data_fs =
      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;

   const uint32_t num_flags =
      ARRAY_SIZE(prog_data_fs->flat_shade_flags);
   const uint32_t *flat_shade_flags = prog_data_fs->flat_shade_flags;
   const uint32_t *noperspective_flags = prog_data_fs->noperspective_flags;
   const uint32_t *centroid_flags = prog_data_fs->centroid_flags;

   if (!emit_varying_flags(job, num_flags, flat_shade_flags,
                           emit_flat_shade_flags)) {
      v3dv_cl_ensure_space_with_branch(
         &job->bcl, cl_packet_length(ZERO_ALL_FLAT_SHADE_FLAGS));
      v3dv_return_if_oom(cmd_buffer, NULL);

      cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
   }

   if (!emit_varying_flags(job, num_flags, noperspective_flags,
                           emit_noperspective_flags)) {
      v3dv_cl_ensure_space_with_branch(
         &job->bcl, cl_packet_length(ZERO_ALL_NON_PERSPECTIVE_FLAGS));
      v3dv_return_if_oom(cmd_buffer, NULL);

      cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
   }

   if (!emit_varying_flags(job, num_flags, centroid_flags,
                           emit_centroid_flags)) {
      v3dv_cl_ensure_space_with_branch(
         &job->bcl, cl_packet_length(ZERO_ALL_CENTROID_FLAGS));
      v3dv_return_if_oom(cmd_buffer, NULL);

      cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
   }
}

static void
job_update_ez_state(struct v3dv_job *job,
                    struct v3dv_pipeline *pipeline,
                    struct v3dv_cmd_buffer *cmd_buffer)
{
   /* If first_ez_state is V3D_EZ_DISABLED it means that we have already
    * determined that we should disable EZ completely for all draw calls in
    * this job. This will cause us to disable EZ for the entire job in the
    * Tile Rendering Mode RCL packet, and when we do that we need to make
    * sure we never emit a draw call in the job with EZ enabled in the
    * CFG_BITS packet, so ez_state must also be V3D_EZ_DISABLED.
    */
   if (job->first_ez_state == V3D_EZ_DISABLED) {
      assert(job->ez_state == V3D_EZ_DISABLED);
      return;
   }

   /* This is part of the pre draw call handling, so we should be inside a
    * render pass.
    */
   assert(cmd_buffer->state.pass);

   /* If this is the first time we update EZ state for this job we first check
    * if there is anything that requires disabling it completely for the entire
    * job (based on state that is not related to the current draw call and
    * pipeline state).
    */
   if (!job->decided_global_ez_enable) {
      job->decided_global_ez_enable = true;

      struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
      assert(state->subpass_idx < state->pass->subpass_count);
      struct v3dv_subpass *subpass = &state->pass->subpasses[state->subpass_idx];
      if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) {
         job->first_ez_state = V3D_EZ_DISABLED;
         job->ez_state = V3D_EZ_DISABLED;
         return;
      }

      /* GFXH-1918: the early-z buffer may load incorrect depth values
       * if the frame has odd width or height.
       *
       * So we need to disable EZ in this case.
       */
      const struct v3dv_render_pass_attachment *ds_attachment =
         &state->pass->attachments[subpass->ds_attachment.attachment];

      const VkImageAspectFlags ds_aspects =
         vk_format_aspects(ds_attachment->desc.format);

      bool needs_depth_load =
         check_needs_load(state,
                          ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                          ds_attachment->first_subpass,
                          ds_attachment->desc.loadOp);

      if (needs_depth_load) {
         struct v3dv_framebuffer *fb = state->framebuffer;

         if (!fb) {
            assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
            perf_debug("Loading depth aspect in a secondary command buffer "
                       "without framebuffer info disables early-z tests.\n");
            job->first_ez_state = V3D_EZ_DISABLED;
            job->ez_state = V3D_EZ_DISABLED;
            return;
         }

         if ((fb->width % 2) != 0 || (fb->height % 2) != 0) {
            perf_debug("Loading depth aspect for framebuffer with odd width "
                       "or height disables early-Z tests.\n");
            job->first_ez_state = V3D_EZ_DISABLED;
            job->ez_state = V3D_EZ_DISABLED;
            return;
         }
      }
   }

   /* Otherwise, we can decide to selectively enable or disable EZ for draw
    * calls using the CFG_BITS packet based on the bound pipeline state.
    */

   /* If the FS writes Z, then it may update against the chosen EZ direction */
   struct v3dv_shader_variant *fs_variant =
      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
   if (fs_variant->prog_data.fs->writes_z) {
      job->ez_state = V3D_EZ_DISABLED;
      return;
   }

   switch (pipeline->ez_state) {
   case V3D_EZ_UNDECIDED:
      /* If the pipeline didn't pick a direction but didn't disable, then go
       * along with the current EZ state. This allows EZ optimization for Z
       * func == EQUAL or NEVER.
       */
      break;

   case V3D_EZ_LT_LE:
   case V3D_EZ_GT_GE:
      /* If the pipeline picked a direction, then it needs to match the current
       * direction if we've decided on one.
       */
      if (job->ez_state == V3D_EZ_UNDECIDED)
         job->ez_state = pipeline->ez_state;
      else if (job->ez_state != pipeline->ez_state)
         job->ez_state = V3D_EZ_DISABLED;
      break;

   case V3D_EZ_DISABLED:
      /* If the pipeline disables EZ because of a bad Z func or stencil
       * operation, then we can't do any more EZ in this frame.
       */
      job->ez_state = V3D_EZ_DISABLED;
      break;
   }
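
   /* Example of the interplay above: if a LT_LE pipeline draws first in
    * this job and a GT_GE pipeline draws later, the mismatch flips
    * ez_state to V3D_EZ_DISABLED for all subsequent draws, while
    * first_ez_state keeps the LT_LE direction that was in effect while EZ
    * was still enabled.
    */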

   if (job->first_ez_state == V3D_EZ_UNDECIDED &&
       job->ez_state != V3D_EZ_DISABLED) {
      job->first_ez_state = job->ez_state;
   }
}

void
v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   assert(pipeline);

   job_update_ez_state(job, pipeline, cmd_buffer);

   v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS));
   v3dv_return_if_oom(cmd_buffer, NULL);

   cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) {
      config.early_z_enable = job->ez_state != V3D_EZ_DISABLED;
      config.early_z_updates_enable = config.early_z_enable &&
         pipeline->z_updates_enable;
   }
}

void
v3dX(cmd_buffer_emit_occlusion_query)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   v3dv_cl_ensure_space_with_branch(&job->bcl,
                                    cl_packet_length(OCCLUSION_QUERY_COUNTER));
   v3dv_return_if_oom(cmd_buffer, NULL);

   cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
      if (cmd_buffer->state.query.active_query.bo) {
         counter.address =
            v3dv_cl_address(cmd_buffer->state.query.active_query.bo,
                            cmd_buffer->state.query.active_query.offset);
      }
   }

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY;
}

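/* Splits the current subpass job so a barrier can be consumed before
 * whatever comes next: the job in progress is finished and the subpass is
 * resumed in a new job marked 'serialize', which is assumed to make it
 * wait for completion of previously submitted jobs; for BCL barriers,
 * 'needs_bcl_sync' is assumed to also hold back its binning stage.
 */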
static struct v3dv_job *
cmd_buffer_subpass_split_for_barrier(struct v3dv_cmd_buffer *cmd_buffer,
                                     bool is_bcl_barrier)
{
   assert(cmd_buffer->state.subpass_idx != -1);
   v3dv_cmd_buffer_finish_job(cmd_buffer);
   struct v3dv_job *job =
      v3dv_cmd_buffer_subpass_resume(cmd_buffer,
                                     cmd_buffer->state.subpass_idx);
   if (!job)
      return NULL;

   job->serialize = true;
   job->needs_bcl_sync = is_bcl_barrier;
   return job;
}

static void
cmd_buffer_copy_secondary_end_query_state(struct v3dv_cmd_buffer *primary,
                                          struct v3dv_cmd_buffer *secondary)
{
   struct v3dv_cmd_buffer_state *p_state = &primary->state;
   struct v3dv_cmd_buffer_state *s_state = &secondary->state;

   const uint32_t total_state_count =
      p_state->query.end.used_count + s_state->query.end.used_count;
   v3dv_cmd_buffer_ensure_array_state(primary,
                                      sizeof(struct v3dv_end_query_cpu_job_info),
                                      total_state_count,
                                      &p_state->query.end.alloc_count,
                                      (void **) &p_state->query.end.states);
   v3dv_return_if_oom(primary, NULL);

   for (uint32_t i = 0; i < s_state->query.end.used_count; i++) {
      const struct v3dv_end_query_cpu_job_info *s_qstate =
         &secondary->state.query.end.states[i];

      struct v3dv_end_query_cpu_job_info *p_qstate =
         &p_state->query.end.states[p_state->query.end.used_count++];

      p_qstate->pool = s_qstate->pool;
      p_qstate->query = s_qstate->query;
   }
}

void
v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary,
                                     uint32_t cmd_buffer_count,
                                     const VkCommandBuffer *cmd_buffers)
{
   assert(primary->state.job);

   /* Emit occlusion query state if needed so the draw calls inside our
    * secondaries update the counters.
    */
   bool has_occlusion_query =
      primary->state.dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY;
   if (has_occlusion_query)
      v3dX(cmd_buffer_emit_occlusion_query)(primary);

   /* FIXME: if our primary job tiling doesn't enable MSAA but any of the
    * pipelines used by the secondaries does, we need to restart the primary
    * job to enable MSAA. See cmd_buffer_restart_job_for_msaa_if_needed.
    */
   bool pending_barrier = false;
   bool pending_bcl_barrier = false;
   for (uint32_t i = 0; i < cmd_buffer_count; i++) {
      V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]);

      assert(secondary->usage_flags &
             VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT);

      list_for_each_entry(struct v3dv_job, secondary_job,
                          &secondary->jobs, list_link) {
         if (secondary_job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) {
            /* If the job is a CL, then we branch to it from the primary BCL.
             * In this case the secondary's BCL is finished with a
             * RETURN_FROM_SUB_LIST command to return back to the primary BCL
             * once we are done executing it.
             */
            assert(v3dv_cl_offset(&secondary_job->rcl) == 0);
            assert(secondary_job->bcl.bo);

            /* Sanity check that secondary BCL ends with RETURN_FROM_SUB_LIST */
            STATIC_ASSERT(cl_packet_length(RETURN_FROM_SUB_LIST) == 1);
            assert(v3dv_cl_offset(&secondary_job->bcl) >= 1);
            assert(*(((uint8_t *)secondary_job->bcl.next) - 1) ==
                   V3DX(RETURN_FROM_SUB_LIST_opcode));

            /* If this secondary has any barriers (or we had any pending barrier
             * to apply), then we can't just branch to it from the primary, we
             * need to split the primary to create a new job that can consume
             * the barriers first.
             *
             * FIXME: in this case, maybe just copy the secondary BCL without
             * the RETURN_FROM_SUB_LIST into the primary job to skip the
             * branch?
             */
            struct v3dv_job *primary_job = primary->state.job;
            if (!primary_job || secondary_job->serialize || pending_barrier) {
               const bool needs_bcl_barrier =
                  secondary_job->needs_bcl_sync || pending_bcl_barrier;
               primary_job =
                  cmd_buffer_subpass_split_for_barrier(primary,
                                                       needs_bcl_barrier);
               v3dv_return_if_oom(primary, NULL);

               /* Since we have created a new primary we need to re-emit
                * occlusion query state.
                */
               if (has_occlusion_query)
                  v3dX(cmd_buffer_emit_occlusion_query)(primary);
            }

            /* Make sure our primary job has all required BO references */
            set_foreach(secondary_job->bos, entry) {
               struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
               v3dv_job_add_bo(primary_job, bo);
            }

            /* Emit required branch instructions. We expect each of these
             * to end with a corresponding 'return from sub list' item.
             */
            list_for_each_entry(struct v3dv_bo, bcl_bo,
                                &secondary_job->bcl.bo_list, list_link) {
               v3dv_cl_ensure_space_with_branch(&primary_job->bcl,
                                                cl_packet_length(BRANCH_TO_SUB_LIST));
               v3dv_return_if_oom(primary, NULL);
               cl_emit(&primary_job->bcl, BRANCH_TO_SUB_LIST, branch) {
                  branch.address = v3dv_cl_address(bcl_bo, 0);
               }
            }

            primary_job->tmu_dirty_rcl |= secondary_job->tmu_dirty_rcl;
         } else {
            /* This is a regular job (CPU or GPU), so just finish the current
             * primary job (if any) and then add the secondary job to the
             * primary's job list right after it.
             */
            v3dv_cmd_buffer_finish_job(primary);
            v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
            if (pending_barrier) {
               secondary_job->serialize = true;
               if (pending_bcl_barrier)
                  secondary_job->needs_bcl_sync = true;
            }
         }

         pending_barrier = false;
         pending_bcl_barrier = false;
      }

      /* If the secondary has recorded any vkCmdEndQuery commands, we need to
       * copy this state to the primary so it is processed properly when the
       * current primary job is finished.
       */
      cmd_buffer_copy_secondary_end_query_state(primary, secondary);

      /* If this secondary had any pending barrier state we will need that
       * barrier state consumed with whatever comes next in the primary.
       */
      assert(secondary->state.has_barrier || !secondary->state.has_bcl_barrier);
      pending_barrier = secondary->state.has_barrier;
      pending_bcl_barrier = secondary->state.has_bcl_barrier;
   }

   if (pending_barrier) {
      primary->state.has_barrier = true;
      primary->state.has_bcl_barrier |= pending_bcl_barrier;
   }
}

static void
emit_gs_shader_state_record(struct v3dv_job *job,
                            struct v3dv_bo *assembly_bo,
                            struct v3dv_shader_variant *gs_bin,
                            struct v3dv_cl_reloc gs_bin_uniforms,
                            struct v3dv_shader_variant *gs,
                            struct v3dv_cl_reloc gs_render_uniforms)
{
   cl_emit(&job->indirect, GEOMETRY_SHADER_STATE_RECORD, shader) {
      shader.geometry_bin_mode_shader_code_address =
         v3dv_cl_address(assembly_bo, gs_bin->assembly_offset);
      shader.geometry_bin_mode_shader_4_way_threadable =
         gs_bin->prog_data.gs->base.threads == 4;
      shader.geometry_bin_mode_shader_start_in_final_thread_section =
         gs_bin->prog_data.gs->base.single_seg;
      shader.geometry_bin_mode_shader_propagate_nans = true;
      shader.geometry_bin_mode_shader_uniforms_address =
         gs_bin_uniforms;

      shader.geometry_render_mode_shader_code_address =
         v3dv_cl_address(assembly_bo, gs->assembly_offset);
      shader.geometry_render_mode_shader_4_way_threadable =
         gs->prog_data.gs->base.threads == 4;
      shader.geometry_render_mode_shader_start_in_final_thread_section =
         gs->prog_data.gs->base.single_seg;
      shader.geometry_render_mode_shader_propagate_nans = true;
      shader.geometry_render_mode_shader_uniforms_address =
         gs_render_uniforms;
   }
}

static uint8_t
v3d_gs_output_primitive(uint32_t prim_type)
{
   switch (prim_type) {
   case GL_POINTS:
      return GEOMETRY_SHADER_POINTS;
   case GL_LINE_STRIP:
      return GEOMETRY_SHADER_LINE_STRIP;
   case GL_TRIANGLE_STRIP:
      return GEOMETRY_SHADER_TRI_STRIP;
   default:
      unreachable("Unsupported primitive type");
   }
}
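
/* Note: the GL_* enums above are assumed to match how the compiler stores
 * the GS output primitive in prog_data (shader_info uses GL enums for
 * this), which is why a Vulkan driver ends up switching on GL values.
 */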

static void
emit_tes_gs_common_params(struct v3dv_job *job,
                          uint8_t gs_out_prim_type,
                          uint8_t gs_num_invocations)
{
   cl_emit(&job->indirect, TESSELLATION_GEOMETRY_COMMON_PARAMS, shader) {
      shader.tessellation_type = TESSELLATION_TYPE_TRIANGLE;
      shader.tessellation_point_mode = false;
      shader.tessellation_edge_spacing = TESSELLATION_EDGE_SPACING_EVEN;
      shader.tessellation_clockwise = true;
      shader.tessellation_invocations = 1;

      shader.geometry_shader_output_format =
         v3d_gs_output_primitive(gs_out_prim_type);
      shader.geometry_shader_instances = gs_num_invocations & 0x1F;
   }
}

static uint8_t
simd_width_to_gs_pack_mode(uint32_t width)
{
   switch (width) {
   case 16:
      return V3D_PACK_MODE_16_WAY;
   case 8:
      return V3D_PACK_MODE_8_WAY;
   case 4:
      return V3D_PACK_MODE_4_WAY;
   case 1:
      return V3D_PACK_MODE_1_WAY;
   default:
      unreachable("Invalid SIMD width");
   }
}

static void
emit_tes_gs_shader_params(struct v3dv_job *job,
                          uint32_t gs_simd,
                          uint32_t gs_vpm_output_size,
                          uint32_t gs_max_vpm_input_size_per_batch)
{
   cl_emit(&job->indirect, TESSELLATION_GEOMETRY_SHADER_PARAMS, shader) {
      shader.tcs_batch_flush_mode = V3D_TCS_FLUSH_MODE_FULLY_PACKED;
      shader.per_patch_data_column_depth = 1;
      shader.tcs_output_segment_size_in_sectors = 1;
      shader.tcs_output_segment_pack_mode = V3D_PACK_MODE_16_WAY;
      shader.tes_output_segment_size_in_sectors = 1;
      shader.tes_output_segment_pack_mode = V3D_PACK_MODE_16_WAY;
      shader.gs_output_segment_size_in_sectors = gs_vpm_output_size;
      shader.gs_output_segment_pack_mode =
         simd_width_to_gs_pack_mode(gs_simd);
      shader.tbg_max_patches_per_tcs_batch = 1;
      shader.tbg_max_extra_vertex_segs_for_patches_after_first = 0;
      shader.tbg_min_tcs_output_segments_required_in_play = 1;
      shader.tbg_min_per_patch_data_segments_required_in_play = 1;
      shader.tpg_max_patches_per_tes_batch = 1;
      shader.tpg_max_vertex_segments_per_tes_batch = 0;
      shader.tpg_max_tcs_output_segments_per_tes_batch = 1;
      shader.tpg_min_tes_output_segments_required_in_play = 1;
      shader.gbg_max_tes_output_vertex_segments_per_gs_batch =
         gs_max_vpm_input_size_per_batch;
      shader.gbg_min_gs_output_segments_required_in_play = 1;
   }
}

void
v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   struct v3dv_pipeline *pipeline = state->gfx.pipeline;
   assert(pipeline);

   struct v3dv_shader_variant *vs_variant =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
   struct v3d_vs_prog_data *prog_data_vs = vs_variant->prog_data.vs;

   struct v3dv_shader_variant *vs_bin_variant =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
   struct v3d_vs_prog_data *prog_data_vs_bin = vs_bin_variant->prog_data.vs;

   struct v3dv_shader_variant *fs_variant =
      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
   struct v3d_fs_prog_data *prog_data_fs = fs_variant->prog_data.fs;

   struct v3dv_shader_variant *gs_variant = NULL;
   struct v3dv_shader_variant *gs_bin_variant = NULL;
   struct v3d_gs_prog_data *prog_data_gs = NULL;
   struct v3d_gs_prog_data *prog_data_gs_bin = NULL;
   if (pipeline->has_gs) {
      gs_variant =
         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
      prog_data_gs = gs_variant->prog_data.gs;

      gs_bin_variant =
         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
      prog_data_gs_bin = gs_bin_variant->prog_data.gs;
   }

   /* Update the cache dirty flag based on the shader programs' data */
   job->tmu_dirty_rcl |= prog_data_vs_bin->base.tmu_dirty_rcl;
   job->tmu_dirty_rcl |= prog_data_vs->base.tmu_dirty_rcl;
   job->tmu_dirty_rcl |= prog_data_fs->base.tmu_dirty_rcl;
   if (pipeline->has_gs) {
      job->tmu_dirty_rcl |= prog_data_gs_bin->base.tmu_dirty_rcl;
      job->tmu_dirty_rcl |= prog_data_gs->base.tmu_dirty_rcl;
   }

   /* See GFXH-930 workaround below */
   uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1);

   uint32_t shader_state_record_length =
      cl_packet_length(GL_SHADER_STATE_RECORD);
   if (pipeline->has_gs) {
      shader_state_record_length +=
         cl_packet_length(GEOMETRY_SHADER_STATE_RECORD) +
         cl_packet_length(TESSELLATION_GEOMETRY_COMMON_PARAMS) +
         2 * cl_packet_length(TESSELLATION_GEOMETRY_SHADER_PARAMS);
   }

   uint32_t shader_rec_offset =
      v3dv_cl_ensure_space(&job->indirect,
                           shader_state_record_length +
                           num_elements_to_emit *
                           cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
                           32);
   v3dv_return_if_oom(cmd_buffer, NULL);
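
   /* From here on, the indirect stream receives, in order: the optional GS
    * state record and tessellation/geometry parameter packets, the
    * GL_SHADER_STATE_RECORD, and one attribute record per element.
    * shader_rec_offset marks the start of this block so the
    * GL_SHADER_STATE packet emitted to the BCL below can point at it.
    */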
   struct v3dv_bo *assembly_bo = pipeline->shared_data->assembly_bo;

   if (pipeline->has_gs) {
      emit_gs_shader_state_record(job,
                                  assembly_bo,
                                  gs_bin_variant,
                                  cmd_buffer->state.uniforms.gs_bin,
                                  gs_variant,
                                  cmd_buffer->state.uniforms.gs);

      emit_tes_gs_common_params(job,
                                prog_data_gs->out_prim_type,
                                prog_data_gs->num_invocations);

      emit_tes_gs_shader_params(job,
                                pipeline->vpm_cfg_bin.gs_width,
                                pipeline->vpm_cfg_bin.Gd,
                                pipeline->vpm_cfg_bin.Gv);

      emit_tes_gs_shader_params(job,
                                pipeline->vpm_cfg.gs_width,
                                pipeline->vpm_cfg.Gd,
                                pipeline->vpm_cfg.Gv);
   }

   struct v3dv_bo *default_attribute_values =
      pipeline->default_attribute_values != NULL ?
      pipeline->default_attribute_values :
      pipeline->device->default_attribute_float;

   cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD,
                          pipeline->shader_state_record, shader) {

      /* FIXME: we are setting these values both here and during prepacking,
       * because both cl_emit_with_prepacked and v3dvx_pack assert on their
       * minimum values. It would be good to get v3dvx_pack to assert on the
       * final value instead, if possible.
       */
      shader.min_coord_shader_input_segments_required_in_play =
         pipeline->vpm_cfg_bin.As;
      shader.min_vertex_shader_input_segments_required_in_play =
         pipeline->vpm_cfg.As;

      shader.coordinate_shader_code_address =
         v3dv_cl_address(assembly_bo, vs_bin_variant->assembly_offset);
      shader.vertex_shader_code_address =
         v3dv_cl_address(assembly_bo, vs_variant->assembly_offset);
      shader.fragment_shader_code_address =
         v3dv_cl_address(assembly_bo, fs_variant->assembly_offset);

      shader.coordinate_shader_uniforms_address = cmd_buffer->state.uniforms.vs_bin;
      shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs;
      shader.fragment_shader_uniforms_address = cmd_buffer->state.uniforms.fs;

      shader.address_of_default_attribute_values =
         v3dv_cl_address(default_attribute_values, 0);

      shader.any_shader_reads_hardware_written_primitive_id =
         (pipeline->has_gs && prog_data_gs->uses_pid) || prog_data_fs->uses_pid;
      shader.insert_primitive_id_as_first_varying_to_fragment_shader =
         !pipeline->has_gs && prog_data_fs->uses_pid;
   }

   /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */
   bool cs_loaded_any = false;
   const bool cs_uses_builtins = prog_data_vs_bin->uses_iid ||
                                 prog_data_vs_bin->uses_biid ||
                                 prog_data_vs_bin->uses_vid;
   const uint32_t packet_length =
      cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);

   uint32_t emitted_va_count = 0;
   for (uint32_t i = 0; emitted_va_count < pipeline->va_count; i++) {
      assert(i < MAX_VERTEX_ATTRIBS);

      if (pipeline->va[i].vk_format == VK_FORMAT_UNDEFINED)
         continue;

      const uint32_t binding = pipeline->va[i].binding;

      /* We store each vertex attribute in the array using its driver location
       * as index.
       */
      const uint32_t location = i;

      struct v3dv_vertex_binding *c_vb = &cmd_buffer->state.vertex_bindings[binding];

      cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD,
                             &pipeline->vertex_attrs[i * packet_length], attr) {

         assert(c_vb->buffer->mem->bo);
         attr.address = v3dv_cl_address(c_vb->buffer->mem->bo,
                                        c_vb->buffer->mem_offset +
                                        pipeline->va[i].offset +
                                        c_vb->offset);

         attr.number_of_values_read_by_coordinate_shader =
            prog_data_vs_bin->vattr_sizes[location];
         attr.number_of_values_read_by_vertex_shader =
            prog_data_vs->vattr_sizes[location];

         /* GFXH-930: At least one attribute must be enabled and read by CS
          * and VS.  If we have attributes being consumed by the VS but not
          * the CS, then set up a dummy load of the last attribute into the
          * CS's VPM inputs.  (Since CS is just dead-code-elimination compared
          * to VS, we can't have CS loading but not VS).
          *
          * GFXH-1602: first attribute must be active if using builtins.
          */
         if (prog_data_vs_bin->vattr_sizes[location])
            cs_loaded_any = true;

         if (i == 0 && cs_uses_builtins && !cs_loaded_any) {
            attr.number_of_values_read_by_coordinate_shader = 1;
            cs_loaded_any = true;
         } else if (i == pipeline->va_count - 1 && !cs_loaded_any) {
            attr.number_of_values_read_by_coordinate_shader = 1;
            cs_loaded_any = true;
         }

         attr.maximum_index = 0xffffff;
      }

      emitted_va_count++;
   }

   if (pipeline->va_count == 0) {
      /* GFXH-930: At least one attribute must be enabled and read
       * by CS and VS.  If we have no attributes being consumed by
       * the shader, set up a dummy to be loaded into the VPM.
       */
      cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
         /* Valid address of data whose value will be unused. */
         attr.address = v3dv_cl_address(job->indirect.bo, 0);

         attr.type = ATTRIBUTE_FLOAT;
         attr.stride = 0;
         attr.vec_size = 1;

         attr.number_of_values_read_by_coordinate_shader = 1;
         attr.number_of_values_read_by_vertex_shader = 1;
      }
   }

   if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) {
      v3dv_cl_ensure_space_with_branch(&job->bcl,
                                       sizeof(pipeline->vcm_cache_size));
      v3dv_return_if_oom(cmd_buffer, NULL);

      cl_emit_prepacked(&job->bcl, &pipeline->vcm_cache_size);
   }

   v3dv_cl_ensure_space_with_branch(&job->bcl,
                                    cl_packet_length(GL_SHADER_STATE));
   v3dv_return_if_oom(cmd_buffer, NULL);

   if (pipeline->has_gs) {
      cl_emit(&job->bcl, GL_SHADER_STATE_INCLUDING_GS, state) {
         state.address = v3dv_cl_address(job->indirect.bo, shader_rec_offset);
         state.number_of_attribute_arrays = num_elements_to_emit;
      }
   } else {
      cl_emit(&job->bcl, GL_SHADER_STATE, state) {
         state.address = v3dv_cl_address(job->indirect.bo, shader_rec_offset);
         state.number_of_attribute_arrays = num_elements_to_emit;
      }
   }

   cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_VERTEX_BUFFER |
                                V3DV_CMD_DIRTY_DESCRIPTOR_SETS |
                                V3DV_CMD_DIRTY_PUSH_CONSTANTS);
   cmd_buffer->state.dirty_descriptor_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS;
   cmd_buffer->state.dirty_push_constants_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS;
}

/* FIXME: C&P from v3dx_draw. Refactor to common place? */
static uint32_t
v3d_hw_prim_type(enum pipe_prim_type prim_type)
{
   switch (prim_type) {
   case PIPE_PRIM_POINTS:
   case PIPE_PRIM_LINES:
   case PIPE_PRIM_LINE_LOOP:
   case PIPE_PRIM_LINE_STRIP:
   case PIPE_PRIM_TRIANGLES:
   case PIPE_PRIM_TRIANGLE_STRIP:
   case PIPE_PRIM_TRIANGLE_FAN:
      return prim_type;

   case PIPE_PRIM_LINES_ADJACENCY:
   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
   case PIPE_PRIM_TRIANGLES_ADJACENCY:
   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
      return 8 + (prim_type - PIPE_PRIM_LINES_ADJACENCY);

   default:
      unreachable("Unsupported primitive type");
   }
}
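
/* For the non-adjacency prims the HW encoding is assumed to match the
 * pipe_prim_type value directly (POINTS = 0 ... TRIANGLE_FAN = 6), while
 * the adjacency variants occupy a contiguous range starting at 8, e.g.
 * PIPE_PRIM_LINES_ADJACENCY (10) maps to 8 and
 * PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY (13) maps to 11.
 */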
20617ec681f3Smrg
20627ec681f3Smrgvoid
20637ec681f3Smrgv3dX(cmd_buffer_emit_draw)(struct v3dv_cmd_buffer *cmd_buffer,
20647ec681f3Smrg                           struct v3dv_draw_info *info)
20657ec681f3Smrg{
20667ec681f3Smrg   struct v3dv_job *job = cmd_buffer->state.job;
20677ec681f3Smrg   assert(job);
20687ec681f3Smrg
20697ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
20707ec681f3Smrg   struct v3dv_pipeline *pipeline = state->gfx.pipeline;
20717ec681f3Smrg
20727ec681f3Smrg   assert(pipeline);
20737ec681f3Smrg
20747ec681f3Smrg   uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);
20757ec681f3Smrg
20767ec681f3Smrg   if (info->first_instance > 0) {
20777ec681f3Smrg      v3dv_cl_ensure_space_with_branch(
20787ec681f3Smrg         &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE));
20797ec681f3Smrg      v3dv_return_if_oom(cmd_buffer, NULL);
20807ec681f3Smrg
20817ec681f3Smrg      cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
20827ec681f3Smrg         base.base_instance = info->first_instance;
20837ec681f3Smrg         base.base_vertex = 0;
20847ec681f3Smrg      }
20857ec681f3Smrg   }
20867ec681f3Smrg
20877ec681f3Smrg   if (info->instance_count > 1) {
20887ec681f3Smrg      v3dv_cl_ensure_space_with_branch(
20897ec681f3Smrg         &job->bcl, cl_packet_length(VERTEX_ARRAY_INSTANCED_PRIMS));
20907ec681f3Smrg      v3dv_return_if_oom(cmd_buffer, NULL);
20917ec681f3Smrg
20927ec681f3Smrg      cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
20937ec681f3Smrg         prim.mode = hw_prim_type;
20947ec681f3Smrg         prim.index_of_first_vertex = info->first_vertex;
20957ec681f3Smrg         prim.number_of_instances = info->instance_count;
20967ec681f3Smrg         prim.instance_length = info->vertex_count;
20977ec681f3Smrg      }
20987ec681f3Smrg   } else {
20997ec681f3Smrg      v3dv_cl_ensure_space_with_branch(
21007ec681f3Smrg         &job->bcl, cl_packet_length(VERTEX_ARRAY_PRIMS));
21017ec681f3Smrg      v3dv_return_if_oom(cmd_buffer, NULL);
21027ec681f3Smrg      cl_emit(&job->bcl, VERTEX_ARRAY_PRIMS, prim) {
21037ec681f3Smrg         prim.mode = hw_prim_type;
21047ec681f3Smrg         prim.length = info->vertex_count;
21057ec681f3Smrg         prim.index_of_first_vertex = info->first_vertex;
21067ec681f3Smrg      }
21077ec681f3Smrg   }
21087ec681f3Smrg}
21097ec681f3Smrg
21107ec681f3Smrgvoid
21117ec681f3Smrgv3dX(cmd_buffer_emit_index_buffer)(struct v3dv_cmd_buffer *cmd_buffer)
21127ec681f3Smrg{
21137ec681f3Smrg   struct v3dv_job *job = cmd_buffer->state.job;
21147ec681f3Smrg   assert(job);
21157ec681f3Smrg
21167ec681f3Smrg   /* We flag all state as dirty when we create a new job so make sure we
21177ec681f3Smrg    * have a valid index buffer before attempting to emit state for it.
21187ec681f3Smrg    */
   struct v3dv_buffer *ibuffer =
      v3dv_buffer_from_handle(cmd_buffer->state.index_buffer.buffer);
   if (ibuffer) {
      v3dv_cl_ensure_space_with_branch(
         &job->bcl, cl_packet_length(INDEX_BUFFER_SETUP));
      v3dv_return_if_oom(cmd_buffer, NULL);

      const uint32_t offset = cmd_buffer->state.index_buffer.offset;
      cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) {
         ib.address = v3dv_cl_address(ibuffer->mem->bo,
                                      ibuffer->mem_offset + offset);
         ib.size = ibuffer->mem->bo->size;
      }
   }

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_INDEX_BUFFER;
}

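/* Records an indexed draw into the job's BCL. The parameters mirror those
 * of vkCmdDrawIndexed().
 */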
void
v3dX(cmd_buffer_emit_draw_indexed)(struct v3dv_cmd_buffer *cmd_buffer,
                                   uint32_t indexCount,
                                   uint32_t instanceCount,
                                   uint32_t firstIndex,
                                   int32_t vertexOffset,
                                   uint32_t firstInstance)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);
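   /* Map the index size in bytes to the packet's log2-style index type
    * encoding: ffs() gives 1/2/3 for sizes 1/2/4, so subtracting 1 yields
    * 0 = 8-bit, 1 = 16-bit, 2 = 32-bit.
    */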
   uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1;
   uint32_t index_offset = firstIndex * cmd_buffer->state.index_buffer.index_size;

   if (vertexOffset != 0 || firstInstance != 0) {
      v3dv_cl_ensure_space_with_branch(
         &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE));
      v3dv_return_if_oom(cmd_buffer, NULL);

      cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
         base.base_instance = firstInstance;
         base.base_vertex = vertexOffset;
      }
   }

   if (instanceCount == 1) {
      v3dv_cl_ensure_space_with_branch(
         &job->bcl, cl_packet_length(INDEXED_PRIM_LIST));
      v3dv_return_if_oom(cmd_buffer, NULL);

      cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) {
         prim.index_type = index_type;
         prim.length = indexCount;
         prim.index_offset = index_offset;
         prim.mode = hw_prim_type;
         prim.enable_primitive_restarts = pipeline->primitive_restart;
      }
   } else if (instanceCount > 1) {
      v3dv_cl_ensure_space_with_branch(
         &job->bcl, cl_packet_length(INDEXED_INSTANCED_PRIM_LIST));
      v3dv_return_if_oom(cmd_buffer, NULL);

      cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) {
         prim.index_type = index_type;
         prim.index_offset = index_offset;
         prim.mode = hw_prim_type;
         prim.enable_primitive_restarts = pipeline->primitive_restart;
         prim.number_of_instances = instanceCount;
         prim.instance_length = indexCount;
      }
   }
}

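/* Records an indirect non-indexed draw: the hardware reads the draw
 * parameters directly from the given buffer, so they never need to be
 * known on the CPU.
 */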
void
v3dX(cmd_buffer_emit_draw_indirect)(struct v3dv_cmd_buffer *cmd_buffer,
                                    struct v3dv_buffer *buffer,
                                    VkDeviceSize offset,
                                    uint32_t drawCount,
                                    uint32_t stride)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);

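   /* Each record in the indirect buffer uses the VkDrawIndirectCommand
    * layout (vertexCount, instanceCount, firstVertex, firstInstance),
    * with consecutive records 'stride' bytes apart.
    */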
   v3dv_cl_ensure_space_with_branch(
      &job->bcl, cl_packet_length(INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS));
   v3dv_return_if_oom(cmd_buffer, NULL);

   cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
      prim.mode = hw_prim_type;
      prim.number_of_draw_indirect_array_records = drawCount;
      prim.stride_in_multiples_of_4_bytes = stride >> 2;
      prim.address = v3dv_cl_address(buffer->mem->bo,
                                     buffer->mem_offset + offset);
   }
}

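/* Records an indirect indexed draw; the indexed counterpart of
 * cmd_buffer_emit_draw_indirect above.
 */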
void
v3dX(cmd_buffer_emit_indexed_indirect)(struct v3dv_cmd_buffer *cmd_buffer,
                                       struct v3dv_buffer *buffer,
                                       VkDeviceSize offset,
                                       uint32_t drawCount,
                                       uint32_t stride)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);
   uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1;

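   /* Records here follow the VkDrawIndexedIndirectCommand layout
    * (indexCount, instanceCount, firstIndex, vertexOffset, firstInstance),
    * spaced 'stride' bytes apart.
    */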
   v3dv_cl_ensure_space_with_branch(
      &job->bcl, cl_packet_length(INDIRECT_INDEXED_INSTANCED_PRIM_LIST));
   v3dv_return_if_oom(cmd_buffer, NULL);

   cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) {
      prim.index_type = index_type;
      prim.mode = hw_prim_type;
      prim.enable_primitive_restarts = pipeline->primitive_restart;
      prim.number_of_draw_indirect_indexed_records = drawCount;
      prim.stride_in_multiples_of_4_bytes = stride >> 2;
      prim.address = v3dv_cl_address(buffer->mem->bo,
                                     buffer->mem_offset + offset);
   }
}

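/* Looks up the internal bpp, type and clamp configuration for one of the
 * current subpass' color render targets. If the slot is unused or beyond
 * the subpass' color attachment count, the output parameters are left
 * untouched.
 */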
void
v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer,
                                                 int rt,
                                                 uint32_t *rt_bpp,
                                                 uint32_t *rt_type,
                                                 uint32_t *rt_clamp)
{
   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;

   assert(state->subpass_idx < state->pass->subpass_count);
   const struct v3dv_subpass *subpass =
      &state->pass->subpasses[state->subpass_idx];

   if (rt >= subpass->color_count)
      return;

   struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt];
   const uint32_t attachment_idx = attachment->attachment;
   if (attachment_idx == VK_ATTACHMENT_UNUSED)
      return;

   const struct v3dv_framebuffer *framebuffer = state->framebuffer;
   assert(attachment_idx < framebuffer->attachment_count);
   struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
   assert(iview->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT);

   *rt_bpp = iview->internal_bpp;
   *rt_type = iview->internal_type;
   if (vk_format_is_int(iview->vk.format))
      *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT;
   else if (vk_format_is_srgb(iview->vk.format))
      *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM;
   else
      *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
}