/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "util/u_pack_color.h"
#include "vk_format_info.h"
#include "vk_util.h"

const struct v3dv_dynamic_state default_dynamic_state = {
   .viewport = {
      .count = 0,
   },
   .scissor = {
      .count = 0,
   },
   .stencil_compare_mask = {
      .front = ~0u,
      .back = ~0u,
   },
   .stencil_write_mask = {
      .front = ~0u,
      .back = ~0u,
   },
   .stencil_reference = {
      .front = 0u,
      .back = 0u,
   },
   .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
   .depth_bias = {
      .constant_factor = 0.0f,
      .depth_bias_clamp = 0.0f,
      .slope_factor = 0.0f,
   },
   .line_width = 1.0f,
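   /* One enable bit per color channel (RGBA) for each of the
    * V3D_MAX_DRAW_BUFFERS render targets; all bits set means color writes
    * are enabled for every channel of every render target.
    */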
   .color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1,
};

void
v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo)
{
   if (!bo)
      return;

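   /* job->bo_handle_mask acts as a coarse Bloom-filter style check: each BO
    * sets one bit derived from its handle. If the BO's bit is not set in the
    * mask, the BO cannot be in the set already, so we can skip the more
    * expensive set lookup.
    */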
   if (job->bo_handle_mask & bo->handle_bit) {
      if (_mesa_set_search(job->bos, bo))
         return;
   }

   _mesa_set_add(job->bos, bo);
   job->bo_count++;
   job->bo_handle_mask |= bo->handle_bit;
}

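/* Like v3dv_job_add_bo(), but skips the duplicate check. Callers must
 * guarantee that the BO is not already in the job's BO set (e.g. because it
 * was just allocated), since otherwise bo_count would be bumped twice for
 * the same BO.
 */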
void
v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo)
{
   assert(bo);
   _mesa_set_add(job->bos, bo);
   job->bo_count++;
   job->bo_handle_mask |= bo->handle_bit;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateCommandPool(VkDevice _device,
                       const VkCommandPoolCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkCommandPool *pCmdPool)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_cmd_pool *pool;

   /* We only support one queue */
   assert(pCreateInfo->queueFamilyIndex == 0);

   pool = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pool),
                           VK_OBJECT_TYPE_COMMAND_POOL);
   if (pool == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      pool->alloc = *pAllocator;
   else
      pool->alloc = device->vk.alloc;

   list_inithead(&pool->cmd_buffers);

   *pCmdPool = v3dv_cmd_pool_to_handle(pool);

   return VK_SUCCESS;
}

static void
cmd_buffer_init(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_device *device,
                struct v3dv_cmd_pool *pool,
                VkCommandBufferLevel level)
{
   /* Do not reset the base object! If we are calling this from a command
    * buffer reset, that would reset the loader's dispatch table for the
    * command buffer, as well as any other relevant info from vk_object_base.
    */
   const uint32_t base_size = sizeof(struct vk_command_buffer);
   uint8_t *cmd_buffer_driver_start = ((uint8_t *) cmd_buffer) + base_size;
   memset(cmd_buffer_driver_start, 0, sizeof(*cmd_buffer) - base_size);

   cmd_buffer->device = device;
   cmd_buffer->pool = pool;
   cmd_buffer->level = level;

   list_inithead(&cmd_buffer->private_objs);
   list_inithead(&cmd_buffer->jobs);
   list_inithead(&cmd_buffer->list_link);

   assert(pool);
   list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);

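   /* A subpass index of -1 means we are not recording inside a subpass */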
   cmd_buffer->state.subpass_idx = -1;
   cmd_buffer->state.meta.subpass_idx = -1;

   cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_INITIALIZED;
}

static VkResult
cmd_buffer_create(struct v3dv_device *device,
                  struct v3dv_cmd_pool *pool,
                  VkCommandBufferLevel level,
                  VkCommandBuffer *pCommandBuffer)
{
   struct v3dv_cmd_buffer *cmd_buffer;
   cmd_buffer = vk_zalloc2(&device->vk.alloc,
                           &pool->alloc,
                           sizeof(*cmd_buffer),
                           8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cmd_buffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result;
   result = vk_command_buffer_init(&cmd_buffer->vk, &device->vk);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);
      return result;
   }

   cmd_buffer_init(cmd_buffer, device, pool, level);

   *pCommandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer);

   return VK_SUCCESS;
}

static void
job_destroy_gpu_cl_resources(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_GPU_CL ||
          job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);

   v3dv_cl_destroy(&job->bcl);
   v3dv_cl_destroy(&job->rcl);
   v3dv_cl_destroy(&job->indirect);

   /* Since we don't ref BOs when we add them to the command buffer, don't
    * unref them here either. BOs will be freed when their corresponding API
    * objects are destroyed.
    */
   _mesa_set_destroy(job->bos, NULL);

   v3dv_bo_free(job->device, job->tile_alloc);
   v3dv_bo_free(job->device, job->tile_state);
}

static void
job_destroy_cloned_gpu_cl_resources(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_GPU_CL);

   list_for_each_entry_safe(struct v3dv_bo, bo, &job->bcl.bo_list, list_link) {
      list_del(&bo->list_link);
      vk_free(&job->device->vk.alloc, bo);
   }

   list_for_each_entry_safe(struct v3dv_bo, bo, &job->rcl.bo_list, list_link) {
      list_del(&bo->list_link);
      vk_free(&job->device->vk.alloc, bo);
   }

   list_for_each_entry_safe(struct v3dv_bo, bo, &job->indirect.bo_list, list_link) {
      list_del(&bo->list_link);
      vk_free(&job->device->vk.alloc, bo);
   }
}

static void
job_destroy_gpu_csd_resources(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
   assert(job->cmd_buffer);

   v3dv_cl_destroy(&job->indirect);

   _mesa_set_destroy(job->bos, NULL);

   if (job->csd.shared_memory)
      v3dv_bo_free(job->device, job->csd.shared_memory);
}

static void
job_destroy_cpu_wait_events_resources(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   assert(job->cmd_buffer);
   vk_free(&job->cmd_buffer->device->vk.alloc, job->cpu.event_wait.events);
}

static void
job_destroy_cpu_csd_indirect_resources(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT);
   assert(job->cmd_buffer);
   v3dv_job_destroy(job->cpu.csd_indirect.csd_job);
}

void
v3dv_job_destroy(struct v3dv_job *job)
{
   assert(job);

   list_del(&job->list_link);

   /* Cloned jobs don't make deep copies of the original jobs, so they don't
    * own any of their resources. However, they do allocate clones of BO
    * structs, so make sure we free those.
    */
   if (!job->is_clone) {
      switch (job->type) {
      case V3DV_JOB_TYPE_GPU_CL:
      case V3DV_JOB_TYPE_GPU_CL_SECONDARY:
         job_destroy_gpu_cl_resources(job);
         break;
      case V3DV_JOB_TYPE_GPU_CSD:
         job_destroy_gpu_csd_resources(job);
         break;
      case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
         job_destroy_cpu_wait_events_resources(job);
         break;
      case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
         job_destroy_cpu_csd_indirect_resources(job);
         break;
      default:
         break;
      }
   } else {
      /* Cloned jobs */
      if (job->type == V3DV_JOB_TYPE_GPU_CL)
         job_destroy_cloned_gpu_cl_resources(job);
   }

   vk_free(&job->device->vk.alloc, job);
}

void
v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
                                uint64_t obj,
                                v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb)
{
   struct v3dv_cmd_buffer_private_obj *pobj =
      vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*pobj), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!pobj) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return;
   }

   pobj->obj = obj;
   pobj->destroy_cb = destroy_cb;

   list_addtail(&pobj->list_link, &cmd_buffer->private_objs);
}
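
/* Example (illustrative, not from this file): handing a temporary image view
 * created for a meta operation over to the command buffer, so that it is
 * destroyed together with it:
 *
 *    v3dv_cmd_buffer_add_private_obj(
 *       cmd_buffer, (uintptr_t)view,
 *       (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
 */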

static void
cmd_buffer_destroy_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_cmd_buffer_private_obj *pobj)
{
   assert(pobj && pobj->obj && pobj->destroy_cb);
   pobj->destroy_cb(v3dv_device_to_handle(cmd_buffer->device),
                    pobj->obj,
                    &cmd_buffer->device->vk.alloc);
   list_del(&pobj->list_link);
   vk_free(&cmd_buffer->device->vk.alloc, pobj);
}

static void
cmd_buffer_free_resources(struct v3dv_cmd_buffer *cmd_buffer)
{
   list_for_each_entry_safe(struct v3dv_job, job,
                            &cmd_buffer->jobs, list_link) {
      v3dv_job_destroy(job);
   }

   if (cmd_buffer->state.job)
      v3dv_job_destroy(cmd_buffer->state.job);

   if (cmd_buffer->state.attachments)
      vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);

   if (cmd_buffer->state.query.end.alloc_count > 0)
      vk_free(&cmd_buffer->device->vk.alloc, cmd_buffer->state.query.end.states);

   if (cmd_buffer->push_constants_resource.bo)
      v3dv_bo_free(cmd_buffer->device, cmd_buffer->push_constants_resource.bo);

   list_for_each_entry_safe(struct v3dv_cmd_buffer_private_obj, pobj,
                            &cmd_buffer->private_objs, list_link) {
      cmd_buffer_destroy_private_obj(cmd_buffer, pobj);
   }

   if (cmd_buffer->state.meta.attachments) {
      assert(cmd_buffer->state.meta.attachment_alloc_count > 0);
      vk_free(&cmd_buffer->device->vk.alloc, cmd_buffer->state.meta.attachments);
   }
}

static void
cmd_buffer_destroy(struct v3dv_cmd_buffer *cmd_buffer)
{
   list_del(&cmd_buffer->pool_link);
   cmd_buffer_free_resources(cmd_buffer);
   vk_command_buffer_finish(&cmd_buffer->vk);
   vk_free2(&cmd_buffer->device->vk.alloc, &cmd_buffer->pool->alloc,
            cmd_buffer);
}

static bool
attachment_list_is_subset(struct v3dv_subpass_attachment *l1, uint32_t l1_count,
                          struct v3dv_subpass_attachment *l2, uint32_t l2_count)
{
   for (uint32_t i = 0; i < l1_count; i++) {
      uint32_t attachment_idx = l1[i].attachment;
      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      uint32_t j;
      for (j = 0; j < l2_count; j++) {
         if (l2[j].attachment == attachment_idx)
            break;
      }
      if (j == l2_count)
         return false;
   }

   return true;
}

static bool
cmd_buffer_can_merge_subpass(struct v3dv_cmd_buffer *cmd_buffer,
                             uint32_t subpass_idx)
{
   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   assert(state->pass);

   const struct v3dv_physical_device *physical_device =
      &cmd_buffer->device->instance->physicalDevice;

   if (cmd_buffer->level != VK_COMMAND_BUFFER_LEVEL_PRIMARY)
      return false;

   if (!cmd_buffer->state.job)
      return false;

   if (cmd_buffer->state.job->always_flush)
      return false;

   if (!physical_device->options.merge_jobs)
      return false;

   /* Each render pass starts a new job */
   if (subpass_idx == 0)
      return false;

   /* Two subpasses can be merged in the same job if we can emit a single RCL
    * for them (since the RCL includes the END_OF_RENDERING command that
    * triggers the "render job finished" interrupt). We can do this so long
    * as both subpasses render against the same attachments.
    */
   assert(state->subpass_idx == subpass_idx - 1);
   struct v3dv_subpass *prev_subpass = &state->pass->subpasses[state->subpass_idx];
   struct v3dv_subpass *subpass = &state->pass->subpasses[subpass_idx];

   /* Don't merge if the subpasses have different view masks, since in that
    * case the framebuffer setup is different and we need to emit different
    * RCLs.
    */
   if (subpass->view_mask != prev_subpass->view_mask)
      return false;

   /* Because the list of subpass attachments can include VK_ATTACHMENT_UNUSED,
    * we need to check that for each subpass all its used attachments are
    * used by the other subpass.
    */
   bool compatible =
      attachment_list_is_subset(prev_subpass->color_attachments,
                                prev_subpass->color_count,
                                subpass->color_attachments,
                                subpass->color_count);
   if (!compatible)
      return false;

   compatible =
      attachment_list_is_subset(subpass->color_attachments,
                                subpass->color_count,
                                prev_subpass->color_attachments,
                                prev_subpass->color_count);
   if (!compatible)
      return false;

   if (subpass->ds_attachment.attachment !=
       prev_subpass->ds_attachment.attachment)
      return false;

   /* FIXME: Since some attachment formats can't be resolved using the TLB we
    * need to emit separate resolve jobs for them and that would not be
    * compatible with subpass merges. We could fix that by testing if any of
    * the attachments to resolve don't support TLB resolves.
    */
   if (prev_subpass->resolve_attachments || subpass->resolve_attachments)
      return false;

   return true;
}

/**
 * Computes and sets the job frame tiling information required to set up
 * frame binning and rendering.
 */
static struct v3dv_frame_tiling *
job_compute_frame_tiling(struct v3dv_job *job,
                         uint32_t width,
                         uint32_t height,
                         uint32_t layers,
                         uint32_t render_target_count,
                         uint8_t max_internal_bpp,
                         bool msaa)
{
   static const uint8_t tile_sizes[] = {
      64, 64,
      64, 32,
      32, 32,
      32, 16,
      16, 16,
      16,  8,
       8,  8
   };
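
   /* tile_sizes lists (width, height) pairs from 64x64 down to 8x8; each
    * index step halves the tile area. More render targets, MSAA, and wider
    * internal formats all increase the per-tile memory footprint, so each
    * of them pushes the index below towards smaller tiles.
    */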

   assert(job);
   struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   tiling->width = width;
   tiling->height = height;
   tiling->layers = layers;
   tiling->render_target_count = render_target_count;
   tiling->msaa = msaa;

   uint32_t tile_size_index = 0;

   if (render_target_count > 2)
      tile_size_index += 2;
   else if (render_target_count > 1)
      tile_size_index += 1;

   if (msaa)
      tile_size_index += 2;

   tiling->internal_bpp = max_internal_bpp;
   tile_size_index += tiling->internal_bpp;
   assert(tile_size_index < ARRAY_SIZE(tile_sizes) / 2);

   tiling->tile_width = tile_sizes[tile_size_index * 2];
   tiling->tile_height = tile_sizes[tile_size_index * 2 + 1];

   tiling->draw_tiles_x = DIV_ROUND_UP(width, tiling->tile_width);
   tiling->draw_tiles_y = DIV_ROUND_UP(height, tiling->tile_height);

   /* Size up our supertiles until we get under the limit */
   const uint32_t max_supertiles = 256;
   tiling->supertile_width = 1;
   tiling->supertile_height = 1;
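   /* Start with 1x1 supertiles (measured in tiles) and grow the smaller
    * dimension first, keeping supertiles roughly square, until the frame
    * fits within max_supertiles.
    */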
   for (;;) {
      tiling->frame_width_in_supertiles =
         DIV_ROUND_UP(tiling->draw_tiles_x, tiling->supertile_width);
      tiling->frame_height_in_supertiles =
         DIV_ROUND_UP(tiling->draw_tiles_y, tiling->supertile_height);
      const uint32_t num_supertiles = tiling->frame_width_in_supertiles *
                                      tiling->frame_height_in_supertiles;
      if (num_supertiles < max_supertiles)
         break;

      if (tiling->supertile_width < tiling->supertile_height)
         tiling->supertile_width++;
      else
         tiling->supertile_height++;
   }

   return tiling;
}

void
v3dv_job_start_frame(struct v3dv_job *job,
                     uint32_t width,
                     uint32_t height,
                     uint32_t layers,
                     bool allocate_tile_state_for_all_layers,
                     uint32_t render_target_count,
                     uint8_t max_internal_bpp,
                     bool msaa)
{
   assert(job);

   /* Start by computing the frame tiling spec for this job */
   const struct v3dv_frame_tiling *tiling =
      job_compute_frame_tiling(job,
                               width, height, layers,
                               render_target_count, max_internal_bpp, msaa);

   v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
   v3dv_return_if_oom(NULL, job);

   /* We only need to allocate tile state for all layers if the binner
    * writes primitives to layers other than the first. This can only be
    * done using layered rendering (writing gl_Layer from a geometry shader),
    * so for other cases of multilayered framebuffers (typically with
    * meta copy/clear operations) that won't use layered rendering, we only
    * need one layer's worth of tile state for the binner.
    */
   if (!allocate_tile_state_for_all_layers)
      layers = 1;

   /* The PTB will request the tile alloc initial size per tile at start
    * of tile binning.
    */
   uint32_t tile_alloc_size = 64 * tiling->layers *
                              tiling->draw_tiles_x *
                              tiling->draw_tiles_y;

   /* The PTB allocates in aligned 4k chunks after the initial setup. */
   tile_alloc_size = align(tile_alloc_size, 4096);

   /* Include the first two chunk allocations that the PTB does so that
    * we definitely clear the OOM condition before triggering one (the HW
    * won't trigger OOM during the first allocations).
    */
   tile_alloc_size += 8192;

   /* For performance, allocate some extra initial memory after the PTB's
    * minimal allocations, so that we hopefully don't have to block the
    * GPU on the kernel handling an OOM signal.
    */
   tile_alloc_size += 512 * 1024;

   job->tile_alloc = v3dv_bo_alloc(job->device, tile_alloc_size,
                                   "tile_alloc", true);
   if (!job->tile_alloc) {
      v3dv_flag_oom(NULL, job);
      return;
   }

   v3dv_job_add_bo_unchecked(job, job->tile_alloc);

   const uint32_t tsda_per_tile_size = 256;
   const uint32_t tile_state_size = tiling->layers *
                                    tiling->draw_tiles_x *
                                    tiling->draw_tiles_y *
                                    tsda_per_tile_size;
   job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA", true);
   if (!job->tile_state) {
      v3dv_flag_oom(NULL, job);
      return;
   }

   v3dv_job_add_bo_unchecked(job, job->tile_state);

   v3dv_X(job->device, job_emit_binning_prolog)(job, tiling, layers);

   job->ez_state = V3D_EZ_UNDECIDED;
   job->first_ez_state = V3D_EZ_UNDECIDED;
}

static void
cmd_buffer_end_render_pass_frame(struct v3dv_cmd_buffer *cmd_buffer)
{
   assert(cmd_buffer->state.job);

   /* Typically, we have a single job for each subpass and we emit the job's RCL
    * here when we are ending the frame for the subpass. However, some commands
    * such as vkCmdClearAttachments need to run in their own separate job and
    * they emit their own RCL even if they execute inside a subpass. In this
    * scenario, we don't want to emit the subpass RCL when we end the frame for
    * those jobs, so we only emit the subpass RCL if the job has not recorded
    * any RCL commands of its own.
    */
   if (v3dv_cl_offset(&cmd_buffer->state.job->rcl) == 0)
      v3dv_X(cmd_buffer->device, cmd_buffer_emit_render_pass_rcl)(cmd_buffer);

   v3dv_X(cmd_buffer->device, job_emit_binning_flush)(cmd_buffer->state.job);
}

struct v3dv_job *
v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
                               enum v3dv_job_type type,
                               struct v3dv_cmd_buffer *cmd_buffer,
                               uint32_t subpass_idx)
{
   struct v3dv_job *job = vk_zalloc(&device->vk.alloc,
                                    sizeof(struct v3dv_job), 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!job) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return NULL;
   }

   v3dv_job_init(job, type, device, cmd_buffer, subpass_idx);
   return job;
}

static void
cmd_buffer_add_cpu_jobs_for_pending_state(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;

   if (state->query.end.used_count > 0) {
      const uint32_t query_count = state->query.end.used_count;
      for (uint32_t i = 0; i < query_count; i++) {
         assert(i < state->query.end.used_count);
         struct v3dv_job *job =
            v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
                                           V3DV_JOB_TYPE_CPU_END_QUERY,
                                           cmd_buffer, -1);
         v3dv_return_if_oom(cmd_buffer, NULL);

         job->cpu.query_end = state->query.end.states[i];
         list_addtail(&job->list_link, &cmd_buffer->jobs);
      }
   }
}

void
v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   if (!job)
      return;

   if (cmd_buffer->state.oom) {
      v3dv_job_destroy(job);
      cmd_buffer->state.job = NULL;
      return;
   }

   /* If we have created a job for a command buffer then we should have
    * recorded something into it: if the job was started in a render pass, it
    * should at least have the start frame commands, otherwise, it should have
    * a transfer command. The only exception is secondary command buffers
    * inside a render pass.
    */
   assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY ||
          v3dv_cl_offset(&job->bcl) > 0);

   /* When we merge multiple subpasses into the same job we must only emit one
    * RCL, so we do that here, once we have decided that we need to finish the
    * job. Any rendering that happens outside a render pass is never merged, so
    * the RCL should have been emitted by the time we got here.
    */
   assert(v3dv_cl_offset(&job->rcl) != 0 || cmd_buffer->state.pass);

   /* If we are finishing a job inside a render pass we have two scenarios:
    *
    * 1. It is a regular CL, in which case we will submit the job to the GPU,
    *    so we may need to generate an RCL and add a binning flush.
    *
    * 2. It is a partial CL recorded in a secondary command buffer, in which
    *    case we are not submitting it directly to the GPU but rather branch to
    *    it from a primary command buffer. In this case we just want to end
    *    the BCL with a RETURN_FROM_SUB_LIST and the RCL and binning flush
    *    will be in the primary job that branches to this CL.
    */
   if (cmd_buffer->state.pass) {
      if (job->type == V3DV_JOB_TYPE_GPU_CL) {
         cmd_buffer_end_render_pass_frame(cmd_buffer);
      } else {
         assert(job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
         v3dv_X(cmd_buffer->device, cmd_buffer_end_render_pass_secondary)(cmd_buffer);
      }
   }

   list_addtail(&job->list_link, &cmd_buffer->jobs);
   cmd_buffer->state.job = NULL;

   /* If we have recorded any state with this last GPU job that requires us to
    * emit CPU jobs after the job is completed, add them now. The only
    * exception is secondary command buffers inside a render pass, because in
    * that case we want to defer this until we finish recording the primary
    * job into which we execute the secondary.
    */
   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ||
       !cmd_buffer->state.pass) {
      cmd_buffer_add_cpu_jobs_for_pending_state(cmd_buffer);
   }
}

static bool
job_type_is_gpu(struct v3dv_job *job)
{
   switch (job->type) {
   case V3DV_JOB_TYPE_GPU_CL:
   case V3DV_JOB_TYPE_GPU_CL_SECONDARY:
   case V3DV_JOB_TYPE_GPU_TFU:
   case V3DV_JOB_TYPE_GPU_CSD:
      return true;
   default:
      return false;
   }
}

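/* Consumes any barrier state pending on the command buffer: marks the given
 * job to serialize against previously submitted jobs and, for BCL barriers
 * on CL jobs, also requests binning-stage sync.
 */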
static void
cmd_buffer_serialize_job_if_needed(struct v3dv_cmd_buffer *cmd_buffer,
                                   struct v3dv_job *job)
{
   assert(cmd_buffer && job);

   if (!cmd_buffer->state.has_barrier)
      return;

   /* Serialization only affects GPU jobs, CPU jobs are always automatically
    * serialized.
    */
   if (!job_type_is_gpu(job))
      return;

   job->serialize = true;
   if (cmd_buffer->state.has_bcl_barrier &&
       (job->type == V3DV_JOB_TYPE_GPU_CL ||
        job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY)) {
      job->needs_bcl_sync = true;
   }

   cmd_buffer->state.has_barrier = false;
   cmd_buffer->state.has_bcl_barrier = false;
}

void
v3dv_job_init(struct v3dv_job *job,
              enum v3dv_job_type type,
              struct v3dv_device *device,
              struct v3dv_cmd_buffer *cmd_buffer,
              int32_t subpass_idx)
{
   assert(job);

   /* Make sure we haven't made this new job current before calling here */
   assert(!cmd_buffer || cmd_buffer->state.job != job);

   job->type = type;

   job->device = device;
   job->cmd_buffer = cmd_buffer;

   list_inithead(&job->list_link);

   if (type == V3DV_JOB_TYPE_GPU_CL ||
       type == V3DV_JOB_TYPE_GPU_CL_SECONDARY ||
       type == V3DV_JOB_TYPE_GPU_CSD) {
      job->bos =
         _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
      job->bo_count = 0;

      v3dv_cl_init(job, &job->indirect);

      if (unlikely(V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH))
         job->always_flush = true;
   }

   if (type == V3DV_JOB_TYPE_GPU_CL ||
       type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) {
      v3dv_cl_init(job, &job->bcl);
      v3dv_cl_init(job, &job->rcl);
   }

   if (cmd_buffer) {
      /* Flag all state as dirty. Generally, we need to re-emit state for each
       * new job.
       *
       * FIXME: there may be some exceptions, in which case we could skip some
       * bits.
       */
      cmd_buffer->state.dirty = ~0;
      cmd_buffer->state.dirty_descriptor_stages = ~0;

      /* Honor inheritance of occlusion queries in secondaries if requested */
      if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
          cmd_buffer->state.inheritance.occlusion_query_enable) {
         cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY;
      }

      /* Keep track of the first subpass that we are recording in this new job.
       * We will use this when we emit the RCL to decide how to emit our loads
       * and stores.
       */
      if (cmd_buffer->state.pass)
         job->first_subpass = subpass_idx;

      cmd_buffer_serialize_job_if_needed(cmd_buffer, job);
   }
}

struct v3dv_job *
v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
                          int32_t subpass_idx,
                          enum v3dv_job_type type)
{
   /* Don't create a new job if we can merge the current subpass into
    * the current job.
    */
   if (cmd_buffer->state.pass &&
       subpass_idx != -1 &&
       cmd_buffer_can_merge_subpass(cmd_buffer, subpass_idx)) {
      cmd_buffer->state.job->is_subpass_finish = false;
      return cmd_buffer->state.job;
   }

   /* Ensure we are not starting a new job without finishing a previous one */
   if (cmd_buffer->state.job != NULL)
      v3dv_cmd_buffer_finish_job(cmd_buffer);

   assert(cmd_buffer->state.job == NULL);
   struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
                                    sizeof(struct v3dv_job), 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);

   if (!job) {
      fprintf(stderr, "Error: failed to allocate CPU memory for job\n");
      v3dv_flag_oom(cmd_buffer, NULL);
      return NULL;
   }

   v3dv_job_init(job, type, cmd_buffer->device, cmd_buffer, subpass_idx);
   cmd_buffer->state.job = job;

   return job;
}

static VkResult
cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer,
                 VkCommandBufferResetFlags flags)
{
   vk_command_buffer_reset(&cmd_buffer->vk);
   if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_INITIALIZED) {
      struct v3dv_device *device = cmd_buffer->device;
      struct v3dv_cmd_pool *pool = cmd_buffer->pool;
      VkCommandBufferLevel level = cmd_buffer->level;

      /* cmd_buffer_init below will re-add the command buffer to the pool,
       * so remove it here to avoid adding it twice.
       */
      list_del(&cmd_buffer->pool_link);

      /* FIXME: For now we always free all resources as if
       * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT was set.
       */
      if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_NEW)
         cmd_buffer_free_resources(cmd_buffer);

      cmd_buffer_init(cmd_buffer, device, pool, level);
   }

   assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_INITIALIZED);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_AllocateCommandBuffers(VkDevice _device,
                            const VkCommandBufferAllocateInfo *pAllocateInfo,
                            VkCommandBuffer *pCommandBuffers)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_cmd_pool, pool, pAllocateInfo->commandPool);

   VkResult result = VK_SUCCESS;
   uint32_t i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
      result = cmd_buffer_create(device, pool, pAllocateInfo->level,
                                 &pCommandBuffers[i]);
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      v3dv_FreeCommandBuffers(_device, pAllocateInfo->commandPool,
                              i, pCommandBuffers);
      for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
         pCommandBuffers[i] = VK_NULL_HANDLE;
   }

   return result;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_FreeCommandBuffers(VkDevice device,
                        VkCommandPool commandPool,
                        uint32_t commandBufferCount,
                        const VkCommandBuffer *pCommandBuffers)
{
   for (uint32_t i = 0; i < commandBufferCount; i++) {
      V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);

      if (!cmd_buffer)
         continue;

      cmd_buffer_destroy(cmd_buffer);
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyCommandPool(VkDevice _device,
                        VkCommandPool commandPool,
                        const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_cmd_pool, pool, commandPool);

   if (!pool)
      return;

   list_for_each_entry_safe(struct v3dv_cmd_buffer, cmd_buffer,
                            &pool->cmd_buffers, pool_link) {
      cmd_buffer_destroy(cmd_buffer);
   }

   vk_object_free(&device->vk, pAllocator, pool);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_TrimCommandPool(VkDevice device,
                     VkCommandPool commandPool,
                     VkCommandPoolTrimFlags flags)
{
   /* We don't need to do anything here, our command pools never hold on to
    * any resources from command buffers that are freed or reset.
    */
}

static void
cmd_buffer_subpass_handle_pending_resolves(struct v3dv_cmd_buffer *cmd_buffer)
{
   assert(cmd_buffer->state.subpass_idx < cmd_buffer->state.pass->subpass_count);
   const struct v3dv_render_pass *pass = cmd_buffer->state.pass;
   const struct v3dv_subpass *subpass =
      &pass->subpasses[cmd_buffer->state.subpass_idx];

   if (!subpass->resolve_attachments)
      return;

   struct v3dv_framebuffer *fb = cmd_buffer->state.framebuffer;

   /* At this point we have already ended the current subpass and now we are
    * about to emit vkCmdResolveImage calls to get the resolves we can't
    * handle in the subpass RCL.
    *
    * vkCmdResolveImage is not supposed to be called inside a render pass, so
    * before we call it we need to make sure our command buffer state reflects
    * that we are no longer in a subpass by finishing the current job,
    * resetting the framebuffer and render pass state temporarily, and then
    * restoring it after we are done with the resolves.
    */
   if (cmd_buffer->state.job)
      v3dv_cmd_buffer_finish_job(cmd_buffer);
   struct v3dv_framebuffer *restore_fb = cmd_buffer->state.framebuffer;
   struct v3dv_render_pass *restore_pass = cmd_buffer->state.pass;
   uint32_t restore_subpass_idx = cmd_buffer->state.subpass_idx;
   cmd_buffer->state.framebuffer = NULL;
   cmd_buffer->state.pass = NULL;
   cmd_buffer->state.subpass_idx = -1;

   VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t src_attachment_idx =
         subpass->color_attachments[i].attachment;
      if (src_attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      if (pass->attachments[src_attachment_idx].use_tlb_resolve)
         continue;

      const uint32_t dst_attachment_idx =
         subpass->resolve_attachments[i].attachment;
      if (dst_attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      struct v3dv_image_view *src_iview = fb->attachments[src_attachment_idx];
      struct v3dv_image_view *dst_iview = fb->attachments[dst_attachment_idx];

      VkImageResolve2KHR region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
         .srcSubresource = {
            VK_IMAGE_ASPECT_COLOR_BIT,
            src_iview->vk.base_mip_level,
            src_iview->vk.base_array_layer,
            src_iview->vk.layer_count,
         },
         .srcOffset = { 0, 0, 0 },
         .dstSubresource = {
            VK_IMAGE_ASPECT_COLOR_BIT,
            dst_iview->vk.base_mip_level,
            dst_iview->vk.base_array_layer,
            dst_iview->vk.layer_count,
         },
         .dstOffset = { 0, 0, 0 },
         .extent = src_iview->vk.image->extent,
      };

      struct v3dv_image *src_image = (struct v3dv_image *) src_iview->vk.image;
      struct v3dv_image *dst_image = (struct v3dv_image *) dst_iview->vk.image;
      VkResolveImageInfo2KHR resolve_info = {
         .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2_KHR,
         .srcImage = v3dv_image_to_handle(src_image),
         .srcImageLayout = VK_IMAGE_LAYOUT_GENERAL,
         .dstImage = v3dv_image_to_handle(dst_image),
         .dstImageLayout = VK_IMAGE_LAYOUT_GENERAL,
         .regionCount = 1,
         .pRegions = &region,
      };
      v3dv_CmdResolveImage2KHR(cmd_buffer_handle, &resolve_info);
   }

   cmd_buffer->state.framebuffer = restore_fb;
   cmd_buffer->state.pass = restore_pass;
   cmd_buffer->state.subpass_idx = restore_subpass_idx;
}

static VkResult
cmd_buffer_begin_render_pass_secondary(
   struct v3dv_cmd_buffer *cmd_buffer,
   const VkCommandBufferInheritanceInfo *inheritance_info)
{
   assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
   assert(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT);
   assert(inheritance_info);

   cmd_buffer->state.pass =
      v3dv_render_pass_from_handle(inheritance_info->renderPass);
   assert(cmd_buffer->state.pass);

   cmd_buffer->state.framebuffer =
      v3dv_framebuffer_from_handle(inheritance_info->framebuffer);

   assert(inheritance_info->subpass < cmd_buffer->state.pass->subpass_count);
   cmd_buffer->state.subpass_idx = inheritance_info->subpass;

   cmd_buffer->state.inheritance.occlusion_query_enable =
      inheritance_info->occlusionQueryEnable;

   /* Secondaries that execute inside a render pass won't start subpasses,
    * so we want to create a job for them here.
    */
   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, inheritance_info->subpass,
                                V3DV_JOB_TYPE_GPU_CL_SECONDARY);
   if (!job) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   /* Secondary command buffers don't know about the render area, but our
    * scissor setup accounts for it, so let's make sure we make it large
    * enough that it doesn't actually constrain any rendering. This should
    * be fine, since the Vulkan spec states:
    *
    *    "The application must ensure (using scissor if necessary) that all
    *     rendering is contained within the render area."
    *
    * FIXME: set up constants for the max framebuffer dimensions and use them
    * here and when filling in VkPhysicalDeviceLimits.
    */
   const struct v3dv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
   cmd_buffer->state.render_area.offset.x = 0;
   cmd_buffer->state.render_area.offset.y = 0;
   cmd_buffer->state.render_area.extent.width =
      framebuffer ? framebuffer->width : 4096;
   cmd_buffer->state.render_area.extent.height =
      framebuffer ? framebuffer->height : 4096;

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                        const VkCommandBufferBeginInfo *pBeginInfo)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);

   /* If this is the first vkBeginCommandBuffer, we must initialize the
    * command buffer's state. Otherwise, we must reset its state. In both
    * cases a reset does the job.
    */
   VkResult result = cmd_buffer_reset(cmd_buffer, 0);
   if (result != VK_SUCCESS)
      return result;

   assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_INITIALIZED);

   cmd_buffer->usage_flags = pBeginInfo->flags;

   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
      if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
         result =
            cmd_buffer_begin_render_pass_secondary(cmd_buffer,
                                                   pBeginInfo->pInheritanceInfo);
         if (result != VK_SUCCESS)
            return result;
      }
   }

   cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_RECORDING;

   return VK_SUCCESS;
}
11507ec681f3Smrg
11517ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
11527ec681f3Smrgv3dv_ResetCommandBuffer(VkCommandBuffer commandBuffer,
11537ec681f3Smrg                        VkCommandBufferResetFlags flags)
11547ec681f3Smrg{
11557ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
11567ec681f3Smrg   return cmd_buffer_reset(cmd_buffer, flags);
11577ec681f3Smrg}
11587ec681f3Smrg
11597ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
11607ec681f3Smrgv3dv_ResetCommandPool(VkDevice device,
11617ec681f3Smrg                      VkCommandPool commandPool,
11627ec681f3Smrg                      VkCommandPoolResetFlags flags)
11637ec681f3Smrg{
11647ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_pool, pool, commandPool);
11657ec681f3Smrg
11667ec681f3Smrg   VkCommandBufferResetFlags reset_flags = 0;
11677ec681f3Smrg   if (flags & VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT)
11687ec681f3Smrg      reset_flags = VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT;
11697ec681f3Smrg   list_for_each_entry_safe(struct v3dv_cmd_buffer, cmd_buffer,
11707ec681f3Smrg                            &pool->cmd_buffers, pool_link) {
11717ec681f3Smrg      cmd_buffer_reset(cmd_buffer, reset_flags);
11727ec681f3Smrg   }
11737ec681f3Smrg
11747ec681f3Smrg   return VK_SUCCESS;
11757ec681f3Smrg}
11767ec681f3Smrg
11777ec681f3Smrgstatic void
11787ec681f3Smrgcmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer)
11797ec681f3Smrg{
11807ec681f3Smrg   /* Render areas and scissor/viewport are only relevant inside render passes,
11817ec681f3Smrg    * otherwise we are dealing with transfer operations where these elements
11827ec681f3Smrg    * don't apply.
11837ec681f3Smrg    */
11847ec681f3Smrg   assert(cmd_buffer->state.pass);
11857ec681f3Smrg   const VkRect2D *rect = &cmd_buffer->state.render_area;
11867ec681f3Smrg
11877ec681f3Smrg   /* We should only call this at the beginning of a subpass, at which point
11887ec681f3Smrg    * we always have framebuffer information available.
11897ec681f3Smrg    */
11907ec681f3Smrg   assert(cmd_buffer->state.framebuffer);
11917ec681f3Smrg   cmd_buffer->state.tile_aligned_render_area =
11927ec681f3Smrg      v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, rect,
11937ec681f3Smrg                                        cmd_buffer->state.framebuffer,
11947ec681f3Smrg                                        cmd_buffer->state.pass,
11957ec681f3Smrg                                        cmd_buffer->state.subpass_idx);
11967ec681f3Smrg
11977ec681f3Smrg   if (!cmd_buffer->state.tile_aligned_render_area) {
11987ec681f3Smrg      perf_debug("Render area for subpass %d of render pass %p doesn't "
11997ec681f3Smrg                 "match render pass granularity.\n",
12007ec681f3Smrg                 cmd_buffer->state.subpass_idx, cmd_buffer->state.pass);
12017ec681f3Smrg   }
12027ec681f3Smrg}
12037ec681f3Smrg
12047ec681f3Smrgstatic void
12057ec681f3Smrgcmd_buffer_state_set_attachment_clear_color(struct v3dv_cmd_buffer *cmd_buffer,
12067ec681f3Smrg                                            uint32_t attachment_idx,
12077ec681f3Smrg                                            const VkClearColorValue *color)
12087ec681f3Smrg{
12097ec681f3Smrg   assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
12107ec681f3Smrg
12117ec681f3Smrg   const struct v3dv_render_pass_attachment *attachment =
12127ec681f3Smrg      &cmd_buffer->state.pass->attachments[attachment_idx];
12137ec681f3Smrg
12147ec681f3Smrg   uint32_t internal_type, internal_bpp;
12157ec681f3Smrg   const struct v3dv_format *format =
12167ec681f3Smrg      v3dv_X(cmd_buffer->device, get_format)(attachment->desc.format);
12177ec681f3Smrg
12187ec681f3Smrg   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_output_format)
12197ec681f3Smrg      (format->rt_type, &internal_type, &internal_bpp);
12207ec681f3Smrg
12217ec681f3Smrg   uint32_t internal_size = 4 << internal_bpp;
12227ec681f3Smrg
12237ec681f3Smrg   struct v3dv_cmd_buffer_attachment_state *attachment_state =
12247ec681f3Smrg      &cmd_buffer->state.attachments[attachment_idx];
12257ec681f3Smrg
12267ec681f3Smrg   v3dv_X(cmd_buffer->device, get_hw_clear_color)
12277ec681f3Smrg      (color, internal_type, internal_size, &attachment_state->clear_value.color[0]);
12287ec681f3Smrg
12297ec681f3Smrg   attachment_state->vk_clear_value.color = *color;
12307ec681f3Smrg}
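
/* A note on the packing above, as a sketch: internal_bpp follows the
 * V3D_INTERNAL_BPP_* encoding (assumed here: 0 = 32bpp, 1 = 64bpp,
 * 2 = 128bpp), so
 *
 *    internal_size = 4 << internal_bpp;   /* 4, 8 or 16 bytes *\/
 *
 * For a 32bpp attachment such as VK_FORMAT_R8G8B8A8_UNORM only
 * clear_value.color[0] is filled with the packed clear color, while a
 * 128bpp format like VK_FORMAT_R32G32B32A32_SFLOAT uses all four words
 * of the array.
 */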
12317ec681f3Smrg
12327ec681f3Smrgstatic void
12337ec681f3Smrgcmd_buffer_state_set_attachment_clear_depth_stencil(
12347ec681f3Smrg   struct v3dv_cmd_buffer *cmd_buffer,
12357ec681f3Smrg   uint32_t attachment_idx,
12367ec681f3Smrg   bool clear_depth, bool clear_stencil,
12377ec681f3Smrg   const VkClearDepthStencilValue *ds)
12387ec681f3Smrg{
12397ec681f3Smrg   struct v3dv_cmd_buffer_attachment_state *attachment_state =
12407ec681f3Smrg      &cmd_buffer->state.attachments[attachment_idx];
12417ec681f3Smrg
12427ec681f3Smrg   if (clear_depth)
12437ec681f3Smrg      attachment_state->clear_value.z = ds->depth;
12447ec681f3Smrg
12457ec681f3Smrg   if (clear_stencil)
12467ec681f3Smrg      attachment_state->clear_value.s = ds->stencil;
12477ec681f3Smrg
12487ec681f3Smrg   attachment_state->vk_clear_value.depthStencil = *ds;
12497ec681f3Smrg}
12507ec681f3Smrg
12517ec681f3Smrgstatic void
12527ec681f3Smrgcmd_buffer_state_set_clear_values(struct v3dv_cmd_buffer *cmd_buffer,
12537ec681f3Smrg                                  uint32_t count, const VkClearValue *values)
12547ec681f3Smrg{
12557ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
12567ec681f3Smrg   const struct v3dv_render_pass *pass = state->pass;
12577ec681f3Smrg
12587ec681f3Smrg   /* There could be fewer clear values than attachments in the render pass,
12597ec681f3Smrg    * in which case we only want to process as many as we have, or there could
12607ec681f3Smrg    * be more, in which case we want to ignore those for which there is no
12617ec681f3Smrg    * corresponding attachment.
12627ec681f3Smrg    */
12637ec681f3Smrg   count = MIN2(count, pass->attachment_count);
12647ec681f3Smrg   for (uint32_t i = 0; i < count; i++) {
12657ec681f3Smrg      const struct v3dv_render_pass_attachment *attachment =
12667ec681f3Smrg         &pass->attachments[i];
12677ec681f3Smrg
12687ec681f3Smrg      if (attachment->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
12697ec681f3Smrg         continue;
12707ec681f3Smrg
12717ec681f3Smrg      VkImageAspectFlags aspects = vk_format_aspects(attachment->desc.format);
12727ec681f3Smrg      if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
12737ec681f3Smrg         cmd_buffer_state_set_attachment_clear_color(cmd_buffer, i,
12747ec681f3Smrg                                                     &values[i].color);
12757ec681f3Smrg      } else if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
12767ec681f3Smrg                            VK_IMAGE_ASPECT_STENCIL_BIT)) {
12777ec681f3Smrg         cmd_buffer_state_set_attachment_clear_depth_stencil(
12787ec681f3Smrg            cmd_buffer, i,
12797ec681f3Smrg            aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
12807ec681f3Smrg            aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
12817ec681f3Smrg            &values[i].depthStencil);
12827ec681f3Smrg      }
12837ec681f3Smrg   }
12847ec681f3Smrg}
12857ec681f3Smrg
12867ec681f3Smrgstatic void
12877ec681f3Smrgcmd_buffer_init_render_pass_attachment_state(struct v3dv_cmd_buffer *cmd_buffer,
12887ec681f3Smrg                                             const VkRenderPassBeginInfo *pRenderPassBegin)
12897ec681f3Smrg{
12907ec681f3Smrg   cmd_buffer_state_set_clear_values(cmd_buffer,
12917ec681f3Smrg                                     pRenderPassBegin->clearValueCount,
12927ec681f3Smrg                                     pRenderPassBegin->pClearValues);
12937ec681f3Smrg}
12947ec681f3Smrg
12957ec681f3Smrgstatic void
12967ec681f3Smrgcmd_buffer_ensure_render_pass_attachment_state(struct v3dv_cmd_buffer *cmd_buffer)
12977ec681f3Smrg{
12987ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
12997ec681f3Smrg   const struct v3dv_render_pass *pass = state->pass;
13007ec681f3Smrg
13017ec681f3Smrg   if (state->attachment_alloc_count < pass->attachment_count) {
13027ec681f3Smrg      if (state->attachments != NULL) {
13037ec681f3Smrg         assert(state->attachment_alloc_count > 0);
13047ec681f3Smrg         vk_free(&cmd_buffer->device->vk.alloc, state->attachments);
13057ec681f3Smrg      }
13067ec681f3Smrg
13077ec681f3Smrg      uint32_t size = sizeof(struct v3dv_cmd_buffer_attachment_state) *
13087ec681f3Smrg                      pass->attachment_count;
13097ec681f3Smrg      state->attachments = vk_zalloc(&cmd_buffer->device->vk.alloc, size, 8,
13107ec681f3Smrg                                     VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
13117ec681f3Smrg      if (!state->attachments) {
13127ec681f3Smrg         v3dv_flag_oom(cmd_buffer, NULL);
13137ec681f3Smrg         return;
13147ec681f3Smrg      }
13157ec681f3Smrg      state->attachment_alloc_count = pass->attachment_count;
13167ec681f3Smrg   }
13177ec681f3Smrg
13187ec681f3Smrg   assert(state->attachment_alloc_count >= pass->attachment_count);
13197ec681f3Smrg}
13207ec681f3Smrg
13217ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
13227ec681f3Smrgv3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
13237ec681f3Smrg                        const VkRenderPassBeginInfo *pRenderPassBegin,
13247ec681f3Smrg                        VkSubpassContents contents)
13257ec681f3Smrg{
13267ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
13277ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_render_pass, pass, pRenderPassBegin->renderPass);
13287ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
13297ec681f3Smrg
13307ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
13317ec681f3Smrg   state->pass = pass;
13327ec681f3Smrg   state->framebuffer = framebuffer;
13337ec681f3Smrg
13347ec681f3Smrg   cmd_buffer_ensure_render_pass_attachment_state(cmd_buffer);
13357ec681f3Smrg   v3dv_return_if_oom(cmd_buffer, NULL);
13367ec681f3Smrg
13377ec681f3Smrg   cmd_buffer_init_render_pass_attachment_state(cmd_buffer, pRenderPassBegin);
13387ec681f3Smrg
13397ec681f3Smrg   state->render_area = pRenderPassBegin->renderArea;
13407ec681f3Smrg
13417ec681f3Smrg   /* If our render area is smaller than the current clip window we will have
13427ec681f3Smrg    * to emit a new clip window to constrain it to the render area.
13437ec681f3Smrg    */
13447ec681f3Smrg   uint32_t min_render_x = state->render_area.offset.x;
13457ec681f3Smrg   uint32_t min_render_y = state->render_area.offset.y;
13467ec681f3Smrg   uint32_t max_render_x = min_render_x + state->render_area.extent.width - 1;
13477ec681f3Smrg   uint32_t max_render_y = min_render_y + state->render_area.extent.height - 1;
13487ec681f3Smrg   uint32_t min_clip_x = state->clip_window.offset.x;
13497ec681f3Smrg   uint32_t min_clip_y = state->clip_window.offset.y;
13507ec681f3Smrg   uint32_t max_clip_x = min_clip_x + state->clip_window.extent.width - 1;
13517ec681f3Smrg   uint32_t max_clip_y = min_clip_y + state->clip_window.extent.height - 1;
13527ec681f3Smrg   if (min_render_x > min_clip_x || min_render_y > min_clip_y ||
13537ec681f3Smrg       max_render_x < max_clip_x || max_render_y < max_clip_y) {
13547ec681f3Smrg      state->dirty |= V3DV_CMD_DIRTY_SCISSOR;
13557ec681f3Smrg   }
13567ec681f3Smrg
13577ec681f3Smrg   /* Setup for first subpass */
13587ec681f3Smrg   v3dv_cmd_buffer_subpass_start(cmd_buffer, 0);
13597ec681f3Smrg}
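
/* Illustrative application-side usage of the path above (a sketch, not
 * driver code; pass, fb and cmd are placeholder application handles): a
 * render pass with one color and one depth/stencil attachment, both using
 * VK_ATTACHMENT_LOAD_OP_CLEAR, provides one VkClearValue per attachment
 * index:
 *
 *    VkClearValue clears[2];
 *    clears[0].color = (VkClearColorValue){ .float32 = { 0, 0, 0, 1 } };
 *    clears[1].depthStencil = (VkClearDepthStencilValue){ 1.0f, 0 };
 *
 *    VkRenderPassBeginInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
 *       .renderPass = pass,
 *       .framebuffer = fb,
 *       .renderArea = { { 0, 0 }, { 800, 600 } },
 *       .clearValueCount = 2,
 *       .pClearValues = clears,
 *    };
 *    vkCmdBeginRenderPass(cmd, &info, VK_SUBPASS_CONTENTS_INLINE);
 *
 * cmd_buffer_state_set_clear_values() above then records both the
 * hardware and the Vulkan form of each clear for later use.
 */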
13607ec681f3Smrg
13617ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
13627ec681f3Smrgv3dv_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
13637ec681f3Smrg{
13647ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
13657ec681f3Smrg
13667ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
13677ec681f3Smrg   assert(state->subpass_idx < state->pass->subpass_count - 1);
13687ec681f3Smrg
13697ec681f3Smrg   /* Finish the previous subpass */
13707ec681f3Smrg   v3dv_cmd_buffer_subpass_finish(cmd_buffer);
13717ec681f3Smrg   cmd_buffer_subpass_handle_pending_resolves(cmd_buffer);
13727ec681f3Smrg
13737ec681f3Smrg   /* Start the next subpass */
13747ec681f3Smrg   v3dv_cmd_buffer_subpass_start(cmd_buffer, state->subpass_idx + 1);
13757ec681f3Smrg}
13767ec681f3Smrg
13777ec681f3Smrgstatic void
13787ec681f3Smrgcmd_buffer_emit_subpass_clears(struct v3dv_cmd_buffer *cmd_buffer)
13797ec681f3Smrg{
13807ec681f3Smrg   assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
13817ec681f3Smrg
13827ec681f3Smrg   assert(cmd_buffer->state.pass);
13837ec681f3Smrg   assert(cmd_buffer->state.subpass_idx < cmd_buffer->state.pass->subpass_count);
13847ec681f3Smrg   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
13857ec681f3Smrg   const struct v3dv_render_pass *pass = state->pass;
13867ec681f3Smrg   const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
13877ec681f3Smrg
13887ec681f3Smrg   /* We only need to emit subpass clears as draw calls when the render
13897ec681f3Smrg    * area is not aligned to tile boundaries or for GFXH-1461.
13907ec681f3Smrg    */
13917ec681f3Smrg   if (cmd_buffer->state.tile_aligned_render_area &&
13927ec681f3Smrg       !subpass->do_depth_clear_with_draw &&
13937ec681f3Smrg       !subpass->do_stencil_clear_with_draw) {
13947ec681f3Smrg      return;
13957ec681f3Smrg   }
13967ec681f3Smrg
13977ec681f3Smrg   uint32_t att_count = 0;
13987ec681f3Smrg   VkClearAttachment atts[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */
13997ec681f3Smrg
14007ec681f3Smrg   /* We only need to emit subpass clears as draw calls for color attachments
14017ec681f3Smrg    * if the render area is not aligned to tile boundaries.
14027ec681f3Smrg    */
14037ec681f3Smrg   if (!cmd_buffer->state.tile_aligned_render_area) {
14047ec681f3Smrg      for (uint32_t i = 0; i < subpass->color_count; i++) {
14057ec681f3Smrg         const uint32_t att_idx = subpass->color_attachments[i].attachment;
14067ec681f3Smrg         if (att_idx == VK_ATTACHMENT_UNUSED)
14077ec681f3Smrg            continue;
14087ec681f3Smrg
14097ec681f3Smrg         struct v3dv_render_pass_attachment *att = &pass->attachments[att_idx];
14107ec681f3Smrg         if (att->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
14117ec681f3Smrg            continue;
14127ec681f3Smrg
14137ec681f3Smrg         if (state->subpass_idx != att->first_subpass)
14147ec681f3Smrg            continue;
14157ec681f3Smrg
14167ec681f3Smrg         atts[att_count].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
14177ec681f3Smrg         atts[att_count].colorAttachment = i;
14187ec681f3Smrg         atts[att_count].clearValue = state->attachments[att_idx].vk_clear_value;
14197ec681f3Smrg         att_count++;
14207ec681f3Smrg      }
14217ec681f3Smrg   }
14227ec681f3Smrg
14237ec681f3Smrg   /* For D/S we may also need to emit a subpass clear for GFXH-1461 */
14247ec681f3Smrg   const uint32_t ds_att_idx = subpass->ds_attachment.attachment;
14257ec681f3Smrg   if (ds_att_idx != VK_ATTACHMENT_UNUSED) {
14267ec681f3Smrg      struct v3dv_render_pass_attachment *att = &pass->attachments[ds_att_idx];
14277ec681f3Smrg      if (state->subpass_idx == att->first_subpass) {
14287ec681f3Smrg         VkImageAspectFlags aspects = vk_format_aspects(att->desc.format);
14297ec681f3Smrg         if (att->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR ||
14307ec681f3Smrg             (cmd_buffer->state.tile_aligned_render_area &&
14317ec681f3Smrg              !subpass->do_depth_clear_with_draw)) {
14327ec681f3Smrg            aspects &= ~VK_IMAGE_ASPECT_DEPTH_BIT;
14337ec681f3Smrg         }
14347ec681f3Smrg         if (att->desc.stencilLoadOp != VK_ATTACHMENT_LOAD_OP_CLEAR ||
14357ec681f3Smrg             (cmd_buffer->state.tile_aligned_render_area &&
14367ec681f3Smrg              !subpass->do_stencil_clear_with_draw)) {
14377ec681f3Smrg            aspects &= ~VK_IMAGE_ASPECT_STENCIL_BIT;
14387ec681f3Smrg         }
14397ec681f3Smrg         if (aspects) {
14407ec681f3Smrg            atts[att_count].aspectMask = aspects;
14417ec681f3Smrg            atts[att_count].colorAttachment = 0; /* Ignored */
14427ec681f3Smrg            atts[att_count].clearValue =
14437ec681f3Smrg               state->attachments[ds_att_idx].vk_clear_value;
14447ec681f3Smrg            att_count++;
14457ec681f3Smrg         }
14467ec681f3Smrg      }
14477ec681f3Smrg   }
14487ec681f3Smrg
14497ec681f3Smrg   if (att_count == 0)
14507ec681f3Smrg      return;
14517ec681f3Smrg
14527ec681f3Smrg   if (!cmd_buffer->state.tile_aligned_render_area) {
14537ec681f3Smrg      perf_debug("Render area doesn't match render pass granularity, falling "
14547ec681f3Smrg                 "back to vkCmdClearAttachments for "
14557ec681f3Smrg                 "VK_ATTACHMENT_LOAD_OP_CLEAR.\n");
14567ec681f3Smrg   } else if (subpass->do_depth_clear_with_draw ||
14577ec681f3Smrg              subpass->do_stencil_clear_with_draw) {
14587ec681f3Smrg      perf_debug("Subpass clears DEPTH but loads STENCIL (or viceversa), "
14597ec681f3Smrg                 "falling back to vkCmdClearAttachments for "
14607ec681f3Smrg                 "VK_ATTACHMENT_LOAD_OP_CLEAR.\n");
14617ec681f3Smrg   }
14627ec681f3Smrg
14637ec681f3Smrg   /* From the Vulkan 1.0 spec:
14647ec681f3Smrg    *
14657ec681f3Smrg    *    "VK_ATTACHMENT_LOAD_OP_CLEAR specifies that the contents within the
14667ec681f3Smrg    *     render area will be cleared to a uniform value, which is specified
14677ec681f3Smrg    *     when a render pass instance is begun."
14687ec681f3Smrg    *
14697ec681f3Smrg    * So the clear is only constrained by the render area and not by pipeline
14707ec681f3Smrg    * state such as scissor or viewport; these are the semantics of
14717ec681f3Smrg    * vkCmdClearAttachments as well.
14727ec681f3Smrg    */
14737ec681f3Smrg   VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
14747ec681f3Smrg   VkClearRect rect = {
14757ec681f3Smrg      .rect = state->render_area,
14767ec681f3Smrg      .baseArrayLayer = 0,
14777ec681f3Smrg      .layerCount = 1,
14787ec681f3Smrg   };
14797ec681f3Smrg   v3dv_CmdClearAttachments(_cmd_buffer, att_count, atts, 1, &rect);
14807ec681f3Smrg}
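
/* Example of the GFXH-1461 path handled above (a sketch; the
 * do_*_clear_with_draw flags are assumed to be set at render pass
 * creation time): a VK_FORMAT_D24_UNORM_S8_UINT attachment with
 * loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR but stencilLoadOp =
 * VK_ATTACHMENT_LOAD_OP_LOAD must clear depth while preserving stencil.
 * The TLB can't do that partial clear safely, so the depth aspect is
 * cleared with a draw through v3dv_CmdClearAttachments() even when the
 * render area is tile-aligned.
 */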
14817ec681f3Smrg
14827ec681f3Smrgstatic struct v3dv_job *
14837ec681f3Smrgcmd_buffer_subpass_create_job(struct v3dv_cmd_buffer *cmd_buffer,
14847ec681f3Smrg                              uint32_t subpass_idx,
14857ec681f3Smrg                              enum v3dv_job_type type)
14867ec681f3Smrg{
14877ec681f3Smrg   assert(type == V3DV_JOB_TYPE_GPU_CL ||
14887ec681f3Smrg          type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
14897ec681f3Smrg
14907ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
14917ec681f3Smrg   assert(subpass_idx < state->pass->subpass_count);
14927ec681f3Smrg
14937ec681f3Smrg   /* Starting a new job can trigger a finish of the current one, so don't
14947ec681f3Smrg    * change the command buffer state for the new job until we are done creating
14957ec681f3Smrg    * the new job.
14967ec681f3Smrg    */
14977ec681f3Smrg   struct v3dv_job *job =
14987ec681f3Smrg      v3dv_cmd_buffer_start_job(cmd_buffer, subpass_idx, type);
14997ec681f3Smrg   if (!job)
15007ec681f3Smrg      return NULL;
15017ec681f3Smrg
15027ec681f3Smrg   state->subpass_idx = subpass_idx;
15037ec681f3Smrg
15047ec681f3Smrg   /* If we are starting a new job we need to set up binning. We only do this
15057ec681f3Smrg    * for V3DV_JOB_TYPE_GPU_CL jobs because V3DV_JOB_TYPE_GPU_CL_SECONDARY
15067ec681f3Smrg    * jobs are not submitted to the GPU directly, and are instead meant to be
15077ec681f3Smrg    * branched to from other V3DV_JOB_TYPE_GPU_CL jobs.
15087ec681f3Smrg    */
15097ec681f3Smrg   if (type == V3DV_JOB_TYPE_GPU_CL &&
15107ec681f3Smrg       job->first_subpass == state->subpass_idx) {
15117ec681f3Smrg      const struct v3dv_subpass *subpass =
15127ec681f3Smrg         &state->pass->subpasses[state->subpass_idx];
15137ec681f3Smrg
15147ec681f3Smrg      const struct v3dv_framebuffer *framebuffer = state->framebuffer;
15157ec681f3Smrg
15167ec681f3Smrg      uint8_t internal_bpp;
15177ec681f3Smrg      bool msaa;
15187ec681f3Smrg      v3dv_X(job->device, framebuffer_compute_internal_bpp_msaa)
15197ec681f3Smrg         (framebuffer, subpass, &internal_bpp, &msaa);
15207ec681f3Smrg
15217ec681f3Smrg      /* From the Vulkan spec:
15227ec681f3Smrg       *
15237ec681f3Smrg       *    "If the render pass uses multiview, then layers must be one and
15247ec681f3Smrg       *     each attachment requires a number of layers that is greater than
15257ec681f3Smrg       *     the maximum bit index set in the view mask in the subpasses in
15267ec681f3Smrg       *     which it is used."
15277ec681f3Smrg       *
15287ec681f3Smrg       * So when multiview is enabled, we take the number of layers from the
15297ec681f3Smrg       * last bit set in the view mask.
15307ec681f3Smrg       */
15317ec681f3Smrg      uint32_t layers = framebuffer->layers;
15327ec681f3Smrg      if (subpass->view_mask != 0) {
15337ec681f3Smrg         assert(framebuffer->layers == 1);
15347ec681f3Smrg         layers = util_last_bit(subpass->view_mask);
15357ec681f3Smrg      }
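      /* Worked example: view_mask = 0b0101 (views 0 and 2 active) gives
       * util_last_bit() == 3, so the frame is sized for 3 layers even
       * though only two views are actually rendered.
       */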
15367ec681f3Smrg
15377ec681f3Smrg      v3dv_job_start_frame(job,
15387ec681f3Smrg                           framebuffer->width,
15397ec681f3Smrg                           framebuffer->height,
15407ec681f3Smrg                           layers,
15417ec681f3Smrg                           true,
15427ec681f3Smrg                           subpass->color_count,
15437ec681f3Smrg                           internal_bpp,
15447ec681f3Smrg                           msaa);
15457ec681f3Smrg   }
15467ec681f3Smrg
15477ec681f3Smrg   return job;
15487ec681f3Smrg}
15497ec681f3Smrg
15507ec681f3Smrgstruct v3dv_job *
15517ec681f3Smrgv3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
15527ec681f3Smrg                              uint32_t subpass_idx)
15537ec681f3Smrg{
15547ec681f3Smrg   assert(cmd_buffer->state.pass);
15557ec681f3Smrg   assert(subpass_idx < cmd_buffer->state.pass->subpass_count);
15567ec681f3Smrg
15577ec681f3Smrg   struct v3dv_job *job =
15587ec681f3Smrg      cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx,
15597ec681f3Smrg                                    V3DV_JOB_TYPE_GPU_CL);
15607ec681f3Smrg   if (!job)
15617ec681f3Smrg      return NULL;
15627ec681f3Smrg
15637ec681f3Smrg   /* Check if our render area is aligned to tile boundaries. We have to do
15647ec681f3Smrg    * this in each subpass because the subset of attachments used can change
15657ec681f3Smrg    * and with that the tile size selected by the hardware can change too.
15667ec681f3Smrg    */
15677ec681f3Smrg   cmd_buffer_update_tile_alignment(cmd_buffer);
15687ec681f3Smrg
15697ec681f3Smrg   /* If we can't use TLB clears then we need to emit draw clears for any
15707ec681f3Smrg    * LOAD_OP_CLEAR attachments in this subpass now. We might also need to emit
15717ec681f3Smrg    * Depth/Stencil clears if we hit GFXH-1461.
15727ec681f3Smrg    *
15737ec681f3Smrg    * Secondary command buffers don't start subpasses (and may not even have
15747ec681f3Smrg    * framebuffer state), so we only care about this in primaries. The only
15757ec681f3Smrg    * exception could be a secondary running inside a subpass that needs to
15767ec681f3Smrg    * record a meta operation (with its own render pass) that relies on
15777ec681f3Smrg    * attachment load clears, but we don't have any instances of that right
15787ec681f3Smrg    * now.
15797ec681f3Smrg    */
15807ec681f3Smrg   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
15817ec681f3Smrg      cmd_buffer_emit_subpass_clears(cmd_buffer);
15827ec681f3Smrg
15837ec681f3Smrg   return job;
15847ec681f3Smrg}
15857ec681f3Smrg
15867ec681f3Smrgstruct v3dv_job *
15877ec681f3Smrgv3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
15887ec681f3Smrg                               uint32_t subpass_idx)
15897ec681f3Smrg{
15907ec681f3Smrg   assert(cmd_buffer->state.pass);
15917ec681f3Smrg   assert(subpass_idx < cmd_buffer->state.pass->subpass_count);
15927ec681f3Smrg
15937ec681f3Smrg   struct v3dv_job *job;
15947ec681f3Smrg   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
15957ec681f3Smrg      job = cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx,
15967ec681f3Smrg                                          V3DV_JOB_TYPE_GPU_CL);
15977ec681f3Smrg   } else {
15987ec681f3Smrg      assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
15997ec681f3Smrg      job = cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx,
16007ec681f3Smrg                                          V3DV_JOB_TYPE_GPU_CL_SECONDARY);
16017ec681f3Smrg   }
16027ec681f3Smrg
16037ec681f3Smrg   if (!job)
16047ec681f3Smrg      return NULL;
16057ec681f3Smrg
16067ec681f3Smrg   job->is_subpass_continue = true;
16077ec681f3Smrg
16087ec681f3Smrg   return job;
16097ec681f3Smrg}
16107ec681f3Smrg
16117ec681f3Smrgvoid
16127ec681f3Smrgv3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer)
16137ec681f3Smrg{
16147ec681f3Smrg   /* We can end up here without a job if the last command recorded into the
16157ec681f3Smrg    * subpass already finished the job (for example a pipeline barrier). In
16167ec681f3Smrg    * that case we don't get to set the is_subpass_finish flag, but that is
16177ec681f3Smrg    * not required for correct behavior.
16187ec681f3Smrg    */
16197ec681f3Smrg   struct v3dv_job *job = cmd_buffer->state.job;
16207ec681f3Smrg   if (job)
16217ec681f3Smrg      job->is_subpass_finish = true;
16227ec681f3Smrg}
16237ec681f3Smrg
16247ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
16257ec681f3Smrgv3dv_CmdEndRenderPass(VkCommandBuffer commandBuffer)
16267ec681f3Smrg{
16277ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
16287ec681f3Smrg
16297ec681f3Smrg   /* Finalize last subpass */
16307ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
16317ec681f3Smrg   assert(state->subpass_idx == state->pass->subpass_count - 1);
16327ec681f3Smrg   v3dv_cmd_buffer_subpass_finish(cmd_buffer);
16337ec681f3Smrg   v3dv_cmd_buffer_finish_job(cmd_buffer);
16347ec681f3Smrg
16357ec681f3Smrg   cmd_buffer_subpass_handle_pending_resolves(cmd_buffer);
16367ec681f3Smrg
16377ec681f3Smrg   /* We are no longer inside a render pass */
16387ec681f3Smrg   state->framebuffer = NULL;
16397ec681f3Smrg   state->pass = NULL;
16407ec681f3Smrg   state->subpass_idx = -1;
16417ec681f3Smrg}
16427ec681f3Smrg
16437ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
16447ec681f3Smrgv3dv_EndCommandBuffer(VkCommandBuffer commandBuffer)
16457ec681f3Smrg{
16467ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
16477ec681f3Smrg
16487ec681f3Smrg   if (cmd_buffer->state.oom)
16497ec681f3Smrg      return VK_ERROR_OUT_OF_HOST_MEMORY;
16507ec681f3Smrg
16517ec681f3Smrg   /* Primaries should have ended any recording jobs by the time they hit
16527ec681f3Smrg    * vkEndRenderPass (if we are inside a render pass). Commands outside
16537ec681f3Smrg    * a render pass instance (for both primaries and secondaries) spawn
16547ec681f3Smrg    * complete jobs too. So the only case where we can get here without
16557ec681f3Smrg    * finishing a recording job is when we are recording a secondary
16567ec681f3Smrg    * inside a render pass.
16577ec681f3Smrg    */
16587ec681f3Smrg   if (cmd_buffer->state.job) {
16597ec681f3Smrg      assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
16607ec681f3Smrg             cmd_buffer->state.pass);
16617ec681f3Smrg      v3dv_cmd_buffer_finish_job(cmd_buffer);
16627ec681f3Smrg   }
16637ec681f3Smrg
16647ec681f3Smrg   cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_EXECUTABLE;
16657ec681f3Smrg
16667ec681f3Smrg   return VK_SUCCESS;
16677ec681f3Smrg}
16687ec681f3Smrg
16697ec681f3Smrgstatic void
16707ec681f3Smrgclone_bo_list(struct v3dv_cmd_buffer *cmd_buffer,
16717ec681f3Smrg              struct list_head *dst,
16727ec681f3Smrg              struct list_head *src)
16737ec681f3Smrg{
16747ec681f3Smrg   assert(cmd_buffer);
16757ec681f3Smrg
16767ec681f3Smrg   list_inithead(dst);
16777ec681f3Smrg   list_for_each_entry(struct v3dv_bo, bo, src, list_link) {
16787ec681f3Smrg      struct v3dv_bo *clone_bo =
16797ec681f3Smrg         vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct v3dv_bo), 8,
16807ec681f3Smrg                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
16817ec681f3Smrg      if (!clone_bo) {
16827ec681f3Smrg         v3dv_flag_oom(cmd_buffer, NULL);
16837ec681f3Smrg         return;
16847ec681f3Smrg      }
16857ec681f3Smrg
16867ec681f3Smrg      *clone_bo = *bo;
16877ec681f3Smrg      list_addtail(&clone_bo->list_link, dst);
16887ec681f3Smrg   }
16897ec681f3Smrg}
16907ec681f3Smrg
16917ec681f3Smrg/* Clones a job for inclusion in the given command buffer. Note that this
16927ec681f3Smrg * doesn't make a deep copy, so the cloned job doesn't own any resources.
16937ec681f3Smrg * Useful when we need to have a job in more than one list, which happens
16947ec681f3Smrg * for jobs recorded in secondary command buffers when we want to execute
16957ec681f3Smrg * them in primaries.
16967ec681f3Smrg */
16977ec681f3Smrgstruct v3dv_job *
16987ec681f3Smrgv3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
16997ec681f3Smrg                             struct v3dv_cmd_buffer *cmd_buffer)
17007ec681f3Smrg{
17017ec681f3Smrg   struct v3dv_job *clone_job = vk_alloc(&job->device->vk.alloc,
17027ec681f3Smrg                                         sizeof(struct v3dv_job), 8,
17037ec681f3Smrg                                         VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
17047ec681f3Smrg   if (!clone_job) {
17057ec681f3Smrg      v3dv_flag_oom(cmd_buffer, NULL);
17067ec681f3Smrg      return NULL;
17077ec681f3Smrg   }
17087ec681f3Smrg
17097ec681f3Smrg   /* Cloned jobs don't duplicate resources! */
17107ec681f3Smrg   *clone_job = *job;
17117ec681f3Smrg   clone_job->is_clone = true;
17127ec681f3Smrg   clone_job->cmd_buffer = cmd_buffer;
17137ec681f3Smrg   list_addtail(&clone_job->list_link, &cmd_buffer->jobs);
17147ec681f3Smrg
17157ec681f3Smrg   /* We need to rebuild the BO lists so that they are anchored in the
17167ec681f3Smrg    * cloned job. Otherwise functions like list_length() will loop forever.
17177ec681f3Smrg    */
17187ec681f3Smrg   if (job->type == V3DV_JOB_TYPE_GPU_CL) {
17197ec681f3Smrg      clone_bo_list(cmd_buffer, &clone_job->bcl.bo_list, &job->bcl.bo_list);
17207ec681f3Smrg      clone_bo_list(cmd_buffer, &clone_job->rcl.bo_list, &job->rcl.bo_list);
17217ec681f3Smrg      clone_bo_list(cmd_buffer, &clone_job->indirect.bo_list,
17227ec681f3Smrg                    &job->indirect.bo_list);
17237ec681f3Smrg   }
17247ec681f3Smrg
17257ec681f3Smrg   return clone_job;
17267ec681f3Smrg}
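
/* Why clone_bo_list() above is needed: the util/list.h lists are
 * intrusive and circular, so the list heads copied by "*clone_job = *job"
 * still link to nodes whose chain terminates at the *original* job's
 * head rather than the clone's. Iterating from the copied head would
 * never reach its own sentinel, which is why functions like
 * list_length() would loop forever without the rebuild.
 */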
17277ec681f3Smrg
17287ec681f3Smrgstatic void
17297ec681f3Smrgcmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary,
17307ec681f3Smrg                                uint32_t cmd_buffer_count,
17317ec681f3Smrg                                const VkCommandBuffer *cmd_buffers)
17327ec681f3Smrg{
17337ec681f3Smrg   bool pending_barrier = false;
17347ec681f3Smrg   bool pending_bcl_barrier = false;
17357ec681f3Smrg   for (uint32_t i = 0; i < cmd_buffer_count; i++) {
17367ec681f3Smrg      V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]);
17377ec681f3Smrg
17387ec681f3Smrg      assert(!(secondary->usage_flags &
17397ec681f3Smrg               VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT));
17407ec681f3Smrg
17417ec681f3Smrg      /* Secondary command buffers that execute outside a render pass create
17427ec681f3Smrg       * complete jobs with an RCL and tile setup, so we simply want to merge
17437ec681f3Smrg       * their job list into the primary's. However, because they may be
17447ec681f3Smrg       * executed into multiple primaries at the same time and we only have a
17457ec681f3Smrg       * single list_link in each job, we can't just add them to the primary's
17467ec681f3Smrg       * job list and we instead have to clone them first.
17477ec681f3Smrg       *
17487ec681f3Smrg       * Alternatively, we could create an "execute secondary" CPU job that
17497ec681f3Smrg       * when executed in a queue, would submit all the jobs in the referenced
17507ec681f3Smrg       * secondary command buffer. However, this would raise some challenges
17517ec681f3Smrg       * to make it work with the implementation of wait threads in the queue
17527ec681f3Smrg       * which we use for event waits, for example.
17537ec681f3Smrg       */
17547ec681f3Smrg      list_for_each_entry(struct v3dv_job, secondary_job,
17557ec681f3Smrg                          &secondary->jobs, list_link) {
17567ec681f3Smrg         /* These can only happen inside a render pass */
17577ec681f3Smrg         assert(secondary_job->type != V3DV_JOB_TYPE_GPU_CL_SECONDARY);
17587ec681f3Smrg         struct v3dv_job *job = v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
17597ec681f3Smrg         if (!job)
17607ec681f3Smrg            return;
17617ec681f3Smrg
17627ec681f3Smrg         if (pending_barrier) {
17637ec681f3Smrg            job->serialize = true;
17647ec681f3Smrg            if (pending_bcl_barrier)
17657ec681f3Smrg               job->needs_bcl_sync = true;
17667ec681f3Smrg            pending_barrier = false;
17677ec681f3Smrg            pending_bcl_barrier = false;
17687ec681f3Smrg         }
17697ec681f3Smrg      }
17707ec681f3Smrg
17717ec681f3Smrg      /* If this secondary had any pending barrier state we will need that
17727ec681f3Smrg       * barrier state consumed with whatever comes after it (first job in
17737ec681f3Smrg       * the next secondary or the primary, if this was the last secondary).
17747ec681f3Smrg       */
17757ec681f3Smrg      assert(secondary->state.has_barrier || !secondary->state.has_bcl_barrier);
17767ec681f3Smrg      pending_barrier = secondary->state.has_barrier;
17777ec681f3Smrg      pending_bcl_barrier = secondary->state.has_bcl_barrier;
17787ec681f3Smrg   }
17797ec681f3Smrg
17807ec681f3Smrg   if (pending_barrier) {
17817ec681f3Smrg      primary->state.has_barrier = true;
17827ec681f3Smrg      primary->state.has_bcl_barrier |= pending_bcl_barrier;
17837ec681f3Smrg   }
17847ec681f3Smrg}
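
/* Example of the barrier propagation above: if a secondary ends with a
 * vkCmdPipelineBarrier() that no job in the same secondary consumes,
 * the barrier is left as pending state (has_barrier/has_bcl_barrier).
 * The first job cloned from the *next* secondary is then marked
 * job->serialize (plus needs_bcl_sync if the barrier affects the
 * binning stage), and if no secondary follows, the pending state is
 * transferred to the primary so that whatever gets recorded next
 * consumes it.
 */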
17857ec681f3Smrg
17867ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
17877ec681f3Smrgv3dv_CmdExecuteCommands(VkCommandBuffer commandBuffer,
17887ec681f3Smrg                        uint32_t commandBufferCount,
17897ec681f3Smrg                        const VkCommandBuffer *pCommandBuffers)
17907ec681f3Smrg{
17917ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, primary, commandBuffer);
17927ec681f3Smrg
17937ec681f3Smrg   if (primary->state.pass != NULL) {
17947ec681f3Smrg      v3dv_X(primary->device, cmd_buffer_execute_inside_pass)
17957ec681f3Smrg         (primary, commandBufferCount, pCommandBuffers);
17967ec681f3Smrg   } else {
17977ec681f3Smrg      cmd_buffer_execute_outside_pass(primary,
17987ec681f3Smrg                                      commandBufferCount, pCommandBuffers);
17997ec681f3Smrg   }
18007ec681f3Smrg}
18017ec681f3Smrg
18027ec681f3Smrg/* This goes through the list of possible dynamic states in the pipeline and,
18037ec681f3Smrg * for those that are not configured as dynamic, copies relevant state into
18047ec681f3Smrg * the command buffer.
18057ec681f3Smrg */
18067ec681f3Smrgstatic void
18077ec681f3Smrgcmd_buffer_bind_pipeline_static_state(struct v3dv_cmd_buffer *cmd_buffer,
18087ec681f3Smrg                                      const struct v3dv_dynamic_state *src)
18097ec681f3Smrg{
18107ec681f3Smrg   struct v3dv_dynamic_state *dest = &cmd_buffer->state.dynamic;
18117ec681f3Smrg   uint32_t dynamic_mask = src->mask;
18127ec681f3Smrg   uint32_t dirty = 0;
18137ec681f3Smrg
18147ec681f3Smrg   if (!(dynamic_mask & V3DV_DYNAMIC_VIEWPORT)) {
18157ec681f3Smrg      dest->viewport.count = src->viewport.count;
18167ec681f3Smrg      if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
18177ec681f3Smrg                 src->viewport.count * sizeof(VkViewport))) {
18187ec681f3Smrg         typed_memcpy(dest->viewport.viewports,
18197ec681f3Smrg                      src->viewport.viewports,
18207ec681f3Smrg                      src->viewport.count);
18217ec681f3Smrg         typed_memcpy(dest->viewport.scale, src->viewport.scale,
18227ec681f3Smrg                      src->viewport.count);
18237ec681f3Smrg         typed_memcpy(dest->viewport.translate, src->viewport.translate,
18247ec681f3Smrg                      src->viewport.count);
18257ec681f3Smrg         dirty |= V3DV_CMD_DIRTY_VIEWPORT;
18267ec681f3Smrg      }
18277ec681f3Smrg   }
18287ec681f3Smrg
18297ec681f3Smrg   if (!(dynamic_mask & V3DV_DYNAMIC_SCISSOR)) {
18307ec681f3Smrg      dest->scissor.count = src->scissor.count;
18317ec681f3Smrg      if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
18327ec681f3Smrg                 src->scissor.count * sizeof(VkRect2D))) {
18337ec681f3Smrg         typed_memcpy(dest->scissor.scissors,
18347ec681f3Smrg                      src->scissor.scissors, src->scissor.count);
18357ec681f3Smrg         dirty |= V3DV_CMD_DIRTY_SCISSOR;
18367ec681f3Smrg      }
18377ec681f3Smrg   }
18387ec681f3Smrg
18397ec681f3Smrg   if (!(dynamic_mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
18407ec681f3Smrg      if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask,
18417ec681f3Smrg                 sizeof(src->stencil_compare_mask))) {
18427ec681f3Smrg         dest->stencil_compare_mask = src->stencil_compare_mask;
18437ec681f3Smrg         dirty |= V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK;
18447ec681f3Smrg      }
18457ec681f3Smrg   }
18467ec681f3Smrg
18477ec681f3Smrg   if (!(dynamic_mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
18487ec681f3Smrg      if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
18497ec681f3Smrg                 sizeof(src->stencil_write_mask))) {
18507ec681f3Smrg         dest->stencil_write_mask = src->stencil_write_mask;
18517ec681f3Smrg         dirty |= V3DV_CMD_DIRTY_STENCIL_WRITE_MASK;
18527ec681f3Smrg      }
18537ec681f3Smrg   }
18547ec681f3Smrg
18557ec681f3Smrg   if (!(dynamic_mask & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
18567ec681f3Smrg      if (memcmp(&dest->stencil_reference, &src->stencil_reference,
18577ec681f3Smrg                 sizeof(src->stencil_reference))) {
18587ec681f3Smrg         dest->stencil_reference = src->stencil_reference;
18597ec681f3Smrg         dirty |= V3DV_CMD_DIRTY_STENCIL_REFERENCE;
18607ec681f3Smrg      }
18617ec681f3Smrg   }
18627ec681f3Smrg
18637ec681f3Smrg   if (!(dynamic_mask & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
18647ec681f3Smrg      if (memcmp(dest->blend_constants, src->blend_constants,
18657ec681f3Smrg                 sizeof(src->blend_constants))) {
18667ec681f3Smrg         memcpy(dest->blend_constants, src->blend_constants,
18677ec681f3Smrg                sizeof(src->blend_constants));
18687ec681f3Smrg         dirty |= V3DV_CMD_DIRTY_BLEND_CONSTANTS;
18697ec681f3Smrg      }
18707ec681f3Smrg   }
18717ec681f3Smrg
18727ec681f3Smrg   if (!(dynamic_mask & V3DV_DYNAMIC_DEPTH_BIAS)) {
18737ec681f3Smrg      if (memcmp(&dest->depth_bias, &src->depth_bias,
18747ec681f3Smrg                 sizeof(src->depth_bias))) {
18757ec681f3Smrg         memcpy(&dest->depth_bias, &src->depth_bias, sizeof(src->depth_bias));
18767ec681f3Smrg         dirty |= V3DV_CMD_DIRTY_DEPTH_BIAS;
18777ec681f3Smrg      }
18787ec681f3Smrg   }
18797ec681f3Smrg
18807ec681f3Smrg   if (!(dynamic_mask & V3DV_DYNAMIC_LINE_WIDTH)) {
18817ec681f3Smrg      if (dest->line_width != src->line_width) {
18827ec681f3Smrg         dest->line_width = src->line_width;
18837ec681f3Smrg         dirty |= V3DV_CMD_DIRTY_LINE_WIDTH;
18847ec681f3Smrg      }
18857ec681f3Smrg   }
18867ec681f3Smrg
18877ec681f3Smrg   if (!(dynamic_mask & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
18887ec681f3Smrg      if (dest->color_write_enable != src->color_write_enable) {
18897ec681f3Smrg         dest->color_write_enable = src->color_write_enable;
18907ec681f3Smrg         dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE;
18917ec681f3Smrg      }
18927ec681f3Smrg   }
18937ec681f3Smrg
18947ec681f3Smrg   cmd_buffer->state.dynamic.mask = dynamic_mask;
18957ec681f3Smrg   cmd_buffer->state.dirty |= dirty;
18967ec681f3Smrg}
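
/* Illustrative application-side sketch (assuming pipeline creation maps
 * VK_DYNAMIC_STATE_VIEWPORT to V3DV_DYNAMIC_VIEWPORT in
 * pipeline->dynamic_state.mask): a pipeline created with
 *
 *    const VkDynamicState dyn[] = { VK_DYNAMIC_STATE_VIEWPORT };
 *    VkPipelineDynamicStateCreateInfo dyn_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
 *       .dynamicStateCount = 1,
 *       .pDynamicStates = dyn,
 *    };
 *
 * keeps the viewport owned by vkCmdSetViewport(), so the function above
 * skips the viewport copy while scissor, stencil state, blend constants,
 * depth bias, line width and color write enable are copied from the
 * pipeline as static state.
 */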
18977ec681f3Smrg
18987ec681f3Smrgstatic void
18997ec681f3Smrgbind_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
19007ec681f3Smrg                       struct v3dv_pipeline *pipeline)
19017ec681f3Smrg{
19027ec681f3Smrg   assert(pipeline && !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
19037ec681f3Smrg   if (cmd_buffer->state.gfx.pipeline == pipeline)
19047ec681f3Smrg      return;
19057ec681f3Smrg
19067ec681f3Smrg   cmd_buffer->state.gfx.pipeline = pipeline;
19077ec681f3Smrg
19087ec681f3Smrg   cmd_buffer_bind_pipeline_static_state(cmd_buffer, &pipeline->dynamic_state);
19097ec681f3Smrg
19107ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PIPELINE;
19117ec681f3Smrg}
19127ec681f3Smrg
19137ec681f3Smrgstatic void
19147ec681f3Smrgbind_compute_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
19157ec681f3Smrg                      struct v3dv_pipeline *pipeline)
19167ec681f3Smrg{
19177ec681f3Smrg   assert(pipeline && pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
19187ec681f3Smrg
19197ec681f3Smrg   if (cmd_buffer->state.compute.pipeline == pipeline)
19207ec681f3Smrg      return;
19217ec681f3Smrg
19227ec681f3Smrg   cmd_buffer->state.compute.pipeline = pipeline;
19237ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_PIPELINE;
19247ec681f3Smrg}
19257ec681f3Smrg
19267ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
19277ec681f3Smrgv3dv_CmdBindPipeline(VkCommandBuffer commandBuffer,
19287ec681f3Smrg                     VkPipelineBindPoint pipelineBindPoint,
19297ec681f3Smrg                     VkPipeline _pipeline)
19307ec681f3Smrg{
19317ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
19327ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
19337ec681f3Smrg
19347ec681f3Smrg   switch (pipelineBindPoint) {
19357ec681f3Smrg   case VK_PIPELINE_BIND_POINT_COMPUTE:
19367ec681f3Smrg      bind_compute_pipeline(cmd_buffer, pipeline);
19377ec681f3Smrg      break;
19387ec681f3Smrg
19397ec681f3Smrg   case VK_PIPELINE_BIND_POINT_GRAPHICS:
19407ec681f3Smrg      bind_graphics_pipeline(cmd_buffer, pipeline);
19417ec681f3Smrg      break;
19427ec681f3Smrg
19437ec681f3Smrg   default:
19447ec681f3Smrg      assert(!"invalid bind point");
19457ec681f3Smrg      break;
19467ec681f3Smrg   }
19477ec681f3Smrg}
19487ec681f3Smrg
19497ec681f3Smrg/* FIXME: C&P from radv. tu has similar code. Perhaps common place? */
19507ec681f3Smrgvoid
19517ec681f3Smrgv3dv_viewport_compute_xform(const VkViewport *viewport,
19527ec681f3Smrg                            float scale[3],
19537ec681f3Smrg                            float translate[3])
19547ec681f3Smrg{
19557ec681f3Smrg   float x = viewport->x;
19567ec681f3Smrg   float y = viewport->y;
19577ec681f3Smrg   float half_width = 0.5f * viewport->width;
19587ec681f3Smrg   float half_height = 0.5f * viewport->height;
19597ec681f3Smrg   double n = viewport->minDepth;
19607ec681f3Smrg   double f = viewport->maxDepth;
19617ec681f3Smrg
19627ec681f3Smrg   scale[0] = half_width;
19637ec681f3Smrg   translate[0] = half_width + x;
19647ec681f3Smrg   scale[1] = half_height;
19657ec681f3Smrg   translate[1] = half_height + y;
19667ec681f3Smrg
19677ec681f3Smrg   scale[2] = (f - n);
19687ec681f3Smrg   translate[2] = n;
19697ec681f3Smrg
19707ec681f3Smrg   /* It seems that if the scale is small enough the hardware won't clip
19717ec681f3Smrg    * correctly, so we work around this by choosing the smallest scale that
19727ec681f3Smrg    * seems to work.
19737ec681f3Smrg    *
19747ec681f3Smrg    * This case is exercised by CTS:
19757ec681f3Smrg    * dEQP-VK.draw.inverted_depth_ranges.nodepthclamp_deltazero
19767ec681f3Smrg    */
19777ec681f3Smrg   const float min_abs_scale = 0.000009f;
19787ec681f3Smrg   if (fabs(scale[2]) < min_abs_scale)
19797ec681f3Smrg      scale[2] = min_abs_scale * (scale[2] < 0 ? -1.0f : 1.0f);
19807ec681f3Smrg}
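
/* Worked example: a viewport of x = 0, y = 0, width = 800, height = 600,
 * minDepth = 0.0, maxDepth = 1.0 yields
 *
 *    scale     = { 400.0, 300.0, 1.0 }
 *    translate = { 400.0, 300.0, 0.0 }
 *
 * mapping NDC coordinates in [-1, 1] to [0, 800] x [0, 600] and depth
 * to [0, 1]. A degenerate range with minDepth == maxDepth would produce
 * scale[2] == 0, which is exactly what the min_abs_scale clamp above
 * guards against.
 */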
19817ec681f3Smrg
19827ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
19837ec681f3Smrgv3dv_CmdSetViewport(VkCommandBuffer commandBuffer,
19847ec681f3Smrg                    uint32_t firstViewport,
19857ec681f3Smrg                    uint32_t viewportCount,
19867ec681f3Smrg                    const VkViewport *pViewports)
19877ec681f3Smrg{
19887ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
19897ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
19907ec681f3Smrg   const uint32_t total_count = firstViewport + viewportCount;
19917ec681f3Smrg
19927ec681f3Smrg   assert(firstViewport < MAX_VIEWPORTS);
19937ec681f3Smrg   assert(total_count >= 1 && total_count <= MAX_VIEWPORTS);
19947ec681f3Smrg
19957ec681f3Smrg   if (state->dynamic.viewport.count < total_count)
19967ec681f3Smrg      state->dynamic.viewport.count = total_count;
19977ec681f3Smrg
19987ec681f3Smrg   if (!memcmp(state->dynamic.viewport.viewports + firstViewport,
19997ec681f3Smrg               pViewports, viewportCount * sizeof(*pViewports))) {
20007ec681f3Smrg      return;
20017ec681f3Smrg   }
20027ec681f3Smrg
20037ec681f3Smrg   memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports,
20047ec681f3Smrg          viewportCount * sizeof(*pViewports));
20057ec681f3Smrg
20067ec681f3Smrg   for (uint32_t i = firstViewport; i < total_count; i++) {
20077ec681f3Smrg      v3dv_viewport_compute_xform(&state->dynamic.viewport.viewports[i],
20087ec681f3Smrg                                  state->dynamic.viewport.scale[i],
20097ec681f3Smrg                                  state->dynamic.viewport.translate[i]);
20107ec681f3Smrg   }
20117ec681f3Smrg
20127ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEWPORT;
20137ec681f3Smrg}
20147ec681f3Smrg
20157ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
20167ec681f3Smrgv3dv_CmdSetScissor(VkCommandBuffer commandBuffer,
20177ec681f3Smrg                   uint32_t firstScissor,
20187ec681f3Smrg                   uint32_t scissorCount,
20197ec681f3Smrg                   const VkRect2D *pScissors)
20207ec681f3Smrg{
20217ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
20227ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
20237ec681f3Smrg
20247ec681f3Smrg   assert(firstScissor < MAX_SCISSORS);
20257ec681f3Smrg   assert(firstScissor + scissorCount >= 1 &&
20267ec681f3Smrg          firstScissor + scissorCount <= MAX_SCISSORS);
20277ec681f3Smrg
20287ec681f3Smrg   if (state->dynamic.scissor.count < firstScissor + scissorCount)
20297ec681f3Smrg      state->dynamic.scissor.count = firstScissor + scissorCount;
20307ec681f3Smrg
20317ec681f3Smrg   if (!memcmp(state->dynamic.scissor.scissors + firstScissor,
20327ec681f3Smrg               pScissors, scissorCount * sizeof(*pScissors))) {
20337ec681f3Smrg      return;
20347ec681f3Smrg   }
20357ec681f3Smrg
20367ec681f3Smrg   memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors,
20377ec681f3Smrg          scissorCount * sizeof(*pScissors));
20387ec681f3Smrg
20397ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_SCISSOR;
20407ec681f3Smrg}
20417ec681f3Smrg
20427ec681f3Smrgstatic void
20437ec681f3Smrgemit_scissor(struct v3dv_cmd_buffer *cmd_buffer)
20447ec681f3Smrg{
20457ec681f3Smrg   if (cmd_buffer->state.dynamic.viewport.count == 0)
20467ec681f3Smrg      return;
20477ec681f3Smrg
20487ec681f3Smrg   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
20497ec681f3Smrg
20507ec681f3Smrg   /* FIXME: right now we only support one viewport. viewports[0] would work
20517ec681f3Smrg    * now, but would need to change if we allow multiple viewports.
20527ec681f3Smrg    */
20537ec681f3Smrg   float *vptranslate = dynamic->viewport.translate[0];
20547ec681f3Smrg   float *vpscale = dynamic->viewport.scale[0];
20557ec681f3Smrg
20567ec681f3Smrg   float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
20577ec681f3Smrg   float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
20587ec681f3Smrg   float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
20597ec681f3Smrg   float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
20607ec681f3Smrg
20617ec681f3Smrg   /* Quoting from v3dx_emit:
20627ec681f3Smrg    * "Clip to the scissor if it's enabled, but still clip to the
20637ec681f3Smrg    * drawable regardless since that controls where the binner
20647ec681f3Smrg    * tries to put things.
20657ec681f3Smrg    *
20667ec681f3Smrg    * Additionally, always clip the rendering to the viewport,
20677ec681f3Smrg    * since the hardware does guardband clipping, meaning
20687ec681f3Smrg    * primitives would rasterize outside of the view volume."
20697ec681f3Smrg    */
20707ec681f3Smrg   uint32_t minx, miny, maxx, maxy;
20717ec681f3Smrg
20727ec681f3Smrg   /* From the Vulkan spec:
20737ec681f3Smrg    *
20747ec681f3Smrg    * "The application must ensure (using scissor if necessary) that all
20757ec681f3Smrg    *  rendering is contained within the render area. The render area must be
20767ec681f3Smrg    *  contained within the framebuffer dimensions."
20777ec681f3Smrg    *
20787ec681f3Smrg    * So it is the application's responsibility to ensure this. Still, we can
20797ec681f3Smrg    * help by automatically restricting the scissor rect to the render area.
20807ec681f3Smrg    */
20817ec681f3Smrg   minx = MAX2(vp_minx, cmd_buffer->state.render_area.offset.x);
20827ec681f3Smrg   miny = MAX2(vp_miny, cmd_buffer->state.render_area.offset.y);
20837ec681f3Smrg   maxx = MIN2(vp_maxx, cmd_buffer->state.render_area.offset.x +
20847ec681f3Smrg                        cmd_buffer->state.render_area.extent.width);
20857ec681f3Smrg   maxy = MIN2(vp_maxy, cmd_buffer->state.render_area.offset.y +
20867ec681f3Smrg                        cmd_buffer->state.render_area.extent.height);
20877ec681f3Smrg
20937ec681f3Smrg   /* Clip against user provided scissor if needed.
20947ec681f3Smrg    *
20957ec681f3Smrg    * FIXME: right now we only allow one scissor. Below would need to be
20967ec681f3Smrg    * updated if we support more
20977ec681f3Smrg    */
20987ec681f3Smrg   if (dynamic->scissor.count > 0) {
20997ec681f3Smrg      VkRect2D *scissor = &dynamic->scissor.scissors[0];
21007ec681f3Smrg      minx = MAX2(minx, scissor->offset.x);
21017ec681f3Smrg      miny = MAX2(miny, scissor->offset.y);
21027ec681f3Smrg      maxx = MIN2(maxx, scissor->offset.x + scissor->extent.width);
21037ec681f3Smrg      maxy = MIN2(maxy, scissor->offset.y + scissor->extent.height);
21047ec681f3Smrg   }
21057ec681f3Smrg
21067ec681f3Smrg   /* If the scissor is outside the viewport area we end up with
21077ec681f3Smrg    * min{x,y} > max{x,y}.
21087ec681f3Smrg    */
21097ec681f3Smrg   if (minx > maxx)
21107ec681f3Smrg      maxx = minx;
21117ec681f3Smrg   if (miny > maxy)
21127ec681f3Smrg      maxy = miny;
21137ec681f3Smrg
21147ec681f3Smrg   cmd_buffer->state.clip_window.offset.x = minx;
21157ec681f3Smrg   cmd_buffer->state.clip_window.offset.y = miny;
21167ec681f3Smrg   cmd_buffer->state.clip_window.extent.width = maxx - minx;
21177ec681f3Smrg   cmd_buffer->state.clip_window.extent.height = maxy - miny;
21187ec681f3Smrg
21197ec681f3Smrg   v3dv_X(cmd_buffer->device, job_emit_clip_window)
21207ec681f3Smrg      (cmd_buffer->state.job, &cmd_buffer->state.clip_window);
21217ec681f3Smrg
21227ec681f3Smrg   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_SCISSOR;
21237ec681f3Smrg}
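
/* Worked example of the clip window computation (assuming a render area
 * covering the whole framebuffer): a single viewport at (0, 0) with size
 * 800x600 gives vp_minx/vp_maxx = 0/800 and vp_miny/vp_maxy = 0/600;
 * with a dynamic scissor of offset (100, 100) and extent 200x200 the
 * clip window becomes offset (100, 100), extent 200x200. A scissor
 * entirely outside the viewport trips the min > max fixup above and
 * collapses the window to zero area, discarding all geometry.
 */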
21247ec681f3Smrg
21257ec681f3Smrgstatic void
21267ec681f3Smrgupdate_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer,
21277ec681f3Smrg                         uint32_t dirty_uniform_state)
21287ec681f3Smrg{
21297ec681f3Smrg   /* We need to update uniform streams if any piece of state that is passed
21307ec681f3Smrg    * to the shader as a uniform may have changed.
21317ec681f3Smrg    *
21327ec681f3Smrg    * If only descriptor sets are dirty then we can safely ignore updates
21337ec681f3Smrg    * for shader stages that don't access descriptors.
21347ec681f3Smrg    */
21357ec681f3Smrg
21367ec681f3Smrg   struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
21377ec681f3Smrg   assert(pipeline);
21387ec681f3Smrg
21397ec681f3Smrg   const bool has_new_pipeline = dirty_uniform_state & V3DV_CMD_DIRTY_PIPELINE;
21407ec681f3Smrg   const bool has_new_viewport = dirty_uniform_state & V3DV_CMD_DIRTY_VIEWPORT;
21417ec681f3Smrg   const bool has_new_push_constants = dirty_uniform_state & V3DV_CMD_DIRTY_PUSH_CONSTANTS;
21427ec681f3Smrg   const bool has_new_descriptors = dirty_uniform_state & V3DV_CMD_DIRTY_DESCRIPTOR_SETS;
21437ec681f3Smrg   const bool has_new_view_index = dirty_uniform_state & V3DV_CMD_DIRTY_VIEW_INDEX;
21447ec681f3Smrg
21457ec681f3Smrg   /* VK_SHADER_STAGE_FRAGMENT_BIT */
21467ec681f3Smrg   const bool has_new_descriptors_fs =
21477ec681f3Smrg      has_new_descriptors &&
21487ec681f3Smrg      (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_FRAGMENT_BIT);
21497ec681f3Smrg
21507ec681f3Smrg   const bool has_new_push_constants_fs =
21517ec681f3Smrg      has_new_push_constants &&
21527ec681f3Smrg      (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_FRAGMENT_BIT);
21537ec681f3Smrg
21547ec681f3Smrg   const bool needs_fs_update = has_new_pipeline ||
21557ec681f3Smrg                                has_new_view_index ||
21567ec681f3Smrg                                has_new_push_constants_fs ||
21577ec681f3Smrg                                has_new_descriptors_fs;
21597ec681f3Smrg
21607ec681f3Smrg   if (needs_fs_update) {
21617ec681f3Smrg      struct v3dv_shader_variant *fs_variant =
21627ec681f3Smrg         pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
21637ec681f3Smrg
21647ec681f3Smrg      cmd_buffer->state.uniforms.fs =
21657ec681f3Smrg         v3dv_write_uniforms(cmd_buffer, pipeline, fs_variant);
21667ec681f3Smrg   }
21677ec681f3Smrg
21687ec681f3Smrg   /* VK_SHADER_STAGE_GEOMETRY_BIT */
21697ec681f3Smrg   if (pipeline->has_gs) {
21707ec681f3Smrg      const bool has_new_descriptors_gs =
21717ec681f3Smrg         has_new_descriptors &&
21727ec681f3Smrg         (cmd_buffer->state.dirty_descriptor_stages &
21737ec681f3Smrg          VK_SHADER_STAGE_GEOMETRY_BIT);
21747ec681f3Smrg
21757ec681f3Smrg      const bool has_new_push_constants_gs =
21767ec681f3Smrg         has_new_push_constants &&
21777ec681f3Smrg         (cmd_buffer->state.dirty_push_constants_stages &
21787ec681f3Smrg          VK_SHADER_STAGE_GEOMETRY_BIT);
21797ec681f3Smrg
21807ec681f3Smrg      const bool needs_gs_update = has_new_viewport ||
21817ec681f3Smrg                                   has_new_view_index ||
21827ec681f3Smrg                                   has_new_pipeline ||
21837ec681f3Smrg                                   has_new_push_constants_gs ||
21847ec681f3Smrg                                   has_new_descriptors_gs;
21857ec681f3Smrg
21867ec681f3Smrg      if (needs_gs_update) {
21877ec681f3Smrg         struct v3dv_shader_variant *gs_variant =
21887ec681f3Smrg            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
21897ec681f3Smrg
21907ec681f3Smrg         struct v3dv_shader_variant *gs_bin_variant =
21917ec681f3Smrg            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
21927ec681f3Smrg
21937ec681f3Smrg         cmd_buffer->state.uniforms.gs =
21947ec681f3Smrg            v3dv_write_uniforms(cmd_buffer, pipeline, gs_variant);
21957ec681f3Smrg
21967ec681f3Smrg         cmd_buffer->state.uniforms.gs_bin =
21977ec681f3Smrg            v3dv_write_uniforms(cmd_buffer, pipeline, gs_bin_variant);
21987ec681f3Smrg      }
21997ec681f3Smrg   }
22007ec681f3Smrg
22017ec681f3Smrg   /* VK_SHADER_STAGE_VERTEX_BIT */
22027ec681f3Smrg   const bool has_new_descriptors_vs =
22037ec681f3Smrg      has_new_descriptors &&
22047ec681f3Smrg      (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_VERTEX_BIT);
22057ec681f3Smrg
22067ec681f3Smrg   const bool has_new_push_constants_vs =
22077ec681f3Smrg      has_new_push_constants &&
22087ec681f3Smrg      (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_VERTEX_BIT);
22097ec681f3Smrg
22107ec681f3Smrg   const bool needs_vs_update = has_new_viewport ||
22117ec681f3Smrg                                has_new_view_index ||
22127ec681f3Smrg                                has_new_pipeline ||
22137ec681f3Smrg                                has_new_push_constants_vs ||
22147ec681f3Smrg                                has_new_descriptors_vs;
22157ec681f3Smrg
22167ec681f3Smrg   if (needs_vs_update) {
22177ec681f3Smrg      struct v3dv_shader_variant *vs_variant =
22187ec681f3Smrg         pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
22197ec681f3Smrg
22207ec681f3Smrg      struct v3dv_shader_variant *vs_bin_variant =
22217ec681f3Smrg         pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
22227ec681f3Smrg
22237ec681f3Smrg      cmd_buffer->state.uniforms.vs =
22247ec681f3Smrg         v3dv_write_uniforms(cmd_buffer, pipeline, vs_variant);
22257ec681f3Smrg
22267ec681f3Smrg      cmd_buffer->state.uniforms.vs_bin =
22277ec681f3Smrg         v3dv_write_uniforms(cmd_buffer, pipeline, vs_bin_variant);
22287ec681f3Smrg   }
22297ec681f3Smrg
22307ec681f3Smrg   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEW_INDEX;
22317ec681f3Smrg}
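
/* Example of the selective updates above (assuming vkCmdPushConstants
 * records its stageFlags into dirty_push_constants_stages): a push
 * constant update with stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT only
 * dirties the fragment stage, so the next draw rewrites the FS uniform
 * stream and leaves the VS/GS streams untouched, while binding a new
 * pipeline forces every stage to be rewritten.
 */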
22327ec681f3Smrg
22337ec681f3Smrg/* This stores command buffer state that we might be about to stomp for
22347ec681f3Smrg * a meta operation.
22357ec681f3Smrg */
22367ec681f3Smrgvoid
22377ec681f3Smrgv3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
22387ec681f3Smrg                                bool push_descriptor_state)
22397ec681f3Smrg{
22407ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
22417ec681f3Smrg
22427ec681f3Smrg   if (state->subpass_idx != -1) {
22437ec681f3Smrg      state->meta.subpass_idx = state->subpass_idx;
22447ec681f3Smrg      state->meta.framebuffer = v3dv_framebuffer_to_handle(state->framebuffer);
22457ec681f3Smrg      state->meta.pass = v3dv_render_pass_to_handle(state->pass);
22467ec681f3Smrg
22477ec681f3Smrg      const uint32_t attachment_state_item_size =
22487ec681f3Smrg         sizeof(struct v3dv_cmd_buffer_attachment_state);
22497ec681f3Smrg      const uint32_t attachment_state_total_size =
22507ec681f3Smrg         attachment_state_item_size * state->attachment_alloc_count;
22517ec681f3Smrg      if (state->meta.attachment_alloc_count < state->attachment_alloc_count) {
22527ec681f3Smrg         if (state->meta.attachment_alloc_count > 0)
22537ec681f3Smrg            vk_free(&cmd_buffer->device->vk.alloc, state->meta.attachments);
22547ec681f3Smrg
22557ec681f3Smrg         state->meta.attachments = vk_zalloc(&cmd_buffer->device->vk.alloc,
22567ec681f3Smrg                                             attachment_state_total_size, 8,
22577ec681f3Smrg                                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
22587ec681f3Smrg         if (!state->meta.attachments) {
22597ec681f3Smrg            v3dv_flag_oom(cmd_buffer, NULL);
22607ec681f3Smrg            return;
22617ec681f3Smrg         }
22627ec681f3Smrg         state->meta.attachment_alloc_count = state->attachment_alloc_count;
22637ec681f3Smrg      }
22647ec681f3Smrg      state->meta.attachment_count = state->attachment_alloc_count;
22657ec681f3Smrg      memcpy(state->meta.attachments, state->attachments,
22667ec681f3Smrg             attachment_state_total_size);
22677ec681f3Smrg
22687ec681f3Smrg      state->meta.tile_aligned_render_area = state->tile_aligned_render_area;
22697ec681f3Smrg      memcpy(&state->meta.render_area, &state->render_area, sizeof(VkRect2D));
22707ec681f3Smrg   }
22717ec681f3Smrg
22727ec681f3Smrg   /* We expect that meta operations are graphics-only, so we only take into
22737ec681f3Smrg    * account the graphics pipeline and the graphics state.
22747ec681f3Smrg    */
22757ec681f3Smrg   state->meta.gfx.pipeline = state->gfx.pipeline;
22767ec681f3Smrg   memcpy(&state->meta.dynamic, &state->dynamic, sizeof(state->dynamic));
22777ec681f3Smrg
22787ec681f3Smrg   struct v3dv_descriptor_state *gfx_descriptor_state =
22797ec681f3Smrg      &cmd_buffer->state.gfx.descriptor_state;
22807ec681f3Smrg
22817ec681f3Smrg   if (push_descriptor_state) {
22827ec681f3Smrg      if (gfx_descriptor_state->valid != 0) {
22837ec681f3Smrg         memcpy(&state->meta.gfx.descriptor_state, gfx_descriptor_state,
22847ec681f3Smrg                sizeof(state->gfx.descriptor_state));
22857ec681f3Smrg      }
22867ec681f3Smrg      state->meta.has_descriptor_state = true;
22877ec681f3Smrg   } else {
22887ec681f3Smrg      state->meta.has_descriptor_state = false;
22897ec681f3Smrg   }
22907ec681f3Smrg
22917ec681f3Smrg   /* FIXME: if we kept track of whether any push constant state has been
22927ec681f3Smrg    *        bound at all, we could restrict this to the cases where it is
22937ec681f3Smrg    *        actually necessary.
22947ec681f3Smrg    */
22957ec681f3Smrg   memcpy(state->meta.push_constants, cmd_buffer->push_constants_data,
22967ec681f3Smrg          sizeof(state->meta.push_constants));
22977ec681f3Smrg}
22987ec681f3Smrg
22997ec681f3Smrg/* This restores command buffer state after a meta operation
23007ec681f3Smrg */
23017ec681f3Smrgvoid
23027ec681f3Smrgv3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
23037ec681f3Smrg                               uint32_t dirty_dynamic_state,
23047ec681f3Smrg                               bool needs_subpass_resume)
23057ec681f3Smrg{
23067ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
23077ec681f3Smrg
23087ec681f3Smrg   if (state->meta.subpass_idx != -1) {
23097ec681f3Smrg      state->pass = v3dv_render_pass_from_handle(state->meta.pass);
23107ec681f3Smrg      state->framebuffer = v3dv_framebuffer_from_handle(state->meta.framebuffer);
23117ec681f3Smrg
23127ec681f3Smrg      assert(state->meta.attachment_count <= state->attachment_alloc_count);
23137ec681f3Smrg      const uint32_t attachment_state_item_size =
23147ec681f3Smrg         sizeof(struct v3dv_cmd_buffer_attachment_state);
23157ec681f3Smrg      const uint32_t attachment_state_total_size =
23167ec681f3Smrg         attachment_state_item_size * state->meta.attachment_count;
23177ec681f3Smrg      memcpy(state->attachments, state->meta.attachments,
23187ec681f3Smrg             attachment_state_total_size);
23197ec681f3Smrg
23207ec681f3Smrg      state->tile_aligned_render_area = state->meta.tile_aligned_render_area;
23217ec681f3Smrg      memcpy(&state->render_area, &state->meta.render_area, sizeof(VkRect2D));
23227ec681f3Smrg
23237ec681f3Smrg      /* If needs_subpass_resume is true it means that we emitted the meta
23247ec681f3Smrg       * operation in its own job (possibly with an RT config that is
23257ec681f3Smrg       * incompatible with the current subpass), so resuming subpass execution
23267ec681f3Smrg       * after it requires that we create a new job with the subpass RT setup.
23277ec681f3Smrg       */
23287ec681f3Smrg      if (needs_subpass_resume)
23297ec681f3Smrg         v3dv_cmd_buffer_subpass_resume(cmd_buffer, state->meta.subpass_idx);
23307ec681f3Smrg   } else {
23317ec681f3Smrg      state->subpass_idx = -1;
23327ec681f3Smrg   }
23337ec681f3Smrg
23347ec681f3Smrg   if (state->meta.gfx.pipeline != NULL) {
23357ec681f3Smrg      struct v3dv_pipeline *pipeline = state->meta.gfx.pipeline;
23367ec681f3Smrg      VkPipelineBindPoint pipeline_binding =
23377ec681f3Smrg         v3dv_pipeline_get_binding_point(pipeline);
23387ec681f3Smrg      v3dv_CmdBindPipeline(v3dv_cmd_buffer_to_handle(cmd_buffer),
23397ec681f3Smrg                           pipeline_binding,
23407ec681f3Smrg                           v3dv_pipeline_to_handle(state->meta.gfx.pipeline));
23417ec681f3Smrg   } else {
23427ec681f3Smrg      state->gfx.pipeline = NULL;
23437ec681f3Smrg   }
23447ec681f3Smrg
23457ec681f3Smrg   if (dirty_dynamic_state) {
23467ec681f3Smrg      memcpy(&state->dynamic, &state->meta.dynamic, sizeof(state->dynamic));
23477ec681f3Smrg      state->dirty |= dirty_dynamic_state;
23487ec681f3Smrg   }
23497ec681f3Smrg
23507ec681f3Smrg   if (state->meta.has_descriptor_state) {
23517ec681f3Smrg      if (state->meta.gfx.descriptor_state.valid != 0) {
23527ec681f3Smrg         memcpy(&state->gfx.descriptor_state, &state->meta.gfx.descriptor_state,
23537ec681f3Smrg                sizeof(state->gfx.descriptor_state));
23547ec681f3Smrg      } else {
23557ec681f3Smrg         state->gfx.descriptor_state.valid = 0;
23567ec681f3Smrg      }
23577ec681f3Smrg   }
23587ec681f3Smrg
23597ec681f3Smrg   memcpy(cmd_buffer->push_constants_data, state->meta.push_constants,
23607ec681f3Smrg          sizeof(state->meta.push_constants));
23617ec681f3Smrg
23627ec681f3Smrg   state->meta.gfx.pipeline = NULL;
23637ec681f3Smrg   state->meta.framebuffer = VK_NULL_HANDLE;
23647ec681f3Smrg   state->meta.pass = VK_NULL_HANDLE;
23657ec681f3Smrg   state->meta.subpass_idx = -1;
23667ec681f3Smrg   state->meta.has_descriptor_state = false;
23677ec681f3Smrg}
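
/* Illustrative usage sketch (editor's note, not driver code): the intended
 * pairing of the meta state helpers above, with the middle step standing in
 * for whatever state the meta operation needs to stomp:
 *
 *    v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
 *    ...bind meta pipeline, set viewport/scissor, record meta draws...
 *    v3dv_cmd_buffer_meta_state_pop(cmd_buffer,
 *                                   V3DV_CMD_DIRTY_VIEWPORT |
 *                                   V3DV_CMD_DIRTY_SCISSOR,
 *                                   false);
 */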
23687ec681f3Smrg
23697ec681f3Smrgstatic struct v3dv_job *
23707ec681f3Smrgcmd_buffer_pre_draw_split_job(struct v3dv_cmd_buffer *cmd_buffer)
23717ec681f3Smrg{
23727ec681f3Smrg   struct v3dv_job *job = cmd_buffer->state.job;
23737ec681f3Smrg   assert(job);
23747ec681f3Smrg
23757ec681f3Smrg   /* If the job has been flagged with 'always_flush' and it has already
23767ec681f3Smrg    * recorded any draw calls then we need to start a new job for it.
23777ec681f3Smrg    */
23787ec681f3Smrg   if (job->always_flush && job->draw_count > 0) {
23797ec681f3Smrg      assert(cmd_buffer->state.pass);
23807ec681f3Smrg      /* First, flag the current job as not being the last in the
23817ec681f3Smrg       * current subpass
23827ec681f3Smrg       */
23837ec681f3Smrg      job->is_subpass_finish = false;
23847ec681f3Smrg
23857ec681f3Smrg      /* Now start a new job in the same subpass and flag it as continuing
23867ec681f3Smrg       * the current subpass.
23877ec681f3Smrg       */
23887ec681f3Smrg      job = v3dv_cmd_buffer_subpass_resume(cmd_buffer,
23897ec681f3Smrg                                           cmd_buffer->state.subpass_idx);
23907ec681f3Smrg      assert(job->draw_count == 0);
23917ec681f3Smrg
23927ec681f3Smrg      /* Inherit the 'always flush' behavior */
23937ec681f3Smrg      job->always_flush = true;
23947ec681f3Smrg   }
23957ec681f3Smrg
23967ec681f3Smrg   assert(job->draw_count == 0 || !job->always_flush);
23977ec681f3Smrg   return job;
23987ec681f3Smrg}
23997ec681f3Smrg
24007ec681f3Smrg/**
24017ec681f3Smrg * The Vulkan spec states:
24027ec681f3Smrg *
24037ec681f3Smrg *   "It is legal for a subpass to use no color or depth/stencil
24047ec681f3Smrg *    attachments (...)  This kind of subpass can use shader side effects such
24057ec681f3Smrg *    as image stores and atomics to produce an output. In this case, the
24067ec681f3Smrg *    subpass continues to use the width, height, and layers of the framebuffer
24077ec681f3Smrg *    to define the dimensions of the rendering area, and the
24087ec681f3Smrg *    rasterizationSamples from each pipeline’s
24097ec681f3Smrg *    VkPipelineMultisampleStateCreateInfo to define the number of samples used
24107ec681f3Smrg *    in rasterization."
24117ec681f3Smrg *
24127ec681f3Smrg * We need to enable MSAA in the TILE_BINNING_MODE_CFG packet, which we
24137ec681f3Smrg * emit when we start a new frame at the beginning of a subpass. At that point,
24147ec681f3Smrg * if the framebuffer doesn't have any attachments we won't enable MSAA and
24157ec681f3Smrg * the job won't be valid in the scenario described by the spec.
24167ec681f3Smrg *
24177ec681f3Smrg * This function is intended to be called before a draw call and will test if
24187ec681f3Smrg * we are in that scenario, in which case, it will restart the current job
24197ec681f3Smrg * with MSAA enabled.
24207ec681f3Smrg */
24217ec681f3Smrgstatic void
24227ec681f3Smrgcmd_buffer_restart_job_for_msaa_if_needed(struct v3dv_cmd_buffer *cmd_buffer)
24237ec681f3Smrg{
24247ec681f3Smrg   assert(cmd_buffer->state.job);
24257ec681f3Smrg
24267ec681f3Smrg   /* We don't support variableMultisampleRate, so all pipelines bound in the
24277ec681f3Smrg    * same subpass must have a matching number of samples, which means we only
24287ec681f3Smrg    * need to do this check on the first draw call.
24297ec681f3Smrg    */
24307ec681f3Smrg   if (cmd_buffer->state.job->draw_count > 0)
24317ec681f3Smrg      return;
24327ec681f3Smrg
24337ec681f3Smrg   /* We only need to restart the frame if the pipeline requires MSAA but
24347ec681f3Smrg    * our frame tiling didn't enable it.
24357ec681f3Smrg    */
24367ec681f3Smrg   if (!cmd_buffer->state.gfx.pipeline->msaa ||
24377ec681f3Smrg       cmd_buffer->state.job->frame_tiling.msaa) {
24387ec681f3Smrg      return;
24397ec681f3Smrg   }
24407ec681f3Smrg
24417ec681f3Smrg   /* FIXME: Secondary command buffers don't start frames. Instead, they are
24427ec681f3Smrg    * recorded into primary jobs that start them. For secondaries, we should
24437ec681f3Smrg    * still handle this scenario, but we should do that when we record them
24447ec681f3Smrg    * into primaries, by testing if any of the secondaries have multisampled
24457ec681f3Smrg    * draw calls, and then using that info to decide if we need to
24467ec681f3Smrg    * restart the primary job into which they are being recorded.
24477ec681f3Smrg    */
24487ec681f3Smrg   if (cmd_buffer->level != VK_COMMAND_BUFFER_LEVEL_PRIMARY)
24497ec681f3Smrg      return;
24507ec681f3Smrg
24517ec681f3Smrg   /* Drop the current job and restart it with MSAA enabled */
24527ec681f3Smrg   struct v3dv_job *old_job = cmd_buffer->state.job;
24537ec681f3Smrg   cmd_buffer->state.job = NULL;
24547ec681f3Smrg
24557ec681f3Smrg   struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
24567ec681f3Smrg                                    sizeof(struct v3dv_job), 8,
24577ec681f3Smrg                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
24587ec681f3Smrg   if (!job) {
24597ec681f3Smrg      v3dv_flag_oom(cmd_buffer, NULL);
24607ec681f3Smrg      return;
24617ec681f3Smrg   }
24627ec681f3Smrg
24637ec681f3Smrg   v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CL, cmd_buffer->device, cmd_buffer,
24647ec681f3Smrg                 cmd_buffer->state.subpass_idx);
24657ec681f3Smrg   cmd_buffer->state.job = job;
24667ec681f3Smrg
24677ec681f3Smrg   v3dv_job_start_frame(job,
24687ec681f3Smrg                        old_job->frame_tiling.width,
24697ec681f3Smrg                        old_job->frame_tiling.height,
24707ec681f3Smrg                        old_job->frame_tiling.layers,
24717ec681f3Smrg                        true,
24727ec681f3Smrg                        old_job->frame_tiling.render_target_count,
24737ec681f3Smrg                        old_job->frame_tiling.internal_bpp,
24747ec681f3Smrg                        true /* msaa */);
24757ec681f3Smrg
24767ec681f3Smrg   v3dv_job_destroy(old_job);
24777ec681f3Smrg}
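
/* Illustrative sketch (editor's note, not driver code): the application-side
 * setup that triggers the restart above is an attachment-less subpass drawn
 * with a multisampled pipeline, e.g.:
 *
 *    VkPipelineMultisampleStateCreateInfo ms_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
 *       .rasterizationSamples = VK_SAMPLE_COUNT_4_BIT,
 *    };
 *
 * Since the framebuffer has no attachments, the frame was started with
 * msaa = false, so the first draw drops the empty job and restarts the
 * frame with msaa = true.
 */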
24787ec681f3Smrg
24797ec681f3Smrgvoid
24807ec681f3Smrgv3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer)
24817ec681f3Smrg{
24827ec681f3Smrg   assert(cmd_buffer->state.gfx.pipeline);
24837ec681f3Smrg   assert(!(cmd_buffer->state.gfx.pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
24847ec681f3Smrg
24857ec681f3Smrg   /* If we emitted a pipeline barrier right before this draw we won't have
24867ec681f3Smrg    * an active job. In that case, create a new job continuing the current
24877ec681f3Smrg    * subpass.
24887ec681f3Smrg    */
24897ec681f3Smrg   if (!cmd_buffer->state.job) {
24907ec681f3Smrg      v3dv_cmd_buffer_subpass_resume(cmd_buffer,
24917ec681f3Smrg                                     cmd_buffer->state.subpass_idx);
24927ec681f3Smrg   }
24937ec681f3Smrg
24947ec681f3Smrg   /* Restart single sample job for MSAA pipeline if needed */
24957ec681f3Smrg   cmd_buffer_restart_job_for_msaa_if_needed(cmd_buffer);
24967ec681f3Smrg
24977ec681f3Smrg   /* If the job is configured to flush on every draw call we need to create
24987ec681f3Smrg    * a new job now.
24997ec681f3Smrg    */
25007ec681f3Smrg   struct v3dv_job *job = cmd_buffer_pre_draw_split_job(cmd_buffer);
25017ec681f3Smrg   job->draw_count++;
25027ec681f3Smrg
25037ec681f3Smrg   /* GL shader state binds shaders, uniform and vertex attribute state. The
25047ec681f3Smrg    * compiler injects uniforms to handle some descriptor types (such as
25057ec681f3Smrg    * textures), so we need to regenerate that state when descriptors change.
25067ec681f3Smrg    *
25077ec681f3Smrg    * We also need to emit new shader state if we have a dirty viewport, since
25087ec681f3Smrg    * that requires new uniform state for QUNIFORM_VIEWPORT_*.
25097ec681f3Smrg    */
25107ec681f3Smrg   uint32_t *dirty = &cmd_buffer->state.dirty;
25117ec681f3Smrg
25127ec681f3Smrg   const uint32_t dirty_uniform_state =
25137ec681f3Smrg      *dirty & (V3DV_CMD_DIRTY_PIPELINE |
25147ec681f3Smrg                V3DV_CMD_DIRTY_PUSH_CONSTANTS |
25157ec681f3Smrg                V3DV_CMD_DIRTY_DESCRIPTOR_SETS |
25167ec681f3Smrg                V3DV_CMD_DIRTY_VIEWPORT |
25177ec681f3Smrg                V3DV_CMD_DIRTY_VIEW_INDEX);
25187ec681f3Smrg
25197ec681f3Smrg   if (dirty_uniform_state)
25207ec681f3Smrg      update_gfx_uniform_state(cmd_buffer, dirty_uniform_state);
25217ec681f3Smrg
25227ec681f3Smrg   struct v3dv_device *device = cmd_buffer->device;
25237ec681f3Smrg
25247ec681f3Smrg   if (dirty_uniform_state || (*dirty & V3DV_CMD_DIRTY_VERTEX_BUFFER))
25257ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_gl_shader_state)(cmd_buffer);
25267ec681f3Smrg
25277ec681f3Smrg   if (*dirty & (V3DV_CMD_DIRTY_PIPELINE)) {
25287ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_configuration_bits)(cmd_buffer);
25297ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_varyings_state)(cmd_buffer);
25307ec681f3Smrg   }
25317ec681f3Smrg
25327ec681f3Smrg   if (*dirty & (V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR)) {
25337ec681f3Smrg      emit_scissor(cmd_buffer);
25347ec681f3Smrg   }
25357ec681f3Smrg
25367ec681f3Smrg   if (*dirty & V3DV_CMD_DIRTY_VIEWPORT) {
25377ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_viewport)(cmd_buffer);
25387ec681f3Smrg   }
25397ec681f3Smrg
25407ec681f3Smrg   if (*dirty & V3DV_CMD_DIRTY_INDEX_BUFFER)
25417ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_index_buffer)(cmd_buffer);
25427ec681f3Smrg
25437ec681f3Smrg   const uint32_t dynamic_stencil_dirty_flags =
25447ec681f3Smrg      V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK |
25457ec681f3Smrg      V3DV_CMD_DIRTY_STENCIL_WRITE_MASK |
25467ec681f3Smrg      V3DV_CMD_DIRTY_STENCIL_REFERENCE;
25477ec681f3Smrg   if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | dynamic_stencil_dirty_flags))
25487ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_stencil)(cmd_buffer);
25497ec681f3Smrg
25507ec681f3Smrg   if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_DEPTH_BIAS))
25517ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_depth_bias)(cmd_buffer);
25527ec681f3Smrg
25537ec681f3Smrg   if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_BLEND_CONSTANTS))
25547ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_blend)(cmd_buffer);
25557ec681f3Smrg
25567ec681f3Smrg   if (*dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY)
25577ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_occlusion_query)(cmd_buffer);
25587ec681f3Smrg
25597ec681f3Smrg   if (*dirty & V3DV_CMD_DIRTY_LINE_WIDTH)
25607ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_line_width)(cmd_buffer);
25617ec681f3Smrg
25627ec681f3Smrg   if (*dirty & V3DV_CMD_DIRTY_PIPELINE)
25637ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_sample_state)(cmd_buffer);
25647ec681f3Smrg
25657ec681f3Smrg   if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE))
25667ec681f3Smrg      v3dv_X(device, cmd_buffer_emit_color_write_mask)(cmd_buffer);
25677ec681f3Smrg
25687ec681f3Smrg   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PIPELINE;
25697ec681f3Smrg}
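
/* Worked example (editor's note): if only vkCmdSetViewport was called since
 * the last draw, *dirty contains just V3DV_CMD_DIRTY_VIEWPORT. That flag is
 * part of dirty_uniform_state, so we rewrite uniforms (QUNIFORM_VIEWPORT_*)
 * and re-emit GL shader state, then re-emit the scissor (which is clipped
 * against the viewport) and the viewport itself, while the stencil, depth
 * bias, blend and remaining emits are all skipped.
 */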
25707ec681f3Smrg
25717ec681f3Smrgstatic inline void
25727ec681f3Smrgcmd_buffer_set_view_index(struct v3dv_cmd_buffer *cmd_buffer,
25737ec681f3Smrg                          uint32_t view_index)
25747ec681f3Smrg{
25757ec681f3Smrg   cmd_buffer->state.view_index = view_index;
25767ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEW_INDEX;
25777ec681f3Smrg}
25787ec681f3Smrg
25797ec681f3Smrgstatic void
25807ec681f3Smrgcmd_buffer_draw(struct v3dv_cmd_buffer *cmd_buffer,
25817ec681f3Smrg                struct v3dv_draw_info *info)
25827ec681f3Smrg{
25847ec681f3Smrg   struct v3dv_render_pass *pass = cmd_buffer->state.pass;
25857ec681f3Smrg   if (likely(!pass->multiview_enabled)) {
25867ec681f3Smrg      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
25877ec681f3Smrg      v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info);
25887ec681f3Smrg      return;
25897ec681f3Smrg   }
25907ec681f3Smrg
25917ec681f3Smrg   uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
25927ec681f3Smrg   while (view_mask) {
25937ec681f3Smrg      cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
25947ec681f3Smrg      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
25957ec681f3Smrg      v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info);
25967ec681f3Smrg   }
25977ec681f3Smrg}
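
/* Worked example (editor's note): with view_mask = 0b1011 (views 0, 1 and 3
 * active in the subpass), u_bit_scan() pops the lowest set bit on each
 * iteration, so the loop above records three draws with view_index 0, then 1,
 * then 3, re-emitting the per-view uniform state before each one.
 */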
25987ec681f3Smrg
25997ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
26007ec681f3Smrgv3dv_CmdDraw(VkCommandBuffer commandBuffer,
26017ec681f3Smrg             uint32_t vertexCount,
26027ec681f3Smrg             uint32_t instanceCount,
26037ec681f3Smrg             uint32_t firstVertex,
26047ec681f3Smrg             uint32_t firstInstance)
26057ec681f3Smrg{
26067ec681f3Smrg   if (vertexCount == 0 || instanceCount == 0)
26077ec681f3Smrg      return;
26087ec681f3Smrg
26097ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
26107ec681f3Smrg   struct v3dv_draw_info info = {};
26117ec681f3Smrg   info.vertex_count = vertexCount;
26127ec681f3Smrg   info.instance_count = instanceCount;
26137ec681f3Smrg   info.first_instance = firstInstance;
26147ec681f3Smrg   info.first_vertex = firstVertex;
26157ec681f3Smrg
26167ec681f3Smrg   cmd_buffer_draw(cmd_buffer, &info);
26177ec681f3Smrg}
26187ec681f3Smrg
26197ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
26207ec681f3Smrgv3dv_CmdDrawIndexed(VkCommandBuffer commandBuffer,
26217ec681f3Smrg                    uint32_t indexCount,
26227ec681f3Smrg                    uint32_t instanceCount,
26237ec681f3Smrg                    uint32_t firstIndex,
26247ec681f3Smrg                    int32_t vertexOffset,
26257ec681f3Smrg                    uint32_t firstInstance)
26267ec681f3Smrg{
26277ec681f3Smrg   if (indexCount == 0 || instanceCount == 0)
26287ec681f3Smrg      return;
26297ec681f3Smrg
26307ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
26317ec681f3Smrg
26327ec681f3Smrg   struct v3dv_render_pass *pass = cmd_buffer->state.pass;
26337ec681f3Smrg   if (likely(!pass->multiview_enabled)) {
26347ec681f3Smrg      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
26357ec681f3Smrg      v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed)
26367ec681f3Smrg         (cmd_buffer, indexCount, instanceCount,
26377ec681f3Smrg          firstIndex, vertexOffset, firstInstance);
26387ec681f3Smrg      return;
26397ec681f3Smrg   }
26407ec681f3Smrg
26417ec681f3Smrg   uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
26427ec681f3Smrg   while (view_mask) {
26437ec681f3Smrg      cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
26447ec681f3Smrg      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
26457ec681f3Smrg      v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed)
26467ec681f3Smrg         (cmd_buffer, indexCount, instanceCount,
26477ec681f3Smrg          firstIndex, vertexOffset, firstInstance);
26487ec681f3Smrg   }
26497ec681f3Smrg}
26507ec681f3Smrg
26517ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
26527ec681f3Smrgv3dv_CmdDrawIndirect(VkCommandBuffer commandBuffer,
26537ec681f3Smrg                     VkBuffer _buffer,
26547ec681f3Smrg                     VkDeviceSize offset,
26557ec681f3Smrg                     uint32_t drawCount,
26567ec681f3Smrg                     uint32_t stride)
26577ec681f3Smrg{
26587ec681f3Smrg   /* drawCount is the number of draws to execute, and can be zero. */
26597ec681f3Smrg   if (drawCount == 0)
26607ec681f3Smrg      return;
26617ec681f3Smrg
26627ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
26637ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
26647ec681f3Smrg
26657ec681f3Smrg   struct v3dv_render_pass *pass = cmd_buffer->state.pass;
26667ec681f3Smrg   if (likely(!pass->multiview_enabled)) {
26677ec681f3Smrg      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
26687ec681f3Smrg      v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect)
26697ec681f3Smrg         (cmd_buffer, buffer, offset, drawCount, stride);
26707ec681f3Smrg      return;
26717ec681f3Smrg   }
26727ec681f3Smrg
26737ec681f3Smrg   uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
26747ec681f3Smrg   while (view_mask) {
26757ec681f3Smrg      cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
26767ec681f3Smrg      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
26777ec681f3Smrg      v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect)
26787ec681f3Smrg         (cmd_buffer, buffer, offset, drawCount, stride);
26797ec681f3Smrg   }
26807ec681f3Smrg}
26817ec681f3Smrg
26827ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
26837ec681f3Smrgv3dv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
26847ec681f3Smrg                            VkBuffer _buffer,
26857ec681f3Smrg                            VkDeviceSize offset,
26867ec681f3Smrg                            uint32_t drawCount,
26877ec681f3Smrg                            uint32_t stride)
26887ec681f3Smrg{
26897ec681f3Smrg   /* drawCount is the number of draws to execute, and can be zero. */
26907ec681f3Smrg   if (drawCount == 0)
26917ec681f3Smrg      return;
26927ec681f3Smrg
26937ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
26947ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
26957ec681f3Smrg
26967ec681f3Smrg   struct v3dv_render_pass *pass = cmd_buffer->state.pass;
26977ec681f3Smrg   if (likely(!pass->multiview_enabled)) {
26987ec681f3Smrg      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
26997ec681f3Smrg      v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect)
27007ec681f3Smrg         (cmd_buffer, buffer, offset, drawCount, stride);
27017ec681f3Smrg      return;
27027ec681f3Smrg   }
27037ec681f3Smrg
27047ec681f3Smrg   uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
27057ec681f3Smrg   while (view_mask) {
27067ec681f3Smrg      cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
27077ec681f3Smrg      v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
27087ec681f3Smrg      v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect)
27097ec681f3Smrg         (cmd_buffer, buffer, offset, drawCount, stride);
27107ec681f3Smrg   }
27117ec681f3Smrg}
27127ec681f3Smrg
27137ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
27147ec681f3Smrgv3dv_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
27157ec681f3Smrg                        VkPipelineStageFlags srcStageMask,
27167ec681f3Smrg                        VkPipelineStageFlags dstStageMask,
27177ec681f3Smrg                        VkDependencyFlags dependencyFlags,
27187ec681f3Smrg                        uint32_t memoryBarrierCount,
27197ec681f3Smrg                        const VkMemoryBarrier *pMemoryBarriers,
27207ec681f3Smrg                        uint32_t bufferBarrierCount,
27217ec681f3Smrg                        const VkBufferMemoryBarrier *pBufferBarriers,
27227ec681f3Smrg                        uint32_t imageBarrierCount,
27237ec681f3Smrg                        const VkImageMemoryBarrier *pImageBarriers)
27247ec681f3Smrg{
27257ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
27267ec681f3Smrg
27277ec681f3Smrg   /* We only care about barriers between GPU jobs */
27287ec681f3Smrg   if (srcStageMask == VK_PIPELINE_STAGE_HOST_BIT ||
27297ec681f3Smrg       dstStageMask == VK_PIPELINE_STAGE_HOST_BIT) {
27307ec681f3Smrg      return;
27317ec681f3Smrg   }
27327ec681f3Smrg
27337ec681f3Smrg   /* If we have a recording job, finish it here */
27347ec681f3Smrg   struct v3dv_job *job = cmd_buffer->state.job;
27357ec681f3Smrg   if (job)
27367ec681f3Smrg      v3dv_cmd_buffer_finish_job(cmd_buffer);
27377ec681f3Smrg
27387ec681f3Smrg   cmd_buffer->state.has_barrier = true;
27397ec681f3Smrg   if (dstStageMask & (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
27407ec681f3Smrg                       VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
27417ec681f3Smrg                       VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
27427ec681f3Smrg                       VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
27437ec681f3Smrg                       VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
27447ec681f3Smrg                       VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT)) {
27457ec681f3Smrg      cmd_buffer->state.has_bcl_barrier = true;
27467ec681f3Smrg   }
27477ec681f3Smrg}
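
/* Worked example (editor's note): a barrier with dstStageMask =
 * VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT finishes the current job and sets
 * has_barrier only, while dstStageMask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT
 * also sets has_bcl_barrier, since the consuming stage runs in the binning
 * (BCL) stage rather than only in rendering (RCL).
 */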
27487ec681f3Smrg
27497ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
27507ec681f3Smrgv3dv_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
27517ec681f3Smrg                          uint32_t firstBinding,
27527ec681f3Smrg                          uint32_t bindingCount,
27537ec681f3Smrg                          const VkBuffer *pBuffers,
27547ec681f3Smrg                          const VkDeviceSize *pOffsets)
27557ec681f3Smrg{
27567ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
27577ec681f3Smrg   struct v3dv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
27587ec681f3Smrg
27597ec681f3Smrg   /* We have to defer setting up the vertex buffers since we need the buffer
27607ec681f3Smrg    * stride from the pipeline.
27617ec681f3Smrg    */
27627ec681f3Smrg
27637ec681f3Smrg   assert(firstBinding + bindingCount <= MAX_VBS);
27647ec681f3Smrg   bool vb_state_changed = false;
27657ec681f3Smrg   for (uint32_t i = 0; i < bindingCount; i++) {
27667ec681f3Smrg      if (vb[firstBinding + i].buffer != v3dv_buffer_from_handle(pBuffers[i])) {
27677ec681f3Smrg         vb[firstBinding + i].buffer = v3dv_buffer_from_handle(pBuffers[i]);
27687ec681f3Smrg         vb_state_changed = true;
27697ec681f3Smrg      }
27707ec681f3Smrg      if (vb[firstBinding + i].offset != pOffsets[i]) {
27717ec681f3Smrg         vb[firstBinding + i].offset = pOffsets[i];
27727ec681f3Smrg         vb_state_changed = true;
27737ec681f3Smrg      }
27747ec681f3Smrg   }
27757ec681f3Smrg
27767ec681f3Smrg   if (vb_state_changed)
27777ec681f3Smrg      cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VERTEX_BUFFER;
27787ec681f3Smrg}
27797ec681f3Smrg
27807ec681f3Smrgstatic uint32_t
27817ec681f3Smrgget_index_size(VkIndexType index_type)
27827ec681f3Smrg{
27837ec681f3Smrg   switch (index_type) {
27847ec681f3Smrg   case VK_INDEX_TYPE_UINT8_EXT:
27857ec681f3Smrg      return 1;
27877ec681f3Smrg   case VK_INDEX_TYPE_UINT16:
27887ec681f3Smrg      return 2;
27907ec681f3Smrg   case VK_INDEX_TYPE_UINT32:
27917ec681f3Smrg      return 4;
27937ec681f3Smrg   default:
27947ec681f3Smrg      unreachable("Unsupported index type");
27957ec681f3Smrg   }
27967ec681f3Smrg}
27977ec681f3Smrg
27987ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
27997ec681f3Smrgv3dv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
28007ec681f3Smrg                        VkBuffer buffer,
28017ec681f3Smrg                        VkDeviceSize offset,
28027ec681f3Smrg                        VkIndexType indexType)
28037ec681f3Smrg{
28047ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
28057ec681f3Smrg
28067ec681f3Smrg   const uint32_t index_size = get_index_size(indexType);
28077ec681f3Smrg   if (buffer == cmd_buffer->state.index_buffer.buffer &&
28087ec681f3Smrg       offset == cmd_buffer->state.index_buffer.offset &&
28097ec681f3Smrg       index_size == cmd_buffer->state.index_buffer.index_size) {
28107ec681f3Smrg      return;
28117ec681f3Smrg   }
28127ec681f3Smrg
28137ec681f3Smrg   cmd_buffer->state.index_buffer.buffer = buffer;
28147ec681f3Smrg   cmd_buffer->state.index_buffer.offset = offset;
28157ec681f3Smrg   cmd_buffer->state.index_buffer.index_size = index_size;
28167ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_INDEX_BUFFER;
28177ec681f3Smrg}
28187ec681f3Smrg
28197ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
28207ec681f3Smrgv3dv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
28217ec681f3Smrg                              VkStencilFaceFlags faceMask,
28227ec681f3Smrg                              uint32_t compareMask)
28237ec681f3Smrg{
28247ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
28257ec681f3Smrg
28267ec681f3Smrg   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
28277ec681f3Smrg      cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask & 0xff;
28287ec681f3Smrg   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
28297ec681f3Smrg      cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask & 0xff;
28307ec681f3Smrg
28317ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK;
28327ec681f3Smrg}
28337ec681f3Smrg
28347ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
28357ec681f3Smrgv3dv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
28367ec681f3Smrg                            VkStencilFaceFlags faceMask,
28377ec681f3Smrg                            uint32_t writeMask)
28387ec681f3Smrg{
28397ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
28407ec681f3Smrg
28417ec681f3Smrg   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
28427ec681f3Smrg      cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask & 0xff;
28437ec681f3Smrg   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
28447ec681f3Smrg      cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask & 0xff;
28457ec681f3Smrg
28467ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_WRITE_MASK;
28477ec681f3Smrg}
28487ec681f3Smrg
28497ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
28507ec681f3Smrgv3dv_CmdSetStencilReference(VkCommandBuffer commandBuffer,
28517ec681f3Smrg                            VkStencilFaceFlags faceMask,
28527ec681f3Smrg                            uint32_t reference)
28537ec681f3Smrg{
28547ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
28557ec681f3Smrg
28567ec681f3Smrg   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
28577ec681f3Smrg      cmd_buffer->state.dynamic.stencil_reference.front = reference & 0xff;
28587ec681f3Smrg   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
28597ec681f3Smrg      cmd_buffer->state.dynamic.stencil_reference.back = reference & 0xff;
28607ec681f3Smrg
28617ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_REFERENCE;
28627ec681f3Smrg}
28637ec681f3Smrg
28647ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
28657ec681f3Smrgv3dv_CmdSetDepthBias(VkCommandBuffer commandBuffer,
28667ec681f3Smrg                     float depthBiasConstantFactor,
28677ec681f3Smrg                     float depthBiasClamp,
28687ec681f3Smrg                     float depthBiasSlopeFactor)
28697ec681f3Smrg{
28707ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
28717ec681f3Smrg
28727ec681f3Smrg   cmd_buffer->state.dynamic.depth_bias.constant_factor = depthBiasConstantFactor;
28737ec681f3Smrg   cmd_buffer->state.dynamic.depth_bias.depth_bias_clamp = depthBiasClamp;
28747ec681f3Smrg   cmd_buffer->state.dynamic.depth_bias.slope_factor = depthBiasSlopeFactor;
28757ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DEPTH_BIAS;
28767ec681f3Smrg}
28777ec681f3Smrg
28787ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
28797ec681f3Smrgv3dv_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
28807ec681f3Smrg                       float minDepthBounds,
28817ec681f3Smrg                       float maxDepthBounds)
28827ec681f3Smrg{
28837ec681f3Smrg   /* We do not support depth bounds testing, so we just ignore this. We are
28847ec681f3Smrg    * already asserting that pipelines don't enable the feature anyway.
28857ec681f3Smrg    */
28867ec681f3Smrg}
28877ec681f3Smrg
28887ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
28897ec681f3Smrgv3dv_CmdSetLineWidth(VkCommandBuffer commandBuffer,
28907ec681f3Smrg                     float lineWidth)
28917ec681f3Smrg{
28927ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
28937ec681f3Smrg
28947ec681f3Smrg   cmd_buffer->state.dynamic.line_width = lineWidth;
28957ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_LINE_WIDTH;
28967ec681f3Smrg}
28977ec681f3Smrg
28987ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
28997ec681f3Smrgv3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
29007ec681f3Smrg                           VkPipelineBindPoint pipelineBindPoint,
29017ec681f3Smrg                           VkPipelineLayout _layout,
29027ec681f3Smrg                           uint32_t firstSet,
29037ec681f3Smrg                           uint32_t descriptorSetCount,
29047ec681f3Smrg                           const VkDescriptorSet *pDescriptorSets,
29057ec681f3Smrg                           uint32_t dynamicOffsetCount,
29067ec681f3Smrg                           const uint32_t *pDynamicOffsets)
29077ec681f3Smrg{
29087ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
29097ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, _layout);
29107ec681f3Smrg
29117ec681f3Smrg   uint32_t dyn_index = 0;
29127ec681f3Smrg
29137ec681f3Smrg   assert(firstSet + descriptorSetCount <= MAX_SETS);
29147ec681f3Smrg
29157ec681f3Smrg   struct v3dv_descriptor_state *descriptor_state =
29167ec681f3Smrg      pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE ?
29177ec681f3Smrg      &cmd_buffer->state.compute.descriptor_state :
29187ec681f3Smrg      &cmd_buffer->state.gfx.descriptor_state;
29197ec681f3Smrg
29207ec681f3Smrg   VkShaderStageFlags dirty_stages = 0;
29217ec681f3Smrg   bool descriptor_state_changed = false;
29227ec681f3Smrg   for (uint32_t i = 0; i < descriptorSetCount; i++) {
29237ec681f3Smrg      V3DV_FROM_HANDLE(v3dv_descriptor_set, set, pDescriptorSets[i]);
29247ec681f3Smrg      uint32_t index = firstSet + i;
29257ec681f3Smrg
29267ec681f3Smrg      descriptor_state->valid |= (1u << index);
29277ec681f3Smrg      if (descriptor_state->descriptor_sets[index] != set) {
29287ec681f3Smrg         descriptor_state->descriptor_sets[index] = set;
29297ec681f3Smrg         dirty_stages |= set->layout->shader_stages;
29307ec681f3Smrg         descriptor_state_changed = true;
29317ec681f3Smrg      }
29327ec681f3Smrg
29337ec681f3Smrg      for (uint32_t j = 0; j < set->layout->dynamic_offset_count; j++, dyn_index++) {
29347ec681f3Smrg         uint32_t idx = j + layout->set[i + firstSet].dynamic_offset_start;
29357ec681f3Smrg
29367ec681f3Smrg         if (descriptor_state->dynamic_offsets[idx] != pDynamicOffsets[dyn_index]) {
29377ec681f3Smrg            descriptor_state->dynamic_offsets[idx] = pDynamicOffsets[dyn_index];
29387ec681f3Smrg            dirty_stages |= set->layout->shader_stages;
29397ec681f3Smrg            descriptor_state_changed = true;
29407ec681f3Smrg         }
29417ec681f3Smrg      }
29427ec681f3Smrg   }
29437ec681f3Smrg
29447ec681f3Smrg   if (descriptor_state_changed) {
29457ec681f3Smrg      if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
29467ec681f3Smrg         cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DESCRIPTOR_SETS;
29477ec681f3Smrg         cmd_buffer->state.dirty_descriptor_stages |= dirty_stages & VK_SHADER_STAGE_ALL_GRAPHICS;
29487ec681f3Smrg      } else {
29497ec681f3Smrg         cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS;
29507ec681f3Smrg         cmd_buffer->state.dirty_descriptor_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
29517ec681f3Smrg      }
29527ec681f3Smrg   }
29537ec681f3Smrg}
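
/* Worked example (editor's note): binding two sets at firstSet = 0, where
 * set 0 has two dynamic buffers (dynamic_offset_start = 0) and set 1 has one
 * (dynamic_offset_start = 2), consumes pDynamicOffsets[0..2] and stores them
 * at descriptor_state->dynamic_offsets[0], [1] and [2] respectively.
 */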
29547ec681f3Smrg
29557ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
29567ec681f3Smrgv3dv_CmdPushConstants(VkCommandBuffer commandBuffer,
29577ec681f3Smrg                      VkPipelineLayout layout,
29587ec681f3Smrg                      VkShaderStageFlags stageFlags,
29597ec681f3Smrg                      uint32_t offset,
29607ec681f3Smrg                      uint32_t size,
29617ec681f3Smrg                      const void *pValues)
29627ec681f3Smrg{
29637ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
29647ec681f3Smrg
29657ec681f3Smrg   if (!memcmp((uint8_t *) cmd_buffer->push_constants_data + offset, pValues, size))
29667ec681f3Smrg      return;
29677ec681f3Smrg
29687ec681f3Smrg   memcpy((uint8_t *) cmd_buffer->push_constants_data + offset, pValues, size);
29697ec681f3Smrg
29707ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PUSH_CONSTANTS;
29717ec681f3Smrg   cmd_buffer->state.dirty_push_constants_stages |= stageFlags;
29727ec681f3Smrg}
29737ec681f3Smrg
29747ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
29757ec681f3Smrgv3dv_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
29767ec681f3Smrg                          const float blendConstants[4])
29777ec681f3Smrg{
29787ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
29797ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
29807ec681f3Smrg
29817ec681f3Smrg   if (!memcmp(state->dynamic.blend_constants, blendConstants,
29827ec681f3Smrg               sizeof(state->dynamic.blend_constants))) {
29837ec681f3Smrg      return;
29847ec681f3Smrg   }
29857ec681f3Smrg
29867ec681f3Smrg   memcpy(state->dynamic.blend_constants, blendConstants,
29877ec681f3Smrg          sizeof(state->dynamic.blend_constants));
29887ec681f3Smrg
29897ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_BLEND_CONSTANTS;
29907ec681f3Smrg}
29917ec681f3Smrg
29927ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
29937ec681f3Smrgv3dv_CmdSetColorWriteEnableEXT(VkCommandBuffer commandBuffer,
29947ec681f3Smrg                               uint32_t attachmentCount,
29957ec681f3Smrg                               const VkBool32 *pColorWriteEnables)
29967ec681f3Smrg{
29977ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
29987ec681f3Smrg   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
29997ec681f3Smrg   uint32_t color_write_enable = 0;
30007ec681f3Smrg
30017ec681f3Smrg   for (uint32_t i = 0; i < attachmentCount; i++)
30027ec681f3Smrg      color_write_enable |= pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
30037ec681f3Smrg
30047ec681f3Smrg   if (state->dynamic.color_write_enable == color_write_enable)
30057ec681f3Smrg      return;
30067ec681f3Smrg
30077ec681f3Smrg   state->dynamic.color_write_enable = color_write_enable;
30087ec681f3Smrg
30097ec681f3Smrg   state->dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE;
30107ec681f3Smrg}
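
/* Worked example (editor's note): attachmentCount = 2 with
 * pColorWriteEnables = { VK_TRUE, VK_FALSE } yields
 * color_write_enable = 0x0000000f: all four channel bits set for
 * attachment 0 and all cleared for attachment 1.
 */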
30117ec681f3Smrg
30127ec681f3Smrgvoid
30137ec681f3Smrgv3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
30147ec681f3Smrg                              struct v3dv_query_pool *pool,
30157ec681f3Smrg                              uint32_t first,
30167ec681f3Smrg                              uint32_t count)
30177ec681f3Smrg{
30187ec681f3Smrg   /* Resets can only happen outside a render pass instance so we should not
30197ec681f3Smrg    * be in the middle of job recording.
30207ec681f3Smrg    */
30217ec681f3Smrg   assert(cmd_buffer->state.pass == NULL);
30227ec681f3Smrg   assert(cmd_buffer->state.job == NULL);
30237ec681f3Smrg
30247ec681f3Smrg   assert(first < pool->query_count);
30257ec681f3Smrg   assert(first + count <= pool->query_count);
30267ec681f3Smrg
30277ec681f3Smrg   struct v3dv_job *job =
30287ec681f3Smrg      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
30297ec681f3Smrg                                     V3DV_JOB_TYPE_CPU_RESET_QUERIES,
30307ec681f3Smrg                                     cmd_buffer, -1);
30317ec681f3Smrg   v3dv_return_if_oom(cmd_buffer, NULL);
30327ec681f3Smrg
30337ec681f3Smrg   job->cpu.query_reset.pool = pool;
30347ec681f3Smrg   job->cpu.query_reset.first = first;
30357ec681f3Smrg   job->cpu.query_reset.count = count;
30367ec681f3Smrg
30377ec681f3Smrg   list_addtail(&job->list_link, &cmd_buffer->jobs);
30387ec681f3Smrg}
30397ec681f3Smrg
30407ec681f3Smrgvoid
30417ec681f3Smrgv3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
30427ec681f3Smrg                                   uint32_t slot_size,
30437ec681f3Smrg                                   uint32_t used_count,
30447ec681f3Smrg                                   uint32_t *alloc_count,
30457ec681f3Smrg                                   void **ptr)
30467ec681f3Smrg{
30477ec681f3Smrg   if (used_count >= *alloc_count) {
30487ec681f3Smrg      const uint32_t prev_slot_count = *alloc_count;
30497ec681f3Smrg      void *old_buffer = *ptr;
30507ec681f3Smrg
30517ec681f3Smrg      const uint32_t new_slot_count = MAX2(*alloc_count * 2, 4);
30527ec681f3Smrg      const uint32_t bytes = new_slot_count * slot_size;
30537ec681f3Smrg      *ptr = vk_alloc(&cmd_buffer->device->vk.alloc, bytes, 8,
30547ec681f3Smrg                      VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
30557ec681f3Smrg      if (*ptr == NULL) {
30567ec681f3Smrg         fprintf(stderr, "Error: failed to allocate CPU buffer for query.\n");
30577ec681f3Smrg         v3dv_flag_oom(cmd_buffer, NULL);
30587ec681f3Smrg         return;
30597ec681f3Smrg      }
30607ec681f3Smrg
30617ec681f3Smrg      memcpy(*ptr, old_buffer, prev_slot_count * slot_size);
30627ec681f3Smrg      *alloc_count = new_slot_count;
30637ec681f3Smrg   }
30647ec681f3Smrg   assert(used_count < *alloc_count);
30657ec681f3Smrg}
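
/* Worked example (editor's note): with *alloc_count = 0, the first call
 * allocates MAX2(0 * 2, 4) = 4 slots; each time used_count catches up the
 * capacity doubles (4 -> 8 -> 16, ...), copying the
 * prev_slot_count * slot_size bytes of existing state into the new buffer.
 */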
30667ec681f3Smrg
30677ec681f3Smrgvoid
30687ec681f3Smrgv3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
30697ec681f3Smrg                            struct v3dv_query_pool *pool,
30707ec681f3Smrg                            uint32_t query,
30717ec681f3Smrg                            VkQueryControlFlags flags)
30727ec681f3Smrg{
30737ec681f3Smrg   /* FIXME: we only support one active query for now */
30747ec681f3Smrg   assert(cmd_buffer->state.query.active_query.bo == NULL);
30757ec681f3Smrg   assert(query < pool->query_count);
30767ec681f3Smrg
30777ec681f3Smrg   cmd_buffer->state.query.active_query.bo = pool->queries[query].bo;
30787ec681f3Smrg   cmd_buffer->state.query.active_query.offset = pool->queries[query].offset;
30797ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY;
30807ec681f3Smrg}
30817ec681f3Smrg
30827ec681f3Smrgvoid
30837ec681f3Smrgv3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
30847ec681f3Smrg                          struct v3dv_query_pool *pool,
30857ec681f3Smrg                          uint32_t query)
30867ec681f3Smrg{
30877ec681f3Smrg   assert(query < pool->query_count);
30887ec681f3Smrg   assert(cmd_buffer->state.query.active_query.bo != NULL);
30897ec681f3Smrg
30907ec681f3Smrg   if (cmd_buffer->state.pass) {
30917ec681f3Smrg      /* Queue the EndQuery in the command buffer state; we will create a CPU
30927ec681f3Smrg       * job to flag all of these queries as possibly available right after the
30937ec681f3Smrg       * render pass job in which they have been recorded.
30947ec681f3Smrg       */
30957ec681f3Smrg      struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
30967ec681f3Smrg      v3dv_cmd_buffer_ensure_array_state(cmd_buffer,
30977ec681f3Smrg                                         sizeof(struct v3dv_end_query_cpu_job_info),
30987ec681f3Smrg                                         state->query.end.used_count,
30997ec681f3Smrg                                         &state->query.end.alloc_count,
31007ec681f3Smrg                                         (void **) &state->query.end.states);
31017ec681f3Smrg      v3dv_return_if_oom(cmd_buffer, NULL);
31027ec681f3Smrg
31037ec681f3Smrg      struct v3dv_end_query_cpu_job_info *info =
31047ec681f3Smrg         &state->query.end.states[state->query.end.used_count++];
31057ec681f3Smrg
31067ec681f3Smrg      info->pool = pool;
31077ec681f3Smrg      info->query = query;
31087ec681f3Smrg
31097ec681f3Smrg      /* From the Vulkan spec:
31107ec681f3Smrg       *
31117ec681f3Smrg       *   "If queries are used while executing a render pass instance that has
31127ec681f3Smrg       *    multiview enabled, the query uses N consecutive query indices in
31137ec681f3Smrg       *    the query pool (starting at query) where N is the number of bits set
31147ec681f3Smrg       *    in the view mask in the subpass the query is used in. How the
31157ec681f3Smrg       *    numerical results of the query are distributed among the queries is
31167ec681f3Smrg       *    implementation-dependent."
31177ec681f3Smrg       *
31187ec681f3Smrg       * In our case, only the first query is used, but this means we still need
31197ec681f3Smrg       * to flag the other queries as available so we don't emit errors when
31207ec681f3Smrg       * the application attempts to retrieve values from them.
31217ec681f3Smrg       */
31227ec681f3Smrg      struct v3dv_render_pass *pass = cmd_buffer->state.pass;
31237ec681f3Smrg      if (!pass->multiview_enabled) {
31247ec681f3Smrg         info->count = 1;
31257ec681f3Smrg      } else {
31267ec681f3Smrg         struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
31277ec681f3Smrg         info->count = util_bitcount(subpass->view_mask);
31287ec681f3Smrg      }
31297ec681f3Smrg   } else {
31307ec681f3Smrg      /* Otherwise, schedule the CPU job immediately */
31317ec681f3Smrg      struct v3dv_job *job =
31327ec681f3Smrg         v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
31337ec681f3Smrg                                        V3DV_JOB_TYPE_CPU_END_QUERY,
31347ec681f3Smrg                                        cmd_buffer, -1);
31357ec681f3Smrg      v3dv_return_if_oom(cmd_buffer, NULL);
31367ec681f3Smrg
31377ec681f3Smrg      job->cpu.query_end.pool = pool;
31387ec681f3Smrg      job->cpu.query_end.query = query;
31397ec681f3Smrg
31407ec681f3Smrg      /* Multiview queries cannot cross subpass boundaries */
31417ec681f3Smrg      job->cpu.query_end.count = 1;
31427ec681f3Smrg
31437ec681f3Smrg      list_addtail(&job->list_link, &cmd_buffer->jobs);
31447ec681f3Smrg   }
31457ec681f3Smrg
31467ec681f3Smrg   cmd_buffer->state.query.active_query.bo = NULL;
31477ec681f3Smrg   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY;
31487ec681f3Smrg}
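
/* Worked example (editor's note): ending a query inside a subpass with
 * view_mask = 0b0101 records info->count = util_bitcount(0b0101) = 2, so
 * queries 'query' and 'query + 1' are both flagged as available later even
 * though only the first one receives the actual result.
 */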
31497ec681f3Smrg
31507ec681f3Smrgvoid
31517ec681f3Smrgv3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
31527ec681f3Smrg                                   struct v3dv_query_pool *pool,
31537ec681f3Smrg                                   uint32_t first,
31547ec681f3Smrg                                   uint32_t count,
31557ec681f3Smrg                                   struct v3dv_buffer *dst,
31567ec681f3Smrg                                   uint32_t offset,
31577ec681f3Smrg                                   uint32_t stride,
31587ec681f3Smrg                                   VkQueryResultFlags flags)
31597ec681f3Smrg{
31607ec681f3Smrg   /* Copies can only happen outside a render pass instance so we should not
31617ec681f3Smrg    * be in the middle of job recording.
31627ec681f3Smrg    */
31637ec681f3Smrg   assert(cmd_buffer->state.pass == NULL);
31647ec681f3Smrg   assert(cmd_buffer->state.job == NULL);
31657ec681f3Smrg
31667ec681f3Smrg   assert(first < pool->query_count);
31677ec681f3Smrg   assert(first + count <= pool->query_count);
31687ec681f3Smrg
31697ec681f3Smrg   struct v3dv_job *job =
31707ec681f3Smrg      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
31717ec681f3Smrg                                     V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
31727ec681f3Smrg                                     cmd_buffer, -1);
31737ec681f3Smrg   v3dv_return_if_oom(cmd_buffer, NULL);
31747ec681f3Smrg
31757ec681f3Smrg   job->cpu.query_copy_results.pool = pool;
31767ec681f3Smrg   job->cpu.query_copy_results.first = first;
31777ec681f3Smrg   job->cpu.query_copy_results.count = count;
31787ec681f3Smrg   job->cpu.query_copy_results.dst = dst;
31797ec681f3Smrg   job->cpu.query_copy_results.offset = offset;
31807ec681f3Smrg   job->cpu.query_copy_results.stride = stride;
31817ec681f3Smrg   job->cpu.query_copy_results.flags = flags;
31827ec681f3Smrg
31837ec681f3Smrg   list_addtail(&job->list_link, &cmd_buffer->jobs);
31847ec681f3Smrg}
31857ec681f3Smrg
31867ec681f3Smrgvoid
31877ec681f3Smrgv3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
31887ec681f3Smrg                            struct drm_v3d_submit_tfu *tfu)
31897ec681f3Smrg{
31907ec681f3Smrg   struct v3dv_device *device = cmd_buffer->device;
31917ec681f3Smrg   struct v3dv_job *job = vk_zalloc(&device->vk.alloc,
31927ec681f3Smrg                                    sizeof(struct v3dv_job), 8,
31937ec681f3Smrg                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
31947ec681f3Smrg   if (!job) {
31957ec681f3Smrg      v3dv_flag_oom(cmd_buffer, NULL);
31967ec681f3Smrg      return;
31977ec681f3Smrg   }
31987ec681f3Smrg
31997ec681f3Smrg   v3dv_job_init(job, V3DV_JOB_TYPE_GPU_TFU, device, cmd_buffer, -1);
32007ec681f3Smrg   job->tfu = *tfu;
32017ec681f3Smrg   list_addtail(&job->list_link, &cmd_buffer->jobs);
32027ec681f3Smrg}
32037ec681f3Smrg
32047ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
32057ec681f3Smrgv3dv_CmdSetEvent(VkCommandBuffer commandBuffer,
32067ec681f3Smrg                 VkEvent _event,
32077ec681f3Smrg                 VkPipelineStageFlags stageMask)
32087ec681f3Smrg{
32097ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
32107ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_event, event, _event);
32117ec681f3Smrg
32127ec681f3Smrg   /* Event (re)sets can only happen outside a render pass instance so we
32137ec681f3Smrg    * should not be in the middle of job recording.
32147ec681f3Smrg    */
32157ec681f3Smrg   assert(cmd_buffer->state.pass == NULL);
32167ec681f3Smrg   assert(cmd_buffer->state.job == NULL);
32177ec681f3Smrg
32187ec681f3Smrg   struct v3dv_job *job =
32197ec681f3Smrg      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
32207ec681f3Smrg                                     V3DV_JOB_TYPE_CPU_SET_EVENT,
32217ec681f3Smrg                                     cmd_buffer, -1);
32227ec681f3Smrg   v3dv_return_if_oom(cmd_buffer, NULL);
32237ec681f3Smrg
32247ec681f3Smrg   job->cpu.event_set.event = event;
32257ec681f3Smrg   job->cpu.event_set.state = 1;
32267ec681f3Smrg
32277ec681f3Smrg   list_addtail(&job->list_link, &cmd_buffer->jobs);
32287ec681f3Smrg}
32297ec681f3Smrg
32307ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
32317ec681f3Smrgv3dv_CmdResetEvent(VkCommandBuffer commandBuffer,
32327ec681f3Smrg                   VkEvent _event,
32337ec681f3Smrg                   VkPipelineStageFlags stageMask)
32347ec681f3Smrg{
32357ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
32367ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_event, event, _event);
32377ec681f3Smrg
32387ec681f3Smrg   /* Event (re)sets can only happen outside a render pass instance so we
32397ec681f3Smrg    * should not be in the middle of job recording.
32407ec681f3Smrg    */
32417ec681f3Smrg   assert(cmd_buffer->state.pass == NULL);
32427ec681f3Smrg   assert(cmd_buffer->state.job == NULL);
32437ec681f3Smrg
32447ec681f3Smrg   struct v3dv_job *job =
32457ec681f3Smrg      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
32467ec681f3Smrg                                     V3DV_JOB_TYPE_CPU_SET_EVENT,
32477ec681f3Smrg                                     cmd_buffer, -1);
32487ec681f3Smrg   v3dv_return_if_oom(cmd_buffer, NULL);
32497ec681f3Smrg
32507ec681f3Smrg   job->cpu.event_set.event = event;
32517ec681f3Smrg   job->cpu.event_set.state = 0;
32527ec681f3Smrg
32537ec681f3Smrg   list_addtail(&job->list_link, &cmd_buffer->jobs);
32547ec681f3Smrg}
32557ec681f3Smrg
32567ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
32577ec681f3Smrgv3dv_CmdWaitEvents(VkCommandBuffer commandBuffer,
32587ec681f3Smrg                   uint32_t eventCount,
32597ec681f3Smrg                   const VkEvent *pEvents,
32607ec681f3Smrg                   VkPipelineStageFlags srcStageMask,
32617ec681f3Smrg                   VkPipelineStageFlags dstStageMask,
32627ec681f3Smrg                   uint32_t memoryBarrierCount,
32637ec681f3Smrg                   const VkMemoryBarrier *pMemoryBarriers,
32647ec681f3Smrg                   uint32_t bufferMemoryBarrierCount,
32657ec681f3Smrg                   const VkBufferMemoryBarrier *pBufferMemoryBarriers,
32667ec681f3Smrg                   uint32_t imageMemoryBarrierCount,
32677ec681f3Smrg                   const VkImageMemoryBarrier *pImageMemoryBarriers)
32687ec681f3Smrg{
32697ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
32707ec681f3Smrg
32717ec681f3Smrg   assert(eventCount > 0);
32727ec681f3Smrg
32737ec681f3Smrg   struct v3dv_job *job =
32747ec681f3Smrg      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
32757ec681f3Smrg                                     V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
32767ec681f3Smrg                                     cmd_buffer, -1);
32777ec681f3Smrg   v3dv_return_if_oom(cmd_buffer, NULL);
32787ec681f3Smrg
32797ec681f3Smrg   const uint32_t event_list_size = sizeof(struct v3dv_event *) * eventCount;
32807ec681f3Smrg
32817ec681f3Smrg   job->cpu.event_wait.events =
32827ec681f3Smrg      vk_alloc(&cmd_buffer->device->vk.alloc, event_list_size, 8,
32837ec681f3Smrg               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
32847ec681f3Smrg   if (!job->cpu.event_wait.events) {
32857ec681f3Smrg      v3dv_flag_oom(cmd_buffer, NULL);
32867ec681f3Smrg      return;
32877ec681f3Smrg   }
32887ec681f3Smrg   job->cpu.event_wait.event_count = eventCount;
32897ec681f3Smrg
32907ec681f3Smrg   for (uint32_t i = 0; i < eventCount; i++)
32917ec681f3Smrg      job->cpu.event_wait.events[i] = v3dv_event_from_handle(pEvents[i]);
32927ec681f3Smrg
32937ec681f3Smrg   /* vkCmdWaitEvents can be recorded inside a render pass, so we might have
32947ec681f3Smrg    * an active job.
32957ec681f3Smrg    *
32967ec681f3Smrg    * If we are inside a render pass, because vkCmd(Re)SetEvent can't happen
32977ec681f3Smrg    * inside a render pass, it is safe to move the wait job so it happens right
32987ec681f3Smrg    * before the job we are currently recording for the subpass, if any
32997ec681f3Smrg    * (it would actually be safe to move it all the way back to right before
33007ec681f3Smrg    * the start of the render pass).
33017ec681f3Smrg    *
33027ec681f3Smrg    * If we are outside a render pass then we should not have any on-going job
33037ec681f3Smrg    * and we are free to just add the wait job without restrictions.
33047ec681f3Smrg    */
   assert(cmd_buffer->state.pass || !cmd_buffer->state.job);
   list_addtail(&job->list_link, &cmd_buffer->jobs);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
                       VkPipelineStageFlagBits pipelineStage,
                       VkQueryPool queryPool,
                       uint32_t query)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);

   /* If this is called inside a render pass we need to finish the current
    * job here...
    */
   struct v3dv_render_pass *pass = cmd_buffer->state.pass;
   if (pass)
      v3dv_cmd_buffer_finish_job(cmd_buffer);

   struct v3dv_job *job =
      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
                                     V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
                                     cmd_buffer, -1);
   v3dv_return_if_oom(cmd_buffer, NULL);

   job->cpu.query_timestamp.pool = query_pool;
   job->cpu.query_timestamp.query = query;

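   /* With multiview enabled we write one timestamp result per view in the
    * subpass view mask, so compute how many results that is.
    */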
   if (!pass || !pass->multiview_enabled) {
      job->cpu.query_timestamp.count = 1;
   } else {
      struct v3dv_subpass *subpass =
         &pass->subpasses[cmd_buffer->state.subpass_idx];
      job->cpu.query_timestamp.count = util_bitcount(subpass->view_mask);
   }

   list_addtail(&job->list_link, &cmd_buffer->jobs);
   cmd_buffer->state.job = NULL;

   /* ...and resume the subpass after the timestamp */
   if (cmd_buffer->state.pass)
      v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx);
}

static void
cmd_buffer_emit_pre_dispatch(struct v3dv_cmd_buffer *cmd_buffer)
{
   assert(cmd_buffer->state.compute.pipeline);
   assert(cmd_buffer->state.compute.pipeline->active_stages ==
          VK_SHADER_STAGE_COMPUTE_BIT);

   cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_COMPUTE_PIPELINE |
                                V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS);
   cmd_buffer->state.dirty_descriptor_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT;
   cmd_buffer->state.dirty_push_constants_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}

#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16
#define V3D_CSD_CFG012_WG_OFFSET_SHIFT 0
/* Allow this dispatch to start while the last one is still running. */
#define V3D_CSD_CFG3_OVERLAP_WITH_PREV (1 << 26)
/* Maximum supergroup ID.  6 bits. */
#define V3D_CSD_CFG3_MAX_SG_ID_SHIFT 20
/* Batches per supergroup minus 1.  8 bits. */
#define V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT 12
/* Workgroups per supergroup, 0 means 16. */
#define V3D_CSD_CFG3_WGS_PER_SG_SHIFT 8
#define V3D_CSD_CFG3_WG_SIZE_SHIFT 0

#define V3D_CSD_CFG5_PROPAGATE_NANS (1 << 2)
#define V3D_CSD_CFG5_SINGLE_SEG (1 << 1)
#define V3D_CSD_CFG5_THREADING (1 << 0)
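
/* Illustrative CFG3 packing (example values, not from any particular
 * dispatch): with wg_size = 64 and wgs_per_sg = 2 we get
 * batches_per_sg = DIV_ROUND_UP(2 * 64, 16) = 8, so cfg[3] would contain
 * (2 << V3D_CSD_CFG3_WGS_PER_SG_SHIFT) |
 * (7 << V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT) |
 * (64 << V3D_CSD_CFG3_WG_SIZE_SHIFT).
 */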

void
v3dv_cmd_buffer_rewrite_indirect_csd_job(
   struct v3dv_csd_indirect_cpu_job_info *info,
   const uint32_t *wg_counts)
{
   assert(info->csd_job);
   struct v3dv_job *job = info->csd_job;

   assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
   assert(wg_counts[0] > 0 && wg_counts[1] > 0 && wg_counts[2] > 0);

   struct drm_v3d_submit_csd *submit = &job->csd.submit;

   job->csd.wg_count[0] = wg_counts[0];
   job->csd.wg_count[1] = wg_counts[1];
   job->csd.wg_count[2] = wg_counts[2];

   submit->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
   submit->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
   submit->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT;

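   /* cfg[4] holds the total number of batches minus one; the DIV_ROUND_UP
    * by 16 reflects that each batch covers up to 16 invocations.
    */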
   submit->cfg[4] = DIV_ROUND_UP(info->wg_size, 16) *
                    (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1;
   assert(submit->cfg[4] != ~0);

   if (info->needs_wg_uniform_rewrite) {
      /* Make sure the GPU is not currently accessing the indirect CL for this
       * job, since we are about to overwrite some of the uniform data.
       */
      v3dv_bo_wait(job->device, job->indirect.bo, PIPE_TIMEOUT_INFINITE);

      for (uint32_t i = 0; i < 3; i++) {
         if (info->wg_uniform_offsets[i]) {
            /* Sanity check that our uniform pointers are within the allocated
             * BO space for our indirect CL.
             */
            assert(info->wg_uniform_offsets[i] >= (uint32_t *) job->indirect.base);
            assert(info->wg_uniform_offsets[i] < (uint32_t *) job->indirect.next);
            *(info->wg_uniform_offsets[i]) = wg_counts[i];
         }
      }
   }
}

static struct v3dv_job *
cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
                          uint32_t base_offset_x,
                          uint32_t base_offset_y,
                          uint32_t base_offset_z,
                          uint32_t group_count_x,
                          uint32_t group_count_y,
                          uint32_t group_count_z,
                          uint32_t **wg_uniform_offsets_out,
                          uint32_t *wg_size_out)
{
   struct v3dv_pipeline *pipeline = cmd_buffer->state.compute.pipeline;
   assert(pipeline && pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
   struct v3dv_shader_variant *cs_variant =
      pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE];

   struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
                                    sizeof(struct v3dv_job), 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!job) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return NULL;
   }

   v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CSD, cmd_buffer->device, cmd_buffer, -1);
   cmd_buffer->state.job = job;

   struct drm_v3d_submit_csd *submit = &job->csd.submit;

   job->csd.wg_count[0] = group_count_x;
   job->csd.wg_count[1] = group_count_y;
   job->csd.wg_count[2] = group_count_z;

   job->csd.wg_base[0] = base_offset_x;
   job->csd.wg_base[1] = base_offset_y;
   job->csd.wg_base[2] = base_offset_z;

   submit->cfg[0] |= group_count_x << V3D_CSD_CFG012_WG_COUNT_SHIFT;
   submit->cfg[1] |= group_count_y << V3D_CSD_CFG012_WG_COUNT_SHIFT;
   submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT;

   const struct v3d_compute_prog_data *cpd =
      cs_variant->prog_data.cs;

   const uint32_t num_wgs = group_count_x * group_count_y * group_count_z;
   const uint32_t wg_size = cpd->local_size[0] *
                            cpd->local_size[1] *
                            cpd->local_size[2];

   uint32_t wgs_per_sg =
      v3d_csd_choose_workgroups_per_supergroup(
         &cmd_buffer->device->devinfo,
         cs_variant->prog_data.cs->has_subgroups,
         cs_variant->prog_data.cs->base.has_control_barrier,
         cs_variant->prog_data.cs->base.threads,
         num_wgs, wg_size);

   uint32_t batches_per_sg = DIV_ROUND_UP(wgs_per_sg * wg_size, 16);
   uint32_t whole_sgs = num_wgs / wgs_per_sg;
   uint32_t rem_wgs = num_wgs - whole_sgs * wgs_per_sg;
   uint32_t num_batches = batches_per_sg * whole_sgs +
                          DIV_ROUND_UP(rem_wgs * wg_size, 16);
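   /* Worked example (numbers assumed for illustration): with num_wgs = 10,
    * wgs_per_sg = 3 and wg_size = 64, batches_per_sg = DIV_ROUND_UP(192, 16)
    * = 12, whole_sgs = 3, rem_wgs = 1, and num_batches = 12 * 3 +
    * DIV_ROUND_UP(64, 16) = 40.
    */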

   submit->cfg[3] |= (wgs_per_sg & 0xf) << V3D_CSD_CFG3_WGS_PER_SG_SHIFT;
   submit->cfg[3] |= (batches_per_sg - 1) << V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT;
   submit->cfg[3] |= (wg_size & 0xff) << V3D_CSD_CFG3_WG_SIZE_SHIFT;
   if (wg_size_out)
      *wg_size_out = wg_size;

   submit->cfg[4] = num_batches - 1;
   assert(submit->cfg[4] != ~0);

   assert(pipeline->shared_data->assembly_bo);
   struct v3dv_bo *cs_assembly_bo = pipeline->shared_data->assembly_bo;

   submit->cfg[5] = cs_assembly_bo->offset + cs_variant->assembly_offset;
   submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS;
   if (cs_variant->prog_data.base->single_seg)
      submit->cfg[5] |= V3D_CSD_CFG5_SINGLE_SEG;
   if (cs_variant->prog_data.base->threads == 4)
      submit->cfg[5] |= V3D_CSD_CFG5_THREADING;

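   /* The shared variables BO below is sized for one supergroup's worth of
    * workgroups (shared_size * wgs_per_sg), since the hardware may run all
    * the workgroups in a supergroup concurrently.
    */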
   if (cs_variant->prog_data.cs->shared_size > 0) {
      job->csd.shared_memory =
         v3dv_bo_alloc(cmd_buffer->device,
                       cs_variant->prog_data.cs->shared_size * wgs_per_sg,
                       "shared_vars", true);
      if (!job->csd.shared_memory) {
         v3dv_flag_oom(cmd_buffer, NULL);
         return job;
      }
   }

   v3dv_job_add_bo_unchecked(job, cs_assembly_bo);
   struct v3dv_cl_reloc uniforms =
      v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline,
                                     cs_variant,
                                     wg_uniform_offsets_out);
   submit->cfg[6] = uniforms.bo->offset + uniforms.offset;

   v3dv_job_add_bo(job, uniforms.bo);

   return job;
}

static void
cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
                    uint32_t base_offset_x,
                    uint32_t base_offset_y,
                    uint32_t base_offset_z,
                    uint32_t group_count_x,
                    uint32_t group_count_y,
                    uint32_t group_count_z)
{
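   /* Dispatching zero workgroups in any dimension is valid and produces no
    * work, so there is nothing to record.
    */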
   if (group_count_x == 0 || group_count_y == 0 || group_count_z == 0)
      return;

   struct v3dv_job *job =
      cmd_buffer_create_csd_job(cmd_buffer,
                                base_offset_x,
                                base_offset_y,
                                base_offset_z,
                                group_count_x,
                                group_count_y,
                                group_count_z,
                                NULL, NULL);

   /* Job creation can fail on OOM, in which case the error has already been
    * flagged on the command buffer and we must not dereference the job.
    */
   if (!job)
      return;

   list_addtail(&job->list_link, &cmd_buffer->jobs);
   cmd_buffer->state.job = NULL;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDispatch(VkCommandBuffer commandBuffer,
                 uint32_t groupCountX,
                 uint32_t groupCountY,
                 uint32_t groupCountZ)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer_emit_pre_dispatch(cmd_buffer);
   cmd_buffer_dispatch(cmd_buffer, 0, 0, 0,
                       groupCountX, groupCountY, groupCountZ);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDispatchBase(VkCommandBuffer commandBuffer,
                     uint32_t baseGroupX,
                     uint32_t baseGroupY,
                     uint32_t baseGroupZ,
                     uint32_t groupCountX,
                     uint32_t groupCountY,
                     uint32_t groupCountZ)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer_emit_pre_dispatch(cmd_buffer);
   cmd_buffer_dispatch(cmd_buffer,
                       baseGroupX, baseGroupY, baseGroupZ,
                       groupCountX, groupCountY, groupCountZ);
}

static void
cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
                             struct v3dv_buffer *buffer,
                             uint32_t offset)
{
   /* The hardware has no support for indirect dispatches, so instead we
    * record a CPU job that, when executed in the queue, will map the
    * indirect buffer, read the dispatch parameters, and submit a regular
    * dispatch.
    */
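   /* Sketch of the submit-time handling (illustrative, not the actual queue
    * code): the handler would read the three workgroup counts from the
    * mapped indirect buffer at the recorded offset and, if they differ from
    * the placeholder counts used below, call
    * v3dv_cmd_buffer_rewrite_indirect_csd_job() with them to patch the
    * pre-recorded CSD job before submission.
    */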
   struct v3dv_job *job =
      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
                                     V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
                                     cmd_buffer, -1);
   v3dv_return_if_oom(cmd_buffer, NULL);

   /* We need to create a CSD job now, even if we still don't know the actual
    * dispatch parameters, because the job setup needs to be done using the
    * current command buffer state (i.e. pipeline, descriptor sets, push
    * constants, etc.). So we create the job with default dispatch parameters
    * and we will rewrite the parts we need at submit time if the indirect
    * parameters don't match the ones we used to set up the job.
    */
   struct v3dv_job *csd_job =
      cmd_buffer_create_csd_job(cmd_buffer,
                                0, 0, 0,
                                1, 1, 1,
                                &job->cpu.csd_indirect.wg_uniform_offsets[0],
                                &job->cpu.csd_indirect.wg_size);
   v3dv_return_if_oom(cmd_buffer, NULL);
   assert(csd_job);

   job->cpu.csd_indirect.buffer = buffer;
   job->cpu.csd_indirect.offset = offset;
   job->cpu.csd_indirect.csd_job = csd_job;

   /* If the compute shader reads the workgroup sizes we will also need to
    * rewrite the corresponding uniforms.
    */
   job->cpu.csd_indirect.needs_wg_uniform_rewrite =
      job->cpu.csd_indirect.wg_uniform_offsets[0] ||
      job->cpu.csd_indirect.wg_uniform_offsets[1] ||
      job->cpu.csd_indirect.wg_uniform_offsets[2];

   list_addtail(&job->list_link, &cmd_buffer->jobs);
   cmd_buffer->state.job = NULL;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
                         VkBuffer _buffer,
                         VkDeviceSize offset)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);

   assert(offset <= UINT32_MAX);

   cmd_buffer_emit_pre_dispatch(cmd_buffer);
   cmd_buffer_dispatch_indirect(cmd_buffer, buffer, offset);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
{
   /* Nothing to do here since we only support a single device */
   assert(deviceMask == 0x1);
}