/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "util/u_pack_color.h"
#include "vk_format_info.h"
#include "vk_util.h"

const struct v3dv_dynamic_state default_dynamic_state = {
   .viewport = {
      .count = 0,
   },
   .scissor = {
      .count = 0,
   },
   .stencil_compare_mask =
   {
      .front = ~0u,
      .back = ~0u,
   },
   .stencil_write_mask =
   {
      .front = ~0u,
      .back = ~0u,
   },
   .stencil_reference =
   {
      .front = 0u,
      .back = 0u,
   },
   .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
   .depth_bias = {
      .constant_factor = 0.0f,
      .depth_bias_clamp = 0.0f,
      .slope_factor = 0.0f,
   },
   .line_width = 1.0f,
   .color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1,
};

void
v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo)
{
   if (!bo)
      return;

   if (job->bo_handle_mask & bo->handle_bit) {
      if (_mesa_set_search(job->bos, bo))
         return;
   }

   _mesa_set_add(job->bos, bo);
   job->bo_count++;
   job->bo_handle_mask |= bo->handle_bit;
}

void
v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo)
{
   assert(bo);
   _mesa_set_add(job->bos, bo);
   job->bo_count++;
   job->bo_handle_mask |= bo->handle_bit;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateCommandPool(VkDevice _device,
                       const VkCommandPoolCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkCommandPool *pCmdPool)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_cmd_pool *pool;

   /* We only support one queue */
   assert(pCreateInfo->queueFamilyIndex == 0);

   pool = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pool),
                           VK_OBJECT_TYPE_COMMAND_POOL);
   if (pool == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      pool->alloc = *pAllocator;
   else
      pool->alloc = device->vk.alloc;

   list_inithead(&pool->cmd_buffers);

   *pCmdPool = v3dv_cmd_pool_to_handle(pool);

   return VK_SUCCESS;
}

static void
cmd_buffer_init(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_device *device,
                struct v3dv_cmd_pool *pool,
                VkCommandBufferLevel level)
{
   /* Do not reset the base object! If we are calling this from a command
    * buffer reset, that would reset the loader's dispatch table for the
    * command buffer, as well as any other relevant info from vk_object_base.
    */
   const uint32_t base_size = sizeof(struct vk_command_buffer);
   uint8_t *cmd_buffer_driver_start = ((uint8_t *) cmd_buffer) + base_size;
   memset(cmd_buffer_driver_start, 0, sizeof(*cmd_buffer) - base_size);

   cmd_buffer->device = device;
   cmd_buffer->pool = pool;
   cmd_buffer->level = level;

   list_inithead(&cmd_buffer->private_objs);
   list_inithead(&cmd_buffer->jobs);
   list_inithead(&cmd_buffer->list_link);

   assert(pool);
   list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);

   cmd_buffer->state.subpass_idx = -1;
   cmd_buffer->state.meta.subpass_idx = -1;

   cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_INITIALIZED;
}

static VkResult
cmd_buffer_create(struct v3dv_device *device,
                  struct v3dv_cmd_pool *pool,
                  VkCommandBufferLevel level,
                  VkCommandBuffer *pCommandBuffer)
{
   struct v3dv_cmd_buffer *cmd_buffer;
   cmd_buffer = vk_zalloc2(&device->vk.alloc,
                           &pool->alloc,
                           sizeof(*cmd_buffer),
                           8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cmd_buffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result;
   result = vk_command_buffer_init(&cmd_buffer->vk, &device->vk);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);
      return result;
   }

   cmd_buffer_init(cmd_buffer, device, pool, level);

   *pCommandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer);

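   /* Note: cmd_buffer_init() has already linked the new command buffer into
    * pool->cmd_buffers, so it will be released either by
    * vkFreeCommandBuffers() or when the pool itself is destroyed.
    */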
   return VK_SUCCESS;
}

static void
job_destroy_gpu_cl_resources(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_GPU_CL ||
          job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);

   v3dv_cl_destroy(&job->bcl);
   v3dv_cl_destroy(&job->rcl);
   v3dv_cl_destroy(&job->indirect);

   /* Since we don't ref BOs when we add them to the command buffer, don't
    * unref them here either. BOs will be freed when their corresponding API
    * objects are destroyed.
    */
   _mesa_set_destroy(job->bos, NULL);

   v3dv_bo_free(job->device, job->tile_alloc);
   v3dv_bo_free(job->device, job->tile_state);
}

static void
job_destroy_cloned_gpu_cl_resources(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_GPU_CL);

   list_for_each_entry_safe(struct v3dv_bo, bo, &job->bcl.bo_list, list_link) {
      list_del(&bo->list_link);
      vk_free(&job->device->vk.alloc, bo);
   }

   list_for_each_entry_safe(struct v3dv_bo, bo, &job->rcl.bo_list, list_link) {
      list_del(&bo->list_link);
      vk_free(&job->device->vk.alloc, bo);
   }

   list_for_each_entry_safe(struct v3dv_bo, bo, &job->indirect.bo_list, list_link) {
      list_del(&bo->list_link);
      vk_free(&job->device->vk.alloc, bo);
   }
}

static void
job_destroy_gpu_csd_resources(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
   assert(job->cmd_buffer);

   v3dv_cl_destroy(&job->indirect);

   _mesa_set_destroy(job->bos, NULL);

   if (job->csd.shared_memory)
      v3dv_bo_free(job->device, job->csd.shared_memory);
}

static void
job_destroy_cpu_wait_events_resources(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   assert(job->cmd_buffer);
   vk_free(&job->cmd_buffer->device->vk.alloc, job->cpu.event_wait.events);
}

static void
job_destroy_cpu_csd_indirect_resources(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT);
   assert(job->cmd_buffer);
   v3dv_job_destroy(job->cpu.csd_indirect.csd_job);
}

void
v3dv_job_destroy(struct v3dv_job *job)
{
   assert(job);

   list_del(&job->list_link);

   /* Cloned jobs don't make deep copies of the original jobs, so they don't
    * own any of their resources. However, they do allocate clones of BO
    * structs, so make sure we free those.
    */
   if (!job->is_clone) {
      switch (job->type) {
      case V3DV_JOB_TYPE_GPU_CL:
      case V3DV_JOB_TYPE_GPU_CL_SECONDARY:
         job_destroy_gpu_cl_resources(job);
         break;
      case V3DV_JOB_TYPE_GPU_CSD:
         job_destroy_gpu_csd_resources(job);
         break;
      case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
         job_destroy_cpu_wait_events_resources(job);
         break;
      case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
         job_destroy_cpu_csd_indirect_resources(job);
         break;
      default:
         break;
      }
   } else {
      /* Cloned jobs */
      if (job->type == V3DV_JOB_TYPE_GPU_CL)
         job_destroy_cloned_gpu_cl_resources(job);
   }

   vk_free(&job->device->vk.alloc, job);
}

void
v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
                                uint64_t obj,
                                v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb)
{
   struct v3dv_cmd_buffer_private_obj *pobj =
      vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*pobj), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!pobj) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return;
   }

   pobj->obj = obj;
   pobj->destroy_cb = destroy_cb;

   list_addtail(&pobj->list_link, &cmd_buffer->private_objs);
}

static void
cmd_buffer_destroy_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_cmd_buffer_private_obj *pobj)
{
   assert(pobj && pobj->obj && pobj->destroy_cb);
   pobj->destroy_cb(v3dv_device_to_handle(cmd_buffer->device),
                    pobj->obj,
                    &cmd_buffer->device->vk.alloc);
   list_del(&pobj->list_link);
   vk_free(&cmd_buffer->device->vk.alloc, pobj);
}

static void
cmd_buffer_free_resources(struct v3dv_cmd_buffer *cmd_buffer)
{
   list_for_each_entry_safe(struct v3dv_job, job,
                            &cmd_buffer->jobs, list_link) {
      v3dv_job_destroy(job);
   }

   if (cmd_buffer->state.job)
      v3dv_job_destroy(cmd_buffer->state.job);

   if (cmd_buffer->state.attachments)
      vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);

   if (cmd_buffer->state.query.end.alloc_count > 0)
      vk_free(&cmd_buffer->device->vk.alloc, cmd_buffer->state.query.end.states);

   if (cmd_buffer->push_constants_resource.bo)
      v3dv_bo_free(cmd_buffer->device, cmd_buffer->push_constants_resource.bo);

   list_for_each_entry_safe(struct v3dv_cmd_buffer_private_obj, pobj,
                            &cmd_buffer->private_objs, list_link) {
      cmd_buffer_destroy_private_obj(cmd_buffer, pobj);
   }

   if (cmd_buffer->state.meta.attachments) {
      assert(cmd_buffer->state.meta.attachment_alloc_count > 0);
      vk_free(&cmd_buffer->device->vk.alloc, cmd_buffer->state.meta.attachments);
   }
}

static void
cmd_buffer_destroy(struct v3dv_cmd_buffer *cmd_buffer)
{
   list_del(&cmd_buffer->pool_link);
   cmd_buffer_free_resources(cmd_buffer);
   vk_command_buffer_finish(&cmd_buffer->vk);
   vk_free2(&cmd_buffer->device->vk.alloc, &cmd_buffer->pool->alloc,
            cmd_buffer);
}

static bool
attachment_list_is_subset(struct v3dv_subpass_attachment *l1, uint32_t l1_count,
                          struct v3dv_subpass_attachment *l2, uint32_t l2_count)
{
   for (uint32_t i = 0; i < l1_count; i++) {
      uint32_t attachment_idx = l1[i].attachment;
      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      uint32_t j;
      for (j = 0; j < l2_count; j++) {
         if (l2[j].attachment == attachment_idx)
            break;
      }
      if (j == l2_count)
         return false;
   }

   return true;
}

static bool
cmd_buffer_can_merge_subpass(struct v3dv_cmd_buffer *cmd_buffer,
                             uint32_t subpass_idx)
{
   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   assert(state->pass);

   const struct v3dv_physical_device *physical_device =
      &cmd_buffer->device->instance->physicalDevice;

   if (cmd_buffer->level != VK_COMMAND_BUFFER_LEVEL_PRIMARY)
      return false;

   if (!cmd_buffer->state.job)
      return false;

   if (cmd_buffer->state.job->always_flush)
      return false;

   if (!physical_device->options.merge_jobs)
      return false;

   /* Each render pass starts a new job */
   if (subpass_idx == 0)
      return false;

   /* Two subpasses can be merged in the same job if we can emit a single RCL
    * for them (since the RCL includes the END_OF_RENDERING command that
    * triggers the "render job finished" interrupt). We can do this so long
    * as both subpasses render against the same attachments.
    */
   assert(state->subpass_idx == subpass_idx - 1);
   struct v3dv_subpass *prev_subpass = &state->pass->subpasses[state->subpass_idx];
   struct v3dv_subpass *subpass = &state->pass->subpasses[subpass_idx];

   /* Don't merge if the subpasses have different view masks, since in that
    * case the framebuffer setup is different and we need to emit different
    * RCLs.
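    * (A subpass view mask has bit N set for each view N it renders to; for
    * example, a multiview subpass rendering views 0 and 1 has a view mask
    * of 0x3.)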
    */
   if (subpass->view_mask != prev_subpass->view_mask)
      return false;

   /* Because the list of subpass attachments can include VK_ATTACHMENT_UNUSED,
    * we need to check that for each subpass all its used attachments are
    * used by the other subpass.
    */
   bool compatible =
      attachment_list_is_subset(prev_subpass->color_attachments,
                                prev_subpass->color_count,
                                subpass->color_attachments,
                                subpass->color_count);
   if (!compatible)
      return false;

   compatible =
      attachment_list_is_subset(subpass->color_attachments,
                                subpass->color_count,
                                prev_subpass->color_attachments,
                                prev_subpass->color_count);
   if (!compatible)
      return false;

   if (subpass->ds_attachment.attachment !=
       prev_subpass->ds_attachment.attachment)
      return false;

   /* FIXME: Since some attachment formats can't be resolved using the TLB we
    * need to emit separate resolve jobs for them and that would not be
    * compatible with subpass merges. We could fix that by checking whether
    * any of the attachments to resolve don't support TLB resolves.
    */
   if (prev_subpass->resolve_attachments || subpass->resolve_attachments)
      return false;

   return true;
}

/**
 * Computes and sets the job frame tiling information required to setup frame
 * binning and rendering.
 */
static struct v3dv_frame_tiling *
job_compute_frame_tiling(struct v3dv_job *job,
                         uint32_t width,
                         uint32_t height,
                         uint32_t layers,
                         uint32_t render_target_count,
                         uint8_t max_internal_bpp,
                         bool msaa)
{
   static const uint8_t tile_sizes[] = {
      64, 64,
      64, 32,
      32, 32,
      32, 16,
      16, 16,
      16, 8,
      8, 8
   };

   assert(job);
   struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   tiling->width = width;
   tiling->height = height;
   tiling->layers = layers;
   tiling->render_target_count = render_target_count;
   tiling->msaa = msaa;

   uint32_t tile_size_index = 0;

   if (render_target_count > 2)
      tile_size_index += 2;
   else if (render_target_count > 1)
      tile_size_index += 1;

   if (msaa)
      tile_size_index += 2;

   tiling->internal_bpp = max_internal_bpp;
   tile_size_index += tiling->internal_bpp;
   assert(tile_size_index < ARRAY_SIZE(tile_sizes) / 2);

   tiling->tile_width = tile_sizes[tile_size_index * 2];
   tiling->tile_height = tile_sizes[tile_size_index * 2 + 1];

   tiling->draw_tiles_x = DIV_ROUND_UP(width, tiling->tile_width);
   tiling->draw_tiles_y = DIV_ROUND_UP(height, tiling->tile_height);

   /* Size up our supertiles until we get under the limit */
   const uint32_t max_supertiles = 256;
   tiling->supertile_width = 1;
   tiling->supertile_height = 1;
   for (;;) {
      tiling->frame_width_in_supertiles =
         DIV_ROUND_UP(tiling->draw_tiles_x, tiling->supertile_width);
      tiling->frame_height_in_supertiles =
         DIV_ROUND_UP(tiling->draw_tiles_y, tiling->supertile_height);
      const uint32_t num_supertiles = tiling->frame_width_in_supertiles *
                                      tiling->frame_height_in_supertiles;
      if (num_supertiles < max_supertiles)
         break;

      if (tiling->supertile_width < tiling->supertile_height)
         tiling->supertile_width++;
      else
         tiling->supertile_height++;
   }

   return tiling;
}

void
v3dv_job_start_frame(struct v3dv_job *job,
                     uint32_t width,
                     uint32_t height,
                     uint32_t layers,
                     bool allocate_tile_state_for_all_layers,
                     uint32_t render_target_count,
                     uint8_t max_internal_bpp,
                     bool msaa)
{
   assert(job);

   /* Start by computing frame tiling spec for this job */
   const struct v3dv_frame_tiling *tiling =
      job_compute_frame_tiling(job,
                               width, height, layers,
                               render_target_count, max_internal_bpp, msaa);

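   /* For illustration, from the tile_sizes table above: a single render
    * target with the smallest internal bpp and no MSAA keeps tile_size_index
    * at 0 and selects 64x64 tiles, while 3 or more render targets (+2),
    * MSAA (+2) and the largest internal bpp value (+2) give index 6 and
    * shrink the tiles to 8x8.
    */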
   v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
   v3dv_return_if_oom(NULL, job);

   /* We only need to allocate tile state for all layers if the binner
    * writes primitives to layers other than the first. This can only be
    * done using layered rendering (writing gl_Layer from a geometry shader),
    * so for other cases of multilayered framebuffers (typically with
    * meta copy/clear operations) that won't use layered rendering, we only
    * need one layer's worth of tile state for the binner.
    */
   if (!allocate_tile_state_for_all_layers)
      layers = 1;

   /* The PTB will request the tile alloc initial size per tile at start
    * of tile binning.
    */
   uint32_t tile_alloc_size = 64 * tiling->layers *
                              tiling->draw_tiles_x *
                              tiling->draw_tiles_y;

   /* The PTB allocates in aligned 4k chunks after the initial setup. */
   tile_alloc_size = align(tile_alloc_size, 4096);

   /* Include the first two chunk allocations that the PTB does so that
    * we definitely clear the OOM condition before triggering one (the HW
    * won't trigger OOM during the first allocations).
    */
   tile_alloc_size += 8192;

   /* For performance, allocate some extra initial memory after the PTB's
    * minimal allocations, so that we hopefully don't have to block the
    * GPU on the kernel handling an OOM signal.
    */
   tile_alloc_size += 512 * 1024;

   job->tile_alloc = v3dv_bo_alloc(job->device, tile_alloc_size,
                                   "tile_alloc", true);
   if (!job->tile_alloc) {
      v3dv_flag_oom(NULL, job);
      return;
   }

   v3dv_job_add_bo_unchecked(job, job->tile_alloc);

   const uint32_t tsda_per_tile_size = 256;
   const uint32_t tile_state_size = tiling->layers *
                                    tiling->draw_tiles_x *
                                    tiling->draw_tiles_y *
                                    tsda_per_tile_size;
   job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA", true);
   if (!job->tile_state) {
      v3dv_flag_oom(NULL, job);
      return;
   }

   v3dv_job_add_bo_unchecked(job, job->tile_state);

   v3dv_X(job->device, job_emit_binning_prolog)(job, tiling, layers);

   job->ez_state = V3D_EZ_UNDECIDED;
   job->first_ez_state = V3D_EZ_UNDECIDED;
}

static void
cmd_buffer_end_render_pass_frame(struct v3dv_cmd_buffer *cmd_buffer)
{
   assert(cmd_buffer->state.job);

   /* Typically, we have a single job for each subpass and we emit the job's RCL
    * here when we are ending the frame for the subpass. However, some commands
    * such as vkCmdClearAttachments need to run in their own separate job and
    * they emit their own RCL even if they execute inside a subpass.
    * In this scenario, we don't want to emit the subpass RCL when we end the
    * frame for those jobs, so we only emit the subpass RCL if the job has not
    * recorded any RCL commands of its own.
    */
   if (v3dv_cl_offset(&cmd_buffer->state.job->rcl) == 0)
      v3dv_X(cmd_buffer->device, cmd_buffer_emit_render_pass_rcl)(cmd_buffer);

   v3dv_X(cmd_buffer->device, job_emit_binning_flush)(cmd_buffer->state.job);
}

struct v3dv_job *
v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
                               enum v3dv_job_type type,
                               struct v3dv_cmd_buffer *cmd_buffer,
                               uint32_t subpass_idx)
{
   struct v3dv_job *job = vk_zalloc(&device->vk.alloc,
                                    sizeof(struct v3dv_job), 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!job) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return NULL;
   }

   v3dv_job_init(job, type, device, cmd_buffer, subpass_idx);
   return job;
}

static void
cmd_buffer_add_cpu_jobs_for_pending_state(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;

   if (state->query.end.used_count > 0) {
      const uint32_t query_count = state->query.end.used_count;
      for (uint32_t i = 0; i < query_count; i++) {
         assert(i < state->query.end.used_count);
         struct v3dv_job *job =
            v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
                                           V3DV_JOB_TYPE_CPU_END_QUERY,
                                           cmd_buffer, -1);
         v3dv_return_if_oom(cmd_buffer, NULL);

         job->cpu.query_end = state->query.end.states[i];
         list_addtail(&job->list_link, &cmd_buffer->jobs);
      }
   }
}

void
v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   if (!job)
      return;

   if (cmd_buffer->state.oom) {
      v3dv_job_destroy(job);
      cmd_buffer->state.job = NULL;
      return;
   }

   /* If we have created a job for a command buffer then we should have
    * recorded something into it: if the job was started in a render pass, it
    * should at least have the start frame commands, otherwise, it should have
    * a transfer command. The only exception is secondary command buffers
    * inside a render pass.
    */
   assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY ||
          v3dv_cl_offset(&job->bcl) > 0);

   /* When we merge multiple subpasses into the same job we must only emit one
    * RCL, so we do that here, when we have decided that we need to finish the
    * job. Any rendering that happens outside a render pass is never merged, so
    * the RCL should have been emitted by the time we got here.
    */
   assert(v3dv_cl_offset(&job->rcl) != 0 || cmd_buffer->state.pass);

   /* If we are finishing a job inside a render pass we have two scenarios:
    *
    * 1. It is a regular CL, in which case we will submit the job to the GPU,
    *    so we may need to generate an RCL and add a binning flush.
    *
    * 2. It is a partial CL recorded in a secondary command buffer, in which
    *    case we are not submitting it directly to the GPU but rather branch to
    *    it from a primary command buffer. In this case we just want to end
    *    the BCL with a RETURN_FROM_SUB_LIST, and the RCL and binning flush
    *    will be emitted by the primary job that branches to this CL.
    */
   if (cmd_buffer->state.pass) {
      if (job->type == V3DV_JOB_TYPE_GPU_CL) {
         cmd_buffer_end_render_pass_frame(cmd_buffer);
      } else {
         assert(job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
         v3dv_X(cmd_buffer->device, cmd_buffer_end_render_pass_secondary)(cmd_buffer);
      }
   }

   list_addtail(&job->list_link, &cmd_buffer->jobs);
   cmd_buffer->state.job = NULL;

   /* If we have recorded any state with this last GPU job that requires us to
    * emit CPU jobs after the job is completed, add them now. The only
    * exception is secondary command buffers inside a render pass, because in
    * that case we want to defer this until we finish recording the primary
    * job into which we execute the secondary.
    */
   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ||
       !cmd_buffer->state.pass) {
      cmd_buffer_add_cpu_jobs_for_pending_state(cmd_buffer);
   }
}

static bool
job_type_is_gpu(struct v3dv_job *job)
{
   switch (job->type) {
   case V3DV_JOB_TYPE_GPU_CL:
   case V3DV_JOB_TYPE_GPU_CL_SECONDARY:
   case V3DV_JOB_TYPE_GPU_TFU:
   case V3DV_JOB_TYPE_GPU_CSD:
      return true;
   default:
      return false;
   }
}

static void
cmd_buffer_serialize_job_if_needed(struct v3dv_cmd_buffer *cmd_buffer,
                                   struct v3dv_job *job)
{
   assert(cmd_buffer && job);

   if (!cmd_buffer->state.has_barrier)
      return;

   /* Serialization only affects GPU jobs; CPU jobs are always automatically
    * serialized.
    */
   if (!job_type_is_gpu(job))
      return;

   job->serialize = true;
   if (cmd_buffer->state.has_bcl_barrier &&
       (job->type == V3DV_JOB_TYPE_GPU_CL ||
        job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY)) {
      job->needs_bcl_sync = true;
   }

   cmd_buffer->state.has_barrier = false;
   cmd_buffer->state.has_bcl_barrier = false;
}

void
v3dv_job_init(struct v3dv_job *job,
              enum v3dv_job_type type,
              struct v3dv_device *device,
              struct v3dv_cmd_buffer *cmd_buffer,
              int32_t subpass_idx)
{
   assert(job);

   /* Make sure we haven't made this new job current before calling here */
   assert(!cmd_buffer || cmd_buffer->state.job != job);

   job->type = type;

   job->device = device;
   job->cmd_buffer = cmd_buffer;

   list_inithead(&job->list_link);

   if (type == V3DV_JOB_TYPE_GPU_CL ||
       type == V3DV_JOB_TYPE_GPU_CL_SECONDARY ||
       type == V3DV_JOB_TYPE_GPU_CSD) {
      job->bos =
         _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
      job->bo_count = 0;

      v3dv_cl_init(job, &job->indirect);

      if (unlikely(V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH))
         job->always_flush = true;
   }

   if (type == V3DV_JOB_TYPE_GPU_CL ||
       type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) {
      v3dv_cl_init(job, &job->bcl);
      v3dv_cl_init(job, &job->rcl);
   }

   if (cmd_buffer) {
      /* Flag all state as dirty. Generally, we need to re-emit state for each
       * new job.
       *
       * FIXME: there may be some exceptions, in which case we could skip some
       * bits.
       */
      cmd_buffer->state.dirty = ~0;
      cmd_buffer->state.dirty_descriptor_stages = ~0;

      /* Honor inheritance of occlusion queries in secondaries if requested */
      if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
          cmd_buffer->state.inheritance.occlusion_query_enable) {
         cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY;
      }

      /* Keep track of the first subpass that we are recording in this new job.
       * We will use this when we emit the RCL to decide how to emit our loads
       * and stores.
       */
      if (cmd_buffer->state.pass)
         job->first_subpass = subpass_idx;

      cmd_buffer_serialize_job_if_needed(cmd_buffer, job);
   }
}

struct v3dv_job *
v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
                          int32_t subpass_idx,
                          enum v3dv_job_type type)
{
   /* Don't create a new job if we can merge the current subpass into
    * the current job.
    */
   if (cmd_buffer->state.pass &&
       subpass_idx != -1 &&
       cmd_buffer_can_merge_subpass(cmd_buffer, subpass_idx)) {
      cmd_buffer->state.job->is_subpass_finish = false;
      return cmd_buffer->state.job;
   }

   /* Ensure we are not starting a new job without finishing a previous one */
   if (cmd_buffer->state.job != NULL)
      v3dv_cmd_buffer_finish_job(cmd_buffer);

   assert(cmd_buffer->state.job == NULL);
   struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
                                    sizeof(struct v3dv_job), 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);

   if (!job) {
      fprintf(stderr, "Error: failed to allocate CPU memory for job\n");
      v3dv_flag_oom(cmd_buffer, NULL);
      return NULL;
   }

   v3dv_job_init(job, type, cmd_buffer->device, cmd_buffer, subpass_idx);
   cmd_buffer->state.job = job;

   return job;
}

static VkResult
cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer,
                 VkCommandBufferResetFlags flags)
{
   vk_command_buffer_reset(&cmd_buffer->vk);
   if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_INITIALIZED) {
      struct v3dv_device *device = cmd_buffer->device;
      struct v3dv_cmd_pool *pool = cmd_buffer->pool;
      VkCommandBufferLevel level = cmd_buffer->level;

      /* cmd_buffer_init below will re-add the command buffer to the pool,
       * so remove it here to avoid adding it twice.
       */
      list_del(&cmd_buffer->pool_link);

      /* FIXME: For now we always free all resources as if
       * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT was set.
       */
      if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_NEW)
         cmd_buffer_free_resources(cmd_buffer);

      cmd_buffer_init(cmd_buffer, device, pool, level);
   }

   assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_INITIALIZED);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_AllocateCommandBuffers(VkDevice _device,
                            const VkCommandBufferAllocateInfo *pAllocateInfo,
                            VkCommandBuffer *pCommandBuffers)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_cmd_pool, pool, pAllocateInfo->commandPool);

   VkResult result = VK_SUCCESS;
   uint32_t i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
      result = cmd_buffer_create(device, pool, pAllocateInfo->level,
                                 &pCommandBuffers[i]);
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      v3dv_FreeCommandBuffers(_device, pAllocateInfo->commandPool,
                              i, pCommandBuffers);
      for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
         pCommandBuffers[i] = VK_NULL_HANDLE;
   }

   return result;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_FreeCommandBuffers(VkDevice device,
                        VkCommandPool commandPool,
                        uint32_t commandBufferCount,
                        const VkCommandBuffer *pCommandBuffers)
{
   for (uint32_t i = 0; i < commandBufferCount; i++) {
      V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);

      if (!cmd_buffer)
         continue;

      cmd_buffer_destroy(cmd_buffer);
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyCommandPool(VkDevice _device,
                        VkCommandPool commandPool,
                        const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_cmd_pool, pool, commandPool);

   if (!pool)
      return;

   list_for_each_entry_safe(struct v3dv_cmd_buffer, cmd_buffer,
                            &pool->cmd_buffers, pool_link) {
      cmd_buffer_destroy(cmd_buffer);
   }

   vk_object_free(&device->vk, pAllocator, pool);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_TrimCommandPool(VkDevice device,
                     VkCommandPool commandPool,
                     VkCommandPoolTrimFlags flags)
{
   /* We don't need to do anything here, our command pools never hold on to
    * any resources from command buffers that are freed or reset.
    */
}


static void
cmd_buffer_subpass_handle_pending_resolves(struct v3dv_cmd_buffer *cmd_buffer)
{
   assert(cmd_buffer->state.subpass_idx < cmd_buffer->state.pass->subpass_count);
   const struct v3dv_render_pass *pass = cmd_buffer->state.pass;
   const struct v3dv_subpass *subpass =
      &pass->subpasses[cmd_buffer->state.subpass_idx];

   if (!subpass->resolve_attachments)
      return;

   struct v3dv_framebuffer *fb = cmd_buffer->state.framebuffer;

   /* At this point we have already ended the current subpass and now we are
    * about to emit vkCmdResolveImage calls to get the resolves we can't
    * handle in the subpass RCL.
    *
    * vkCmdResolveImage is not supposed to be called inside a render pass so
    * before we call that we need to make sure our command buffer state reflects
    * that we are no longer in a subpass by finishing the current job and
    * resetting the framebuffer and render pass state temporarily and then
    * restoring it after we are done with the resolves.
    */
   if (cmd_buffer->state.job)
      v3dv_cmd_buffer_finish_job(cmd_buffer);
   struct v3dv_framebuffer *restore_fb = cmd_buffer->state.framebuffer;
   struct v3dv_render_pass *restore_pass = cmd_buffer->state.pass;
   uint32_t restore_subpass_idx = cmd_buffer->state.subpass_idx;
   cmd_buffer->state.framebuffer = NULL;
   cmd_buffer->state.pass = NULL;
   cmd_buffer->state.subpass_idx = -1;

   VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t src_attachment_idx =
         subpass->color_attachments[i].attachment;
      if (src_attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      if (pass->attachments[src_attachment_idx].use_tlb_resolve)
         continue;

      const uint32_t dst_attachment_idx =
         subpass->resolve_attachments[i].attachment;
      if (dst_attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      struct v3dv_image_view *src_iview = fb->attachments[src_attachment_idx];
      struct v3dv_image_view *dst_iview = fb->attachments[dst_attachment_idx];

      VkImageResolve2KHR region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
         .srcSubresource = {
            VK_IMAGE_ASPECT_COLOR_BIT,
            src_iview->vk.base_mip_level,
            src_iview->vk.base_array_layer,
            src_iview->vk.layer_count,
         },
         .srcOffset = { 0, 0, 0 },
         .dstSubresource = {
            VK_IMAGE_ASPECT_COLOR_BIT,
            dst_iview->vk.base_mip_level,
            dst_iview->vk.base_array_layer,
            dst_iview->vk.layer_count,
         },
         .dstOffset = { 0, 0, 0 },
         .extent = src_iview->vk.image->extent,
      };

      struct v3dv_image *src_image = (struct v3dv_image *) src_iview->vk.image;
      struct v3dv_image *dst_image = (struct v3dv_image *) dst_iview->vk.image;
      VkResolveImageInfo2KHR resolve_info = {
         .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2_KHR,
         .srcImage = v3dv_image_to_handle(src_image),
         .srcImageLayout = VK_IMAGE_LAYOUT_GENERAL,
         .dstImage = v3dv_image_to_handle(dst_image),
         .dstImageLayout = VK_IMAGE_LAYOUT_GENERAL,
         .regionCount = 1,
         .pRegions = &region,
      };
      v3dv_CmdResolveImage2KHR(cmd_buffer_handle, &resolve_info);
   }

   cmd_buffer->state.framebuffer = restore_fb;
   cmd_buffer->state.pass = restore_pass;
   cmd_buffer->state.subpass_idx = restore_subpass_idx;
}

static VkResult
cmd_buffer_begin_render_pass_secondary(
   struct v3dv_cmd_buffer *cmd_buffer,
   const VkCommandBufferInheritanceInfo *inheritance_info)
{
   assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
   assert(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT);
   assert(inheritance_info);

   cmd_buffer->state.pass =
      v3dv_render_pass_from_handle(inheritance_info->renderPass);
   assert(cmd_buffer->state.pass);

   cmd_buffer->state.framebuffer =
      v3dv_framebuffer_from_handle(inheritance_info->framebuffer);

   assert(inheritance_info->subpass < cmd_buffer->state.pass->subpass_count);
   cmd_buffer->state.subpass_idx = inheritance_info->subpass;

   cmd_buffer->state.inheritance.occlusion_query_enable =
      inheritance_info->occlusionQueryEnable;

   /* Secondaries that execute inside a render pass won't start subpasses,
    * so we want to create a job for them here.
    */
   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, inheritance_info->subpass,
                                V3DV_JOB_TYPE_GPU_CL_SECONDARY);
   if (!job) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   /* Secondary command buffers don't know about the render area, but our
    * scissor setup accounts for it, so let's make sure we make it large
    * enough that it doesn't actually constrain any rendering. This should
    * be fine, since the Vulkan spec states:
    *
    *    "The application must ensure (using scissor if necessary) that all
    *     rendering is contained within the render area."
    *
    * FIXME: setup constants for the max framebuffer dimensions and use them
    * here and when filling in VkPhysicalDeviceLimits.
    */
   const struct v3dv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
   cmd_buffer->state.render_area.offset.x = 0;
   cmd_buffer->state.render_area.offset.y = 0;
   cmd_buffer->state.render_area.extent.width =
      framebuffer ? framebuffer->width : 4096;
   cmd_buffer->state.render_area.extent.height =
      framebuffer ? framebuffer->height : 4096;

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                        const VkCommandBufferBeginInfo *pBeginInfo)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);

   /* If this is the first vkBeginCommandBuffer, we must initialize the
    * command buffer's state. Otherwise, we must reset its state. In both
    * cases we reset it.
    */
   VkResult result = cmd_buffer_reset(cmd_buffer, 0);
   if (result != VK_SUCCESS)
      return result;

   assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_INITIALIZED);

   cmd_buffer->usage_flags = pBeginInfo->flags;

   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
      if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
         result =
            cmd_buffer_begin_render_pass_secondary(cmd_buffer,
                                                   pBeginInfo->pInheritanceInfo);
         if (result != VK_SUCCESS)
            return result;
      }
   }

   cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_RECORDING;

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetCommandBuffer(VkCommandBuffer commandBuffer,
                        VkCommandBufferResetFlags flags)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   return cmd_buffer_reset(cmd_buffer, flags);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetCommandPool(VkDevice device,
                      VkCommandPool commandPool,
                      VkCommandPoolResetFlags flags)
{
   V3DV_FROM_HANDLE(v3dv_cmd_pool, pool, commandPool);

   VkCommandBufferResetFlags reset_flags = 0;
   if (flags & VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT)
      reset_flags = VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT;
   list_for_each_entry_safe(struct v3dv_cmd_buffer, cmd_buffer,
                            &pool->cmd_buffers, pool_link) {
      cmd_buffer_reset(cmd_buffer, reset_flags);
   }

   return VK_SUCCESS;
}

static void
cmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer)
{
   /* Render areas and scissor/viewport are only relevant inside render passes,
    * otherwise we are dealing with transfer operations where these elements
    * don't apply.
    */
   assert(cmd_buffer->state.pass);
   const VkRect2D *rect = &cmd_buffer->state.render_area;

   /* We should only call this at the beginning of a subpass so we should
    * always have framebuffer information available.
    */
   assert(cmd_buffer->state.framebuffer);
   cmd_buffer->state.tile_aligned_render_area =
      v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, rect,
                                        cmd_buffer->state.framebuffer,
                                        cmd_buffer->state.pass,
                                        cmd_buffer->state.subpass_idx);

   if (!cmd_buffer->state.tile_aligned_render_area) {
      perf_debug("Render area for subpass %d of render pass %p doesn't "
                 "match render pass granularity.\n",
                 cmd_buffer->state.subpass_idx, cmd_buffer->state.pass);
   }
}

static void
cmd_buffer_state_set_attachment_clear_color(struct v3dv_cmd_buffer *cmd_buffer,
                                            uint32_t attachment_idx,
                                            const VkClearColorValue *color)
{
   assert(attachment_idx < cmd_buffer->state.pass->attachment_count);

   const struct v3dv_render_pass_attachment *attachment =
      &cmd_buffer->state.pass->attachments[attachment_idx];

   uint32_t internal_type, internal_bpp;
   const struct v3dv_format *format =
      v3dv_X(cmd_buffer->device, get_format)(attachment->desc.format);

   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_output_format)
      (format->rt_type, &internal_type, &internal_bpp);

   uint32_t internal_size = 4 << internal_bpp;

   struct v3dv_cmd_buffer_attachment_state *attachment_state =
      &cmd_buffer->state.attachments[attachment_idx];

   v3dv_X(cmd_buffer->device, get_hw_clear_color)
      (color, internal_type, internal_size, &attachment_state->clear_value.color[0]);

   attachment_state->vk_clear_value.color = *color;
}

static void
cmd_buffer_state_set_attachment_clear_depth_stencil(
   struct v3dv_cmd_buffer *cmd_buffer,
   uint32_t attachment_idx,
   bool clear_depth, bool clear_stencil,
   const VkClearDepthStencilValue *ds)
{
   struct v3dv_cmd_buffer_attachment_state *attachment_state =
      &cmd_buffer->state.attachments[attachment_idx];

   if (clear_depth)
      attachment_state->clear_value.z = ds->depth;

   if (clear_stencil)
      attachment_state->clear_value.s = ds->stencil;

   attachment_state->vk_clear_value.depthStencil = *ds;
}

static void
cmd_buffer_state_set_clear_values(struct v3dv_cmd_buffer *cmd_buffer,
                                  uint32_t count, const VkClearValue *values)
{
   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   const struct v3dv_render_pass *pass = state->pass;

   /* There could be fewer clear values than attachments in the render pass, in
    * which case we only want to process as many as we have, or there could be
    * more, in which case we want to ignore those for which we don't have a
    * corresponding attachment.
    */
   count = MIN2(count, pass->attachment_count);
   for (uint32_t i = 0; i < count; i++) {
      const struct v3dv_render_pass_attachment *attachment =
         &pass->attachments[i];

      if (attachment->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
         continue;

      VkImageAspectFlags aspects = vk_format_aspects(attachment->desc.format);
      if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
         cmd_buffer_state_set_attachment_clear_color(cmd_buffer, i,
                                                     &values[i].color);
      } else if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                            VK_IMAGE_ASPECT_STENCIL_BIT)) {
         cmd_buffer_state_set_attachment_clear_depth_stencil(
            cmd_buffer, i,
            aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
            aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
            &values[i].depthStencil);
      }
   }
}

static void
cmd_buffer_init_render_pass_attachment_state(struct v3dv_cmd_buffer *cmd_buffer,
                                             const VkRenderPassBeginInfo *pRenderPassBegin)
{
   cmd_buffer_state_set_clear_values(cmd_buffer,
                                     pRenderPassBegin->clearValueCount,
                                     pRenderPassBegin->pClearValues);
}

static void
cmd_buffer_ensure_render_pass_attachment_state(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   const struct v3dv_render_pass *pass = state->pass;

   if (state->attachment_alloc_count < pass->attachment_count) {
      if (state->attachments > 0) {
         assert(state->attachment_alloc_count > 0);
         vk_free(&cmd_buffer->device->vk.alloc, state->attachments);
      }

      uint32_t size = sizeof(struct v3dv_cmd_buffer_attachment_state) *
                      pass->attachment_count;
      state->attachments = vk_zalloc(&cmd_buffer->device->vk.alloc, size, 8,
                                     VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!state->attachments) {
         v3dv_flag_oom(cmd_buffer, NULL);
         return;
      }
      state->attachment_alloc_count = pass->attachment_count;
   }

   assert(state->attachment_alloc_count >= pass->attachment_count);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
                        const VkRenderPassBeginInfo *pRenderPassBegin,
                        VkSubpassContents contents)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_render_pass, pass, pRenderPassBegin->renderPass);
pRenderPassBegin->framebuffer); 13297ec681f3Smrg 13307ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 13317ec681f3Smrg state->pass = pass; 13327ec681f3Smrg state->framebuffer = framebuffer; 13337ec681f3Smrg 13347ec681f3Smrg cmd_buffer_ensure_render_pass_attachment_state(cmd_buffer); 13357ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 13367ec681f3Smrg 13377ec681f3Smrg cmd_buffer_init_render_pass_attachment_state(cmd_buffer, pRenderPassBegin); 13387ec681f3Smrg 13397ec681f3Smrg state->render_area = pRenderPassBegin->renderArea; 13407ec681f3Smrg 13417ec681f3Smrg /* If our render area is smaller than the current clip window we will have 13427ec681f3Smrg * to emit a new clip window to constraint it to the render area. 13437ec681f3Smrg */ 13447ec681f3Smrg uint32_t min_render_x = state->render_area.offset.x; 13457ec681f3Smrg uint32_t min_render_y = state->render_area.offset.y; 13467ec681f3Smrg uint32_t max_render_x = min_render_x + state->render_area.extent.width - 1; 13477ec681f3Smrg uint32_t max_render_y = min_render_y + state->render_area.extent.height - 1; 13487ec681f3Smrg uint32_t min_clip_x = state->clip_window.offset.x; 13497ec681f3Smrg uint32_t min_clip_y = state->clip_window.offset.y; 13507ec681f3Smrg uint32_t max_clip_x = min_clip_x + state->clip_window.extent.width - 1; 13517ec681f3Smrg uint32_t max_clip_y = min_clip_y + state->clip_window.extent.height - 1; 13527ec681f3Smrg if (min_render_x > min_clip_x || min_render_y > min_clip_y || 13537ec681f3Smrg max_render_x < max_clip_x || max_render_y < max_clip_y) { 13547ec681f3Smrg state->dirty |= V3DV_CMD_DIRTY_SCISSOR; 13557ec681f3Smrg } 13567ec681f3Smrg 13577ec681f3Smrg /* Setup for first subpass */ 13587ec681f3Smrg v3dv_cmd_buffer_subpass_start(cmd_buffer, 0); 13597ec681f3Smrg} 13607ec681f3Smrg 13617ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 13627ec681f3Smrgv3dv_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents) 13637ec681f3Smrg{ 13647ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 13657ec681f3Smrg 13667ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 13677ec681f3Smrg assert(state->subpass_idx < state->pass->subpass_count - 1); 13687ec681f3Smrg 13697ec681f3Smrg /* Finish the previous subpass */ 13707ec681f3Smrg v3dv_cmd_buffer_subpass_finish(cmd_buffer); 13717ec681f3Smrg cmd_buffer_subpass_handle_pending_resolves(cmd_buffer); 13727ec681f3Smrg 13737ec681f3Smrg /* Start the next subpass */ 13747ec681f3Smrg v3dv_cmd_buffer_subpass_start(cmd_buffer, state->subpass_idx + 1); 13757ec681f3Smrg} 13767ec681f3Smrg 13777ec681f3Smrgstatic void 13787ec681f3Smrgcmd_buffer_emit_subpass_clears(struct v3dv_cmd_buffer *cmd_buffer) 13797ec681f3Smrg{ 13807ec681f3Smrg assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); 13817ec681f3Smrg 13827ec681f3Smrg assert(cmd_buffer->state.pass); 13837ec681f3Smrg assert(cmd_buffer->state.subpass_idx < cmd_buffer->state.pass->subpass_count); 13847ec681f3Smrg const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 13857ec681f3Smrg const struct v3dv_render_pass *pass = state->pass; 13867ec681f3Smrg const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; 13877ec681f3Smrg 13887ec681f3Smrg /* We only need to emit subpass clears as draw calls when the render 13897ec681f3Smrg * area is not aligned to tile boundaries or for GFXH-1461. 
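    * (GFXH-1461: the hardware cannot do a TLB clear of only one of the
    * depth/stencil aspects while the other one is loaded, so in that case the
    * cleared aspect has to be handled with a draw call instead; see the
    * perf_debug message further down.)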
 */
   if (cmd_buffer->state.tile_aligned_render_area &&
       !subpass->do_depth_clear_with_draw &&
       !subpass->do_stencil_clear_with_draw) {
      return;
   }

   uint32_t att_count = 0;
   VkClearAttachment atts[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */

   /* We only need to emit subpass clears as draw calls for color attachments
    * if the render area is not aligned to tile boundaries.
    */
   if (!cmd_buffer->state.tile_aligned_render_area) {
      for (uint32_t i = 0; i < subpass->color_count; i++) {
         const uint32_t att_idx = subpass->color_attachments[i].attachment;
         if (att_idx == VK_ATTACHMENT_UNUSED)
            continue;

         struct v3dv_render_pass_attachment *att = &pass->attachments[att_idx];
         if (att->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
            continue;

         if (state->subpass_idx != att->first_subpass)
            continue;

         atts[att_count].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
         atts[att_count].colorAttachment = i;
         atts[att_count].clearValue = state->attachments[att_idx].vk_clear_value;
         att_count++;
      }
   }

   /* For D/S we may also need to emit a subpass clear for GFXH-1461 */
   const uint32_t ds_att_idx = subpass->ds_attachment.attachment;
   if (ds_att_idx != VK_ATTACHMENT_UNUSED) {
      struct v3dv_render_pass_attachment *att = &pass->attachments[ds_att_idx];
      if (state->subpass_idx == att->first_subpass) {
         VkImageAspectFlags aspects = vk_format_aspects(att->desc.format);
         if (att->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR ||
             (cmd_buffer->state.tile_aligned_render_area &&
              !subpass->do_depth_clear_with_draw)) {
            aspects &= ~VK_IMAGE_ASPECT_DEPTH_BIT;
         }
         if (att->desc.stencilLoadOp != VK_ATTACHMENT_LOAD_OP_CLEAR ||
             (cmd_buffer->state.tile_aligned_render_area &&
              !subpass->do_stencil_clear_with_draw)) {
            aspects &= ~VK_IMAGE_ASPECT_STENCIL_BIT;
         }
         if (aspects) {
            atts[att_count].aspectMask = aspects;
            atts[att_count].colorAttachment = 0; /* Ignored */
            atts[att_count].clearValue =
               state->attachments[ds_att_idx].vk_clear_value;
            att_count++;
         }
      }
   }

   if (att_count == 0)
      return;

   if (!cmd_buffer->state.tile_aligned_render_area) {
      perf_debug("Render area doesn't match render pass granularity, falling "
                 "back to vkCmdClearAttachments for "
                 "VK_ATTACHMENT_LOAD_OP_CLEAR.\n");
   } else if (subpass->do_depth_clear_with_draw ||
              subpass->do_stencil_clear_with_draw) {
      perf_debug("Subpass clears DEPTH but loads STENCIL (or vice versa), "
                 "falling back to vkCmdClearAttachments for "
"VK_ATTACHMENT_LOAD_OP_CLEAR.\n"); 14617ec681f3Smrg } 14627ec681f3Smrg 14637ec681f3Smrg /* From the Vulkan 1.0 spec: 14647ec681f3Smrg * 14657ec681f3Smrg * "VK_ATTACHMENT_LOAD_OP_CLEAR specifies that the contents within the 14667ec681f3Smrg * render area will be cleared to a uniform value, which is specified 14677ec681f3Smrg * when a render pass instance is begun." 14687ec681f3Smrg * 14697ec681f3Smrg * So the clear is only constrained by the render area and not by pipeline 14707ec681f3Smrg * state such as scissor or viewport, these are the semantics of 14717ec681f3Smrg * vkCmdClearAttachments as well. 14727ec681f3Smrg */ 14737ec681f3Smrg VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer); 14747ec681f3Smrg VkClearRect rect = { 14757ec681f3Smrg .rect = state->render_area, 14767ec681f3Smrg .baseArrayLayer = 0, 14777ec681f3Smrg .layerCount = 1, 14787ec681f3Smrg }; 14797ec681f3Smrg v3dv_CmdClearAttachments(_cmd_buffer, att_count, atts, 1, &rect); 14807ec681f3Smrg} 14817ec681f3Smrg 14827ec681f3Smrgstatic struct v3dv_job * 14837ec681f3Smrgcmd_buffer_subpass_create_job(struct v3dv_cmd_buffer *cmd_buffer, 14847ec681f3Smrg uint32_t subpass_idx, 14857ec681f3Smrg enum v3dv_job_type type) 14867ec681f3Smrg{ 14877ec681f3Smrg assert(type == V3DV_JOB_TYPE_GPU_CL || 14887ec681f3Smrg type == V3DV_JOB_TYPE_GPU_CL_SECONDARY); 14897ec681f3Smrg 14907ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 14917ec681f3Smrg assert(subpass_idx < state->pass->subpass_count); 14927ec681f3Smrg 14937ec681f3Smrg /* Starting a new job can trigger a finish of the current one, so don't 14947ec681f3Smrg * change the command buffer state for the new job until we are done creating 14957ec681f3Smrg * the new job. 14967ec681f3Smrg */ 14977ec681f3Smrg struct v3dv_job *job = 14987ec681f3Smrg v3dv_cmd_buffer_start_job(cmd_buffer, subpass_idx, type); 14997ec681f3Smrg if (!job) 15007ec681f3Smrg return NULL; 15017ec681f3Smrg 15027ec681f3Smrg state->subpass_idx = subpass_idx; 15037ec681f3Smrg 15047ec681f3Smrg /* If we are starting a new job we need to setup binning. We only do this 15057ec681f3Smrg * for V3DV_JOB_TYPE_GPU_CL jobs because V3DV_JOB_TYPE_GPU_CL_SECONDARY 15067ec681f3Smrg * jobs are not submitted to the GPU directly, and are instead meant to be 15077ec681f3Smrg * branched to from other V3DV_JOB_TYPE_GPU_CL jobs. 15087ec681f3Smrg */ 15097ec681f3Smrg if (type == V3DV_JOB_TYPE_GPU_CL && 15107ec681f3Smrg job->first_subpass == state->subpass_idx) { 15117ec681f3Smrg const struct v3dv_subpass *subpass = 15127ec681f3Smrg &state->pass->subpasses[state->subpass_idx]; 15137ec681f3Smrg 15147ec681f3Smrg const struct v3dv_framebuffer *framebuffer = state->framebuffer; 15157ec681f3Smrg 15167ec681f3Smrg uint8_t internal_bpp; 15177ec681f3Smrg bool msaa; 15187ec681f3Smrg v3dv_X(job->device, framebuffer_compute_internal_bpp_msaa) 15197ec681f3Smrg (framebuffer, subpass, &internal_bpp, &msaa); 15207ec681f3Smrg 15217ec681f3Smrg /* From the Vulkan spec: 15227ec681f3Smrg * 15237ec681f3Smrg * "If the render pass uses multiview, then layers must be one and 15247ec681f3Smrg * each attachment requires a number of layers that is greater than 15257ec681f3Smrg * the maximum bit index set in the view mask in the subpasses in 15267ec681f3Smrg * which it is used." 15277ec681f3Smrg * 15287ec681f3Smrg * So when multiview is enabled, we take the number of layers from the 15297ec681f3Smrg * last bit set in the view mask. 
       * For example, a view_mask of 0b0101 (views 0 and 2) makes
       * util_last_bit() return 3, so the frame is set up with 3 layers even
       * though only two of them are actually rendered.
       */
      uint32_t layers = framebuffer->layers;
      if (subpass->view_mask != 0) {
         assert(framebuffer->layers == 1);
         layers = util_last_bit(subpass->view_mask);
      }

      v3dv_job_start_frame(job,
                           framebuffer->width,
                           framebuffer->height,
                           layers,
                           true,
                           subpass->color_count,
                           internal_bpp,
                           msaa);
   }

   return job;
}

struct v3dv_job *
v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
                              uint32_t subpass_idx)
{
   assert(cmd_buffer->state.pass);
   assert(subpass_idx < cmd_buffer->state.pass->subpass_count);

   struct v3dv_job *job =
      cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx,
                                    V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return NULL;

   /* Check if our render area is aligned to tile boundaries. We have to do
    * this in each subpass because the subset of attachments used can change
    * and with that the tile size selected by the hardware can change too.
    */
   cmd_buffer_update_tile_alignment(cmd_buffer);

   /* If we can't use TLB clears then we need to emit draw clears for any
    * LOAD_OP_CLEAR attachments in this subpass now. We might also need to emit
    * Depth/Stencil clears if we hit GFXH-1461.
    *
    * Secondary command buffers don't start subpasses (and may not even have
    * framebuffer state), so we only care about this in primaries. The only
    * exception could be a secondary running inside a subpass that needs to
    * record a meta operation (with its own render pass) that relies on
    * attachment load clears, but we don't have any instances of that right
    * now.
15797ec681f3Smrg */ 15807ec681f3Smrg if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) 15817ec681f3Smrg cmd_buffer_emit_subpass_clears(cmd_buffer); 15827ec681f3Smrg 15837ec681f3Smrg return job; 15847ec681f3Smrg} 15857ec681f3Smrg 15867ec681f3Smrgstruct v3dv_job * 15877ec681f3Smrgv3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer, 15887ec681f3Smrg uint32_t subpass_idx) 15897ec681f3Smrg{ 15907ec681f3Smrg assert(cmd_buffer->state.pass); 15917ec681f3Smrg assert(subpass_idx < cmd_buffer->state.pass->subpass_count); 15927ec681f3Smrg 15937ec681f3Smrg struct v3dv_job *job; 15947ec681f3Smrg if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { 15957ec681f3Smrg job = cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx, 15967ec681f3Smrg V3DV_JOB_TYPE_GPU_CL); 15977ec681f3Smrg } else { 15987ec681f3Smrg assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); 15997ec681f3Smrg job = cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx, 16007ec681f3Smrg V3DV_JOB_TYPE_GPU_CL_SECONDARY); 16017ec681f3Smrg } 16027ec681f3Smrg 16037ec681f3Smrg if (!job) 16047ec681f3Smrg return NULL; 16057ec681f3Smrg 16067ec681f3Smrg job->is_subpass_continue = true; 16077ec681f3Smrg 16087ec681f3Smrg return job; 16097ec681f3Smrg} 16107ec681f3Smrg 16117ec681f3Smrgvoid 16127ec681f3Smrgv3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer) 16137ec681f3Smrg{ 16147ec681f3Smrg /* We can end up here without a job if the last command recorded into the 16157ec681f3Smrg * subpass already finished the job (for example a pipeline barrier). In 16167ec681f3Smrg * that case we miss to set the is_subpass_finish flag, but that is not 16177ec681f3Smrg * required for proper behavior. 16187ec681f3Smrg */ 16197ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 16207ec681f3Smrg if (job) 16217ec681f3Smrg job->is_subpass_finish = true; 16227ec681f3Smrg} 16237ec681f3Smrg 16247ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 16257ec681f3Smrgv3dv_CmdEndRenderPass(VkCommandBuffer commandBuffer) 16267ec681f3Smrg{ 16277ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 16287ec681f3Smrg 16297ec681f3Smrg /* Finalize last subpass */ 16307ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 16317ec681f3Smrg assert(state->subpass_idx == state->pass->subpass_count - 1); 16327ec681f3Smrg v3dv_cmd_buffer_subpass_finish(cmd_buffer); 16337ec681f3Smrg v3dv_cmd_buffer_finish_job(cmd_buffer); 16347ec681f3Smrg 16357ec681f3Smrg cmd_buffer_subpass_handle_pending_resolves(cmd_buffer); 16367ec681f3Smrg 16377ec681f3Smrg /* We are no longer inside a render pass */ 16387ec681f3Smrg state->framebuffer = NULL; 16397ec681f3Smrg state->pass = NULL; 16407ec681f3Smrg state->subpass_idx = -1; 16417ec681f3Smrg} 16427ec681f3Smrg 16437ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL 16447ec681f3Smrgv3dv_EndCommandBuffer(VkCommandBuffer commandBuffer) 16457ec681f3Smrg{ 16467ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 16477ec681f3Smrg 16487ec681f3Smrg if (cmd_buffer->state.oom) 16497ec681f3Smrg return VK_ERROR_OUT_OF_HOST_MEMORY; 16507ec681f3Smrg 16517ec681f3Smrg /* Primaries should have ended any recording jobs by the time they hit 16527ec681f3Smrg * vkEndRenderPass (if we are inside a render pass). Commands outside 16537ec681f3Smrg * a render pass instance (for both primaries and secondaries) spawn 16547ec681f3Smrg * complete jobs too. 
So the only case where we can get here without 16557ec681f3Smrg * finishing a recording job is when we are recording a secondary 16567ec681f3Smrg * inside a render pass. 16577ec681f3Smrg */ 16587ec681f3Smrg if (cmd_buffer->state.job) { 16597ec681f3Smrg assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && 16607ec681f3Smrg cmd_buffer->state.pass); 16617ec681f3Smrg v3dv_cmd_buffer_finish_job(cmd_buffer); 16627ec681f3Smrg } 16637ec681f3Smrg 16647ec681f3Smrg cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_EXECUTABLE; 16657ec681f3Smrg 16667ec681f3Smrg return VK_SUCCESS; 16677ec681f3Smrg} 16687ec681f3Smrg 16697ec681f3Smrgstatic void 16707ec681f3Smrgclone_bo_list(struct v3dv_cmd_buffer *cmd_buffer, 16717ec681f3Smrg struct list_head *dst, 16727ec681f3Smrg struct list_head *src) 16737ec681f3Smrg{ 16747ec681f3Smrg assert(cmd_buffer); 16757ec681f3Smrg 16767ec681f3Smrg list_inithead(dst); 16777ec681f3Smrg list_for_each_entry(struct v3dv_bo, bo, src, list_link) { 16787ec681f3Smrg struct v3dv_bo *clone_bo = 16797ec681f3Smrg vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct v3dv_bo), 8, 16807ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 16817ec681f3Smrg if (!clone_bo) { 16827ec681f3Smrg v3dv_flag_oom(cmd_buffer, NULL); 16837ec681f3Smrg return; 16847ec681f3Smrg } 16857ec681f3Smrg 16867ec681f3Smrg *clone_bo = *bo; 16877ec681f3Smrg list_addtail(&clone_bo->list_link, dst); 16887ec681f3Smrg } 16897ec681f3Smrg} 16907ec681f3Smrg 16917ec681f3Smrg/* Clones a job for inclusion in the given command buffer. Note that this 16927ec681f3Smrg * doesn't make a deep copy so the cloned job it doesn't own any resources. 16937ec681f3Smrg * Useful when we need to have a job in more than one list, which happens 16947ec681f3Smrg * for jobs recorded in secondary command buffers when we want to execute 16957ec681f3Smrg * them in primaries. 16967ec681f3Smrg */ 16977ec681f3Smrgstruct v3dv_job * 16987ec681f3Smrgv3dv_job_clone_in_cmd_buffer(struct v3dv_job *job, 16997ec681f3Smrg struct v3dv_cmd_buffer *cmd_buffer) 17007ec681f3Smrg{ 17017ec681f3Smrg struct v3dv_job *clone_job = vk_alloc(&job->device->vk.alloc, 17027ec681f3Smrg sizeof(struct v3dv_job), 8, 17037ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 17047ec681f3Smrg if (!clone_job) { 17057ec681f3Smrg v3dv_flag_oom(cmd_buffer, NULL); 17067ec681f3Smrg return NULL; 17077ec681f3Smrg } 17087ec681f3Smrg 17097ec681f3Smrg /* Cloned jobs don't duplicate resources! */ 17107ec681f3Smrg *clone_job = *job; 17117ec681f3Smrg clone_job->is_clone = true; 17127ec681f3Smrg clone_job->cmd_buffer = cmd_buffer; 17137ec681f3Smrg list_addtail(&clone_job->list_link, &cmd_buffer->jobs); 17147ec681f3Smrg 17157ec681f3Smrg /* We need to regen the BO lists so that they point to the BO list in the 17167ec681f3Smrg * cloned job. Otherwise functions like list_length() will loop forever. 
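 * (the copied list heads would still point into the original job's nodes, so
 * iterating the clone's list with its own head as the sentinel would never
 * terminate)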
17177ec681f3Smrg */ 17187ec681f3Smrg if (job->type == V3DV_JOB_TYPE_GPU_CL) { 17197ec681f3Smrg clone_bo_list(cmd_buffer, &clone_job->bcl.bo_list, &job->bcl.bo_list); 17207ec681f3Smrg clone_bo_list(cmd_buffer, &clone_job->rcl.bo_list, &job->rcl.bo_list); 17217ec681f3Smrg clone_bo_list(cmd_buffer, &clone_job->indirect.bo_list, 17227ec681f3Smrg &job->indirect.bo_list); 17237ec681f3Smrg } 17247ec681f3Smrg 17257ec681f3Smrg return clone_job; 17267ec681f3Smrg} 17277ec681f3Smrg 17287ec681f3Smrgstatic void 17297ec681f3Smrgcmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary, 17307ec681f3Smrg uint32_t cmd_buffer_count, 17317ec681f3Smrg const VkCommandBuffer *cmd_buffers) 17327ec681f3Smrg{ 17337ec681f3Smrg bool pending_barrier = false; 17347ec681f3Smrg bool pending_bcl_barrier = false; 17357ec681f3Smrg for (uint32_t i = 0; i < cmd_buffer_count; i++) { 17367ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]); 17377ec681f3Smrg 17387ec681f3Smrg assert(!(secondary->usage_flags & 17397ec681f3Smrg VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)); 17407ec681f3Smrg 17417ec681f3Smrg /* Secondary command buffers that execute outside a render pass create 17427ec681f3Smrg * complete jobs with an RCL and tile setup, so we simply want to merge 17437ec681f3Smrg * their job list into the primary's. However, because they may be 17447ec681f3Smrg * executed into multiple primaries at the same time and we only have a 17457ec681f3Smrg * single list_link in each job, we can't just add then to the primary's 17467ec681f3Smrg * job list and we instead have to clone them first. 17477ec681f3Smrg * 17487ec681f3Smrg * Alternatively, we could create a "execute secondary" CPU job that 17497ec681f3Smrg * when executed in a queue, would submit all the jobs in the referenced 17507ec681f3Smrg * secondary command buffer. However, this would raise some challenges 17517ec681f3Smrg * to make it work with the implementation of wait threads in the queue 17527ec681f3Smrg * which we use for event waits, for example. 17537ec681f3Smrg */ 17547ec681f3Smrg list_for_each_entry(struct v3dv_job, secondary_job, 17557ec681f3Smrg &secondary->jobs, list_link) { 17567ec681f3Smrg /* These can only happen inside a render pass */ 17577ec681f3Smrg assert(secondary_job->type != V3DV_JOB_TYPE_GPU_CL_SECONDARY); 17587ec681f3Smrg struct v3dv_job *job = v3dv_job_clone_in_cmd_buffer(secondary_job, primary); 17597ec681f3Smrg if (!job) 17607ec681f3Smrg return; 17617ec681f3Smrg 17627ec681f3Smrg if (pending_barrier) { 17637ec681f3Smrg job->serialize = true; 17647ec681f3Smrg if (pending_bcl_barrier) 17657ec681f3Smrg job->needs_bcl_sync = true; 17667ec681f3Smrg pending_barrier = false; 17677ec681f3Smrg pending_bcl_barrier = false; 17687ec681f3Smrg } 17697ec681f3Smrg } 17707ec681f3Smrg 17717ec681f3Smrg /* If this secondary had any pending barrier state we will need that 17727ec681f3Smrg * barrier state consumed with whatever comes after it (first job in 17737ec681f3Smrg * the next secondary or the primary, if this was the last secondary). 
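       * That is why we keep it in pending_barrier/pending_bcl_barrier here
       * and, if nothing else consumes it, transfer it to the primary's state
       * at the end of this function.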
17747ec681f3Smrg */ 17757ec681f3Smrg assert(secondary->state.has_barrier || !secondary->state.has_bcl_barrier); 17767ec681f3Smrg pending_barrier = secondary->state.has_barrier; 17777ec681f3Smrg pending_bcl_barrier = secondary->state.has_bcl_barrier; 17787ec681f3Smrg } 17797ec681f3Smrg 17807ec681f3Smrg if (pending_barrier) { 17817ec681f3Smrg primary->state.has_barrier = true; 17827ec681f3Smrg primary->state.has_bcl_barrier |= pending_bcl_barrier; 17837ec681f3Smrg } 17847ec681f3Smrg} 17857ec681f3Smrg 17867ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 17877ec681f3Smrgv3dv_CmdExecuteCommands(VkCommandBuffer commandBuffer, 17887ec681f3Smrg uint32_t commandBufferCount, 17897ec681f3Smrg const VkCommandBuffer *pCommandBuffers) 17907ec681f3Smrg{ 17917ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, primary, commandBuffer); 17927ec681f3Smrg 17937ec681f3Smrg if (primary->state.pass != NULL) { 17947ec681f3Smrg v3dv_X(primary->device, cmd_buffer_execute_inside_pass) 17957ec681f3Smrg (primary, commandBufferCount, pCommandBuffers); 17967ec681f3Smrg } else { 17977ec681f3Smrg cmd_buffer_execute_outside_pass(primary, 17987ec681f3Smrg commandBufferCount, pCommandBuffers); 17997ec681f3Smrg } 18007ec681f3Smrg} 18017ec681f3Smrg 18027ec681f3Smrg/* This goes though the list of possible dynamic states in the pipeline and, 18037ec681f3Smrg * for those that are not configured as dynamic, copies relevant state into 18047ec681f3Smrg * the command buffer. 18057ec681f3Smrg */ 18067ec681f3Smrgstatic void 18077ec681f3Smrgcmd_buffer_bind_pipeline_static_state(struct v3dv_cmd_buffer *cmd_buffer, 18087ec681f3Smrg const struct v3dv_dynamic_state *src) 18097ec681f3Smrg{ 18107ec681f3Smrg struct v3dv_dynamic_state *dest = &cmd_buffer->state.dynamic; 18117ec681f3Smrg uint32_t dynamic_mask = src->mask; 18127ec681f3Smrg uint32_t dirty = 0; 18137ec681f3Smrg 18147ec681f3Smrg if (!(dynamic_mask & V3DV_DYNAMIC_VIEWPORT)) { 18157ec681f3Smrg dest->viewport.count = src->viewport.count; 18167ec681f3Smrg if (memcmp(&dest->viewport.viewports, &src->viewport.viewports, 18177ec681f3Smrg src->viewport.count * sizeof(VkViewport))) { 18187ec681f3Smrg typed_memcpy(dest->viewport.viewports, 18197ec681f3Smrg src->viewport.viewports, 18207ec681f3Smrg src->viewport.count); 18217ec681f3Smrg typed_memcpy(dest->viewport.scale, src->viewport.scale, 18227ec681f3Smrg src->viewport.count); 18237ec681f3Smrg typed_memcpy(dest->viewport.translate, src->viewport.translate, 18247ec681f3Smrg src->viewport.count); 18257ec681f3Smrg dirty |= V3DV_CMD_DIRTY_VIEWPORT; 18267ec681f3Smrg } 18277ec681f3Smrg } 18287ec681f3Smrg 18297ec681f3Smrg if (!(dynamic_mask & V3DV_DYNAMIC_SCISSOR)) { 18307ec681f3Smrg dest->scissor.count = src->scissor.count; 18317ec681f3Smrg if (memcmp(&dest->scissor.scissors, &src->scissor.scissors, 18327ec681f3Smrg src->scissor.count * sizeof(VkRect2D))) { 18337ec681f3Smrg typed_memcpy(dest->scissor.scissors, 18347ec681f3Smrg src->scissor.scissors, src->scissor.count); 18357ec681f3Smrg dirty |= V3DV_CMD_DIRTY_SCISSOR; 18367ec681f3Smrg } 18377ec681f3Smrg } 18387ec681f3Smrg 18397ec681f3Smrg if (!(dynamic_mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) { 18407ec681f3Smrg if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask, 18417ec681f3Smrg sizeof(src->stencil_compare_mask))) { 18427ec681f3Smrg dest->stencil_compare_mask = src->stencil_compare_mask; 18437ec681f3Smrg dirty |= V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK; 18447ec681f3Smrg } 18457ec681f3Smrg } 18467ec681f3Smrg 18477ec681f3Smrg if (!(dynamic_mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) { 18487ec681f3Smrg 
if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask, 18497ec681f3Smrg sizeof(src->stencil_write_mask))) { 18507ec681f3Smrg dest->stencil_write_mask = src->stencil_write_mask; 18517ec681f3Smrg dirty |= V3DV_CMD_DIRTY_STENCIL_WRITE_MASK; 18527ec681f3Smrg } 18537ec681f3Smrg } 18547ec681f3Smrg 18557ec681f3Smrg if (!(dynamic_mask & V3DV_DYNAMIC_STENCIL_REFERENCE)) { 18567ec681f3Smrg if (memcmp(&dest->stencil_reference, &src->stencil_reference, 18577ec681f3Smrg sizeof(src->stencil_reference))) { 18587ec681f3Smrg dest->stencil_reference = src->stencil_reference; 18597ec681f3Smrg dirty |= V3DV_CMD_DIRTY_STENCIL_REFERENCE; 18607ec681f3Smrg } 18617ec681f3Smrg } 18627ec681f3Smrg 18637ec681f3Smrg if (!(dynamic_mask & V3DV_DYNAMIC_BLEND_CONSTANTS)) { 18647ec681f3Smrg if (memcmp(dest->blend_constants, src->blend_constants, 18657ec681f3Smrg sizeof(src->blend_constants))) { 18667ec681f3Smrg memcpy(dest->blend_constants, src->blend_constants, 18677ec681f3Smrg sizeof(src->blend_constants)); 18687ec681f3Smrg dirty |= V3DV_CMD_DIRTY_BLEND_CONSTANTS; 18697ec681f3Smrg } 18707ec681f3Smrg } 18717ec681f3Smrg 18727ec681f3Smrg if (!(dynamic_mask & V3DV_DYNAMIC_DEPTH_BIAS)) { 18737ec681f3Smrg if (memcmp(&dest->depth_bias, &src->depth_bias, 18747ec681f3Smrg sizeof(src->depth_bias))) { 18757ec681f3Smrg memcpy(&dest->depth_bias, &src->depth_bias, sizeof(src->depth_bias)); 18767ec681f3Smrg dirty |= V3DV_CMD_DIRTY_DEPTH_BIAS; 18777ec681f3Smrg } 18787ec681f3Smrg } 18797ec681f3Smrg 18807ec681f3Smrg if (!(dynamic_mask & V3DV_DYNAMIC_LINE_WIDTH)) { 18817ec681f3Smrg if (dest->line_width != src->line_width) { 18827ec681f3Smrg dest->line_width = src->line_width; 18837ec681f3Smrg dirty |= V3DV_CMD_DIRTY_LINE_WIDTH; 18847ec681f3Smrg } 18857ec681f3Smrg } 18867ec681f3Smrg 18877ec681f3Smrg if (!(dynamic_mask & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) { 18887ec681f3Smrg if (dest->color_write_enable != src->color_write_enable) { 18897ec681f3Smrg dest->color_write_enable = src->color_write_enable; 18907ec681f3Smrg dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE; 18917ec681f3Smrg } 18927ec681f3Smrg } 18937ec681f3Smrg 18947ec681f3Smrg cmd_buffer->state.dynamic.mask = dynamic_mask; 18957ec681f3Smrg cmd_buffer->state.dirty |= dirty; 18967ec681f3Smrg} 18977ec681f3Smrg 18987ec681f3Smrgstatic void 18997ec681f3Smrgbind_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer, 19007ec681f3Smrg struct v3dv_pipeline *pipeline) 19017ec681f3Smrg{ 19027ec681f3Smrg assert(pipeline && !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT)); 19037ec681f3Smrg if (cmd_buffer->state.gfx.pipeline == pipeline) 19047ec681f3Smrg return; 19057ec681f3Smrg 19067ec681f3Smrg cmd_buffer->state.gfx.pipeline = pipeline; 19077ec681f3Smrg 19087ec681f3Smrg cmd_buffer_bind_pipeline_static_state(cmd_buffer, &pipeline->dynamic_state); 19097ec681f3Smrg 19107ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PIPELINE; 19117ec681f3Smrg} 19127ec681f3Smrg 19137ec681f3Smrgstatic void 19147ec681f3Smrgbind_compute_pipeline(struct v3dv_cmd_buffer *cmd_buffer, 19157ec681f3Smrg struct v3dv_pipeline *pipeline) 19167ec681f3Smrg{ 19177ec681f3Smrg assert(pipeline && pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); 19187ec681f3Smrg 19197ec681f3Smrg if (cmd_buffer->state.compute.pipeline == pipeline) 19207ec681f3Smrg return; 19217ec681f3Smrg 19227ec681f3Smrg cmd_buffer->state.compute.pipeline = pipeline; 19237ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_PIPELINE; 19247ec681f3Smrg} 19257ec681f3Smrg 19267ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 
19277ec681f3Smrgv3dv_CmdBindPipeline(VkCommandBuffer commandBuffer, 19287ec681f3Smrg VkPipelineBindPoint pipelineBindPoint, 19297ec681f3Smrg VkPipeline _pipeline) 19307ec681f3Smrg{ 19317ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 19327ec681f3Smrg V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline); 19337ec681f3Smrg 19347ec681f3Smrg switch (pipelineBindPoint) { 19357ec681f3Smrg case VK_PIPELINE_BIND_POINT_COMPUTE: 19367ec681f3Smrg bind_compute_pipeline(cmd_buffer, pipeline); 19377ec681f3Smrg break; 19387ec681f3Smrg 19397ec681f3Smrg case VK_PIPELINE_BIND_POINT_GRAPHICS: 19407ec681f3Smrg bind_graphics_pipeline(cmd_buffer, pipeline); 19417ec681f3Smrg break; 19427ec681f3Smrg 19437ec681f3Smrg default: 19447ec681f3Smrg assert(!"invalid bind point"); 19457ec681f3Smrg break; 19467ec681f3Smrg } 19477ec681f3Smrg} 19487ec681f3Smrg 19497ec681f3Smrg/* FIXME: C&P from radv. tu has similar code. Perhaps common place? */ 19507ec681f3Smrgvoid 19517ec681f3Smrgv3dv_viewport_compute_xform(const VkViewport *viewport, 19527ec681f3Smrg float scale[3], 19537ec681f3Smrg float translate[3]) 19547ec681f3Smrg{ 19557ec681f3Smrg float x = viewport->x; 19567ec681f3Smrg float y = viewport->y; 19577ec681f3Smrg float half_width = 0.5f * viewport->width; 19587ec681f3Smrg float half_height = 0.5f * viewport->height; 19597ec681f3Smrg double n = viewport->minDepth; 19607ec681f3Smrg double f = viewport->maxDepth; 19617ec681f3Smrg 19627ec681f3Smrg scale[0] = half_width; 19637ec681f3Smrg translate[0] = half_width + x; 19647ec681f3Smrg scale[1] = half_height; 19657ec681f3Smrg translate[1] = half_height + y; 19667ec681f3Smrg 19677ec681f3Smrg scale[2] = (f - n); 19687ec681f3Smrg translate[2] = n; 19697ec681f3Smrg 19707ec681f3Smrg /* It seems that if the scale is small enough the hardware won't clip 19717ec681f3Smrg * correctly so we work around this my choosing the smallest scale that 19727ec681f3Smrg * seems to work. 19737ec681f3Smrg * 19747ec681f3Smrg * This case is exercised by CTS: 19757ec681f3Smrg * dEQP-VK.draw.inverted_depth_ranges.nodepthclamp_deltazero 19767ec681f3Smrg */ 19777ec681f3Smrg const float min_abs_scale = 0.000009f; 19787ec681f3Smrg if (fabs(scale[2]) < min_abs_scale) 19797ec681f3Smrg scale[2] = min_abs_scale * (scale[2] < 0 ? 
-1.0f : 1.0f); 19807ec681f3Smrg} 19817ec681f3Smrg 19827ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 19837ec681f3Smrgv3dv_CmdSetViewport(VkCommandBuffer commandBuffer, 19847ec681f3Smrg uint32_t firstViewport, 19857ec681f3Smrg uint32_t viewportCount, 19867ec681f3Smrg const VkViewport *pViewports) 19877ec681f3Smrg{ 19887ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 19897ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 19907ec681f3Smrg const uint32_t total_count = firstViewport + viewportCount; 19917ec681f3Smrg 19927ec681f3Smrg assert(firstViewport < MAX_VIEWPORTS); 19937ec681f3Smrg assert(total_count >= 1 && total_count <= MAX_VIEWPORTS); 19947ec681f3Smrg 19957ec681f3Smrg if (state->dynamic.viewport.count < total_count) 19967ec681f3Smrg state->dynamic.viewport.count = total_count; 19977ec681f3Smrg 19987ec681f3Smrg if (!memcmp(state->dynamic.viewport.viewports + firstViewport, 19997ec681f3Smrg pViewports, viewportCount * sizeof(*pViewports))) { 20007ec681f3Smrg return; 20017ec681f3Smrg } 20027ec681f3Smrg 20037ec681f3Smrg memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports, 20047ec681f3Smrg viewportCount * sizeof(*pViewports)); 20057ec681f3Smrg 20067ec681f3Smrg for (uint32_t i = firstViewport; i < total_count; i++) { 20077ec681f3Smrg v3dv_viewport_compute_xform(&state->dynamic.viewport.viewports[i], 20087ec681f3Smrg state->dynamic.viewport.scale[i], 20097ec681f3Smrg state->dynamic.viewport.translate[i]); 20107ec681f3Smrg } 20117ec681f3Smrg 20127ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEWPORT; 20137ec681f3Smrg} 20147ec681f3Smrg 20157ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 20167ec681f3Smrgv3dv_CmdSetScissor(VkCommandBuffer commandBuffer, 20177ec681f3Smrg uint32_t firstScissor, 20187ec681f3Smrg uint32_t scissorCount, 20197ec681f3Smrg const VkRect2D *pScissors) 20207ec681f3Smrg{ 20217ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 20227ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 20237ec681f3Smrg 20247ec681f3Smrg assert(firstScissor < MAX_SCISSORS); 20257ec681f3Smrg assert(firstScissor + scissorCount >= 1 && 20267ec681f3Smrg firstScissor + scissorCount <= MAX_SCISSORS); 20277ec681f3Smrg 20287ec681f3Smrg if (state->dynamic.scissor.count < firstScissor + scissorCount) 20297ec681f3Smrg state->dynamic.scissor.count = firstScissor + scissorCount; 20307ec681f3Smrg 20317ec681f3Smrg if (!memcmp(state->dynamic.scissor.scissors + firstScissor, 20327ec681f3Smrg pScissors, scissorCount * sizeof(*pScissors))) { 20337ec681f3Smrg return; 20347ec681f3Smrg } 20357ec681f3Smrg 20367ec681f3Smrg memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors, 20377ec681f3Smrg scissorCount * sizeof(*pScissors)); 20387ec681f3Smrg 20397ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_SCISSOR; 20407ec681f3Smrg} 20417ec681f3Smrg 20427ec681f3Smrgstatic void 20437ec681f3Smrgemit_scissor(struct v3dv_cmd_buffer *cmd_buffer) 20447ec681f3Smrg{ 20457ec681f3Smrg if (cmd_buffer->state.dynamic.viewport.count == 0) 20467ec681f3Smrg return; 20477ec681f3Smrg 20487ec681f3Smrg struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; 20497ec681f3Smrg 20507ec681f3Smrg /* FIXME: right now we only support one viewport. viewporst[0] would work 20517ec681f3Smrg * now, but would need to change if we allow multiple viewports. 
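    * (the driver currently only exposes a single viewport to applications,
    * so index 0 is the only viewport we can ever see here)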
20527ec681f3Smrg */ 20537ec681f3Smrg float *vptranslate = dynamic->viewport.translate[0]; 20547ec681f3Smrg float *vpscale = dynamic->viewport.scale[0]; 20557ec681f3Smrg 20567ec681f3Smrg float vp_minx = -fabsf(vpscale[0]) + vptranslate[0]; 20577ec681f3Smrg float vp_maxx = fabsf(vpscale[0]) + vptranslate[0]; 20587ec681f3Smrg float vp_miny = -fabsf(vpscale[1]) + vptranslate[1]; 20597ec681f3Smrg float vp_maxy = fabsf(vpscale[1]) + vptranslate[1]; 20607ec681f3Smrg 20617ec681f3Smrg /* Quoting from v3dx_emit: 20627ec681f3Smrg * "Clip to the scissor if it's enabled, but still clip to the 20637ec681f3Smrg * drawable regardless since that controls where the binner 20647ec681f3Smrg * tries to put things. 20657ec681f3Smrg * 20667ec681f3Smrg * Additionally, always clip the rendering to the viewport, 20677ec681f3Smrg * since the hardware does guardband clipping, meaning 20687ec681f3Smrg * primitives would rasterize outside of the view volume." 20697ec681f3Smrg */ 20707ec681f3Smrg uint32_t minx, miny, maxx, maxy; 20717ec681f3Smrg 20727ec681f3Smrg /* From the Vulkan spec: 20737ec681f3Smrg * 20747ec681f3Smrg * "The application must ensure (using scissor if necessary) that all 20757ec681f3Smrg * rendering is contained within the render area. The render area must be 20767ec681f3Smrg * contained within the framebuffer dimensions." 20777ec681f3Smrg * 20787ec681f3Smrg * So it is the application's responsibility to ensure this. Still, we can 20797ec681f3Smrg * help by automatically restricting the scissor rect to the render area. 20807ec681f3Smrg */ 20817ec681f3Smrg minx = MAX2(vp_minx, cmd_buffer->state.render_area.offset.x); 20827ec681f3Smrg miny = MAX2(vp_miny, cmd_buffer->state.render_area.offset.y); 20837ec681f3Smrg maxx = MIN2(vp_maxx, cmd_buffer->state.render_area.offset.x + 20847ec681f3Smrg cmd_buffer->state.render_area.extent.width); 20857ec681f3Smrg maxy = MIN2(vp_maxy, cmd_buffer->state.render_area.offset.y + 20867ec681f3Smrg cmd_buffer->state.render_area.extent.height); 20877ec681f3Smrg 20887ec681f3Smrg minx = vp_minx; 20897ec681f3Smrg miny = vp_miny; 20907ec681f3Smrg maxx = vp_maxx; 20917ec681f3Smrg maxy = vp_maxy; 20927ec681f3Smrg 20937ec681f3Smrg /* Clip against user provided scissor if needed. 20947ec681f3Smrg * 20957ec681f3Smrg * FIXME: right now we only allow one scissor. Below would need to be 20967ec681f3Smrg * updated if we support more 20977ec681f3Smrg */ 20987ec681f3Smrg if (dynamic->scissor.count > 0) { 20997ec681f3Smrg VkRect2D *scissor = &dynamic->scissor.scissors[0]; 21007ec681f3Smrg minx = MAX2(minx, scissor->offset.x); 21017ec681f3Smrg miny = MAX2(miny, scissor->offset.y); 21027ec681f3Smrg maxx = MIN2(maxx, scissor->offset.x + scissor->extent.width); 21037ec681f3Smrg maxy = MIN2(maxy, scissor->offset.y + scissor->extent.height); 21047ec681f3Smrg } 21057ec681f3Smrg 21067ec681f3Smrg /* If the scissor is outside the viewport area we end up with 21077ec681f3Smrg * min{x,y} > max{x,y}. 
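    * When that happens we collapse the clip window to an empty (zero-sized)
    * rectangle so that nothing is rasterized.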
21087ec681f3Smrg */ 21097ec681f3Smrg if (minx > maxx) 21107ec681f3Smrg maxx = minx; 21117ec681f3Smrg if (miny > maxy) 21127ec681f3Smrg maxy = miny; 21137ec681f3Smrg 21147ec681f3Smrg cmd_buffer->state.clip_window.offset.x = minx; 21157ec681f3Smrg cmd_buffer->state.clip_window.offset.y = miny; 21167ec681f3Smrg cmd_buffer->state.clip_window.extent.width = maxx - minx; 21177ec681f3Smrg cmd_buffer->state.clip_window.extent.height = maxy - miny; 21187ec681f3Smrg 21197ec681f3Smrg v3dv_X(cmd_buffer->device, job_emit_clip_window) 21207ec681f3Smrg (cmd_buffer->state.job, &cmd_buffer->state.clip_window); 21217ec681f3Smrg 21227ec681f3Smrg cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_SCISSOR; 21237ec681f3Smrg} 21247ec681f3Smrg 21257ec681f3Smrgstatic void 21267ec681f3Smrgupdate_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer, 21277ec681f3Smrg uint32_t dirty_uniform_state) 21287ec681f3Smrg{ 21297ec681f3Smrg /* We need to update uniform streams if any piece of state that is passed 21307ec681f3Smrg * to the shader as a uniform may have changed. 21317ec681f3Smrg * 21327ec681f3Smrg * If only descriptor sets are dirty then we can safely ignore updates 21337ec681f3Smrg * for shader stages that don't access descriptors. 21347ec681f3Smrg */ 21357ec681f3Smrg 21367ec681f3Smrg struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 21377ec681f3Smrg assert(pipeline); 21387ec681f3Smrg 21397ec681f3Smrg const bool has_new_pipeline = dirty_uniform_state & V3DV_CMD_DIRTY_PIPELINE; 21407ec681f3Smrg const bool has_new_viewport = dirty_uniform_state & V3DV_CMD_DIRTY_VIEWPORT; 21417ec681f3Smrg const bool has_new_push_constants = dirty_uniform_state & V3DV_CMD_DIRTY_PUSH_CONSTANTS; 21427ec681f3Smrg const bool has_new_descriptors = dirty_uniform_state & V3DV_CMD_DIRTY_DESCRIPTOR_SETS; 21437ec681f3Smrg const bool has_new_view_index = dirty_uniform_state & V3DV_CMD_DIRTY_VIEW_INDEX; 21447ec681f3Smrg 21457ec681f3Smrg /* VK_SHADER_STAGE_FRAGMENT_BIT */ 21467ec681f3Smrg const bool has_new_descriptors_fs = 21477ec681f3Smrg has_new_descriptors && 21487ec681f3Smrg (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_FRAGMENT_BIT); 21497ec681f3Smrg 21507ec681f3Smrg const bool has_new_push_constants_fs = 21517ec681f3Smrg has_new_push_constants && 21527ec681f3Smrg (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_FRAGMENT_BIT); 21537ec681f3Smrg 21547ec681f3Smrg const bool needs_fs_update = has_new_pipeline || 21557ec681f3Smrg has_new_view_index || 21567ec681f3Smrg has_new_push_constants_fs || 21577ec681f3Smrg has_new_descriptors_fs || 21587ec681f3Smrg has_new_view_index; 21597ec681f3Smrg 21607ec681f3Smrg if (needs_fs_update) { 21617ec681f3Smrg struct v3dv_shader_variant *fs_variant = 21627ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 21637ec681f3Smrg 21647ec681f3Smrg cmd_buffer->state.uniforms.fs = 21657ec681f3Smrg v3dv_write_uniforms(cmd_buffer, pipeline, fs_variant); 21667ec681f3Smrg } 21677ec681f3Smrg 21687ec681f3Smrg /* VK_SHADER_STAGE_GEOMETRY_BIT */ 21697ec681f3Smrg if (pipeline->has_gs) { 21707ec681f3Smrg const bool has_new_descriptors_gs = 21717ec681f3Smrg has_new_descriptors && 21727ec681f3Smrg (cmd_buffer->state.dirty_descriptor_stages & 21737ec681f3Smrg VK_SHADER_STAGE_GEOMETRY_BIT); 21747ec681f3Smrg 21757ec681f3Smrg const bool has_new_push_constants_gs = 21767ec681f3Smrg has_new_push_constants && 21777ec681f3Smrg (cmd_buffer->state.dirty_push_constants_stages & 21787ec681f3Smrg VK_SHADER_STAGE_GEOMETRY_BIT); 21797ec681f3Smrg 21807ec681f3Smrg const bool 
needs_gs_update = has_new_viewport || 21817ec681f3Smrg has_new_view_index || 21827ec681f3Smrg has_new_pipeline || 21837ec681f3Smrg has_new_push_constants_gs || 21847ec681f3Smrg has_new_descriptors_gs; 21857ec681f3Smrg 21867ec681f3Smrg if (needs_gs_update) { 21877ec681f3Smrg struct v3dv_shader_variant *gs_variant = 21887ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; 21897ec681f3Smrg 21907ec681f3Smrg struct v3dv_shader_variant *gs_bin_variant = 21917ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; 21927ec681f3Smrg 21937ec681f3Smrg cmd_buffer->state.uniforms.gs = 21947ec681f3Smrg v3dv_write_uniforms(cmd_buffer, pipeline, gs_variant); 21957ec681f3Smrg 21967ec681f3Smrg cmd_buffer->state.uniforms.gs_bin = 21977ec681f3Smrg v3dv_write_uniforms(cmd_buffer, pipeline, gs_bin_variant); 21987ec681f3Smrg } 21997ec681f3Smrg } 22007ec681f3Smrg 22017ec681f3Smrg /* VK_SHADER_STAGE_VERTEX_BIT */ 22027ec681f3Smrg const bool has_new_descriptors_vs = 22037ec681f3Smrg has_new_descriptors && 22047ec681f3Smrg (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_VERTEX_BIT); 22057ec681f3Smrg 22067ec681f3Smrg const bool has_new_push_constants_vs = 22077ec681f3Smrg has_new_push_constants && 22087ec681f3Smrg (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_VERTEX_BIT); 22097ec681f3Smrg 22107ec681f3Smrg const bool needs_vs_update = has_new_viewport || 22117ec681f3Smrg has_new_view_index || 22127ec681f3Smrg has_new_pipeline || 22137ec681f3Smrg has_new_push_constants_vs || 22147ec681f3Smrg has_new_descriptors_vs; 22157ec681f3Smrg 22167ec681f3Smrg if (needs_vs_update) { 22177ec681f3Smrg struct v3dv_shader_variant *vs_variant = 22187ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; 22197ec681f3Smrg 22207ec681f3Smrg struct v3dv_shader_variant *vs_bin_variant = 22217ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]; 22227ec681f3Smrg 22237ec681f3Smrg cmd_buffer->state.uniforms.vs = 22247ec681f3Smrg v3dv_write_uniforms(cmd_buffer, pipeline, vs_variant); 22257ec681f3Smrg 22267ec681f3Smrg cmd_buffer->state.uniforms.vs_bin = 22277ec681f3Smrg v3dv_write_uniforms(cmd_buffer, pipeline, vs_bin_variant); 22287ec681f3Smrg } 22297ec681f3Smrg 22307ec681f3Smrg cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEW_INDEX; 22317ec681f3Smrg} 22327ec681f3Smrg 22337ec681f3Smrg/* This stores command buffer state that we might be about to stomp for 22347ec681f3Smrg * a meta operation. 
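 * (a meta operation is an internally recorded draw or copy, such as the
 * vkCmdClearAttachments fallback used for subpass clears above, which binds
 * its own pipeline and may use its own render pass)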
22357ec681f3Smrg */ 22367ec681f3Smrgvoid 22377ec681f3Smrgv3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer, 22387ec681f3Smrg bool push_descriptor_state) 22397ec681f3Smrg{ 22407ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 22417ec681f3Smrg 22427ec681f3Smrg if (state->subpass_idx != -1) { 22437ec681f3Smrg state->meta.subpass_idx = state->subpass_idx; 22447ec681f3Smrg state->meta.framebuffer = v3dv_framebuffer_to_handle(state->framebuffer); 22457ec681f3Smrg state->meta.pass = v3dv_render_pass_to_handle(state->pass); 22467ec681f3Smrg 22477ec681f3Smrg const uint32_t attachment_state_item_size = 22487ec681f3Smrg sizeof(struct v3dv_cmd_buffer_attachment_state); 22497ec681f3Smrg const uint32_t attachment_state_total_size = 22507ec681f3Smrg attachment_state_item_size * state->attachment_alloc_count; 22517ec681f3Smrg if (state->meta.attachment_alloc_count < state->attachment_alloc_count) { 22527ec681f3Smrg if (state->meta.attachment_alloc_count > 0) 22537ec681f3Smrg vk_free(&cmd_buffer->device->vk.alloc, state->meta.attachments); 22547ec681f3Smrg 22557ec681f3Smrg state->meta.attachments = vk_zalloc(&cmd_buffer->device->vk.alloc, 22567ec681f3Smrg attachment_state_total_size, 8, 22577ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 22587ec681f3Smrg if (!state->meta.attachments) { 22597ec681f3Smrg v3dv_flag_oom(cmd_buffer, NULL); 22607ec681f3Smrg return; 22617ec681f3Smrg } 22627ec681f3Smrg state->meta.attachment_alloc_count = state->attachment_alloc_count; 22637ec681f3Smrg } 22647ec681f3Smrg state->meta.attachment_count = state->attachment_alloc_count; 22657ec681f3Smrg memcpy(state->meta.attachments, state->attachments, 22667ec681f3Smrg attachment_state_total_size); 22677ec681f3Smrg 22687ec681f3Smrg state->meta.tile_aligned_render_area = state->tile_aligned_render_area; 22697ec681f3Smrg memcpy(&state->meta.render_area, &state->render_area, sizeof(VkRect2D)); 22707ec681f3Smrg } 22717ec681f3Smrg 22727ec681f3Smrg /* We expect that meta operations are graphics-only, so we only take into 22737ec681f3Smrg * account the graphics pipeline, and the graphics state 22747ec681f3Smrg */ 22757ec681f3Smrg state->meta.gfx.pipeline = state->gfx.pipeline; 22767ec681f3Smrg memcpy(&state->meta.dynamic, &state->dynamic, sizeof(state->dynamic)); 22777ec681f3Smrg 22787ec681f3Smrg struct v3dv_descriptor_state *gfx_descriptor_state = 22797ec681f3Smrg &cmd_buffer->state.gfx.descriptor_state; 22807ec681f3Smrg 22817ec681f3Smrg if (push_descriptor_state) { 22827ec681f3Smrg if (gfx_descriptor_state->valid != 0) { 22837ec681f3Smrg memcpy(&state->meta.gfx.descriptor_state, gfx_descriptor_state, 22847ec681f3Smrg sizeof(state->gfx.descriptor_state)); 22857ec681f3Smrg } 22867ec681f3Smrg state->meta.has_descriptor_state = true; 22877ec681f3Smrg } else { 22887ec681f3Smrg state->meta.has_descriptor_state = false; 22897ec681f3Smrg } 22907ec681f3Smrg 22917ec681f3Smrg /* FIXME: if we keep track of wether we have bound any push constant state 22927ec681f3Smrg * at all we could restruct this only to cases where it is actually 22937ec681f3Smrg * necessary. 
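    * For now we always save, and later restore, the full push constant
    * buffer.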
 */
   memcpy(state->meta.push_constants, cmd_buffer->push_constants_data,
          sizeof(state->meta.push_constants));
}

/* This restores command buffer state after a meta operation
 */
void
v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
                               uint32_t dirty_dynamic_state,
                               bool needs_subpass_resume)
{
   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;

   if (state->meta.subpass_idx != -1) {
      state->pass = v3dv_render_pass_from_handle(state->meta.pass);
      state->framebuffer = v3dv_framebuffer_from_handle(state->meta.framebuffer);

      assert(state->meta.attachment_count <= state->attachment_alloc_count);
      const uint32_t attachment_state_item_size =
         sizeof(struct v3dv_cmd_buffer_attachment_state);
      const uint32_t attachment_state_total_size =
         attachment_state_item_size * state->meta.attachment_count;
      memcpy(state->attachments, state->meta.attachments,
             attachment_state_total_size);

      state->tile_aligned_render_area = state->meta.tile_aligned_render_area;
      memcpy(&state->render_area, &state->meta.render_area, sizeof(VkRect2D));

      /* If needs_subpass_resume is true it means that we emitted the meta
       * operation in its own job (possibly with an RT config that is
       * incompatible with the current subpass), so resuming subpass execution
       * after it requires that we create a new job with the subpass RT setup.
23277ec681f3Smrg */ 23287ec681f3Smrg if (needs_subpass_resume) 23297ec681f3Smrg v3dv_cmd_buffer_subpass_resume(cmd_buffer, state->meta.subpass_idx); 23307ec681f3Smrg } else { 23317ec681f3Smrg state->subpass_idx = -1; 23327ec681f3Smrg } 23337ec681f3Smrg 23347ec681f3Smrg if (state->meta.gfx.pipeline != NULL) { 23357ec681f3Smrg struct v3dv_pipeline *pipeline = state->meta.gfx.pipeline; 23367ec681f3Smrg VkPipelineBindPoint pipeline_binding = 23377ec681f3Smrg v3dv_pipeline_get_binding_point(pipeline); 23387ec681f3Smrg v3dv_CmdBindPipeline(v3dv_cmd_buffer_to_handle(cmd_buffer), 23397ec681f3Smrg pipeline_binding, 23407ec681f3Smrg v3dv_pipeline_to_handle(state->meta.gfx.pipeline)); 23417ec681f3Smrg } else { 23427ec681f3Smrg state->gfx.pipeline = NULL; 23437ec681f3Smrg } 23447ec681f3Smrg 23457ec681f3Smrg if (dirty_dynamic_state) { 23467ec681f3Smrg memcpy(&state->dynamic, &state->meta.dynamic, sizeof(state->dynamic)); 23477ec681f3Smrg state->dirty |= dirty_dynamic_state; 23487ec681f3Smrg } 23497ec681f3Smrg 23507ec681f3Smrg if (state->meta.has_descriptor_state) { 23517ec681f3Smrg if (state->meta.gfx.descriptor_state.valid != 0) { 23527ec681f3Smrg memcpy(&state->gfx.descriptor_state, &state->meta.gfx.descriptor_state, 23537ec681f3Smrg sizeof(state->gfx.descriptor_state)); 23547ec681f3Smrg } else { 23557ec681f3Smrg state->gfx.descriptor_state.valid = 0; 23567ec681f3Smrg } 23577ec681f3Smrg } 23587ec681f3Smrg 23597ec681f3Smrg memcpy(cmd_buffer->push_constants_data, state->meta.push_constants, 23607ec681f3Smrg sizeof(state->meta.push_constants)); 23617ec681f3Smrg 23627ec681f3Smrg state->meta.gfx.pipeline = NULL; 23637ec681f3Smrg state->meta.framebuffer = VK_NULL_HANDLE; 23647ec681f3Smrg state->meta.pass = VK_NULL_HANDLE; 23657ec681f3Smrg state->meta.subpass_idx = -1; 23667ec681f3Smrg state->meta.has_descriptor_state = false; 23677ec681f3Smrg} 23687ec681f3Smrg 23697ec681f3Smrgstatic struct v3dv_job * 23707ec681f3Smrgcmd_buffer_pre_draw_split_job(struct v3dv_cmd_buffer *cmd_buffer) 23717ec681f3Smrg{ 23727ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 23737ec681f3Smrg assert(job); 23747ec681f3Smrg 23757ec681f3Smrg /* If the job has been flagged with 'always_flush' and it has already 23767ec681f3Smrg * recorded any draw calls then we need to start a new job for it. 23777ec681f3Smrg */ 23787ec681f3Smrg if (job->always_flush && job->draw_count > 0) { 23797ec681f3Smrg assert(cmd_buffer->state.pass); 23807ec681f3Smrg /* First, flag the current job as not being the last in the 23817ec681f3Smrg * current subpass 23827ec681f3Smrg */ 23837ec681f3Smrg job->is_subpass_finish = false; 23847ec681f3Smrg 23857ec681f3Smrg /* Now start a new job in the same subpass and flag it as continuing 23867ec681f3Smrg * the current subpass. 23877ec681f3Smrg */ 23887ec681f3Smrg job = v3dv_cmd_buffer_subpass_resume(cmd_buffer, 23897ec681f3Smrg cmd_buffer->state.subpass_idx); 23907ec681f3Smrg assert(job->draw_count == 0); 23917ec681f3Smrg 23927ec681f3Smrg /* Inherit the 'always flush' behavior */ 23937ec681f3Smrg job->always_flush = true; 23947ec681f3Smrg } 23957ec681f3Smrg 23967ec681f3Smrg assert(job->draw_count == 0 || !job->always_flush); 23977ec681f3Smrg return job; 23987ec681f3Smrg} 23997ec681f3Smrg 24007ec681f3Smrg/** 24017ec681f3Smrg * The Vulkan spec states: 24027ec681f3Smrg * 24037ec681f3Smrg * "It is legal for a subpass to use no color or depth/stencil 24047ec681f3Smrg * attachments (...) This kind of subpass can use shader side effects such 24057ec681f3Smrg * as image stores and atomics to produce an output. 
In this case, the
 * subpass continues to use the width, height, and layers of the framebuffer
 * to define the dimensions of the rendering area, and the
 * rasterizationSamples from each pipeline’s
 * VkPipelineMultisampleStateCreateInfo to define the number of samples used
 * in rasterization."
 *
 * We need to enable MSAA in the TILE_BINNING_MODE_CFG packet, which we
 * emit when we start a new frame at the beginning of a subpass. At that point,
 * if the framebuffer doesn't have any attachments we won't enable MSAA and
 * the job won't be valid in the scenario described by the spec.
 *
 * This function is intended to be called before a draw call and will test if
 * we are in that scenario, in which case, it will restart the current job
 * with MSAA enabled.
 */
static void
cmd_buffer_restart_job_for_msaa_if_needed(struct v3dv_cmd_buffer *cmd_buffer)
{
   assert(cmd_buffer->state.job);

   /* We don't support variableMultisampleRate so we know that all pipelines
    * bound in the same subpass must have a matching number of samples, so we
    * can do this check only on the first draw call.
    */
   if (cmd_buffer->state.job->draw_count > 0)
      return;

   /* We only need to restart the frame if the pipeline requires MSAA but
    * our frame tiling didn't enable it.
    */
   if (!cmd_buffer->state.gfx.pipeline->msaa ||
       cmd_buffer->state.job->frame_tiling.msaa) {
      return;
   }

   /* FIXME: Secondary command buffers don't start frames. Instead, they are
    * recorded into primary jobs that start them. For secondaries, we should
    * still handle this scenario, but we should do that when we record them
    * into primaries by testing if any of the secondaries has multisampled
    * draw calls in them, and then using that info to decide if we need to
    * restart the primary job into which they are being recorded.
 */
   if (cmd_buffer->level != VK_COMMAND_BUFFER_LEVEL_PRIMARY)
      return;

   /* Drop the current job and restart it with MSAA enabled */
   struct v3dv_job *old_job = cmd_buffer->state.job;
   cmd_buffer->state.job = NULL;

   struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
                                    sizeof(struct v3dv_job), 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!job) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return;
   }

   v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CL, cmd_buffer->device, cmd_buffer,
                 cmd_buffer->state.subpass_idx);
   cmd_buffer->state.job = job;

   v3dv_job_start_frame(job,
                        old_job->frame_tiling.width,
                        old_job->frame_tiling.height,
                        old_job->frame_tiling.layers,
                        true,
                        old_job->frame_tiling.render_target_count,
                        old_job->frame_tiling.internal_bpp,
                        true /* msaa */);

   v3dv_job_destroy(old_job);
}

void
v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer)
{
   assert(cmd_buffer->state.gfx.pipeline);
   assert(!(cmd_buffer->state.gfx.pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));

   /* If we emitted a pipeline barrier right before this draw we won't have
    * an active job. In that case, create a new job continuing the current
    * subpass.
    */
   if (!cmd_buffer->state.job) {
      v3dv_cmd_buffer_subpass_resume(cmd_buffer,
                                     cmd_buffer->state.subpass_idx);
   }

   /* Restart single sample job for MSAA pipeline if needed */
   cmd_buffer_restart_job_for_msaa_if_needed(cmd_buffer);

   /* If the job is configured to flush on every draw call we need to create
    * a new job now.
    */
   struct v3dv_job *job = cmd_buffer_pre_draw_split_job(cmd_buffer);
   job->draw_count++;

   /* GL shader state binds shaders, uniform and vertex attribute state. The
    * compiler injects uniforms to handle some descriptor types (such as
    * textures), so we need to regen that when descriptor state changes.
    *
    * We also need to emit new shader state if we have a dirty viewport since
    * that will require that we emit new uniform state for QUNIFORM_VIEWPORT_*.
25097ec681f3Smrg */ 25107ec681f3Smrg uint32_t *dirty = &cmd_buffer->state.dirty; 25117ec681f3Smrg 25127ec681f3Smrg const uint32_t dirty_uniform_state = 25137ec681f3Smrg *dirty & (V3DV_CMD_DIRTY_PIPELINE | 25147ec681f3Smrg V3DV_CMD_DIRTY_PUSH_CONSTANTS | 25157ec681f3Smrg V3DV_CMD_DIRTY_DESCRIPTOR_SETS | 25167ec681f3Smrg V3DV_CMD_DIRTY_VIEWPORT | 25177ec681f3Smrg V3DV_CMD_DIRTY_VIEW_INDEX); 25187ec681f3Smrg 25197ec681f3Smrg if (dirty_uniform_state) 25207ec681f3Smrg update_gfx_uniform_state(cmd_buffer, dirty_uniform_state); 25217ec681f3Smrg 25227ec681f3Smrg struct v3dv_device *device = cmd_buffer->device; 25237ec681f3Smrg 25247ec681f3Smrg if (dirty_uniform_state || (*dirty & V3DV_CMD_DIRTY_VERTEX_BUFFER)) 25257ec681f3Smrg v3dv_X(device, cmd_buffer_emit_gl_shader_state)(cmd_buffer); 25267ec681f3Smrg 25277ec681f3Smrg if (*dirty & (V3DV_CMD_DIRTY_PIPELINE)) { 25287ec681f3Smrg v3dv_X(device, cmd_buffer_emit_configuration_bits)(cmd_buffer); 25297ec681f3Smrg v3dv_X(device, cmd_buffer_emit_varyings_state)(cmd_buffer); 25307ec681f3Smrg } 25317ec681f3Smrg 25327ec681f3Smrg if (*dirty & (V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR)) { 25337ec681f3Smrg emit_scissor(cmd_buffer); 25347ec681f3Smrg } 25357ec681f3Smrg 25367ec681f3Smrg if (*dirty & V3DV_CMD_DIRTY_VIEWPORT) { 25377ec681f3Smrg v3dv_X(device, cmd_buffer_emit_viewport)(cmd_buffer); 25387ec681f3Smrg } 25397ec681f3Smrg 25407ec681f3Smrg if (*dirty & V3DV_CMD_DIRTY_INDEX_BUFFER) 25417ec681f3Smrg v3dv_X(device, cmd_buffer_emit_index_buffer)(cmd_buffer); 25427ec681f3Smrg 25437ec681f3Smrg const uint32_t dynamic_stencil_dirty_flags = 25447ec681f3Smrg V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK | 25457ec681f3Smrg V3DV_CMD_DIRTY_STENCIL_WRITE_MASK | 25467ec681f3Smrg V3DV_CMD_DIRTY_STENCIL_REFERENCE; 25477ec681f3Smrg if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | dynamic_stencil_dirty_flags)) 25487ec681f3Smrg v3dv_X(device, cmd_buffer_emit_stencil)(cmd_buffer); 25497ec681f3Smrg 25507ec681f3Smrg if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_DEPTH_BIAS)) 25517ec681f3Smrg v3dv_X(device, cmd_buffer_emit_depth_bias)(cmd_buffer); 25527ec681f3Smrg 25537ec681f3Smrg if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_BLEND_CONSTANTS)) 25547ec681f3Smrg v3dv_X(device, cmd_buffer_emit_blend)(cmd_buffer); 25557ec681f3Smrg 25567ec681f3Smrg if (*dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY) 25577ec681f3Smrg v3dv_X(device, cmd_buffer_emit_occlusion_query)(cmd_buffer); 25587ec681f3Smrg 25597ec681f3Smrg if (*dirty & V3DV_CMD_DIRTY_LINE_WIDTH) 25607ec681f3Smrg v3dv_X(device, cmd_buffer_emit_line_width)(cmd_buffer); 25617ec681f3Smrg 25627ec681f3Smrg if (*dirty & V3DV_CMD_DIRTY_PIPELINE) 25637ec681f3Smrg v3dv_X(device, cmd_buffer_emit_sample_state)(cmd_buffer); 25647ec681f3Smrg 25657ec681f3Smrg if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE)) 25667ec681f3Smrg v3dv_X(device, cmd_buffer_emit_color_write_mask)(cmd_buffer); 25677ec681f3Smrg 25687ec681f3Smrg cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PIPELINE; 25697ec681f3Smrg} 25707ec681f3Smrg 25717ec681f3Smrgstatic inline void 25727ec681f3Smrgcmd_buffer_set_view_index(struct v3dv_cmd_buffer *cmd_buffer, 25737ec681f3Smrg uint32_t view_index) 25747ec681f3Smrg{ 25757ec681f3Smrg cmd_buffer->state.view_index = view_index; 25767ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEW_INDEX; 25777ec681f3Smrg} 25787ec681f3Smrg 25797ec681f3Smrgstatic void 25807ec681f3Smrgcmd_buffer_draw(struct v3dv_cmd_buffer *cmd_buffer, 25817ec681f3Smrg struct v3dv_draw_info *info) 25827ec681f3Smrg{ 25837ec681f3Smrg 
25847ec681f3Smrg struct v3dv_render_pass *pass = cmd_buffer->state.pass; 25857ec681f3Smrg if (likely(!pass->multiview_enabled)) { 25867ec681f3Smrg v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); 25877ec681f3Smrg v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info); 25887ec681f3Smrg return; 25897ec681f3Smrg } 25907ec681f3Smrg 25917ec681f3Smrg uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; 25927ec681f3Smrg while (view_mask) { 25937ec681f3Smrg cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); 25947ec681f3Smrg v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); 25957ec681f3Smrg v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info); 25967ec681f3Smrg } 25977ec681f3Smrg} 25987ec681f3Smrg 25997ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 26007ec681f3Smrgv3dv_CmdDraw(VkCommandBuffer commandBuffer, 26017ec681f3Smrg uint32_t vertexCount, 26027ec681f3Smrg uint32_t instanceCount, 26037ec681f3Smrg uint32_t firstVertex, 26047ec681f3Smrg uint32_t firstInstance) 26057ec681f3Smrg{ 26067ec681f3Smrg if (vertexCount == 0 || instanceCount == 0) 26077ec681f3Smrg return; 26087ec681f3Smrg 26097ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 26107ec681f3Smrg struct v3dv_draw_info info = {}; 26117ec681f3Smrg info.vertex_count = vertexCount; 26127ec681f3Smrg info.instance_count = instanceCount; 26137ec681f3Smrg info.first_instance = firstInstance; 26147ec681f3Smrg info.first_vertex = firstVertex; 26157ec681f3Smrg 26167ec681f3Smrg cmd_buffer_draw(cmd_buffer, &info); 26177ec681f3Smrg} 26187ec681f3Smrg 26197ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 26207ec681f3Smrgv3dv_CmdDrawIndexed(VkCommandBuffer commandBuffer, 26217ec681f3Smrg uint32_t indexCount, 26227ec681f3Smrg uint32_t instanceCount, 26237ec681f3Smrg uint32_t firstIndex, 26247ec681f3Smrg int32_t vertexOffset, 26257ec681f3Smrg uint32_t firstInstance) 26267ec681f3Smrg{ 26277ec681f3Smrg if (indexCount == 0 || instanceCount == 0) 26287ec681f3Smrg return; 26297ec681f3Smrg 26307ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 26317ec681f3Smrg 26327ec681f3Smrg struct v3dv_render_pass *pass = cmd_buffer->state.pass; 26337ec681f3Smrg if (likely(!pass->multiview_enabled)) { 26347ec681f3Smrg v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); 26357ec681f3Smrg v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed) 26367ec681f3Smrg (cmd_buffer, indexCount, instanceCount, 26377ec681f3Smrg firstIndex, vertexOffset, firstInstance); 26387ec681f3Smrg return; 26397ec681f3Smrg } 26407ec681f3Smrg 26417ec681f3Smrg uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; 26427ec681f3Smrg while (view_mask) { 26437ec681f3Smrg cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); 26447ec681f3Smrg v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); 26457ec681f3Smrg v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed) 26467ec681f3Smrg (cmd_buffer, indexCount, instanceCount, 26477ec681f3Smrg firstIndex, vertexOffset, firstInstance); 26487ec681f3Smrg } 26497ec681f3Smrg} 26507ec681f3Smrg 26517ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 26527ec681f3Smrgv3dv_CmdDrawIndirect(VkCommandBuffer commandBuffer, 26537ec681f3Smrg VkBuffer _buffer, 26547ec681f3Smrg VkDeviceSize offset, 26557ec681f3Smrg uint32_t drawCount, 26567ec681f3Smrg uint32_t stride) 26577ec681f3Smrg{ 26587ec681f3Smrg /* drawCount is the number of draws to execute, and can be zero. 
*/ 26597ec681f3Smrg if (drawCount == 0) 26607ec681f3Smrg return; 26617ec681f3Smrg 26627ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 26637ec681f3Smrg V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer); 26647ec681f3Smrg 26657ec681f3Smrg struct v3dv_render_pass *pass = cmd_buffer->state.pass; 26667ec681f3Smrg if (likely(!pass->multiview_enabled)) { 26677ec681f3Smrg v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); 26687ec681f3Smrg v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect) 26697ec681f3Smrg (cmd_buffer, buffer, offset, drawCount, stride); 26707ec681f3Smrg return; 26717ec681f3Smrg } 26727ec681f3Smrg 26737ec681f3Smrg uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; 26747ec681f3Smrg while (view_mask) { 26757ec681f3Smrg cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); 26767ec681f3Smrg v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); 26777ec681f3Smrg v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect) 26787ec681f3Smrg (cmd_buffer, buffer, offset, drawCount, stride); 26797ec681f3Smrg } 26807ec681f3Smrg} 26817ec681f3Smrg 26827ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 26837ec681f3Smrgv3dv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, 26847ec681f3Smrg VkBuffer _buffer, 26857ec681f3Smrg VkDeviceSize offset, 26867ec681f3Smrg uint32_t drawCount, 26877ec681f3Smrg uint32_t stride) 26887ec681f3Smrg{ 26897ec681f3Smrg /* drawCount is the number of draws to execute, and can be zero. */ 26907ec681f3Smrg if (drawCount == 0) 26917ec681f3Smrg return; 26927ec681f3Smrg 26937ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 26947ec681f3Smrg V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer); 26957ec681f3Smrg 26967ec681f3Smrg struct v3dv_render_pass *pass = cmd_buffer->state.pass; 26977ec681f3Smrg if (likely(!pass->multiview_enabled)) { 26987ec681f3Smrg v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); 26997ec681f3Smrg v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect) 27007ec681f3Smrg (cmd_buffer, buffer, offset, drawCount, stride); 27017ec681f3Smrg return; 27027ec681f3Smrg } 27037ec681f3Smrg 27047ec681f3Smrg uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; 27057ec681f3Smrg while (view_mask) { 27067ec681f3Smrg cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); 27077ec681f3Smrg v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); 27087ec681f3Smrg v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect) 27097ec681f3Smrg (cmd_buffer, buffer, offset, drawCount, stride); 27107ec681f3Smrg } 27117ec681f3Smrg} 27127ec681f3Smrg 27137ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 27147ec681f3Smrgv3dv_CmdPipelineBarrier(VkCommandBuffer commandBuffer, 27157ec681f3Smrg VkPipelineStageFlags srcStageMask, 27167ec681f3Smrg VkPipelineStageFlags dstStageMask, 27177ec681f3Smrg VkDependencyFlags dependencyFlags, 27187ec681f3Smrg uint32_t memoryBarrierCount, 27197ec681f3Smrg const VkMemoryBarrier *pMemoryBarriers, 27207ec681f3Smrg uint32_t bufferBarrierCount, 27217ec681f3Smrg const VkBufferMemoryBarrier *pBufferBarriers, 27227ec681f3Smrg uint32_t imageBarrierCount, 27237ec681f3Smrg const VkImageMemoryBarrier *pImageBarriers) 27247ec681f3Smrg{ 27257ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 27267ec681f3Smrg 27277ec681f3Smrg /* We only care about barriers between GPU jobs */ 27287ec681f3Smrg if (srcStageMask == VK_PIPELINE_STAGE_HOST_BIT || 27297ec681f3Smrg dstStageMask == VK_PIPELINE_STAGE_HOST_BIT) { 27307ec681f3Smrg return; 27317ec681f3Smrg } 27327ec681f3Smrg 27337ec681f3Smrg /* 
If we have a recording job, finish it here */ 27347ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 27357ec681f3Smrg if (job) 27367ec681f3Smrg v3dv_cmd_buffer_finish_job(cmd_buffer); 27377ec681f3Smrg 27387ec681f3Smrg cmd_buffer->state.has_barrier = true; 27397ec681f3Smrg if (dstStageMask & (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | 27407ec681f3Smrg VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | 27417ec681f3Smrg VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | 27427ec681f3Smrg VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | 27437ec681f3Smrg VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | 27447ec681f3Smrg VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT)) { 27457ec681f3Smrg cmd_buffer->state.has_bcl_barrier = true; 27467ec681f3Smrg } 27477ec681f3Smrg} 27487ec681f3Smrg 27497ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 27507ec681f3Smrgv3dv_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, 27517ec681f3Smrg uint32_t firstBinding, 27527ec681f3Smrg uint32_t bindingCount, 27537ec681f3Smrg const VkBuffer *pBuffers, 27547ec681f3Smrg const VkDeviceSize *pOffsets) 27557ec681f3Smrg{ 27567ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 27577ec681f3Smrg struct v3dv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; 27587ec681f3Smrg 27597ec681f3Smrg /* We have to defer setting up vertex buffer since we need the buffer 27607ec681f3Smrg * stride from the pipeline. 27617ec681f3Smrg */ 27627ec681f3Smrg 27637ec681f3Smrg assert(firstBinding + bindingCount <= MAX_VBS); 27647ec681f3Smrg bool vb_state_changed = false; 27657ec681f3Smrg for (uint32_t i = 0; i < bindingCount; i++) { 27667ec681f3Smrg if (vb[firstBinding + i].buffer != v3dv_buffer_from_handle(pBuffers[i])) { 27677ec681f3Smrg vb[firstBinding + i].buffer = v3dv_buffer_from_handle(pBuffers[i]); 27687ec681f3Smrg vb_state_changed = true; 27697ec681f3Smrg } 27707ec681f3Smrg if (vb[firstBinding + i].offset != pOffsets[i]) { 27717ec681f3Smrg vb[firstBinding + i].offset = pOffsets[i]; 27727ec681f3Smrg vb_state_changed = true; 27737ec681f3Smrg } 27747ec681f3Smrg } 27757ec681f3Smrg 27767ec681f3Smrg if (vb_state_changed) 27777ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VERTEX_BUFFER; 27787ec681f3Smrg} 27797ec681f3Smrg 27807ec681f3Smrgstatic uint32_t 27817ec681f3Smrgget_index_size(VkIndexType index_type) 27827ec681f3Smrg{ 27837ec681f3Smrg switch (index_type) { 27847ec681f3Smrg case VK_INDEX_TYPE_UINT8_EXT: 27857ec681f3Smrg return 1; 27867ec681f3Smrg break; 27877ec681f3Smrg case VK_INDEX_TYPE_UINT16: 27887ec681f3Smrg return 2; 27897ec681f3Smrg break; 27907ec681f3Smrg case VK_INDEX_TYPE_UINT32: 27917ec681f3Smrg return 4; 27927ec681f3Smrg break; 27937ec681f3Smrg default: 27947ec681f3Smrg unreachable("Unsupported index type"); 27957ec681f3Smrg } 27967ec681f3Smrg} 27977ec681f3Smrg 27987ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 27997ec681f3Smrgv3dv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, 28007ec681f3Smrg VkBuffer buffer, 28017ec681f3Smrg VkDeviceSize offset, 28027ec681f3Smrg VkIndexType indexType) 28037ec681f3Smrg{ 28047ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 28057ec681f3Smrg 28067ec681f3Smrg const uint32_t index_size = get_index_size(indexType); 28077ec681f3Smrg if (buffer == cmd_buffer->state.index_buffer.buffer && 28087ec681f3Smrg offset == cmd_buffer->state.index_buffer.offset && 28097ec681f3Smrg index_size == cmd_buffer->state.index_buffer.index_size) { 28107ec681f3Smrg return; 28117ec681f3Smrg } 28127ec681f3Smrg 28137ec681f3Smrg cmd_buffer->state.index_buffer.buffer = buffer; 28147ec681f3Smrg 
cmd_buffer->state.index_buffer.offset = offset; 28157ec681f3Smrg cmd_buffer->state.index_buffer.index_size = index_size; 28167ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_INDEX_BUFFER; 28177ec681f3Smrg} 28187ec681f3Smrg 28197ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 28207ec681f3Smrgv3dv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, 28217ec681f3Smrg VkStencilFaceFlags faceMask, 28227ec681f3Smrg uint32_t compareMask) 28237ec681f3Smrg{ 28247ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 28257ec681f3Smrg 28267ec681f3Smrg if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 28277ec681f3Smrg cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask & 0xff; 28287ec681f3Smrg if (faceMask & VK_STENCIL_FACE_BACK_BIT) 28297ec681f3Smrg cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask & 0xff; 28307ec681f3Smrg 28317ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK; 28327ec681f3Smrg} 28337ec681f3Smrg 28347ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 28357ec681f3Smrgv3dv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, 28367ec681f3Smrg VkStencilFaceFlags faceMask, 28377ec681f3Smrg uint32_t writeMask) 28387ec681f3Smrg{ 28397ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 28407ec681f3Smrg 28417ec681f3Smrg if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 28427ec681f3Smrg cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask & 0xff; 28437ec681f3Smrg if (faceMask & VK_STENCIL_FACE_BACK_BIT) 28447ec681f3Smrg cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask & 0xff; 28457ec681f3Smrg 28467ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_WRITE_MASK; 28477ec681f3Smrg} 28487ec681f3Smrg 28497ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 28507ec681f3Smrgv3dv_CmdSetStencilReference(VkCommandBuffer commandBuffer, 28517ec681f3Smrg VkStencilFaceFlags faceMask, 28527ec681f3Smrg uint32_t reference) 28537ec681f3Smrg{ 28547ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 28557ec681f3Smrg 28567ec681f3Smrg if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 28577ec681f3Smrg cmd_buffer->state.dynamic.stencil_reference.front = reference & 0xff; 28587ec681f3Smrg if (faceMask & VK_STENCIL_FACE_BACK_BIT) 28597ec681f3Smrg cmd_buffer->state.dynamic.stencil_reference.back = reference & 0xff; 28607ec681f3Smrg 28617ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_REFERENCE; 28627ec681f3Smrg} 28637ec681f3Smrg 28647ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 28657ec681f3Smrgv3dv_CmdSetDepthBias(VkCommandBuffer commandBuffer, 28667ec681f3Smrg float depthBiasConstantFactor, 28677ec681f3Smrg float depthBiasClamp, 28687ec681f3Smrg float depthBiasSlopeFactor) 28697ec681f3Smrg{ 28707ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 28717ec681f3Smrg 28727ec681f3Smrg cmd_buffer->state.dynamic.depth_bias.constant_factor = depthBiasConstantFactor; 28737ec681f3Smrg cmd_buffer->state.dynamic.depth_bias.depth_bias_clamp = depthBiasClamp; 28747ec681f3Smrg cmd_buffer->state.dynamic.depth_bias.slope_factor = depthBiasSlopeFactor; 28757ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DEPTH_BIAS; 28767ec681f3Smrg} 28777ec681f3Smrg 28787ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 28797ec681f3Smrgv3dv_CmdSetDepthBounds(VkCommandBuffer commandBuffer, 28807ec681f3Smrg float minDepthBounds, 28817ec681f3Smrg float maxDepthBounds) 28827ec681f3Smrg{ 28837ec681f3Smrg /* We do not support depth bounds testing so we just ingore this. 
We are 28847ec681f3Smrg * already asserting that pipelines don't enable the feature anyway. 28857ec681f3Smrg */ 28867ec681f3Smrg} 28877ec681f3Smrg 28887ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 28897ec681f3Smrgv3dv_CmdSetLineWidth(VkCommandBuffer commandBuffer, 28907ec681f3Smrg float lineWidth) 28917ec681f3Smrg{ 28927ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 28937ec681f3Smrg 28947ec681f3Smrg cmd_buffer->state.dynamic.line_width = lineWidth; 28957ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_LINE_WIDTH; 28967ec681f3Smrg} 28977ec681f3Smrg 28987ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 28997ec681f3Smrgv3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, 29007ec681f3Smrg VkPipelineBindPoint pipelineBindPoint, 29017ec681f3Smrg VkPipelineLayout _layout, 29027ec681f3Smrg uint32_t firstSet, 29037ec681f3Smrg uint32_t descriptorSetCount, 29047ec681f3Smrg const VkDescriptorSet *pDescriptorSets, 29057ec681f3Smrg uint32_t dynamicOffsetCount, 29067ec681f3Smrg const uint32_t *pDynamicOffsets) 29077ec681f3Smrg{ 29087ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 29097ec681f3Smrg V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, _layout); 29107ec681f3Smrg 29117ec681f3Smrg uint32_t dyn_index = 0; 29127ec681f3Smrg 29137ec681f3Smrg assert(firstSet + descriptorSetCount <= MAX_SETS); 29147ec681f3Smrg 29157ec681f3Smrg struct v3dv_descriptor_state *descriptor_state = 29167ec681f3Smrg pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE ? 29177ec681f3Smrg &cmd_buffer->state.compute.descriptor_state : 29187ec681f3Smrg &cmd_buffer->state.gfx.descriptor_state; 29197ec681f3Smrg 29207ec681f3Smrg VkShaderStageFlags dirty_stages = 0; 29217ec681f3Smrg bool descriptor_state_changed = false; 29227ec681f3Smrg for (uint32_t i = 0; i < descriptorSetCount; i++) { 29237ec681f3Smrg V3DV_FROM_HANDLE(v3dv_descriptor_set, set, pDescriptorSets[i]); 29247ec681f3Smrg uint32_t index = firstSet + i; 29257ec681f3Smrg 29267ec681f3Smrg descriptor_state->valid |= (1u << index); 29277ec681f3Smrg if (descriptor_state->descriptor_sets[index] != set) { 29287ec681f3Smrg descriptor_state->descriptor_sets[index] = set; 29297ec681f3Smrg dirty_stages |= set->layout->shader_stages; 29307ec681f3Smrg descriptor_state_changed = true; 29317ec681f3Smrg } 29327ec681f3Smrg 29337ec681f3Smrg for (uint32_t j = 0; j < set->layout->dynamic_offset_count; j++, dyn_index++) { 29347ec681f3Smrg uint32_t idx = j + layout->set[i + firstSet].dynamic_offset_start; 29357ec681f3Smrg 29367ec681f3Smrg if (descriptor_state->dynamic_offsets[idx] != pDynamicOffsets[dyn_index]) { 29377ec681f3Smrg descriptor_state->dynamic_offsets[idx] = pDynamicOffsets[dyn_index]; 29387ec681f3Smrg dirty_stages |= set->layout->shader_stages; 29397ec681f3Smrg descriptor_state_changed = true; 29407ec681f3Smrg } 29417ec681f3Smrg } 29427ec681f3Smrg } 29437ec681f3Smrg 29447ec681f3Smrg if (descriptor_state_changed) { 29457ec681f3Smrg if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { 29467ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DESCRIPTOR_SETS; 29477ec681f3Smrg cmd_buffer->state.dirty_descriptor_stages |= dirty_stages & VK_SHADER_STAGE_ALL_GRAPHICS; 29487ec681f3Smrg } else { 29497ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS; 29507ec681f3Smrg cmd_buffer->state.dirty_descriptor_stages |= VK_SHADER_STAGE_COMPUTE_BIT; 29517ec681f3Smrg } 29527ec681f3Smrg } 29537ec681f3Smrg} 29547ec681f3Smrg 29557ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 29567ec681f3Smrgv3dv_CmdPushConstants(VkCommandBuffer 
commandBuffer, 29577ec681f3Smrg VkPipelineLayout layout, 29587ec681f3Smrg VkShaderStageFlags stageFlags, 29597ec681f3Smrg uint32_t offset, 29607ec681f3Smrg uint32_t size, 29617ec681f3Smrg const void *pValues) 29627ec681f3Smrg{ 29637ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 29647ec681f3Smrg 29657ec681f3Smrg if (!memcmp((uint8_t *) cmd_buffer->push_constants_data + offset, pValues, size)) 29667ec681f3Smrg return; 29677ec681f3Smrg 29687ec681f3Smrg memcpy((uint8_t *) cmd_buffer->push_constants_data + offset, pValues, size); 29697ec681f3Smrg 29707ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PUSH_CONSTANTS; 29717ec681f3Smrg cmd_buffer->state.dirty_push_constants_stages |= stageFlags; 29727ec681f3Smrg} 29737ec681f3Smrg 29747ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 29757ec681f3Smrgv3dv_CmdSetBlendConstants(VkCommandBuffer commandBuffer, 29767ec681f3Smrg const float blendConstants[4]) 29777ec681f3Smrg{ 29787ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 29797ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 29807ec681f3Smrg 29817ec681f3Smrg if (!memcmp(state->dynamic.blend_constants, blendConstants, 29827ec681f3Smrg sizeof(state->dynamic.blend_constants))) { 29837ec681f3Smrg return; 29847ec681f3Smrg } 29857ec681f3Smrg 29867ec681f3Smrg memcpy(state->dynamic.blend_constants, blendConstants, 29877ec681f3Smrg sizeof(state->dynamic.blend_constants)); 29887ec681f3Smrg 29897ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_BLEND_CONSTANTS; 29907ec681f3Smrg} 29917ec681f3Smrg 29927ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 29937ec681f3Smrgv3dv_CmdSetColorWriteEnableEXT(VkCommandBuffer commandBuffer, 29947ec681f3Smrg uint32_t attachmentCount, 29957ec681f3Smrg const VkBool32 *pColorWriteEnables) 29967ec681f3Smrg{ 29977ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 29987ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 29997ec681f3Smrg uint32_t color_write_enable = 0; 30007ec681f3Smrg 30017ec681f3Smrg for (uint32_t i = 0; i < attachmentCount; i++) 30027ec681f3Smrg color_write_enable |= pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0; 30037ec681f3Smrg 30047ec681f3Smrg if (state->dynamic.color_write_enable == color_write_enable) 30057ec681f3Smrg return; 30067ec681f3Smrg 30077ec681f3Smrg state->dynamic.color_write_enable = color_write_enable; 30087ec681f3Smrg 30097ec681f3Smrg state->dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE; 30107ec681f3Smrg} 30117ec681f3Smrg 30127ec681f3Smrgvoid 30137ec681f3Smrgv3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer, 30147ec681f3Smrg struct v3dv_query_pool *pool, 30157ec681f3Smrg uint32_t first, 30167ec681f3Smrg uint32_t count) 30177ec681f3Smrg{ 30187ec681f3Smrg /* Resets can only happen outside a render pass instance so we should not 30197ec681f3Smrg * be in the middle of job recording. 
30207ec681f3Smrg */ 30217ec681f3Smrg assert(cmd_buffer->state.pass == NULL); 30227ec681f3Smrg assert(cmd_buffer->state.job == NULL); 30237ec681f3Smrg 30247ec681f3Smrg assert(first < pool->query_count); 30257ec681f3Smrg assert(first + count <= pool->query_count); 30267ec681f3Smrg 30277ec681f3Smrg struct v3dv_job *job = 30287ec681f3Smrg v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 30297ec681f3Smrg V3DV_JOB_TYPE_CPU_RESET_QUERIES, 30307ec681f3Smrg cmd_buffer, -1); 30317ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 30327ec681f3Smrg 30337ec681f3Smrg job->cpu.query_reset.pool = pool; 30347ec681f3Smrg job->cpu.query_reset.first = first; 30357ec681f3Smrg job->cpu.query_reset.count = count; 30367ec681f3Smrg 30377ec681f3Smrg list_addtail(&job->list_link, &cmd_buffer->jobs); 30387ec681f3Smrg} 30397ec681f3Smrg 30407ec681f3Smrgvoid 30417ec681f3Smrgv3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer, 30427ec681f3Smrg uint32_t slot_size, 30437ec681f3Smrg uint32_t used_count, 30447ec681f3Smrg uint32_t *alloc_count, 30457ec681f3Smrg void **ptr) 30467ec681f3Smrg{ 30477ec681f3Smrg if (used_count >= *alloc_count) { 30487ec681f3Smrg const uint32_t prev_slot_count = *alloc_count; 30497ec681f3Smrg void *old_buffer = *ptr; 30507ec681f3Smrg 30517ec681f3Smrg const uint32_t new_slot_count = MAX2(*alloc_count * 2, 4); 30527ec681f3Smrg const uint32_t bytes = new_slot_count * slot_size; 30537ec681f3Smrg *ptr = vk_alloc(&cmd_buffer->device->vk.alloc, bytes, 8, 30547ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 30557ec681f3Smrg if (*ptr == NULL) { 30567ec681f3Smrg fprintf(stderr, "Error: failed to allocate CPU buffer for query.\n"); 30577ec681f3Smrg v3dv_flag_oom(cmd_buffer, NULL); 30587ec681f3Smrg return; 30597ec681f3Smrg } 30607ec681f3Smrg 30617ec681f3Smrg memcpy(*ptr, old_buffer, prev_slot_count * slot_size); 30627ec681f3Smrg *alloc_count = new_slot_count; 30637ec681f3Smrg } 30647ec681f3Smrg assert(used_count < *alloc_count); 30657ec681f3Smrg} 30667ec681f3Smrg 30677ec681f3Smrgvoid 30687ec681f3Smrgv3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer, 30697ec681f3Smrg struct v3dv_query_pool *pool, 30707ec681f3Smrg uint32_t query, 30717ec681f3Smrg VkQueryControlFlags flags) 30727ec681f3Smrg{ 30737ec681f3Smrg /* FIXME: we only support one active query for now */ 30747ec681f3Smrg assert(cmd_buffer->state.query.active_query.bo == NULL); 30757ec681f3Smrg assert(query < pool->query_count); 30767ec681f3Smrg 30777ec681f3Smrg cmd_buffer->state.query.active_query.bo = pool->queries[query].bo; 30787ec681f3Smrg cmd_buffer->state.query.active_query.offset = pool->queries[query].offset; 30797ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY; 30807ec681f3Smrg} 30817ec681f3Smrg 30827ec681f3Smrgvoid 30837ec681f3Smrgv3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer, 30847ec681f3Smrg struct v3dv_query_pool *pool, 30857ec681f3Smrg uint32_t query) 30867ec681f3Smrg{ 30877ec681f3Smrg assert(query < pool->query_count); 30887ec681f3Smrg assert(cmd_buffer->state.query.active_query.bo != NULL); 30897ec681f3Smrg 30907ec681f3Smrg if (cmd_buffer->state.pass) { 30917ec681f3Smrg /* Queue the EndQuery in the command buffer state, we will create a CPU 30927ec681f3Smrg * job to flag all of these queries as possibly available right after the 30937ec681f3Smrg * render pass job in which they have been recorded. 
30947ec681f3Smrg */ 30957ec681f3Smrg struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 30967ec681f3Smrg v3dv_cmd_buffer_ensure_array_state(cmd_buffer, 30977ec681f3Smrg sizeof(struct v3dv_end_query_cpu_job_info), 30987ec681f3Smrg state->query.end.used_count, 30997ec681f3Smrg &state->query.end.alloc_count, 31007ec681f3Smrg (void **) &state->query.end.states); 31017ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 31027ec681f3Smrg 31037ec681f3Smrg struct v3dv_end_query_cpu_job_info *info = 31047ec681f3Smrg &state->query.end.states[state->query.end.used_count++]; 31057ec681f3Smrg 31067ec681f3Smrg info->pool = pool; 31077ec681f3Smrg info->query = query; 31087ec681f3Smrg 31097ec681f3Smrg /* From the Vulkan spec: 31107ec681f3Smrg * 31117ec681f3Smrg * "If queries are used while executing a render pass instance that has 31127ec681f3Smrg * multiview enabled, the query uses N consecutive query indices in 31137ec681f3Smrg * the query pool (starting at query) where N is the number of bits set 31147ec681f3Smrg * in the view mask in the subpass the query is used in. How the 31157ec681f3Smrg * numerical results of the query are distributed among the queries is 31167ec681f3Smrg * implementation-dependent." 31177ec681f3Smrg * 31187ec681f3Smrg * In our case, only the first query is used but this means we still need 31197ec681f3Smrg * to flag the other queries as available so we don't emit errors when 31207ec681f3Smrg * the applications attempt to retrive values from them. 31217ec681f3Smrg */ 31227ec681f3Smrg struct v3dv_render_pass *pass = cmd_buffer->state.pass; 31237ec681f3Smrg if (!pass->multiview_enabled) { 31247ec681f3Smrg info->count = 1; 31257ec681f3Smrg } else { 31267ec681f3Smrg struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; 31277ec681f3Smrg info->count = util_bitcount(subpass->view_mask); 31287ec681f3Smrg } 31297ec681f3Smrg } else { 31307ec681f3Smrg /* Otherwise, schedule the CPU job immediately */ 31317ec681f3Smrg struct v3dv_job *job = 31327ec681f3Smrg v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 31337ec681f3Smrg V3DV_JOB_TYPE_CPU_END_QUERY, 31347ec681f3Smrg cmd_buffer, -1); 31357ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 31367ec681f3Smrg 31377ec681f3Smrg job->cpu.query_end.pool = pool; 31387ec681f3Smrg job->cpu.query_end.query = query; 31397ec681f3Smrg 31407ec681f3Smrg /* Multiview queries cannot cross subpass boundaries */ 31417ec681f3Smrg job->cpu.query_end.count = 1; 31427ec681f3Smrg 31437ec681f3Smrg list_addtail(&job->list_link, &cmd_buffer->jobs); 31447ec681f3Smrg } 31457ec681f3Smrg 31467ec681f3Smrg cmd_buffer->state.query.active_query.bo = NULL; 31477ec681f3Smrg cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY; 31487ec681f3Smrg} 31497ec681f3Smrg 31507ec681f3Smrgvoid 31517ec681f3Smrgv3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer, 31527ec681f3Smrg struct v3dv_query_pool *pool, 31537ec681f3Smrg uint32_t first, 31547ec681f3Smrg uint32_t count, 31557ec681f3Smrg struct v3dv_buffer *dst, 31567ec681f3Smrg uint32_t offset, 31577ec681f3Smrg uint32_t stride, 31587ec681f3Smrg VkQueryResultFlags flags) 31597ec681f3Smrg{ 31607ec681f3Smrg /* Copies can only happen outside a render pass instance so we should not 31617ec681f3Smrg * be in the middle of job recording. 
31627ec681f3Smrg */ 31637ec681f3Smrg assert(cmd_buffer->state.pass == NULL); 31647ec681f3Smrg assert(cmd_buffer->state.job == NULL); 31657ec681f3Smrg 31667ec681f3Smrg assert(first < pool->query_count); 31677ec681f3Smrg assert(first + count <= pool->query_count); 31687ec681f3Smrg 31697ec681f3Smrg struct v3dv_job *job = 31707ec681f3Smrg v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 31717ec681f3Smrg V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS, 31727ec681f3Smrg cmd_buffer, -1); 31737ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 31747ec681f3Smrg 31757ec681f3Smrg job->cpu.query_copy_results.pool = pool; 31767ec681f3Smrg job->cpu.query_copy_results.first = first; 31777ec681f3Smrg job->cpu.query_copy_results.count = count; 31787ec681f3Smrg job->cpu.query_copy_results.dst = dst; 31797ec681f3Smrg job->cpu.query_copy_results.offset = offset; 31807ec681f3Smrg job->cpu.query_copy_results.stride = stride; 31817ec681f3Smrg job->cpu.query_copy_results.flags = flags; 31827ec681f3Smrg 31837ec681f3Smrg list_addtail(&job->list_link, &cmd_buffer->jobs); 31847ec681f3Smrg} 31857ec681f3Smrg 31867ec681f3Smrgvoid 31877ec681f3Smrgv3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer, 31887ec681f3Smrg struct drm_v3d_submit_tfu *tfu) 31897ec681f3Smrg{ 31907ec681f3Smrg struct v3dv_device *device = cmd_buffer->device; 31917ec681f3Smrg struct v3dv_job *job = vk_zalloc(&device->vk.alloc, 31927ec681f3Smrg sizeof(struct v3dv_job), 8, 31937ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 31947ec681f3Smrg if (!job) { 31957ec681f3Smrg v3dv_flag_oom(cmd_buffer, NULL); 31967ec681f3Smrg return; 31977ec681f3Smrg } 31987ec681f3Smrg 31997ec681f3Smrg v3dv_job_init(job, V3DV_JOB_TYPE_GPU_TFU, device, cmd_buffer, -1); 32007ec681f3Smrg job->tfu = *tfu; 32017ec681f3Smrg list_addtail(&job->list_link, &cmd_buffer->jobs); 32027ec681f3Smrg} 32037ec681f3Smrg 32047ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 32057ec681f3Smrgv3dv_CmdSetEvent(VkCommandBuffer commandBuffer, 32067ec681f3Smrg VkEvent _event, 32077ec681f3Smrg VkPipelineStageFlags stageMask) 32087ec681f3Smrg{ 32097ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 32107ec681f3Smrg V3DV_FROM_HANDLE(v3dv_event, event, _event); 32117ec681f3Smrg 32127ec681f3Smrg /* Event (re)sets can only happen outside a render pass instance so we 32137ec681f3Smrg * should not be in the middle of job recording. 32147ec681f3Smrg */ 32157ec681f3Smrg assert(cmd_buffer->state.pass == NULL); 32167ec681f3Smrg assert(cmd_buffer->state.job == NULL); 32177ec681f3Smrg 32187ec681f3Smrg struct v3dv_job *job = 32197ec681f3Smrg v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 32207ec681f3Smrg V3DV_JOB_TYPE_CPU_SET_EVENT, 32217ec681f3Smrg cmd_buffer, -1); 32227ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 32237ec681f3Smrg 32247ec681f3Smrg job->cpu.event_set.event = event; 32257ec681f3Smrg job->cpu.event_set.state = 1; 32267ec681f3Smrg 32277ec681f3Smrg list_addtail(&job->list_link, &cmd_buffer->jobs); 32287ec681f3Smrg} 32297ec681f3Smrg 32307ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 32317ec681f3Smrgv3dv_CmdResetEvent(VkCommandBuffer commandBuffer, 32327ec681f3Smrg VkEvent _event, 32337ec681f3Smrg VkPipelineStageFlags stageMask) 32347ec681f3Smrg{ 32357ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 32367ec681f3Smrg V3DV_FROM_HANDLE(v3dv_event, event, _event); 32377ec681f3Smrg 32387ec681f3Smrg /* Event (re)sets can only happen outside a render pass instance so we 32397ec681f3Smrg * should not be in the middle of job recording. 
32407ec681f3Smrg */ 32417ec681f3Smrg assert(cmd_buffer->state.pass == NULL); 32427ec681f3Smrg assert(cmd_buffer->state.job == NULL); 32437ec681f3Smrg 32447ec681f3Smrg struct v3dv_job *job = 32457ec681f3Smrg v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 32467ec681f3Smrg V3DV_JOB_TYPE_CPU_SET_EVENT, 32477ec681f3Smrg cmd_buffer, -1); 32487ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 32497ec681f3Smrg 32507ec681f3Smrg job->cpu.event_set.event = event; 32517ec681f3Smrg job->cpu.event_set.state = 0; 32527ec681f3Smrg 32537ec681f3Smrg list_addtail(&job->list_link, &cmd_buffer->jobs); 32547ec681f3Smrg} 32557ec681f3Smrg 32567ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 32577ec681f3Smrgv3dv_CmdWaitEvents(VkCommandBuffer commandBuffer, 32587ec681f3Smrg uint32_t eventCount, 32597ec681f3Smrg const VkEvent *pEvents, 32607ec681f3Smrg VkPipelineStageFlags srcStageMask, 32617ec681f3Smrg VkPipelineStageFlags dstStageMask, 32627ec681f3Smrg uint32_t memoryBarrierCount, 32637ec681f3Smrg const VkMemoryBarrier *pMemoryBarriers, 32647ec681f3Smrg uint32_t bufferMemoryBarrierCount, 32657ec681f3Smrg const VkBufferMemoryBarrier *pBufferMemoryBarriers, 32667ec681f3Smrg uint32_t imageMemoryBarrierCount, 32677ec681f3Smrg const VkImageMemoryBarrier *pImageMemoryBarriers) 32687ec681f3Smrg{ 32697ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 32707ec681f3Smrg 32717ec681f3Smrg assert(eventCount > 0); 32727ec681f3Smrg 32737ec681f3Smrg struct v3dv_job *job = 32747ec681f3Smrg v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 32757ec681f3Smrg V3DV_JOB_TYPE_CPU_WAIT_EVENTS, 32767ec681f3Smrg cmd_buffer, -1); 32777ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 32787ec681f3Smrg 32797ec681f3Smrg const uint32_t event_list_size = sizeof(struct v3dv_event *) * eventCount; 32807ec681f3Smrg 32817ec681f3Smrg job->cpu.event_wait.events = 32827ec681f3Smrg vk_alloc(&cmd_buffer->device->vk.alloc, event_list_size, 8, 32837ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 32847ec681f3Smrg if (!job->cpu.event_wait.events) { 32857ec681f3Smrg v3dv_flag_oom(cmd_buffer, NULL); 32867ec681f3Smrg return; 32877ec681f3Smrg } 32887ec681f3Smrg job->cpu.event_wait.event_count = eventCount; 32897ec681f3Smrg 32907ec681f3Smrg for (uint32_t i = 0; i < eventCount; i++) 32917ec681f3Smrg job->cpu.event_wait.events[i] = v3dv_event_from_handle(pEvents[i]); 32927ec681f3Smrg 32937ec681f3Smrg /* vkCmdWaitEvents can be recorded inside a render pass, so we might have 32947ec681f3Smrg * an active job. 32957ec681f3Smrg * 32967ec681f3Smrg * If we are inside a render pass, because we vkCmd(Re)SetEvent can't happen 32977ec681f3Smrg * inside a render pass, it is safe to move the wait job so it happens right 32987ec681f3Smrg * before the current job we are currently recording for the subpass, if any 32997ec681f3Smrg * (it would actually be safe to move it all the way back to right before 33007ec681f3Smrg * the start of the render pass). 33017ec681f3Smrg * 33027ec681f3Smrg * If we are outside a render pass then we should not have any on-going job 33037ec681f3Smrg * and we are free to just add the wait job without restrictions. 
33047ec681f3Smrg */ 33057ec681f3Smrg assert(cmd_buffer->state.pass || !cmd_buffer->state.job); 33067ec681f3Smrg list_addtail(&job->list_link, &cmd_buffer->jobs); 33077ec681f3Smrg} 33087ec681f3Smrg 33097ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 33107ec681f3Smrgv3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer, 33117ec681f3Smrg VkPipelineStageFlagBits pipelineStage, 33127ec681f3Smrg VkQueryPool queryPool, 33137ec681f3Smrg uint32_t query) 33147ec681f3Smrg{ 33157ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 33167ec681f3Smrg V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool); 33177ec681f3Smrg 33187ec681f3Smrg /* If this is called inside a render pass we need to finish the current 33197ec681f3Smrg * job here... 33207ec681f3Smrg */ 33217ec681f3Smrg struct v3dv_render_pass *pass = cmd_buffer->state.pass; 33227ec681f3Smrg if (pass) 33237ec681f3Smrg v3dv_cmd_buffer_finish_job(cmd_buffer); 33247ec681f3Smrg 33257ec681f3Smrg struct v3dv_job *job = 33267ec681f3Smrg v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 33277ec681f3Smrg V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY, 33287ec681f3Smrg cmd_buffer, -1); 33297ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 33307ec681f3Smrg 33317ec681f3Smrg job->cpu.query_timestamp.pool = query_pool; 33327ec681f3Smrg job->cpu.query_timestamp.query = query; 33337ec681f3Smrg 33347ec681f3Smrg if (!pass || !pass->multiview_enabled) { 33357ec681f3Smrg job->cpu.query_timestamp.count = 1; 33367ec681f3Smrg } else { 33377ec681f3Smrg struct v3dv_subpass *subpass = 33387ec681f3Smrg &pass->subpasses[cmd_buffer->state.subpass_idx]; 33397ec681f3Smrg job->cpu.query_timestamp.count = util_bitcount(subpass->view_mask); 33407ec681f3Smrg } 33417ec681f3Smrg 33427ec681f3Smrg list_addtail(&job->list_link, &cmd_buffer->jobs); 33437ec681f3Smrg cmd_buffer->state.job = NULL; 33447ec681f3Smrg 33457ec681f3Smrg /* ...and resume the subpass after the timestamp */ 33467ec681f3Smrg if (cmd_buffer->state.pass) 33477ec681f3Smrg v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx); 33487ec681f3Smrg} 33497ec681f3Smrg 33507ec681f3Smrgstatic void 33517ec681f3Smrgcmd_buffer_emit_pre_dispatch(struct v3dv_cmd_buffer *cmd_buffer) 33527ec681f3Smrg{ 33537ec681f3Smrg assert(cmd_buffer->state.compute.pipeline); 33547ec681f3Smrg assert(cmd_buffer->state.compute.pipeline->active_stages == 33557ec681f3Smrg VK_SHADER_STAGE_COMPUTE_BIT); 33567ec681f3Smrg 33577ec681f3Smrg cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_COMPUTE_PIPELINE | 33587ec681f3Smrg V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS); 33597ec681f3Smrg cmd_buffer->state.dirty_descriptor_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT; 33607ec681f3Smrg cmd_buffer->state.dirty_push_constants_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT; 33617ec681f3Smrg} 33627ec681f3Smrg 33637ec681f3Smrg#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16 33647ec681f3Smrg#define V3D_CSD_CFG012_WG_OFFSET_SHIFT 0 33657ec681f3Smrg/* Allow this dispatch to start while the last one is still running. */ 33667ec681f3Smrg#define V3D_CSD_CFG3_OVERLAP_WITH_PREV (1 << 26) 33677ec681f3Smrg/* Maximum supergroup ID. 6 bits. */ 33687ec681f3Smrg#define V3D_CSD_CFG3_MAX_SG_ID_SHIFT 20 33697ec681f3Smrg/* Batches per supergroup minus 1. 8 bits. 
*/ 33707ec681f3Smrg#define V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT 12 33717ec681f3Smrg/* Workgroups per supergroup, 0 means 16 */ 33727ec681f3Smrg#define V3D_CSD_CFG3_WGS_PER_SG_SHIFT 8 33737ec681f3Smrg#define V3D_CSD_CFG3_WG_SIZE_SHIFT 0 33747ec681f3Smrg 33757ec681f3Smrg#define V3D_CSD_CFG5_PROPAGATE_NANS (1 << 2) 33767ec681f3Smrg#define V3D_CSD_CFG5_SINGLE_SEG (1 << 1) 33777ec681f3Smrg#define V3D_CSD_CFG5_THREADING (1 << 0) 33787ec681f3Smrg 33797ec681f3Smrgvoid 33807ec681f3Smrgv3dv_cmd_buffer_rewrite_indirect_csd_job( 33817ec681f3Smrg struct v3dv_csd_indirect_cpu_job_info *info, 33827ec681f3Smrg const uint32_t *wg_counts) 33837ec681f3Smrg{ 33847ec681f3Smrg assert(info->csd_job); 33857ec681f3Smrg struct v3dv_job *job = info->csd_job; 33867ec681f3Smrg 33877ec681f3Smrg assert(job->type == V3DV_JOB_TYPE_GPU_CSD); 33887ec681f3Smrg assert(wg_counts[0] > 0 && wg_counts[1] > 0 && wg_counts[2] > 0); 33897ec681f3Smrg 33907ec681f3Smrg struct drm_v3d_submit_csd *submit = &job->csd.submit; 33917ec681f3Smrg 33927ec681f3Smrg job->csd.wg_count[0] = wg_counts[0]; 33937ec681f3Smrg job->csd.wg_count[1] = wg_counts[1]; 33947ec681f3Smrg job->csd.wg_count[2] = wg_counts[2]; 33957ec681f3Smrg 33967ec681f3Smrg submit->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 33977ec681f3Smrg submit->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 33987ec681f3Smrg submit->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 33997ec681f3Smrg 34007ec681f3Smrg submit->cfg[4] = DIV_ROUND_UP(info->wg_size, 16) * 34017ec681f3Smrg (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; 34027ec681f3Smrg assert(submit->cfg[4] != ~0); 34037ec681f3Smrg 34047ec681f3Smrg if (info->needs_wg_uniform_rewrite) { 34057ec681f3Smrg /* Make sure the GPU is not currently accessing the indirect CL for this 34067ec681f3Smrg * job, since we are about to overwrite some of the uniform data. 34077ec681f3Smrg */ 34087ec681f3Smrg v3dv_bo_wait(job->device, job->indirect.bo, PIPE_TIMEOUT_INFINITE); 34097ec681f3Smrg 34107ec681f3Smrg for (uint32_t i = 0; i < 3; i++) { 34117ec681f3Smrg if (info->wg_uniform_offsets[i]) { 34127ec681f3Smrg /* Sanity check that our uniform pointers are within the allocated 34137ec681f3Smrg * BO space for our indirect CL. 
34147ec681f3Smrg */ 34157ec681f3Smrg assert(info->wg_uniform_offsets[i] >= (uint32_t *) job->indirect.base); 34167ec681f3Smrg assert(info->wg_uniform_offsets[i] < (uint32_t *) job->indirect.next); 34177ec681f3Smrg *(info->wg_uniform_offsets[i]) = wg_counts[i]; 34187ec681f3Smrg } 34197ec681f3Smrg } 34207ec681f3Smrg } 34217ec681f3Smrg} 34227ec681f3Smrg 34237ec681f3Smrgstatic struct v3dv_job * 34247ec681f3Smrgcmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, 34257ec681f3Smrg uint32_t base_offset_x, 34267ec681f3Smrg uint32_t base_offset_y, 34277ec681f3Smrg uint32_t base_offset_z, 34287ec681f3Smrg uint32_t group_count_x, 34297ec681f3Smrg uint32_t group_count_y, 34307ec681f3Smrg uint32_t group_count_z, 34317ec681f3Smrg uint32_t **wg_uniform_offsets_out, 34327ec681f3Smrg uint32_t *wg_size_out) 34337ec681f3Smrg{ 34347ec681f3Smrg struct v3dv_pipeline *pipeline = cmd_buffer->state.compute.pipeline; 34357ec681f3Smrg assert(pipeline && pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]); 34367ec681f3Smrg struct v3dv_shader_variant *cs_variant = 34377ec681f3Smrg pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]; 34387ec681f3Smrg 34397ec681f3Smrg struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc, 34407ec681f3Smrg sizeof(struct v3dv_job), 8, 34417ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 34427ec681f3Smrg if (!job) { 34437ec681f3Smrg v3dv_flag_oom(cmd_buffer, NULL); 34447ec681f3Smrg return NULL; 34457ec681f3Smrg } 34467ec681f3Smrg 34477ec681f3Smrg v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CSD, cmd_buffer->device, cmd_buffer, -1); 34487ec681f3Smrg cmd_buffer->state.job = job; 34497ec681f3Smrg 34507ec681f3Smrg struct drm_v3d_submit_csd *submit = &job->csd.submit; 34517ec681f3Smrg 34527ec681f3Smrg job->csd.wg_count[0] = group_count_x; 34537ec681f3Smrg job->csd.wg_count[1] = group_count_y; 34547ec681f3Smrg job->csd.wg_count[2] = group_count_z; 34557ec681f3Smrg 34567ec681f3Smrg job->csd.wg_base[0] = base_offset_x; 34577ec681f3Smrg job->csd.wg_base[1] = base_offset_y; 34587ec681f3Smrg job->csd.wg_base[2] = base_offset_z; 34597ec681f3Smrg 34607ec681f3Smrg submit->cfg[0] |= group_count_x << V3D_CSD_CFG012_WG_COUNT_SHIFT; 34617ec681f3Smrg submit->cfg[1] |= group_count_y << V3D_CSD_CFG012_WG_COUNT_SHIFT; 34627ec681f3Smrg submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT; 34637ec681f3Smrg 34647ec681f3Smrg const struct v3d_compute_prog_data *cpd = 34657ec681f3Smrg cs_variant->prog_data.cs; 34667ec681f3Smrg 34677ec681f3Smrg const uint32_t num_wgs = group_count_x * group_count_y * group_count_z; 34687ec681f3Smrg const uint32_t wg_size = cpd->local_size[0] * 34697ec681f3Smrg cpd->local_size[1] * 34707ec681f3Smrg cpd->local_size[2]; 34717ec681f3Smrg 34727ec681f3Smrg uint32_t wgs_per_sg = 34737ec681f3Smrg v3d_csd_choose_workgroups_per_supergroup( 34747ec681f3Smrg &cmd_buffer->device->devinfo, 34757ec681f3Smrg cs_variant->prog_data.cs->has_subgroups, 34767ec681f3Smrg cs_variant->prog_data.cs->base.has_control_barrier, 34777ec681f3Smrg cs_variant->prog_data.cs->base.threads, 34787ec681f3Smrg num_wgs, wg_size); 34797ec681f3Smrg 34807ec681f3Smrg uint32_t batches_per_sg = DIV_ROUND_UP(wgs_per_sg * wg_size, 16); 34817ec681f3Smrg uint32_t whole_sgs = num_wgs / wgs_per_sg; 34827ec681f3Smrg uint32_t rem_wgs = num_wgs - whole_sgs * wgs_per_sg; 34837ec681f3Smrg uint32_t num_batches = batches_per_sg * whole_sgs + 34847ec681f3Smrg DIV_ROUND_UP(rem_wgs * wg_size, 16); 34857ec681f3Smrg 34867ec681f3Smrg submit->cfg[3] |= (wgs_per_sg & 0xf) << V3D_CSD_CFG3_WGS_PER_SG_SHIFT; 
34877ec681f3Smrg submit->cfg[3] |= (batches_per_sg - 1) << V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT; 34887ec681f3Smrg submit->cfg[3] |= (wg_size & 0xff) << V3D_CSD_CFG3_WG_SIZE_SHIFT; 34897ec681f3Smrg if (wg_size_out) 34907ec681f3Smrg *wg_size_out = wg_size; 34917ec681f3Smrg 34927ec681f3Smrg submit->cfg[4] = num_batches - 1; 34937ec681f3Smrg assert(submit->cfg[4] != ~0); 34947ec681f3Smrg 34957ec681f3Smrg assert(pipeline->shared_data->assembly_bo); 34967ec681f3Smrg struct v3dv_bo *cs_assembly_bo = pipeline->shared_data->assembly_bo; 34977ec681f3Smrg 34987ec681f3Smrg submit->cfg[5] = cs_assembly_bo->offset + cs_variant->assembly_offset; 34997ec681f3Smrg submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS; 35007ec681f3Smrg if (cs_variant->prog_data.base->single_seg) 35017ec681f3Smrg submit->cfg[5] |= V3D_CSD_CFG5_SINGLE_SEG; 35027ec681f3Smrg if (cs_variant->prog_data.base->threads == 4) 35037ec681f3Smrg submit->cfg[5] |= V3D_CSD_CFG5_THREADING; 35047ec681f3Smrg 35057ec681f3Smrg if (cs_variant->prog_data.cs->shared_size > 0) { 35067ec681f3Smrg job->csd.shared_memory = 35077ec681f3Smrg v3dv_bo_alloc(cmd_buffer->device, 35087ec681f3Smrg cs_variant->prog_data.cs->shared_size * wgs_per_sg, 35097ec681f3Smrg "shared_vars", true); 35107ec681f3Smrg if (!job->csd.shared_memory) { 35117ec681f3Smrg v3dv_flag_oom(cmd_buffer, NULL); 35127ec681f3Smrg return job; 35137ec681f3Smrg } 35147ec681f3Smrg } 35157ec681f3Smrg 35167ec681f3Smrg v3dv_job_add_bo_unchecked(job, cs_assembly_bo); 35177ec681f3Smrg struct v3dv_cl_reloc uniforms = 35187ec681f3Smrg v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, 35197ec681f3Smrg cs_variant, 35207ec681f3Smrg wg_uniform_offsets_out); 35217ec681f3Smrg submit->cfg[6] = uniforms.bo->offset + uniforms.offset; 35227ec681f3Smrg 35237ec681f3Smrg v3dv_job_add_bo(job, uniforms.bo); 35247ec681f3Smrg 35257ec681f3Smrg return job; 35267ec681f3Smrg} 35277ec681f3Smrg 35287ec681f3Smrgstatic void 35297ec681f3Smrgcmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer, 35307ec681f3Smrg uint32_t base_offset_x, 35317ec681f3Smrg uint32_t base_offset_y, 35327ec681f3Smrg uint32_t base_offset_z, 35337ec681f3Smrg uint32_t group_count_x, 35347ec681f3Smrg uint32_t group_count_y, 35357ec681f3Smrg uint32_t group_count_z) 35367ec681f3Smrg{ 35377ec681f3Smrg if (group_count_x == 0 || group_count_y == 0 || group_count_z == 0) 35387ec681f3Smrg return; 35397ec681f3Smrg 35407ec681f3Smrg struct v3dv_job *job = 35417ec681f3Smrg cmd_buffer_create_csd_job(cmd_buffer, 35427ec681f3Smrg base_offset_x, 35437ec681f3Smrg base_offset_y, 35447ec681f3Smrg base_offset_z, 35457ec681f3Smrg group_count_x, 35467ec681f3Smrg group_count_y, 35477ec681f3Smrg group_count_z, 35487ec681f3Smrg NULL, NULL); 35497ec681f3Smrg 35507ec681f3Smrg list_addtail(&job->list_link, &cmd_buffer->jobs); 35517ec681f3Smrg cmd_buffer->state.job = NULL; 35527ec681f3Smrg} 35537ec681f3Smrg 35547ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 35557ec681f3Smrgv3dv_CmdDispatch(VkCommandBuffer commandBuffer, 35567ec681f3Smrg uint32_t groupCountX, 35577ec681f3Smrg uint32_t groupCountY, 35587ec681f3Smrg uint32_t groupCountZ) 35597ec681f3Smrg{ 35607ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 35617ec681f3Smrg 35627ec681f3Smrg cmd_buffer_emit_pre_dispatch(cmd_buffer); 35637ec681f3Smrg cmd_buffer_dispatch(cmd_buffer, 0, 0, 0, 35647ec681f3Smrg groupCountX, groupCountY, groupCountZ); 35657ec681f3Smrg} 35667ec681f3Smrg 35677ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 35687ec681f3Smrgv3dv_CmdDispatchBase(VkCommandBuffer commandBuffer, 35697ec681f3Smrg uint32_t 
baseGroupX, 35707ec681f3Smrg uint32_t baseGroupY, 35717ec681f3Smrg uint32_t baseGroupZ, 35727ec681f3Smrg uint32_t groupCountX, 35737ec681f3Smrg uint32_t groupCountY, 35747ec681f3Smrg uint32_t groupCountZ) 35757ec681f3Smrg{ 35767ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 35777ec681f3Smrg 35787ec681f3Smrg cmd_buffer_emit_pre_dispatch(cmd_buffer); 35797ec681f3Smrg cmd_buffer_dispatch(cmd_buffer, 35807ec681f3Smrg baseGroupX, baseGroupY, baseGroupZ, 35817ec681f3Smrg groupCountX, groupCountY, groupCountZ); 35827ec681f3Smrg} 35837ec681f3Smrg 35847ec681f3Smrg 35857ec681f3Smrgstatic void 35867ec681f3Smrgcmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer, 35877ec681f3Smrg struct v3dv_buffer *buffer, 35887ec681f3Smrg uint32_t offset) 35897ec681f3Smrg{ 35907ec681f3Smrg /* We can't do indirect dispatches, so instead we record a CPU job that, 35917ec681f3Smrg * when executed in the queue, will map the indirect buffer, read the 35927ec681f3Smrg * dispatch parameters, and submit a regular dispatch. 35937ec681f3Smrg */ 35947ec681f3Smrg struct v3dv_job *job = 35957ec681f3Smrg v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 35967ec681f3Smrg V3DV_JOB_TYPE_CPU_CSD_INDIRECT, 35977ec681f3Smrg cmd_buffer, -1); 35987ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 35997ec681f3Smrg 36007ec681f3Smrg /* We need to create a CSD job now, even if we still don't know the actual 36017ec681f3Smrg * dispatch parameters, because the job setup needs to be done using the 36027ec681f3Smrg * current command buffer state (i.e. pipeline, descriptor sets, push 36037ec681f3Smrg * constants, etc.). So we create the job with default dispatch parameters 36047ec681f3Smrg * and we will rewrite the parts we need at submit time if the indirect 36057ec681f3Smrg * parameters don't match the ones we used to setup the job. 36067ec681f3Smrg */ 36077ec681f3Smrg struct v3dv_job *csd_job = 36087ec681f3Smrg cmd_buffer_create_csd_job(cmd_buffer, 36097ec681f3Smrg 0, 0, 0, 36107ec681f3Smrg 1, 1, 1, 36117ec681f3Smrg &job->cpu.csd_indirect.wg_uniform_offsets[0], 36127ec681f3Smrg &job->cpu.csd_indirect.wg_size); 36137ec681f3Smrg v3dv_return_if_oom(cmd_buffer, NULL); 36147ec681f3Smrg assert(csd_job); 36157ec681f3Smrg 36167ec681f3Smrg job->cpu.csd_indirect.buffer = buffer; 36177ec681f3Smrg job->cpu.csd_indirect.offset = offset; 36187ec681f3Smrg job->cpu.csd_indirect.csd_job = csd_job; 36197ec681f3Smrg 36207ec681f3Smrg /* If the compute shader reads the workgroup sizes we will also need to 36217ec681f3Smrg * rewrite the corresponding uniforms. 
36227ec681f3Smrg */ 36237ec681f3Smrg job->cpu.csd_indirect.needs_wg_uniform_rewrite = 36247ec681f3Smrg job->cpu.csd_indirect.wg_uniform_offsets[0] || 36257ec681f3Smrg job->cpu.csd_indirect.wg_uniform_offsets[1] || 36267ec681f3Smrg job->cpu.csd_indirect.wg_uniform_offsets[2]; 36277ec681f3Smrg 36287ec681f3Smrg list_addtail(&job->list_link, &cmd_buffer->jobs); 36297ec681f3Smrg cmd_buffer->state.job = NULL; 36307ec681f3Smrg} 36317ec681f3Smrg 36327ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 36337ec681f3Smrgv3dv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, 36347ec681f3Smrg VkBuffer _buffer, 36357ec681f3Smrg VkDeviceSize offset) 36367ec681f3Smrg{ 36377ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 36387ec681f3Smrg V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer); 36397ec681f3Smrg 36407ec681f3Smrg assert(offset <= UINT32_MAX); 36417ec681f3Smrg 36427ec681f3Smrg cmd_buffer_emit_pre_dispatch(cmd_buffer); 36437ec681f3Smrg cmd_buffer_dispatch_indirect(cmd_buffer, buffer, offset); 36447ec681f3Smrg} 36457ec681f3Smrg 36467ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 36477ec681f3Smrgv3dv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask) 36487ec681f3Smrg{ 36497ec681f3Smrg /* Nothing to do here since we only support a single device */ 36507ec681f3Smrg assert(deviceMask == 0x1); 36517ec681f3Smrg} 3652