1361fc4cbSmaya/* 2361fc4cbSmaya * Copyright © 2016 Red Hat. 3361fc4cbSmaya * Copyright © 2016 Bas Nieuwenhuizen 4361fc4cbSmaya * 5361fc4cbSmaya * based in part on anv driver which is: 6361fc4cbSmaya * Copyright © 2015 Intel Corporation 7361fc4cbSmaya * 8361fc4cbSmaya * Permission is hereby granted, free of charge, to any person obtaining a 9361fc4cbSmaya * copy of this software and associated documentation files (the "Software"), 10361fc4cbSmaya * to deal in the Software without restriction, including without limitation 11361fc4cbSmaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12361fc4cbSmaya * and/or sell copies of the Software, and to permit persons to whom the 13361fc4cbSmaya * Software is furnished to do so, subject to the following conditions: 14361fc4cbSmaya * 15361fc4cbSmaya * The above copyright notice and this permission notice (including the next 16361fc4cbSmaya * paragraph) shall be included in all copies or substantial portions of the 17361fc4cbSmaya * Software. 18361fc4cbSmaya * 19361fc4cbSmaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20361fc4cbSmaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21361fc4cbSmaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22361fc4cbSmaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23361fc4cbSmaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24361fc4cbSmaya * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25361fc4cbSmaya * DEALINGS IN THE SOFTWARE. 26361fc4cbSmaya */ 27361fc4cbSmaya#include "tu_private.h" 28361fc4cbSmaya 29361fc4cbSmaya#include "vk_util.h" 307ec681f3Smrg#include "vk_format.h" 31361fc4cbSmaya 327ec681f3Smrg/* Return true if we have to fallback to sysmem rendering because the 337ec681f3Smrg * dependency can't be satisfied with tiled rendering. 
347ec681f3Smrg */ 357ec681f3Smrg 367ec681f3Smrgstatic bool 377ec681f3Smrgdep_invalid_for_gmem(const VkSubpassDependency2 *dep) 38361fc4cbSmaya{ 397ec681f3Smrg /* External dependencies don't matter here. */ 407ec681f3Smrg if (dep->srcSubpass == VK_SUBPASS_EXTERNAL || 417ec681f3Smrg dep->dstSubpass == VK_SUBPASS_EXTERNAL) 427ec681f3Smrg return false; 437ec681f3Smrg 447ec681f3Smrg /* We can conceptually break down the process of rewriting a sysmem 457ec681f3Smrg * renderpass into a gmem one into two parts: 467ec681f3Smrg * 477ec681f3Smrg * 1. Split each draw and multisample resolve into N copies, one for each 487ec681f3Smrg * bin. (If hardware binning, add one more copy where the FS is disabled 497ec681f3Smrg * for the binning pass). This is always allowed because the vertex stage 507ec681f3Smrg * is allowed to run an arbitrary number of times and there are no extra 517ec681f3Smrg * ordering constraints within a draw. 527ec681f3Smrg * 2. Take the last copy of the second-to-last draw and slide it down to 537ec681f3Smrg * before the last copy of the last draw. Repeat for each earlier draw 547ec681f3Smrg * until the draw pass for the last bin is complete, then repeat for each 557ec681f3Smrg * earlier bin until we finish with the first bin. 567ec681f3Smrg * 577ec681f3Smrg * During this rearranging process, we can't slide draws past each other in 587ec681f3Smrg * a way that breaks the subpass dependencies. For each draw, we must slide 597ec681f3Smrg * it past (copies of) the rest of the draws in the renderpass. We can 607ec681f3Smrg * slide a draw past another if there isn't a dependency between them, or 617ec681f3Smrg * if the dependenc(ies) are dependencies between framebuffer-space stages 627ec681f3Smrg * only with the BY_REGION bit set. Note that this includes 637ec681f3Smrg * self-dependencies, since these may result in pipeline barriers that also 647ec681f3Smrg * break the rearranging process. 
657ec681f3Smrg */ 667ec681f3Smrg 677ec681f3Smrg /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer 687ec681f3Smrg * Region Dependencies": 697ec681f3Smrg */ 707ec681f3Smrg const VkPipelineStageFlags framebuffer_space_stages = 717ec681f3Smrg VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 727ec681f3Smrg VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | 737ec681f3Smrg VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | 747ec681f3Smrg VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; 757ec681f3Smrg 767ec681f3Smrg return 777ec681f3Smrg (dep->srcStageMask & ~framebuffer_space_stages) || 787ec681f3Smrg (dep->dstStageMask & ~framebuffer_space_stages) || 797ec681f3Smrg !(dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT); 807ec681f3Smrg} 81361fc4cbSmaya 827ec681f3Smrgstatic void 837ec681f3Smrgtu_render_pass_add_subpass_dep(struct tu_render_pass *pass, 847ec681f3Smrg const VkSubpassDependency2 *dep) 857ec681f3Smrg{ 867ec681f3Smrg uint32_t src = dep->srcSubpass; 877ec681f3Smrg uint32_t dst = dep->dstSubpass; 887ec681f3Smrg 897ec681f3Smrg /* Ignore subpass self-dependencies as they allow the app to call 907ec681f3Smrg * vkCmdPipelineBarrier() inside the render pass and the driver should only 917ec681f3Smrg * do the barrier when called, not when starting the render pass. 927ec681f3Smrg * 937ec681f3Smrg * We cannot decide whether to allow gmem rendering before a barrier 947ec681f3Smrg * is actually emitted, so we delay the decision until then. 
957ec681f3Smrg */ 967ec681f3Smrg if (src == dst) 977ec681f3Smrg return; 98361fc4cbSmaya 997ec681f3Smrg if (dep_invalid_for_gmem(dep)) 1007ec681f3Smrg pass->gmem_pixels = 0; 101361fc4cbSmaya 1027ec681f3Smrg struct tu_subpass_barrier *dst_barrier; 1037ec681f3Smrg if (dst == VK_SUBPASS_EXTERNAL) { 1047ec681f3Smrg dst_barrier = &pass->end_barrier; 1057ec681f3Smrg } else { 1067ec681f3Smrg dst_barrier = &pass->subpasses[dst].start_barrier; 1077ec681f3Smrg } 108361fc4cbSmaya 1097ec681f3Smrg dst_barrier->src_stage_mask |= dep->srcStageMask; 1107ec681f3Smrg dst_barrier->dst_stage_mask |= dep->dstStageMask; 1117ec681f3Smrg dst_barrier->src_access_mask |= dep->srcAccessMask; 1127ec681f3Smrg dst_barrier->dst_access_mask |= dep->dstAccessMask; 1137ec681f3Smrg} 114361fc4cbSmaya 1157ec681f3Smrg/* We currently only care about undefined layouts, because we have to 1167ec681f3Smrg * flush/invalidate CCU for those. PREINITIALIZED is the same thing as 1177ec681f3Smrg * UNDEFINED for anything not linear tiled, but we don't know yet whether the 1187ec681f3Smrg * images used are tiled, so just assume they are. 1197ec681f3Smrg */ 1207ec681f3Smrg 1217ec681f3Smrgstatic bool 1227ec681f3Smrglayout_undefined(VkImageLayout layout) 1237ec681f3Smrg{ 1247ec681f3Smrg return layout == VK_IMAGE_LAYOUT_UNDEFINED || 1257ec681f3Smrg layout == VK_IMAGE_LAYOUT_PREINITIALIZED; 1267ec681f3Smrg} 1277ec681f3Smrg 1287ec681f3Smrg/* This implements the following bit of spec text: 1297ec681f3Smrg * 1307ec681f3Smrg * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the 1317ec681f3Smrg * first subpass that uses an attachment, then an implicit subpass 1327ec681f3Smrg * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is 1337ec681f3Smrg * used in. The implicit subpass dependency only exists if there 1347ec681f3Smrg * exists an automatic layout transition away from initialLayout. 
 *    The subpass dependency operates as if defined with the
 *    following parameters:
 *
 *    VkSubpassDependency implicitDependency = {
 *        .srcSubpass = VK_SUBPASS_EXTERNAL;
 *        .dstSubpass = firstSubpass; // First subpass attachment is used in
 *        .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
 *        .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
 *        .srcAccessMask = 0;
 *        .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 *        .dependencyFlags = 0;
 *    };
 *
 *    Similarly, if there is no subpass dependency from the last subpass
 *    that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
 *    subpass dependency exists from the last subpass it is used in to
 *    VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
 *    if there exists an automatic layout transition into finalLayout.
 *    The subpass dependency operates as if defined with the following
 *    parameters:
 *
 *    VkSubpassDependency implicitDependency = {
 *        .srcSubpass = lastSubpass; // Last subpass attachment is used in
 *        .dstSubpass = VK_SUBPASS_EXTERNAL;
 *        .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
 *        .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
 *        .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 *        .dstAccessMask = 0;
 *        .dependencyFlags = 0;
 *    };
 *
 * Note: currently this is the only use we have for layout transitions,
 * besides needing to invalidate CCU at the beginning, so we also flag
 * transitions from UNDEFINED here.
1777ec681f3Smrg */ 1787ec681f3Smrgstatic void 1797ec681f3Smrgtu_render_pass_add_implicit_deps(struct tu_render_pass *pass, 1807ec681f3Smrg const VkRenderPassCreateInfo2 *info) 1817ec681f3Smrg{ 1827ec681f3Smrg const VkAttachmentDescription2* att = info->pAttachments; 1837ec681f3Smrg bool has_external_src[info->subpassCount]; 1847ec681f3Smrg bool has_external_dst[info->subpassCount]; 1857ec681f3Smrg bool att_used[pass->attachment_count]; 1867ec681f3Smrg 1877ec681f3Smrg memset(has_external_src, 0, sizeof(has_external_src)); 1887ec681f3Smrg memset(has_external_dst, 0, sizeof(has_external_dst)); 1897ec681f3Smrg 1907ec681f3Smrg for (uint32_t i = 0; i < info->dependencyCount; i++) { 1917ec681f3Smrg uint32_t src = info->pDependencies[i].srcSubpass; 1927ec681f3Smrg uint32_t dst = info->pDependencies[i].dstSubpass; 1937ec681f3Smrg 1947ec681f3Smrg if (src == dst) 1957ec681f3Smrg continue; 1967ec681f3Smrg 1977ec681f3Smrg if (src == VK_SUBPASS_EXTERNAL) 1987ec681f3Smrg has_external_src[dst] = true; 1997ec681f3Smrg if (dst == VK_SUBPASS_EXTERNAL) 2007ec681f3Smrg has_external_dst[src] = true; 201361fc4cbSmaya } 202361fc4cbSmaya 2037ec681f3Smrg memset(att_used, 0, sizeof(att_used)); 204361fc4cbSmaya 2057ec681f3Smrg for (unsigned i = 0; i < info->subpassCount; i++) { 2067ec681f3Smrg const VkSubpassDescription2 *subpass = &info->pSubpasses[i]; 2077ec681f3Smrg bool src_implicit_dep = false; 2087ec681f3Smrg 2097ec681f3Smrg for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) { 2107ec681f3Smrg uint32_t a = subpass->pInputAttachments[j].attachment; 2117ec681f3Smrg if (a == VK_ATTACHMENT_UNUSED) 2127ec681f3Smrg continue; 2137ec681f3Smrg if (att[a].initialLayout != subpass->pInputAttachments[j].layout && 2147ec681f3Smrg !att_used[a] && !has_external_src[i]) 2157ec681f3Smrg src_implicit_dep = true; 2167ec681f3Smrg att_used[a] = true; 2177ec681f3Smrg } 2187ec681f3Smrg 2197ec681f3Smrg for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 2207ec681f3Smrg uint32_t a = 
subpass->pColorAttachments[j].attachment; 2217ec681f3Smrg if (a == VK_ATTACHMENT_UNUSED) 2227ec681f3Smrg continue; 2237ec681f3Smrg if (att[a].initialLayout != subpass->pColorAttachments[j].layout && 2247ec681f3Smrg !att_used[a] && !has_external_src[i]) 2257ec681f3Smrg src_implicit_dep = true; 2267ec681f3Smrg att_used[a] = true; 2277ec681f3Smrg } 2287ec681f3Smrg 2297ec681f3Smrg if (subpass->pDepthStencilAttachment && 2307ec681f3Smrg subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) { 2317ec681f3Smrg uint32_t a = subpass->pDepthStencilAttachment->attachment; 2327ec681f3Smrg if (att[a].initialLayout != subpass->pDepthStencilAttachment->layout && 2337ec681f3Smrg !att_used[a] && !has_external_src[i]) 2347ec681f3Smrg src_implicit_dep = true; 2357ec681f3Smrg att_used[a] = true; 2367ec681f3Smrg } 2377ec681f3Smrg 2387ec681f3Smrg if (subpass->pResolveAttachments) { 2397ec681f3Smrg for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 2407ec681f3Smrg uint32_t a = subpass->pResolveAttachments[j].attachment; 2417ec681f3Smrg if (a == VK_ATTACHMENT_UNUSED) 2427ec681f3Smrg continue; 2437ec681f3Smrg if (att[a].initialLayout != subpass->pResolveAttachments[j].layout && 2447ec681f3Smrg !att_used[a] && !has_external_src[i]) 2457ec681f3Smrg src_implicit_dep = true; 2467ec681f3Smrg att_used[a] = true; 2477ec681f3Smrg } 2487ec681f3Smrg } 2497ec681f3Smrg 2507ec681f3Smrg const VkSubpassDescriptionDepthStencilResolve *ds_resolve = 2517ec681f3Smrg vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR); 2527ec681f3Smrg 2537ec681f3Smrg if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment && 2547ec681f3Smrg ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) { 2557ec681f3Smrg uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment; 2567ec681f3Smrg if (att[a].initialLayout != subpass->pDepthStencilAttachment->layout && 2577ec681f3Smrg !att_used[a] && !has_external_src[i]) 2587ec681f3Smrg 
src_implicit_dep = true; 2597ec681f3Smrg att_used[a] = true; 2607ec681f3Smrg } 2617ec681f3Smrg 2627ec681f3Smrg if (src_implicit_dep) { 2637ec681f3Smrg tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) { 2647ec681f3Smrg .srcSubpass = VK_SUBPASS_EXTERNAL, 2657ec681f3Smrg .dstSubpass = i, 2667ec681f3Smrg .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 2677ec681f3Smrg .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 2687ec681f3Smrg .srcAccessMask = 0, 2697ec681f3Smrg .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | 2707ec681f3Smrg VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 2717ec681f3Smrg VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 2727ec681f3Smrg VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 2737ec681f3Smrg VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, 2747ec681f3Smrg .dependencyFlags = 0, 2757ec681f3Smrg }); 2767ec681f3Smrg } 277361fc4cbSmaya } 278361fc4cbSmaya 2797ec681f3Smrg memset(att_used, 0, sizeof(att_used)); 2807ec681f3Smrg 2817ec681f3Smrg for (int i = info->subpassCount - 1; i >= 0; i--) { 2827ec681f3Smrg const VkSubpassDescription2 *subpass = &info->pSubpasses[i]; 2837ec681f3Smrg bool dst_implicit_dep = false; 2847ec681f3Smrg 2857ec681f3Smrg for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) { 2867ec681f3Smrg uint32_t a = subpass->pInputAttachments[j].attachment; 2877ec681f3Smrg if (a == VK_ATTACHMENT_UNUSED) 2887ec681f3Smrg continue; 2897ec681f3Smrg if (att[a].finalLayout != subpass->pInputAttachments[j].layout && 2907ec681f3Smrg !att_used[a] && !has_external_dst[i]) 2917ec681f3Smrg dst_implicit_dep = true; 2927ec681f3Smrg att_used[a] = true; 2937ec681f3Smrg } 2947ec681f3Smrg 2957ec681f3Smrg for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 2967ec681f3Smrg uint32_t a = subpass->pColorAttachments[j].attachment; 2977ec681f3Smrg if (a == VK_ATTACHMENT_UNUSED) 2987ec681f3Smrg continue; 2997ec681f3Smrg if (att[a].finalLayout != subpass->pColorAttachments[j].layout && 3007ec681f3Smrg !att_used[a] && !has_external_dst[i]) 
3017ec681f3Smrg dst_implicit_dep = true; 3027ec681f3Smrg att_used[a] = true; 3037ec681f3Smrg } 3047ec681f3Smrg 3057ec681f3Smrg if (subpass->pDepthStencilAttachment && 3067ec681f3Smrg subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) { 3077ec681f3Smrg uint32_t a = subpass->pDepthStencilAttachment->attachment; 3087ec681f3Smrg if (att[a].finalLayout != subpass->pDepthStencilAttachment->layout && 3097ec681f3Smrg !att_used[a] && !has_external_dst[i]) 3107ec681f3Smrg dst_implicit_dep = true; 3117ec681f3Smrg att_used[a] = true; 3127ec681f3Smrg } 3137ec681f3Smrg 3147ec681f3Smrg if (subpass->pResolveAttachments) { 3157ec681f3Smrg for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 3167ec681f3Smrg uint32_t a = subpass->pResolveAttachments[j].attachment; 3177ec681f3Smrg if (a == VK_ATTACHMENT_UNUSED) 3187ec681f3Smrg continue; 3197ec681f3Smrg if (att[a].finalLayout != subpass->pResolveAttachments[j].layout && 3207ec681f3Smrg !att_used[a] && !has_external_dst[i]) 3217ec681f3Smrg dst_implicit_dep = true; 3227ec681f3Smrg att_used[a] = true; 3237ec681f3Smrg } 3247ec681f3Smrg } 3257ec681f3Smrg 3267ec681f3Smrg const VkSubpassDescriptionDepthStencilResolve *ds_resolve = 3277ec681f3Smrg vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR); 3287ec681f3Smrg 3297ec681f3Smrg if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment && 3307ec681f3Smrg ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) { 3317ec681f3Smrg uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment; 3327ec681f3Smrg if (att[a].finalLayout != subpass->pDepthStencilAttachment->layout && 3337ec681f3Smrg !att_used[a] && !has_external_dst[i]) 3347ec681f3Smrg dst_implicit_dep = true; 3357ec681f3Smrg att_used[a] = true; 3367ec681f3Smrg } 3377ec681f3Smrg 3387ec681f3Smrg if (dst_implicit_dep) { 3397ec681f3Smrg tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) { 3407ec681f3Smrg .srcSubpass = i, 
3417ec681f3Smrg .dstSubpass = VK_SUBPASS_EXTERNAL, 3427ec681f3Smrg .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 3437ec681f3Smrg .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 3447ec681f3Smrg .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | 3457ec681f3Smrg VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 3467ec681f3Smrg VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 3477ec681f3Smrg VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 3487ec681f3Smrg VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, 3497ec681f3Smrg .dstAccessMask = 0, 3507ec681f3Smrg .dependencyFlags = 0, 3517ec681f3Smrg }); 3527ec681f3Smrg } 353361fc4cbSmaya } 354361fc4cbSmaya 3557ec681f3Smrg /* Handle UNDEFINED transitions, similar to the handling in tu_barrier(). 3567ec681f3Smrg * Assume that if an attachment has an initial layout of UNDEFINED, it gets 3577ec681f3Smrg * transitioned eventually. 3587ec681f3Smrg */ 3597ec681f3Smrg for (unsigned i = 0; i < info->attachmentCount; i++) { 3607ec681f3Smrg if (layout_undefined(att[i].initialLayout)) { 3617ec681f3Smrg if (vk_format_is_depth_or_stencil(att[i].format)) { 3627ec681f3Smrg pass->subpasses[0].start_barrier.incoherent_ccu_depth = true; 3637ec681f3Smrg } else { 3647ec681f3Smrg pass->subpasses[0].start_barrier.incoherent_ccu_color = true; 3657ec681f3Smrg } 366361fc4cbSmaya } 3677ec681f3Smrg } 3687ec681f3Smrg} 369361fc4cbSmaya 3707ec681f3Smrg/* If an input attachment is used without an intervening write to the same 3717ec681f3Smrg * attachment, then we can just use the original image, even in GMEM mode. 3727ec681f3Smrg * This is an optimization, but it's also important because it allows us to 3737ec681f3Smrg * avoid having to invalidate UCHE at the beginning of each tile due to it 3747ec681f3Smrg * becoming invalid. The only reads of GMEM via UCHE should be after an 3757ec681f3Smrg * earlier subpass modified it, which only works if there's already an 3767ec681f3Smrg * appropriate dependency that will add the CACHE_INVALIDATE anyway. 
We 3777ec681f3Smrg * don't consider this in the dependency code, so this is also required for 3787ec681f3Smrg * correctness. 3797ec681f3Smrg */ 3807ec681f3Smrgstatic void 3817ec681f3Smrgtu_render_pass_patch_input_gmem(struct tu_render_pass *pass) 3827ec681f3Smrg{ 3837ec681f3Smrg bool written[pass->attachment_count]; 384361fc4cbSmaya 3857ec681f3Smrg memset(written, 0, sizeof(written)); 386361fc4cbSmaya 3877ec681f3Smrg for (unsigned i = 0; i < pass->subpass_count; i++) { 3887ec681f3Smrg struct tu_subpass *subpass = &pass->subpasses[i]; 389361fc4cbSmaya 3907ec681f3Smrg for (unsigned j = 0; j < subpass->input_count; j++) { 3917ec681f3Smrg uint32_t a = subpass->input_attachments[j].attachment; 3927ec681f3Smrg if (a == VK_ATTACHMENT_UNUSED) 3937ec681f3Smrg continue; 3947ec681f3Smrg subpass->input_attachments[j].patch_input_gmem = written[a]; 3957ec681f3Smrg } 3967ec681f3Smrg 3977ec681f3Smrg for (unsigned j = 0; j < subpass->color_count; j++) { 3987ec681f3Smrg uint32_t a = subpass->color_attachments[j].attachment; 3997ec681f3Smrg if (a == VK_ATTACHMENT_UNUSED) 4007ec681f3Smrg continue; 4017ec681f3Smrg written[a] = true; 4027ec681f3Smrg 4037ec681f3Smrg for (unsigned k = 0; k < subpass->input_count; k++) { 4047ec681f3Smrg if (subpass->input_attachments[k].attachment == a && 4057ec681f3Smrg !subpass->input_attachments[k].patch_input_gmem) { 4067ec681f3Smrg /* For render feedback loops, we have no idea whether the use 4077ec681f3Smrg * as a color attachment or input attachment will come first, 4087ec681f3Smrg * so we have to always use GMEM in case the color attachment 4097ec681f3Smrg * comes first and defensively invalidate UCHE in case the 4107ec681f3Smrg * input attachment comes first. 
4117ec681f3Smrg */ 4127ec681f3Smrg subpass->feedback_invalidate = true; 4137ec681f3Smrg subpass->input_attachments[k].patch_input_gmem = true; 4147ec681f3Smrg } 415361fc4cbSmaya } 416361fc4cbSmaya } 417361fc4cbSmaya 4187ec681f3Smrg for (unsigned j = 0; j < subpass->resolve_count; j++) { 4197ec681f3Smrg uint32_t a = subpass->resolve_attachments[j].attachment; 4207ec681f3Smrg if (a == VK_ATTACHMENT_UNUSED) 4217ec681f3Smrg continue; 4227ec681f3Smrg written[a] = true; 4237ec681f3Smrg } 424361fc4cbSmaya 4257ec681f3Smrg if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { 4267ec681f3Smrg written[subpass->depth_stencil_attachment.attachment] = true; 4277ec681f3Smrg for (unsigned k = 0; k < subpass->input_count; k++) { 4287ec681f3Smrg if (subpass->input_attachments[k].attachment == 4297ec681f3Smrg subpass->depth_stencil_attachment.attachment && 4307ec681f3Smrg !subpass->input_attachments[k].patch_input_gmem) { 4317ec681f3Smrg subpass->feedback_invalidate = true; 4327ec681f3Smrg subpass->input_attachments[k].patch_input_gmem = true; 433361fc4cbSmaya } 434361fc4cbSmaya } 435361fc4cbSmaya } 4367ec681f3Smrg } 4377ec681f3Smrg} 438361fc4cbSmaya 4397ec681f3Smrgstatic void 4407ec681f3Smrgtu_render_pass_check_feedback_loop(struct tu_render_pass *pass) 4417ec681f3Smrg{ 4427ec681f3Smrg for (unsigned i = 0; i < pass->subpass_count; i++) { 4437ec681f3Smrg struct tu_subpass *subpass = &pass->subpasses[i]; 444361fc4cbSmaya 4457ec681f3Smrg for (unsigned j = 0; j < subpass->color_count; j++) { 4467ec681f3Smrg uint32_t a = subpass->color_attachments[j].attachment; 4477ec681f3Smrg if (a == VK_ATTACHMENT_UNUSED) 4487ec681f3Smrg continue; 4497ec681f3Smrg for (unsigned k = 0; k < subpass->input_count; k++) { 4507ec681f3Smrg if (subpass->input_attachments[k].attachment == a) { 4517ec681f3Smrg subpass->feedback = true; 4527ec681f3Smrg break; 453361fc4cbSmaya } 454361fc4cbSmaya } 455361fc4cbSmaya } 456361fc4cbSmaya 4577ec681f3Smrg if 
(subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { 4587ec681f3Smrg for (unsigned k = 0; k < subpass->input_count; k++) { 4597ec681f3Smrg if (subpass->input_attachments[k].attachment == 4607ec681f3Smrg subpass->depth_stencil_attachment.attachment) { 4617ec681f3Smrg subpass->feedback = true; 4627ec681f3Smrg break; 4637ec681f3Smrg } 464361fc4cbSmaya } 465361fc4cbSmaya } 4667ec681f3Smrg } 4677ec681f3Smrg} 4687ec681f3Smrg 4697ec681f3Smrgstatic void update_samples(struct tu_subpass *subpass, 4707ec681f3Smrg VkSampleCountFlagBits samples) 4717ec681f3Smrg{ 4727ec681f3Smrg assert(subpass->samples == 0 || subpass->samples == samples); 4737ec681f3Smrg subpass->samples = samples; 4747ec681f3Smrg} 475361fc4cbSmaya 4767ec681f3Smrgstatic void 4777ec681f3Smrgtu_render_pass_gmem_config(struct tu_render_pass *pass, 4787ec681f3Smrg const struct tu_physical_device *phys_dev) 4797ec681f3Smrg{ 4807ec681f3Smrg uint32_t block_align_shift = 3; /* log2(gmem_align/(tile_align_w*tile_align_h)) */ 4817ec681f3Smrg uint32_t tile_align_w = phys_dev->info->tile_align_w; 4827ec681f3Smrg uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * phys_dev->info->tile_align_h; 4837ec681f3Smrg 4847ec681f3Smrg /* calculate total bytes per pixel */ 4857ec681f3Smrg uint32_t cpp_total = 0; 4867ec681f3Smrg for (uint32_t i = 0; i < pass->attachment_count; i++) { 4877ec681f3Smrg struct tu_render_pass_attachment *att = &pass->attachments[i]; 4887ec681f3Smrg bool cpp1 = (att->cpp == 1); 4897ec681f3Smrg if (att->gmem_offset >= 0) { 4907ec681f3Smrg cpp_total += att->cpp; 4917ec681f3Smrg 4927ec681f3Smrg /* take into account the separate stencil: */ 4937ec681f3Smrg if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { 4947ec681f3Smrg cpp1 = (att->samples == 1); 4957ec681f3Smrg cpp_total += att->samples; 4967ec681f3Smrg } 4977ec681f3Smrg 4987ec681f3Smrg /* texture pitch must be aligned to 64, use a tile_align_w that is 4997ec681f3Smrg * a multiple of 64 for cpp==1 attachment to work as input 
attachment 5007ec681f3Smrg */ 5017ec681f3Smrg if (cpp1 && tile_align_w % 64 != 0) { 5027ec681f3Smrg tile_align_w *= 2; 5037ec681f3Smrg block_align_shift -= 1; 5047ec681f3Smrg } 5057ec681f3Smrg } 506361fc4cbSmaya } 507361fc4cbSmaya 5087ec681f3Smrg pass->tile_align_w = tile_align_w; 5097ec681f3Smrg 5107ec681f3Smrg /* no gmem attachments */ 5117ec681f3Smrg if (cpp_total == 0) { 5127ec681f3Smrg /* any value non-zero value so tiling config works with no attachments */ 5137ec681f3Smrg pass->gmem_pixels = 1024*1024; 5147ec681f3Smrg return; 5157ec681f3Smrg } 5167ec681f3Smrg 5177ec681f3Smrg /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path 5187ec681f3Smrg * doesn't break things. maybe there is a better solution? 5197ec681f3Smrg * TODO: this algorithm isn't optimal 5207ec681f3Smrg * for example, two attachments with cpp = {1, 4} 5217ec681f3Smrg * result: nblocks = {12, 52}, pixels = 196608 5227ec681f3Smrg * optimal: nblocks = {13, 51}, pixels = 208896 5237ec681f3Smrg */ 5247ec681f3Smrg uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align; 5257ec681f3Smrg uint32_t offset = 0, pixels = ~0u, i; 5267ec681f3Smrg for (i = 0; i < pass->attachment_count; i++) { 5277ec681f3Smrg struct tu_render_pass_attachment *att = &pass->attachments[i]; 5287ec681f3Smrg if (att->gmem_offset < 0) 5297ec681f3Smrg continue; 5307ec681f3Smrg 5317ec681f3Smrg att->gmem_offset = offset; 5327ec681f3Smrg 5337ec681f3Smrg uint32_t align = MAX2(1, att->cpp >> block_align_shift); 5347ec681f3Smrg uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align); 5357ec681f3Smrg 5367ec681f3Smrg if (nblocks > gmem_blocks) 5377ec681f3Smrg break; 5387ec681f3Smrg 5397ec681f3Smrg gmem_blocks -= nblocks; 5407ec681f3Smrg cpp_total -= att->cpp; 5417ec681f3Smrg offset += nblocks * gmem_align; 5427ec681f3Smrg pixels = MIN2(pixels, nblocks * gmem_align / att->cpp); 5437ec681f3Smrg 5447ec681f3Smrg /* repeat the same for separate stencil */ 5457ec681f3Smrg if (att->format == 
VK_FORMAT_D32_SFLOAT_S8_UINT) { 5467ec681f3Smrg att->gmem_offset_stencil = offset; 5477ec681f3Smrg 5487ec681f3Smrg /* note: for s8_uint, block align is always 1 */ 5497ec681f3Smrg uint32_t nblocks = gmem_blocks * att->samples / cpp_total; 5507ec681f3Smrg if (nblocks > gmem_blocks) 5517ec681f3Smrg break; 5527ec681f3Smrg 5537ec681f3Smrg gmem_blocks -= nblocks; 5547ec681f3Smrg cpp_total -= att->samples; 5557ec681f3Smrg offset += nblocks * gmem_align; 5567ec681f3Smrg pixels = MIN2(pixels, nblocks * gmem_align / att->samples); 557361fc4cbSmaya } 558361fc4cbSmaya } 559361fc4cbSmaya 5607ec681f3Smrg /* if the loop didn't complete then the gmem config is impossible */ 5617ec681f3Smrg if (i == pass->attachment_count) 5627ec681f3Smrg pass->gmem_pixels = pixels; 5637ec681f3Smrg} 564361fc4cbSmaya 5657ec681f3Smrgstatic void 5667ec681f3Smrgattachment_set_ops(struct tu_render_pass_attachment *att, 5677ec681f3Smrg VkAttachmentLoadOp load_op, 5687ec681f3Smrg VkAttachmentLoadOp stencil_load_op, 5697ec681f3Smrg VkAttachmentStoreOp store_op, 5707ec681f3Smrg VkAttachmentStoreOp stencil_store_op) 5717ec681f3Smrg{ 5727ec681f3Smrg /* load/store ops */ 5737ec681f3Smrg att->clear_mask = 5747ec681f3Smrg (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? 
VK_IMAGE_ASPECT_COLOR_BIT : 0; 5757ec681f3Smrg att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD); 5767ec681f3Smrg att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE); 5777ec681f3Smrg 5787ec681f3Smrg bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR); 5797ec681f3Smrg bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD); 5807ec681f3Smrg bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE); 5817ec681f3Smrg 5827ec681f3Smrg switch (att->format) { 5837ec681f3Smrg case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */ 5847ec681f3Smrg if (att->clear_mask) 5857ec681f3Smrg att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT; 5867ec681f3Smrg if (stencil_clear) 5877ec681f3Smrg att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT; 5887ec681f3Smrg if (stencil_load) 5897ec681f3Smrg att->load = true; 5907ec681f3Smrg if (stencil_store) 5917ec681f3Smrg att->store = true; 5927ec681f3Smrg break; 5937ec681f3Smrg case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */ 5947ec681f3Smrg att->clear_mask = stencil_clear ? 
VK_IMAGE_ASPECT_COLOR_BIT : 0; 5957ec681f3Smrg att->load = stencil_load; 5967ec681f3Smrg att->store = stencil_store; 5977ec681f3Smrg break; 5987ec681f3Smrg case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */ 5997ec681f3Smrg if (att->clear_mask) 6007ec681f3Smrg att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT; 6017ec681f3Smrg if (stencil_clear) 6027ec681f3Smrg att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT; 6037ec681f3Smrg if (stencil_load) 6047ec681f3Smrg att->load_stencil = true; 6057ec681f3Smrg if (stencil_store) 6067ec681f3Smrg att->store_stencil = true; 6077ec681f3Smrg break; 6087ec681f3Smrg default: 6097ec681f3Smrg break; 6107ec681f3Smrg } 611361fc4cbSmaya} 612361fc4cbSmaya 6137ec681f3Smrgstatic bool 6147ec681f3Smrgis_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve) 6157ec681f3Smrg{ 6167ec681f3Smrg if (depth_stencil_resolve && 6177ec681f3Smrg depth_stencil_resolve->pDepthStencilResolveAttachment && 6187ec681f3Smrg depth_stencil_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) { 6197ec681f3Smrg return true; 6207ec681f3Smrg } 6217ec681f3Smrg return false; 6227ec681f3Smrg} 6237ec681f3Smrg 6247ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL 6257ec681f3Smrgtu_CreateRenderPass2(VkDevice _device, 6267ec681f3Smrg const VkRenderPassCreateInfo2KHR *pCreateInfo, 6277ec681f3Smrg const VkAllocationCallbacks *pAllocator, 6287ec681f3Smrg VkRenderPass *pRenderPass) 629361fc4cbSmaya{ 630361fc4cbSmaya TU_FROM_HANDLE(tu_device, device, _device); 631361fc4cbSmaya struct tu_render_pass *pass; 632361fc4cbSmaya size_t size; 633361fc4cbSmaya size_t attachments_offset; 634361fc4cbSmaya 6357ec681f3Smrg assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR); 636361fc4cbSmaya 637361fc4cbSmaya size = sizeof(*pass); 638361fc4cbSmaya size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); 639361fc4cbSmaya attachments_offset = size; 640361fc4cbSmaya size += pCreateInfo->attachmentCount * 
sizeof(pass->attachments[0]); 641361fc4cbSmaya 6427ec681f3Smrg pass = vk_object_zalloc(&device->vk, pAllocator, size, 6437ec681f3Smrg VK_OBJECT_TYPE_RENDER_PASS); 644361fc4cbSmaya if (pass == NULL) 6457ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 646361fc4cbSmaya 647361fc4cbSmaya pass->attachment_count = pCreateInfo->attachmentCount; 648361fc4cbSmaya pass->subpass_count = pCreateInfo->subpassCount; 649361fc4cbSmaya pass->attachments = (void *) pass + attachments_offset; 650361fc4cbSmaya 651361fc4cbSmaya for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { 652361fc4cbSmaya struct tu_render_pass_attachment *att = &pass->attachments[i]; 653361fc4cbSmaya 654361fc4cbSmaya att->format = pCreateInfo->pAttachments[i].format; 655361fc4cbSmaya att->samples = pCreateInfo->pAttachments[i].samples; 6567ec681f3Smrg /* for d32s8, cpp is for the depth image, and 6577ec681f3Smrg * att->samples will be used as the cpp for the stencil image 6587ec681f3Smrg */ 6597ec681f3Smrg if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) 6607ec681f3Smrg att->cpp = 4 * att->samples; 6617ec681f3Smrg else 6627ec681f3Smrg att->cpp = vk_format_get_blocksize(att->format) * att->samples; 6637ec681f3Smrg att->gmem_offset = -1; 6647ec681f3Smrg 6657ec681f3Smrg attachment_set_ops(att, 6667ec681f3Smrg pCreateInfo->pAttachments[i].loadOp, 6677ec681f3Smrg pCreateInfo->pAttachments[i].stencilLoadOp, 6687ec681f3Smrg pCreateInfo->pAttachments[i].storeOp, 6697ec681f3Smrg pCreateInfo->pAttachments[i].stencilStoreOp); 670361fc4cbSmaya } 671361fc4cbSmaya uint32_t subpass_attachment_count = 0; 672361fc4cbSmaya struct tu_subpass_attachment *p; 673361fc4cbSmaya for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { 6747ec681f3Smrg const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i]; 6757ec681f3Smrg const VkSubpassDescriptionDepthStencilResolve *ds_resolve = 6767ec681f3Smrg vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR); 677361fc4cbSmaya 
678361fc4cbSmaya subpass_attachment_count += 679361fc4cbSmaya desc->inputAttachmentCount + desc->colorAttachmentCount + 680361fc4cbSmaya (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) + 6817ec681f3Smrg (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0); 682361fc4cbSmaya } 683361fc4cbSmaya 684361fc4cbSmaya if (subpass_attachment_count) { 685361fc4cbSmaya pass->subpass_attachments = vk_alloc2( 6867ec681f3Smrg &device->vk.alloc, pAllocator, 687361fc4cbSmaya subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8, 688361fc4cbSmaya VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 689361fc4cbSmaya if (pass->subpass_attachments == NULL) { 6907ec681f3Smrg vk_object_free(&device->vk, pAllocator, pass); 6917ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 692361fc4cbSmaya } 693361fc4cbSmaya } else 694361fc4cbSmaya pass->subpass_attachments = NULL; 695361fc4cbSmaya 696361fc4cbSmaya p = pass->subpass_attachments; 697361fc4cbSmaya for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { 6987ec681f3Smrg const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i]; 6997ec681f3Smrg const VkSubpassDescriptionDepthStencilResolve *ds_resolve = 7007ec681f3Smrg vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR); 701361fc4cbSmaya struct tu_subpass *subpass = &pass->subpasses[i]; 702361fc4cbSmaya 703361fc4cbSmaya subpass->input_count = desc->inputAttachmentCount; 704361fc4cbSmaya subpass->color_count = desc->colorAttachmentCount; 7057ec681f3Smrg subpass->resolve_count = 0; 7067ec681f3Smrg subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve); 7077ec681f3Smrg subpass->samples = 0; 7087ec681f3Smrg subpass->srgb_cntl = 0; 7097ec681f3Smrg 7107ec681f3Smrg subpass->multiview_mask = desc->viewMask; 711361fc4cbSmaya 712361fc4cbSmaya if (desc->inputAttachmentCount > 0) { 713361fc4cbSmaya subpass->input_attachments = p; 714361fc4cbSmaya p += desc->inputAttachmentCount; 715361fc4cbSmaya 716361fc4cbSmaya for 
(uint32_t j = 0; j < desc->inputAttachmentCount; j++) { 7177ec681f3Smrg uint32_t a = desc->pInputAttachments[j].attachment; 7187ec681f3Smrg subpass->input_attachments[j].attachment = a; 7197ec681f3Smrg /* Note: attachments only used as input attachments will be read 7207ec681f3Smrg * directly instead of through gmem, so we don't mark input 7217ec681f3Smrg * attachments as needing gmem. 7227ec681f3Smrg */ 723361fc4cbSmaya } 724361fc4cbSmaya } 725361fc4cbSmaya 726361fc4cbSmaya if (desc->colorAttachmentCount > 0) { 727361fc4cbSmaya subpass->color_attachments = p; 728361fc4cbSmaya p += desc->colorAttachmentCount; 729361fc4cbSmaya 730361fc4cbSmaya for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { 7317ec681f3Smrg uint32_t a = desc->pColorAttachments[j].attachment; 7327ec681f3Smrg subpass->color_attachments[j].attachment = a; 7337ec681f3Smrg 7347ec681f3Smrg if (a != VK_ATTACHMENT_UNUSED) { 7357ec681f3Smrg pass->attachments[a].gmem_offset = 0; 7367ec681f3Smrg update_samples(subpass, pCreateInfo->pAttachments[a].samples); 7377ec681f3Smrg 7387ec681f3Smrg if (vk_format_is_srgb(pass->attachments[a].format)) 7397ec681f3Smrg subpass->srgb_cntl |= 1 << j; 7407ec681f3Smrg 7417ec681f3Smrg pass->attachments[a].clear_views |= subpass->multiview_mask; 742361fc4cbSmaya } 743361fc4cbSmaya } 744361fc4cbSmaya } 745361fc4cbSmaya 7467ec681f3Smrg subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? 
p : NULL; 747361fc4cbSmaya if (desc->pResolveAttachments) { 748361fc4cbSmaya p += desc->colorAttachmentCount; 7497ec681f3Smrg subpass->resolve_count += desc->colorAttachmentCount; 750361fc4cbSmaya for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { 7517ec681f3Smrg subpass->resolve_attachments[j].attachment = 7527ec681f3Smrg desc->pResolveAttachments[j].attachment; 753361fc4cbSmaya } 754361fc4cbSmaya } 755361fc4cbSmaya 7567ec681f3Smrg if (subpass->resolve_depth_stencil) { 7577ec681f3Smrg p++; 7587ec681f3Smrg subpass->resolve_count++; 7597ec681f3Smrg uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment; 7607ec681f3Smrg subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a; 761361fc4cbSmaya } 762361fc4cbSmaya 7637ec681f3Smrg uint32_t a = desc->pDepthStencilAttachment ? 7647ec681f3Smrg desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED; 7657ec681f3Smrg subpass->depth_stencil_attachment.attachment = a; 7667ec681f3Smrg if (a != VK_ATTACHMENT_UNUSED) { 7677ec681f3Smrg pass->attachments[a].gmem_offset = 0; 7687ec681f3Smrg update_samples(subpass, pCreateInfo->pAttachments[a].samples); 7697ec681f3Smrg 7707ec681f3Smrg pass->attachments[a].clear_views |= subpass->multiview_mask; 7717ec681f3Smrg } 772361fc4cbSmaya } 773361fc4cbSmaya 7747ec681f3Smrg tu_render_pass_patch_input_gmem(pass); 7757ec681f3Smrg 7767ec681f3Smrg tu_render_pass_check_feedback_loop(pass); 7777ec681f3Smrg 7787ec681f3Smrg /* disable unused attachments */ 7797ec681f3Smrg for (uint32_t i = 0; i < pass->attachment_count; i++) { 7807ec681f3Smrg struct tu_render_pass_attachment *att = &pass->attachments[i]; 7817ec681f3Smrg if (att->gmem_offset < 0) { 7827ec681f3Smrg att->clear_mask = 0; 7837ec681f3Smrg att->load = false; 784361fc4cbSmaya } 785361fc4cbSmaya } 786361fc4cbSmaya 7877ec681f3Smrg /* From the VK_KHR_multiview spec: 7887ec681f3Smrg * 7897ec681f3Smrg * Multiview is all-or-nothing for a render pass - that is, either all 7907ec681f3Smrg * subpasses must 
have a non-zero view mask (though some subpasses may 7917ec681f3Smrg * have only one view) or all must be zero. 7927ec681f3Smrg * 7937ec681f3Smrg * This means we only have to check one of the view masks. 7947ec681f3Smrg */ 7957ec681f3Smrg if (pCreateInfo->pSubpasses[0].viewMask) { 7967ec681f3Smrg /* It seems multiview must use sysmem rendering. */ 7977ec681f3Smrg pass->gmem_pixels = 0; 7987ec681f3Smrg } else { 7997ec681f3Smrg tu_render_pass_gmem_config(pass, device->physical_device); 8007ec681f3Smrg } 8017ec681f3Smrg 8027ec681f3Smrg for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { 8037ec681f3Smrg tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]); 8047ec681f3Smrg } 8057ec681f3Smrg 8067ec681f3Smrg tu_render_pass_add_implicit_deps(pass, pCreateInfo); 8077ec681f3Smrg 808361fc4cbSmaya *pRenderPass = tu_render_pass_to_handle(pass); 809361fc4cbSmaya 810361fc4cbSmaya return VK_SUCCESS; 811361fc4cbSmaya} 812361fc4cbSmaya 8137ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 814361fc4cbSmayatu_DestroyRenderPass(VkDevice _device, 815361fc4cbSmaya VkRenderPass _pass, 816361fc4cbSmaya const VkAllocationCallbacks *pAllocator) 817361fc4cbSmaya{ 818361fc4cbSmaya TU_FROM_HANDLE(tu_device, device, _device); 819361fc4cbSmaya TU_FROM_HANDLE(tu_render_pass, pass, _pass); 820361fc4cbSmaya 821361fc4cbSmaya if (!_pass) 822361fc4cbSmaya return; 8237ec681f3Smrg 8247ec681f3Smrg vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments); 8257ec681f3Smrg vk_object_free(&device->vk, pAllocator, pass); 826361fc4cbSmaya} 827361fc4cbSmaya 8287ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 829361fc4cbSmayatu_GetRenderAreaGranularity(VkDevice _device, 830361fc4cbSmaya VkRenderPass renderPass, 831361fc4cbSmaya VkExtent2D *pGranularity) 832361fc4cbSmaya{ 833361fc4cbSmaya TU_FROM_HANDLE(tu_device, device, _device); 8347ec681f3Smrg pGranularity->width = device->physical_device->info->gmem_align_w; 8357ec681f3Smrg pGranularity->height = device->physical_device->info->gmem_align_h; 
8367ec681f3Smrg} 8377ec681f3Smrg 8387ec681f3Smrguint32_t 8397ec681f3Smrgtu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index) 8407ec681f3Smrg{ 8417ec681f3Smrg if (subpass->resolve_depth_stencil && 8427ec681f3Smrg index == (subpass->resolve_count - 1)) 8437ec681f3Smrg return subpass->depth_stencil_attachment.attachment; 844361fc4cbSmaya 8457ec681f3Smrg return subpass->color_attachments[index].attachment; 846361fc4cbSmaya} 847