/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "tu_private.h"

#include "vk_util.h"
#include "vk_format.h"

/* Return true if we have to fall back to sysmem rendering because the
 * dependency can't be satisfied with tiled rendering.
 */

static bool
dep_invalid_for_gmem(const VkSubpassDependency2 *dep)
{
   /* External dependencies don't matter here. */
   if (dep->srcSubpass == VK_SUBPASS_EXTERNAL ||
       dep->dstSubpass == VK_SUBPASS_EXTERNAL)
      return false;

   /* We can conceptually break down the process of rewriting a sysmem
    * renderpass into a gmem one into two parts:
    *
    * 1. Split each draw and multisample resolve into N copies, one for each
    * bin. (If hardware binning, add one more copy where the FS is disabled
    * for the binning pass). This is always allowed because the vertex stage
    * is allowed to run an arbitrary number of times and there are no extra
    * ordering constraints within a draw.
    * 2. Take the last copy of the second-to-last draw and slide it down to
    * before the last copy of the last draw. Repeat for each earlier draw
    * until the draw pass for the last bin is complete, then repeat for each
    * earlier bin until we finish with the first bin.
    *
    * During this rearranging process, we can't slide draws past each other in
    * a way that breaks the subpass dependencies. For each draw, we must slide
    * it past (copies of) the rest of the draws in the renderpass. We can
    * slide a draw past another if there isn't a dependency between them, or
    * if the dependencies between them are only between framebuffer-space
    * stages and have the BY_REGION bit set. Note that this includes
    * self-dependencies, since these may result in pipeline barriers that also
    * break the rearranging process.
    */

   /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer
    * Region Dependencies":
    */
   const VkPipelineStageFlags framebuffer_space_stages =
      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;

   return
      (dep->srcStageMask & ~framebuffer_space_stages) ||
      (dep->dstStageMask & ~framebuffer_space_stages) ||
      !(dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT);
}

static void
tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
                               const VkSubpassDependency2 *dep)
{
   uint32_t src = dep->srcSubpass;
   uint32_t dst = dep->dstSubpass;

   /* Ignore subpass self-dependencies as they allow the app to call
    * vkCmdPipelineBarrier() inside the render pass and the driver should only
    * do the barrier when called, not when starting the render pass.
    *
    * We cannot decide whether to allow gmem rendering before a barrier
    * is actually emitted, so we delay the decision until then.
    */
   if (src == dst)
      return;

   if (dep_invalid_for_gmem(dep))
      pass->gmem_pixels = 0;

   struct tu_subpass_barrier *dst_barrier;
   if (dst == VK_SUBPASS_EXTERNAL) {
      dst_barrier = &pass->end_barrier;
   } else {
      dst_barrier = &pass->subpasses[dst].start_barrier;
   }

   dst_barrier->src_stage_mask |= dep->srcStageMask;
   dst_barrier->dst_stage_mask |= dep->dstStageMask;
   dst_barrier->src_access_mask |= dep->srcAccessMask;
   dst_barrier->dst_access_mask |= dep->dstAccessMask;
}

/* We currently only care about undefined layouts, because we have to
 * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
 * UNDEFINED for anything not linear tiled, but we don't know yet whether the
 * images used are tiled, so just assume they are.
 */

static bool
layout_undefined(VkImageLayout layout)
{
   return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
          layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
}

/* This implements the following bit of spec text:
 *
 *    If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
 *    first subpass that uses an attachment, then an implicit subpass
 *    dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
 *    used in. The implicit subpass dependency only exists if there
 *    exists an automatic layout transition away from initialLayout.
 *    The subpass dependency operates as if defined with the
 *    following parameters:
 *
 *    VkSubpassDependency implicitDependency = {
 *        .srcSubpass = VK_SUBPASS_EXTERNAL;
 *        .dstSubpass = firstSubpass; // First subpass attachment is used in
 *        .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
 *        .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
 *        .srcAccessMask = 0;
 *        .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 *        .dependencyFlags = 0;
 *    };
 *
 *    Similarly, if there is no subpass dependency from the last subpass
 *    that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
 *    subpass dependency exists from the last subpass it is used in to
 *    VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
 *    if there exists an automatic layout transition into finalLayout.
 *    The subpass dependency operates as if defined with the following
 *    parameters:
 *
 *    VkSubpassDependency implicitDependency = {
 *        .srcSubpass = lastSubpass; // Last subpass attachment is used in
 *        .dstSubpass = VK_SUBPASS_EXTERNAL;
 *        .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
 *        .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
 *        .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 *        .dstAccessMask = 0;
 *        .dependencyFlags = 0;
 *    };
 *
 * Note: currently this is the only use we have for layout transitions,
 * besides needing to invalidate CCU at the beginning, so we also flag
 * transitions from UNDEFINED here.
 */
static void
tu_render_pass_add_implicit_deps(struct tu_render_pass *pass,
                                 const VkRenderPassCreateInfo2 *info)
{
   const VkAttachmentDescription2* att = info->pAttachments;
   bool has_external_src[info->subpassCount];
   bool has_external_dst[info->subpassCount];
   bool att_used[pass->attachment_count];

   memset(has_external_src, 0, sizeof(has_external_src));
   memset(has_external_dst, 0, sizeof(has_external_dst));

   for (uint32_t i = 0; i < info->dependencyCount; i++) {
      uint32_t src = info->pDependencies[i].srcSubpass;
      uint32_t dst = info->pDependencies[i].dstSubpass;

      if (src == dst)
         continue;

      if (src == VK_SUBPASS_EXTERNAL)
         has_external_src[dst] = true;
      if (dst == VK_SUBPASS_EXTERNAL)
         has_external_dst[src] = true;
   }

   memset(att_used, 0, sizeof(att_used));

   for (unsigned i = 0; i < info->subpassCount; i++) {
      const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
      bool src_implicit_dep = false;

      for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
         uint32_t a = subpass->pInputAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].initialLayout != subpass->pInputAttachments[j].layout &&
             !att_used[a] && !has_external_src[i])
            src_implicit_dep = true;
         att_used[a] = true;
      }

      for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
         uint32_t a = subpass->pColorAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].initialLayout != subpass->pColorAttachments[j].layout &&
             !att_used[a] && !has_external_src[i])
            src_implicit_dep = true;
         att_used[a] = true;
      }

      if (subpass->pDepthStencilAttachment &&
          subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
         uint32_t a = subpass->pDepthStencilAttachment->attachment;
         if (att[a].initialLayout != subpass->pDepthStencilAttachment->layout &&
             !att_used[a] && !has_external_src[i])
            src_implicit_dep = true;
         att_used[a] = true;
      }

      if (subpass->pResolveAttachments) {
         for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
            uint32_t a = subpass->pResolveAttachments[j].attachment;
            if (a == VK_ATTACHMENT_UNUSED)
               continue;
            if (att[a].initialLayout != subpass->pResolveAttachments[j].layout &&
               !att_used[a] && !has_external_src[i])
               src_implicit_dep = true;
            att_used[a] = true;
         }
      }

      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
         vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);

      if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
          ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
            uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
            if (att[a].initialLayout != subpass->pDepthStencilAttachment->layout &&
               !att_used[a] && !has_external_src[i])
               src_implicit_dep = true;
            att_used[a] = true;
      }

      if (src_implicit_dep) {
         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
            .srcSubpass = VK_SUBPASS_EXTERNAL,
            .dstSubpass = i,
            .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
            .srcAccessMask = 0,
            .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
            .dependencyFlags = 0,
         });
      }
   }

   memset(att_used, 0, sizeof(att_used));

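   /* Now walk the subpasses in reverse to find the last use of each
    * attachment and add the implicit dependency to VK_SUBPASS_EXTERNAL
    * for transitions into finalLayout, mirroring the loop above.
    */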
   for (int i = info->subpassCount - 1; i >= 0; i--) {
      const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
      bool dst_implicit_dep = false;

      for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
         uint32_t a = subpass->pInputAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].finalLayout != subpass->pInputAttachments[j].layout &&
             !att_used[a] && !has_external_dst[i])
            dst_implicit_dep = true;
         att_used[a] = true;
      }

      for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
         uint32_t a = subpass->pColorAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].finalLayout != subpass->pColorAttachments[j].layout &&
             !att_used[a] && !has_external_dst[i])
            dst_implicit_dep = true;
         att_used[a] = true;
      }

      if (subpass->pDepthStencilAttachment &&
          subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
         uint32_t a = subpass->pDepthStencilAttachment->attachment;
         if (att[a].finalLayout != subpass->pDepthStencilAttachment->layout &&
             !att_used[a] && !has_external_dst[i])
            dst_implicit_dep = true;
         att_used[a] = true;
      }

      if (subpass->pResolveAttachments) {
         for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
            uint32_t a = subpass->pResolveAttachments[j].attachment;
            if (a == VK_ATTACHMENT_UNUSED)
               continue;
            if (att[a].finalLayout != subpass->pResolveAttachments[j].layout &&
                !att_used[a] && !has_external_dst[i])
               dst_implicit_dep = true;
            att_used[a] = true;
         }
      }

      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
         vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);

      if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
          ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
            uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
            if (att[a].finalLayout != subpass->pDepthStencilAttachment->layout &&
               !att_used[a] && !has_external_dst[i])
               dst_implicit_dep = true;
            att_used[a] = true;
      }

      if (dst_implicit_dep) {
         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
            .srcSubpass = i,
            .dstSubpass = VK_SUBPASS_EXTERNAL,
            .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
            .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
            .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
            .dstAccessMask = 0,
            .dependencyFlags = 0,
         });
      }
   }

   /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
    * Assume that if an attachment has an initial layout of UNDEFINED, it gets
    * transitioned eventually.
    */
   for (unsigned i = 0; i < info->attachmentCount; i++) {
      if (layout_undefined(att[i].initialLayout)) {
         if (vk_format_is_depth_or_stencil(att[i].format)) {
            pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
         } else {
            pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
         }
      }
   }
}

/* If an input attachment is used without an intervening write to the same
 * attachment, then we can just use the original image, even in GMEM mode.
 * This is an optimization, but it's also important because it allows us to
 * avoid having to invalidate UCHE at the beginning of each tile due to it
 * becoming invalid. The only reads of GMEM via UCHE should be after an
 * earlier subpass modified it, which only works if there's already an
 * appropriate dependency that will add the CACHE_INVALIDATE anyway. We
 * don't consider this in the dependency code, so this is also required for
 * correctness.
 */
static void
tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
{
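   /* written[a] is set once attachment 'a' has been written by the current
    * or an earlier subpass as a color, resolve, or depth/stencil attachment.
    */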
   bool written[pass->attachment_count];

   memset(written, 0, sizeof(written));

   for (unsigned i = 0; i < pass->subpass_count; i++) {
      struct tu_subpass *subpass = &pass->subpasses[i];

      for (unsigned j = 0; j < subpass->input_count; j++) {
         uint32_t a = subpass->input_attachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         subpass->input_attachments[j].patch_input_gmem = written[a];
      }

      for (unsigned j = 0; j < subpass->color_count; j++) {
         uint32_t a = subpass->color_attachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         written[a] = true;

         for (unsigned k = 0; k < subpass->input_count; k++) {
            if (subpass->input_attachments[k].attachment == a &&
                !subpass->input_attachments[k].patch_input_gmem) {
               /* For render feedback loops, we have no idea whether the use
                * as a color attachment or input attachment will come first,
                * so we have to always use GMEM in case the color attachment
                * comes first and defensively invalidate UCHE in case the
                * input attachment comes first.
                */
               subpass->feedback_invalidate = true;
               subpass->input_attachments[k].patch_input_gmem = true;
            }
         }
      }

      for (unsigned j = 0; j < subpass->resolve_count; j++) {
         uint32_t a = subpass->resolve_attachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         written[a] = true;
      }

      if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
         written[subpass->depth_stencil_attachment.attachment] = true;
         for (unsigned k = 0; k < subpass->input_count; k++) {
            if (subpass->input_attachments[k].attachment ==
                subpass->depth_stencil_attachment.attachment &&
                !subpass->input_attachments[k].patch_input_gmem) {
               subpass->feedback_invalidate = true;
               subpass->input_attachments[k].patch_input_gmem = true;
            }
         }
      }
   }
}

static void
tu_render_pass_check_feedback_loop(struct tu_render_pass *pass)
{
   for (unsigned i = 0; i < pass->subpass_count; i++) {
      struct tu_subpass *subpass = &pass->subpasses[i];

      for (unsigned j = 0; j < subpass->color_count; j++) {
         uint32_t a = subpass->color_attachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         for (unsigned k = 0; k < subpass->input_count; k++) {
            if (subpass->input_attachments[k].attachment == a) {
               subpass->feedback = true;
               break;
            }
         }
      }

      if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
         for (unsigned k = 0; k < subpass->input_count; k++) {
            if (subpass->input_attachments[k].attachment ==
                subpass->depth_stencil_attachment.attachment) {
               subpass->feedback = true;
               break;
            }
         }
      }
   }
}

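/* All color and depth/stencil attachments used by a subpass are expected to
 * share one sample count, so a single per-subpass value is tracked; the
 * assert catches violations of that assumption.
 */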
static void update_samples(struct tu_subpass *subpass,
                           VkSampleCountFlagBits samples)
{
   assert(subpass->samples == 0 || subpass->samples == samples);
   subpass->samples = samples;
}

static void
tu_render_pass_gmem_config(struct tu_render_pass *pass,
                           const struct tu_physical_device *phys_dev)
{
   uint32_t block_align_shift = 3; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
   uint32_t tile_align_w = phys_dev->info->tile_align_w;
   uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * phys_dev->info->tile_align_h;

   /* calculate total bytes per pixel */
   uint32_t cpp_total = 0;
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      bool cpp1 = (att->cpp == 1);
      if (att->gmem_offset >= 0) {
         cpp_total += att->cpp;

         /* take into account the separate stencil: */
         if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
            cpp1 = (att->samples == 1);
            cpp_total += att->samples;
         }

         /* texture pitch must be aligned to 64, use a tile_align_w that is
          * a multiple of 64 for cpp==1 attachment to work as input attachment
          */
         if (cpp1 && tile_align_w % 64 != 0) {
            tile_align_w *= 2;
            block_align_shift -= 1;
         }
      }
   }

   pass->tile_align_w = tile_align_w;

   /* no gmem attachments */
   if (cpp_total == 0) {
      /* any non-zero value so the tiling config works with no attachments */
      pass->gmem_pixels = 1024*1024;
      return;
   }

   /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
    * doesn't break things. maybe there is a better solution?
    * TODO: this algorithm isn't optimal
    * for example, two attachments with cpp = {1, 4}
    * result:  nblocks = {12, 52}, pixels = 196608
    * optimal: nblocks = {13, 51}, pixels = 208896
    */
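   /* Distribute the available gmem blocks among the attachments
    * proportionally to their bytes per pixel, rounding each share down to
    * its block alignment; the smallest resulting per-attachment pixel count
    * becomes the pass's gmem_pixels limit.
    */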
   uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
   uint32_t offset = 0, pixels = ~0u, i;
   for (i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0)
         continue;

      att->gmem_offset = offset;

      uint32_t align = MAX2(1, att->cpp >> block_align_shift);
      uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);

      if (nblocks > gmem_blocks)
         break;

      gmem_blocks -= nblocks;
      cpp_total -= att->cpp;
      offset += nblocks * gmem_align;
      pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);

      /* repeat the same for separate stencil */
      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
         att->gmem_offset_stencil = offset;

         /* note: for s8_uint, block align is always 1 */
         uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
         if (nblocks > gmem_blocks)
            break;

         gmem_blocks -= nblocks;
         cpp_total -= att->samples;
         offset += nblocks * gmem_align;
         pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
      }
   }

   /* if the loop didn't complete then the gmem config is impossible */
   if (i == pass->attachment_count)
      pass->gmem_pixels = pixels;
}

static void
attachment_set_ops(struct tu_render_pass_attachment *att,
                   VkAttachmentLoadOp load_op,
                   VkAttachmentLoadOp stencil_load_op,
                   VkAttachmentStoreOp store_op,
                   VkAttachmentStoreOp stencil_store_op)
{
   /* load/store ops */
   att->clear_mask =
      (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
   att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);

   bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
   bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);

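   /* For packed and separate depth/stencil formats, fold the stencil
    * load/store ops into the per-aspect clear mask and load/store flags.
    */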
   switch (att->format) {
   case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
      if (att->clear_mask)
         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
      if (stencil_clear)
         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (stencil_load)
         att->load = true;
      if (stencil_store)
         att->store = true;
      break;
   case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
      att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
      att->load = stencil_load;
      att->store = stencil_store;
      break;
   case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
      if (att->clear_mask)
         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
      if (stencil_clear)
         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (stencil_load)
         att->load_stencil = true;
      if (stencil_store)
         att->store_stencil = true;
      break;
   default:
      break;
   }
}

static bool
is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve)
{
   if (depth_stencil_resolve &&
       depth_stencil_resolve->pDepthStencilResolveAttachment &&
       depth_stencil_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
      return true;
   }
   return false;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateRenderPass2(VkDevice _device,
                     const VkRenderPassCreateInfo2KHR *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkRenderPass *pRenderPass)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_render_pass *pass;
   size_t size;
   size_t attachments_offset;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);

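   /* The pass, its subpasses, and its attachments are carved out of a single
    * allocation; attachments_offset records where the attachment array
    * starts.
    */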
   size = sizeof(*pass);
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_object_zalloc(&device->vk, pAllocator, size,
                           VK_OBJECT_TYPE_RENDER_PASS);
   if (pass == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->attachments = (void *) pass + attachments_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];

      att->format = pCreateInfo->pAttachments[i].format;
      att->samples = pCreateInfo->pAttachments[i].samples;
      /* for d32s8, cpp is for the depth image, and
       * att->samples will be used as the cpp for the stencil image
       */
      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         att->cpp = 4 * att->samples;
      else
         att->cpp = vk_format_get_blocksize(att->format) * att->samples;
      att->gmem_offset = -1;

      attachment_set_ops(att,
                         pCreateInfo->pAttachments[i].loadOp,
                         pCreateInfo->pAttachments[i].stencilLoadOp,
                         pCreateInfo->pAttachments[i].storeOp,
                         pCreateInfo->pAttachments[i].stencilStoreOp);
   }
   uint32_t subpass_attachment_count = 0;
   struct tu_subpass_attachment *p;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);

      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
         (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0);
   }

   if (subpass_attachment_count) {
      pass->subpass_attachments = vk_alloc2(
         &device->vk.alloc, pAllocator,
         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_object_free(&device->vk, pAllocator, pass);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else
      pass->subpass_attachments = NULL;

   p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->resolve_count = 0;
      subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve);
      subpass->samples = 0;
      subpass->srgb_cntl = 0;

      subpass->multiview_mask = desc->viewMask;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            uint32_t a = desc->pInputAttachments[j].attachment;
            subpass->input_attachments[j].attachment = a;
            /* Note: attachments only used as input attachments will be read
             * directly instead of through gmem, so we don't mark input
             * attachments as needing gmem.
             */
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pColorAttachments[j].attachment;
            subpass->color_attachments[j].attachment = a;

            if (a != VK_ATTACHMENT_UNUSED) {
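               /* gmem_offset >= 0 marks the attachment as needing gmem
                * space; the actual offset is assigned later in
                * tu_render_pass_gmem_config().
                */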
               pass->attachments[a].gmem_offset = 0;
               update_samples(subpass, pCreateInfo->pAttachments[a].samples);

               if (vk_format_is_srgb(pass->attachments[a].format))
                  subpass->srgb_cntl |= 1 << j;

               pass->attachments[a].clear_views |= subpass->multiview_mask;
            }
         }
      }

      subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL;
      if (desc->pResolveAttachments) {
         p += desc->colorAttachmentCount;
         subpass->resolve_count += desc->colorAttachmentCount;
         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j].attachment =
                  desc->pResolveAttachments[j].attachment;
         }
      }

      if (subpass->resolve_depth_stencil) {
         p++;
         subpass->resolve_count++;
         uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
         subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;
      }

      uint32_t a = desc->pDepthStencilAttachment ?
         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
      subpass->depth_stencil_attachment.attachment = a;
      if (a != VK_ATTACHMENT_UNUSED) {
            pass->attachments[a].gmem_offset = 0;
            update_samples(subpass, pCreateInfo->pAttachments[a].samples);

            pass->attachments[a].clear_views |= subpass->multiview_mask;
      }
   }

   tu_render_pass_patch_input_gmem(pass);

   tu_render_pass_check_feedback_loop(pass);

   /* disable unused attachments */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0) {
         att->clear_mask = 0;
         att->load = false;
      }
   }

   /* From the VK_KHR_multiview spec:
    *
    *    Multiview is all-or-nothing for a render pass - that is, either all
    *    subpasses must have a non-zero view mask (though some subpasses may
    *    have only one view) or all must be zero.
    *
    * This means we only have to check one of the view masks.
    */
   if (pCreateInfo->pSubpasses[0].viewMask) {
      /* It seems multiview must use sysmem rendering. */
      pass->gmem_pixels = 0;
   } else {
      tu_render_pass_gmem_config(pass, device->physical_device);
   }

   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
      tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
   }

   tu_render_pass_add_implicit_deps(pass, pCreateInfo);

   *pRenderPass = tu_render_pass_to_handle(pass);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyRenderPass(VkDevice _device,
                     VkRenderPass _pass,
                     const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_render_pass, pass, _pass);

   if (!_pass)
      return;

   vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
   vk_object_free(&device->vk, pAllocator, pass);
}

VKAPI_ATTR void VKAPI_CALL
tu_GetRenderAreaGranularity(VkDevice _device,
                            VkRenderPass renderPass,
                            VkExtent2D *pGranularity)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   pGranularity->width = device->physical_device->info->gmem_align_w;
   pGranularity->height = device->physical_device->info->gmem_align_h;
}

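/* Map a resolve attachment index back to the attachment being resolved.
 * When a depth/stencil resolve is present, its entry is stored last in
 * resolve_attachments, after the color resolves.
 */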
uint32_t
tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index)
{
   if (subpass->resolve_depth_stencil &&
       index == (subpass->resolve_count - 1))
      return subpass->depth_stencil_attachment.attachment;

   return subpass->color_attachments[index].attachment;
}