19f464c52Smaya/*
29f464c52Smaya * Copyright © 2017 Intel Corporation
39f464c52Smaya *
49f464c52Smaya * Permission is hereby granted, free of charge, to any person obtaining a
59f464c52Smaya * copy of this software and associated documentation files (the "Software"),
69f464c52Smaya * to deal in the Software without restriction, including without limitation
79f464c52Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense,
89f464c52Smaya * and/or sell copies of the Software, and to permit persons to whom the
99f464c52Smaya * Software is furnished to do so, subject to the following conditions:
109f464c52Smaya *
119f464c52Smaya * The above copyright notice and this permission notice shall be included
129f464c52Smaya * in all copies or substantial portions of the Software.
139f464c52Smaya *
149f464c52Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
159f464c52Smaya * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
169f464c52Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
179f464c52Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
189f464c52Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
199f464c52Smaya * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
209f464c52Smaya * DEALINGS IN THE SOFTWARE.
219f464c52Smaya */
229f464c52Smaya
239f464c52Smaya/**
249f464c52Smaya * @file iris_resolve.c
259f464c52Smaya *
269f464c52Smaya * This file handles resolve tracking for main and auxiliary surfaces.
279f464c52Smaya *
289f464c52Smaya * It also handles our cache tracking.  We have sets for the render cache,
299f464c52Smaya * depth cache, and so on.  If a BO is in a cache's set, then it may have
309f464c52Smaya * data in that cache.  The helpers take care of emitting flushes for
319f464c52Smaya * render-to-texture, format reinterpretation issues, and other situations.
329f464c52Smaya */
339f464c52Smaya
349f464c52Smaya#include "util/hash_table.h"
359f464c52Smaya#include "util/set.h"
369f464c52Smaya#include "iris_context.h"
377ec681f3Smrg#include "compiler/nir/nir.h"
389f464c52Smaya
399f464c52Smaya/**
409f464c52Smaya * Disable auxiliary buffers if a renderbuffer is also bound as a texture
419f464c52Smaya * or shader image.  This causes a self-dependency, where both rendering
429f464c52Smaya * and sampling may concurrently read or write the CCS buffer, causing
439f464c52Smaya * incorrect pixels.
449f464c52Smaya */
459f464c52Smayastatic bool
469f464c52Smayadisable_rb_aux_buffer(struct iris_context *ice,
479f464c52Smaya                      bool *draw_aux_buffer_disabled,
489f464c52Smaya                      struct iris_resource *tex_res,
499f464c52Smaya                      unsigned min_level, unsigned num_levels,
509f464c52Smaya                      const char *usage)
519f464c52Smaya{
529f464c52Smaya   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
539f464c52Smaya   bool found = false;
549f464c52Smaya
559f464c52Smaya   /* We only need to worry about color compression and fast clears. */
569f464c52Smaya   if (tex_res->aux.usage != ISL_AUX_USAGE_CCS_D &&
577ec681f3Smrg       tex_res->aux.usage != ISL_AUX_USAGE_CCS_E &&
587ec681f3Smrg       tex_res->aux.usage != ISL_AUX_USAGE_GFX12_CCS_E)
599f464c52Smaya      return false;
609f464c52Smaya
619f464c52Smaya   for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
629f464c52Smaya      struct iris_surface *surf = (void *) cso_fb->cbufs[i];
639f464c52Smaya      if (!surf)
649f464c52Smaya         continue;
659f464c52Smaya
669f464c52Smaya      struct iris_resource *rb_res = (void *) surf->base.texture;
679f464c52Smaya
689f464c52Smaya      if (rb_res->bo == tex_res->bo &&
699f464c52Smaya          surf->base.u.tex.level >= min_level &&
709f464c52Smaya          surf->base.u.tex.level < min_level + num_levels) {
719f464c52Smaya         found = draw_aux_buffer_disabled[i] = true;
729f464c52Smaya      }
739f464c52Smaya   }
749f464c52Smaya
759f464c52Smaya   if (found) {
769f464c52Smaya      perf_debug(&ice->dbg,
779f464c52Smaya                 "Disabling CCS because a renderbuffer is also bound %s.\n",
789f464c52Smaya                 usage);
799f464c52Smaya   }
809f464c52Smaya
819f464c52Smaya   return found;
829f464c52Smaya}
839f464c52Smaya
849f464c52Smayastatic void
859f464c52Smayaresolve_sampler_views(struct iris_context *ice,
869f464c52Smaya                      struct iris_batch *batch,
879f464c52Smaya                      struct iris_shader_state *shs,
889f464c52Smaya                      const struct shader_info *info,
899f464c52Smaya                      bool *draw_aux_buffer_disabled,
909f464c52Smaya                      bool consider_framebuffer)
919f464c52Smaya{
927ec681f3Smrg   uint32_t views = info ? (shs->bound_sampler_views & info->textures_used[0]) : 0;
939f464c52Smaya
949f464c52Smaya   while (views) {
959f464c52Smaya      const int i = u_bit_scan(&views);
969f464c52Smaya      struct iris_sampler_view *isv = shs->textures[i];
979f464c52Smaya
987ec681f3Smrg      if (isv->res->base.b.target != PIPE_BUFFER) {
999f464c52Smaya         if (consider_framebuffer) {
1007ec681f3Smrg            disable_rb_aux_buffer(ice, draw_aux_buffer_disabled, isv->res,
1017ec681f3Smrg                                  isv->view.base_level, isv->view.levels,
1029f464c52Smaya                                  "for sampling");
1039f464c52Smaya         }
1049f464c52Smaya
1057ec681f3Smrg         iris_resource_prepare_texture(ice, isv->res, isv->view.format,
1069f464c52Smaya                                       isv->view.base_level, isv->view.levels,
1079f464c52Smaya                                       isv->view.base_array_layer,
1087ec681f3Smrg                                       isv->view.array_len);
1099f464c52Smaya      }
1109f464c52Smaya
1117ec681f3Smrg      iris_emit_buffer_barrier_for(batch, isv->res->bo,
1127ec681f3Smrg                                   IRIS_DOMAIN_OTHER_READ);
1139f464c52Smaya   }
1149f464c52Smaya}
1159f464c52Smaya
1169f464c52Smayastatic void
1179f464c52Smayaresolve_image_views(struct iris_context *ice,
1189f464c52Smaya                    struct iris_batch *batch,
1199f464c52Smaya                    struct iris_shader_state *shs,
1207ec681f3Smrg                    const struct shader_info *info,
1219f464c52Smaya                    bool *draw_aux_buffer_disabled,
1229f464c52Smaya                    bool consider_framebuffer)
1239f464c52Smaya{
1247ec681f3Smrg   uint32_t views = info ? (shs->bound_image_views & info->images_used) : 0;
1259f464c52Smaya
1269f464c52Smaya   while (views) {
1279f464c52Smaya      const int i = u_bit_scan(&views);
1287ec681f3Smrg      struct pipe_image_view *pview = &shs->image[i].base;
1297ec681f3Smrg      struct iris_resource *res = (void *) pview->resource;
1309f464c52Smaya
1317ec681f3Smrg      if (res->base.b.target != PIPE_BUFFER) {
1329f464c52Smaya         if (consider_framebuffer) {
1339f464c52Smaya            disable_rb_aux_buffer(ice, draw_aux_buffer_disabled,
1347ec681f3Smrg                                  res, pview->u.tex.level, 1,
1357ec681f3Smrg                                  "as a shader image");
1369f464c52Smaya         }
1379f464c52Smaya
1387ec681f3Smrg         unsigned num_layers =
1397ec681f3Smrg            pview->u.tex.last_layer - pview->u.tex.first_layer + 1;
1407ec681f3Smrg
1417ec681f3Smrg         enum isl_aux_usage aux_usage =
1427ec681f3Smrg            iris_image_view_aux_usage(ice, pview, info);
1437ec681f3Smrg
1447ec681f3Smrg         iris_resource_prepare_access(ice, res,
1457ec681f3Smrg                                      pview->u.tex.level, 1,
1467ec681f3Smrg                                      pview->u.tex.first_layer, num_layers,
1477ec681f3Smrg                                      aux_usage, false);
1489f464c52Smaya      }
1499f464c52Smaya
1507ec681f3Smrg      iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_DATA_WRITE);
1519f464c52Smaya   }
1529f464c52Smaya}
1539f464c52Smaya
1549f464c52Smaya/**
1559f464c52Smaya * \brief Resolve buffers before drawing.
1569f464c52Smaya *
1579f464c52Smaya * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
1589f464c52Smaya * enabled depth texture, and flush the render cache for any dirty textures.
1599f464c52Smaya */
1609f464c52Smayavoid
1619f464c52Smayairis_predraw_resolve_inputs(struct iris_context *ice,
1629f464c52Smaya                            struct iris_batch *batch,
1639f464c52Smaya                            bool *draw_aux_buffer_disabled,
1649f464c52Smaya                            gl_shader_stage stage,
1659f464c52Smaya                            bool consider_framebuffer)
1669f464c52Smaya{
1679f464c52Smaya   struct iris_shader_state *shs = &ice->state.shaders[stage];
1689f464c52Smaya   const struct shader_info *info = iris_get_shader_info(ice, stage);
1699f464c52Smaya
1707ec681f3Smrg   uint64_t stage_dirty = (IRIS_STAGE_DIRTY_BINDINGS_VS << stage) |
1717ec681f3Smrg      (consider_framebuffer ? IRIS_STAGE_DIRTY_BINDINGS_FS : 0);
1729f464c52Smaya
1737ec681f3Smrg   if (ice->state.stage_dirty & stage_dirty) {
1749f464c52Smaya      resolve_sampler_views(ice, batch, shs, info, draw_aux_buffer_disabled,
1759f464c52Smaya                            consider_framebuffer);
1767ec681f3Smrg      resolve_image_views(ice, batch, shs, info, draw_aux_buffer_disabled,
1779f464c52Smaya                          consider_framebuffer);
1789f464c52Smaya   }
1799f464c52Smaya}
1809f464c52Smaya
1819f464c52Smayavoid
1829f464c52Smayairis_predraw_resolve_framebuffer(struct iris_context *ice,
1839f464c52Smaya                                 struct iris_batch *batch,
1849f464c52Smaya                                 bool *draw_aux_buffer_disabled)
1859f464c52Smaya{
1869f464c52Smaya   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
1877ec681f3Smrg   struct iris_screen *screen = (void *) ice->ctx.screen;
1887ec681f3Smrg   struct intel_device_info *devinfo = &screen->devinfo;
1897ec681f3Smrg   struct iris_uncompiled_shader *ish =
1907ec681f3Smrg      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
1917ec681f3Smrg   const nir_shader *nir = ish->nir;
1929f464c52Smaya
1939f464c52Smaya   if (ice->state.dirty & IRIS_DIRTY_DEPTH_BUFFER) {
1949f464c52Smaya      struct pipe_surface *zs_surf = cso_fb->zsbuf;
1959f464c52Smaya
1969f464c52Smaya      if (zs_surf) {
1979f464c52Smaya         struct iris_resource *z_res, *s_res;
1989f464c52Smaya         iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res);
1999f464c52Smaya         unsigned num_layers =
2009f464c52Smaya            zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;
2019f464c52Smaya
2029f464c52Smaya         if (z_res) {
2037ec681f3Smrg            iris_resource_prepare_render(ice, z_res, zs_surf->u.tex.level,
2047ec681f3Smrg                                         zs_surf->u.tex.first_layer,
2057ec681f3Smrg                                         num_layers, ice->state.hiz_usage);
2067ec681f3Smrg            iris_emit_buffer_barrier_for(batch, z_res->bo,
2077ec681f3Smrg                                         IRIS_DOMAIN_DEPTH_WRITE);
2089f464c52Smaya         }
2099f464c52Smaya
2109f464c52Smaya         if (s_res) {
2117ec681f3Smrg            iris_emit_buffer_barrier_for(batch, s_res->bo,
2127ec681f3Smrg                                         IRIS_DOMAIN_DEPTH_WRITE);
2139f464c52Smaya         }
2149f464c52Smaya      }
2159f464c52Smaya   }
2169f464c52Smaya
2177ec681f3Smrg   if (devinfo->ver == 8 && nir->info.outputs_read != 0) {
2187ec681f3Smrg      for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
2197ec681f3Smrg         if (cso_fb->cbufs[i]) {
2207ec681f3Smrg            struct iris_surface *surf = (void *) cso_fb->cbufs[i];
2217ec681f3Smrg            struct iris_resource *res = (void *) cso_fb->cbufs[i]->texture;
2227ec681f3Smrg
2237ec681f3Smrg            iris_resource_prepare_texture(ice, res, surf->view.format,
2247ec681f3Smrg                                          surf->view.base_level, 1,
2257ec681f3Smrg                                          surf->view.base_array_layer,
2267ec681f3Smrg                                          surf->view.array_len);
2277ec681f3Smrg         }
2287ec681f3Smrg      }
2297ec681f3Smrg   }
2307ec681f3Smrg
2317ec681f3Smrg   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_FS) {
2329f464c52Smaya      for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
2339f464c52Smaya         struct iris_surface *surf = (void *) cso_fb->cbufs[i];
2349f464c52Smaya         if (!surf)
2359f464c52Smaya            continue;
2369f464c52Smaya
2379f464c52Smaya         struct iris_resource *res = (void *) surf->base.texture;
2389f464c52Smaya
2399f464c52Smaya         enum isl_aux_usage aux_usage =
2407ec681f3Smrg            iris_resource_render_aux_usage(ice, res, surf->view.base_level,
2417ec681f3Smrg                                           surf->view.format,
2429f464c52Smaya                                           draw_aux_buffer_disabled[i]);
2439f464c52Smaya
2449f464c52Smaya         if (ice->state.draw_aux_usage[i] != aux_usage) {
2459f464c52Smaya            ice->state.draw_aux_usage[i] = aux_usage;
2469f464c52Smaya            /* XXX: Need to track which bindings to make dirty */
2477ec681f3Smrg            ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER;
2487ec681f3Smrg            ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS;
2499f464c52Smaya         }
2509f464c52Smaya
2517ec681f3Smrg         iris_resource_prepare_render(ice, res, surf->view.base_level,
2529f464c52Smaya                                      surf->view.base_array_layer,
2539f464c52Smaya                                      surf->view.array_len,
2549f464c52Smaya                                      aux_usage);
2559f464c52Smaya
2567ec681f3Smrg         iris_cache_flush_for_render(batch, res->bo, aux_usage);
2579f464c52Smaya      }
2589f464c52Smaya   }
2599f464c52Smaya}
2609f464c52Smaya
2619f464c52Smaya/**
2629f464c52Smaya * \brief Call this after drawing to mark which buffers need resolving
2639f464c52Smaya *
2649f464c52Smaya * If the depth buffer was written to and if it has an accompanying HiZ
2659f464c52Smaya * buffer, then mark that it needs a depth resolve.
2669f464c52Smaya *
2679f464c52Smaya * If the color buffer is a multisample window system buffer, then
2689f464c52Smaya * mark that it needs a downsample.
2699f464c52Smaya *
2709f464c52Smaya * Also mark any render targets which will be textured as needing a render
2719f464c52Smaya * cache flush.
2729f464c52Smaya */
2739f464c52Smayavoid
2749f464c52Smayairis_postdraw_update_resolve_tracking(struct iris_context *ice,
2759f464c52Smaya                                      struct iris_batch *batch)
2769f464c52Smaya{
2779f464c52Smaya   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
2789f464c52Smaya
2799f464c52Smaya   // XXX: front buffer drawing?
2809f464c52Smaya
2819f464c52Smaya   bool may_have_resolved_depth =
2829f464c52Smaya      ice->state.dirty & (IRIS_DIRTY_DEPTH_BUFFER |
2839f464c52Smaya                          IRIS_DIRTY_WM_DEPTH_STENCIL);
2849f464c52Smaya
2859f464c52Smaya   struct pipe_surface *zs_surf = cso_fb->zsbuf;
2869f464c52Smaya   if (zs_surf) {
2879f464c52Smaya      struct iris_resource *z_res, *s_res;
2889f464c52Smaya      iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res);
2899f464c52Smaya      unsigned num_layers =
2909f464c52Smaya         zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;
2919f464c52Smaya
2929f464c52Smaya      if (z_res) {
2937ec681f3Smrg         if (may_have_resolved_depth && ice->state.depth_writes_enabled) {
2947ec681f3Smrg            iris_resource_finish_render(ice, z_res, zs_surf->u.tex.level,
2957ec681f3Smrg                                        zs_surf->u.tex.first_layer,
2967ec681f3Smrg                                        num_layers, ice->state.hiz_usage);
2979f464c52Smaya         }
2989f464c52Smaya      }
2999f464c52Smaya
3009f464c52Smaya      if (s_res) {
3017ec681f3Smrg         if (may_have_resolved_depth && ice->state.stencil_writes_enabled) {
3029f464c52Smaya            iris_resource_finish_write(ice, s_res, zs_surf->u.tex.level,
3039f464c52Smaya                                       zs_surf->u.tex.first_layer, num_layers,
3047ec681f3Smrg                                       s_res->aux.usage);
3059f464c52Smaya         }
3069f464c52Smaya      }
3079f464c52Smaya   }
3089f464c52Smaya
3099f464c52Smaya   bool may_have_resolved_color =
3107ec681f3Smrg      ice->state.stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_FS;
3119f464c52Smaya
3129f464c52Smaya   for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
3139f464c52Smaya      struct iris_surface *surf = (void *) cso_fb->cbufs[i];
3149f464c52Smaya      if (!surf)
3159f464c52Smaya         continue;
3169f464c52Smaya
3179f464c52Smaya      struct iris_resource *res = (void *) surf->base.texture;
3189f464c52Smaya      enum isl_aux_usage aux_usage = ice->state.draw_aux_usage[i];
3199f464c52Smaya
3209f464c52Smaya      if (may_have_resolved_color) {
3219f464c52Smaya         union pipe_surface_desc *desc = &surf->base.u;
3229f464c52Smaya         unsigned num_layers =
3239f464c52Smaya            desc->tex.last_layer - desc->tex.first_layer + 1;
3249f464c52Smaya         iris_resource_finish_render(ice, res, desc->tex.level,
3259f464c52Smaya                                     desc->tex.first_layer, num_layers,
3269f464c52Smaya                                     aux_usage);
3279f464c52Smaya      }
3289f464c52Smaya   }
3299f464c52Smaya}
3309f464c52Smaya
3319f464c52Smayavoid
3329f464c52Smayairis_cache_flush_for_render(struct iris_batch *batch,
3339f464c52Smaya                            struct iris_bo *bo,
3349f464c52Smaya                            enum isl_aux_usage aux_usage)
3359f464c52Smaya{
3367ec681f3Smrg   iris_emit_buffer_barrier_for(batch, bo, IRIS_DOMAIN_RENDER_WRITE);
3379f464c52Smaya
3389f464c52Smaya   /* Check to see if this bo has been used by a previous rendering operation
3397ec681f3Smrg    * but with a different aux usage.  If it has, flush the render cache so we
3407ec681f3Smrg    * ensure that it's only in there with one aux usage at a time.
3419f464c52Smaya    *
3429f464c52Smaya    * Even though it's not obvious, this can easily happen in practice.
3439f464c52Smaya    * Suppose a client is blending on a surface with sRGB encode enabled on
3447ec681f3Smrg    * gfx9.  This implies that you get AUX_USAGE_CCS_D at best.  If the client
3459f464c52Smaya    * then disables sRGB decode and continues blending we will flip on
3469f464c52Smaya    * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
3479f464c52Smaya    * perfectly valid since CCS_E is a subset of CCS_D).  However, this means
3489f464c52Smaya    * that we have fragments in-flight which are rendering with UNORM+CCS_E
3499f464c52Smaya    * and other fragments in-flight with SRGB+CCS_D on the same surface at the
3509f464c52Smaya    * same time and the pixel scoreboard and color blender are trying to sort
3519f464c52Smaya    * it all out.  This ends badly (i.e. GPU hangs).
3529f464c52Smaya    *
3537ec681f3Smrg    * There are comments in various docs which indicate that the render cache
3547ec681f3Smrg    * isn't 100% resilient to format changes.  However, to date, we have never
3557ec681f3Smrg    * observed GPU hangs or even corruption to be associated with switching the
3567ec681f3Smrg    * format, only the aux usage.  So we let that slide for now.
3579f464c52Smaya    */
3587ec681f3Smrg   void *v_aux_usage = (void *) (uintptr_t) aux_usage;
3599f464c52Smaya   struct hash_entry *entry =
3609f464c52Smaya      _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
3617ec681f3Smrg   if (!entry) {
3627ec681f3Smrg      _mesa_hash_table_insert_pre_hashed(batch->cache.render, bo->hash, bo,
3637ec681f3Smrg                                         v_aux_usage);
3647ec681f3Smrg   } else if (entry->data != v_aux_usage) {
3657ec681f3Smrg      iris_emit_pipe_control_flush(batch,
3667ec681f3Smrg                                   "cache tracker: aux usage mismatch",
3677ec681f3Smrg                                   PIPE_CONTROL_RENDER_TARGET_FLUSH |
3687ec681f3Smrg                                   PIPE_CONTROL_TILE_CACHE_FLUSH |
3697ec681f3Smrg                                   PIPE_CONTROL_CS_STALL);
3707ec681f3Smrg      entry->data = v_aux_usage;
3717ec681f3Smrg   }
3729f464c52Smaya}
3739f464c52Smaya
3747ec681f3Smrgstatic void
3757ec681f3Smrgflush_ubos(struct iris_batch *batch,
3767ec681f3Smrg            struct iris_shader_state *shs)
3779f464c52Smaya{
3787ec681f3Smrg   uint32_t cbufs = shs->dirty_cbufs & shs->bound_cbufs;
3797ec681f3Smrg
3807ec681f3Smrg   while (cbufs) {
3817ec681f3Smrg      const int i = u_bit_scan(&cbufs);
3827ec681f3Smrg      struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
3837ec681f3Smrg      struct iris_resource *res = (void *)cbuf->buffer;
3847ec681f3Smrg      iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_OTHER_READ);
3859f464c52Smaya   }
3869f464c52Smaya
3877ec681f3Smrg   shs->dirty_cbufs = 0;
3889f464c52Smaya}
3899f464c52Smaya
3907ec681f3Smrgstatic void
3917ec681f3Smrgflush_ssbos(struct iris_batch *batch,
3927ec681f3Smrg            struct iris_shader_state *shs)
3939f464c52Smaya{
3947ec681f3Smrg   uint32_t ssbos = shs->bound_ssbos;
3957ec681f3Smrg
3967ec681f3Smrg   while (ssbos) {
3977ec681f3Smrg      const int i = u_bit_scan(&ssbos);
3987ec681f3Smrg      struct pipe_shader_buffer *ssbo = &shs->ssbo[i];
3997ec681f3Smrg      struct iris_resource *res = (void *)ssbo->buffer;
4007ec681f3Smrg      iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_DATA_WRITE);
4017ec681f3Smrg   }
4029f464c52Smaya}
4039f464c52Smaya
4049f464c52Smayavoid
4057ec681f3Smrgiris_predraw_flush_buffers(struct iris_context *ice,
4067ec681f3Smrg                           struct iris_batch *batch,
4077ec681f3Smrg                           gl_shader_stage stage)
4089f464c52Smaya{
4097ec681f3Smrg   struct iris_shader_state *shs = &ice->state.shaders[stage];
4107ec681f3Smrg
4117ec681f3Smrg   if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << stage))
4127ec681f3Smrg      flush_ubos(batch, shs);
4137ec681f3Smrg
4147ec681f3Smrg   if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_BINDINGS_VS << stage))
4157ec681f3Smrg      flush_ssbos(batch, shs);
4169f464c52Smaya}
4179f464c52Smaya
4189f464c52Smayastatic void
4199f464c52Smayairis_resolve_color(struct iris_context *ice,
4209f464c52Smaya                   struct iris_batch *batch,
4219f464c52Smaya                   struct iris_resource *res,
4229f464c52Smaya                   unsigned level, unsigned layer,
4239f464c52Smaya                   enum isl_aux_op resolve_op)
4249f464c52Smaya{
4259f464c52Smaya   //DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer);
4269f464c52Smaya
4279f464c52Smaya   struct blorp_surf surf;
4287ec681f3Smrg   iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf,
4297ec681f3Smrg                                &res->base.b, res->aux.usage, level, true);
4309f464c52Smaya
4319f464c52Smaya   iris_batch_maybe_flush(batch, 1500);
4329f464c52Smaya
4339f464c52Smaya   /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
4349f464c52Smaya    *
4359f464c52Smaya    *    "Any transition from any value in {Clear, Render, Resolve} to a
4369f464c52Smaya    *     different value in {Clear, Render, Resolve} requires end of pipe
4379f464c52Smaya    *     synchronization."
4389f464c52Smaya    *
4399f464c52Smaya    * In other words, fast clear ops are not properly synchronized with
4409f464c52Smaya    * other drawing.  We need to use a PIPE_CONTROL to ensure that the
4419f464c52Smaya    * contents of the previous draw hit the render target before we resolve
4429f464c52Smaya    * and again afterwards to ensure that the resolve is complete before we
4439f464c52Smaya    * do any more regular drawing.
4449f464c52Smaya    */
4457ec681f3Smrg   iris_emit_end_of_pipe_sync(batch, "color resolve: pre-flush",
4467ec681f3Smrg                              PIPE_CONTROL_RENDER_TARGET_FLUSH);
4479f464c52Smaya
4487ec681f3Smrg   iris_batch_sync_region_start(batch);
4499f464c52Smaya   struct blorp_batch blorp_batch;
4509f464c52Smaya   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
4517ec681f3Smrg   blorp_ccs_resolve(&blorp_batch, &surf, level, layer, 1, res->surf.format,
4529f464c52Smaya                     resolve_op);
4539f464c52Smaya   blorp_batch_finish(&blorp_batch);
4549f464c52Smaya
4559f464c52Smaya   /* See comment above */
4567ec681f3Smrg   iris_emit_end_of_pipe_sync(batch, "color resolve: post-flush",
4577ec681f3Smrg                              PIPE_CONTROL_RENDER_TARGET_FLUSH);
4587ec681f3Smrg   iris_batch_sync_region_end(batch);
4599f464c52Smaya}
4609f464c52Smaya
4619f464c52Smayastatic void
4629f464c52Smayairis_mcs_partial_resolve(struct iris_context *ice,
4639f464c52Smaya                         struct iris_batch *batch,
4649f464c52Smaya                         struct iris_resource *res,
4659f464c52Smaya                         uint32_t start_layer,
4669f464c52Smaya                         uint32_t num_layers)
4679f464c52Smaya{
4689f464c52Smaya   //DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt,
4699f464c52Smaya       //start_layer, start_layer + num_layers - 1);
4709f464c52Smaya
4717ec681f3Smrg   assert(isl_aux_usage_has_mcs(res->aux.usage));
4727ec681f3Smrg
4737ec681f3Smrg   iris_batch_maybe_flush(batch, 1500);
4749f464c52Smaya
4759f464c52Smaya   struct blorp_surf surf;
4767ec681f3Smrg   iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf,
4777ec681f3Smrg                                &res->base.b, res->aux.usage, 0, true);
4787ec681f3Smrg   iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_RENDER_WRITE);
4799f464c52Smaya
4809f464c52Smaya   struct blorp_batch blorp_batch;
4817ec681f3Smrg   iris_batch_sync_region_start(batch);
4829f464c52Smaya   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
4837ec681f3Smrg   blorp_mcs_partial_resolve(&blorp_batch, &surf, res->surf.format,
4849f464c52Smaya                             start_layer, num_layers);
4859f464c52Smaya   blorp_batch_finish(&blorp_batch);
4867ec681f3Smrg   iris_batch_sync_region_end(batch);
4879f464c52Smaya}
4889f464c52Smaya
4897ec681f3Smrgbool
4907ec681f3Smrgiris_sample_with_depth_aux(const struct intel_device_info *devinfo,
4917ec681f3Smrg                           const struct iris_resource *res)
4929f464c52Smaya{
4937ec681f3Smrg   switch (res->aux.usage) {
4947ec681f3Smrg   case ISL_AUX_USAGE_HIZ:
4957ec681f3Smrg      if (devinfo->has_sample_with_hiz)
4967ec681f3Smrg         break;
4979f464c52Smaya      return false;
4987ec681f3Smrg   case ISL_AUX_USAGE_HIZ_CCS:
4999f464c52Smaya      return false;
5007ec681f3Smrg   case ISL_AUX_USAGE_HIZ_CCS_WT:
5017ec681f3Smrg      break;
5027ec681f3Smrg   default:
5037ec681f3Smrg      return false;
5047ec681f3Smrg   }
5059f464c52Smaya
5069f464c52Smaya   for (unsigned level = 0; level < res->surf.levels; ++level) {
5079f464c52Smaya      if (!iris_resource_level_has_hiz(res, level))
5089f464c52Smaya         return false;
5099f464c52Smaya   }
5109f464c52Smaya
5117ec681f3Smrg   /* From the BDW PRM (Volume 2d: Command Reference: Structures
5129f464c52Smaya    *                   RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
5139f464c52Smaya    *
5149f464c52Smaya    *  "If this field is set to AUX_HIZ, Number of Multisamples must be
5159f464c52Smaya    *   MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D.
5169f464c52Smaya    *
5179f464c52Smaya    * There is no such blurb for 1D textures, but there is sufficient evidence
5189f464c52Smaya    * that this is broken on SKL+.
5199f464c52Smaya    */
5209f464c52Smaya   return res->surf.samples == 1 && res->surf.dim == ISL_SURF_DIM_2D;
5219f464c52Smaya}
5229f464c52Smaya
5239f464c52Smaya/**
5249f464c52Smaya * Perform a HiZ or depth resolve operation.
5259f464c52Smaya *
5269f464c52Smaya * For an overview of HiZ ops, see the following sections of the Sandy Bridge
5279f464c52Smaya * PRM, Volume 1, Part 2:
5289f464c52Smaya *   - 7.5.3.1 Depth Buffer Clear
5299f464c52Smaya *   - 7.5.3.2 Depth Buffer Resolve
5309f464c52Smaya *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
5319f464c52Smaya */
5329f464c52Smayavoid
5339f464c52Smayairis_hiz_exec(struct iris_context *ice,
5349f464c52Smaya              struct iris_batch *batch,
5359f464c52Smaya              struct iris_resource *res,
5369f464c52Smaya              unsigned int level, unsigned int start_layer,
5379f464c52Smaya              unsigned int num_layers, enum isl_aux_op op,
5389f464c52Smaya              bool update_clear_depth)
5399f464c52Smaya{
5409f464c52Smaya   assert(iris_resource_level_has_hiz(res, level));
5419f464c52Smaya   assert(op != ISL_AUX_OP_NONE);
5429f464c52Smaya   UNUSED const char *name = NULL;
5439f464c52Smaya
5447ec681f3Smrg   iris_batch_maybe_flush(batch, 1500);
5457ec681f3Smrg
5469f464c52Smaya   switch (op) {
5479f464c52Smaya   case ISL_AUX_OP_FULL_RESOLVE:
5489f464c52Smaya      name = "depth resolve";
5499f464c52Smaya      break;
5509f464c52Smaya   case ISL_AUX_OP_AMBIGUATE:
5519f464c52Smaya      name = "hiz ambiguate";
5529f464c52Smaya      break;
5539f464c52Smaya   case ISL_AUX_OP_FAST_CLEAR:
5549f464c52Smaya      name = "depth clear";
5559f464c52Smaya      break;
5569f464c52Smaya   case ISL_AUX_OP_PARTIAL_RESOLVE:
5579f464c52Smaya   case ISL_AUX_OP_NONE:
5589f464c52Smaya      unreachable("Invalid HiZ op");
5599f464c52Smaya   }
5609f464c52Smaya
5619f464c52Smaya   //DBG("%s %s to mt %p level %d layers %d-%d\n",
5629f464c52Smaya       //__func__, name, mt, level, start_layer, start_layer + num_layers - 1);
5639f464c52Smaya
5649f464c52Smaya   /* The following stalls and flushes are only documented to be required
5659f464c52Smaya    * for HiZ clear operations.  However, they also seem to be required for
5669f464c52Smaya    * resolve operations.
5679f464c52Smaya    *
5689f464c52Smaya    * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
5699f464c52Smaya    *
5709f464c52Smaya    *   "If other rendering operations have preceded this clear, a
5719f464c52Smaya    *    PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
5729f464c52Smaya    *    enabled must be issued before the rectangle primitive used for
5739f464c52Smaya    *    the depth buffer clear operation."
5749f464c52Smaya    *
5757ec681f3Smrg    * Same applies for Gfx8 and Gfx9.
5769f464c52Smaya    */
5779f464c52Smaya   iris_emit_pipe_control_flush(batch,
5787ec681f3Smrg                                "hiz op: pre-flush",
5799f464c52Smaya                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
5807ec681f3Smrg                                PIPE_CONTROL_DEPTH_STALL |
5819f464c52Smaya                                PIPE_CONTROL_CS_STALL);
5829f464c52Smaya
5837ec681f3Smrg   iris_batch_sync_region_start(batch);
5849f464c52Smaya
5859f464c52Smaya   struct blorp_surf surf;
5867ec681f3Smrg   iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf,
5877ec681f3Smrg                                &res->base.b, res->aux.usage, level, true);
5889f464c52Smaya
5899f464c52Smaya   struct blorp_batch blorp_batch;
5909f464c52Smaya   enum blorp_batch_flags flags = 0;
5919f464c52Smaya   flags |= update_clear_depth ? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR;
5929f464c52Smaya   blorp_batch_init(&ice->blorp, &blorp_batch, batch, flags);
5939f464c52Smaya   blorp_hiz_op(&blorp_batch, &surf, level, start_layer, num_layers, op);
5949f464c52Smaya   blorp_batch_finish(&blorp_batch);
5959f464c52Smaya
5969f464c52Smaya   /* The following stalls and flushes are only documented to be required
5979f464c52Smaya    * for HiZ clear operations.  However, they also seem to be required for
5989f464c52Smaya    * resolve operations.
5999f464c52Smaya    *
6009f464c52Smaya    * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
6019f464c52Smaya    *
6029f464c52Smaya    *    "Depth buffer clear pass using any of the methods (WM_STATE,
6039f464c52Smaya    *     3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
6049f464c52Smaya    *     PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
6059f464c52Smaya    *     "set" before starting to render.  DepthStall and DepthFlush are
6069f464c52Smaya    *     not needed between consecutive depth clear passes nor is it
6079f464c52Smaya    *     required if the depth clear pass was done with
6089f464c52Smaya    *     'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
6099f464c52Smaya    *
6109f464c52Smaya    * TODO: Such as the spec says, this could be conditional.
6119f464c52Smaya    */
6129f464c52Smaya   iris_emit_pipe_control_flush(batch,
6137ec681f3Smrg                                "hiz op: post flush",
6149f464c52Smaya                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
6159f464c52Smaya                                PIPE_CONTROL_DEPTH_STALL);
6167ec681f3Smrg
6177ec681f3Smrg   iris_batch_sync_region_end(batch);
6189f464c52Smaya}
6199f464c52Smaya
6209f464c52Smaya/**
6219f464c52Smaya * Does the resource's slice have hiz enabled?
6229f464c52Smaya */
6239f464c52Smayabool
6249f464c52Smayairis_resource_level_has_hiz(const struct iris_resource *res, uint32_t level)
6259f464c52Smaya{
6269f464c52Smaya   iris_resource_check_level_layer(res, level, 0);
6277ec681f3Smrg
6287ec681f3Smrg   if (!isl_aux_usage_has_hiz(res->aux.usage))
6297ec681f3Smrg      return false;
6307ec681f3Smrg
6317ec681f3Smrg   /* Disable HiZ for LOD > 0 unless the width/height are 8x4 aligned.
6327ec681f3Smrg    * For LOD == 0, we can grow the dimensions to make it work.
6337ec681f3Smrg    */
6347ec681f3Smrg   if (level > 0) {
6357ec681f3Smrg      if (u_minify(res->base.b.width0, level) & 7)
6367ec681f3Smrg         return false;
6377ec681f3Smrg
6387ec681f3Smrg      if (u_minify(res->base.b.height0, level) & 3)
6397ec681f3Smrg         return false;
6407ec681f3Smrg   }
6417ec681f3Smrg
6427ec681f3Smrg   return true;
6439f464c52Smaya}
6449f464c52Smaya
6459f464c52Smaya/** \brief Assert that the level and layer are valid for the resource. */
6469f464c52Smayavoid
6479f464c52Smayairis_resource_check_level_layer(UNUSED const struct iris_resource *res,
6489f464c52Smaya                                UNUSED uint32_t level, UNUSED uint32_t layer)
6499f464c52Smaya{
6509f464c52Smaya   assert(level < res->surf.levels);
6517ec681f3Smrg   assert(layer < util_num_layers(&res->base.b, level));
6529f464c52Smaya}
6539f464c52Smaya
6549f464c52Smayastatic inline uint32_t
6559f464c52Smayamiptree_level_range_length(const struct iris_resource *res,
6569f464c52Smaya                           uint32_t start_level, uint32_t num_levels)
6579f464c52Smaya{
6589f464c52Smaya   assert(start_level < res->surf.levels);
6599f464c52Smaya
6609f464c52Smaya   if (num_levels == INTEL_REMAINING_LAYERS)
6619f464c52Smaya      num_levels = res->surf.levels;
6629f464c52Smaya
6639f464c52Smaya   /* Check for overflow */
6649f464c52Smaya   assert(start_level + num_levels >= start_level);
6659f464c52Smaya   assert(start_level + num_levels <= res->surf.levels);
6669f464c52Smaya
6679f464c52Smaya   return num_levels;
6689f464c52Smaya}
6699f464c52Smaya
6709f464c52Smayastatic inline uint32_t
6719f464c52Smayamiptree_layer_range_length(const struct iris_resource *res, uint32_t level,
6729f464c52Smaya                           uint32_t start_layer, uint32_t num_layers)
6739f464c52Smaya{
6747ec681f3Smrg   assert(level <= res->base.b.last_level);
6759f464c52Smaya
6769f464c52Smaya   const uint32_t total_num_layers = iris_get_num_logical_layers(res, level);
6779f464c52Smaya   assert(start_layer < total_num_layers);
6789f464c52Smaya   if (num_layers == INTEL_REMAINING_LAYERS)
6799f464c52Smaya      num_layers = total_num_layers - start_layer;
6809f464c52Smaya   /* Check for overflow */
6819f464c52Smaya   assert(start_layer + num_layers >= start_layer);
6829f464c52Smaya   assert(start_layer + num_layers <= total_num_layers);
6839f464c52Smaya
6849f464c52Smaya   return num_layers;
6859f464c52Smaya}
6869f464c52Smaya
6877ec681f3Smrgbool
6887ec681f3Smrgiris_has_invalid_primary(const struct iris_resource *res,
6897ec681f3Smrg                         unsigned start_level, unsigned num_levels,
6907ec681f3Smrg                         unsigned start_layer, unsigned num_layers)
6919f464c52Smaya{
6927ec681f3Smrg   if (res->aux.usage == ISL_AUX_USAGE_NONE)
6939f464c52Smaya      return false;
6949f464c52Smaya
6959f464c52Smaya   /* Clamp the level range to fit the resource */
6969f464c52Smaya   num_levels = miptree_level_range_length(res, start_level, num_levels);
6979f464c52Smaya
6989f464c52Smaya   for (uint32_t l = 0; l < num_levels; l++) {
6999f464c52Smaya      const uint32_t level = start_level + l;
7009f464c52Smaya      const uint32_t level_layers =
7019f464c52Smaya         miptree_layer_range_length(res, level, start_layer, num_layers);
7029f464c52Smaya      for (unsigned a = 0; a < level_layers; a++) {
7039f464c52Smaya         enum isl_aux_state aux_state =
7049f464c52Smaya            iris_resource_get_aux_state(res, level, start_layer + a);
7057ec681f3Smrg         if (!isl_aux_state_has_valid_primary(aux_state))
7069f464c52Smaya            return true;
7079f464c52Smaya      }
7089f464c52Smaya   }
7099f464c52Smaya
7109f464c52Smaya   return false;
7119f464c52Smaya}
7129f464c52Smaya
7139f464c52Smayavoid
7149f464c52Smayairis_resource_prepare_access(struct iris_context *ice,
7159f464c52Smaya                             struct iris_resource *res,
7169f464c52Smaya                             uint32_t start_level, uint32_t num_levels,
7179f464c52Smaya                             uint32_t start_layer, uint32_t num_layers,
7189f464c52Smaya                             enum isl_aux_usage aux_usage,
7199f464c52Smaya                             bool fast_clear_supported)
7209f464c52Smaya{
7217ec681f3Smrg   if (res->aux.usage == ISL_AUX_USAGE_NONE)
7227ec681f3Smrg      return;
7239f464c52Smaya
7247ec681f3Smrg   /* We can't do resolves on the compute engine, so awkwardly, we have to
7257ec681f3Smrg    * do them on the render batch...
7267ec681f3Smrg    */
7277ec681f3Smrg   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
7289f464c52Smaya
7297ec681f3Smrg   const uint32_t clamped_levels =
7307ec681f3Smrg      miptree_level_range_length(res, start_level, num_levels);
7317ec681f3Smrg   for (uint32_t l = 0; l < clamped_levels; l++) {
7327ec681f3Smrg      const uint32_t level = start_level + l;
7339f464c52Smaya      const uint32_t level_layers =
7347ec681f3Smrg         miptree_layer_range_length(res, level, start_layer, num_layers);
7359f464c52Smaya      for (uint32_t a = 0; a < level_layers; a++) {
7367ec681f3Smrg         const uint32_t layer = start_layer + a;
7377ec681f3Smrg         const enum isl_aux_state aux_state =
7387ec681f3Smrg            iris_resource_get_aux_state(res, level, layer);
7397ec681f3Smrg         const enum isl_aux_op aux_op =
7407ec681f3Smrg            isl_aux_prepare_access(aux_state, aux_usage, fast_clear_supported);
7417ec681f3Smrg
7427ec681f3Smrg         /* Prepare the aux buffer for a conditional or unconditional access.
7437ec681f3Smrg          * A conditional access is handled by assuming that the access will
7447ec681f3Smrg          * not evaluate to a no-op. If the access does in fact occur, the aux
7457ec681f3Smrg          * will be in the required state. If it does not, no data is lost
7467ec681f3Smrg          * because the aux_op performed is lossless.
7477ec681f3Smrg          */
7487ec681f3Smrg         if (aux_op == ISL_AUX_OP_NONE) {
7497ec681f3Smrg            /* Nothing to do here. */
7507ec681f3Smrg         } else if (isl_aux_usage_has_mcs(res->aux.usage)) {
7517ec681f3Smrg            assert(aux_op == ISL_AUX_OP_PARTIAL_RESOLVE);
7527ec681f3Smrg            iris_mcs_partial_resolve(ice, batch, res, layer, 1);
7537ec681f3Smrg         } else if (isl_aux_usage_has_hiz(res->aux.usage)) {
7547ec681f3Smrg            iris_hiz_exec(ice, batch, res, level, layer, 1, aux_op, false);
7557ec681f3Smrg         } else if (res->aux.usage == ISL_AUX_USAGE_STC_CCS) {
7567ec681f3Smrg            unreachable("iris doesn't resolve STC_CCS resources");
7577ec681f3Smrg         } else {
7587ec681f3Smrg            assert(isl_aux_usage_has_ccs(res->aux.usage));
7597ec681f3Smrg            iris_resolve_color(ice, batch, res, level, layer, aux_op);
7609f464c52Smaya         }
7619f464c52Smaya
7627ec681f3Smrg         const enum isl_aux_state new_state =
7637ec681f3Smrg            isl_aux_state_transition_aux_op(aux_state, res->aux.usage, aux_op);
7647ec681f3Smrg         iris_resource_set_aux_state(ice, res, level, layer, 1, new_state);
7659f464c52Smaya      }
7669f464c52Smaya   }
7679f464c52Smaya}
7689f464c52Smaya
7699f464c52Smayavoid
7709f464c52Smayairis_resource_finish_write(struct iris_context *ice,
7719f464c52Smaya                           struct iris_resource *res, uint32_t level,
7729f464c52Smaya                           uint32_t start_layer, uint32_t num_layers,
7739f464c52Smaya                           enum isl_aux_usage aux_usage)
7749f464c52Smaya{
7757ec681f3Smrg   if (res->aux.usage == ISL_AUX_USAGE_NONE)
7767ec681f3Smrg      return;
7777ec681f3Smrg
7787ec681f3Smrg   const uint32_t level_layers =
7797ec681f3Smrg      miptree_layer_range_length(res, level, start_layer, num_layers);
7807ec681f3Smrg
7817ec681f3Smrg   for (uint32_t a = 0; a < level_layers; a++) {
7827ec681f3Smrg      const uint32_t layer = start_layer + a;
7837ec681f3Smrg      const enum isl_aux_state aux_state =
7847ec681f3Smrg         iris_resource_get_aux_state(res, level, layer);
7857ec681f3Smrg
7867ec681f3Smrg      /* Transition the aux state for a conditional or unconditional write. A
7877ec681f3Smrg       * conditional write is handled by assuming that the write applies to
7887ec681f3Smrg       * only part of the render target. This prevents the new state from
7897ec681f3Smrg       * losing the types of compression that might exist in the current state
7907ec681f3Smrg       * (e.g. CLEAR). If the write evaluates to a no-op, the state will still
7917ec681f3Smrg       * be able to communicate when resolves are necessary (but it may
7927ec681f3Smrg       * falsely communicate this as well).
7937ec681f3Smrg       */
7947ec681f3Smrg      const enum isl_aux_state new_aux_state =
7957ec681f3Smrg         isl_aux_state_transition_write(aux_state, aux_usage, false);
7969f464c52Smaya
7977ec681f3Smrg      iris_resource_set_aux_state(ice, res, level, layer, 1, new_aux_state);
7989f464c52Smaya   }
7999f464c52Smaya}
8009f464c52Smaya
8019f464c52Smayaenum isl_aux_state
8029f464c52Smayairis_resource_get_aux_state(const struct iris_resource *res,
8039f464c52Smaya                            uint32_t level, uint32_t layer)
8049f464c52Smaya{
8059f464c52Smaya   iris_resource_check_level_layer(res, level, layer);
8069f464c52Smaya
8079f464c52Smaya   if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
8087ec681f3Smrg      assert(isl_aux_usage_has_hiz(res->aux.usage));
8099f464c52Smaya   } else {
8109f464c52Smaya      assert(res->surf.samples == 1 ||
8119f464c52Smaya             res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
8129f464c52Smaya   }
8139f464c52Smaya
8149f464c52Smaya   return res->aux.state[level][layer];
8159f464c52Smaya}
8169f464c52Smaya
8179f464c52Smayavoid
8189f464c52Smayairis_resource_set_aux_state(struct iris_context *ice,
8199f464c52Smaya                            struct iris_resource *res, uint32_t level,
8209f464c52Smaya                            uint32_t start_layer, uint32_t num_layers,
8219f464c52Smaya                            enum isl_aux_state aux_state)
8229f464c52Smaya{
8239f464c52Smaya   num_layers = miptree_layer_range_length(res, level, start_layer, num_layers);
8249f464c52Smaya
8259f464c52Smaya   if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
8267ec681f3Smrg      assert(iris_resource_level_has_hiz(res, level) ||
8277ec681f3Smrg             !isl_aux_state_has_valid_aux(aux_state));
8289f464c52Smaya   } else {
8299f464c52Smaya      assert(res->surf.samples == 1 ||
8309f464c52Smaya             res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
8319f464c52Smaya   }
8329f464c52Smaya
8339f464c52Smaya   for (unsigned a = 0; a < num_layers; a++) {
8349f464c52Smaya      if (res->aux.state[level][start_layer + a] != aux_state) {
8359f464c52Smaya         res->aux.state[level][start_layer + a] = aux_state;
8369f464c52Smaya         /* XXX: Need to track which bindings to make dirty */
8377ec681f3Smrg         ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER |
8387ec681f3Smrg                             IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES |
8397ec681f3Smrg                             IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES;
8407ec681f3Smrg         ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS;
8419f464c52Smaya      }
8429f464c52Smaya   }
8439f464c52Smaya
8447ec681f3Smrg   if (res->mod_info && !res->mod_info->supports_clear_color) {
8457ec681f3Smrg      assert(res->mod_info->aux_usage != ISL_AUX_USAGE_NONE);
8467ec681f3Smrg      if (aux_state == ISL_AUX_STATE_CLEAR ||
8477ec681f3Smrg          aux_state == ISL_AUX_STATE_COMPRESSED_CLEAR ||
8487ec681f3Smrg          aux_state == ISL_AUX_STATE_PARTIAL_CLEAR) {
8497ec681f3Smrg         iris_mark_dirty_dmabuf(ice, &res->base.b);
8507ec681f3Smrg      }
8519f464c52Smaya   }
8529f464c52Smaya}
8539f464c52Smaya
8549f464c52Smayaenum isl_aux_usage
8559f464c52Smayairis_resource_texture_aux_usage(struct iris_context *ice,
8569f464c52Smaya                                const struct iris_resource *res,
8577ec681f3Smrg                                enum isl_format view_format)
8589f464c52Smaya{
8599f464c52Smaya   struct iris_screen *screen = (void *) ice->ctx.screen;
8607ec681f3Smrg   struct intel_device_info *devinfo = &screen->devinfo;
8619f464c52Smaya
8629f464c52Smaya   switch (res->aux.usage) {
8639f464c52Smaya   case ISL_AUX_USAGE_HIZ:
8647ec681f3Smrg   case ISL_AUX_USAGE_HIZ_CCS:
8657ec681f3Smrg   case ISL_AUX_USAGE_HIZ_CCS_WT:
8667ec681f3Smrg      assert(res->surf.format == view_format);
8677ec681f3Smrg      return util_last_bit(res->aux.sampler_usages) - 1;
8689f464c52Smaya
8699f464c52Smaya   case ISL_AUX_USAGE_MCS:
8707ec681f3Smrg   case ISL_AUX_USAGE_MCS_CCS:
8717ec681f3Smrg   case ISL_AUX_USAGE_STC_CCS:
8727ec681f3Smrg   case ISL_AUX_USAGE_MC:
8737ec681f3Smrg      return res->aux.usage;
8749f464c52Smaya
8759f464c52Smaya   case ISL_AUX_USAGE_CCS_E:
8767ec681f3Smrg   case ISL_AUX_USAGE_GFX12_CCS_E:
8779f464c52Smaya      /* If we don't have any unresolved color, report an aux usage of
8789f464c52Smaya       * ISL_AUX_USAGE_NONE.  This way, texturing won't even look at the
8799f464c52Smaya       * aux surface and we can save some bandwidth.
8809f464c52Smaya       */
8817ec681f3Smrg      if (!iris_has_invalid_primary(res, 0, INTEL_REMAINING_LEVELS,
8827ec681f3Smrg                                    0, INTEL_REMAINING_LAYERS))
8839f464c52Smaya         return ISL_AUX_USAGE_NONE;
8849f464c52Smaya
8857ec681f3Smrg      /* On Gfx9 color buffers may be compressed by the hardware (lossless
8867ec681f3Smrg       * compression). There are, however, format restrictions and care needs
8877ec681f3Smrg       * to be taken that the sampler engine is capable for re-interpreting a
8887ec681f3Smrg       * buffer with format different the buffer was originally written with.
8897ec681f3Smrg       *
8907ec681f3Smrg       * For example, SRGB formats are not compressible and the sampler engine
8917ec681f3Smrg       * isn't capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case
8927ec681f3Smrg       * the underlying color buffer needs to be resolved so that the sampling
8937ec681f3Smrg       * surface can be sampled as non-compressed (i.e., without the auxiliary
8947ec681f3Smrg       * MCS buffer being set).
8957ec681f3Smrg       */
8967ec681f3Smrg      if (isl_formats_are_ccs_e_compatible(devinfo, res->surf.format,
8977ec681f3Smrg                                           view_format))
8987ec681f3Smrg         return res->aux.usage;
8999f464c52Smaya      break;
9009f464c52Smaya
9019f464c52Smaya   default:
9029f464c52Smaya      break;
9039f464c52Smaya   }
9049f464c52Smaya
9059f464c52Smaya   return ISL_AUX_USAGE_NONE;
9069f464c52Smaya}
9079f464c52Smaya
9087ec681f3Smrgenum isl_aux_usage
9097ec681f3Smrgiris_image_view_aux_usage(struct iris_context *ice,
9107ec681f3Smrg                          const struct pipe_image_view *pview,
9117ec681f3Smrg                          const struct shader_info *info)
9127ec681f3Smrg{
9137ec681f3Smrg   if (!info)
9147ec681f3Smrg      return ISL_AUX_USAGE_NONE;
9157ec681f3Smrg
9167ec681f3Smrg   const struct iris_screen *screen = (void *) ice->ctx.screen;
9177ec681f3Smrg   const struct intel_device_info *devinfo = &screen->devinfo;
9187ec681f3Smrg   struct iris_resource *res = (void *) pview->resource;
9197ec681f3Smrg
9207ec681f3Smrg   enum isl_format view_format = iris_image_view_get_format(ice, pview);
9217ec681f3Smrg   enum isl_aux_usage aux_usage =
9227ec681f3Smrg      iris_resource_texture_aux_usage(ice, res, view_format);
9237ec681f3Smrg
9247ec681f3Smrg   bool uses_atomic_load_store =
9257ec681f3Smrg      ice->shaders.uncompiled[info->stage]->uses_atomic_load_store;
9267ec681f3Smrg
9277ec681f3Smrg   /* On GFX12, compressed surfaces supports non-atomic operations. GFX12HP and
9287ec681f3Smrg    * further, add support for all the operations.
9297ec681f3Smrg    */
9307ec681f3Smrg   if (aux_usage == ISL_AUX_USAGE_GFX12_CCS_E &&
9317ec681f3Smrg       (devinfo->verx10 >= 125 || !uses_atomic_load_store))
9327ec681f3Smrg      return ISL_AUX_USAGE_GFX12_CCS_E;
9337ec681f3Smrg
9347ec681f3Smrg   return ISL_AUX_USAGE_NONE;
9357ec681f3Smrg}
9367ec681f3Smrg
9377ec681f3Smrgbool
9387ec681f3Smrgiris_can_sample_mcs_with_clear(const struct intel_device_info *devinfo,
9397ec681f3Smrg                               const struct iris_resource *res)
9407ec681f3Smrg{
9417ec681f3Smrg   assert(isl_aux_usage_has_mcs(res->aux.usage));
9427ec681f3Smrg
9437ec681f3Smrg   /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears.
9447ec681f3Smrg    * See HSD 1707282275, wa_14013111325. Due to the use of
9457ec681f3Smrg    * format-reinterpretation, a simplified workaround is implemented.
9467ec681f3Smrg    */
9477ec681f3Smrg   if (devinfo->ver >= 12 &&
9487ec681f3Smrg       isl_format_get_layout(res->surf.format)->bpb <= 16) {
9497ec681f3Smrg      return false;
9507ec681f3Smrg   }
9517ec681f3Smrg
9527ec681f3Smrg   return true;
9537ec681f3Smrg}
9547ec681f3Smrg
9559f464c52Smayastatic bool
9569f464c52Smayaisl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
9579f464c52Smaya{
9587ec681f3Smrg   /* On gfx8 and earlier, the hardware was only capable of handling 0/1 clear
9599f464c52Smaya    * values so sRGB curve application was a no-op for all fast-clearable
9609f464c52Smaya    * formats.
9619f464c52Smaya    *
9627ec681f3Smrg    * On gfx9+, the hardware supports arbitrary clear values.  For sRGB clear
9639f464c52Smaya    * values, the hardware interprets the floats, not as what would be
9649f464c52Smaya    * returned from the sampler (or written by the shader), but as being
9659f464c52Smaya    * between format conversion and sRGB curve application.  This means that
9669f464c52Smaya    * we can switch between sRGB and UNORM without having to whack the clear
9679f464c52Smaya    * color.
9689f464c52Smaya    */
9699f464c52Smaya   return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b);
9709f464c52Smaya}
9719f464c52Smaya
9729f464c52Smayavoid
9739f464c52Smayairis_resource_prepare_texture(struct iris_context *ice,
9749f464c52Smaya                              struct iris_resource *res,
9759f464c52Smaya                              enum isl_format view_format,
9769f464c52Smaya                              uint32_t start_level, uint32_t num_levels,
9777ec681f3Smrg                              uint32_t start_layer, uint32_t num_layers)
9789f464c52Smaya{
9797ec681f3Smrg   const struct iris_screen *screen = (void *) ice->ctx.screen;
9807ec681f3Smrg   const struct intel_device_info *devinfo = &screen->devinfo;
9817ec681f3Smrg
9829f464c52Smaya   enum isl_aux_usage aux_usage =
9837ec681f3Smrg      iris_resource_texture_aux_usage(ice, res, view_format);
9849f464c52Smaya
9857ec681f3Smrg   bool clear_supported = isl_aux_usage_has_fast_clears(aux_usage);
9869f464c52Smaya
9879f464c52Smaya   /* Clear color is specified as ints or floats and the conversion is done by
9889f464c52Smaya    * the sampler.  If we have a texture view, we would have to perform the
9899f464c52Smaya    * clear color conversion manually.  Just disable clear color.
9909f464c52Smaya    */
9919f464c52Smaya   if (!isl_formats_are_fast_clear_compatible(res->surf.format, view_format))
9929f464c52Smaya      clear_supported = false;
9939f464c52Smaya
9947ec681f3Smrg   if (isl_aux_usage_has_mcs(aux_usage) &&
9957ec681f3Smrg       !iris_can_sample_mcs_with_clear(devinfo, res)) {
9967ec681f3Smrg      clear_supported = false;
9977ec681f3Smrg   }
9987ec681f3Smrg
9997ec681f3Smrg   iris_resource_prepare_access(ice, res, start_level, num_levels,
10009f464c52Smaya                                start_layer, num_layers,
10019f464c52Smaya                                aux_usage, clear_supported);
10029f464c52Smaya}
10039f464c52Smaya
10047ec681f3Smrg/* Whether or not rendering a color value with either format results in the
10057ec681f3Smrg * same pixel. This can return false negatives.
10067ec681f3Smrg */
10077ec681f3Smrgbool
10087ec681f3Smrgiris_render_formats_color_compatible(enum isl_format a, enum isl_format b,
10097ec681f3Smrg                                     union isl_color_value color,
10107ec681f3Smrg                                     bool clear_color_unknown)
10119f464c52Smaya{
10127ec681f3Smrg   if (a == b)
10137ec681f3Smrg      return true;
10147ec681f3Smrg
10157ec681f3Smrg   /* A difference in color space doesn't matter for 0/1 values. */
10167ec681f3Smrg   if (!clear_color_unknown &&
10177ec681f3Smrg       isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b) &&
10187ec681f3Smrg       isl_color_value_is_zero_one(color, a)) {
10197ec681f3Smrg      return true;
10207ec681f3Smrg   }
10217ec681f3Smrg
10227ec681f3Smrg   return false;
10239f464c52Smaya}
10249f464c52Smaya
10259f464c52Smayaenum isl_aux_usage
10269f464c52Smayairis_resource_render_aux_usage(struct iris_context *ice,
10277ec681f3Smrg                               struct iris_resource *res, uint32_t level,
10289f464c52Smaya                               enum isl_format render_format,
10299f464c52Smaya                               bool draw_aux_disabled)
10309f464c52Smaya{
10319f464c52Smaya   struct iris_screen *screen = (void *) ice->ctx.screen;
10327ec681f3Smrg   struct intel_device_info *devinfo = &screen->devinfo;
10339f464c52Smaya
10349f464c52Smaya   if (draw_aux_disabled)
10359f464c52Smaya      return ISL_AUX_USAGE_NONE;
10369f464c52Smaya
10379f464c52Smaya   switch (res->aux.usage) {
10387ec681f3Smrg   case ISL_AUX_USAGE_HIZ:
10397ec681f3Smrg   case ISL_AUX_USAGE_HIZ_CCS:
10407ec681f3Smrg   case ISL_AUX_USAGE_HIZ_CCS_WT:
10417ec681f3Smrg      assert(render_format == res->surf.format);
10427ec681f3Smrg      return iris_resource_level_has_hiz(res, level) ?
10437ec681f3Smrg             res->aux.usage : ISL_AUX_USAGE_NONE;
10447ec681f3Smrg
10457ec681f3Smrg   case ISL_AUX_USAGE_STC_CCS:
10467ec681f3Smrg      assert(render_format == res->surf.format);
10477ec681f3Smrg      return res->aux.usage;
10487ec681f3Smrg
10499f464c52Smaya   case ISL_AUX_USAGE_MCS:
10507ec681f3Smrg   case ISL_AUX_USAGE_MCS_CCS:
10517ec681f3Smrg      return res->aux.usage;
10529f464c52Smaya
10539f464c52Smaya   case ISL_AUX_USAGE_CCS_D:
10549f464c52Smaya   case ISL_AUX_USAGE_CCS_E:
10557ec681f3Smrg   case ISL_AUX_USAGE_GFX12_CCS_E:
10567ec681f3Smrg      /* Disable CCS for some cases of texture-view rendering. On gfx12, HW
10577ec681f3Smrg       * may convert some subregions of shader output to fast-cleared blocks
10587ec681f3Smrg       * if CCS is enabled and the shader output matches the clear color.
10597ec681f3Smrg       * Existing fast-cleared blocks are correctly interpreted by the clear
10607ec681f3Smrg       * color and the resource format (see can_fast_clear_color). To avoid
10617ec681f3Smrg       * gaining new fast-cleared blocks that can't be interpreted by the
10627ec681f3Smrg       * resource format (and to avoid misinterpreting existing ones), shut
10637ec681f3Smrg       * off CCS when the interpretation of the clear color differs between
10647ec681f3Smrg       * the render_format and the resource format.
10659f464c52Smaya       */
10667ec681f3Smrg      if (!iris_render_formats_color_compatible(render_format,
10677ec681f3Smrg                                                res->surf.format,
10687ec681f3Smrg                                                res->aux.clear_color,
10697ec681f3Smrg                                                res->aux.clear_color_unknown)) {
10709f464c52Smaya         return ISL_AUX_USAGE_NONE;
10717ec681f3Smrg      }
10729f464c52Smaya
10737ec681f3Smrg      if (res->aux.usage == ISL_AUX_USAGE_CCS_D)
10747ec681f3Smrg         return ISL_AUX_USAGE_CCS_D;
10759f464c52Smaya
10767ec681f3Smrg      if (isl_formats_are_ccs_e_compatible(devinfo, res->surf.format,
10777ec681f3Smrg                                           render_format)) {
10787ec681f3Smrg         return res->aux.usage;
10797ec681f3Smrg      }
10807ec681f3Smrg      FALLTHROUGH;
10819f464c52Smaya
10829f464c52Smaya   default:
10839f464c52Smaya      return ISL_AUX_USAGE_NONE;
10849f464c52Smaya   }
10859f464c52Smaya}
10869f464c52Smaya
10879f464c52Smayavoid
10889f464c52Smayairis_resource_prepare_render(struct iris_context *ice,
10899f464c52Smaya                             struct iris_resource *res, uint32_t level,
10909f464c52Smaya                             uint32_t start_layer, uint32_t layer_count,
10919f464c52Smaya                             enum isl_aux_usage aux_usage)
10929f464c52Smaya{
10937ec681f3Smrg   iris_resource_prepare_access(ice, res, level, 1, start_layer,
10949f464c52Smaya                                layer_count, aux_usage,
10957ec681f3Smrg                                isl_aux_usage_has_fast_clears(aux_usage));
10969f464c52Smaya}
10979f464c52Smaya
10989f464c52Smayavoid
10999f464c52Smayairis_resource_finish_render(struct iris_context *ice,
11009f464c52Smaya                            struct iris_resource *res, uint32_t level,
11019f464c52Smaya                            uint32_t start_layer, uint32_t layer_count,
11029f464c52Smaya                            enum isl_aux_usage aux_usage)
11039f464c52Smaya{
11049f464c52Smaya   iris_resource_finish_write(ice, res, level, start_layer, layer_count,
11059f464c52Smaya                              aux_usage);
11069f464c52Smaya}
1107