/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_resolve.c
 *
 * This file handles resolve tracking for main and auxiliary surfaces.
 *
 * It also handles our cache tracking. We have sets for the render cache,
 * depth cache, and so on. If a BO is in a cache's set, then it may have
 * data in that cache. The helpers take care of emitting flushes for
 * render-to-texture, format reinterpretation issues, and other situations.
329f464c52Smaya */ 339f464c52Smaya 349f464c52Smaya#include "util/hash_table.h" 359f464c52Smaya#include "util/set.h" 369f464c52Smaya#include "iris_context.h" 377ec681f3Smrg#include "compiler/nir/nir.h" 389f464c52Smaya 399f464c52Smaya/** 409f464c52Smaya * Disable auxiliary buffers if a renderbuffer is also bound as a texture 419f464c52Smaya * or shader image. This causes a self-dependency, where both rendering 429f464c52Smaya * and sampling may concurrently read or write the CCS buffer, causing 439f464c52Smaya * incorrect pixels. 449f464c52Smaya */ 459f464c52Smayastatic bool 469f464c52Smayadisable_rb_aux_buffer(struct iris_context *ice, 479f464c52Smaya bool *draw_aux_buffer_disabled, 489f464c52Smaya struct iris_resource *tex_res, 499f464c52Smaya unsigned min_level, unsigned num_levels, 509f464c52Smaya const char *usage) 519f464c52Smaya{ 529f464c52Smaya struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 539f464c52Smaya bool found = false; 549f464c52Smaya 559f464c52Smaya /* We only need to worry about color compression and fast clears. 
*/ 569f464c52Smaya if (tex_res->aux.usage != ISL_AUX_USAGE_CCS_D && 577ec681f3Smrg tex_res->aux.usage != ISL_AUX_USAGE_CCS_E && 587ec681f3Smrg tex_res->aux.usage != ISL_AUX_USAGE_GFX12_CCS_E) 599f464c52Smaya return false; 609f464c52Smaya 619f464c52Smaya for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { 629f464c52Smaya struct iris_surface *surf = (void *) cso_fb->cbufs[i]; 639f464c52Smaya if (!surf) 649f464c52Smaya continue; 659f464c52Smaya 669f464c52Smaya struct iris_resource *rb_res = (void *) surf->base.texture; 679f464c52Smaya 689f464c52Smaya if (rb_res->bo == tex_res->bo && 699f464c52Smaya surf->base.u.tex.level >= min_level && 709f464c52Smaya surf->base.u.tex.level < min_level + num_levels) { 719f464c52Smaya found = draw_aux_buffer_disabled[i] = true; 729f464c52Smaya } 739f464c52Smaya } 749f464c52Smaya 759f464c52Smaya if (found) { 769f464c52Smaya perf_debug(&ice->dbg, 779f464c52Smaya "Disabling CCS because a renderbuffer is also bound %s.\n", 789f464c52Smaya usage); 799f464c52Smaya } 809f464c52Smaya 819f464c52Smaya return found; 829f464c52Smaya} 839f464c52Smaya 849f464c52Smayastatic void 859f464c52Smayaresolve_sampler_views(struct iris_context *ice, 869f464c52Smaya struct iris_batch *batch, 879f464c52Smaya struct iris_shader_state *shs, 889f464c52Smaya const struct shader_info *info, 899f464c52Smaya bool *draw_aux_buffer_disabled, 909f464c52Smaya bool consider_framebuffer) 919f464c52Smaya{ 927ec681f3Smrg uint32_t views = info ? 
(shs->bound_sampler_views & info->textures_used[0]) : 0; 939f464c52Smaya 949f464c52Smaya while (views) { 959f464c52Smaya const int i = u_bit_scan(&views); 969f464c52Smaya struct iris_sampler_view *isv = shs->textures[i]; 979f464c52Smaya 987ec681f3Smrg if (isv->res->base.b.target != PIPE_BUFFER) { 999f464c52Smaya if (consider_framebuffer) { 1007ec681f3Smrg disable_rb_aux_buffer(ice, draw_aux_buffer_disabled, isv->res, 1017ec681f3Smrg isv->view.base_level, isv->view.levels, 1029f464c52Smaya "for sampling"); 1039f464c52Smaya } 1049f464c52Smaya 1057ec681f3Smrg iris_resource_prepare_texture(ice, isv->res, isv->view.format, 1069f464c52Smaya isv->view.base_level, isv->view.levels, 1079f464c52Smaya isv->view.base_array_layer, 1087ec681f3Smrg isv->view.array_len); 1099f464c52Smaya } 1109f464c52Smaya 1117ec681f3Smrg iris_emit_buffer_barrier_for(batch, isv->res->bo, 1127ec681f3Smrg IRIS_DOMAIN_OTHER_READ); 1139f464c52Smaya } 1149f464c52Smaya} 1159f464c52Smaya 1169f464c52Smayastatic void 1179f464c52Smayaresolve_image_views(struct iris_context *ice, 1189f464c52Smaya struct iris_batch *batch, 1199f464c52Smaya struct iris_shader_state *shs, 1207ec681f3Smrg const struct shader_info *info, 1219f464c52Smaya bool *draw_aux_buffer_disabled, 1229f464c52Smaya bool consider_framebuffer) 1239f464c52Smaya{ 1247ec681f3Smrg uint32_t views = info ? 
(shs->bound_image_views & info->images_used) : 0; 1259f464c52Smaya 1269f464c52Smaya while (views) { 1279f464c52Smaya const int i = u_bit_scan(&views); 1287ec681f3Smrg struct pipe_image_view *pview = &shs->image[i].base; 1297ec681f3Smrg struct iris_resource *res = (void *) pview->resource; 1309f464c52Smaya 1317ec681f3Smrg if (res->base.b.target != PIPE_BUFFER) { 1329f464c52Smaya if (consider_framebuffer) { 1339f464c52Smaya disable_rb_aux_buffer(ice, draw_aux_buffer_disabled, 1347ec681f3Smrg res, pview->u.tex.level, 1, 1357ec681f3Smrg "as a shader image"); 1369f464c52Smaya } 1379f464c52Smaya 1387ec681f3Smrg unsigned num_layers = 1397ec681f3Smrg pview->u.tex.last_layer - pview->u.tex.first_layer + 1; 1407ec681f3Smrg 1417ec681f3Smrg enum isl_aux_usage aux_usage = 1427ec681f3Smrg iris_image_view_aux_usage(ice, pview, info); 1437ec681f3Smrg 1447ec681f3Smrg iris_resource_prepare_access(ice, res, 1457ec681f3Smrg pview->u.tex.level, 1, 1467ec681f3Smrg pview->u.tex.first_layer, num_layers, 1477ec681f3Smrg aux_usage, false); 1489f464c52Smaya } 1499f464c52Smaya 1507ec681f3Smrg iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_DATA_WRITE); 1519f464c52Smaya } 1529f464c52Smaya} 1539f464c52Smaya 1549f464c52Smaya/** 1559f464c52Smaya * \brief Resolve buffers before drawing. 1569f464c52Smaya * 1579f464c52Smaya * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each 1589f464c52Smaya * enabled depth texture, and flush the render cache for any dirty textures. 
1599f464c52Smaya */ 1609f464c52Smayavoid 1619f464c52Smayairis_predraw_resolve_inputs(struct iris_context *ice, 1629f464c52Smaya struct iris_batch *batch, 1639f464c52Smaya bool *draw_aux_buffer_disabled, 1649f464c52Smaya gl_shader_stage stage, 1659f464c52Smaya bool consider_framebuffer) 1669f464c52Smaya{ 1679f464c52Smaya struct iris_shader_state *shs = &ice->state.shaders[stage]; 1689f464c52Smaya const struct shader_info *info = iris_get_shader_info(ice, stage); 1699f464c52Smaya 1707ec681f3Smrg uint64_t stage_dirty = (IRIS_STAGE_DIRTY_BINDINGS_VS << stage) | 1717ec681f3Smrg (consider_framebuffer ? IRIS_STAGE_DIRTY_BINDINGS_FS : 0); 1729f464c52Smaya 1737ec681f3Smrg if (ice->state.stage_dirty & stage_dirty) { 1749f464c52Smaya resolve_sampler_views(ice, batch, shs, info, draw_aux_buffer_disabled, 1759f464c52Smaya consider_framebuffer); 1767ec681f3Smrg resolve_image_views(ice, batch, shs, info, draw_aux_buffer_disabled, 1779f464c52Smaya consider_framebuffer); 1789f464c52Smaya } 1799f464c52Smaya} 1809f464c52Smaya 1819f464c52Smayavoid 1829f464c52Smayairis_predraw_resolve_framebuffer(struct iris_context *ice, 1839f464c52Smaya struct iris_batch *batch, 1849f464c52Smaya bool *draw_aux_buffer_disabled) 1859f464c52Smaya{ 1869f464c52Smaya struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 1877ec681f3Smrg struct iris_screen *screen = (void *) ice->ctx.screen; 1887ec681f3Smrg struct intel_device_info *devinfo = &screen->devinfo; 1897ec681f3Smrg struct iris_uncompiled_shader *ish = 1907ec681f3Smrg ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; 1917ec681f3Smrg const nir_shader *nir = ish->nir; 1929f464c52Smaya 1939f464c52Smaya if (ice->state.dirty & IRIS_DIRTY_DEPTH_BUFFER) { 1949f464c52Smaya struct pipe_surface *zs_surf = cso_fb->zsbuf; 1959f464c52Smaya 1969f464c52Smaya if (zs_surf) { 1979f464c52Smaya struct iris_resource *z_res, *s_res; 1989f464c52Smaya iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res); 1999f464c52Smaya unsigned num_layers = 
2009f464c52Smaya zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1; 2019f464c52Smaya 2029f464c52Smaya if (z_res) { 2037ec681f3Smrg iris_resource_prepare_render(ice, z_res, zs_surf->u.tex.level, 2047ec681f3Smrg zs_surf->u.tex.first_layer, 2057ec681f3Smrg num_layers, ice->state.hiz_usage); 2067ec681f3Smrg iris_emit_buffer_barrier_for(batch, z_res->bo, 2077ec681f3Smrg IRIS_DOMAIN_DEPTH_WRITE); 2089f464c52Smaya } 2099f464c52Smaya 2109f464c52Smaya if (s_res) { 2117ec681f3Smrg iris_emit_buffer_barrier_for(batch, s_res->bo, 2127ec681f3Smrg IRIS_DOMAIN_DEPTH_WRITE); 2139f464c52Smaya } 2149f464c52Smaya } 2159f464c52Smaya } 2169f464c52Smaya 2177ec681f3Smrg if (devinfo->ver == 8 && nir->info.outputs_read != 0) { 2187ec681f3Smrg for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { 2197ec681f3Smrg if (cso_fb->cbufs[i]) { 2207ec681f3Smrg struct iris_surface *surf = (void *) cso_fb->cbufs[i]; 2217ec681f3Smrg struct iris_resource *res = (void *) cso_fb->cbufs[i]->texture; 2227ec681f3Smrg 2237ec681f3Smrg iris_resource_prepare_texture(ice, res, surf->view.format, 2247ec681f3Smrg surf->view.base_level, 1, 2257ec681f3Smrg surf->view.base_array_layer, 2267ec681f3Smrg surf->view.array_len); 2277ec681f3Smrg } 2287ec681f3Smrg } 2297ec681f3Smrg } 2307ec681f3Smrg 2317ec681f3Smrg if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_FS) { 2329f464c52Smaya for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { 2339f464c52Smaya struct iris_surface *surf = (void *) cso_fb->cbufs[i]; 2349f464c52Smaya if (!surf) 2359f464c52Smaya continue; 2369f464c52Smaya 2379f464c52Smaya struct iris_resource *res = (void *) surf->base.texture; 2389f464c52Smaya 2399f464c52Smaya enum isl_aux_usage aux_usage = 2407ec681f3Smrg iris_resource_render_aux_usage(ice, res, surf->view.base_level, 2417ec681f3Smrg surf->view.format, 2429f464c52Smaya draw_aux_buffer_disabled[i]); 2439f464c52Smaya 2449f464c52Smaya if (ice->state.draw_aux_usage[i] != aux_usage) { 2459f464c52Smaya ice->state.draw_aux_usage[i] = aux_usage; 
2469f464c52Smaya /* XXX: Need to track which bindings to make dirty */ 2477ec681f3Smrg ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER; 2487ec681f3Smrg ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS; 2499f464c52Smaya } 2509f464c52Smaya 2517ec681f3Smrg iris_resource_prepare_render(ice, res, surf->view.base_level, 2529f464c52Smaya surf->view.base_array_layer, 2539f464c52Smaya surf->view.array_len, 2549f464c52Smaya aux_usage); 2559f464c52Smaya 2567ec681f3Smrg iris_cache_flush_for_render(batch, res->bo, aux_usage); 2579f464c52Smaya } 2589f464c52Smaya } 2599f464c52Smaya} 2609f464c52Smaya 2619f464c52Smaya/** 2629f464c52Smaya * \brief Call this after drawing to mark which buffers need resolving 2639f464c52Smaya * 2649f464c52Smaya * If the depth buffer was written to and if it has an accompanying HiZ 2659f464c52Smaya * buffer, then mark that it needs a depth resolve. 2669f464c52Smaya * 2679f464c52Smaya * If the color buffer is a multisample window system buffer, then 2689f464c52Smaya * mark that it needs a downsample. 2699f464c52Smaya * 2709f464c52Smaya * Also mark any render targets which will be textured as needing a render 2719f464c52Smaya * cache flush. 2729f464c52Smaya */ 2739f464c52Smayavoid 2749f464c52Smayairis_postdraw_update_resolve_tracking(struct iris_context *ice, 2759f464c52Smaya struct iris_batch *batch) 2769f464c52Smaya{ 2779f464c52Smaya struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 2789f464c52Smaya 2799f464c52Smaya // XXX: front buffer drawing? 
2809f464c52Smaya 2819f464c52Smaya bool may_have_resolved_depth = 2829f464c52Smaya ice->state.dirty & (IRIS_DIRTY_DEPTH_BUFFER | 2839f464c52Smaya IRIS_DIRTY_WM_DEPTH_STENCIL); 2849f464c52Smaya 2859f464c52Smaya struct pipe_surface *zs_surf = cso_fb->zsbuf; 2869f464c52Smaya if (zs_surf) { 2879f464c52Smaya struct iris_resource *z_res, *s_res; 2889f464c52Smaya iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res); 2899f464c52Smaya unsigned num_layers = 2909f464c52Smaya zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1; 2919f464c52Smaya 2929f464c52Smaya if (z_res) { 2937ec681f3Smrg if (may_have_resolved_depth && ice->state.depth_writes_enabled) { 2947ec681f3Smrg iris_resource_finish_render(ice, z_res, zs_surf->u.tex.level, 2957ec681f3Smrg zs_surf->u.tex.first_layer, 2967ec681f3Smrg num_layers, ice->state.hiz_usage); 2979f464c52Smaya } 2989f464c52Smaya } 2999f464c52Smaya 3009f464c52Smaya if (s_res) { 3017ec681f3Smrg if (may_have_resolved_depth && ice->state.stencil_writes_enabled) { 3029f464c52Smaya iris_resource_finish_write(ice, s_res, zs_surf->u.tex.level, 3039f464c52Smaya zs_surf->u.tex.first_layer, num_layers, 3047ec681f3Smrg s_res->aux.usage); 3059f464c52Smaya } 3069f464c52Smaya } 3079f464c52Smaya } 3089f464c52Smaya 3099f464c52Smaya bool may_have_resolved_color = 3107ec681f3Smrg ice->state.stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_FS; 3119f464c52Smaya 3129f464c52Smaya for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { 3139f464c52Smaya struct iris_surface *surf = (void *) cso_fb->cbufs[i]; 3149f464c52Smaya if (!surf) 3159f464c52Smaya continue; 3169f464c52Smaya 3179f464c52Smaya struct iris_resource *res = (void *) surf->base.texture; 3189f464c52Smaya enum isl_aux_usage aux_usage = ice->state.draw_aux_usage[i]; 3199f464c52Smaya 3209f464c52Smaya if (may_have_resolved_color) { 3219f464c52Smaya union pipe_surface_desc *desc = &surf->base.u; 3229f464c52Smaya unsigned num_layers = 3239f464c52Smaya desc->tex.last_layer - desc->tex.first_layer + 1; 
3249f464c52Smaya iris_resource_finish_render(ice, res, desc->tex.level, 3259f464c52Smaya desc->tex.first_layer, num_layers, 3269f464c52Smaya aux_usage); 3279f464c52Smaya } 3289f464c52Smaya } 3299f464c52Smaya} 3309f464c52Smaya 3319f464c52Smayavoid 3329f464c52Smayairis_cache_flush_for_render(struct iris_batch *batch, 3339f464c52Smaya struct iris_bo *bo, 3349f464c52Smaya enum isl_aux_usage aux_usage) 3359f464c52Smaya{ 3367ec681f3Smrg iris_emit_buffer_barrier_for(batch, bo, IRIS_DOMAIN_RENDER_WRITE); 3379f464c52Smaya 3389f464c52Smaya /* Check to see if this bo has been used by a previous rendering operation 3397ec681f3Smrg * but with a different aux usage. If it has, flush the render cache so we 3407ec681f3Smrg * ensure that it's only in there with one aux usage at a time. 3419f464c52Smaya * 3429f464c52Smaya * Even though it's not obvious, this can easily happen in practice. 3439f464c52Smaya * Suppose a client is blending on a surface with sRGB encode enabled on 3447ec681f3Smrg * gfx9. This implies that you get AUX_USAGE_CCS_D at best. If the client 3459f464c52Smaya * then disables sRGB decode and continues blending we will flip on 3469f464c52Smaya * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is 3479f464c52Smaya * perfectly valid since CCS_E is a subset of CCS_D). However, this means 3489f464c52Smaya * that we have fragments in-flight which are rendering with UNORM+CCS_E 3499f464c52Smaya * and other fragments in-flight with SRGB+CCS_D on the same surface at the 3509f464c52Smaya * same time and the pixel scoreboard and color blender are trying to sort 3519f464c52Smaya * it all out. This ends badly (i.e. GPU hangs). 3529f464c52Smaya * 3537ec681f3Smrg * There are comments in various docs which indicate that the render cache 3547ec681f3Smrg * isn't 100% resilient to format changes. However, to date, we have never 3557ec681f3Smrg * observed GPU hangs or even corruption to be associated with switching the 3567ec681f3Smrg * format, only the aux usage. 
So we let that slide for now. 3579f464c52Smaya */ 3587ec681f3Smrg void *v_aux_usage = (void *) (uintptr_t) aux_usage; 3599f464c52Smaya struct hash_entry *entry = 3609f464c52Smaya _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo); 3617ec681f3Smrg if (!entry) { 3627ec681f3Smrg _mesa_hash_table_insert_pre_hashed(batch->cache.render, bo->hash, bo, 3637ec681f3Smrg v_aux_usage); 3647ec681f3Smrg } else if (entry->data != v_aux_usage) { 3657ec681f3Smrg iris_emit_pipe_control_flush(batch, 3667ec681f3Smrg "cache tracker: aux usage mismatch", 3677ec681f3Smrg PIPE_CONTROL_RENDER_TARGET_FLUSH | 3687ec681f3Smrg PIPE_CONTROL_TILE_CACHE_FLUSH | 3697ec681f3Smrg PIPE_CONTROL_CS_STALL); 3707ec681f3Smrg entry->data = v_aux_usage; 3717ec681f3Smrg } 3729f464c52Smaya} 3739f464c52Smaya 3747ec681f3Smrgstatic void 3757ec681f3Smrgflush_ubos(struct iris_batch *batch, 3767ec681f3Smrg struct iris_shader_state *shs) 3779f464c52Smaya{ 3787ec681f3Smrg uint32_t cbufs = shs->dirty_cbufs & shs->bound_cbufs; 3797ec681f3Smrg 3807ec681f3Smrg while (cbufs) { 3817ec681f3Smrg const int i = u_bit_scan(&cbufs); 3827ec681f3Smrg struct pipe_shader_buffer *cbuf = &shs->constbuf[i]; 3837ec681f3Smrg struct iris_resource *res = (void *)cbuf->buffer; 3847ec681f3Smrg iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_OTHER_READ); 3859f464c52Smaya } 3869f464c52Smaya 3877ec681f3Smrg shs->dirty_cbufs = 0; 3889f464c52Smaya} 3899f464c52Smaya 3907ec681f3Smrgstatic void 3917ec681f3Smrgflush_ssbos(struct iris_batch *batch, 3927ec681f3Smrg struct iris_shader_state *shs) 3939f464c52Smaya{ 3947ec681f3Smrg uint32_t ssbos = shs->bound_ssbos; 3957ec681f3Smrg 3967ec681f3Smrg while (ssbos) { 3977ec681f3Smrg const int i = u_bit_scan(&ssbos); 3987ec681f3Smrg struct pipe_shader_buffer *ssbo = &shs->ssbo[i]; 3997ec681f3Smrg struct iris_resource *res = (void *)ssbo->buffer; 4007ec681f3Smrg iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_DATA_WRITE); 4017ec681f3Smrg } 4029f464c52Smaya} 4039f464c52Smaya 
4049f464c52Smayavoid 4057ec681f3Smrgiris_predraw_flush_buffers(struct iris_context *ice, 4067ec681f3Smrg struct iris_batch *batch, 4077ec681f3Smrg gl_shader_stage stage) 4089f464c52Smaya{ 4097ec681f3Smrg struct iris_shader_state *shs = &ice->state.shaders[stage]; 4107ec681f3Smrg 4117ec681f3Smrg if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << stage)) 4127ec681f3Smrg flush_ubos(batch, shs); 4137ec681f3Smrg 4147ec681f3Smrg if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_BINDINGS_VS << stage)) 4157ec681f3Smrg flush_ssbos(batch, shs); 4169f464c52Smaya} 4179f464c52Smaya 4189f464c52Smayastatic void 4199f464c52Smayairis_resolve_color(struct iris_context *ice, 4209f464c52Smaya struct iris_batch *batch, 4219f464c52Smaya struct iris_resource *res, 4229f464c52Smaya unsigned level, unsigned layer, 4239f464c52Smaya enum isl_aux_op resolve_op) 4249f464c52Smaya{ 4259f464c52Smaya //DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer); 4269f464c52Smaya 4279f464c52Smaya struct blorp_surf surf; 4287ec681f3Smrg iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf, 4297ec681f3Smrg &res->base.b, res->aux.usage, level, true); 4309f464c52Smaya 4319f464c52Smaya iris_batch_maybe_flush(batch, 1500); 4329f464c52Smaya 4339f464c52Smaya /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)": 4349f464c52Smaya * 4359f464c52Smaya * "Any transition from any value in {Clear, Render, Resolve} to a 4369f464c52Smaya * different value in {Clear, Render, Resolve} requires end of pipe 4379f464c52Smaya * synchronization." 4389f464c52Smaya * 4399f464c52Smaya * In other words, fast clear ops are not properly synchronized with 4409f464c52Smaya * other drawing. We need to use a PIPE_CONTROL to ensure that the 4419f464c52Smaya * contents of the previous draw hit the render target before we resolve 4429f464c52Smaya * and again afterwards to ensure that the resolve is complete before we 4439f464c52Smaya * do any more regular drawing. 
4449f464c52Smaya */ 4457ec681f3Smrg iris_emit_end_of_pipe_sync(batch, "color resolve: pre-flush", 4467ec681f3Smrg PIPE_CONTROL_RENDER_TARGET_FLUSH); 4479f464c52Smaya 4487ec681f3Smrg iris_batch_sync_region_start(batch); 4499f464c52Smaya struct blorp_batch blorp_batch; 4509f464c52Smaya blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); 4517ec681f3Smrg blorp_ccs_resolve(&blorp_batch, &surf, level, layer, 1, res->surf.format, 4529f464c52Smaya resolve_op); 4539f464c52Smaya blorp_batch_finish(&blorp_batch); 4549f464c52Smaya 4559f464c52Smaya /* See comment above */ 4567ec681f3Smrg iris_emit_end_of_pipe_sync(batch, "color resolve: post-flush", 4577ec681f3Smrg PIPE_CONTROL_RENDER_TARGET_FLUSH); 4587ec681f3Smrg iris_batch_sync_region_end(batch); 4599f464c52Smaya} 4609f464c52Smaya 4619f464c52Smayastatic void 4629f464c52Smayairis_mcs_partial_resolve(struct iris_context *ice, 4639f464c52Smaya struct iris_batch *batch, 4649f464c52Smaya struct iris_resource *res, 4659f464c52Smaya uint32_t start_layer, 4669f464c52Smaya uint32_t num_layers) 4679f464c52Smaya{ 4689f464c52Smaya //DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt, 4699f464c52Smaya //start_layer, start_layer + num_layers - 1); 4709f464c52Smaya 4717ec681f3Smrg assert(isl_aux_usage_has_mcs(res->aux.usage)); 4727ec681f3Smrg 4737ec681f3Smrg iris_batch_maybe_flush(batch, 1500); 4749f464c52Smaya 4759f464c52Smaya struct blorp_surf surf; 4767ec681f3Smrg iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf, 4777ec681f3Smrg &res->base.b, res->aux.usage, 0, true); 4787ec681f3Smrg iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_RENDER_WRITE); 4799f464c52Smaya 4809f464c52Smaya struct blorp_batch blorp_batch; 4817ec681f3Smrg iris_batch_sync_region_start(batch); 4829f464c52Smaya blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); 4837ec681f3Smrg blorp_mcs_partial_resolve(&blorp_batch, &surf, res->surf.format, 4849f464c52Smaya start_layer, num_layers); 4859f464c52Smaya blorp_batch_finish(&blorp_batch); 
4867ec681f3Smrg iris_batch_sync_region_end(batch); 4879f464c52Smaya} 4889f464c52Smaya 4897ec681f3Smrgbool 4907ec681f3Smrgiris_sample_with_depth_aux(const struct intel_device_info *devinfo, 4917ec681f3Smrg const struct iris_resource *res) 4929f464c52Smaya{ 4937ec681f3Smrg switch (res->aux.usage) { 4947ec681f3Smrg case ISL_AUX_USAGE_HIZ: 4957ec681f3Smrg if (devinfo->has_sample_with_hiz) 4967ec681f3Smrg break; 4979f464c52Smaya return false; 4987ec681f3Smrg case ISL_AUX_USAGE_HIZ_CCS: 4999f464c52Smaya return false; 5007ec681f3Smrg case ISL_AUX_USAGE_HIZ_CCS_WT: 5017ec681f3Smrg break; 5027ec681f3Smrg default: 5037ec681f3Smrg return false; 5047ec681f3Smrg } 5059f464c52Smaya 5069f464c52Smaya for (unsigned level = 0; level < res->surf.levels; ++level) { 5079f464c52Smaya if (!iris_resource_level_has_hiz(res, level)) 5089f464c52Smaya return false; 5099f464c52Smaya } 5109f464c52Smaya 5117ec681f3Smrg /* From the BDW PRM (Volume 2d: Command Reference: Structures 5129f464c52Smaya * RENDER_SURFACE_STATE.AuxiliarySurfaceMode): 5139f464c52Smaya * 5149f464c52Smaya * "If this field is set to AUX_HIZ, Number of Multisamples must be 5159f464c52Smaya * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D. 5169f464c52Smaya * 5179f464c52Smaya * There is no such blurb for 1D textures, but there is sufficient evidence 5189f464c52Smaya * that this is broken on SKL+. 5199f464c52Smaya */ 5209f464c52Smaya return res->surf.samples == 1 && res->surf.dim == ISL_SURF_DIM_2D; 5219f464c52Smaya} 5229f464c52Smaya 5239f464c52Smaya/** 5249f464c52Smaya * Perform a HiZ or depth resolve operation. 
5259f464c52Smaya * 5269f464c52Smaya * For an overview of HiZ ops, see the following sections of the Sandy Bridge 5279f464c52Smaya * PRM, Volume 1, Part 2: 5289f464c52Smaya * - 7.5.3.1 Depth Buffer Clear 5299f464c52Smaya * - 7.5.3.2 Depth Buffer Resolve 5309f464c52Smaya * - 7.5.3.3 Hierarchical Depth Buffer Resolve 5319f464c52Smaya */ 5329f464c52Smayavoid 5339f464c52Smayairis_hiz_exec(struct iris_context *ice, 5349f464c52Smaya struct iris_batch *batch, 5359f464c52Smaya struct iris_resource *res, 5369f464c52Smaya unsigned int level, unsigned int start_layer, 5379f464c52Smaya unsigned int num_layers, enum isl_aux_op op, 5389f464c52Smaya bool update_clear_depth) 5399f464c52Smaya{ 5409f464c52Smaya assert(iris_resource_level_has_hiz(res, level)); 5419f464c52Smaya assert(op != ISL_AUX_OP_NONE); 5429f464c52Smaya UNUSED const char *name = NULL; 5439f464c52Smaya 5447ec681f3Smrg iris_batch_maybe_flush(batch, 1500); 5457ec681f3Smrg 5469f464c52Smaya switch (op) { 5479f464c52Smaya case ISL_AUX_OP_FULL_RESOLVE: 5489f464c52Smaya name = "depth resolve"; 5499f464c52Smaya break; 5509f464c52Smaya case ISL_AUX_OP_AMBIGUATE: 5519f464c52Smaya name = "hiz ambiguate"; 5529f464c52Smaya break; 5539f464c52Smaya case ISL_AUX_OP_FAST_CLEAR: 5549f464c52Smaya name = "depth clear"; 5559f464c52Smaya break; 5569f464c52Smaya case ISL_AUX_OP_PARTIAL_RESOLVE: 5579f464c52Smaya case ISL_AUX_OP_NONE: 5589f464c52Smaya unreachable("Invalid HiZ op"); 5599f464c52Smaya } 5609f464c52Smaya 5619f464c52Smaya //DBG("%s %s to mt %p level %d layers %d-%d\n", 5629f464c52Smaya //__func__, name, mt, level, start_layer, start_layer + num_layers - 1); 5639f464c52Smaya 5649f464c52Smaya /* The following stalls and flushes are only documented to be required 5659f464c52Smaya * for HiZ clear operations. However, they also seem to be required for 5669f464c52Smaya * resolve operations. 
5679f464c52Smaya * 5689f464c52Smaya * From the Ivybridge PRM, volume 2, "Depth Buffer Clear": 5699f464c52Smaya * 5709f464c52Smaya * "If other rendering operations have preceded this clear, a 5719f464c52Smaya * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit 5729f464c52Smaya * enabled must be issued before the rectangle primitive used for 5739f464c52Smaya * the depth buffer clear operation." 5749f464c52Smaya * 5757ec681f3Smrg * Same applies for Gfx8 and Gfx9. 5769f464c52Smaya */ 5779f464c52Smaya iris_emit_pipe_control_flush(batch, 5787ec681f3Smrg "hiz op: pre-flush", 5799f464c52Smaya PIPE_CONTROL_DEPTH_CACHE_FLUSH | 5807ec681f3Smrg PIPE_CONTROL_DEPTH_STALL | 5819f464c52Smaya PIPE_CONTROL_CS_STALL); 5829f464c52Smaya 5837ec681f3Smrg iris_batch_sync_region_start(batch); 5849f464c52Smaya 5859f464c52Smaya struct blorp_surf surf; 5867ec681f3Smrg iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf, 5877ec681f3Smrg &res->base.b, res->aux.usage, level, true); 5889f464c52Smaya 5899f464c52Smaya struct blorp_batch blorp_batch; 5909f464c52Smaya enum blorp_batch_flags flags = 0; 5919f464c52Smaya flags |= update_clear_depth ? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR; 5929f464c52Smaya blorp_batch_init(&ice->blorp, &blorp_batch, batch, flags); 5939f464c52Smaya blorp_hiz_op(&blorp_batch, &surf, level, start_layer, num_layers, op); 5949f464c52Smaya blorp_batch_finish(&blorp_batch); 5959f464c52Smaya 5969f464c52Smaya /* The following stalls and flushes are only documented to be required 5979f464c52Smaya * for HiZ clear operations. However, they also seem to be required for 5989f464c52Smaya * resolve operations. 
5999f464c52Smaya * 6009f464c52Smaya * From the Broadwell PRM, volume 7, "Depth Buffer Clear": 6019f464c52Smaya * 6029f464c52Smaya * "Depth buffer clear pass using any of the methods (WM_STATE, 6039f464c52Smaya * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a 6049f464c52Smaya * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits 6059f464c52Smaya * "set" before starting to render. DepthStall and DepthFlush are 6069f464c52Smaya * not needed between consecutive depth clear passes nor is it 6079f464c52Smaya * required if the depth clear pass was done with 6089f464c52Smaya * 'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP." 6099f464c52Smaya * 6109f464c52Smaya * TODO: Such as the spec says, this could be conditional. 6119f464c52Smaya */ 6129f464c52Smaya iris_emit_pipe_control_flush(batch, 6137ec681f3Smrg "hiz op: post flush", 6149f464c52Smaya PIPE_CONTROL_DEPTH_CACHE_FLUSH | 6159f464c52Smaya PIPE_CONTROL_DEPTH_STALL); 6167ec681f3Smrg 6177ec681f3Smrg iris_batch_sync_region_end(batch); 6189f464c52Smaya} 6199f464c52Smaya 6209f464c52Smaya/** 6219f464c52Smaya * Does the resource's slice have hiz enabled? 6229f464c52Smaya */ 6239f464c52Smayabool 6249f464c52Smayairis_resource_level_has_hiz(const struct iris_resource *res, uint32_t level) 6259f464c52Smaya{ 6269f464c52Smaya iris_resource_check_level_layer(res, level, 0); 6277ec681f3Smrg 6287ec681f3Smrg if (!isl_aux_usage_has_hiz(res->aux.usage)) 6297ec681f3Smrg return false; 6307ec681f3Smrg 6317ec681f3Smrg /* Disable HiZ for LOD > 0 unless the width/height are 8x4 aligned. 6327ec681f3Smrg * For LOD == 0, we can grow the dimensions to make it work. 
6337ec681f3Smrg */ 6347ec681f3Smrg if (level > 0) { 6357ec681f3Smrg if (u_minify(res->base.b.width0, level) & 7) 6367ec681f3Smrg return false; 6377ec681f3Smrg 6387ec681f3Smrg if (u_minify(res->base.b.height0, level) & 3) 6397ec681f3Smrg return false; 6407ec681f3Smrg } 6417ec681f3Smrg 6427ec681f3Smrg return true; 6439f464c52Smaya} 6449f464c52Smaya 6459f464c52Smaya/** \brief Assert that the level and layer are valid for the resource. */ 6469f464c52Smayavoid 6479f464c52Smayairis_resource_check_level_layer(UNUSED const struct iris_resource *res, 6489f464c52Smaya UNUSED uint32_t level, UNUSED uint32_t layer) 6499f464c52Smaya{ 6509f464c52Smaya assert(level < res->surf.levels); 6517ec681f3Smrg assert(layer < util_num_layers(&res->base.b, level)); 6529f464c52Smaya} 6539f464c52Smaya 6549f464c52Smayastatic inline uint32_t 6559f464c52Smayamiptree_level_range_length(const struct iris_resource *res, 6569f464c52Smaya uint32_t start_level, uint32_t num_levels) 6579f464c52Smaya{ 6589f464c52Smaya assert(start_level < res->surf.levels); 6599f464c52Smaya 6609f464c52Smaya if (num_levels == INTEL_REMAINING_LAYERS) 6619f464c52Smaya num_levels = res->surf.levels; 6629f464c52Smaya 6639f464c52Smaya /* Check for overflow */ 6649f464c52Smaya assert(start_level + num_levels >= start_level); 6659f464c52Smaya assert(start_level + num_levels <= res->surf.levels); 6669f464c52Smaya 6679f464c52Smaya return num_levels; 6689f464c52Smaya} 6699f464c52Smaya 6709f464c52Smayastatic inline uint32_t 6719f464c52Smayamiptree_layer_range_length(const struct iris_resource *res, uint32_t level, 6729f464c52Smaya uint32_t start_layer, uint32_t num_layers) 6739f464c52Smaya{ 6747ec681f3Smrg assert(level <= res->base.b.last_level); 6759f464c52Smaya 6769f464c52Smaya const uint32_t total_num_layers = iris_get_num_logical_layers(res, level); 6779f464c52Smaya assert(start_layer < total_num_layers); 6789f464c52Smaya if (num_layers == INTEL_REMAINING_LAYERS) 6799f464c52Smaya num_layers = total_num_layers - start_layer; 
6809f464c52Smaya /* Check for overflow */ 6819f464c52Smaya assert(start_layer + num_layers >= start_layer); 6829f464c52Smaya assert(start_layer + num_layers <= total_num_layers); 6839f464c52Smaya 6849f464c52Smaya return num_layers; 6859f464c52Smaya} 6869f464c52Smaya 6877ec681f3Smrgbool 6887ec681f3Smrgiris_has_invalid_primary(const struct iris_resource *res, 6897ec681f3Smrg unsigned start_level, unsigned num_levels, 6907ec681f3Smrg unsigned start_layer, unsigned num_layers) 6919f464c52Smaya{ 6927ec681f3Smrg if (res->aux.usage == ISL_AUX_USAGE_NONE) 6939f464c52Smaya return false; 6949f464c52Smaya 6959f464c52Smaya /* Clamp the level range to fit the resource */ 6969f464c52Smaya num_levels = miptree_level_range_length(res, start_level, num_levels); 6979f464c52Smaya 6989f464c52Smaya for (uint32_t l = 0; l < num_levels; l++) { 6999f464c52Smaya const uint32_t level = start_level + l; 7009f464c52Smaya const uint32_t level_layers = 7019f464c52Smaya miptree_layer_range_length(res, level, start_layer, num_layers); 7029f464c52Smaya for (unsigned a = 0; a < level_layers; a++) { 7039f464c52Smaya enum isl_aux_state aux_state = 7049f464c52Smaya iris_resource_get_aux_state(res, level, start_layer + a); 7057ec681f3Smrg if (!isl_aux_state_has_valid_primary(aux_state)) 7069f464c52Smaya return true; 7079f464c52Smaya } 7089f464c52Smaya } 7099f464c52Smaya 7109f464c52Smaya return false; 7119f464c52Smaya} 7129f464c52Smaya 7139f464c52Smayavoid 7149f464c52Smayairis_resource_prepare_access(struct iris_context *ice, 7159f464c52Smaya struct iris_resource *res, 7169f464c52Smaya uint32_t start_level, uint32_t num_levels, 7179f464c52Smaya uint32_t start_layer, uint32_t num_layers, 7189f464c52Smaya enum isl_aux_usage aux_usage, 7199f464c52Smaya bool fast_clear_supported) 7209f464c52Smaya{ 7217ec681f3Smrg if (res->aux.usage == ISL_AUX_USAGE_NONE) 7227ec681f3Smrg return; 7239f464c52Smaya 7247ec681f3Smrg /* We can't do resolves on the compute engine, so awkwardly, we have to 7257ec681f3Smrg * do them 
on the render batch... 7267ec681f3Smrg */ 7277ec681f3Smrg struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER]; 7289f464c52Smaya 7297ec681f3Smrg const uint32_t clamped_levels = 7307ec681f3Smrg miptree_level_range_length(res, start_level, num_levels); 7317ec681f3Smrg for (uint32_t l = 0; l < clamped_levels; l++) { 7327ec681f3Smrg const uint32_t level = start_level + l; 7339f464c52Smaya const uint32_t level_layers = 7347ec681f3Smrg miptree_layer_range_length(res, level, start_layer, num_layers); 7359f464c52Smaya for (uint32_t a = 0; a < level_layers; a++) { 7367ec681f3Smrg const uint32_t layer = start_layer + a; 7377ec681f3Smrg const enum isl_aux_state aux_state = 7387ec681f3Smrg iris_resource_get_aux_state(res, level, layer); 7397ec681f3Smrg const enum isl_aux_op aux_op = 7407ec681f3Smrg isl_aux_prepare_access(aux_state, aux_usage, fast_clear_supported); 7417ec681f3Smrg 7427ec681f3Smrg /* Prepare the aux buffer for a conditional or unconditional access. 7437ec681f3Smrg * A conditional access is handled by assuming that the access will 7447ec681f3Smrg * not evaluate to a no-op. If the access does in fact occur, the aux 7457ec681f3Smrg * will be in the required state. If it does not, no data is lost 7467ec681f3Smrg * because the aux_op performed is lossless. 7477ec681f3Smrg */ 7487ec681f3Smrg if (aux_op == ISL_AUX_OP_NONE) { 7497ec681f3Smrg /* Nothing to do here. 
*/ 7507ec681f3Smrg } else if (isl_aux_usage_has_mcs(res->aux.usage)) { 7517ec681f3Smrg assert(aux_op == ISL_AUX_OP_PARTIAL_RESOLVE); 7527ec681f3Smrg iris_mcs_partial_resolve(ice, batch, res, layer, 1); 7537ec681f3Smrg } else if (isl_aux_usage_has_hiz(res->aux.usage)) { 7547ec681f3Smrg iris_hiz_exec(ice, batch, res, level, layer, 1, aux_op, false); 7557ec681f3Smrg } else if (res->aux.usage == ISL_AUX_USAGE_STC_CCS) { 7567ec681f3Smrg unreachable("iris doesn't resolve STC_CCS resources"); 7577ec681f3Smrg } else { 7587ec681f3Smrg assert(isl_aux_usage_has_ccs(res->aux.usage)); 7597ec681f3Smrg iris_resolve_color(ice, batch, res, level, layer, aux_op); 7609f464c52Smaya } 7619f464c52Smaya 7627ec681f3Smrg const enum isl_aux_state new_state = 7637ec681f3Smrg isl_aux_state_transition_aux_op(aux_state, res->aux.usage, aux_op); 7647ec681f3Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, new_state); 7659f464c52Smaya } 7669f464c52Smaya } 7679f464c52Smaya} 7689f464c52Smaya 7699f464c52Smayavoid 7709f464c52Smayairis_resource_finish_write(struct iris_context *ice, 7719f464c52Smaya struct iris_resource *res, uint32_t level, 7729f464c52Smaya uint32_t start_layer, uint32_t num_layers, 7739f464c52Smaya enum isl_aux_usage aux_usage) 7749f464c52Smaya{ 7757ec681f3Smrg if (res->aux.usage == ISL_AUX_USAGE_NONE) 7767ec681f3Smrg return; 7777ec681f3Smrg 7787ec681f3Smrg const uint32_t level_layers = 7797ec681f3Smrg miptree_layer_range_length(res, level, start_layer, num_layers); 7807ec681f3Smrg 7817ec681f3Smrg for (uint32_t a = 0; a < level_layers; a++) { 7827ec681f3Smrg const uint32_t layer = start_layer + a; 7837ec681f3Smrg const enum isl_aux_state aux_state = 7847ec681f3Smrg iris_resource_get_aux_state(res, level, layer); 7857ec681f3Smrg 7867ec681f3Smrg /* Transition the aux state for a conditional or unconditional write. A 7877ec681f3Smrg * conditional write is handled by assuming that the write applies to 7887ec681f3Smrg * only part of the render target. 
This prevents the new state from 7897ec681f3Smrg * losing the types of compression that might exist in the current state 7907ec681f3Smrg * (e.g. CLEAR). If the write evaluates to a no-op, the state will still 7917ec681f3Smrg * be able to communicate when resolves are necessary (but it may 7927ec681f3Smrg * falsely communicate this as well). 7937ec681f3Smrg */ 7947ec681f3Smrg const enum isl_aux_state new_aux_state = 7957ec681f3Smrg isl_aux_state_transition_write(aux_state, aux_usage, false); 7969f464c52Smaya 7977ec681f3Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, new_aux_state); 7989f464c52Smaya } 7999f464c52Smaya} 8009f464c52Smaya 8019f464c52Smayaenum isl_aux_state 8029f464c52Smayairis_resource_get_aux_state(const struct iris_resource *res, 8039f464c52Smaya uint32_t level, uint32_t layer) 8049f464c52Smaya{ 8059f464c52Smaya iris_resource_check_level_layer(res, level, layer); 8069f464c52Smaya 8079f464c52Smaya if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) { 8087ec681f3Smrg assert(isl_aux_usage_has_hiz(res->aux.usage)); 8099f464c52Smaya } else { 8109f464c52Smaya assert(res->surf.samples == 1 || 8119f464c52Smaya res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 8129f464c52Smaya } 8139f464c52Smaya 8149f464c52Smaya return res->aux.state[level][layer]; 8159f464c52Smaya} 8169f464c52Smaya 8179f464c52Smayavoid 8189f464c52Smayairis_resource_set_aux_state(struct iris_context *ice, 8199f464c52Smaya struct iris_resource *res, uint32_t level, 8209f464c52Smaya uint32_t start_layer, uint32_t num_layers, 8219f464c52Smaya enum isl_aux_state aux_state) 8229f464c52Smaya{ 8239f464c52Smaya num_layers = miptree_layer_range_length(res, level, start_layer, num_layers); 8249f464c52Smaya 8259f464c52Smaya if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) { 8267ec681f3Smrg assert(iris_resource_level_has_hiz(res, level) || 8277ec681f3Smrg !isl_aux_state_has_valid_aux(aux_state)); 8289f464c52Smaya } else { 8299f464c52Smaya assert(res->surf.samples == 1 || 8309f464c52Smaya 
res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 8319f464c52Smaya } 8329f464c52Smaya 8339f464c52Smaya for (unsigned a = 0; a < num_layers; a++) { 8349f464c52Smaya if (res->aux.state[level][start_layer + a] != aux_state) { 8359f464c52Smaya res->aux.state[level][start_layer + a] = aux_state; 8369f464c52Smaya /* XXX: Need to track which bindings to make dirty */ 8377ec681f3Smrg ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER | 8387ec681f3Smrg IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES | 8397ec681f3Smrg IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES; 8407ec681f3Smrg ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS; 8419f464c52Smaya } 8429f464c52Smaya } 8439f464c52Smaya 8447ec681f3Smrg if (res->mod_info && !res->mod_info->supports_clear_color) { 8457ec681f3Smrg assert(res->mod_info->aux_usage != ISL_AUX_USAGE_NONE); 8467ec681f3Smrg if (aux_state == ISL_AUX_STATE_CLEAR || 8477ec681f3Smrg aux_state == ISL_AUX_STATE_COMPRESSED_CLEAR || 8487ec681f3Smrg aux_state == ISL_AUX_STATE_PARTIAL_CLEAR) { 8497ec681f3Smrg iris_mark_dirty_dmabuf(ice, &res->base.b); 8507ec681f3Smrg } 8519f464c52Smaya } 8529f464c52Smaya} 8539f464c52Smaya 8549f464c52Smayaenum isl_aux_usage 8559f464c52Smayairis_resource_texture_aux_usage(struct iris_context *ice, 8569f464c52Smaya const struct iris_resource *res, 8577ec681f3Smrg enum isl_format view_format) 8589f464c52Smaya{ 8599f464c52Smaya struct iris_screen *screen = (void *) ice->ctx.screen; 8607ec681f3Smrg struct intel_device_info *devinfo = &screen->devinfo; 8619f464c52Smaya 8629f464c52Smaya switch (res->aux.usage) { 8639f464c52Smaya case ISL_AUX_USAGE_HIZ: 8647ec681f3Smrg case ISL_AUX_USAGE_HIZ_CCS: 8657ec681f3Smrg case ISL_AUX_USAGE_HIZ_CCS_WT: 8667ec681f3Smrg assert(res->surf.format == view_format); 8677ec681f3Smrg return util_last_bit(res->aux.sampler_usages) - 1; 8689f464c52Smaya 8699f464c52Smaya case ISL_AUX_USAGE_MCS: 8707ec681f3Smrg case ISL_AUX_USAGE_MCS_CCS: 8717ec681f3Smrg case ISL_AUX_USAGE_STC_CCS: 8727ec681f3Smrg case ISL_AUX_USAGE_MC: 
8737ec681f3Smrg return res->aux.usage; 8749f464c52Smaya 8759f464c52Smaya case ISL_AUX_USAGE_CCS_E: 8767ec681f3Smrg case ISL_AUX_USAGE_GFX12_CCS_E: 8779f464c52Smaya /* If we don't have any unresolved color, report an aux usage of 8789f464c52Smaya * ISL_AUX_USAGE_NONE. This way, texturing won't even look at the 8799f464c52Smaya * aux surface and we can save some bandwidth. 8809f464c52Smaya */ 8817ec681f3Smrg if (!iris_has_invalid_primary(res, 0, INTEL_REMAINING_LEVELS, 8827ec681f3Smrg 0, INTEL_REMAINING_LAYERS)) 8839f464c52Smaya return ISL_AUX_USAGE_NONE; 8849f464c52Smaya 8857ec681f3Smrg /* On Gfx9 color buffers may be compressed by the hardware (lossless 8867ec681f3Smrg * compression). There are, however, format restrictions and care needs 8877ec681f3Smrg * to be taken that the sampler engine is capable for re-interpreting a 8887ec681f3Smrg * buffer with format different the buffer was originally written with. 8897ec681f3Smrg * 8907ec681f3Smrg * For example, SRGB formats are not compressible and the sampler engine 8917ec681f3Smrg * isn't capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case 8927ec681f3Smrg * the underlying color buffer needs to be resolved so that the sampling 8937ec681f3Smrg * surface can be sampled as non-compressed (i.e., without the auxiliary 8947ec681f3Smrg * MCS buffer being set). 
8957ec681f3Smrg */ 8967ec681f3Smrg if (isl_formats_are_ccs_e_compatible(devinfo, res->surf.format, 8977ec681f3Smrg view_format)) 8987ec681f3Smrg return res->aux.usage; 8999f464c52Smaya break; 9009f464c52Smaya 9019f464c52Smaya default: 9029f464c52Smaya break; 9039f464c52Smaya } 9049f464c52Smaya 9059f464c52Smaya return ISL_AUX_USAGE_NONE; 9069f464c52Smaya} 9079f464c52Smaya 9087ec681f3Smrgenum isl_aux_usage 9097ec681f3Smrgiris_image_view_aux_usage(struct iris_context *ice, 9107ec681f3Smrg const struct pipe_image_view *pview, 9117ec681f3Smrg const struct shader_info *info) 9127ec681f3Smrg{ 9137ec681f3Smrg if (!info) 9147ec681f3Smrg return ISL_AUX_USAGE_NONE; 9157ec681f3Smrg 9167ec681f3Smrg const struct iris_screen *screen = (void *) ice->ctx.screen; 9177ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 9187ec681f3Smrg struct iris_resource *res = (void *) pview->resource; 9197ec681f3Smrg 9207ec681f3Smrg enum isl_format view_format = iris_image_view_get_format(ice, pview); 9217ec681f3Smrg enum isl_aux_usage aux_usage = 9227ec681f3Smrg iris_resource_texture_aux_usage(ice, res, view_format); 9237ec681f3Smrg 9247ec681f3Smrg bool uses_atomic_load_store = 9257ec681f3Smrg ice->shaders.uncompiled[info->stage]->uses_atomic_load_store; 9267ec681f3Smrg 9277ec681f3Smrg /* On GFX12, compressed surfaces supports non-atomic operations. GFX12HP and 9287ec681f3Smrg * further, add support for all the operations. 
9297ec681f3Smrg */ 9307ec681f3Smrg if (aux_usage == ISL_AUX_USAGE_GFX12_CCS_E && 9317ec681f3Smrg (devinfo->verx10 >= 125 || !uses_atomic_load_store)) 9327ec681f3Smrg return ISL_AUX_USAGE_GFX12_CCS_E; 9337ec681f3Smrg 9347ec681f3Smrg return ISL_AUX_USAGE_NONE; 9357ec681f3Smrg} 9367ec681f3Smrg 9377ec681f3Smrgbool 9387ec681f3Smrgiris_can_sample_mcs_with_clear(const struct intel_device_info *devinfo, 9397ec681f3Smrg const struct iris_resource *res) 9407ec681f3Smrg{ 9417ec681f3Smrg assert(isl_aux_usage_has_mcs(res->aux.usage)); 9427ec681f3Smrg 9437ec681f3Smrg /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears. 9447ec681f3Smrg * See HSD 1707282275, wa_14013111325. Due to the use of 9457ec681f3Smrg * format-reinterpretation, a simplified workaround is implemented. 9467ec681f3Smrg */ 9477ec681f3Smrg if (devinfo->ver >= 12 && 9487ec681f3Smrg isl_format_get_layout(res->surf.format)->bpb <= 16) { 9497ec681f3Smrg return false; 9507ec681f3Smrg } 9517ec681f3Smrg 9527ec681f3Smrg return true; 9537ec681f3Smrg} 9547ec681f3Smrg 9559f464c52Smayastatic bool 9569f464c52Smayaisl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b) 9579f464c52Smaya{ 9587ec681f3Smrg /* On gfx8 and earlier, the hardware was only capable of handling 0/1 clear 9599f464c52Smaya * values so sRGB curve application was a no-op for all fast-clearable 9609f464c52Smaya * formats. 9619f464c52Smaya * 9627ec681f3Smrg * On gfx9+, the hardware supports arbitrary clear values. For sRGB clear 9639f464c52Smaya * values, the hardware interprets the floats, not as what would be 9649f464c52Smaya * returned from the sampler (or written by the shader), but as being 9659f464c52Smaya * between format conversion and sRGB curve application. This means that 9669f464c52Smaya * we can switch between sRGB and UNORM without having to whack the clear 9679f464c52Smaya * color. 
9689f464c52Smaya */ 9699f464c52Smaya return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b); 9709f464c52Smaya} 9719f464c52Smaya 9729f464c52Smayavoid 9739f464c52Smayairis_resource_prepare_texture(struct iris_context *ice, 9749f464c52Smaya struct iris_resource *res, 9759f464c52Smaya enum isl_format view_format, 9769f464c52Smaya uint32_t start_level, uint32_t num_levels, 9777ec681f3Smrg uint32_t start_layer, uint32_t num_layers) 9789f464c52Smaya{ 9797ec681f3Smrg const struct iris_screen *screen = (void *) ice->ctx.screen; 9807ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 9817ec681f3Smrg 9829f464c52Smaya enum isl_aux_usage aux_usage = 9837ec681f3Smrg iris_resource_texture_aux_usage(ice, res, view_format); 9849f464c52Smaya 9857ec681f3Smrg bool clear_supported = isl_aux_usage_has_fast_clears(aux_usage); 9869f464c52Smaya 9879f464c52Smaya /* Clear color is specified as ints or floats and the conversion is done by 9889f464c52Smaya * the sampler. If we have a texture view, we would have to perform the 9899f464c52Smaya * clear color conversion manually. Just disable clear color. 9909f464c52Smaya */ 9919f464c52Smaya if (!isl_formats_are_fast_clear_compatible(res->surf.format, view_format)) 9929f464c52Smaya clear_supported = false; 9939f464c52Smaya 9947ec681f3Smrg if (isl_aux_usage_has_mcs(aux_usage) && 9957ec681f3Smrg !iris_can_sample_mcs_with_clear(devinfo, res)) { 9967ec681f3Smrg clear_supported = false; 9977ec681f3Smrg } 9987ec681f3Smrg 9997ec681f3Smrg iris_resource_prepare_access(ice, res, start_level, num_levels, 10009f464c52Smaya start_layer, num_layers, 10019f464c52Smaya aux_usage, clear_supported); 10029f464c52Smaya} 10039f464c52Smaya 10047ec681f3Smrg/* Whether or not rendering a color value with either format results in the 10057ec681f3Smrg * same pixel. This can return false negatives. 
10067ec681f3Smrg */ 10077ec681f3Smrgbool 10087ec681f3Smrgiris_render_formats_color_compatible(enum isl_format a, enum isl_format b, 10097ec681f3Smrg union isl_color_value color, 10107ec681f3Smrg bool clear_color_unknown) 10119f464c52Smaya{ 10127ec681f3Smrg if (a == b) 10137ec681f3Smrg return true; 10147ec681f3Smrg 10157ec681f3Smrg /* A difference in color space doesn't matter for 0/1 values. */ 10167ec681f3Smrg if (!clear_color_unknown && 10177ec681f3Smrg isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b) && 10187ec681f3Smrg isl_color_value_is_zero_one(color, a)) { 10197ec681f3Smrg return true; 10207ec681f3Smrg } 10217ec681f3Smrg 10227ec681f3Smrg return false; 10239f464c52Smaya} 10249f464c52Smaya 10259f464c52Smayaenum isl_aux_usage 10269f464c52Smayairis_resource_render_aux_usage(struct iris_context *ice, 10277ec681f3Smrg struct iris_resource *res, uint32_t level, 10289f464c52Smaya enum isl_format render_format, 10299f464c52Smaya bool draw_aux_disabled) 10309f464c52Smaya{ 10319f464c52Smaya struct iris_screen *screen = (void *) ice->ctx.screen; 10327ec681f3Smrg struct intel_device_info *devinfo = &screen->devinfo; 10339f464c52Smaya 10349f464c52Smaya if (draw_aux_disabled) 10359f464c52Smaya return ISL_AUX_USAGE_NONE; 10369f464c52Smaya 10379f464c52Smaya switch (res->aux.usage) { 10387ec681f3Smrg case ISL_AUX_USAGE_HIZ: 10397ec681f3Smrg case ISL_AUX_USAGE_HIZ_CCS: 10407ec681f3Smrg case ISL_AUX_USAGE_HIZ_CCS_WT: 10417ec681f3Smrg assert(render_format == res->surf.format); 10427ec681f3Smrg return iris_resource_level_has_hiz(res, level) ? 
10437ec681f3Smrg res->aux.usage : ISL_AUX_USAGE_NONE; 10447ec681f3Smrg 10457ec681f3Smrg case ISL_AUX_USAGE_STC_CCS: 10467ec681f3Smrg assert(render_format == res->surf.format); 10477ec681f3Smrg return res->aux.usage; 10487ec681f3Smrg 10499f464c52Smaya case ISL_AUX_USAGE_MCS: 10507ec681f3Smrg case ISL_AUX_USAGE_MCS_CCS: 10517ec681f3Smrg return res->aux.usage; 10529f464c52Smaya 10539f464c52Smaya case ISL_AUX_USAGE_CCS_D: 10549f464c52Smaya case ISL_AUX_USAGE_CCS_E: 10557ec681f3Smrg case ISL_AUX_USAGE_GFX12_CCS_E: 10567ec681f3Smrg /* Disable CCS for some cases of texture-view rendering. On gfx12, HW 10577ec681f3Smrg * may convert some subregions of shader output to fast-cleared blocks 10587ec681f3Smrg * if CCS is enabled and the shader output matches the clear color. 10597ec681f3Smrg * Existing fast-cleared blocks are correctly interpreted by the clear 10607ec681f3Smrg * color and the resource format (see can_fast_clear_color). To avoid 10617ec681f3Smrg * gaining new fast-cleared blocks that can't be interpreted by the 10627ec681f3Smrg * resource format (and to avoid misinterpreting existing ones), shut 10637ec681f3Smrg * off CCS when the interpretation of the clear color differs between 10647ec681f3Smrg * the render_format and the resource format. 
10659f464c52Smaya */ 10667ec681f3Smrg if (!iris_render_formats_color_compatible(render_format, 10677ec681f3Smrg res->surf.format, 10687ec681f3Smrg res->aux.clear_color, 10697ec681f3Smrg res->aux.clear_color_unknown)) { 10709f464c52Smaya return ISL_AUX_USAGE_NONE; 10717ec681f3Smrg } 10729f464c52Smaya 10737ec681f3Smrg if (res->aux.usage == ISL_AUX_USAGE_CCS_D) 10747ec681f3Smrg return ISL_AUX_USAGE_CCS_D; 10759f464c52Smaya 10767ec681f3Smrg if (isl_formats_are_ccs_e_compatible(devinfo, res->surf.format, 10777ec681f3Smrg render_format)) { 10787ec681f3Smrg return res->aux.usage; 10797ec681f3Smrg } 10807ec681f3Smrg FALLTHROUGH; 10819f464c52Smaya 10829f464c52Smaya default: 10839f464c52Smaya return ISL_AUX_USAGE_NONE; 10849f464c52Smaya } 10859f464c52Smaya} 10869f464c52Smaya 10879f464c52Smayavoid 10889f464c52Smayairis_resource_prepare_render(struct iris_context *ice, 10899f464c52Smaya struct iris_resource *res, uint32_t level, 10909f464c52Smaya uint32_t start_layer, uint32_t layer_count, 10919f464c52Smaya enum isl_aux_usage aux_usage) 10929f464c52Smaya{ 10937ec681f3Smrg iris_resource_prepare_access(ice, res, level, 1, start_layer, 10949f464c52Smaya layer_count, aux_usage, 10957ec681f3Smrg isl_aux_usage_has_fast_clears(aux_usage)); 10969f464c52Smaya} 10979f464c52Smaya 10989f464c52Smayavoid 10999f464c52Smayairis_resource_finish_render(struct iris_context *ice, 11009f464c52Smaya struct iris_resource *res, uint32_t level, 11019f464c52Smaya uint32_t start_layer, uint32_t layer_count, 11029f464c52Smaya enum isl_aux_usage aux_usage) 11039f464c52Smaya{ 11049f464c52Smaya iris_resource_finish_write(ice, res, level, start_layer, layer_count, 11059f464c52Smaya aux_usage); 11069f464c52Smaya} 1107