1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2017 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice shall be included 12b8e80941Smrg * in all copies or substantial portions of the Software. 13b8e80941Smrg * 14b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15b8e80941Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20b8e80941Smrg * DEALINGS IN THE SOFTWARE. 21b8e80941Smrg */ 22b8e80941Smrg 23b8e80941Smrg/** 24b8e80941Smrg * @file iris_resolve.c 25b8e80941Smrg * 26b8e80941Smrg * This file handles resolve tracking for main and auxiliary surfaces. 27b8e80941Smrg * 28b8e80941Smrg * It also handles our cache tracking. We have sets for the render cache, 29b8e80941Smrg * depth cache, and so on. If a BO is in a cache's set, then it may have 30b8e80941Smrg * data in that cache. The helpers take care of emitting flushes for 31b8e80941Smrg * render-to-texture, format reinterpretation issues, and other situations. 32b8e80941Smrg */ 33b8e80941Smrg 34b8e80941Smrg#include "util/hash_table.h" 35b8e80941Smrg#include "util/set.h" 36b8e80941Smrg#include "iris_context.h" 37b8e80941Smrg 38b8e80941Smrg/** 39b8e80941Smrg * Disable auxiliary buffers if a renderbuffer is also bound as a texture 40b8e80941Smrg * or shader image. This causes a self-dependency, where both rendering 41b8e80941Smrg * and sampling may concurrently read or write the CCS buffer, causing 42b8e80941Smrg * incorrect pixels. 43b8e80941Smrg */ 44b8e80941Smrgstatic bool 45b8e80941Smrgdisable_rb_aux_buffer(struct iris_context *ice, 46b8e80941Smrg bool *draw_aux_buffer_disabled, 47b8e80941Smrg struct iris_resource *tex_res, 48b8e80941Smrg unsigned min_level, unsigned num_levels, 49b8e80941Smrg const char *usage) 50b8e80941Smrg{ 51b8e80941Smrg struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 52b8e80941Smrg bool found = false; 53b8e80941Smrg 54b8e80941Smrg /* We only need to worry about color compression and fast clears. */ 55b8e80941Smrg if (tex_res->aux.usage != ISL_AUX_USAGE_CCS_D && 56b8e80941Smrg tex_res->aux.usage != ISL_AUX_USAGE_CCS_E) 57b8e80941Smrg return false; 58b8e80941Smrg 59b8e80941Smrg for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { 60b8e80941Smrg struct iris_surface *surf = (void *) cso_fb->cbufs[i]; 61b8e80941Smrg if (!surf) 62b8e80941Smrg continue; 63b8e80941Smrg 64b8e80941Smrg struct iris_resource *rb_res = (void *) surf->base.texture; 65b8e80941Smrg 66b8e80941Smrg if (rb_res->bo == tex_res->bo && 67b8e80941Smrg surf->base.u.tex.level >= min_level && 68b8e80941Smrg surf->base.u.tex.level < min_level + num_levels) { 69b8e80941Smrg found = draw_aux_buffer_disabled[i] = true; 70b8e80941Smrg } 71b8e80941Smrg } 72b8e80941Smrg 73b8e80941Smrg if (found) { 74b8e80941Smrg perf_debug(&ice->dbg, 75b8e80941Smrg "Disabling CCS because a renderbuffer is also bound %s.\n", 76b8e80941Smrg usage); 77b8e80941Smrg } 78b8e80941Smrg 79b8e80941Smrg return found; 80b8e80941Smrg} 81b8e80941Smrg 82b8e80941Smrgstatic void 83b8e80941Smrgresolve_sampler_views(struct iris_context *ice, 84b8e80941Smrg struct iris_batch *batch, 85b8e80941Smrg struct iris_shader_state *shs, 86b8e80941Smrg const struct shader_info *info, 87b8e80941Smrg bool *draw_aux_buffer_disabled, 88b8e80941Smrg bool consider_framebuffer) 89b8e80941Smrg{ 90b8e80941Smrg uint32_t views = info ? (shs->bound_sampler_views & info->textures_used) : 0; 91b8e80941Smrg 92b8e80941Smrg unsigned astc5x5_wa_bits = 0; // XXX: actual tracking 93b8e80941Smrg 94b8e80941Smrg while (views) { 95b8e80941Smrg const int i = u_bit_scan(&views); 96b8e80941Smrg struct iris_sampler_view *isv = shs->textures[i]; 97b8e80941Smrg struct iris_resource *res = (void *) isv->base.texture; 98b8e80941Smrg 99b8e80941Smrg if (res->base.target != PIPE_BUFFER) { 100b8e80941Smrg if (consider_framebuffer) { 101b8e80941Smrg disable_rb_aux_buffer(ice, draw_aux_buffer_disabled, 102b8e80941Smrg res, isv->view.base_level, isv->view.levels, 103b8e80941Smrg "for sampling"); 104b8e80941Smrg } 105b8e80941Smrg 106b8e80941Smrg iris_resource_prepare_texture(ice, batch, res, isv->view.format, 107b8e80941Smrg isv->view.base_level, isv->view.levels, 108b8e80941Smrg isv->view.base_array_layer, 109b8e80941Smrg isv->view.array_len, 110b8e80941Smrg astc5x5_wa_bits); 111b8e80941Smrg } 112b8e80941Smrg 113b8e80941Smrg iris_cache_flush_for_read(batch, res->bo); 114b8e80941Smrg } 115b8e80941Smrg} 116b8e80941Smrg 117b8e80941Smrgstatic void 118b8e80941Smrgresolve_image_views(struct iris_context *ice, 119b8e80941Smrg struct iris_batch *batch, 120b8e80941Smrg struct iris_shader_state *shs, 121b8e80941Smrg bool *draw_aux_buffer_disabled, 122b8e80941Smrg bool consider_framebuffer) 123b8e80941Smrg{ 124b8e80941Smrg /* TODO: Consider images used by program */ 125b8e80941Smrg uint32_t views = shs->bound_image_views; 126b8e80941Smrg 127b8e80941Smrg while (views) { 128b8e80941Smrg const int i = u_bit_scan(&views); 129b8e80941Smrg struct iris_resource *res = (void *) shs->image[i].base.resource; 130b8e80941Smrg 131b8e80941Smrg if (res->base.target != PIPE_BUFFER) { 132b8e80941Smrg if (consider_framebuffer) { 133b8e80941Smrg disable_rb_aux_buffer(ice, draw_aux_buffer_disabled, 134b8e80941Smrg res, 0, ~0, "as a shader image"); 135b8e80941Smrg } 136b8e80941Smrg 137b8e80941Smrg iris_resource_prepare_image(ice, batch, res); 138b8e80941Smrg } 139b8e80941Smrg 140b8e80941Smrg iris_cache_flush_for_read(batch, res->bo); 141b8e80941Smrg } 142b8e80941Smrg} 143b8e80941Smrg 144b8e80941Smrg 145b8e80941Smrg/** 146b8e80941Smrg * \brief Resolve buffers before drawing. 147b8e80941Smrg * 148b8e80941Smrg * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each 149b8e80941Smrg * enabled depth texture, and flush the render cache for any dirty textures. 150b8e80941Smrg */ 151b8e80941Smrgvoid 152b8e80941Smrgiris_predraw_resolve_inputs(struct iris_context *ice, 153b8e80941Smrg struct iris_batch *batch, 154b8e80941Smrg bool *draw_aux_buffer_disabled, 155b8e80941Smrg gl_shader_stage stage, 156b8e80941Smrg bool consider_framebuffer) 157b8e80941Smrg{ 158b8e80941Smrg struct iris_shader_state *shs = &ice->state.shaders[stage]; 159b8e80941Smrg const struct shader_info *info = iris_get_shader_info(ice, stage); 160b8e80941Smrg 161b8e80941Smrg uint64_t dirty = (IRIS_DIRTY_BINDINGS_VS << stage) | 162b8e80941Smrg (consider_framebuffer ? IRIS_DIRTY_BINDINGS_FS : 0); 163b8e80941Smrg 164b8e80941Smrg if (ice->state.dirty & dirty) { 165b8e80941Smrg resolve_sampler_views(ice, batch, shs, info, draw_aux_buffer_disabled, 166b8e80941Smrg consider_framebuffer); 167b8e80941Smrg resolve_image_views(ice, batch, shs, draw_aux_buffer_disabled, 168b8e80941Smrg consider_framebuffer); 169b8e80941Smrg } 170b8e80941Smrg 171b8e80941Smrg // XXX: ASTC hacks 172b8e80941Smrg} 173b8e80941Smrg 174b8e80941Smrgvoid 175b8e80941Smrgiris_predraw_resolve_framebuffer(struct iris_context *ice, 176b8e80941Smrg struct iris_batch *batch, 177b8e80941Smrg bool *draw_aux_buffer_disabled) 178b8e80941Smrg{ 179b8e80941Smrg struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 180b8e80941Smrg 181b8e80941Smrg if (ice->state.dirty & IRIS_DIRTY_DEPTH_BUFFER) { 182b8e80941Smrg struct pipe_surface *zs_surf = cso_fb->zsbuf; 183b8e80941Smrg 184b8e80941Smrg if (zs_surf) { 185b8e80941Smrg struct iris_resource *z_res, *s_res; 186b8e80941Smrg iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res); 187b8e80941Smrg unsigned num_layers = 188b8e80941Smrg zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1; 189b8e80941Smrg 190b8e80941Smrg if (z_res) { 191b8e80941Smrg iris_resource_prepare_depth(ice, batch, z_res, 192b8e80941Smrg zs_surf->u.tex.level, 193b8e80941Smrg zs_surf->u.tex.first_layer, 194b8e80941Smrg num_layers); 195b8e80941Smrg iris_cache_flush_for_depth(batch, z_res->bo); 196b8e80941Smrg } 197b8e80941Smrg 198b8e80941Smrg if (s_res) { 199b8e80941Smrg iris_cache_flush_for_depth(batch, s_res->bo); 200b8e80941Smrg } 201b8e80941Smrg } 202b8e80941Smrg } 203b8e80941Smrg 204b8e80941Smrg if (ice->state.dirty & (IRIS_DIRTY_BINDINGS_FS | IRIS_DIRTY_BLEND_STATE)) { 205b8e80941Smrg for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { 206b8e80941Smrg struct iris_surface *surf = (void *) cso_fb->cbufs[i]; 207b8e80941Smrg if (!surf) 208b8e80941Smrg continue; 209b8e80941Smrg 210b8e80941Smrg struct iris_resource *res = (void *) surf->base.texture; 211b8e80941Smrg 212b8e80941Smrg enum isl_aux_usage aux_usage = 213b8e80941Smrg iris_resource_render_aux_usage(ice, res, surf->view.format, 214b8e80941Smrg ice->state.blend_enables & (1u << i), 215b8e80941Smrg draw_aux_buffer_disabled[i]); 216b8e80941Smrg 217b8e80941Smrg if (ice->state.draw_aux_usage[i] != aux_usage) { 218b8e80941Smrg ice->state.draw_aux_usage[i] = aux_usage; 219b8e80941Smrg /* XXX: Need to track which bindings to make dirty */ 220b8e80941Smrg ice->state.dirty |= IRIS_ALL_DIRTY_BINDINGS; 221b8e80941Smrg } 222b8e80941Smrg 223b8e80941Smrg iris_resource_prepare_render(ice, batch, res, surf->view.base_level, 224b8e80941Smrg surf->view.base_array_layer, 225b8e80941Smrg surf->view.array_len, 226b8e80941Smrg aux_usage); 227b8e80941Smrg 228b8e80941Smrg iris_cache_flush_for_render(batch, res->bo, surf->view.format, 229b8e80941Smrg aux_usage); 230b8e80941Smrg } 231b8e80941Smrg } 232b8e80941Smrg} 233b8e80941Smrg 234b8e80941Smrg/** 235b8e80941Smrg * \brief Call this after drawing to mark which buffers need resolving 236b8e80941Smrg * 237b8e80941Smrg * If the depth buffer was written to and if it has an accompanying HiZ 238b8e80941Smrg * buffer, then mark that it needs a depth resolve. 239b8e80941Smrg * 240b8e80941Smrg * If the color buffer is a multisample window system buffer, then 241b8e80941Smrg * mark that it needs a downsample. 242b8e80941Smrg * 243b8e80941Smrg * Also mark any render targets which will be textured as needing a render 244b8e80941Smrg * cache flush. 245b8e80941Smrg */ 246b8e80941Smrgvoid 247b8e80941Smrgiris_postdraw_update_resolve_tracking(struct iris_context *ice, 248b8e80941Smrg struct iris_batch *batch) 249b8e80941Smrg{ 250b8e80941Smrg struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 251b8e80941Smrg 252b8e80941Smrg // XXX: front buffer drawing? 253b8e80941Smrg 254b8e80941Smrg bool may_have_resolved_depth = 255b8e80941Smrg ice->state.dirty & (IRIS_DIRTY_DEPTH_BUFFER | 256b8e80941Smrg IRIS_DIRTY_WM_DEPTH_STENCIL); 257b8e80941Smrg 258b8e80941Smrg struct pipe_surface *zs_surf = cso_fb->zsbuf; 259b8e80941Smrg if (zs_surf) { 260b8e80941Smrg struct iris_resource *z_res, *s_res; 261b8e80941Smrg iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res); 262b8e80941Smrg unsigned num_layers = 263b8e80941Smrg zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1; 264b8e80941Smrg 265b8e80941Smrg if (z_res) { 266b8e80941Smrg if (may_have_resolved_depth) { 267b8e80941Smrg iris_resource_finish_depth(ice, z_res, zs_surf->u.tex.level, 268b8e80941Smrg zs_surf->u.tex.first_layer, num_layers, 269b8e80941Smrg ice->state.depth_writes_enabled); 270b8e80941Smrg } 271b8e80941Smrg 272b8e80941Smrg if (ice->state.depth_writes_enabled) 273b8e80941Smrg iris_depth_cache_add_bo(batch, z_res->bo); 274b8e80941Smrg } 275b8e80941Smrg 276b8e80941Smrg if (s_res) { 277b8e80941Smrg if (may_have_resolved_depth) { 278b8e80941Smrg iris_resource_finish_write(ice, s_res, zs_surf->u.tex.level, 279b8e80941Smrg zs_surf->u.tex.first_layer, num_layers, 280b8e80941Smrg ISL_AUX_USAGE_NONE); 281b8e80941Smrg } 282b8e80941Smrg 283b8e80941Smrg if (ice->state.stencil_writes_enabled) 284b8e80941Smrg iris_depth_cache_add_bo(batch, s_res->bo); 285b8e80941Smrg } 286b8e80941Smrg } 287b8e80941Smrg 288b8e80941Smrg bool may_have_resolved_color = 289b8e80941Smrg ice->state.dirty & (IRIS_DIRTY_BINDINGS_FS | IRIS_DIRTY_BLEND_STATE); 290b8e80941Smrg 291b8e80941Smrg for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { 292b8e80941Smrg struct iris_surface *surf = (void *) cso_fb->cbufs[i]; 293b8e80941Smrg if (!surf) 294b8e80941Smrg continue; 295b8e80941Smrg 296b8e80941Smrg struct iris_resource *res = (void *) surf->base.texture; 297b8e80941Smrg enum isl_aux_usage aux_usage = ice->state.draw_aux_usage[i]; 298b8e80941Smrg 299b8e80941Smrg iris_render_cache_add_bo(batch, res->bo, surf->view.format, 300b8e80941Smrg aux_usage); 301b8e80941Smrg 302b8e80941Smrg if (may_have_resolved_color) { 303b8e80941Smrg union pipe_surface_desc *desc = &surf->base.u; 304b8e80941Smrg unsigned num_layers = 305b8e80941Smrg desc->tex.last_layer - desc->tex.first_layer + 1; 306b8e80941Smrg iris_resource_finish_render(ice, res, desc->tex.level, 307b8e80941Smrg desc->tex.first_layer, num_layers, 308b8e80941Smrg aux_usage); 309b8e80941Smrg } 310b8e80941Smrg } 311b8e80941Smrg} 312b8e80941Smrg 313b8e80941Smrg/** 314b8e80941Smrg * Clear the cache-tracking sets. 315b8e80941Smrg */ 316b8e80941Smrgvoid 317b8e80941Smrgiris_cache_sets_clear(struct iris_batch *batch) 318b8e80941Smrg{ 319b8e80941Smrg hash_table_foreach(batch->cache.render, render_entry) 320b8e80941Smrg _mesa_hash_table_remove(batch->cache.render, render_entry); 321b8e80941Smrg 322b8e80941Smrg set_foreach(batch->cache.depth, depth_entry) 323b8e80941Smrg _mesa_set_remove(batch->cache.depth, depth_entry); 324b8e80941Smrg} 325b8e80941Smrg 326b8e80941Smrg/** 327b8e80941Smrg * Emits an appropriate flush for a BO if it has been rendered to within the 328b8e80941Smrg * same batchbuffer as a read that's about to be emitted. 329b8e80941Smrg * 330b8e80941Smrg * The GPU has separate, incoherent caches for the render cache and the 331b8e80941Smrg * sampler cache, along with other caches. Usually data in the different 332b8e80941Smrg * caches don't interact (e.g. we don't render to our driver-generated 333b8e80941Smrg * immediate constant data), but for render-to-texture in FBOs we definitely 334b8e80941Smrg * do. When a batchbuffer is flushed, the kernel will ensure that everything 335b8e80941Smrg * necessary is flushed before another use of that BO, but for reuse from 336b8e80941Smrg * different caches within a batchbuffer, it's all our responsibility. 337b8e80941Smrg */ 338b8e80941Smrgvoid 339b8e80941Smrgiris_flush_depth_and_render_caches(struct iris_batch *batch) 340b8e80941Smrg{ 341b8e80941Smrg iris_emit_pipe_control_flush(batch, 342b8e80941Smrg PIPE_CONTROL_DEPTH_CACHE_FLUSH | 343b8e80941Smrg PIPE_CONTROL_RENDER_TARGET_FLUSH | 344b8e80941Smrg PIPE_CONTROL_CS_STALL); 345b8e80941Smrg 346b8e80941Smrg iris_emit_pipe_control_flush(batch, 347b8e80941Smrg PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 348b8e80941Smrg PIPE_CONTROL_CONST_CACHE_INVALIDATE); 349b8e80941Smrg 350b8e80941Smrg iris_cache_sets_clear(batch); 351b8e80941Smrg} 352b8e80941Smrg 353b8e80941Smrgvoid 354b8e80941Smrgiris_cache_flush_for_read(struct iris_batch *batch, 355b8e80941Smrg struct iris_bo *bo) 356b8e80941Smrg{ 357b8e80941Smrg if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo) || 358b8e80941Smrg _mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo)) 359b8e80941Smrg iris_flush_depth_and_render_caches(batch); 360b8e80941Smrg} 361b8e80941Smrg 362b8e80941Smrgstatic void * 363b8e80941Smrgformat_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage) 364b8e80941Smrg{ 365b8e80941Smrg return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage); 366b8e80941Smrg} 367b8e80941Smrg 368b8e80941Smrgvoid 369b8e80941Smrgiris_cache_flush_for_render(struct iris_batch *batch, 370b8e80941Smrg struct iris_bo *bo, 371b8e80941Smrg enum isl_format format, 372b8e80941Smrg enum isl_aux_usage aux_usage) 373b8e80941Smrg{ 374b8e80941Smrg if (_mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo)) 375b8e80941Smrg iris_flush_depth_and_render_caches(batch); 376b8e80941Smrg 377b8e80941Smrg /* Check to see if this bo has been used by a previous rendering operation 378b8e80941Smrg * but with a different format or aux usage. If it has, flush the render 379b8e80941Smrg * cache so we ensure that it's only in there with one format or aux usage 380b8e80941Smrg * at a time. 381b8e80941Smrg * 382b8e80941Smrg * Even though it's not obvious, this can easily happen in practice. 383b8e80941Smrg * Suppose a client is blending on a surface with sRGB encode enabled on 384b8e80941Smrg * gen9. This implies that you get AUX_USAGE_CCS_D at best. If the client 385b8e80941Smrg * then disables sRGB decode and continues blending we will flip on 386b8e80941Smrg * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is 387b8e80941Smrg * perfectly valid since CCS_E is a subset of CCS_D). However, this means 388b8e80941Smrg * that we have fragments in-flight which are rendering with UNORM+CCS_E 389b8e80941Smrg * and other fragments in-flight with SRGB+CCS_D on the same surface at the 390b8e80941Smrg * same time and the pixel scoreboard and color blender are trying to sort 391b8e80941Smrg * it all out. This ends badly (i.e. GPU hangs). 392b8e80941Smrg * 393b8e80941Smrg * To date, we have never observed GPU hangs or even corruption to be 394b8e80941Smrg * associated with switching the format, only the aux usage. However, 395b8e80941Smrg * there are comments in various docs which indicate that the render cache 396b8e80941Smrg * isn't 100% resilient to format changes. We may as well be conservative 397b8e80941Smrg * and flush on format changes too. We can always relax this later if we 398b8e80941Smrg * find it to be a performance problem. 399b8e80941Smrg */ 400b8e80941Smrg struct hash_entry *entry = 401b8e80941Smrg _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo); 402b8e80941Smrg if (entry && entry->data != format_aux_tuple(format, aux_usage)) 403b8e80941Smrg iris_flush_depth_and_render_caches(batch); 404b8e80941Smrg} 405b8e80941Smrg 406b8e80941Smrgvoid 407b8e80941Smrgiris_render_cache_add_bo(struct iris_batch *batch, 408b8e80941Smrg struct iris_bo *bo, 409b8e80941Smrg enum isl_format format, 410b8e80941Smrg enum isl_aux_usage aux_usage) 411b8e80941Smrg{ 412b8e80941Smrg#ifndef NDEBUG 413b8e80941Smrg struct hash_entry *entry = 414b8e80941Smrg _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo); 415b8e80941Smrg if (entry) { 416b8e80941Smrg /* Otherwise, someone didn't do a flush_for_render and that would be 417b8e80941Smrg * very bad indeed. 418b8e80941Smrg */ 419b8e80941Smrg assert(entry->data == format_aux_tuple(format, aux_usage)); 420b8e80941Smrg } 421b8e80941Smrg#endif 422b8e80941Smrg 423b8e80941Smrg _mesa_hash_table_insert_pre_hashed(batch->cache.render, bo->hash, bo, 424b8e80941Smrg format_aux_tuple(format, aux_usage)); 425b8e80941Smrg} 426b8e80941Smrg 427b8e80941Smrgvoid 428b8e80941Smrgiris_cache_flush_for_depth(struct iris_batch *batch, 429b8e80941Smrg struct iris_bo *bo) 430b8e80941Smrg{ 431b8e80941Smrg if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo)) 432b8e80941Smrg iris_flush_depth_and_render_caches(batch); 433b8e80941Smrg} 434b8e80941Smrg 435b8e80941Smrgvoid 436b8e80941Smrgiris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo) 437b8e80941Smrg{ 438b8e80941Smrg _mesa_set_add_pre_hashed(batch->cache.depth, bo->hash, bo); 439b8e80941Smrg} 440b8e80941Smrg 441b8e80941Smrgstatic void 442b8e80941Smrgiris_resolve_color(struct iris_context *ice, 443b8e80941Smrg struct iris_batch *batch, 444b8e80941Smrg struct iris_resource *res, 445b8e80941Smrg unsigned level, unsigned layer, 446b8e80941Smrg enum isl_aux_op resolve_op) 447b8e80941Smrg{ 448b8e80941Smrg //DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer); 449b8e80941Smrg 450b8e80941Smrg struct blorp_surf surf; 451b8e80941Smrg iris_blorp_surf_for_resource(&ice->vtbl, &surf, &res->base, res->aux.usage, 452b8e80941Smrg level, true); 453b8e80941Smrg 454b8e80941Smrg iris_batch_maybe_flush(batch, 1500); 455b8e80941Smrg 456b8e80941Smrg /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)": 457b8e80941Smrg * 458b8e80941Smrg * "Any transition from any value in {Clear, Render, Resolve} to a 459b8e80941Smrg * different value in {Clear, Render, Resolve} requires end of pipe 460b8e80941Smrg * synchronization." 461b8e80941Smrg * 462b8e80941Smrg * In other words, fast clear ops are not properly synchronized with 463b8e80941Smrg * other drawing. We need to use a PIPE_CONTROL to ensure that the 464b8e80941Smrg * contents of the previous draw hit the render target before we resolve 465b8e80941Smrg * and again afterwards to ensure that the resolve is complete before we 466b8e80941Smrg * do any more regular drawing. 467b8e80941Smrg */ 468b8e80941Smrg iris_emit_end_of_pipe_sync(batch, PIPE_CONTROL_RENDER_TARGET_FLUSH); 469b8e80941Smrg 470b8e80941Smrg struct blorp_batch blorp_batch; 471b8e80941Smrg blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); 472b8e80941Smrg blorp_ccs_resolve(&blorp_batch, &surf, level, layer, 1, 473b8e80941Smrg isl_format_srgb_to_linear(res->surf.format), 474b8e80941Smrg resolve_op); 475b8e80941Smrg blorp_batch_finish(&blorp_batch); 476b8e80941Smrg 477b8e80941Smrg /* See comment above */ 478b8e80941Smrg iris_emit_end_of_pipe_sync(batch, PIPE_CONTROL_RENDER_TARGET_FLUSH); 479b8e80941Smrg} 480b8e80941Smrg 481b8e80941Smrgstatic void 482b8e80941Smrgiris_mcs_partial_resolve(struct iris_context *ice, 483b8e80941Smrg struct iris_batch *batch, 484b8e80941Smrg struct iris_resource *res, 485b8e80941Smrg uint32_t start_layer, 486b8e80941Smrg uint32_t num_layers) 487b8e80941Smrg{ 488b8e80941Smrg //DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt, 489b8e80941Smrg //start_layer, start_layer + num_layers - 1); 490b8e80941Smrg 491b8e80941Smrg assert(res->aux.usage == ISL_AUX_USAGE_MCS); 492b8e80941Smrg 493b8e80941Smrg struct blorp_surf surf; 494b8e80941Smrg iris_blorp_surf_for_resource(&ice->vtbl, &surf, &res->base, res->aux.usage, 495b8e80941Smrg 0, true); 496b8e80941Smrg 497b8e80941Smrg struct blorp_batch blorp_batch; 498b8e80941Smrg blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); 499b8e80941Smrg blorp_mcs_partial_resolve(&blorp_batch, &surf, 500b8e80941Smrg isl_format_srgb_to_linear(res->surf.format), 501b8e80941Smrg start_layer, num_layers); 502b8e80941Smrg blorp_batch_finish(&blorp_batch); 503b8e80941Smrg} 504b8e80941Smrg 505b8e80941Smrg 506b8e80941Smrg/** 507b8e80941Smrg * Return true if the format that will be used to access the resource is 508b8e80941Smrg * CCS_E-compatible with the resource's linear/non-sRGB format. 509b8e80941Smrg * 510b8e80941Smrg * Why use the linear format? Well, although the resourcemay be specified 511b8e80941Smrg * with an sRGB format, the usage of that color space/format can be toggled. 512b8e80941Smrg * Since our HW tends to support more linear formats than sRGB ones, we use 513b8e80941Smrg * this format variant for check for CCS_E compatibility. 514b8e80941Smrg */ 515b8e80941Smrgstatic bool 516b8e80941Smrgformat_ccs_e_compat_with_resource(const struct gen_device_info *devinfo, 517b8e80941Smrg const struct iris_resource *res, 518b8e80941Smrg enum isl_format access_format) 519b8e80941Smrg{ 520b8e80941Smrg assert(res->aux.usage == ISL_AUX_USAGE_CCS_E); 521b8e80941Smrg 522b8e80941Smrg enum isl_format isl_format = isl_format_srgb_to_linear(res->surf.format); 523b8e80941Smrg return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format); 524b8e80941Smrg} 525b8e80941Smrg 526b8e80941Smrgstatic bool 527b8e80941Smrgsample_with_hiz(const struct gen_device_info *devinfo, 528b8e80941Smrg const struct iris_resource *res) 529b8e80941Smrg{ 530b8e80941Smrg if (!devinfo->has_sample_with_hiz) 531b8e80941Smrg return false; 532b8e80941Smrg 533b8e80941Smrg if (res->aux.usage != ISL_AUX_USAGE_HIZ) 534b8e80941Smrg return false; 535b8e80941Smrg 536b8e80941Smrg /* It seems the hardware won't fallback to the depth buffer if some of the 537b8e80941Smrg * mipmap levels aren't available in the HiZ buffer. So we need all levels 538b8e80941Smrg * of the texture to be HiZ enabled. 539b8e80941Smrg */ 540b8e80941Smrg for (unsigned level = 0; level < res->surf.levels; ++level) { 541b8e80941Smrg if (!iris_resource_level_has_hiz(res, level)) 542b8e80941Smrg return false; 543b8e80941Smrg } 544b8e80941Smrg 545b8e80941Smrg /* If compressed multisampling is enabled, then we use it for the auxiliary 546b8e80941Smrg * buffer instead. 547b8e80941Smrg * 548b8e80941Smrg * From the BDW PRM (Volume 2d: Command Reference: Structures 549b8e80941Smrg * RENDER_SURFACE_STATE.AuxiliarySurfaceMode): 550b8e80941Smrg * 551b8e80941Smrg * "If this field is set to AUX_HIZ, Number of Multisamples must be 552b8e80941Smrg * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D. 553b8e80941Smrg * 554b8e80941Smrg * There is no such blurb for 1D textures, but there is sufficient evidence 555b8e80941Smrg * that this is broken on SKL+. 556b8e80941Smrg */ 557b8e80941Smrg // XXX: i965 disables this for arrays too, is that reasonable? 558b8e80941Smrg return res->surf.samples == 1 && res->surf.dim == ISL_SURF_DIM_2D; 559b8e80941Smrg} 560b8e80941Smrg 561b8e80941Smrg/** 562b8e80941Smrg * Perform a HiZ or depth resolve operation. 563b8e80941Smrg * 564b8e80941Smrg * For an overview of HiZ ops, see the following sections of the Sandy Bridge 565b8e80941Smrg * PRM, Volume 1, Part 2: 566b8e80941Smrg * - 7.5.3.1 Depth Buffer Clear 567b8e80941Smrg * - 7.5.3.2 Depth Buffer Resolve 568b8e80941Smrg * - 7.5.3.3 Hierarchical Depth Buffer Resolve 569b8e80941Smrg */ 570b8e80941Smrgvoid 571b8e80941Smrgiris_hiz_exec(struct iris_context *ice, 572b8e80941Smrg struct iris_batch *batch, 573b8e80941Smrg struct iris_resource *res, 574b8e80941Smrg unsigned int level, unsigned int start_layer, 575b8e80941Smrg unsigned int num_layers, enum isl_aux_op op, 576b8e80941Smrg bool update_clear_depth) 577b8e80941Smrg{ 578b8e80941Smrg assert(iris_resource_level_has_hiz(res, level)); 579b8e80941Smrg assert(op != ISL_AUX_OP_NONE); 580b8e80941Smrg UNUSED const char *name = NULL; 581b8e80941Smrg 582b8e80941Smrg switch (op) { 583b8e80941Smrg case ISL_AUX_OP_FULL_RESOLVE: 584b8e80941Smrg name = "depth resolve"; 585b8e80941Smrg break; 586b8e80941Smrg case ISL_AUX_OP_AMBIGUATE: 587b8e80941Smrg name = "hiz ambiguate"; 588b8e80941Smrg break; 589b8e80941Smrg case ISL_AUX_OP_FAST_CLEAR: 590b8e80941Smrg name = "depth clear"; 591b8e80941Smrg break; 592b8e80941Smrg case ISL_AUX_OP_PARTIAL_RESOLVE: 593b8e80941Smrg case ISL_AUX_OP_NONE: 594b8e80941Smrg unreachable("Invalid HiZ op"); 595b8e80941Smrg } 596b8e80941Smrg 597b8e80941Smrg //DBG("%s %s to mt %p level %d layers %d-%d\n", 598b8e80941Smrg //__func__, name, mt, level, start_layer, start_layer + num_layers - 1); 599b8e80941Smrg 600b8e80941Smrg /* The following stalls and flushes are only documented to be required 601b8e80941Smrg * for HiZ clear operations. However, they also seem to be required for 602b8e80941Smrg * resolve operations. 603b8e80941Smrg * 604b8e80941Smrg * From the Ivybridge PRM, volume 2, "Depth Buffer Clear": 605b8e80941Smrg * 606b8e80941Smrg * "If other rendering operations have preceded this clear, a 607b8e80941Smrg * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit 608b8e80941Smrg * enabled must be issued before the rectangle primitive used for 609b8e80941Smrg * the depth buffer clear operation." 610b8e80941Smrg * 611b8e80941Smrg * Same applies for Gen8 and Gen9. 612b8e80941Smrg * 613b8e80941Smrg * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1 614b8e80941Smrg * PIPE_CONTROL, Depth Cache Flush Enable: 615b8e80941Smrg * 616b8e80941Smrg * "This bit must not be set when Depth Stall Enable bit is set in 617b8e80941Smrg * this packet." 618b8e80941Smrg * 619b8e80941Smrg * This is confirmed to hold for real, Haswell gets immediate gpu hangs. 620b8e80941Smrg * 621b8e80941Smrg * Therefore issue two pipe control flushes, one for cache flush and 622b8e80941Smrg * another for depth stall. 623b8e80941Smrg */ 624b8e80941Smrg iris_emit_pipe_control_flush(batch, 625b8e80941Smrg PIPE_CONTROL_DEPTH_CACHE_FLUSH | 626b8e80941Smrg PIPE_CONTROL_CS_STALL); 627b8e80941Smrg 628b8e80941Smrg iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL); 629b8e80941Smrg 630b8e80941Smrg assert(res->aux.usage == ISL_AUX_USAGE_HIZ && res->aux.bo); 631b8e80941Smrg 632b8e80941Smrg iris_batch_maybe_flush(batch, 1500); 633b8e80941Smrg 634b8e80941Smrg struct blorp_surf surf; 635b8e80941Smrg iris_blorp_surf_for_resource(&ice->vtbl, &surf, &res->base, 636b8e80941Smrg ISL_AUX_USAGE_HIZ, level, true); 637b8e80941Smrg 638b8e80941Smrg struct blorp_batch blorp_batch; 639b8e80941Smrg enum blorp_batch_flags flags = 0; 640b8e80941Smrg flags |= update_clear_depth ? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR; 641b8e80941Smrg blorp_batch_init(&ice->blorp, &blorp_batch, batch, flags); 642b8e80941Smrg blorp_hiz_op(&blorp_batch, &surf, level, start_layer, num_layers, op); 643b8e80941Smrg blorp_batch_finish(&blorp_batch); 644b8e80941Smrg 645b8e80941Smrg /* The following stalls and flushes are only documented to be required 646b8e80941Smrg * for HiZ clear operations. However, they also seem to be required for 647b8e80941Smrg * resolve operations. 648b8e80941Smrg * 649b8e80941Smrg * From the Broadwell PRM, volume 7, "Depth Buffer Clear": 650b8e80941Smrg * 651b8e80941Smrg * "Depth buffer clear pass using any of the methods (WM_STATE, 652b8e80941Smrg * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a 653b8e80941Smrg * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits 654b8e80941Smrg * "set" before starting to render. DepthStall and DepthFlush are 655b8e80941Smrg * not needed between consecutive depth clear passes nor is it 656b8e80941Smrg * required if the depth clear pass was done with 657b8e80941Smrg * 'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP." 658b8e80941Smrg * 659b8e80941Smrg * TODO: Such as the spec says, this could be conditional. 660b8e80941Smrg */ 661b8e80941Smrg iris_emit_pipe_control_flush(batch, 662b8e80941Smrg PIPE_CONTROL_DEPTH_CACHE_FLUSH | 663b8e80941Smrg PIPE_CONTROL_DEPTH_STALL); 664b8e80941Smrg} 665b8e80941Smrg 666b8e80941Smrg/** 667b8e80941Smrg * Does the resource's slice have hiz enabled? 668b8e80941Smrg */ 669b8e80941Smrgbool 670b8e80941Smrgiris_resource_level_has_hiz(const struct iris_resource *res, uint32_t level) 671b8e80941Smrg{ 672b8e80941Smrg iris_resource_check_level_layer(res, level, 0); 673b8e80941Smrg return res->aux.has_hiz & 1 << level; 674b8e80941Smrg} 675b8e80941Smrg 676b8e80941Smrg/** \brief Assert that the level and layer are valid for the resource. */ 677b8e80941Smrgvoid 678b8e80941Smrgiris_resource_check_level_layer(UNUSED const struct iris_resource *res, 679b8e80941Smrg UNUSED uint32_t level, UNUSED uint32_t layer) 680b8e80941Smrg{ 681b8e80941Smrg assert(level < res->surf.levels); 682b8e80941Smrg assert(layer < util_num_layers(&res->base, level)); 683b8e80941Smrg} 684b8e80941Smrg 685b8e80941Smrgstatic inline uint32_t 686b8e80941Smrgmiptree_level_range_length(const struct iris_resource *res, 687b8e80941Smrg uint32_t start_level, uint32_t num_levels) 688b8e80941Smrg{ 689b8e80941Smrg assert(start_level < res->surf.levels); 690b8e80941Smrg 691b8e80941Smrg if (num_levels == INTEL_REMAINING_LAYERS) 692b8e80941Smrg num_levels = res->surf.levels; 693b8e80941Smrg 694b8e80941Smrg /* Check for overflow */ 695b8e80941Smrg assert(start_level + num_levels >= start_level); 696b8e80941Smrg assert(start_level + num_levels <= res->surf.levels); 697b8e80941Smrg 698b8e80941Smrg return num_levels; 699b8e80941Smrg} 700b8e80941Smrg 701b8e80941Smrgstatic inline uint32_t 702b8e80941Smrgmiptree_layer_range_length(const struct iris_resource *res, uint32_t level, 703b8e80941Smrg uint32_t start_layer, uint32_t num_layers) 704b8e80941Smrg{ 705b8e80941Smrg assert(level <= res->base.last_level); 706b8e80941Smrg 707b8e80941Smrg const uint32_t total_num_layers = iris_get_num_logical_layers(res, level); 708b8e80941Smrg assert(start_layer < total_num_layers); 709b8e80941Smrg if (num_layers == INTEL_REMAINING_LAYERS) 710b8e80941Smrg num_layers = total_num_layers - start_layer; 711b8e80941Smrg /* Check for overflow */ 712b8e80941Smrg assert(start_layer + num_layers >= start_layer); 713b8e80941Smrg assert(start_layer + num_layers <= total_num_layers); 714b8e80941Smrg 715b8e80941Smrg return num_layers; 716b8e80941Smrg} 717b8e80941Smrg 718b8e80941Smrgstatic bool 719b8e80941Smrghas_color_unresolved(const struct iris_resource *res, 720b8e80941Smrg unsigned start_level, unsigned num_levels, 721b8e80941Smrg unsigned start_layer, unsigned num_layers) 722b8e80941Smrg{ 723b8e80941Smrg if (!res->aux.bo) 724b8e80941Smrg return false; 725b8e80941Smrg 726b8e80941Smrg /* Clamp the level range to fit the resource */ 727b8e80941Smrg num_levels = miptree_level_range_length(res, start_level, num_levels); 728b8e80941Smrg 729b8e80941Smrg for (uint32_t l = 0; l < num_levels; l++) { 730b8e80941Smrg const uint32_t level = start_level + l; 731b8e80941Smrg const uint32_t level_layers = 732b8e80941Smrg miptree_layer_range_length(res, level, start_layer, num_layers); 733b8e80941Smrg for (unsigned a = 0; a < level_layers; a++) { 734b8e80941Smrg enum isl_aux_state aux_state = 735b8e80941Smrg iris_resource_get_aux_state(res, level, start_layer + a); 736b8e80941Smrg assert(aux_state != ISL_AUX_STATE_AUX_INVALID); 737b8e80941Smrg if (aux_state != ISL_AUX_STATE_PASS_THROUGH) 738b8e80941Smrg return true; 739b8e80941Smrg } 740b8e80941Smrg } 741b8e80941Smrg 742b8e80941Smrg return false; 743b8e80941Smrg} 744b8e80941Smrg 745b8e80941Smrgstatic enum isl_aux_op 746b8e80941Smrgget_ccs_d_resolve_op(enum isl_aux_state aux_state, 747b8e80941Smrg enum isl_aux_usage aux_usage, 748b8e80941Smrg bool fast_clear_supported) 749b8e80941Smrg{ 750b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_CCS_D); 751b8e80941Smrg 752b8e80941Smrg const bool ccs_supported = aux_usage == ISL_AUX_USAGE_CCS_D; 753b8e80941Smrg 754b8e80941Smrg assert(ccs_supported == fast_clear_supported); 755b8e80941Smrg 756b8e80941Smrg switch (aux_state) { 757b8e80941Smrg case ISL_AUX_STATE_CLEAR: 758b8e80941Smrg case ISL_AUX_STATE_PARTIAL_CLEAR: 759b8e80941Smrg if (!ccs_supported) 760b8e80941Smrg return ISL_AUX_OP_FULL_RESOLVE; 761b8e80941Smrg else 762b8e80941Smrg return ISL_AUX_OP_NONE; 763b8e80941Smrg 764b8e80941Smrg case ISL_AUX_STATE_PASS_THROUGH: 765b8e80941Smrg return ISL_AUX_OP_NONE; 766b8e80941Smrg 767b8e80941Smrg case ISL_AUX_STATE_RESOLVED: 768b8e80941Smrg case ISL_AUX_STATE_AUX_INVALID: 769b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_CLEAR: 770b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: 771b8e80941Smrg break; 772b8e80941Smrg } 773b8e80941Smrg 774b8e80941Smrg unreachable("Invalid aux state for CCS_D"); 775b8e80941Smrg} 776b8e80941Smrg 777b8e80941Smrgstatic enum isl_aux_op 778b8e80941Smrgget_ccs_e_resolve_op(enum isl_aux_state aux_state, 779b8e80941Smrg enum isl_aux_usage aux_usage, 780b8e80941Smrg bool fast_clear_supported) 781b8e80941Smrg{ 782b8e80941Smrg /* CCS_E surfaces can be accessed as CCS_D if we're careful. */ 783b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_NONE || 784b8e80941Smrg aux_usage == ISL_AUX_USAGE_CCS_D || 785b8e80941Smrg aux_usage == ISL_AUX_USAGE_CCS_E); 786b8e80941Smrg 787b8e80941Smrg if (aux_usage == ISL_AUX_USAGE_CCS_D) 788b8e80941Smrg assert(fast_clear_supported); 789b8e80941Smrg 790b8e80941Smrg switch (aux_state) { 791b8e80941Smrg case ISL_AUX_STATE_CLEAR: 792b8e80941Smrg case ISL_AUX_STATE_PARTIAL_CLEAR: 793b8e80941Smrg if (fast_clear_supported) 794b8e80941Smrg return ISL_AUX_OP_NONE; 795b8e80941Smrg else if (aux_usage == ISL_AUX_USAGE_CCS_E) 796b8e80941Smrg return ISL_AUX_OP_PARTIAL_RESOLVE; 797b8e80941Smrg else 798b8e80941Smrg return ISL_AUX_OP_FULL_RESOLVE; 799b8e80941Smrg 800b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_CLEAR: 801b8e80941Smrg if (aux_usage != ISL_AUX_USAGE_CCS_E) 802b8e80941Smrg return ISL_AUX_OP_FULL_RESOLVE; 803b8e80941Smrg else if (!fast_clear_supported) 804b8e80941Smrg return ISL_AUX_OP_PARTIAL_RESOLVE; 805b8e80941Smrg else 806b8e80941Smrg return ISL_AUX_OP_NONE; 807b8e80941Smrg 808b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: 809b8e80941Smrg if (aux_usage != ISL_AUX_USAGE_CCS_E) 810b8e80941Smrg return ISL_AUX_OP_FULL_RESOLVE; 811b8e80941Smrg else 812b8e80941Smrg return ISL_AUX_OP_NONE; 813b8e80941Smrg 814b8e80941Smrg case ISL_AUX_STATE_PASS_THROUGH: 815b8e80941Smrg return ISL_AUX_OP_NONE; 816b8e80941Smrg 817b8e80941Smrg case ISL_AUX_STATE_RESOLVED: 818b8e80941Smrg case ISL_AUX_STATE_AUX_INVALID: 819b8e80941Smrg break; 820b8e80941Smrg } 821b8e80941Smrg 822b8e80941Smrg unreachable("Invalid aux state for CCS_E"); 823b8e80941Smrg} 824b8e80941Smrg 825b8e80941Smrgstatic void 826b8e80941Smrgiris_resource_prepare_ccs_access(struct iris_context *ice, 827b8e80941Smrg struct iris_batch *batch, 828b8e80941Smrg struct iris_resource *res, 829b8e80941Smrg uint32_t level, uint32_t layer, 830b8e80941Smrg enum isl_aux_usage aux_usage, 831b8e80941Smrg bool fast_clear_supported) 832b8e80941Smrg{ 833b8e80941Smrg enum isl_aux_state aux_state = iris_resource_get_aux_state(res, level, layer); 834b8e80941Smrg 835b8e80941Smrg enum isl_aux_op resolve_op; 836b8e80941Smrg if (res->aux.usage == ISL_AUX_USAGE_CCS_E) { 837b8e80941Smrg resolve_op = get_ccs_e_resolve_op(aux_state, aux_usage, 838b8e80941Smrg fast_clear_supported); 839b8e80941Smrg } else { 840b8e80941Smrg assert(res->aux.usage == ISL_AUX_USAGE_CCS_D); 841b8e80941Smrg resolve_op = get_ccs_d_resolve_op(aux_state, aux_usage, 842b8e80941Smrg fast_clear_supported); 843b8e80941Smrg } 844b8e80941Smrg 845b8e80941Smrg if (resolve_op != ISL_AUX_OP_NONE) { 846b8e80941Smrg iris_resolve_color(ice, batch, res, level, layer, resolve_op); 847b8e80941Smrg 848b8e80941Smrg switch (resolve_op) { 849b8e80941Smrg case ISL_AUX_OP_FULL_RESOLVE: 850b8e80941Smrg /* The CCS full resolve operation destroys the CCS and sets it to the 851b8e80941Smrg * pass-through state. (You can also think of this as being both a 852b8e80941Smrg * resolve and an ambiguate in one operation.) 853b8e80941Smrg */ 854b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 855b8e80941Smrg ISL_AUX_STATE_PASS_THROUGH); 856b8e80941Smrg break; 857b8e80941Smrg 858b8e80941Smrg case ISL_AUX_OP_PARTIAL_RESOLVE: 859b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 860b8e80941Smrg ISL_AUX_STATE_COMPRESSED_NO_CLEAR); 861b8e80941Smrg break; 862b8e80941Smrg 863b8e80941Smrg default: 864b8e80941Smrg unreachable("Invalid resolve op"); 865b8e80941Smrg } 866b8e80941Smrg } 867b8e80941Smrg} 868b8e80941Smrg 869b8e80941Smrgstatic void 870b8e80941Smrgiris_resource_finish_ccs_write(struct iris_context *ice, 871b8e80941Smrg struct iris_resource *res, 872b8e80941Smrg uint32_t level, uint32_t layer, 873b8e80941Smrg enum isl_aux_usage aux_usage) 874b8e80941Smrg{ 875b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_NONE || 876b8e80941Smrg aux_usage == ISL_AUX_USAGE_CCS_D || 877b8e80941Smrg aux_usage == ISL_AUX_USAGE_CCS_E); 878b8e80941Smrg 879b8e80941Smrg enum isl_aux_state aux_state = 880b8e80941Smrg iris_resource_get_aux_state(res, level, layer); 881b8e80941Smrg 882b8e80941Smrg if (res->aux.usage == ISL_AUX_USAGE_CCS_E) { 883b8e80941Smrg switch (aux_state) { 884b8e80941Smrg case ISL_AUX_STATE_CLEAR: 885b8e80941Smrg case ISL_AUX_STATE_PARTIAL_CLEAR: 886b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_CCS_E || 887b8e80941Smrg aux_usage == ISL_AUX_USAGE_CCS_D); 888b8e80941Smrg 889b8e80941Smrg if (aux_usage == ISL_AUX_USAGE_CCS_E) { 890b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 891b8e80941Smrg ISL_AUX_STATE_COMPRESSED_CLEAR); 892b8e80941Smrg } else if (aux_state != ISL_AUX_STATE_PARTIAL_CLEAR) { 893b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 894b8e80941Smrg ISL_AUX_STATE_PARTIAL_CLEAR); 895b8e80941Smrg } 896b8e80941Smrg break; 897b8e80941Smrg 898b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_CLEAR: 899b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: 900b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_CCS_E); 901b8e80941Smrg break; /* Nothing to do */ 902b8e80941Smrg 903b8e80941Smrg case ISL_AUX_STATE_PASS_THROUGH: 904b8e80941Smrg if (aux_usage == ISL_AUX_USAGE_CCS_E) { 905b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 906b8e80941Smrg ISL_AUX_STATE_COMPRESSED_NO_CLEAR); 907b8e80941Smrg } else { 908b8e80941Smrg /* Nothing to do */ 909b8e80941Smrg } 910b8e80941Smrg break; 911b8e80941Smrg 912b8e80941Smrg case ISL_AUX_STATE_RESOLVED: 913b8e80941Smrg case ISL_AUX_STATE_AUX_INVALID: 914b8e80941Smrg unreachable("Invalid aux state for CCS_E"); 915b8e80941Smrg } 916b8e80941Smrg } else { 917b8e80941Smrg assert(res->aux.usage == ISL_AUX_USAGE_CCS_D); 918b8e80941Smrg /* CCS_D is a bit simpler */ 919b8e80941Smrg switch (aux_state) { 920b8e80941Smrg case ISL_AUX_STATE_CLEAR: 921b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_CCS_D); 922b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 923b8e80941Smrg ISL_AUX_STATE_PARTIAL_CLEAR); 924b8e80941Smrg break; 925b8e80941Smrg 926b8e80941Smrg case ISL_AUX_STATE_PARTIAL_CLEAR: 927b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_CCS_D); 928b8e80941Smrg break; /* Nothing to do */ 929b8e80941Smrg 930b8e80941Smrg case ISL_AUX_STATE_PASS_THROUGH: 931b8e80941Smrg /* Nothing to do */ 932b8e80941Smrg break; 933b8e80941Smrg 934b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_CLEAR: 935b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: 936b8e80941Smrg case ISL_AUX_STATE_RESOLVED: 937b8e80941Smrg case ISL_AUX_STATE_AUX_INVALID: 938b8e80941Smrg unreachable("Invalid aux state for CCS_D"); 939b8e80941Smrg } 940b8e80941Smrg } 941b8e80941Smrg} 942b8e80941Smrg 943b8e80941Smrgstatic void 944b8e80941Smrgiris_resource_prepare_mcs_access(struct iris_context *ice, 945b8e80941Smrg struct iris_batch *batch, 946b8e80941Smrg struct iris_resource *res, 947b8e80941Smrg uint32_t layer, 948b8e80941Smrg enum isl_aux_usage aux_usage, 949b8e80941Smrg bool fast_clear_supported) 950b8e80941Smrg{ 951b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_MCS); 952b8e80941Smrg 953b8e80941Smrg switch (iris_resource_get_aux_state(res, 0, layer)) { 954b8e80941Smrg case ISL_AUX_STATE_CLEAR: 955b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_CLEAR: 956b8e80941Smrg if (!fast_clear_supported) { 957b8e80941Smrg iris_mcs_partial_resolve(ice, batch, res, layer, 1); 958b8e80941Smrg iris_resource_set_aux_state(ice, res, 0, layer, 1, 959b8e80941Smrg ISL_AUX_STATE_COMPRESSED_NO_CLEAR); 960b8e80941Smrg } 961b8e80941Smrg break; 962b8e80941Smrg 963b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: 964b8e80941Smrg break; /* Nothing to do */ 965b8e80941Smrg 966b8e80941Smrg case ISL_AUX_STATE_RESOLVED: 967b8e80941Smrg case ISL_AUX_STATE_PASS_THROUGH: 968b8e80941Smrg case ISL_AUX_STATE_AUX_INVALID: 969b8e80941Smrg case ISL_AUX_STATE_PARTIAL_CLEAR: 970b8e80941Smrg unreachable("Invalid aux state for MCS"); 971b8e80941Smrg } 972b8e80941Smrg} 973b8e80941Smrg 974b8e80941Smrgstatic void 975b8e80941Smrgiris_resource_finish_mcs_write(struct iris_context *ice, 976b8e80941Smrg struct iris_resource *res, 977b8e80941Smrg uint32_t layer, 978b8e80941Smrg enum isl_aux_usage aux_usage) 979b8e80941Smrg{ 980b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_MCS); 981b8e80941Smrg 982b8e80941Smrg switch (iris_resource_get_aux_state(res, 0, layer)) { 983b8e80941Smrg case ISL_AUX_STATE_CLEAR: 984b8e80941Smrg iris_resource_set_aux_state(ice, res, 0, layer, 1, 985b8e80941Smrg ISL_AUX_STATE_COMPRESSED_CLEAR); 986b8e80941Smrg break; 987b8e80941Smrg 988b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_CLEAR: 989b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: 990b8e80941Smrg break; /* Nothing to do */ 991b8e80941Smrg 992b8e80941Smrg case ISL_AUX_STATE_RESOLVED: 993b8e80941Smrg case ISL_AUX_STATE_PASS_THROUGH: 994b8e80941Smrg case ISL_AUX_STATE_AUX_INVALID: 995b8e80941Smrg case ISL_AUX_STATE_PARTIAL_CLEAR: 996b8e80941Smrg unreachable("Invalid aux state for MCS"); 997b8e80941Smrg } 998b8e80941Smrg} 999b8e80941Smrg 1000b8e80941Smrgstatic void 1001b8e80941Smrgiris_resource_prepare_hiz_access(struct iris_context *ice, 1002b8e80941Smrg struct iris_batch *batch, 1003b8e80941Smrg struct iris_resource *res, 1004b8e80941Smrg uint32_t level, uint32_t layer, 1005b8e80941Smrg enum isl_aux_usage aux_usage, 1006b8e80941Smrg bool fast_clear_supported) 1007b8e80941Smrg{ 1008b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ); 1009b8e80941Smrg 1010b8e80941Smrg enum isl_aux_op hiz_op = ISL_AUX_OP_NONE; 1011b8e80941Smrg switch (iris_resource_get_aux_state(res, level, layer)) { 1012b8e80941Smrg case ISL_AUX_STATE_CLEAR: 1013b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_CLEAR: 1014b8e80941Smrg if (aux_usage != ISL_AUX_USAGE_HIZ || !fast_clear_supported) 1015b8e80941Smrg hiz_op = ISL_AUX_OP_FULL_RESOLVE; 1016b8e80941Smrg break; 1017b8e80941Smrg 1018b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: 1019b8e80941Smrg if (aux_usage != ISL_AUX_USAGE_HIZ) 1020b8e80941Smrg hiz_op = ISL_AUX_OP_FULL_RESOLVE; 1021b8e80941Smrg break; 1022b8e80941Smrg 1023b8e80941Smrg case ISL_AUX_STATE_PASS_THROUGH: 1024b8e80941Smrg case ISL_AUX_STATE_RESOLVED: 1025b8e80941Smrg break; 1026b8e80941Smrg 1027b8e80941Smrg case ISL_AUX_STATE_AUX_INVALID: 1028b8e80941Smrg if (aux_usage == ISL_AUX_USAGE_HIZ) 1029b8e80941Smrg hiz_op = ISL_AUX_OP_AMBIGUATE; 1030b8e80941Smrg break; 1031b8e80941Smrg 1032b8e80941Smrg case ISL_AUX_STATE_PARTIAL_CLEAR: 1033b8e80941Smrg unreachable("Invalid HiZ state"); 1034b8e80941Smrg } 1035b8e80941Smrg 1036b8e80941Smrg if (hiz_op != ISL_AUX_OP_NONE) { 1037b8e80941Smrg iris_hiz_exec(ice, batch, res, level, layer, 1, hiz_op, false); 1038b8e80941Smrg 1039b8e80941Smrg switch (hiz_op) { 1040b8e80941Smrg case ISL_AUX_OP_FULL_RESOLVE: 1041b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 1042b8e80941Smrg ISL_AUX_STATE_RESOLVED); 1043b8e80941Smrg break; 1044b8e80941Smrg 1045b8e80941Smrg case ISL_AUX_OP_AMBIGUATE: 1046b8e80941Smrg /* The HiZ resolve operation is actually an ambiguate */ 1047b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 1048b8e80941Smrg ISL_AUX_STATE_PASS_THROUGH); 1049b8e80941Smrg break; 1050b8e80941Smrg 1051b8e80941Smrg default: 1052b8e80941Smrg unreachable("Invalid HiZ op"); 1053b8e80941Smrg } 1054b8e80941Smrg } 1055b8e80941Smrg} 1056b8e80941Smrg 1057b8e80941Smrgstatic void 1058b8e80941Smrgiris_resource_finish_hiz_write(struct iris_context *ice, 1059b8e80941Smrg struct iris_resource *res, 1060b8e80941Smrg uint32_t level, uint32_t layer, 1061b8e80941Smrg enum isl_aux_usage aux_usage) 1062b8e80941Smrg{ 1063b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ); 1064b8e80941Smrg 1065b8e80941Smrg switch (iris_resource_get_aux_state(res, level, layer)) { 1066b8e80941Smrg case ISL_AUX_STATE_CLEAR: 1067b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_HIZ); 1068b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 1069b8e80941Smrg ISL_AUX_STATE_COMPRESSED_CLEAR); 1070b8e80941Smrg break; 1071b8e80941Smrg 1072b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: 1073b8e80941Smrg case ISL_AUX_STATE_COMPRESSED_CLEAR: 1074b8e80941Smrg assert(aux_usage == ISL_AUX_USAGE_HIZ); 1075b8e80941Smrg break; /* Nothing to do */ 1076b8e80941Smrg 1077b8e80941Smrg case ISL_AUX_STATE_RESOLVED: 1078b8e80941Smrg if (aux_usage == ISL_AUX_USAGE_HIZ) { 1079b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 1080b8e80941Smrg ISL_AUX_STATE_COMPRESSED_NO_CLEAR); 1081b8e80941Smrg } else { 1082b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 1083b8e80941Smrg ISL_AUX_STATE_AUX_INVALID); 1084b8e80941Smrg } 1085b8e80941Smrg break; 1086b8e80941Smrg 1087b8e80941Smrg case ISL_AUX_STATE_PASS_THROUGH: 1088b8e80941Smrg if (aux_usage == ISL_AUX_USAGE_HIZ) { 1089b8e80941Smrg iris_resource_set_aux_state(ice, res, level, layer, 1, 1090b8e80941Smrg ISL_AUX_STATE_COMPRESSED_NO_CLEAR); 1091b8e80941Smrg } 1092b8e80941Smrg break; 1093b8e80941Smrg 1094b8e80941Smrg case ISL_AUX_STATE_AUX_INVALID: 1095b8e80941Smrg assert(aux_usage != ISL_AUX_USAGE_HIZ); 1096b8e80941Smrg break; 1097b8e80941Smrg 1098b8e80941Smrg case ISL_AUX_STATE_PARTIAL_CLEAR: 1099b8e80941Smrg unreachable("Invalid HiZ state"); 1100b8e80941Smrg } 1101b8e80941Smrg} 1102b8e80941Smrg 1103b8e80941Smrgvoid 1104b8e80941Smrgiris_resource_prepare_access(struct iris_context *ice, 1105b8e80941Smrg struct iris_batch *batch, 1106b8e80941Smrg struct iris_resource *res, 1107b8e80941Smrg uint32_t start_level, uint32_t num_levels, 1108b8e80941Smrg uint32_t start_layer, uint32_t num_layers, 1109b8e80941Smrg enum isl_aux_usage aux_usage, 1110b8e80941Smrg bool fast_clear_supported) 1111b8e80941Smrg{ 1112b8e80941Smrg num_levels = miptree_level_range_length(res, start_level, num_levels); 1113b8e80941Smrg 1114b8e80941Smrg switch (res->aux.usage) { 1115b8e80941Smrg case ISL_AUX_USAGE_NONE: 1116b8e80941Smrg /* Nothing to do */ 1117b8e80941Smrg break; 1118b8e80941Smrg 1119b8e80941Smrg case ISL_AUX_USAGE_MCS: 1120b8e80941Smrg assert(start_level == 0 && num_levels == 1); 1121b8e80941Smrg const uint32_t level_layers = 1122b8e80941Smrg miptree_layer_range_length(res, 0, start_layer, num_layers); 1123b8e80941Smrg for (uint32_t a = 0; a < level_layers; a++) { 1124b8e80941Smrg iris_resource_prepare_mcs_access(ice, batch, res, start_layer + a, 1125b8e80941Smrg aux_usage, fast_clear_supported); 1126b8e80941Smrg } 1127b8e80941Smrg break; 1128b8e80941Smrg 1129b8e80941Smrg case ISL_AUX_USAGE_CCS_D: 1130b8e80941Smrg case ISL_AUX_USAGE_CCS_E: 1131b8e80941Smrg for (uint32_t l = 0; l < num_levels; l++) { 1132b8e80941Smrg const uint32_t level = start_level + l; 1133b8e80941Smrg const uint32_t level_layers = 1134b8e80941Smrg miptree_layer_range_length(res, level, start_layer, num_layers); 1135b8e80941Smrg for (uint32_t a = 0; a < level_layers; a++) { 1136b8e80941Smrg iris_resource_prepare_ccs_access(ice, batch, res, level, 1137b8e80941Smrg start_layer + a, 1138b8e80941Smrg aux_usage, fast_clear_supported); 1139b8e80941Smrg } 1140b8e80941Smrg } 1141b8e80941Smrg break; 1142b8e80941Smrg 1143b8e80941Smrg case ISL_AUX_USAGE_HIZ: 1144b8e80941Smrg for (uint32_t l = 0; l < num_levels; l++) { 1145b8e80941Smrg const uint32_t level = start_level + l; 1146b8e80941Smrg if (!iris_resource_level_has_hiz(res, level)) 1147b8e80941Smrg continue; 1148b8e80941Smrg 1149b8e80941Smrg const uint32_t level_layers = 1150b8e80941Smrg miptree_layer_range_length(res, level, start_layer, num_layers); 1151b8e80941Smrg for (uint32_t a = 0; a < level_layers; a++) { 1152b8e80941Smrg iris_resource_prepare_hiz_access(ice, batch, res, level, 1153b8e80941Smrg start_layer + a, aux_usage, 1154b8e80941Smrg fast_clear_supported); 1155b8e80941Smrg } 1156b8e80941Smrg } 1157b8e80941Smrg break; 1158b8e80941Smrg 1159b8e80941Smrg default: 1160b8e80941Smrg unreachable("Invalid aux usage"); 1161b8e80941Smrg } 1162b8e80941Smrg} 1163b8e80941Smrg 1164b8e80941Smrgvoid 1165b8e80941Smrgiris_resource_finish_write(struct iris_context *ice, 1166b8e80941Smrg struct iris_resource *res, uint32_t level, 1167b8e80941Smrg uint32_t start_layer, uint32_t num_layers, 1168b8e80941Smrg enum isl_aux_usage aux_usage) 1169b8e80941Smrg{ 1170b8e80941Smrg num_layers = miptree_layer_range_length(res, level, start_layer, num_layers); 1171b8e80941Smrg 1172b8e80941Smrg switch (res->aux.usage) { 1173b8e80941Smrg case ISL_AUX_USAGE_NONE: 1174b8e80941Smrg break; 1175b8e80941Smrg 1176b8e80941Smrg case ISL_AUX_USAGE_MCS: 1177b8e80941Smrg for (uint32_t a = 0; a < num_layers; a++) { 1178b8e80941Smrg iris_resource_finish_mcs_write(ice, res, start_layer + a, 1179b8e80941Smrg aux_usage); 1180b8e80941Smrg } 1181b8e80941Smrg break; 1182b8e80941Smrg 1183b8e80941Smrg case ISL_AUX_USAGE_CCS_D: 1184b8e80941Smrg case ISL_AUX_USAGE_CCS_E: 1185b8e80941Smrg for (uint32_t a = 0; a < num_layers; a++) { 1186b8e80941Smrg iris_resource_finish_ccs_write(ice, res, level, start_layer + a, 1187b8e80941Smrg aux_usage); 1188b8e80941Smrg } 1189b8e80941Smrg break; 1190b8e80941Smrg 1191b8e80941Smrg case ISL_AUX_USAGE_HIZ: 1192b8e80941Smrg if (!iris_resource_level_has_hiz(res, level)) 1193b8e80941Smrg return; 1194b8e80941Smrg 1195b8e80941Smrg for (uint32_t a = 0; a < num_layers; a++) { 1196b8e80941Smrg iris_resource_finish_hiz_write(ice, res, level, start_layer + a, 1197b8e80941Smrg aux_usage); 1198b8e80941Smrg } 1199b8e80941Smrg break; 1200b8e80941Smrg 1201b8e80941Smrg default: 1202b8e80941Smrg unreachable("Invavlid aux usage"); 1203b8e80941Smrg } 1204b8e80941Smrg} 1205b8e80941Smrg 1206b8e80941Smrgenum isl_aux_state 1207b8e80941Smrgiris_resource_get_aux_state(const struct iris_resource *res, 1208b8e80941Smrg uint32_t level, uint32_t layer) 1209b8e80941Smrg{ 1210b8e80941Smrg iris_resource_check_level_layer(res, level, layer); 1211b8e80941Smrg 1212b8e80941Smrg if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) { 1213b8e80941Smrg assert(iris_resource_level_has_hiz(res, level)); 1214b8e80941Smrg } else if (res->surf.usage & ISL_SURF_USAGE_STENCIL_BIT) { 1215b8e80941Smrg unreachable("Cannot get aux state for stencil"); 1216b8e80941Smrg } else { 1217b8e80941Smrg assert(res->surf.samples == 1 || 1218b8e80941Smrg res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 1219b8e80941Smrg } 1220b8e80941Smrg 1221b8e80941Smrg return res->aux.state[level][layer]; 1222b8e80941Smrg} 1223b8e80941Smrg 1224b8e80941Smrgvoid 1225b8e80941Smrgiris_resource_set_aux_state(struct iris_context *ice, 1226b8e80941Smrg struct iris_resource *res, uint32_t level, 1227b8e80941Smrg uint32_t start_layer, uint32_t num_layers, 1228b8e80941Smrg enum isl_aux_state aux_state) 1229b8e80941Smrg{ 1230b8e80941Smrg num_layers = miptree_layer_range_length(res, level, start_layer, num_layers); 1231b8e80941Smrg 1232b8e80941Smrg if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) { 1233b8e80941Smrg assert(iris_resource_level_has_hiz(res, level)); 1234b8e80941Smrg } else if (res->surf.usage & ISL_SURF_USAGE_STENCIL_BIT) { 1235b8e80941Smrg unreachable("Cannot set aux state for stencil"); 1236b8e80941Smrg } else { 1237b8e80941Smrg assert(res->surf.samples == 1 || 1238b8e80941Smrg res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 1239b8e80941Smrg } 1240b8e80941Smrg 1241b8e80941Smrg for (unsigned a = 0; a < num_layers; a++) { 1242b8e80941Smrg if (res->aux.state[level][start_layer + a] != aux_state) { 1243b8e80941Smrg res->aux.state[level][start_layer + a] = aux_state; 1244b8e80941Smrg /* XXX: Need to track which bindings to make dirty */ 1245b8e80941Smrg ice->state.dirty |= IRIS_ALL_DIRTY_BINDINGS; 1246b8e80941Smrg } 1247b8e80941Smrg } 1248b8e80941Smrg} 1249b8e80941Smrg 1250b8e80941Smrg/* On Gen9 color buffers may be compressed by the hardware (lossless 1251b8e80941Smrg * compression). There are, however, format restrictions and care needs to be 1252b8e80941Smrg * taken that the sampler engine is capable for re-interpreting a buffer with 1253b8e80941Smrg * format different the buffer was originally written with. 1254b8e80941Smrg * 1255b8e80941Smrg * For example, SRGB formats are not compressible and the sampler engine isn't 1256b8e80941Smrg * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying 1257b8e80941Smrg * color buffer needs to be resolved so that the sampling surface can be 1258b8e80941Smrg * sampled as non-compressed (i.e., without the auxiliary MCS buffer being 1259b8e80941Smrg * set). 1260b8e80941Smrg */ 1261b8e80941Smrgstatic bool 1262b8e80941Smrgcan_texture_with_ccs(const struct gen_device_info *devinfo, 1263b8e80941Smrg struct pipe_debug_callback *dbg, 1264b8e80941Smrg const struct iris_resource *res, 1265b8e80941Smrg enum isl_format view_format) 1266b8e80941Smrg{ 1267b8e80941Smrg if (res->aux.usage != ISL_AUX_USAGE_CCS_E) 1268b8e80941Smrg return false; 1269b8e80941Smrg 1270b8e80941Smrg if (!format_ccs_e_compat_with_resource(devinfo, res, view_format)) { 1271b8e80941Smrg const struct isl_format_layout *res_fmtl = 1272b8e80941Smrg isl_format_get_layout(res->surf.format); 1273b8e80941Smrg const struct isl_format_layout *view_fmtl = 1274b8e80941Smrg isl_format_get_layout(view_format); 1275b8e80941Smrg 1276b8e80941Smrg perf_debug(dbg, "Incompatible sampling format (%s) for CCS (%s)\n", 1277b8e80941Smrg view_fmtl->name, res_fmtl->name); 1278b8e80941Smrg 1279b8e80941Smrg return false; 1280b8e80941Smrg } 1281b8e80941Smrg 1282b8e80941Smrg return true; 1283b8e80941Smrg} 1284b8e80941Smrg 1285b8e80941Smrgenum isl_aux_usage 1286b8e80941Smrgiris_resource_texture_aux_usage(struct iris_context *ice, 1287b8e80941Smrg const struct iris_resource *res, 1288b8e80941Smrg enum isl_format view_format, 1289b8e80941Smrg enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits) 1290b8e80941Smrg{ 1291b8e80941Smrg struct iris_screen *screen = (void *) ice->ctx.screen; 1292b8e80941Smrg struct gen_device_info *devinfo = &screen->devinfo; 1293b8e80941Smrg 1294b8e80941Smrg assert(devinfo->gen == 9 || astc5x5_wa_bits == 0); 1295b8e80941Smrg 1296b8e80941Smrg /* On gen9, ASTC 5x5 textures cannot live in the sampler cache along side 1297b8e80941Smrg * CCS or HiZ compressed textures. See gen9_apply_astc5x5_wa_flush() for 1298b8e80941Smrg * details. 1299b8e80941Smrg */ 1300b8e80941Smrg if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && 1301b8e80941Smrg res->aux.usage != ISL_AUX_USAGE_MCS) 1302b8e80941Smrg return ISL_AUX_USAGE_NONE; 1303b8e80941Smrg 1304b8e80941Smrg switch (res->aux.usage) { 1305b8e80941Smrg case ISL_AUX_USAGE_HIZ: 1306b8e80941Smrg if (sample_with_hiz(devinfo, res)) 1307b8e80941Smrg return ISL_AUX_USAGE_HIZ; 1308b8e80941Smrg break; 1309b8e80941Smrg 1310b8e80941Smrg case ISL_AUX_USAGE_MCS: 1311b8e80941Smrg return ISL_AUX_USAGE_MCS; 1312b8e80941Smrg 1313b8e80941Smrg case ISL_AUX_USAGE_CCS_D: 1314b8e80941Smrg case ISL_AUX_USAGE_CCS_E: 1315b8e80941Smrg /* If we don't have any unresolved color, report an aux usage of 1316b8e80941Smrg * ISL_AUX_USAGE_NONE. This way, texturing won't even look at the 1317b8e80941Smrg * aux surface and we can save some bandwidth. 1318b8e80941Smrg */ 1319b8e80941Smrg if (!has_color_unresolved(res, 0, INTEL_REMAINING_LEVELS, 1320b8e80941Smrg 0, INTEL_REMAINING_LAYERS)) 1321b8e80941Smrg return ISL_AUX_USAGE_NONE; 1322b8e80941Smrg 1323b8e80941Smrg if (can_texture_with_ccs(devinfo, &ice->dbg, res, view_format)) 1324b8e80941Smrg return ISL_AUX_USAGE_CCS_E; 1325b8e80941Smrg break; 1326b8e80941Smrg 1327b8e80941Smrg default: 1328b8e80941Smrg break; 1329b8e80941Smrg } 1330b8e80941Smrg 1331b8e80941Smrg return ISL_AUX_USAGE_NONE; 1332b8e80941Smrg} 1333b8e80941Smrg 1334b8e80941Smrgstatic bool 1335b8e80941Smrgisl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b) 1336b8e80941Smrg{ 1337b8e80941Smrg /* On gen8 and earlier, the hardware was only capable of handling 0/1 clear 1338b8e80941Smrg * values so sRGB curve application was a no-op for all fast-clearable 1339b8e80941Smrg * formats. 1340b8e80941Smrg * 1341b8e80941Smrg * On gen9+, the hardware supports arbitrary clear values. For sRGB clear 1342b8e80941Smrg * values, the hardware interprets the floats, not as what would be 1343b8e80941Smrg * returned from the sampler (or written by the shader), but as being 1344b8e80941Smrg * between format conversion and sRGB curve application. This means that 1345b8e80941Smrg * we can switch between sRGB and UNORM without having to whack the clear 1346b8e80941Smrg * color. 1347b8e80941Smrg */ 1348b8e80941Smrg return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b); 1349b8e80941Smrg} 1350b8e80941Smrg 1351b8e80941Smrgvoid 1352b8e80941Smrgiris_resource_prepare_texture(struct iris_context *ice, 1353b8e80941Smrg struct iris_batch *batch, 1354b8e80941Smrg struct iris_resource *res, 1355b8e80941Smrg enum isl_format view_format, 1356b8e80941Smrg uint32_t start_level, uint32_t num_levels, 1357b8e80941Smrg uint32_t start_layer, uint32_t num_layers, 1358b8e80941Smrg enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits) 1359b8e80941Smrg{ 1360b8e80941Smrg enum isl_aux_usage aux_usage = 1361b8e80941Smrg iris_resource_texture_aux_usage(ice, res, view_format, astc5x5_wa_bits); 1362b8e80941Smrg 1363b8e80941Smrg bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE; 1364b8e80941Smrg 1365b8e80941Smrg /* Clear color is specified as ints or floats and the conversion is done by 1366b8e80941Smrg * the sampler. If we have a texture view, we would have to perform the 1367b8e80941Smrg * clear color conversion manually. Just disable clear color. 1368b8e80941Smrg */ 1369b8e80941Smrg if (!isl_formats_are_fast_clear_compatible(res->surf.format, view_format)) 1370b8e80941Smrg clear_supported = false; 1371b8e80941Smrg 1372b8e80941Smrg iris_resource_prepare_access(ice, batch, res, start_level, num_levels, 1373b8e80941Smrg start_layer, num_layers, 1374b8e80941Smrg aux_usage, clear_supported); 1375b8e80941Smrg} 1376b8e80941Smrg 1377b8e80941Smrgvoid 1378b8e80941Smrgiris_resource_prepare_image(struct iris_context *ice, 1379b8e80941Smrg struct iris_batch *batch, 1380b8e80941Smrg struct iris_resource *res) 1381b8e80941Smrg{ 1382b8e80941Smrg /* The data port doesn't understand any compression */ 1383b8e80941Smrg iris_resource_prepare_access(ice, batch, res, 0, INTEL_REMAINING_LEVELS, 1384b8e80941Smrg 0, INTEL_REMAINING_LAYERS, 1385b8e80941Smrg ISL_AUX_USAGE_NONE, false); 1386b8e80941Smrg} 1387b8e80941Smrg 1388b8e80941Smrgenum isl_aux_usage 1389b8e80941Smrgiris_resource_render_aux_usage(struct iris_context *ice, 1390b8e80941Smrg struct iris_resource *res, 1391b8e80941Smrg enum isl_format render_format, 1392b8e80941Smrg bool blend_enabled, 1393b8e80941Smrg bool draw_aux_disabled) 1394b8e80941Smrg{ 1395b8e80941Smrg struct iris_screen *screen = (void *) ice->ctx.screen; 1396b8e80941Smrg struct gen_device_info *devinfo = &screen->devinfo; 1397b8e80941Smrg 1398b8e80941Smrg if (draw_aux_disabled) 1399b8e80941Smrg return ISL_AUX_USAGE_NONE; 1400b8e80941Smrg 1401b8e80941Smrg switch (res->aux.usage) { 1402b8e80941Smrg case ISL_AUX_USAGE_MCS: 1403b8e80941Smrg return ISL_AUX_USAGE_MCS; 1404b8e80941Smrg 1405b8e80941Smrg case ISL_AUX_USAGE_CCS_D: 1406b8e80941Smrg case ISL_AUX_USAGE_CCS_E: 1407b8e80941Smrg /* Gen9+ hardware technically supports non-0/1 clear colors with sRGB 1408b8e80941Smrg * formats. However, there are issues with blending where it doesn't 1409b8e80941Smrg * properly apply the sRGB curve to the clear color when blending. 1410b8e80941Smrg */ 1411b8e80941Smrg if (devinfo->gen >= 9 && blend_enabled && 1412b8e80941Smrg isl_format_is_srgb(render_format) && 1413b8e80941Smrg !isl_color_value_is_zero_one(res->aux.clear_color, render_format)) 1414b8e80941Smrg return ISL_AUX_USAGE_NONE; 1415b8e80941Smrg 1416b8e80941Smrg if (res->aux.usage == ISL_AUX_USAGE_CCS_E && 1417b8e80941Smrg format_ccs_e_compat_with_resource(devinfo, res, render_format)) 1418b8e80941Smrg return ISL_AUX_USAGE_CCS_E; 1419b8e80941Smrg 1420b8e80941Smrg /* Otherwise, we have to fall back to CCS_D */ 1421b8e80941Smrg return ISL_AUX_USAGE_CCS_D; 1422b8e80941Smrg 1423b8e80941Smrg default: 1424b8e80941Smrg return ISL_AUX_USAGE_NONE; 1425b8e80941Smrg } 1426b8e80941Smrg} 1427b8e80941Smrg 1428b8e80941Smrgvoid 1429b8e80941Smrgiris_resource_prepare_render(struct iris_context *ice, 1430b8e80941Smrg struct iris_batch *batch, 1431b8e80941Smrg struct iris_resource *res, uint32_t level, 1432b8e80941Smrg uint32_t start_layer, uint32_t layer_count, 1433b8e80941Smrg enum isl_aux_usage aux_usage) 1434b8e80941Smrg{ 1435b8e80941Smrg iris_resource_prepare_access(ice, batch, res, level, 1, start_layer, 1436b8e80941Smrg layer_count, aux_usage, 1437b8e80941Smrg aux_usage != ISL_AUX_USAGE_NONE); 1438b8e80941Smrg} 1439b8e80941Smrg 1440b8e80941Smrgvoid 1441b8e80941Smrgiris_resource_finish_render(struct iris_context *ice, 1442b8e80941Smrg struct iris_resource *res, uint32_t level, 1443b8e80941Smrg uint32_t start_layer, uint32_t layer_count, 1444b8e80941Smrg enum isl_aux_usage aux_usage) 1445b8e80941Smrg{ 1446b8e80941Smrg iris_resource_finish_write(ice, res, level, start_layer, layer_count, 1447b8e80941Smrg aux_usage); 1448b8e80941Smrg} 1449b8e80941Smrg 1450b8e80941Smrgvoid 1451b8e80941Smrgiris_resource_prepare_depth(struct iris_context *ice, 1452b8e80941Smrg struct iris_batch *batch, 1453b8e80941Smrg struct iris_resource *res, uint32_t level, 1454b8e80941Smrg uint32_t start_layer, uint32_t layer_count) 1455b8e80941Smrg{ 1456b8e80941Smrg iris_resource_prepare_access(ice, batch, res, level, 1, start_layer, 1457b8e80941Smrg layer_count, res->aux.usage, !!res->aux.bo); 1458b8e80941Smrg} 1459b8e80941Smrg 1460b8e80941Smrgvoid 1461b8e80941Smrgiris_resource_finish_depth(struct iris_context *ice, 1462b8e80941Smrg struct iris_resource *res, uint32_t level, 1463b8e80941Smrg uint32_t start_layer, uint32_t layer_count, 1464b8e80941Smrg bool depth_written) 1465b8e80941Smrg{ 1466b8e80941Smrg if (depth_written) { 1467b8e80941Smrg iris_resource_finish_write(ice, res, level, start_layer, layer_count, 1468b8e80941Smrg res->aux.usage); 1469b8e80941Smrg } 1470b8e80941Smrg} 1471