1/* 2 * Copyright © 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 * DEALINGS IN THE SOFTWARE. 21 */ 22 23/** 24 * @file iris_resolve.c 25 * 26 * This file handles resolve tracking for main and auxiliary surfaces. 27 * 28 * It also handles our cache tracking. We have sets for the render cache, 29 * depth cache, and so on. If a BO is in a cache's set, then it may have 30 * data in that cache. The helpers take care of emitting flushes for 31 * render-to-texture, format reinterpretation issues, and other situations. 32 */ 33 34#include "util/hash_table.h" 35#include "util/set.h" 36#include "iris_context.h" 37 38/** 39 * Disable auxiliary buffers if a renderbuffer is also bound as a texture 40 * or shader image. This causes a self-dependency, where both rendering 41 * and sampling may concurrently read or write the CCS buffer, causing 42 * incorrect pixels. 
43 */ 44static bool 45disable_rb_aux_buffer(struct iris_context *ice, 46 bool *draw_aux_buffer_disabled, 47 struct iris_resource *tex_res, 48 unsigned min_level, unsigned num_levels, 49 const char *usage) 50{ 51 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 52 bool found = false; 53 54 /* We only need to worry about color compression and fast clears. */ 55 if (tex_res->aux.usage != ISL_AUX_USAGE_CCS_D && 56 tex_res->aux.usage != ISL_AUX_USAGE_CCS_E) 57 return false; 58 59 for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { 60 struct iris_surface *surf = (void *) cso_fb->cbufs[i]; 61 if (!surf) 62 continue; 63 64 struct iris_resource *rb_res = (void *) surf->base.texture; 65 66 if (rb_res->bo == tex_res->bo && 67 surf->base.u.tex.level >= min_level && 68 surf->base.u.tex.level < min_level + num_levels) { 69 found = draw_aux_buffer_disabled[i] = true; 70 } 71 } 72 73 if (found) { 74 perf_debug(&ice->dbg, 75 "Disabling CCS because a renderbuffer is also bound %s.\n", 76 usage); 77 } 78 79 return found; 80} 81 82static void 83resolve_sampler_views(struct iris_context *ice, 84 struct iris_batch *batch, 85 struct iris_shader_state *shs, 86 const struct shader_info *info, 87 bool *draw_aux_buffer_disabled, 88 bool consider_framebuffer) 89{ 90 uint32_t views = info ? 
(shs->bound_sampler_views & info->textures_used) : 0; 91 92 unsigned astc5x5_wa_bits = 0; // XXX: actual tracking 93 94 while (views) { 95 const int i = u_bit_scan(&views); 96 struct iris_sampler_view *isv = shs->textures[i]; 97 struct iris_resource *res = (void *) isv->base.texture; 98 99 if (res->base.target != PIPE_BUFFER) { 100 if (consider_framebuffer) { 101 disable_rb_aux_buffer(ice, draw_aux_buffer_disabled, 102 res, isv->view.base_level, isv->view.levels, 103 "for sampling"); 104 } 105 106 iris_resource_prepare_texture(ice, batch, res, isv->view.format, 107 isv->view.base_level, isv->view.levels, 108 isv->view.base_array_layer, 109 isv->view.array_len, 110 astc5x5_wa_bits); 111 } 112 113 iris_cache_flush_for_read(batch, res->bo); 114 } 115} 116 117static void 118resolve_image_views(struct iris_context *ice, 119 struct iris_batch *batch, 120 struct iris_shader_state *shs, 121 bool *draw_aux_buffer_disabled, 122 bool consider_framebuffer) 123{ 124 /* TODO: Consider images used by program */ 125 uint32_t views = shs->bound_image_views; 126 127 while (views) { 128 const int i = u_bit_scan(&views); 129 struct iris_resource *res = (void *) shs->image[i].base.resource; 130 131 if (res->base.target != PIPE_BUFFER) { 132 if (consider_framebuffer) { 133 disable_rb_aux_buffer(ice, draw_aux_buffer_disabled, 134 res, 0, ~0, "as a shader image"); 135 } 136 137 iris_resource_prepare_image(ice, batch, res); 138 } 139 140 iris_cache_flush_for_read(batch, res->bo); 141 } 142} 143 144 145/** 146 * \brief Resolve buffers before drawing. 147 * 148 * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each 149 * enabled depth texture, and flush the render cache for any dirty textures. 
 */
void
iris_predraw_resolve_inputs(struct iris_context *ice,
                            struct iris_batch *batch,
                            bool *draw_aux_buffer_disabled,
                            gl_shader_stage stage,
                            bool consider_framebuffer)
{
   struct iris_shader_state *shs = &ice->state.shaders[stage];
   const struct shader_info *info = iris_get_shader_info(ice, stage);

   /* NOTE(review): the shift relies on the per-stage IRIS_DIRTY_BINDINGS_*
    * bits being consecutive starting at the VS bit, in stage order.
    */
   uint64_t dirty = (IRIS_DIRTY_BINDINGS_VS << stage) |
                    (consider_framebuffer ? IRIS_DIRTY_BINDINGS_FS : 0);

   /* Only re-resolve when bindings relevant to this stage have changed. */
   if (ice->state.dirty & dirty) {
      resolve_sampler_views(ice, batch, shs, info, draw_aux_buffer_disabled,
                            consider_framebuffer);
      resolve_image_views(ice, batch, shs, draw_aux_buffer_disabled,
                          consider_framebuffer);
   }

   // XXX: ASTC hacks
}

/**
 * Prepare the framebuffer's depth/stencil and color attachments for
 * rendering: resolve aux data as needed, pick each color buffer's aux
 * usage for this draw, and flush caches for BOs written earlier in the
 * batch.
 */
void
iris_predraw_resolve_framebuffer(struct iris_context *ice,
                                 struct iris_batch *batch,
                                 bool *draw_aux_buffer_disabled)
{
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;

   if (ice->state.dirty & IRIS_DIRTY_DEPTH_BUFFER) {
      struct pipe_surface *zs_surf = cso_fb->zsbuf;

      if (zs_surf) {
         struct iris_resource *z_res, *s_res;
         iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res);
         unsigned num_layers =
            zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;

         if (z_res) {
            iris_resource_prepare_depth(ice, batch, z_res,
                                        zs_surf->u.tex.level,
                                        zs_surf->u.tex.first_layer,
                                        num_layers);
            iris_cache_flush_for_depth(batch, z_res->bo);
         }

         /* Stencil has no aux surface to prepare; only flush the cache. */
         if (s_res) {
            iris_cache_flush_for_depth(batch, s_res->bo);
         }
      }
   }

   if (ice->state.dirty & (IRIS_DIRTY_BINDINGS_FS | IRIS_DIRTY_BLEND_STATE)) {
      for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
         struct iris_surface *surf = (void *) cso_fb->cbufs[i];
         if (!surf)
            continue;

         struct iris_resource *res = (void *) surf->base.texture;

         /* Choose the aux usage for this draw, taking blending and any
          * render-vs-sample self-dependency disables into account.
          */
         enum isl_aux_usage aux_usage =
            iris_resource_render_aux_usage(ice, res, surf->view.format,
                                           ice->state.blend_enables & (1u << i),
                                           draw_aux_buffer_disabled[i]);

         if (ice->state.draw_aux_usage[i] != aux_usage) {
            ice->state.draw_aux_usage[i] = aux_usage;
            /* XXX: Need to track which bindings to make dirty */
            ice->state.dirty |= IRIS_ALL_DIRTY_BINDINGS;
         }

         iris_resource_prepare_render(ice, batch, res, surf->view.base_level,
                                      surf->view.base_array_layer,
                                      surf->view.array_len,
                                      aux_usage);

         iris_cache_flush_for_render(batch, res->bo, surf->view.format,
                                     aux_usage);
      }
   }
}

/**
 * \brief Call this after drawing to mark which buffers need resolving
 *
 * If the depth buffer was written to and if it has an accompanying HiZ
 * buffer, then mark that it needs a depth resolve.
 *
 * If the color buffer is a multisample window system buffer, then
 * mark that it needs a downsample.
 *
 * Also mark any render targets which will be textured as needing a render
 * cache flush.
 */
void
iris_postdraw_update_resolve_tracking(struct iris_context *ice,
                                      struct iris_batch *batch)
{
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;

   // XXX: front buffer drawing?

   /* Depth aux state can only have changed if depth state was re-emitted. */
   bool may_have_resolved_depth =
      ice->state.dirty & (IRIS_DIRTY_DEPTH_BUFFER |
                          IRIS_DIRTY_WM_DEPTH_STENCIL);

   struct pipe_surface *zs_surf = cso_fb->zsbuf;
   if (zs_surf) {
      struct iris_resource *z_res, *s_res;
      iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res);
      unsigned num_layers =
         zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;

      if (z_res) {
         if (may_have_resolved_depth) {
            iris_resource_finish_depth(ice, z_res, zs_surf->u.tex.level,
                                       zs_surf->u.tex.first_layer, num_layers,
                                       ice->state.depth_writes_enabled);
         }

         if (ice->state.depth_writes_enabled)
            iris_depth_cache_add_bo(batch, z_res->bo);
      }

      if (s_res) {
         if (may_have_resolved_depth) {
            /* Stencil carries no aux data (ISL_AUX_USAGE_NONE). */
            iris_resource_finish_write(ice, s_res, zs_surf->u.tex.level,
                                       zs_surf->u.tex.first_layer, num_layers,
                                       ISL_AUX_USAGE_NONE);
         }

         if (ice->state.stencil_writes_enabled)
            iris_depth_cache_add_bo(batch, s_res->bo);
      }
   }

   bool may_have_resolved_color =
      ice->state.dirty & (IRIS_DIRTY_BINDINGS_FS | IRIS_DIRTY_BLEND_STATE);

   for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
      struct iris_surface *surf = (void *) cso_fb->cbufs[i];
      if (!surf)
         continue;

      struct iris_resource *res = (void *) surf->base.texture;
      enum isl_aux_usage aux_usage = ice->state.draw_aux_usage[i];

      /* Record the render-cache write so later reads get flushed. */
      iris_render_cache_add_bo(batch, res->bo, surf->view.format,
                               aux_usage);

      if (may_have_resolved_color) {
         union pipe_surface_desc *desc = &surf->base.u;
         unsigned num_layers =
            desc->tex.last_layer - desc->tex.first_layer + 1;
         iris_resource_finish_render(ice, res, desc->tex.level,
                                     desc->tex.first_layer, num_layers,
                                     aux_usage);
      }
   }
}

/**
 * Clear the cache-tracking sets.
 */
void
iris_cache_sets_clear(struct iris_batch *batch)
{
   /* NOTE(review): relies on Mesa's hash_table/set permitting removal of
    * the current entry while iterating — confirm against util/hash_table.h.
    */
   hash_table_foreach(batch->cache.render, render_entry)
      _mesa_hash_table_remove(batch->cache.render, render_entry);

   set_foreach(batch->cache.depth, depth_entry)
      _mesa_set_remove(batch->cache.depth, depth_entry);
}

/**
 * Emits an appropriate flush for a BO if it has been rendered to within the
 * same batchbuffer as a read that's about to be emitted.
 *
 * The GPU has separate, incoherent caches for the render cache and the
 * sampler cache, along with other caches.  Usually data in the different
 * caches don't interact (e.g. we don't render to our driver-generated
 * immediate constant data), but for render-to-texture in FBOs we definitely
 * do.  When a batchbuffer is flushed, the kernel will ensure that everything
 * necessary is flushed before another use of that BO, but for reuse from
 * different caches within a batchbuffer, it's all our responsibility.
 */
void
iris_flush_depth_and_render_caches(struct iris_batch *batch)
{
   /* Flush the write caches (with a CS stall so the flush completes)... */
   iris_emit_pipe_control_flush(batch,
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                PIPE_CONTROL_CS_STALL);

   /* ...then invalidate the read caches that might hold stale data. */
   iris_emit_pipe_control_flush(batch,
                                PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                PIPE_CONTROL_CONST_CACHE_INVALIDATE);

   /* Everything is coherent again; forget the tracked writes. */
   iris_cache_sets_clear(batch);
}

/* Flush if \p bo was written via the render or depth cache in this batch. */
void
iris_cache_flush_for_read(struct iris_batch *batch,
                          struct iris_bo *bo)
{
   if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo) ||
       _mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
      iris_flush_depth_and_render_caches(batch);
}

/* Pack (format, aux_usage) into a pointer-sized key for the render set. */
static void *
format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
{
   return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
}

void
iris_cache_flush_for_render(struct iris_batch *batch,
                            struct iris_bo *bo,
                            enum isl_format format,
                            enum isl_aux_usage aux_usage)
{
   if (_mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
      iris_flush_depth_and_render_caches(batch);

   /* Check to see if this bo has been used by a previous rendering operation
    * but with a different format or aux usage.  If it has, flush the render
    * cache so we ensure that it's only in there with one format or aux usage
    * at a time.
    *
    * Even though it's not obvious, this can easily happen in practice.
    * Suppose a client is blending on a surface with sRGB encode enabled on
    * gen9.  This implies that you get AUX_USAGE_CCS_D at best.  If the client
    * then disables sRGB decode and continues blending we will flip on
    * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
    * perfectly valid since CCS_E is a subset of CCS_D).  However, this means
    * that we have fragments in-flight which are rendering with UNORM+CCS_E
    * and other fragments in-flight with SRGB+CCS_D on the same surface at the
    * same time and the pixel scoreboard and color blender are trying to sort
    * it all out.  This ends badly (i.e. GPU hangs).
    *
    * To date, we have never observed GPU hangs or even corruption to be
    * associated with switching the format, only the aux usage.  However,
    * there are comments in various docs which indicate that the render cache
    * isn't 100% resilient to format changes.  We may as well be conservative
    * and flush on format changes too.  We can always relax this later if we
    * find it to be a performance problem.
    */
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
   if (entry && entry->data != format_aux_tuple(format, aux_usage))
      iris_flush_depth_and_render_caches(batch);
}

/* Record a render-cache write of \p bo with the given format/aux usage. */
void
iris_render_cache_add_bo(struct iris_batch *batch,
                         struct iris_bo *bo,
                         enum isl_format format,
                         enum isl_aux_usage aux_usage)
{
#ifndef NDEBUG
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
   if (entry) {
      /* Otherwise, someone didn't do a flush_for_render and that would be
       * very bad indeed.
       */
      assert(entry->data == format_aux_tuple(format, aux_usage));
   }
#endif

   _mesa_hash_table_insert_pre_hashed(batch->cache.render, bo->hash, bo,
                                      format_aux_tuple(format, aux_usage));
}

/* Flush if \p bo was written via the render cache before a depth access. */
void
iris_cache_flush_for_depth(struct iris_batch *batch,
                           struct iris_bo *bo)
{
   if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo))
      iris_flush_depth_and_render_caches(batch);
}

/* Record a depth-cache write of \p bo. */
void
iris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo)
{
   _mesa_set_add_pre_hashed(batch->cache.depth, bo->hash, bo);
}

/* Perform a CCS resolve (full or partial) on one level/layer via BLORP. */
static void
iris_resolve_color(struct iris_context *ice,
                   struct iris_batch *batch,
                   struct iris_resource *res,
                   unsigned level, unsigned layer,
                   enum isl_aux_op resolve_op)
{
   //DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer);

   struct blorp_surf surf;
   iris_blorp_surf_for_resource(&ice->vtbl, &surf, &res->base, res->aux.usage,
                                level, true);

   iris_batch_maybe_flush(batch, 1500);

   /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
    *
    *    "Any transition from any value in {Clear, Render, Resolve} to a
    *    different value in {Clear, Render, Resolve} requires end of pipe
    *    synchronization."
    *
    * In other words, fast clear ops are not properly synchronized with
    * other drawing.  We need to use a PIPE_CONTROL to ensure that the
    * contents of the previous draw hit the render target before we resolve
    * and again afterwards to ensure that the resolve is complete before we
    * do any more regular drawing.
    */
   iris_emit_end_of_pipe_sync(batch, PIPE_CONTROL_RENDER_TARGET_FLUSH);

   struct blorp_batch blorp_batch;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
   /* Resolve with the linear format; sRGB decode is irrelevant here. */
   blorp_ccs_resolve(&blorp_batch, &surf, level, layer, 1,
                     isl_format_srgb_to_linear(res->surf.format),
                     resolve_op);
   blorp_batch_finish(&blorp_batch);

   /* See comment above */
   iris_emit_end_of_pipe_sync(batch, PIPE_CONTROL_RENDER_TARGET_FLUSH);
}

/* Partially resolve MCS data so the given layers can be accessed without
 * fast-clear support.
 */
static void
iris_mcs_partial_resolve(struct iris_context *ice,
                         struct iris_batch *batch,
                         struct iris_resource *res,
                         uint32_t start_layer,
                         uint32_t num_layers)
{
   //DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt,
       //start_layer, start_layer + num_layers - 1);

   assert(res->aux.usage == ISL_AUX_USAGE_MCS);

   struct blorp_surf surf;
   iris_blorp_surf_for_resource(&ice->vtbl, &surf, &res->base, res->aux.usage,
                                0, true);

   struct blorp_batch blorp_batch;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
   blorp_mcs_partial_resolve(&blorp_batch, &surf,
                             isl_format_srgb_to_linear(res->surf.format),
                             start_layer, num_layers);
   blorp_batch_finish(&blorp_batch);
}


/**
 * Return true if the format that will be used to access the resource is
 * CCS_E-compatible with the resource's linear/non-sRGB format.
 *
 * Why use the linear format?  Well, although the resource may be specified
 * with an sRGB format, the usage of that color space/format can be toggled.
 * Since our HW tends to support more linear formats than sRGB ones, we use
 * this format variant to check for CCS_E compatibility.
 */
static bool
format_ccs_e_compat_with_resource(const struct gen_device_info *devinfo,
                                  const struct iris_resource *res,
                                  enum isl_format access_format)
{
   assert(res->aux.usage == ISL_AUX_USAGE_CCS_E);

   enum isl_format isl_format = isl_format_srgb_to_linear(res->surf.format);
   return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format);
}

/* Return true if the sampler can read this resource directly through HiZ. */
static bool
sample_with_hiz(const struct gen_device_info *devinfo,
                const struct iris_resource *res)
{
   if (!devinfo->has_sample_with_hiz)
      return false;

   if (res->aux.usage != ISL_AUX_USAGE_HIZ)
      return false;

   /* It seems the hardware won't fallback to the depth buffer if some of the
    * mipmap levels aren't available in the HiZ buffer.  So we need all levels
    * of the texture to be HiZ enabled.
    */
   for (unsigned level = 0; level < res->surf.levels; ++level) {
      if (!iris_resource_level_has_hiz(res, level))
         return false;
   }

   /* If compressed multisampling is enabled, then we use it for the auxiliary
    * buffer instead.
    *
    * From the BDW PRM (Volume 2d: Command Reference: Structures
    *                   RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
    *
    *  "If this field is set to AUX_HIZ, Number of Multisamples must be
    *   MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D.
    *
    * There is no such blurb for 1D textures, but there is sufficient evidence
    * that this is broken on SKL+.
    */
   // XXX: i965 disables this for arrays too, is that reasonable?
   return res->surf.samples == 1 && res->surf.dim == ISL_SURF_DIM_2D;
}

/**
 * Perform a HiZ or depth resolve operation.
 *
 * For an overview of HiZ ops, see the following sections of the Sandy Bridge
 * PRM, Volume 1, Part 2:
 *   - 7.5.3.1 Depth Buffer Clear
 *   - 7.5.3.2 Depth Buffer Resolve
 *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
 */
void
iris_hiz_exec(struct iris_context *ice,
              struct iris_batch *batch,
              struct iris_resource *res,
              unsigned int level, unsigned int start_layer,
              unsigned int num_layers, enum isl_aux_op op,
              bool update_clear_depth)
{
   assert(iris_resource_level_has_hiz(res, level));
   assert(op != ISL_AUX_OP_NONE);
   /* Only used for debug logging below; silence unused warnings otherwise. */
   UNUSED const char *name = NULL;

   switch (op) {
   case ISL_AUX_OP_FULL_RESOLVE:
      name = "depth resolve";
      break;
   case ISL_AUX_OP_AMBIGUATE:
      name = "hiz ambiguate";
      break;
   case ISL_AUX_OP_FAST_CLEAR:
      name = "depth clear";
      break;
   case ISL_AUX_OP_PARTIAL_RESOLVE:
   case ISL_AUX_OP_NONE:
      unreachable("Invalid HiZ op");
   }

   //DBG("%s %s to mt %p level %d layers %d-%d\n",
       //__func__, name, mt, level, start_layer, start_layer + num_layers - 1);

   /* The following stalls and flushes are only documented to be required
    * for HiZ clear operations.  However, they also seem to be required for
    * resolve operations.
    *
    * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
    *
    *   "If other rendering operations have preceded this clear, a
    *    PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
    *    enabled must be issued before the rectangle primitive used for
    *    the depth buffer clear operation."
    *
    * Same applies for Gen8 and Gen9.
    *
    * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
    * PIPE_CONTROL, Depth Cache Flush Enable:
    *
    *   "This bit must not be set when Depth Stall Enable bit is set in
    *    this packet."
    *
    * This is confirmed to hold for real, Haswell gets immediate gpu hangs.
    *
    * Therefore issue two pipe control flushes, one for cache flush and
    * another for depth stall.
    */
   iris_emit_pipe_control_flush(batch,
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                PIPE_CONTROL_CS_STALL);

   iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL);

   assert(res->aux.usage == ISL_AUX_USAGE_HIZ && res->aux.bo);

   iris_batch_maybe_flush(batch, 1500);

   struct blorp_surf surf;
   iris_blorp_surf_for_resource(&ice->vtbl, &surf, &res->base,
                                ISL_AUX_USAGE_HIZ, level, true);

   struct blorp_batch blorp_batch;
   enum blorp_batch_flags flags = 0;
   flags |= update_clear_depth ? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, flags);
   blorp_hiz_op(&blorp_batch, &surf, level, start_layer, num_layers, op);
   blorp_batch_finish(&blorp_batch);

   /* The following stalls and flushes are only documented to be required
    * for HiZ clear operations.  However, they also seem to be required for
    * resolve operations.
    *
    * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
    *
    *    "Depth buffer clear pass using any of the methods (WM_STATE,
    *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
    *    PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
    *    "set" before starting to render.  DepthStall and DepthFlush are
    *    not needed between consecutive depth clear passes nor is it
    *    required if the depth clear pass was done with
    *    'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
    *
    * TODO: Such as the spec says, this could be conditional.
    */
   iris_emit_pipe_control_flush(batch,
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                PIPE_CONTROL_DEPTH_STALL);
}

/**
 * Does the resource's slice have hiz enabled?
668 */ 669bool 670iris_resource_level_has_hiz(const struct iris_resource *res, uint32_t level) 671{ 672 iris_resource_check_level_layer(res, level, 0); 673 return res->aux.has_hiz & 1 << level; 674} 675 676/** \brief Assert that the level and layer are valid for the resource. */ 677void 678iris_resource_check_level_layer(UNUSED const struct iris_resource *res, 679 UNUSED uint32_t level, UNUSED uint32_t layer) 680{ 681 assert(level < res->surf.levels); 682 assert(layer < util_num_layers(&res->base, level)); 683} 684 685static inline uint32_t 686miptree_level_range_length(const struct iris_resource *res, 687 uint32_t start_level, uint32_t num_levels) 688{ 689 assert(start_level < res->surf.levels); 690 691 if (num_levels == INTEL_REMAINING_LAYERS) 692 num_levels = res->surf.levels; 693 694 /* Check for overflow */ 695 assert(start_level + num_levels >= start_level); 696 assert(start_level + num_levels <= res->surf.levels); 697 698 return num_levels; 699} 700 701static inline uint32_t 702miptree_layer_range_length(const struct iris_resource *res, uint32_t level, 703 uint32_t start_layer, uint32_t num_layers) 704{ 705 assert(level <= res->base.last_level); 706 707 const uint32_t total_num_layers = iris_get_num_logical_layers(res, level); 708 assert(start_layer < total_num_layers); 709 if (num_layers == INTEL_REMAINING_LAYERS) 710 num_layers = total_num_layers - start_layer; 711 /* Check for overflow */ 712 assert(start_layer + num_layers >= start_layer); 713 assert(start_layer + num_layers <= total_num_layers); 714 715 return num_layers; 716} 717 718static bool 719has_color_unresolved(const struct iris_resource *res, 720 unsigned start_level, unsigned num_levels, 721 unsigned start_layer, unsigned num_layers) 722{ 723 if (!res->aux.bo) 724 return false; 725 726 /* Clamp the level range to fit the resource */ 727 num_levels = miptree_level_range_length(res, start_level, num_levels); 728 729 for (uint32_t l = 0; l < num_levels; l++) { 730 const uint32_t level = 
start_level + l; 731 const uint32_t level_layers = 732 miptree_layer_range_length(res, level, start_layer, num_layers); 733 for (unsigned a = 0; a < level_layers; a++) { 734 enum isl_aux_state aux_state = 735 iris_resource_get_aux_state(res, level, start_layer + a); 736 assert(aux_state != ISL_AUX_STATE_AUX_INVALID); 737 if (aux_state != ISL_AUX_STATE_PASS_THROUGH) 738 return true; 739 } 740 } 741 742 return false; 743} 744 745static enum isl_aux_op 746get_ccs_d_resolve_op(enum isl_aux_state aux_state, 747 enum isl_aux_usage aux_usage, 748 bool fast_clear_supported) 749{ 750 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_CCS_D); 751 752 const bool ccs_supported = aux_usage == ISL_AUX_USAGE_CCS_D; 753 754 assert(ccs_supported == fast_clear_supported); 755 756 switch (aux_state) { 757 case ISL_AUX_STATE_CLEAR: 758 case ISL_AUX_STATE_PARTIAL_CLEAR: 759 if (!ccs_supported) 760 return ISL_AUX_OP_FULL_RESOLVE; 761 else 762 return ISL_AUX_OP_NONE; 763 764 case ISL_AUX_STATE_PASS_THROUGH: 765 return ISL_AUX_OP_NONE; 766 767 case ISL_AUX_STATE_RESOLVED: 768 case ISL_AUX_STATE_AUX_INVALID: 769 case ISL_AUX_STATE_COMPRESSED_CLEAR: 770 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: 771 break; 772 } 773 774 unreachable("Invalid aux state for CCS_D"); 775} 776 777static enum isl_aux_op 778get_ccs_e_resolve_op(enum isl_aux_state aux_state, 779 enum isl_aux_usage aux_usage, 780 bool fast_clear_supported) 781{ 782 /* CCS_E surfaces can be accessed as CCS_D if we're careful. 
*/ 783 assert(aux_usage == ISL_AUX_USAGE_NONE || 784 aux_usage == ISL_AUX_USAGE_CCS_D || 785 aux_usage == ISL_AUX_USAGE_CCS_E); 786 787 if (aux_usage == ISL_AUX_USAGE_CCS_D) 788 assert(fast_clear_supported); 789 790 switch (aux_state) { 791 case ISL_AUX_STATE_CLEAR: 792 case ISL_AUX_STATE_PARTIAL_CLEAR: 793 if (fast_clear_supported) 794 return ISL_AUX_OP_NONE; 795 else if (aux_usage == ISL_AUX_USAGE_CCS_E) 796 return ISL_AUX_OP_PARTIAL_RESOLVE; 797 else 798 return ISL_AUX_OP_FULL_RESOLVE; 799 800 case ISL_AUX_STATE_COMPRESSED_CLEAR: 801 if (aux_usage != ISL_AUX_USAGE_CCS_E) 802 return ISL_AUX_OP_FULL_RESOLVE; 803 else if (!fast_clear_supported) 804 return ISL_AUX_OP_PARTIAL_RESOLVE; 805 else 806 return ISL_AUX_OP_NONE; 807 808 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: 809 if (aux_usage != ISL_AUX_USAGE_CCS_E) 810 return ISL_AUX_OP_FULL_RESOLVE; 811 else 812 return ISL_AUX_OP_NONE; 813 814 case ISL_AUX_STATE_PASS_THROUGH: 815 return ISL_AUX_OP_NONE; 816 817 case ISL_AUX_STATE_RESOLVED: 818 case ISL_AUX_STATE_AUX_INVALID: 819 break; 820 } 821 822 unreachable("Invalid aux state for CCS_E"); 823} 824 825static void 826iris_resource_prepare_ccs_access(struct iris_context *ice, 827 struct iris_batch *batch, 828 struct iris_resource *res, 829 uint32_t level, uint32_t layer, 830 enum isl_aux_usage aux_usage, 831 bool fast_clear_supported) 832{ 833 enum isl_aux_state aux_state = iris_resource_get_aux_state(res, level, layer); 834 835 enum isl_aux_op resolve_op; 836 if (res->aux.usage == ISL_AUX_USAGE_CCS_E) { 837 resolve_op = get_ccs_e_resolve_op(aux_state, aux_usage, 838 fast_clear_supported); 839 } else { 840 assert(res->aux.usage == ISL_AUX_USAGE_CCS_D); 841 resolve_op = get_ccs_d_resolve_op(aux_state, aux_usage, 842 fast_clear_supported); 843 } 844 845 if (resolve_op != ISL_AUX_OP_NONE) { 846 iris_resolve_color(ice, batch, res, level, layer, resolve_op); 847 848 switch (resolve_op) { 849 case ISL_AUX_OP_FULL_RESOLVE: 850 /* The CCS full resolve operation destroys the CCS 
and sets it to the
          * pass-through state.  (You can also think of this as being both a
          * resolve and an ambiguate in one operation.)
          */
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_PASS_THROUGH);
         break;

      case ISL_AUX_OP_PARTIAL_RESOLVE:
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
         break;

      default:
         unreachable("Invalid resolve op");
      }
   }
}

/**
 * Update the tracked aux state for one (level, layer) of a CCS-compressed
 * color resource after it has been written through \p aux_usage.
 *
 * CCS_E surfaces remain compressed after any write; CCS_D only tracks
 * fast-clear state, so writes move clear states toward PARTIAL_CLEAR.
 */
static void
iris_resource_finish_ccs_write(struct iris_context *ice,
                               struct iris_resource *res,
                               uint32_t level, uint32_t layer,
                               enum isl_aux_usage aux_usage)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE ||
          aux_usage == ISL_AUX_USAGE_CCS_D ||
          aux_usage == ISL_AUX_USAGE_CCS_E);

   enum isl_aux_state aux_state =
      iris_resource_get_aux_state(res, level, layer);

   if (res->aux.usage == ISL_AUX_USAGE_CCS_E) {
      switch (aux_state) {
      case ISL_AUX_STATE_CLEAR:
      case ISL_AUX_STATE_PARTIAL_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_E ||
                aux_usage == ISL_AUX_USAGE_CCS_D);

         if (aux_usage == ISL_AUX_USAGE_CCS_E) {
            /* A compressed write leaves the surface compressed, possibly
             * still containing fast-clear blocks.
             */
            iris_resource_set_aux_state(ice, res, level, layer, 1,
                                        ISL_AUX_STATE_COMPRESSED_CLEAR);
         } else if (aux_state != ISL_AUX_STATE_PARTIAL_CLEAR) {
            /* An uncompressed (CCS_D) write replaces some clear blocks
             * with real data, so the clear is now only partial.
             */
            iris_resource_set_aux_state(ice, res, level, layer, 1,
                                        ISL_AUX_STATE_PARTIAL_CLEAR);
         }
         break;

      case ISL_AUX_STATE_COMPRESSED_CLEAR:
      case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_E);
         break; /* Nothing to do */

      case ISL_AUX_STATE_PASS_THROUGH:
         if (aux_usage == ISL_AUX_USAGE_CCS_E) {
            iris_resource_set_aux_state(ice, res, level, layer, 1,
                                        ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
         } else {
            /* Nothing to do */
         }
         break;

      case ISL_AUX_STATE_RESOLVED:
      case ISL_AUX_STATE_AUX_INVALID:
         unreachable("Invalid aux state for CCS_E");
      }
   } else {
      assert(res->aux.usage == ISL_AUX_USAGE_CCS_D);
      /* CCS_D is a bit simpler */
      switch (aux_state) {
      case ISL_AUX_STATE_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_D);
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_PARTIAL_CLEAR);
         break;

      case ISL_AUX_STATE_PARTIAL_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_D);
         break; /* Nothing to do */

      case ISL_AUX_STATE_PASS_THROUGH:
         /* Nothing to do */
         break;

      case ISL_AUX_STATE_COMPRESSED_CLEAR:
      case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      case ISL_AUX_STATE_RESOLVED:
      case ISL_AUX_STATE_AUX_INVALID:
         unreachable("Invalid aux state for CCS_D");
      }
   }
}

/**
 * Prepare one layer of an MCS-compressed multisampled surface for an
 * access with the given aux usage.  If the access cannot handle
 * fast-clear data, a partial resolve is performed first.
 */
static void
iris_resource_prepare_mcs_access(struct iris_context *ice,
                                 struct iris_batch *batch,
                                 struct iris_resource *res,
                                 uint32_t layer,
                                 enum isl_aux_usage aux_usage,
                                 bool fast_clear_supported)
{
   assert(aux_usage == ISL_AUX_USAGE_MCS);

   switch (iris_resource_get_aux_state(res, 0, layer)) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      if (!fast_clear_supported) {
         /* Resolve away fast-clear blocks but keep MCS compression. */
         iris_mcs_partial_resolve(ice, batch, res, layer, 1);
         iris_resource_set_aux_state(ice, res, 0, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      }
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_AUX_INVALID:
   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid aux state for MCS");
   }
}

/**
 * Update the tracked aux state for one layer of an MCS surface after a
 * write.  MCS surfaces are always compressed, so only the clear/no-clear
 * distinction can change.
 */
static void
iris_resource_finish_mcs_write(struct iris_context *ice,
                               struct iris_resource *res,
                               uint32_t layer,
                               enum isl_aux_usage aux_usage)
{
   assert(aux_usage == ISL_AUX_USAGE_MCS);

   switch (iris_resource_get_aux_state(res, 0, layer)) {
   case ISL_AUX_STATE_CLEAR:
      iris_resource_set_aux_state(ice, res, 0, layer, 1,
                                  ISL_AUX_STATE_COMPRESSED_CLEAR);
      break;

   case ISL_AUX_STATE_COMPRESSED_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_AUX_INVALID:
   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid aux state for MCS");
   }
}

/**
 * Prepare one (level, layer) of a HiZ depth surface for an access with the
 * given aux usage, emitting a full resolve or ambiguate when required.
 */
static void
iris_resource_prepare_hiz_access(struct iris_context *ice,
                                 struct iris_batch *batch,
                                 struct iris_resource *res,
                                 uint32_t level, uint32_t layer,
                                 enum isl_aux_usage aux_usage,
                                 bool fast_clear_supported)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);

   /* Pick the HiZ operation (if any) needed before the access. */
   enum isl_aux_op hiz_op = ISL_AUX_OP_NONE;
   switch (iris_resource_get_aux_state(res, level, layer)) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      if (aux_usage != ISL_AUX_USAGE_HIZ || !fast_clear_supported)
         hiz_op = ISL_AUX_OP_FULL_RESOLVE;
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      if (aux_usage != ISL_AUX_USAGE_HIZ)
         hiz_op = ISL_AUX_OP_FULL_RESOLVE;
      break;

   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_RESOLVED:
      break;

   case ISL_AUX_STATE_AUX_INVALID:
      if (aux_usage == ISL_AUX_USAGE_HIZ)
         hiz_op = ISL_AUX_OP_AMBIGUATE;
      break;

   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid HiZ state");
   }

   if (hiz_op != ISL_AUX_OP_NONE) {
      iris_hiz_exec(ice, batch, res, level, layer, 1, hiz_op, false);

      switch (hiz_op) {
      case ISL_AUX_OP_FULL_RESOLVE:
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_RESOLVED);
         break;

      case ISL_AUX_OP_AMBIGUATE:
         /* The HiZ resolve operation is actually an ambiguate */
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_PASS_THROUGH);
         break;

      default:
         unreachable("Invalid HiZ op");
      }
   }
}

static void
iris_resource_finish_hiz_write(struct iris_context *ice,
                               struct iris_resource *res,
                               uint32_t level, uint32_t layer,
                               enum isl_aux_usage aux_usage)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);

   /* Update the tracked aux state for one (level, layer) of a HiZ depth
    * surface after it has been written through \p aux_usage.
    */
   switch (iris_resource_get_aux_state(res, level, layer)) {
   case ISL_AUX_STATE_CLEAR:
      assert(aux_usage == ISL_AUX_USAGE_HIZ);
      iris_resource_set_aux_state(ice, res, level, layer, 1,
                                  ISL_AUX_STATE_COMPRESSED_CLEAR);
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      assert(aux_usage == ISL_AUX_USAGE_HIZ);
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
      if (aux_usage == ISL_AUX_USAGE_HIZ) {
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      } else {
         /* A non-HiZ write leaves the HiZ buffer out of date. */
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_AUX_INVALID);
      }
      break;

   case ISL_AUX_STATE_PASS_THROUGH:
      if (aux_usage == ISL_AUX_USAGE_HIZ) {
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      }
      break;

   case ISL_AUX_STATE_AUX_INVALID:
      assert(aux_usage != ISL_AUX_USAGE_HIZ);
      break;

   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid HiZ state");
   }
}

/**
 * Perform any resolves necessary before accessing the given range of the
 * resource through \p aux_usage.
 */
void
iris_resource_prepare_access(struct iris_context *ice,
                             struct iris_batch *batch,
                             struct iris_resource *res,
                             uint32_t start_level, uint32_t num_levels,
                             uint32_t start_layer, uint32_t num_layers,
                             enum isl_aux_usage aux_usage,
                             bool fast_clear_supported)
{
   /* Clamp the level range to what actually exists in the miptree. */
   num_levels = miptree_level_range_length(res, start_level, num_levels);

   switch (res->aux.usage) {
   case ISL_AUX_USAGE_NONE:
      /* Nothing to do */
      break;

   case ISL_AUX_USAGE_MCS:
      /* MCS surfaces are multisampled and therefore single-level. */
      assert(start_level == 0 && num_levels == 1);
      const uint32_t level_layers =
miptree_layer_range_length(res, 0, start_layer, num_layers); 1123 for (uint32_t a = 0; a < level_layers; a++) { 1124 iris_resource_prepare_mcs_access(ice, batch, res, start_layer + a, 1125 aux_usage, fast_clear_supported); 1126 } 1127 break; 1128 1129 case ISL_AUX_USAGE_CCS_D: 1130 case ISL_AUX_USAGE_CCS_E: 1131 for (uint32_t l = 0; l < num_levels; l++) { 1132 const uint32_t level = start_level + l; 1133 const uint32_t level_layers = 1134 miptree_layer_range_length(res, level, start_layer, num_layers); 1135 for (uint32_t a = 0; a < level_layers; a++) { 1136 iris_resource_prepare_ccs_access(ice, batch, res, level, 1137 start_layer + a, 1138 aux_usage, fast_clear_supported); 1139 } 1140 } 1141 break; 1142 1143 case ISL_AUX_USAGE_HIZ: 1144 for (uint32_t l = 0; l < num_levels; l++) { 1145 const uint32_t level = start_level + l; 1146 if (!iris_resource_level_has_hiz(res, level)) 1147 continue; 1148 1149 const uint32_t level_layers = 1150 miptree_layer_range_length(res, level, start_layer, num_layers); 1151 for (uint32_t a = 0; a < level_layers; a++) { 1152 iris_resource_prepare_hiz_access(ice, batch, res, level, 1153 start_layer + a, aux_usage, 1154 fast_clear_supported); 1155 } 1156 } 1157 break; 1158 1159 default: 1160 unreachable("Invalid aux usage"); 1161 } 1162} 1163 1164void 1165iris_resource_finish_write(struct iris_context *ice, 1166 struct iris_resource *res, uint32_t level, 1167 uint32_t start_layer, uint32_t num_layers, 1168 enum isl_aux_usage aux_usage) 1169{ 1170 num_layers = miptree_layer_range_length(res, level, start_layer, num_layers); 1171 1172 switch (res->aux.usage) { 1173 case ISL_AUX_USAGE_NONE: 1174 break; 1175 1176 case ISL_AUX_USAGE_MCS: 1177 for (uint32_t a = 0; a < num_layers; a++) { 1178 iris_resource_finish_mcs_write(ice, res, start_layer + a, 1179 aux_usage); 1180 } 1181 break; 1182 1183 case ISL_AUX_USAGE_CCS_D: 1184 case ISL_AUX_USAGE_CCS_E: 1185 for (uint32_t a = 0; a < num_layers; a++) { 1186 iris_resource_finish_ccs_write(ice, res, 
level, start_layer + a, 1187 aux_usage); 1188 } 1189 break; 1190 1191 case ISL_AUX_USAGE_HIZ: 1192 if (!iris_resource_level_has_hiz(res, level)) 1193 return; 1194 1195 for (uint32_t a = 0; a < num_layers; a++) { 1196 iris_resource_finish_hiz_write(ice, res, level, start_layer + a, 1197 aux_usage); 1198 } 1199 break; 1200 1201 default: 1202 unreachable("Invavlid aux usage"); 1203 } 1204} 1205 1206enum isl_aux_state 1207iris_resource_get_aux_state(const struct iris_resource *res, 1208 uint32_t level, uint32_t layer) 1209{ 1210 iris_resource_check_level_layer(res, level, layer); 1211 1212 if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) { 1213 assert(iris_resource_level_has_hiz(res, level)); 1214 } else if (res->surf.usage & ISL_SURF_USAGE_STENCIL_BIT) { 1215 unreachable("Cannot get aux state for stencil"); 1216 } else { 1217 assert(res->surf.samples == 1 || 1218 res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 1219 } 1220 1221 return res->aux.state[level][layer]; 1222} 1223 1224void 1225iris_resource_set_aux_state(struct iris_context *ice, 1226 struct iris_resource *res, uint32_t level, 1227 uint32_t start_layer, uint32_t num_layers, 1228 enum isl_aux_state aux_state) 1229{ 1230 num_layers = miptree_layer_range_length(res, level, start_layer, num_layers); 1231 1232 if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) { 1233 assert(iris_resource_level_has_hiz(res, level)); 1234 } else if (res->surf.usage & ISL_SURF_USAGE_STENCIL_BIT) { 1235 unreachable("Cannot set aux state for stencil"); 1236 } else { 1237 assert(res->surf.samples == 1 || 1238 res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 1239 } 1240 1241 for (unsigned a = 0; a < num_layers; a++) { 1242 if (res->aux.state[level][start_layer + a] != aux_state) { 1243 res->aux.state[level][start_layer + a] = aux_state; 1244 /* XXX: Need to track which bindings to make dirty */ 1245 ice->state.dirty |= IRIS_ALL_DIRTY_BINDINGS; 1246 } 1247 } 1248} 1249 1250/* On Gen9 color buffers may be compressed by the hardware (lossless 
 * compression). There are, however, format restrictions and care needs to be
 * taken that the sampler engine is capable of re-interpreting a buffer with
 * a format different from the one the buffer was originally written with.
 *
 * For example, SRGB formats are not compressible and the sampler engine isn't
 * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
 * color buffer needs to be resolved so that the sampling surface can be
 * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
 * set).
 */
static bool
can_texture_with_ccs(const struct gen_device_info *devinfo,
                     struct pipe_debug_callback *dbg,
                     const struct iris_resource *res,
                     enum isl_format view_format)
{
   if (res->aux.usage != ISL_AUX_USAGE_CCS_E)
      return false;

   if (!format_ccs_e_compat_with_resource(devinfo, res, view_format)) {
      const struct isl_format_layout *res_fmtl =
         isl_format_get_layout(res->surf.format);
      const struct isl_format_layout *view_fmtl =
         isl_format_get_layout(view_format);

      perf_debug(dbg, "Incompatible sampling format (%s) for CCS (%s)\n",
                 view_fmtl->name, res_fmtl->name);

      return false;
   }

   return true;
}

/**
 * Determine the aux usage to use when sampling from the given resource
 * through a view with \p view_format.
 */
enum isl_aux_usage
iris_resource_texture_aux_usage(struct iris_context *ice,
                                const struct iris_resource *res,
                                enum isl_format view_format,
                                enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   struct gen_device_info *devinfo = &screen->devinfo;

   assert(devinfo->gen == 9 || astc5x5_wa_bits == 0);

   /* On gen9, ASTC 5x5 textures cannot live in the sampler cache alongside
    * CCS or HiZ compressed textures. See gen9_apply_astc5x5_wa_flush() for
    * details.
    */
   if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
       res->aux.usage != ISL_AUX_USAGE_MCS)
      return ISL_AUX_USAGE_NONE;

   switch (res->aux.usage) {
   case ISL_AUX_USAGE_HIZ:
      if (sample_with_hiz(devinfo, res))
         return ISL_AUX_USAGE_HIZ;
      break;

   case ISL_AUX_USAGE_MCS:
      return ISL_AUX_USAGE_MCS;

   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      /* If we don't have any unresolved color, report an aux usage of
       * ISL_AUX_USAGE_NONE. This way, texturing won't even look at the
       * aux surface and we can save some bandwidth.
       */
      if (!has_color_unresolved(res, 0, INTEL_REMAINING_LEVELS,
                                0, INTEL_REMAINING_LAYERS))
         return ISL_AUX_USAGE_NONE;

      if (can_texture_with_ccs(devinfo, &ice->dbg, res, view_format))
         return ISL_AUX_USAGE_CCS_E;
      break;

   default:
      break;
   }

   return ISL_AUX_USAGE_NONE;
}

/**
 * Can formats \p a and \p b be used interchangeably without changing the
 * stored fast-clear color?
 */
static bool
isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
{
   /* On gen8 and earlier, the hardware was only capable of handling 0/1 clear
    * values so sRGB curve application was a no-op for all fast-clearable
    * formats.
    *
    * On gen9+, the hardware supports arbitrary clear values. For sRGB clear
    * values, the hardware interprets the floats, not as what would be
    * returned from the sampler (or written by the shader), but as being
    * between format conversion and sRGB curve application. This means that
    * we can switch between sRGB and UNORM without having to whack the clear
    * color.
    */
   return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b);
}

/**
 * Resolve the given range of the resource as needed so it can be sampled
 * with \p view_format.
 */
void
iris_resource_prepare_texture(struct iris_context *ice,
                              struct iris_batch *batch,
                              struct iris_resource *res,
                              enum isl_format view_format,
                              uint32_t start_level, uint32_t num_levels,
                              uint32_t start_layer, uint32_t num_layers,
                              enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
{
   enum isl_aux_usage aux_usage =
      iris_resource_texture_aux_usage(ice, res, view_format, astc5x5_wa_bits);

   bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;

   /* Clear color is specified as ints or floats and the conversion is done by
    * the sampler. If we have a texture view, we would have to perform the
    * clear color conversion manually. Just disable clear color.
    */
   if (!isl_formats_are_fast_clear_compatible(res->surf.format, view_format))
      clear_supported = false;

   iris_resource_prepare_access(ice, batch, res, start_level, num_levels,
                                start_layer, num_layers,
                                aux_usage, clear_supported);
}

/**
 * Fully resolve the resource so it can be bound as a shader image.
 */
void
iris_resource_prepare_image(struct iris_context *ice,
                            struct iris_batch *batch,
                            struct iris_resource *res)
{
   /* The data port doesn't understand any compression */
   iris_resource_prepare_access(ice, batch, res, 0, INTEL_REMAINING_LEVELS,
                                0, INTEL_REMAINING_LAYERS,
                                ISL_AUX_USAGE_NONE, false);
}

/**
 * Determine the aux usage to use when rendering to the given resource
 * with \p render_format.
 */
enum isl_aux_usage
iris_resource_render_aux_usage(struct iris_context *ice,
                               struct iris_resource *res,
                               enum isl_format render_format,
                               bool blend_enabled,
                               bool draw_aux_disabled)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   struct gen_device_info *devinfo = &screen->devinfo;

   if (draw_aux_disabled)
      return ISL_AUX_USAGE_NONE;

   switch (res->aux.usage) {
   case ISL_AUX_USAGE_MCS:
      return ISL_AUX_USAGE_MCS;

   case ISL_AUX_USAGE_CCS_D:
   case
ISL_AUX_USAGE_CCS_E:
      /* Gen9+ hardware technically supports non-0/1 clear colors with sRGB
       * formats. However, there are issues with blending where it doesn't
       * properly apply the sRGB curve to the clear color when blending.
       */
      if (devinfo->gen >= 9 && blend_enabled &&
          isl_format_is_srgb(render_format) &&
          !isl_color_value_is_zero_one(res->aux.clear_color, render_format))
         return ISL_AUX_USAGE_NONE;

      if (res->aux.usage == ISL_AUX_USAGE_CCS_E &&
          format_ccs_e_compat_with_resource(devinfo, res, render_format))
         return ISL_AUX_USAGE_CCS_E;

      /* Otherwise, we have to fall back to CCS_D */
      return ISL_AUX_USAGE_CCS_D;

   default:
      return ISL_AUX_USAGE_NONE;
   }
}

/**
 * Resolve the given range of the color resource as needed so it can be
 * rendered with \p aux_usage.
 */
void
iris_resource_prepare_render(struct iris_context *ice,
                             struct iris_batch *batch,
                             struct iris_resource *res, uint32_t level,
                             uint32_t start_layer, uint32_t layer_count,
                             enum isl_aux_usage aux_usage)
{
   iris_resource_prepare_access(ice, batch, res, level, 1, start_layer,
                                layer_count, aux_usage,
                                aux_usage != ISL_AUX_USAGE_NONE);
}

/**
 * Update the aux-state tracking after rendering to the given range of the
 * color resource with \p aux_usage.
 */
void
iris_resource_finish_render(struct iris_context *ice,
                            struct iris_resource *res, uint32_t level,
                            uint32_t start_layer, uint32_t layer_count,
                            enum isl_aux_usage aux_usage)
{
   iris_resource_finish_write(ice, res, level, start_layer, layer_count,
                              aux_usage);
}

/**
 * Resolve the given range of the depth resource as needed before a depth
 * access.  Fast clears are allowed whenever an aux buffer exists.
 */
void
iris_resource_prepare_depth(struct iris_context *ice,
                            struct iris_batch *batch,
                            struct iris_resource *res, uint32_t level,
                            uint32_t start_layer, uint32_t layer_count)
{
   iris_resource_prepare_access(ice, batch, res, level, 1, start_layer,
                                layer_count, res->aux.usage, !!res->aux.bo);
}

/**
 * Update the aux-state tracking after a depth pass, but only if depth was
 * actually written.
 */
void
iris_resource_finish_depth(struct iris_context *ice,
                           struct iris_resource *res, uint32_t level,
                           uint32_t start_layer, uint32_t layer_count,
                           bool depth_written)
{
   if (depth_written) {
      iris_resource_finish_write(ice, res, level, start_layer, layer_count,
                                 res->aux.usage);
   }
}