1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * Copyright 2015 Advanced Micro Devices, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * on the rights to use, copy, modify, merge, publish, distribute, sub 10 * license, and/or sell copies of the Software, and to permit persons to whom 11 * the Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23 * USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26#include "si_pipe.h" 27#include "si_compute.h" 28#include "util/u_format.h" 29#include "util/u_log.h" 30#include "util/u_surface.h" 31 32enum { 33 SI_COPY = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | 34 SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND, 35 36 SI_BLIT = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | 37 SI_SAVE_FRAGMENT_STATE, 38 39 SI_DECOMPRESS = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE | 40 SI_DISABLE_RENDER_COND, 41 42 SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE 43}; 44 45void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op) 46{ 47 util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso); 48 util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso); 49 util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso); 50 util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso); 51 util_blitter_save_so_targets(sctx->blitter, sctx->streamout.num_targets, 52 (struct pipe_stream_output_target**)sctx->streamout.targets); 53 util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer); 54 55 if (op & SI_SAVE_FRAGMENT_STATE) { 56 util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend); 57 util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa); 58 util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state); 59 util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso); 60 util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask); 61 util_blitter_save_scissor(sctx->blitter, &sctx->scissors[0]); 62 util_blitter_save_window_rectangles(sctx->blitter, 63 sctx->window_rectangles_include, 64 sctx->num_window_rectangles, 65 sctx->window_rectangles); 66 } 67 68 if (op & SI_SAVE_FRAMEBUFFER) 69 util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state); 70 71 if (op & SI_SAVE_TEXTURES) { 72 util_blitter_save_fragment_sampler_states( 73 sctx->blitter, 2, 74 (void**)sctx->samplers[PIPE_SHADER_FRAGMENT].sampler_states); 75 76 util_blitter_save_fragment_sampler_views(sctx->blitter, 2, 77 sctx->samplers[PIPE_SHADER_FRAGMENT].views); 78 } 79 80 if (op & SI_DISABLE_RENDER_COND) 81 sctx->render_cond_force_off = true; 82 83 if (sctx->screen->dpbb_allowed) { 84 sctx->dpbb_force_off = true; 85 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 86 } 87} 88 89void si_blitter_end(struct si_context *sctx) 90{ 91 if (sctx->screen->dpbb_allowed) { 92 sctx->dpbb_force_off = false; 93 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 94 } 95 96 sctx->render_cond_force_off = false; 97 98 /* Restore shader pointers because the VS blit shader changed all 99 * non-global VS user SGPRs. */ 100 sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX); 101 sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL; 102 si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); 103} 104 105static unsigned u_max_sample(struct pipe_resource *r) 106{ 107 return r->nr_samples ? r->nr_samples - 1 : 0; 108} 109 110static unsigned 111si_blit_dbcb_copy(struct si_context *sctx, 112 struct si_texture *src, 113 struct si_texture *dst, 114 unsigned planes, unsigned level_mask, 115 unsigned first_layer, unsigned last_layer, 116 unsigned first_sample, unsigned last_sample) 117{ 118 struct pipe_surface surf_tmpl = {{0}}; 119 unsigned layer, sample, checked_last_layer, max_layer; 120 unsigned fully_copied_levels = 0; 121 122 if (planes & PIPE_MASK_Z) 123 sctx->dbcb_depth_copy_enabled = true; 124 if (planes & PIPE_MASK_S) 125 sctx->dbcb_stencil_copy_enabled = true; 126 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 127 128 assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled); 129 130 sctx->decompression_enabled = true; 131 132 while (level_mask) { 133 unsigned level = u_bit_scan(&level_mask); 134 135 /* The smaller the mipmap level, the less layers there are 136 * as far as 3D textures are concerned. */ 137 max_layer = util_max_layer(&src->buffer.b.b, level); 138 checked_last_layer = MIN2(last_layer, max_layer); 139 140 surf_tmpl.u.tex.level = level; 141 142 for (layer = first_layer; layer <= checked_last_layer; layer++) { 143 struct pipe_surface *zsurf, *cbsurf; 144 145 surf_tmpl.format = src->buffer.b.b.format; 146 surf_tmpl.u.tex.first_layer = layer; 147 surf_tmpl.u.tex.last_layer = layer; 148 149 zsurf = sctx->b.create_surface(&sctx->b, &src->buffer.b.b, &surf_tmpl); 150 151 surf_tmpl.format = dst->buffer.b.b.format; 152 cbsurf = sctx->b.create_surface(&sctx->b, &dst->buffer.b.b, &surf_tmpl); 153 154 for (sample = first_sample; sample <= last_sample; sample++) { 155 if (sample != sctx->dbcb_copy_sample) { 156 sctx->dbcb_copy_sample = sample; 157 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 158 } 159 160 si_blitter_begin(sctx, SI_DECOMPRESS); 161 util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample, 162 sctx->custom_dsa_flush, 1.0f); 163 si_blitter_end(sctx); 164 } 165 166 pipe_surface_reference(&zsurf, NULL); 167 pipe_surface_reference(&cbsurf, NULL); 168 } 169 170 if (first_layer == 0 && last_layer >= max_layer && 171 first_sample == 0 && last_sample >= u_max_sample(&src->buffer.b.b)) 172 fully_copied_levels |= 1u << level; 173 } 174 175 sctx->decompression_enabled = false; 176 sctx->dbcb_depth_copy_enabled = false; 177 sctx->dbcb_stencil_copy_enabled = false; 178 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 179 180 return fully_copied_levels; 181} 182 183void si_blit_decompress_depth(struct pipe_context *ctx, 184 struct si_texture *texture, 185 struct si_texture *staging, 186 unsigned first_level, unsigned last_level, 187 unsigned first_layer, unsigned last_layer, 188 unsigned first_sample, unsigned last_sample) 189{ 190 const struct util_format_description *desc; 191 unsigned planes = 0; 192 193 assert(staging != NULL && "use si_blit_decompress_zs_in_place instead"); 194 195 desc = util_format_description(staging->buffer.b.b.format); 196 197 if (util_format_has_depth(desc)) 198 planes |= PIPE_MASK_Z; 199 if (util_format_has_stencil(desc)) 200 planes |= PIPE_MASK_S; 201 202 si_blit_dbcb_copy( 203 (struct si_context *)ctx, texture, staging, planes, 204 u_bit_consecutive(first_level, last_level - first_level + 1), 205 first_layer, last_layer, first_sample, last_sample); 206} 207 208/* Helper function for si_blit_decompress_zs_in_place. 209 */ 210static void 211si_blit_decompress_zs_planes_in_place(struct si_context *sctx, 212 struct si_texture *texture, 213 unsigned planes, unsigned level_mask, 214 unsigned first_layer, unsigned last_layer) 215{ 216 struct pipe_surface *zsurf, surf_tmpl = {{0}}; 217 unsigned layer, max_layer, checked_last_layer; 218 unsigned fully_decompressed_mask = 0; 219 220 if (!level_mask) 221 return; 222 223 if (planes & PIPE_MASK_S) 224 sctx->db_flush_stencil_inplace = true; 225 if (planes & PIPE_MASK_Z) 226 sctx->db_flush_depth_inplace = true; 227 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 228 229 surf_tmpl.format = texture->buffer.b.b.format; 230 231 sctx->decompression_enabled = true; 232 233 while (level_mask) { 234 unsigned level = u_bit_scan(&level_mask); 235 236 surf_tmpl.u.tex.level = level; 237 238 /* The smaller the mipmap level, the less layers there are 239 * as far as 3D textures are concerned. */ 240 max_layer = util_max_layer(&texture->buffer.b.b, level); 241 checked_last_layer = MIN2(last_layer, max_layer); 242 243 for (layer = first_layer; layer <= checked_last_layer; layer++) { 244 surf_tmpl.u.tex.first_layer = layer; 245 surf_tmpl.u.tex.last_layer = layer; 246 247 zsurf = sctx->b.create_surface(&sctx->b, &texture->buffer.b.b, &surf_tmpl); 248 249 si_blitter_begin(sctx, SI_DECOMPRESS); 250 util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0, 251 sctx->custom_dsa_flush, 252 1.0f); 253 si_blitter_end(sctx); 254 255 pipe_surface_reference(&zsurf, NULL); 256 } 257 258 /* The texture will always be dirty if some layers aren't flushed. 259 * I don't think this case occurs often though. */ 260 if (first_layer == 0 && last_layer >= max_layer) { 261 fully_decompressed_mask |= 1u << level; 262 } 263 } 264 265 if (planes & PIPE_MASK_Z) 266 texture->dirty_level_mask &= ~fully_decompressed_mask; 267 if (planes & PIPE_MASK_S) 268 texture->stencil_dirty_level_mask &= ~fully_decompressed_mask; 269 270 sctx->decompression_enabled = false; 271 sctx->db_flush_depth_inplace = false; 272 sctx->db_flush_stencil_inplace = false; 273 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 274} 275 276/* Helper function of si_flush_depth_texture: decompress the given levels 277 * of Z and/or S planes in place. 278 */ 279static void 280si_blit_decompress_zs_in_place(struct si_context *sctx, 281 struct si_texture *texture, 282 unsigned levels_z, unsigned levels_s, 283 unsigned first_layer, unsigned last_layer) 284{ 285 unsigned both = levels_z & levels_s; 286 287 /* First, do combined Z & S decompresses for levels that need it. */ 288 if (both) { 289 si_blit_decompress_zs_planes_in_place( 290 sctx, texture, PIPE_MASK_Z | PIPE_MASK_S, 291 both, 292 first_layer, last_layer); 293 levels_z &= ~both; 294 levels_s &= ~both; 295 } 296 297 /* Now do separate Z and S decompresses. */ 298 if (levels_z) { 299 si_blit_decompress_zs_planes_in_place( 300 sctx, texture, PIPE_MASK_Z, 301 levels_z, 302 first_layer, last_layer); 303 } 304 305 if (levels_s) { 306 si_blit_decompress_zs_planes_in_place( 307 sctx, texture, PIPE_MASK_S, 308 levels_s, 309 first_layer, last_layer); 310 } 311} 312 313static void 314si_decompress_depth(struct si_context *sctx, 315 struct si_texture *tex, 316 unsigned required_planes, 317 unsigned first_level, unsigned last_level, 318 unsigned first_layer, unsigned last_layer) 319{ 320 unsigned inplace_planes = 0; 321 unsigned copy_planes = 0; 322 unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1); 323 unsigned levels_z = 0; 324 unsigned levels_s = 0; 325 326 if (required_planes & PIPE_MASK_Z) { 327 levels_z = level_mask & tex->dirty_level_mask; 328 329 if (levels_z) { 330 if (si_can_sample_zs(tex, false)) 331 inplace_planes |= PIPE_MASK_Z; 332 else 333 copy_planes |= PIPE_MASK_Z; 334 } 335 } 336 if (required_planes & PIPE_MASK_S) { 337 levels_s = level_mask & tex->stencil_dirty_level_mask; 338 339 if (levels_s) { 340 if (si_can_sample_zs(tex, true)) 341 inplace_planes |= PIPE_MASK_S; 342 else 343 copy_planes |= PIPE_MASK_S; 344 } 345 } 346 347 if (unlikely(sctx->log)) 348 u_log_printf(sctx->log, 349 "\n------------------------------------------------\n" 350 "Decompress Depth (levels %u - %u, levels Z: 0x%x S: 0x%x)\n\n", 351 first_level, last_level, levels_z, levels_s); 352 353 /* We may have to allocate the flushed texture here when called from 354 * si_decompress_subresource. 355 */ 356 if (copy_planes && 357 (tex->flushed_depth_texture || 358 si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b, NULL))) { 359 struct si_texture *dst = tex->flushed_depth_texture; 360 unsigned fully_copied_levels; 361 unsigned levels = 0; 362 363 assert(tex->flushed_depth_texture); 364 365 if (util_format_is_depth_and_stencil(dst->buffer.b.b.format)) 366 copy_planes = PIPE_MASK_Z | PIPE_MASK_S; 367 368 if (copy_planes & PIPE_MASK_Z) { 369 levels |= levels_z; 370 levels_z = 0; 371 } 372 if (copy_planes & PIPE_MASK_S) { 373 levels |= levels_s; 374 levels_s = 0; 375 } 376 377 fully_copied_levels = si_blit_dbcb_copy( 378 sctx, tex, dst, copy_planes, levels, 379 first_layer, last_layer, 380 0, u_max_sample(&tex->buffer.b.b)); 381 382 if (copy_planes & PIPE_MASK_Z) 383 tex->dirty_level_mask &= ~fully_copied_levels; 384 if (copy_planes & PIPE_MASK_S) 385 tex->stencil_dirty_level_mask &= ~fully_copied_levels; 386 } 387 388 if (inplace_planes) { 389 bool has_htile = si_htile_enabled(tex, first_level); 390 bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, first_level); 391 392 /* Don't decompress if there is no HTILE or when HTILE is 393 * TC-compatible. */ 394 if (has_htile && !tc_compat_htile) { 395 si_blit_decompress_zs_in_place( 396 sctx, tex, 397 levels_z, levels_s, 398 first_layer, last_layer); 399 } else { 400 /* This is only a cache flush. 401 * 402 * Only clear the mask that we are flushing, because 403 * si_make_DB_shader_coherent() treats different levels 404 * and depth and stencil differently. 405 */ 406 if (inplace_planes & PIPE_MASK_Z) 407 tex->dirty_level_mask &= ~levels_z; 408 if (inplace_planes & PIPE_MASK_S) 409 tex->stencil_dirty_level_mask &= ~levels_s; 410 } 411 412 /* Only in-place decompression needs to flush DB caches, or 413 * when we don't decompress but TC-compatible planes are dirty. 414 */ 415 si_make_DB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, 416 inplace_planes & PIPE_MASK_S, 417 tc_compat_htile); 418 } 419 /* set_framebuffer_state takes care of coherency for single-sample. 420 * The DB->CB copy uses CB for the final writes. 421 */ 422 if (copy_planes && tex->buffer.b.b.nr_samples > 1) 423 si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, 424 false, true /* no DCC */); 425} 426 427static void 428si_decompress_sampler_depth_textures(struct si_context *sctx, 429 struct si_samplers *textures) 430{ 431 unsigned i; 432 unsigned mask = textures->needs_depth_decompress_mask; 433 434 while (mask) { 435 struct pipe_sampler_view *view; 436 struct si_sampler_view *sview; 437 struct si_texture *tex; 438 439 i = u_bit_scan(&mask); 440 441 view = textures->views[i]; 442 assert(view); 443 sview = (struct si_sampler_view*)view; 444 445 tex = (struct si_texture *)view->texture; 446 assert(tex->db_compatible); 447 448 si_decompress_depth(sctx, tex, 449 sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z, 450 view->u.tex.first_level, view->u.tex.last_level, 451 0, util_max_layer(&tex->buffer.b.b, view->u.tex.first_level)); 452 } 453} 454 455static void si_blit_decompress_color(struct si_context *sctx, 456 struct si_texture *tex, 457 unsigned first_level, unsigned last_level, 458 unsigned first_layer, unsigned last_layer, 459 bool need_dcc_decompress) 460{ 461 void* custom_blend; 462 unsigned layer, checked_last_layer, max_layer; 463 unsigned level_mask = 464 u_bit_consecutive(first_level, last_level - first_level + 1); 465 466 if (!need_dcc_decompress) 467 level_mask &= tex->dirty_level_mask; 468 if (!level_mask) 469 return; 470 471 if (unlikely(sctx->log)) 472 u_log_printf(sctx->log, 473 "\n------------------------------------------------\n" 474 "Decompress Color (levels %u - %u, mask 0x%x)\n\n", 475 first_level, last_level, level_mask); 476 477 if (need_dcc_decompress) { 478 custom_blend = sctx->custom_blend_dcc_decompress; 479 480 assert(tex->dcc_offset); 481 482 /* disable levels without DCC */ 483 for (int i = first_level; i <= last_level; i++) { 484 if (!vi_dcc_enabled(tex, i)) 485 level_mask &= ~(1 << i); 486 } 487 } else if (tex->surface.fmask_size) { 488 custom_blend = sctx->custom_blend_fmask_decompress; 489 } else { 490 custom_blend = sctx->custom_blend_eliminate_fastclear; 491 } 492 493 sctx->decompression_enabled = true; 494 495 while (level_mask) { 496 unsigned level = u_bit_scan(&level_mask); 497 498 /* The smaller the mipmap level, the less layers there are 499 * as far as 3D textures are concerned. */ 500 max_layer = util_max_layer(&tex->buffer.b.b, level); 501 checked_last_layer = MIN2(last_layer, max_layer); 502 503 for (layer = first_layer; layer <= checked_last_layer; layer++) { 504 struct pipe_surface *cbsurf, surf_tmpl; 505 506 surf_tmpl.format = tex->buffer.b.b.format; 507 surf_tmpl.u.tex.level = level; 508 surf_tmpl.u.tex.first_layer = layer; 509 surf_tmpl.u.tex.last_layer = layer; 510 cbsurf = sctx->b.create_surface(&sctx->b, &tex->buffer.b.b, &surf_tmpl); 511 512 /* Required before and after FMASK and DCC_DECOMPRESS. */ 513 if (custom_blend == sctx->custom_blend_fmask_decompress || 514 custom_blend == sctx->custom_blend_dcc_decompress) 515 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 516 517 si_blitter_begin(sctx, SI_DECOMPRESS); 518 util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend); 519 si_blitter_end(sctx); 520 521 if (custom_blend == sctx->custom_blend_fmask_decompress || 522 custom_blend == sctx->custom_blend_dcc_decompress) 523 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 524 525 pipe_surface_reference(&cbsurf, NULL); 526 } 527 528 /* The texture will always be dirty if some layers aren't flushed. 529 * I don't think this case occurs often though. */ 530 if (first_layer == 0 && last_layer >= max_layer) { 531 tex->dirty_level_mask &= ~(1 << level); 532 } 533 } 534 535 sctx->decompression_enabled = false; 536 si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, 537 vi_dcc_enabled(tex, first_level), 538 tex->surface.u.gfx9.dcc.pipe_aligned); 539} 540 541static void 542si_decompress_color_texture(struct si_context *sctx, struct si_texture *tex, 543 unsigned first_level, unsigned last_level) 544{ 545 /* CMASK or DCC can be discarded and we can still end up here. */ 546 if (!tex->cmask_buffer && !tex->surface.fmask_size && !tex->dcc_offset) 547 return; 548 549 si_blit_decompress_color(sctx, tex, first_level, last_level, 0, 550 util_max_layer(&tex->buffer.b.b, first_level), 551 false); 552} 553 554static void 555si_decompress_sampler_color_textures(struct si_context *sctx, 556 struct si_samplers *textures) 557{ 558 unsigned i; 559 unsigned mask = textures->needs_color_decompress_mask; 560 561 while (mask) { 562 struct pipe_sampler_view *view; 563 struct si_texture *tex; 564 565 i = u_bit_scan(&mask); 566 567 view = textures->views[i]; 568 assert(view); 569 570 tex = (struct si_texture *)view->texture; 571 572 si_decompress_color_texture(sctx, tex, view->u.tex.first_level, 573 view->u.tex.last_level); 574 } 575} 576 577static void 578si_decompress_image_color_textures(struct si_context *sctx, 579 struct si_images *images) 580{ 581 unsigned i; 582 unsigned mask = images->needs_color_decompress_mask; 583 584 while (mask) { 585 const struct pipe_image_view *view; 586 struct si_texture *tex; 587 588 i = u_bit_scan(&mask); 589 590 view = &images->views[i]; 591 assert(view->resource->target != PIPE_BUFFER); 592 593 tex = (struct si_texture *)view->resource; 594 595 si_decompress_color_texture(sctx, tex, view->u.tex.level, 596 view->u.tex.level); 597 } 598} 599 600static void si_check_render_feedback_texture(struct si_context *sctx, 601 struct si_texture *tex, 602 unsigned first_level, 603 unsigned last_level, 604 unsigned first_layer, 605 unsigned last_layer) 606{ 607 bool render_feedback = false; 608 609 if (!tex->dcc_offset) 610 return; 611 612 for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) { 613 struct si_surface * surf; 614 615 if (!sctx->framebuffer.state.cbufs[j]) 616 continue; 617 618 surf = (struct si_surface*)sctx->framebuffer.state.cbufs[j]; 619 620 if (tex == (struct si_texture *)surf->base.texture && 621 surf->base.u.tex.level >= first_level && 622 surf->base.u.tex.level <= last_level && 623 surf->base.u.tex.first_layer <= last_layer && 624 surf->base.u.tex.last_layer >= first_layer) { 625 render_feedback = true; 626 break; 627 } 628 } 629 630 if (render_feedback) 631 si_texture_disable_dcc(sctx, tex); 632} 633 634static void si_check_render_feedback_textures(struct si_context *sctx, 635 struct si_samplers *textures) 636{ 637 uint32_t mask = textures->enabled_mask; 638 639 while (mask) { 640 const struct pipe_sampler_view *view; 641 struct si_texture *tex; 642 643 unsigned i = u_bit_scan(&mask); 644 645 view = textures->views[i]; 646 if(view->texture->target == PIPE_BUFFER) 647 continue; 648 649 tex = (struct si_texture *)view->texture; 650 651 si_check_render_feedback_texture(sctx, tex, 652 view->u.tex.first_level, 653 view->u.tex.last_level, 654 view->u.tex.first_layer, 655 view->u.tex.last_layer); 656 } 657} 658 659static void si_check_render_feedback_images(struct si_context *sctx, 660 struct si_images *images) 661{ 662 uint32_t mask = images->enabled_mask; 663 664 while (mask) { 665 const struct pipe_image_view *view; 666 struct si_texture *tex; 667 668 unsigned i = u_bit_scan(&mask); 669 670 view = &images->views[i]; 671 if (view->resource->target == PIPE_BUFFER) 672 continue; 673 674 tex = (struct si_texture *)view->resource; 675 676 si_check_render_feedback_texture(sctx, tex, 677 view->u.tex.level, 678 view->u.tex.level, 679 view->u.tex.first_layer, 680 view->u.tex.last_layer); 681 } 682} 683 684static void si_check_render_feedback_resident_textures(struct si_context *sctx) 685{ 686 util_dynarray_foreach(&sctx->resident_tex_handles, 687 struct si_texture_handle *, tex_handle) { 688 struct pipe_sampler_view *view; 689 struct si_texture *tex; 690 691 view = (*tex_handle)->view; 692 if (view->texture->target == PIPE_BUFFER) 693 continue; 694 695 tex = (struct si_texture *)view->texture; 696 697 si_check_render_feedback_texture(sctx, tex, 698 view->u.tex.first_level, 699 view->u.tex.last_level, 700 view->u.tex.first_layer, 701 view->u.tex.last_layer); 702 } 703} 704 705static void si_check_render_feedback_resident_images(struct si_context *sctx) 706{ 707 util_dynarray_foreach(&sctx->resident_img_handles, 708 struct si_image_handle *, img_handle) { 709 struct pipe_image_view *view; 710 struct si_texture *tex; 711 712 view = &(*img_handle)->view; 713 if (view->resource->target == PIPE_BUFFER) 714 continue; 715 716 tex = (struct si_texture *)view->resource; 717 718 si_check_render_feedback_texture(sctx, tex, 719 view->u.tex.level, 720 view->u.tex.level, 721 view->u.tex.first_layer, 722 view->u.tex.last_layer); 723 } 724} 725 726static void si_check_render_feedback(struct si_context *sctx) 727{ 728 if (!sctx->need_check_render_feedback) 729 return; 730 731 /* There is no render feedback if color writes are disabled. 732 * (e.g. a pixel shader with image stores) 733 */ 734 if (!si_get_total_colormask(sctx)) 735 return; 736 737 for (int i = 0; i < SI_NUM_SHADERS; ++i) { 738 si_check_render_feedback_images(sctx, &sctx->images[i]); 739 si_check_render_feedback_textures(sctx, &sctx->samplers[i]); 740 } 741 742 si_check_render_feedback_resident_images(sctx); 743 si_check_render_feedback_resident_textures(sctx); 744 745 sctx->need_check_render_feedback = false; 746} 747 748static void si_decompress_resident_textures(struct si_context *sctx) 749{ 750 util_dynarray_foreach(&sctx->resident_tex_needs_color_decompress, 751 struct si_texture_handle *, tex_handle) { 752 struct pipe_sampler_view *view = (*tex_handle)->view; 753 struct si_texture *tex = (struct si_texture *)view->texture; 754 755 si_decompress_color_texture(sctx, tex, view->u.tex.first_level, 756 view->u.tex.last_level); 757 } 758 759 util_dynarray_foreach(&sctx->resident_tex_needs_depth_decompress, 760 struct si_texture_handle *, tex_handle) { 761 struct pipe_sampler_view *view = (*tex_handle)->view; 762 struct si_sampler_view *sview = (struct si_sampler_view *)view; 763 struct si_texture *tex = (struct si_texture *)view->texture; 764 765 si_decompress_depth(sctx, tex, 766 sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z, 767 view->u.tex.first_level, view->u.tex.last_level, 768 0, util_max_layer(&tex->buffer.b.b, view->u.tex.first_level)); 769 } 770} 771 772static void si_decompress_resident_images(struct si_context *sctx) 773{ 774 util_dynarray_foreach(&sctx->resident_img_needs_color_decompress, 775 struct si_image_handle *, img_handle) { 776 struct pipe_image_view *view = &(*img_handle)->view; 777 struct si_texture *tex = (struct si_texture *)view->resource; 778 779 si_decompress_color_texture(sctx, tex, view->u.tex.level, 780 view->u.tex.level); 781 } 782} 783 784void si_decompress_textures(struct si_context *sctx, unsigned shader_mask) 785{ 786 unsigned compressed_colortex_counter, mask; 787 788 if (sctx->blitter->running) 789 return; 790 791 /* Update the compressed_colortex_mask if necessary. */ 792 compressed_colortex_counter = p_atomic_read(&sctx->screen->compressed_colortex_counter); 793 if (compressed_colortex_counter != sctx->last_compressed_colortex_counter) { 794 sctx->last_compressed_colortex_counter = compressed_colortex_counter; 795 si_update_needs_color_decompress_masks(sctx); 796 } 797 798 /* Decompress color & depth textures if needed. */ 799 mask = sctx->shader_needs_decompress_mask & shader_mask; 800 while (mask) { 801 unsigned i = u_bit_scan(&mask); 802 803 if (sctx->samplers[i].needs_depth_decompress_mask) { 804 si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]); 805 } 806 if (sctx->samplers[i].needs_color_decompress_mask) { 807 si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]); 808 } 809 if (sctx->images[i].needs_color_decompress_mask) { 810 si_decompress_image_color_textures(sctx, &sctx->images[i]); 811 } 812 } 813 814 if (shader_mask & u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)) { 815 if (sctx->uses_bindless_samplers) 816 si_decompress_resident_textures(sctx); 817 if (sctx->uses_bindless_images) 818 si_decompress_resident_images(sctx); 819 820 if (sctx->ps_uses_fbfetch) { 821 struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0]; 822 si_decompress_color_texture(sctx, 823 (struct si_texture*)cb0->texture, 824 cb0->u.tex.first_layer, 825 cb0->u.tex.last_layer); 826 } 827 828 si_check_render_feedback(sctx); 829 } else if (shader_mask & (1 << PIPE_SHADER_COMPUTE)) { 830 if (sctx->cs_shader_state.program->uses_bindless_samplers) 831 si_decompress_resident_textures(sctx); 832 if (sctx->cs_shader_state.program->uses_bindless_images) 833 si_decompress_resident_images(sctx); 834 } 835} 836 837/* Helper for decompressing a portion of a color or depth resource before 838 * blitting if any decompression is needed. 839 * The driver doesn't decompress resources automatically while u_blitter is 840 * rendering. */ 841static void si_decompress_subresource(struct pipe_context *ctx, 842 struct pipe_resource *tex, 843 unsigned planes, unsigned level, 844 unsigned first_layer, unsigned last_layer) 845{ 846 struct si_context *sctx = (struct si_context *)ctx; 847 struct si_texture *stex = (struct si_texture*)tex; 848 849 if (stex->db_compatible) { 850 planes &= PIPE_MASK_Z | PIPE_MASK_S; 851 852 if (!stex->surface.has_stencil) 853 planes &= ~PIPE_MASK_S; 854 855 /* If we've rendered into the framebuffer and it's a blitting 856 * source, make sure the decompression pass is invoked 857 * by dirtying the framebuffer. 858 */ 859 if (sctx->framebuffer.state.zsbuf && 860 sctx->framebuffer.state.zsbuf->u.tex.level == level && 861 sctx->framebuffer.state.zsbuf->texture == tex) 862 si_update_fb_dirtiness_after_rendering(sctx); 863 864 si_decompress_depth(sctx, stex, planes, 865 level, level, 866 first_layer, last_layer); 867 } else if (stex->surface.fmask_size || stex->cmask_buffer || stex->dcc_offset) { 868 /* If we've rendered into the framebuffer and it's a blitting 869 * source, make sure the decompression pass is invoked 870 * by dirtying the framebuffer. 871 */ 872 for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 873 if (sctx->framebuffer.state.cbufs[i] && 874 sctx->framebuffer.state.cbufs[i]->u.tex.level == level && 875 sctx->framebuffer.state.cbufs[i]->texture == tex) { 876 si_update_fb_dirtiness_after_rendering(sctx); 877 break; 878 } 879 } 880 881 si_blit_decompress_color(sctx, stex, level, level, 882 first_layer, last_layer, false); 883 } 884} 885 886struct texture_orig_info { 887 unsigned format; 888 unsigned width0; 889 unsigned height0; 890 unsigned npix_x; 891 unsigned npix_y; 892 unsigned npix0_x; 893 unsigned npix0_y; 894}; 895 896void si_resource_copy_region(struct pipe_context *ctx, 897 struct pipe_resource *dst, 898 unsigned dst_level, 899 unsigned dstx, unsigned dsty, unsigned dstz, 900 struct pipe_resource *src, 901 unsigned src_level, 902 const struct pipe_box *src_box) 903{ 904 struct si_context *sctx = (struct si_context *)ctx; 905 struct si_texture *ssrc = (struct si_texture*)src; 906 struct si_texture *sdst = (struct si_texture*)dst; 907 struct pipe_surface *dst_view, dst_templ; 908 struct pipe_sampler_view src_templ, *src_view; 909 unsigned dst_width, dst_height, src_width0, src_height0; 910 unsigned dst_width0, dst_height0, src_force_level = 0; 911 struct pipe_box sbox, dstbox; 912 913 /* Handle buffers first. */ 914 if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { 915 si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width); 916 return; 917 } 918 919 if (!util_format_is_compressed(src->format) && 920 !util_format_is_compressed(dst->format) && 921 !util_format_is_depth_or_stencil(src->format) && 922 src->nr_samples <= 1 && 923 !sdst->dcc_offset && 924 !(dst->target != src->target && 925 (src->target == PIPE_TEXTURE_1D_ARRAY || dst->target == PIPE_TEXTURE_1D_ARRAY))) { 926 si_compute_copy_image(sctx, dst, dst_level, src, src_level, dstx, dsty, dstz, src_box); 927 return; 928 } 929 930 assert(u_max_sample(dst) == u_max_sample(src)); 931 932 /* The driver doesn't decompress resources automatically while 933 * u_blitter is rendering. */ 934 si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level, 935 src_box->z, src_box->z + src_box->depth - 1); 936 937 dst_width = u_minify(dst->width0, dst_level); 938 dst_height = u_minify(dst->height0, dst_level); 939 dst_width0 = dst->width0; 940 dst_height0 = dst->height0; 941 src_width0 = src->width0; 942 src_height0 = src->height0; 943 944 util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz); 945 util_blitter_default_src_texture(sctx->blitter, &src_templ, src, src_level); 946 947 if (util_format_is_compressed(src->format) || 948 util_format_is_compressed(dst->format)) { 949 unsigned blocksize = ssrc->surface.bpe; 950 951 if (blocksize == 8) 952 src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */ 953 else 954 src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */ 955 dst_templ.format = src_templ.format; 956 957 dst_width = util_format_get_nblocksx(dst->format, dst_width); 958 dst_height = util_format_get_nblocksy(dst->format, dst_height); 959 dst_width0 = util_format_get_nblocksx(dst->format, dst_width0); 960 dst_height0 = util_format_get_nblocksy(dst->format, dst_height0); 961 src_width0 = util_format_get_nblocksx(src->format, src_width0); 962 src_height0 = util_format_get_nblocksy(src->format, src_height0); 963 964 dstx = util_format_get_nblocksx(dst->format, dstx); 965 dsty = util_format_get_nblocksy(dst->format, dsty); 966 967 sbox.x = util_format_get_nblocksx(src->format, src_box->x); 968 sbox.y = util_format_get_nblocksy(src->format, src_box->y); 969 sbox.z = src_box->z; 970 sbox.width = util_format_get_nblocksx(src->format, src_box->width); 971 sbox.height = util_format_get_nblocksy(src->format, src_box->height); 972 sbox.depth = src_box->depth; 973 src_box = &sbox; 974 975 src_force_level = src_level; 976 } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) { 977 if (util_format_is_subsampled_422(src->format)) { 978 src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; 979 dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; 980 981 dst_width = util_format_get_nblocksx(dst->format, dst_width); 982 dst_width0 = util_format_get_nblocksx(dst->format, dst_width0); 983 src_width0 = util_format_get_nblocksx(src->format, src_width0); 984 985 dstx = util_format_get_nblocksx(dst->format, dstx); 986 987 sbox = *src_box; 988 sbox.x = util_format_get_nblocksx(src->format, src_box->x); 989 sbox.width = util_format_get_nblocksx(src->format, src_box->width); 990 src_box = &sbox; 991 } else { 992 unsigned blocksize = ssrc->surface.bpe; 993 994 switch (blocksize) { 995 case 1: 996 dst_templ.format = PIPE_FORMAT_R8_UNORM; 997 src_templ.format = PIPE_FORMAT_R8_UNORM; 998 break; 999 case 2: 1000 dst_templ.format = PIPE_FORMAT_R8G8_UNORM; 1001 src_templ.format = PIPE_FORMAT_R8G8_UNORM; 1002 break; 1003 case 4: 1004 dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; 1005 src_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; 1006 break; 1007 case 8: 1008 dst_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; 1009 src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; 1010 break; 1011 case 16: 1012 dst_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; 1013 src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; 1014 break; 1015 default: 1016 fprintf(stderr, "Unhandled format %s with blocksize %u\n", 1017 util_format_short_name(src->format), blocksize); 1018 assert(0); 1019 } 1020 } 1021 } 1022 1023 /* SNORM8 blitting has precision issues on some chips. Use the SINT 1024 * equivalent instead, which doesn't force DCC decompression. 1025 * Note that some chips avoid this issue by using SDMA. 1026 */ 1027 if (util_format_is_snorm8(dst_templ.format)) { 1028 dst_templ.format = src_templ.format = 1029 util_format_snorm8_to_sint8(dst_templ.format); 1030 } 1031 1032 vi_disable_dcc_if_incompatible_format(sctx, dst, dst_level, 1033 dst_templ.format); 1034 vi_disable_dcc_if_incompatible_format(sctx, src, src_level, 1035 src_templ.format); 1036 1037 /* Initialize the surface. */ 1038 dst_view = si_create_surface_custom(ctx, dst, &dst_templ, 1039 dst_width0, dst_height0, 1040 dst_width, dst_height); 1041 1042 /* Initialize the sampler view. */ 1043 src_view = si_create_sampler_view_custom(ctx, src, &src_templ, 1044 src_width0, src_height0, 1045 src_force_level); 1046 1047 u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height), 1048 abs(src_box->depth), &dstbox); 1049 1050 /* Copy. */ 1051 si_blitter_begin(sctx, SI_COPY); 1052 util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox, 1053 src_view, src_box, src_width0, src_height0, 1054 PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, 1055 false); 1056 si_blitter_end(sctx); 1057 1058 pipe_surface_reference(&dst_view, NULL); 1059 pipe_sampler_view_reference(&src_view, NULL); 1060} 1061 1062static void si_do_CB_resolve(struct si_context *sctx, 1063 const struct pipe_blit_info *info, 1064 struct pipe_resource *dst, 1065 unsigned dst_level, unsigned dst_z, 1066 enum pipe_format format) 1067{ 1068 /* Required before and after CB_RESOLVE. */ 1069 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 1070 1071 si_blitter_begin(sctx, SI_COLOR_RESOLVE | 1072 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1073 util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z, 1074 info->src.resource, info->src.box.z, 1075 ~0, sctx->custom_blend_resolve, 1076 format); 1077 si_blitter_end(sctx); 1078 1079 /* Flush caches for possible texturing. */ 1080 si_make_CB_shader_coherent(sctx, 1, false, true /* no DCC */); 1081} 1082 1083static bool do_hardware_msaa_resolve(struct pipe_context *ctx, 1084 const struct pipe_blit_info *info) 1085{ 1086 struct si_context *sctx = (struct si_context*)ctx; 1087 struct si_texture *src = (struct si_texture*)info->src.resource; 1088 struct si_texture *dst = (struct si_texture*)info->dst.resource; 1089 MAYBE_UNUSED struct si_texture *stmp; 1090 unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); 1091 unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level); 1092 enum pipe_format format = info->src.format; 1093 struct pipe_resource *tmp, templ; 1094 struct pipe_blit_info blit; 1095 1096 /* Check basic requirements for hw resolve. */ 1097 if (!(info->src.resource->nr_samples > 1 && 1098 info->dst.resource->nr_samples <= 1 && 1099 !util_format_is_pure_integer(format) && 1100 !util_format_is_depth_or_stencil(format) && 1101 util_max_layer(info->src.resource, 0) == 0)) 1102 return false; 1103 1104 /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and 1105 * the format is R16G16. Use R16A16, which does work. 1106 */ 1107 if (format == PIPE_FORMAT_R16G16_UNORM) 1108 format = PIPE_FORMAT_R16A16_UNORM; 1109 if (format == PIPE_FORMAT_R16G16_SNORM) 1110 format = PIPE_FORMAT_R16A16_SNORM; 1111 1112 /* Check the remaining requirements for hw resolve. */ 1113 if (util_max_layer(info->dst.resource, info->dst.level) == 0 && 1114 !info->scissor_enable && 1115 (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA && 1116 util_is_format_compatible(util_format_description(info->src.format), 1117 util_format_description(info->dst.format)) && 1118 dst_width == info->src.resource->width0 && 1119 dst_height == info->src.resource->height0 && 1120 info->dst.box.x == 0 && 1121 info->dst.box.y == 0 && 1122 info->dst.box.width == dst_width && 1123 info->dst.box.height == dst_height && 1124 info->dst.box.depth == 1 && 1125 info->src.box.x == 0 && 1126 info->src.box.y == 0 && 1127 info->src.box.width == dst_width && 1128 info->src.box.height == dst_height && 1129 info->src.box.depth == 1 && 1130 !dst->surface.is_linear && 1131 (!dst->cmask_buffer || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */ 1132 /* Check the last constraint. */ 1133 if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) { 1134 /* The next fast clear will switch to this mode to 1135 * get direct hw resolve next time if the mode is 1136 * different now. 1137 */ 1138 src->last_msaa_resolve_target_micro_mode = 1139 dst->surface.micro_tile_mode; 1140 goto resolve_to_temp; 1141 } 1142 1143 /* Resolving into a surface with DCC is unsupported. Since 1144 * it's being overwritten anyway, clear it to uncompressed. 1145 * This is still the fastest codepath even with this clear. 1146 */ 1147 if (vi_dcc_enabled(dst, info->dst.level)) { 1148 /* TODO: Implement per-level DCC clears for GFX9. */ 1149 if (sctx->chip_class >= GFX9 && 1150 info->dst.resource->last_level != 0) 1151 goto resolve_to_temp; 1152 1153 /* This can happen with mipmapping. */ 1154 if (sctx->chip_class == VI && 1155 !dst->surface.u.legacy.level[info->dst.level].dcc_fast_clear_size) 1156 goto resolve_to_temp; 1157 1158 vi_dcc_clear_level(sctx, dst, info->dst.level, 1159 0xFFFFFFFF); 1160 dst->dirty_level_mask &= ~(1 << info->dst.level); 1161 } 1162 1163 /* Resolve directly from src to dst. */ 1164 si_do_CB_resolve(sctx, info, info->dst.resource, 1165 info->dst.level, info->dst.box.z, format); 1166 return true; 1167 } 1168 1169resolve_to_temp: 1170 /* Shader-based resolve is VERY SLOW. Instead, resolve into 1171 * a temporary texture and blit. 1172 */ 1173 memset(&templ, 0, sizeof(templ)); 1174 templ.target = PIPE_TEXTURE_2D; 1175 templ.format = info->src.resource->format; 1176 templ.width0 = info->src.resource->width0; 1177 templ.height0 = info->src.resource->height0; 1178 templ.depth0 = 1; 1179 templ.array_size = 1; 1180 templ.usage = PIPE_USAGE_DEFAULT; 1181 templ.flags = SI_RESOURCE_FLAG_FORCE_MSAA_TILING | 1182 SI_RESOURCE_FLAG_DISABLE_DCC; 1183 1184 /* The src and dst microtile modes must be the same. */ 1185 if (src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY) 1186 templ.bind = PIPE_BIND_SCANOUT; 1187 else 1188 templ.bind = 0; 1189 1190 tmp = ctx->screen->resource_create(ctx->screen, &templ); 1191 if (!tmp) 1192 return false; 1193 stmp = (struct si_texture*)tmp; 1194 1195 assert(!stmp->surface.is_linear); 1196 assert(src->surface.micro_tile_mode == stmp->surface.micro_tile_mode); 1197 1198 /* resolve */ 1199 si_do_CB_resolve(sctx, info, tmp, 0, 0, format); 1200 1201 /* blit */ 1202 blit = *info; 1203 blit.src.resource = tmp; 1204 blit.src.box.z = 0; 1205 1206 si_blitter_begin(sctx, SI_BLIT | 1207 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1208 util_blitter_blit(sctx->blitter, &blit); 1209 si_blitter_end(sctx); 1210 1211 pipe_resource_reference(&tmp, NULL); 1212 return true; 1213} 1214 1215static void si_blit(struct pipe_context *ctx, 1216 const struct pipe_blit_info *info) 1217{ 1218 struct si_context *sctx = (struct si_context*)ctx; 1219 struct si_texture *dst = (struct si_texture *)info->dst.resource; 1220 1221 if (do_hardware_msaa_resolve(ctx, info)) { 1222 return; 1223 } 1224 1225 /* Using SDMA for copying to a linear texture in GTT is much faster. 1226 * This improves DRI PRIME performance. 1227 * 1228 * resource_copy_region can't do this yet, because dma_copy calls it 1229 * on failure (recursion). 1230 */ 1231 if (dst->surface.is_linear && 1232 sctx->dma_copy && 1233 util_can_blit_via_copy_region(info, false)) { 1234 sctx->dma_copy(ctx, info->dst.resource, info->dst.level, 1235 info->dst.box.x, info->dst.box.y, 1236 info->dst.box.z, 1237 info->src.resource, info->src.level, 1238 &info->src.box); 1239 return; 1240 } 1241 1242 assert(util_blitter_is_blit_supported(sctx->blitter, info)); 1243 1244 /* The driver doesn't decompress resources automatically while 1245 * u_blitter is rendering. */ 1246 vi_disable_dcc_if_incompatible_format(sctx, info->src.resource, 1247 info->src.level, 1248 info->src.format); 1249 vi_disable_dcc_if_incompatible_format(sctx, info->dst.resource, 1250 info->dst.level, 1251 info->dst.format); 1252 si_decompress_subresource(ctx, info->src.resource, info->mask, 1253 info->src.level, 1254 info->src.box.z, 1255 info->src.box.z + info->src.box.depth - 1); 1256 1257 if (sctx->screen->debug_flags & DBG(FORCE_DMA) && 1258 util_try_blit_via_copy_region(ctx, info)) 1259 return; 1260 1261 si_blitter_begin(sctx, SI_BLIT | 1262 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1263 util_blitter_blit(sctx->blitter, info); 1264 si_blitter_end(sctx); 1265} 1266 1267static boolean si_generate_mipmap(struct pipe_context *ctx, 1268 struct pipe_resource *tex, 1269 enum pipe_format format, 1270 unsigned base_level, unsigned last_level, 1271 unsigned first_layer, unsigned last_layer) 1272{ 1273 struct si_context *sctx = (struct si_context*)ctx; 1274 struct si_texture *stex = (struct si_texture *)tex; 1275 1276 if (!util_blitter_is_copy_supported(sctx->blitter, tex, tex)) 1277 return false; 1278 1279 /* The driver doesn't decompress resources automatically while 1280 * u_blitter is rendering. */ 1281 vi_disable_dcc_if_incompatible_format(sctx, tex, base_level, 1282 format); 1283 si_decompress_subresource(ctx, tex, PIPE_MASK_RGBAZS, 1284 base_level, first_layer, last_layer); 1285 1286 /* Clear dirty_level_mask for the levels that will be overwritten. */ 1287 assert(base_level < last_level); 1288 stex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1, 1289 last_level - base_level); 1290 1291 sctx->generate_mipmap_for_depth = stex->is_depth; 1292 1293 si_blitter_begin(sctx, SI_BLIT | SI_DISABLE_RENDER_COND); 1294 util_blitter_generate_mipmap(sctx->blitter, tex, format, 1295 base_level, last_level, 1296 first_layer, last_layer); 1297 si_blitter_end(sctx); 1298 1299 sctx->generate_mipmap_for_depth = false; 1300 return true; 1301} 1302 1303static void si_flush_resource(struct pipe_context *ctx, 1304 struct pipe_resource *res) 1305{ 1306 struct si_context *sctx = (struct si_context*)ctx; 1307 struct si_texture *tex = (struct si_texture*)res; 1308 1309 assert(res->target != PIPE_BUFFER); 1310 assert(!tex->dcc_separate_buffer || tex->dcc_gather_statistics); 1311 1312 /* st/dri calls flush twice per frame (not a bug), this prevents double 1313 * decompression. */ 1314 if (tex->dcc_separate_buffer && !tex->separate_dcc_dirty) 1315 return; 1316 1317 if (!tex->is_depth && (tex->cmask_buffer || tex->dcc_offset)) { 1318 si_blit_decompress_color(sctx, tex, 0, res->last_level, 1319 0, util_max_layer(res, 0), 1320 tex->dcc_separate_buffer != NULL); 1321 1322 if (tex->display_dcc_offset) 1323 si_retile_dcc(sctx, tex); 1324 } 1325 1326 /* Always do the analysis even if DCC is disabled at the moment. */ 1327 if (tex->dcc_gather_statistics) { 1328 bool separate_dcc_dirty = tex->separate_dcc_dirty; 1329 1330 /* If the color buffer hasn't been unbound and fast clear hasn't 1331 * been used, separate_dcc_dirty is false, but there may have been 1332 * new rendering. Check if the color buffer is bound and assume 1333 * it's dirty. 1334 * 1335 * Note that DRI2 never unbinds window colorbuffers, which means 1336 * the DCC pipeline statistics query would never be re-set and would 1337 * keep adding new results until all free memory is exhausted if we 1338 * didn't do this. 1339 */ 1340 if (!separate_dcc_dirty) { 1341 for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 1342 if (sctx->framebuffer.state.cbufs[i] && 1343 sctx->framebuffer.state.cbufs[i]->texture == res) { 1344 separate_dcc_dirty = true; 1345 break; 1346 } 1347 } 1348 } 1349 1350 if (separate_dcc_dirty) { 1351 tex->separate_dcc_dirty = false; 1352 vi_separate_dcc_process_and_reset_stats(ctx, tex); 1353 } 1354 } 1355} 1356 1357void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex) 1358{ 1359 /* If graphics is disabled, we can't decompress DCC, but it shouldn't 1360 * be compressed either. The caller should simply discard it. 1361 */ 1362 if (!tex->dcc_offset || !sctx->has_graphics) 1363 return; 1364 1365 si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level, 1366 0, util_max_layer(&tex->buffer.b.b, 0), 1367 true); 1368} 1369 1370void si_init_blit_functions(struct si_context *sctx) 1371{ 1372 sctx->b.resource_copy_region = si_resource_copy_region; 1373 1374 if (sctx->has_graphics) { 1375 sctx->b.blit = si_blit; 1376 sctx->b.flush_resource = si_flush_resource; 1377 sctx->b.generate_mipmap = si_generate_mipmap; 1378 } 1379} 1380