si_blit.c revision 01e04c3f
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * Copyright 2015 Advanced Micro Devices, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * on the rights to use, copy, modify, merge, publish, distribute, sub 10 * license, and/or sell copies of the Software, and to permit persons to whom 11 * the Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23 * USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26#include "si_pipe.h" 27#include "si_compute.h" 28#include "util/u_format.h" 29#include "util/u_log.h" 30#include "util/u_surface.h" 31 32enum { 33 SI_COPY = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | 34 SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND, 35 36 SI_BLIT = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | 37 SI_SAVE_FRAGMENT_STATE, 38 39 SI_DECOMPRESS = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE | 40 SI_DISABLE_RENDER_COND, 41 42 SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE 43}; 44 45void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op) 46{ 47 util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso); 48 util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso); 49 util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso); 50 util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso); 51 util_blitter_save_so_targets(sctx->blitter, sctx->streamout.num_targets, 52 (struct pipe_stream_output_target**)sctx->streamout.targets); 53 util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer); 54 55 if (op & SI_SAVE_FRAGMENT_STATE) { 56 util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend); 57 util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa); 58 util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state); 59 util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso); 60 util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask); 61 util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]); 62 util_blitter_save_window_rectangles(sctx->blitter, 63 sctx->window_rectangles_include, 64 sctx->num_window_rectangles, 65 sctx->window_rectangles); 66 } 67 68 if (op & SI_SAVE_FRAMEBUFFER) 69 util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state); 70 71 if (op & SI_SAVE_TEXTURES) { 72 util_blitter_save_fragment_sampler_states( 73 sctx->blitter, 2, 74 (void**)sctx->samplers[PIPE_SHADER_FRAGMENT].sampler_states); 75 76 util_blitter_save_fragment_sampler_views(sctx->blitter, 2, 77 sctx->samplers[PIPE_SHADER_FRAGMENT].views); 78 } 79 80 if (op & SI_DISABLE_RENDER_COND) 81 sctx->render_cond_force_off = true; 82 83 if (sctx->screen->dpbb_allowed) { 84 sctx->dpbb_force_off = true; 85 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 86 } 87} 88 89void si_blitter_end(struct si_context *sctx) 90{ 91 if (sctx->screen->dpbb_allowed) { 92 sctx->dpbb_force_off = false; 93 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 94 } 95 96 sctx->render_cond_force_off = false; 97 98 /* Restore shader pointers because the VS blit shader changed all 99 * non-global VS user SGPRs. */ 100 sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX); 101 sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL; 102 si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); 103} 104 105static unsigned u_max_sample(struct pipe_resource *r) 106{ 107 return r->nr_samples ? r->nr_samples - 1 : 0; 108} 109 110static unsigned 111si_blit_dbcb_copy(struct si_context *sctx, 112 struct si_texture *src, 113 struct si_texture *dst, 114 unsigned planes, unsigned level_mask, 115 unsigned first_layer, unsigned last_layer, 116 unsigned first_sample, unsigned last_sample) 117{ 118 struct pipe_surface surf_tmpl = {{0}}; 119 unsigned layer, sample, checked_last_layer, max_layer; 120 unsigned fully_copied_levels = 0; 121 122 if (planes & PIPE_MASK_Z) 123 sctx->dbcb_depth_copy_enabled = true; 124 if (planes & PIPE_MASK_S) 125 sctx->dbcb_stencil_copy_enabled = true; 126 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 127 128 assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled); 129 130 sctx->decompression_enabled = true; 131 132 while (level_mask) { 133 unsigned level = u_bit_scan(&level_mask); 134 135 /* The smaller the mipmap level, the less layers there are 136 * as far as 3D textures are concerned. */ 137 max_layer = util_max_layer(&src->buffer.b.b, level); 138 checked_last_layer = MIN2(last_layer, max_layer); 139 140 surf_tmpl.u.tex.level = level; 141 142 for (layer = first_layer; layer <= checked_last_layer; layer++) { 143 struct pipe_surface *zsurf, *cbsurf; 144 145 surf_tmpl.format = src->buffer.b.b.format; 146 surf_tmpl.u.tex.first_layer = layer; 147 surf_tmpl.u.tex.last_layer = layer; 148 149 zsurf = sctx->b.create_surface(&sctx->b, &src->buffer.b.b, &surf_tmpl); 150 151 surf_tmpl.format = dst->buffer.b.b.format; 152 cbsurf = sctx->b.create_surface(&sctx->b, &dst->buffer.b.b, &surf_tmpl); 153 154 for (sample = first_sample; sample <= last_sample; sample++) { 155 if (sample != sctx->dbcb_copy_sample) { 156 sctx->dbcb_copy_sample = sample; 157 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 158 } 159 160 si_blitter_begin(sctx, SI_DECOMPRESS); 161 util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample, 162 sctx->custom_dsa_flush, 1.0f); 163 si_blitter_end(sctx); 164 } 165 166 pipe_surface_reference(&zsurf, NULL); 167 pipe_surface_reference(&cbsurf, NULL); 168 } 169 170 if (first_layer == 0 && last_layer >= max_layer && 171 first_sample == 0 && last_sample >= u_max_sample(&src->buffer.b.b)) 172 fully_copied_levels |= 1u << level; 173 } 174 175 sctx->decompression_enabled = false; 176 sctx->dbcb_depth_copy_enabled = false; 177 sctx->dbcb_stencil_copy_enabled = false; 178 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 179 180 return fully_copied_levels; 181} 182 183void si_blit_decompress_depth(struct pipe_context *ctx, 184 struct si_texture *texture, 185 struct si_texture *staging, 186 unsigned first_level, unsigned last_level, 187 unsigned first_layer, unsigned last_layer, 188 unsigned first_sample, unsigned last_sample) 189{ 190 const struct util_format_description *desc; 191 unsigned planes = 0; 192 193 assert(staging != NULL && "use si_blit_decompress_zs_in_place instead"); 194 195 desc = util_format_description(staging->buffer.b.b.format); 196 197 if (util_format_has_depth(desc)) 198 planes |= PIPE_MASK_Z; 199 if (util_format_has_stencil(desc)) 200 planes |= PIPE_MASK_S; 201 202 si_blit_dbcb_copy( 203 (struct si_context *)ctx, texture, staging, planes, 204 u_bit_consecutive(first_level, last_level - first_level + 1), 205 first_layer, last_layer, first_sample, last_sample); 206} 207 208/* Helper function for si_blit_decompress_zs_in_place. 209 */ 210static void 211si_blit_decompress_zs_planes_in_place(struct si_context *sctx, 212 struct si_texture *texture, 213 unsigned planes, unsigned level_mask, 214 unsigned first_layer, unsigned last_layer) 215{ 216 struct pipe_surface *zsurf, surf_tmpl = {{0}}; 217 unsigned layer, max_layer, checked_last_layer; 218 unsigned fully_decompressed_mask = 0; 219 220 if (!level_mask) 221 return; 222 223 if (planes & PIPE_MASK_S) 224 sctx->db_flush_stencil_inplace = true; 225 if (planes & PIPE_MASK_Z) 226 sctx->db_flush_depth_inplace = true; 227 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 228 229 surf_tmpl.format = texture->buffer.b.b.format; 230 231 sctx->decompression_enabled = true; 232 233 while (level_mask) { 234 unsigned level = u_bit_scan(&level_mask); 235 236 surf_tmpl.u.tex.level = level; 237 238 /* The smaller the mipmap level, the less layers there are 239 * as far as 3D textures are concerned. */ 240 max_layer = util_max_layer(&texture->buffer.b.b, level); 241 checked_last_layer = MIN2(last_layer, max_layer); 242 243 for (layer = first_layer; layer <= checked_last_layer; layer++) { 244 surf_tmpl.u.tex.first_layer = layer; 245 surf_tmpl.u.tex.last_layer = layer; 246 247 zsurf = sctx->b.create_surface(&sctx->b, &texture->buffer.b.b, &surf_tmpl); 248 249 si_blitter_begin(sctx, SI_DECOMPRESS); 250 util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0, 251 sctx->custom_dsa_flush, 252 1.0f); 253 si_blitter_end(sctx); 254 255 pipe_surface_reference(&zsurf, NULL); 256 } 257 258 /* The texture will always be dirty if some layers aren't flushed. 259 * I don't think this case occurs often though. */ 260 if (first_layer == 0 && last_layer >= max_layer) { 261 fully_decompressed_mask |= 1u << level; 262 } 263 } 264 265 if (planes & PIPE_MASK_Z) 266 texture->dirty_level_mask &= ~fully_decompressed_mask; 267 if (planes & PIPE_MASK_S) 268 texture->stencil_dirty_level_mask &= ~fully_decompressed_mask; 269 270 sctx->decompression_enabled = false; 271 sctx->db_flush_depth_inplace = false; 272 sctx->db_flush_stencil_inplace = false; 273 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 274} 275 276/* Helper function of si_flush_depth_texture: decompress the given levels 277 * of Z and/or S planes in place. 278 */ 279static void 280si_blit_decompress_zs_in_place(struct si_context *sctx, 281 struct si_texture *texture, 282 unsigned levels_z, unsigned levels_s, 283 unsigned first_layer, unsigned last_layer) 284{ 285 unsigned both = levels_z & levels_s; 286 287 /* First, do combined Z & S decompresses for levels that need it. */ 288 if (both) { 289 si_blit_decompress_zs_planes_in_place( 290 sctx, texture, PIPE_MASK_Z | PIPE_MASK_S, 291 both, 292 first_layer, last_layer); 293 levels_z &= ~both; 294 levels_s &= ~both; 295 } 296 297 /* Now do separate Z and S decompresses. */ 298 if (levels_z) { 299 si_blit_decompress_zs_planes_in_place( 300 sctx, texture, PIPE_MASK_Z, 301 levels_z, 302 first_layer, last_layer); 303 } 304 305 if (levels_s) { 306 si_blit_decompress_zs_planes_in_place( 307 sctx, texture, PIPE_MASK_S, 308 levels_s, 309 first_layer, last_layer); 310 } 311} 312 313static void 314si_decompress_depth(struct si_context *sctx, 315 struct si_texture *tex, 316 unsigned required_planes, 317 unsigned first_level, unsigned last_level, 318 unsigned first_layer, unsigned last_layer) 319{ 320 unsigned inplace_planes = 0; 321 unsigned copy_planes = 0; 322 unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1); 323 unsigned levels_z = 0; 324 unsigned levels_s = 0; 325 326 if (required_planes & PIPE_MASK_Z) { 327 levels_z = level_mask & tex->dirty_level_mask; 328 329 if (levels_z) { 330 if (si_can_sample_zs(tex, false)) 331 inplace_planes |= PIPE_MASK_Z; 332 else 333 copy_planes |= PIPE_MASK_Z; 334 } 335 } 336 if (required_planes & PIPE_MASK_S) { 337 levels_s = level_mask & tex->stencil_dirty_level_mask; 338 339 if (levels_s) { 340 if (si_can_sample_zs(tex, true)) 341 inplace_planes |= PIPE_MASK_S; 342 else 343 copy_planes |= PIPE_MASK_S; 344 } 345 } 346 347 if (unlikely(sctx->log)) 348 u_log_printf(sctx->log, 349 "\n------------------------------------------------\n" 350 "Decompress Depth (levels %u - %u, levels Z: 0x%x S: 0x%x)\n\n", 351 first_level, last_level, levels_z, levels_s); 352 353 /* We may have to allocate the flushed texture here when called from 354 * si_decompress_subresource. 355 */ 356 if (copy_planes && 357 (tex->flushed_depth_texture || 358 si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b, NULL))) { 359 struct si_texture *dst = tex->flushed_depth_texture; 360 unsigned fully_copied_levels; 361 unsigned levels = 0; 362 363 assert(tex->flushed_depth_texture); 364 365 if (util_format_is_depth_and_stencil(dst->buffer.b.b.format)) 366 copy_planes = PIPE_MASK_Z | PIPE_MASK_S; 367 368 if (copy_planes & PIPE_MASK_Z) { 369 levels |= levels_z; 370 levels_z = 0; 371 } 372 if (copy_planes & PIPE_MASK_S) { 373 levels |= levels_s; 374 levels_s = 0; 375 } 376 377 fully_copied_levels = si_blit_dbcb_copy( 378 sctx, tex, dst, copy_planes, levels, 379 first_layer, last_layer, 380 0, u_max_sample(&tex->buffer.b.b)); 381 382 if (copy_planes & PIPE_MASK_Z) 383 tex->dirty_level_mask &= ~fully_copied_levels; 384 if (copy_planes & PIPE_MASK_S) 385 tex->stencil_dirty_level_mask &= ~fully_copied_levels; 386 } 387 388 if (inplace_planes) { 389 bool has_htile = si_htile_enabled(tex, first_level); 390 bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, first_level); 391 392 /* Don't decompress if there is no HTILE or when HTILE is 393 * TC-compatible. */ 394 if (has_htile && !tc_compat_htile) { 395 si_blit_decompress_zs_in_place( 396 sctx, tex, 397 levels_z, levels_s, 398 first_layer, last_layer); 399 } else { 400 /* This is only a cache flush. 401 * 402 * Only clear the mask that we are flushing, because 403 * si_make_DB_shader_coherent() treats different levels 404 * and depth and stencil differently. 405 */ 406 if (inplace_planes & PIPE_MASK_Z) 407 tex->dirty_level_mask &= ~levels_z; 408 if (inplace_planes & PIPE_MASK_S) 409 tex->stencil_dirty_level_mask &= ~levels_s; 410 } 411 412 /* Only in-place decompression needs to flush DB caches, or 413 * when we don't decompress but TC-compatible planes are dirty. 414 */ 415 si_make_DB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, 416 inplace_planes & PIPE_MASK_S, 417 tc_compat_htile); 418 } 419 /* set_framebuffer_state takes care of coherency for single-sample. 420 * The DB->CB copy uses CB for the final writes. 421 */ 422 if (copy_planes && tex->buffer.b.b.nr_samples > 1) 423 si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, 424 false); 425} 426 427static void 428si_decompress_sampler_depth_textures(struct si_context *sctx, 429 struct si_samplers *textures) 430{ 431 unsigned i; 432 unsigned mask = textures->needs_depth_decompress_mask; 433 434 while (mask) { 435 struct pipe_sampler_view *view; 436 struct si_sampler_view *sview; 437 struct si_texture *tex; 438 439 i = u_bit_scan(&mask); 440 441 view = textures->views[i]; 442 assert(view); 443 sview = (struct si_sampler_view*)view; 444 445 tex = (struct si_texture *)view->texture; 446 assert(tex->db_compatible); 447 448 si_decompress_depth(sctx, tex, 449 sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z, 450 view->u.tex.first_level, view->u.tex.last_level, 451 0, util_max_layer(&tex->buffer.b.b, view->u.tex.first_level)); 452 } 453} 454 455static void si_blit_decompress_color(struct si_context *sctx, 456 struct si_texture *tex, 457 unsigned first_level, unsigned last_level, 458 unsigned first_layer, unsigned last_layer, 459 bool need_dcc_decompress) 460{ 461 void* custom_blend; 462 unsigned layer, checked_last_layer, max_layer; 463 unsigned level_mask = 464 u_bit_consecutive(first_level, last_level - first_level + 1); 465 466 if (!need_dcc_decompress) 467 level_mask &= tex->dirty_level_mask; 468 if (!level_mask) 469 return; 470 471 if (unlikely(sctx->log)) 472 u_log_printf(sctx->log, 473 "\n------------------------------------------------\n" 474 "Decompress Color (levels %u - %u, mask 0x%x)\n\n", 475 first_level, last_level, level_mask); 476 477 if (need_dcc_decompress) { 478 custom_blend = sctx->custom_blend_dcc_decompress; 479 480 assert(tex->dcc_offset); 481 482 /* disable levels without DCC */ 483 for (int i = first_level; i <= last_level; i++) { 484 if (!vi_dcc_enabled(tex, i)) 485 level_mask &= ~(1 << i); 486 } 487 } else if (tex->surface.fmask_size) { 488 custom_blend = sctx->custom_blend_fmask_decompress; 489 } else { 490 custom_blend = sctx->custom_blend_eliminate_fastclear; 491 } 492 493 sctx->decompression_enabled = true; 494 495 while (level_mask) { 496 unsigned level = u_bit_scan(&level_mask); 497 498 /* The smaller the mipmap level, the less layers there are 499 * as far as 3D textures are concerned. */ 500 max_layer = util_max_layer(&tex->buffer.b.b, level); 501 checked_last_layer = MIN2(last_layer, max_layer); 502 503 for (layer = first_layer; layer <= checked_last_layer; layer++) { 504 struct pipe_surface *cbsurf, surf_tmpl; 505 506 surf_tmpl.format = tex->buffer.b.b.format; 507 surf_tmpl.u.tex.level = level; 508 surf_tmpl.u.tex.first_layer = layer; 509 surf_tmpl.u.tex.last_layer = layer; 510 cbsurf = sctx->b.create_surface(&sctx->b, &tex->buffer.b.b, &surf_tmpl); 511 512 /* Required before and after FMASK and DCC_DECOMPRESS. */ 513 if (custom_blend == sctx->custom_blend_fmask_decompress || 514 custom_blend == sctx->custom_blend_dcc_decompress) 515 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 516 517 si_blitter_begin(sctx, SI_DECOMPRESS); 518 util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend); 519 si_blitter_end(sctx); 520 521 if (custom_blend == sctx->custom_blend_fmask_decompress || 522 custom_blend == sctx->custom_blend_dcc_decompress) 523 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 524 525 pipe_surface_reference(&cbsurf, NULL); 526 } 527 528 /* The texture will always be dirty if some layers aren't flushed. 529 * I don't think this case occurs often though. */ 530 if (first_layer == 0 && last_layer >= max_layer) { 531 tex->dirty_level_mask &= ~(1 << level); 532 } 533 } 534 535 sctx->decompression_enabled = false; 536 si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, 537 vi_dcc_enabled(tex, first_level)); 538} 539 540static void 541si_decompress_color_texture(struct si_context *sctx, struct si_texture *tex, 542 unsigned first_level, unsigned last_level) 543{ 544 /* CMASK or DCC can be discarded and we can still end up here. */ 545 if (!tex->cmask_buffer && !tex->surface.fmask_size && !tex->dcc_offset) 546 return; 547 548 si_blit_decompress_color(sctx, tex, first_level, last_level, 0, 549 util_max_layer(&tex->buffer.b.b, first_level), 550 false); 551} 552 553static void 554si_decompress_sampler_color_textures(struct si_context *sctx, 555 struct si_samplers *textures) 556{ 557 unsigned i; 558 unsigned mask = textures->needs_color_decompress_mask; 559 560 while (mask) { 561 struct pipe_sampler_view *view; 562 struct si_texture *tex; 563 564 i = u_bit_scan(&mask); 565 566 view = textures->views[i]; 567 assert(view); 568 569 tex = (struct si_texture *)view->texture; 570 571 si_decompress_color_texture(sctx, tex, view->u.tex.first_level, 572 view->u.tex.last_level); 573 } 574} 575 576static void 577si_decompress_image_color_textures(struct si_context *sctx, 578 struct si_images *images) 579{ 580 unsigned i; 581 unsigned mask = images->needs_color_decompress_mask; 582 583 while (mask) { 584 const struct pipe_image_view *view; 585 struct si_texture *tex; 586 587 i = u_bit_scan(&mask); 588 589 view = &images->views[i]; 590 assert(view->resource->target != PIPE_BUFFER); 591 592 tex = (struct si_texture *)view->resource; 593 594 si_decompress_color_texture(sctx, tex, view->u.tex.level, 595 view->u.tex.level); 596 } 597} 598 599static void si_check_render_feedback_texture(struct si_context *sctx, 600 struct si_texture *tex, 601 unsigned first_level, 602 unsigned last_level, 603 unsigned first_layer, 604 unsigned last_layer) 605{ 606 bool render_feedback = false; 607 608 if (!tex->dcc_offset) 609 return; 610 611 for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) { 612 struct si_surface * surf; 613 614 if (!sctx->framebuffer.state.cbufs[j]) 615 continue; 616 617 surf = (struct si_surface*)sctx->framebuffer.state.cbufs[j]; 618 619 if (tex == (struct si_texture *)surf->base.texture && 620 surf->base.u.tex.level >= first_level && 621 surf->base.u.tex.level <= last_level && 622 surf->base.u.tex.first_layer <= last_layer && 623 surf->base.u.tex.last_layer >= first_layer) { 624 render_feedback = true; 625 break; 626 } 627 } 628 629 if (render_feedback) 630 si_texture_disable_dcc(sctx, tex); 631} 632 633static void si_check_render_feedback_textures(struct si_context *sctx, 634 struct si_samplers *textures) 635{ 636 uint32_t mask = textures->enabled_mask; 637 638 while (mask) { 639 const struct pipe_sampler_view *view; 640 struct si_texture *tex; 641 642 unsigned i = u_bit_scan(&mask); 643 644 view = textures->views[i]; 645 if(view->texture->target == PIPE_BUFFER) 646 continue; 647 648 tex = (struct si_texture *)view->texture; 649 650 si_check_render_feedback_texture(sctx, tex, 651 view->u.tex.first_level, 652 view->u.tex.last_level, 653 view->u.tex.first_layer, 654 view->u.tex.last_layer); 655 } 656} 657 658static void si_check_render_feedback_images(struct si_context *sctx, 659 struct si_images *images) 660{ 661 uint32_t mask = images->enabled_mask; 662 663 while (mask) { 664 const struct pipe_image_view *view; 665 struct si_texture *tex; 666 667 unsigned i = u_bit_scan(&mask); 668 669 view = &images->views[i]; 670 if (view->resource->target == PIPE_BUFFER) 671 continue; 672 673 tex = (struct si_texture *)view->resource; 674 675 si_check_render_feedback_texture(sctx, tex, 676 view->u.tex.level, 677 view->u.tex.level, 678 view->u.tex.first_layer, 679 view->u.tex.last_layer); 680 } 681} 682 683static void si_check_render_feedback_resident_textures(struct si_context *sctx) 684{ 685 util_dynarray_foreach(&sctx->resident_tex_handles, 686 struct si_texture_handle *, tex_handle) { 687 struct pipe_sampler_view *view; 688 struct si_texture *tex; 689 690 view = (*tex_handle)->view; 691 if (view->texture->target == PIPE_BUFFER) 692 continue; 693 694 tex = (struct si_texture *)view->texture; 695 696 si_check_render_feedback_texture(sctx, tex, 697 view->u.tex.first_level, 698 view->u.tex.last_level, 699 view->u.tex.first_layer, 700 view->u.tex.last_layer); 701 } 702} 703 704static void si_check_render_feedback_resident_images(struct si_context *sctx) 705{ 706 util_dynarray_foreach(&sctx->resident_img_handles, 707 struct si_image_handle *, img_handle) { 708 struct pipe_image_view *view; 709 struct si_texture *tex; 710 711 view = &(*img_handle)->view; 712 if (view->resource->target == PIPE_BUFFER) 713 continue; 714 715 tex = (struct si_texture *)view->resource; 716 717 si_check_render_feedback_texture(sctx, tex, 718 view->u.tex.level, 719 view->u.tex.level, 720 view->u.tex.first_layer, 721 view->u.tex.last_layer); 722 } 723} 724 725static void si_check_render_feedback(struct si_context *sctx) 726{ 727 if (!sctx->need_check_render_feedback) 728 return; 729 730 /* There is no render feedback if color writes are disabled. 731 * (e.g. a pixel shader with image stores) 732 */ 733 if (!si_get_total_colormask(sctx)) 734 return; 735 736 for (int i = 0; i < SI_NUM_SHADERS; ++i) { 737 si_check_render_feedback_images(sctx, &sctx->images[i]); 738 si_check_render_feedback_textures(sctx, &sctx->samplers[i]); 739 } 740 741 si_check_render_feedback_resident_images(sctx); 742 si_check_render_feedback_resident_textures(sctx); 743 744 sctx->need_check_render_feedback = false; 745} 746 747static void si_decompress_resident_textures(struct si_context *sctx) 748{ 749 util_dynarray_foreach(&sctx->resident_tex_needs_color_decompress, 750 struct si_texture_handle *, tex_handle) { 751 struct pipe_sampler_view *view = (*tex_handle)->view; 752 struct si_texture *tex = (struct si_texture *)view->texture; 753 754 si_decompress_color_texture(sctx, tex, view->u.tex.first_level, 755 view->u.tex.last_level); 756 } 757 758 util_dynarray_foreach(&sctx->resident_tex_needs_depth_decompress, 759 struct si_texture_handle *, tex_handle) { 760 struct pipe_sampler_view *view = (*tex_handle)->view; 761 struct si_sampler_view *sview = (struct si_sampler_view *)view; 762 struct si_texture *tex = (struct si_texture *)view->texture; 763 764 si_decompress_depth(sctx, tex, 765 sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z, 766 view->u.tex.first_level, view->u.tex.last_level, 767 0, util_max_layer(&tex->buffer.b.b, view->u.tex.first_level)); 768 } 769} 770 771static void si_decompress_resident_images(struct si_context *sctx) 772{ 773 util_dynarray_foreach(&sctx->resident_img_needs_color_decompress, 774 struct si_image_handle *, img_handle) { 775 struct pipe_image_view *view = &(*img_handle)->view; 776 struct si_texture *tex = (struct si_texture *)view->resource; 777 778 si_decompress_color_texture(sctx, tex, view->u.tex.level, 779 view->u.tex.level); 780 } 781} 782 783void si_decompress_textures(struct si_context *sctx, unsigned shader_mask) 784{ 785 unsigned compressed_colortex_counter, mask; 786 787 if (sctx->blitter->running) 788 return; 789 790 /* Update the compressed_colortex_mask if necessary. */ 791 compressed_colortex_counter = p_atomic_read(&sctx->screen->compressed_colortex_counter); 792 if (compressed_colortex_counter != sctx->last_compressed_colortex_counter) { 793 sctx->last_compressed_colortex_counter = compressed_colortex_counter; 794 si_update_needs_color_decompress_masks(sctx); 795 } 796 797 /* Decompress color & depth textures if needed. */ 798 mask = sctx->shader_needs_decompress_mask & shader_mask; 799 while (mask) { 800 unsigned i = u_bit_scan(&mask); 801 802 if (sctx->samplers[i].needs_depth_decompress_mask) { 803 si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]); 804 } 805 if (sctx->samplers[i].needs_color_decompress_mask) { 806 si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]); 807 } 808 if (sctx->images[i].needs_color_decompress_mask) { 809 si_decompress_image_color_textures(sctx, &sctx->images[i]); 810 } 811 } 812 813 if (shader_mask & u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)) { 814 if (sctx->uses_bindless_samplers) 815 si_decompress_resident_textures(sctx); 816 if (sctx->uses_bindless_images) 817 si_decompress_resident_images(sctx); 818 819 if (sctx->ps_uses_fbfetch) { 820 struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0]; 821 si_decompress_color_texture(sctx, 822 (struct si_texture*)cb0->texture, 823 cb0->u.tex.first_layer, 824 cb0->u.tex.last_layer); 825 } 826 827 si_check_render_feedback(sctx); 828 } else if (shader_mask & (1 << PIPE_SHADER_COMPUTE)) { 829 if (sctx->cs_shader_state.program->uses_bindless_samplers) 830 si_decompress_resident_textures(sctx); 831 if (sctx->cs_shader_state.program->uses_bindless_images) 832 si_decompress_resident_images(sctx); 833 } 834} 835 836/* Helper for decompressing a portion of a color or depth resource before 837 * blitting if any decompression is needed. 838 * The driver doesn't decompress resources automatically while u_blitter is 839 * rendering. */ 840static void si_decompress_subresource(struct pipe_context *ctx, 841 struct pipe_resource *tex, 842 unsigned planes, unsigned level, 843 unsigned first_layer, unsigned last_layer) 844{ 845 struct si_context *sctx = (struct si_context *)ctx; 846 struct si_texture *stex = (struct si_texture*)tex; 847 848 if (stex->db_compatible) { 849 planes &= PIPE_MASK_Z | PIPE_MASK_S; 850 851 if (!stex->surface.has_stencil) 852 planes &= ~PIPE_MASK_S; 853 854 /* If we've rendered into the framebuffer and it's a blitting 855 * source, make sure the decompression pass is invoked 856 * by dirtying the framebuffer. 857 */ 858 if (sctx->framebuffer.state.zsbuf && 859 sctx->framebuffer.state.zsbuf->u.tex.level == level && 860 sctx->framebuffer.state.zsbuf->texture == tex) 861 si_update_fb_dirtiness_after_rendering(sctx); 862 863 si_decompress_depth(sctx, stex, planes, 864 level, level, 865 first_layer, last_layer); 866 } else if (stex->surface.fmask_size || stex->cmask_buffer || stex->dcc_offset) { 867 /* If we've rendered into the framebuffer and it's a blitting 868 * source, make sure the decompression pass is invoked 869 * by dirtying the framebuffer. 870 */ 871 for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 872 if (sctx->framebuffer.state.cbufs[i] && 873 sctx->framebuffer.state.cbufs[i]->u.tex.level == level && 874 sctx->framebuffer.state.cbufs[i]->texture == tex) { 875 si_update_fb_dirtiness_after_rendering(sctx); 876 break; 877 } 878 } 879 880 si_blit_decompress_color(sctx, stex, level, level, 881 first_layer, last_layer, false); 882 } 883} 884 885struct texture_orig_info { 886 unsigned format; 887 unsigned width0; 888 unsigned height0; 889 unsigned npix_x; 890 unsigned npix_y; 891 unsigned npix0_x; 892 unsigned npix0_y; 893}; 894 895void si_resource_copy_region(struct pipe_context *ctx, 896 struct pipe_resource *dst, 897 unsigned dst_level, 898 unsigned dstx, unsigned dsty, unsigned dstz, 899 struct pipe_resource *src, 900 unsigned src_level, 901 const struct pipe_box *src_box) 902{ 903 struct si_context *sctx = (struct si_context *)ctx; 904 struct si_texture *ssrc = (struct si_texture*)src; 905 struct pipe_surface *dst_view, dst_templ; 906 struct pipe_sampler_view src_templ, *src_view; 907 unsigned dst_width, dst_height, src_width0, src_height0; 908 unsigned dst_width0, dst_height0, src_force_level = 0; 909 struct pipe_box sbox, dstbox; 910 911 /* Handle buffers first. */ 912 if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { 913 si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width); 914 return; 915 } 916 917 assert(u_max_sample(dst) == u_max_sample(src)); 918 919 /* The driver doesn't decompress resources automatically while 920 * u_blitter is rendering. */ 921 si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level, 922 src_box->z, src_box->z + src_box->depth - 1); 923 924 dst_width = u_minify(dst->width0, dst_level); 925 dst_height = u_minify(dst->height0, dst_level); 926 dst_width0 = dst->width0; 927 dst_height0 = dst->height0; 928 src_width0 = src->width0; 929 src_height0 = src->height0; 930 931 util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz); 932 util_blitter_default_src_texture(sctx->blitter, &src_templ, src, src_level); 933 934 if (util_format_is_compressed(src->format) || 935 util_format_is_compressed(dst->format)) { 936 unsigned blocksize = ssrc->surface.bpe; 937 938 if (blocksize == 8) 939 src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */ 940 else 941 src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */ 942 dst_templ.format = src_templ.format; 943 944 dst_width = util_format_get_nblocksx(dst->format, dst_width); 945 dst_height = util_format_get_nblocksy(dst->format, dst_height); 946 dst_width0 = util_format_get_nblocksx(dst->format, dst_width0); 947 dst_height0 = util_format_get_nblocksy(dst->format, dst_height0); 948 src_width0 = util_format_get_nblocksx(src->format, src_width0); 949 src_height0 = util_format_get_nblocksy(src->format, src_height0); 950 951 dstx = util_format_get_nblocksx(dst->format, dstx); 952 dsty = util_format_get_nblocksy(dst->format, dsty); 953 954 sbox.x = util_format_get_nblocksx(src->format, src_box->x); 955 sbox.y = util_format_get_nblocksy(src->format, src_box->y); 956 sbox.z = src_box->z; 957 sbox.width = util_format_get_nblocksx(src->format, src_box->width); 958 sbox.height = util_format_get_nblocksy(src->format, src_box->height); 959 sbox.depth = src_box->depth; 960 src_box = &sbox; 961 962 src_force_level = src_level; 963 } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) { 964 if (util_format_is_subsampled_422(src->format)) { 965 src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; 966 dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; 967 968 dst_width = util_format_get_nblocksx(dst->format, dst_width); 969 dst_width0 = util_format_get_nblocksx(dst->format, dst_width0); 970 src_width0 = util_format_get_nblocksx(src->format, src_width0); 971 972 dstx = util_format_get_nblocksx(dst->format, dstx); 973 974 sbox = *src_box; 975 sbox.x = util_format_get_nblocksx(src->format, src_box->x); 976 sbox.width = util_format_get_nblocksx(src->format, src_box->width); 977 src_box = &sbox; 978 } else { 979 unsigned blocksize = ssrc->surface.bpe; 980 981 switch (blocksize) { 982 case 1: 983 dst_templ.format = PIPE_FORMAT_R8_UNORM; 984 src_templ.format = PIPE_FORMAT_R8_UNORM; 985 break; 986 case 2: 987 dst_templ.format = PIPE_FORMAT_R8G8_UNORM; 988 src_templ.format = PIPE_FORMAT_R8G8_UNORM; 989 break; 990 case 4: 991 dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; 992 src_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; 993 break; 994 case 8: 995 dst_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; 996 src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; 997 break; 998 case 16: 999 dst_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; 1000 src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; 1001 break; 1002 default: 1003 fprintf(stderr, "Unhandled format %s with blocksize %u\n", 1004 util_format_short_name(src->format), blocksize); 1005 assert(0); 1006 } 1007 } 1008 } 1009 1010 /* SNORM8 blitting has precision issues on some chips. Use the SINT 1011 * equivalent instead, which doesn't force DCC decompression. 1012 * Note that some chips avoid this issue by using SDMA. 1013 */ 1014 if (util_format_is_snorm8(dst_templ.format)) { 1015 switch (dst_templ.format) { 1016 case PIPE_FORMAT_R8_SNORM: 1017 dst_templ.format = src_templ.format = PIPE_FORMAT_R8_SINT; 1018 break; 1019 case PIPE_FORMAT_R8G8_SNORM: 1020 dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8_SINT; 1021 break; 1022 case PIPE_FORMAT_R8G8B8X8_SNORM: 1023 dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8B8X8_SINT; 1024 break; 1025 case PIPE_FORMAT_R8G8B8A8_SNORM: 1026 /* There are no SINT variants for ABGR and XBGR, so we have to use RGBA. */ 1027 case PIPE_FORMAT_A8B8G8R8_SNORM: 1028 case PIPE_FORMAT_X8B8G8R8_SNORM: 1029 dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8B8A8_SINT; 1030 break; 1031 case PIPE_FORMAT_A8_SNORM: 1032 dst_templ.format = src_templ.format = PIPE_FORMAT_A8_SINT; 1033 break; 1034 case PIPE_FORMAT_L8_SNORM: 1035 dst_templ.format = src_templ.format = PIPE_FORMAT_L8_SINT; 1036 break; 1037 case PIPE_FORMAT_L8A8_SNORM: 1038 dst_templ.format = src_templ.format = PIPE_FORMAT_L8A8_SINT; 1039 break; 1040 case PIPE_FORMAT_I8_SNORM: 1041 dst_templ.format = src_templ.format = PIPE_FORMAT_I8_SINT; 1042 break; 1043 default:; /* fall through */ 1044 } 1045 } 1046 1047 vi_disable_dcc_if_incompatible_format(sctx, dst, dst_level, 1048 dst_templ.format); 1049 vi_disable_dcc_if_incompatible_format(sctx, src, src_level, 1050 src_templ.format); 1051 1052 /* Initialize the surface. */ 1053 dst_view = si_create_surface_custom(ctx, dst, &dst_templ, 1054 dst_width0, dst_height0, 1055 dst_width, dst_height); 1056 1057 /* Initialize the sampler view. */ 1058 src_view = si_create_sampler_view_custom(ctx, src, &src_templ, 1059 src_width0, src_height0, 1060 src_force_level); 1061 1062 u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height), 1063 abs(src_box->depth), &dstbox); 1064 1065 /* Copy. */ 1066 si_blitter_begin(sctx, SI_COPY); 1067 util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox, 1068 src_view, src_box, src_width0, src_height0, 1069 PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, 1070 false); 1071 si_blitter_end(sctx); 1072 1073 pipe_surface_reference(&dst_view, NULL); 1074 pipe_sampler_view_reference(&src_view, NULL); 1075} 1076 1077static void si_do_CB_resolve(struct si_context *sctx, 1078 const struct pipe_blit_info *info, 1079 struct pipe_resource *dst, 1080 unsigned dst_level, unsigned dst_z, 1081 enum pipe_format format) 1082{ 1083 /* Required before and after CB_RESOLVE. */ 1084 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 1085 1086 si_blitter_begin(sctx, SI_COLOR_RESOLVE | 1087 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1088 util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z, 1089 info->src.resource, info->src.box.z, 1090 ~0, sctx->custom_blend_resolve, 1091 format); 1092 si_blitter_end(sctx); 1093 1094 /* Flush caches for possible texturing. */ 1095 si_make_CB_shader_coherent(sctx, 1, false); 1096} 1097 1098static bool do_hardware_msaa_resolve(struct pipe_context *ctx, 1099 const struct pipe_blit_info *info) 1100{ 1101 struct si_context *sctx = (struct si_context*)ctx; 1102 struct si_texture *src = (struct si_texture*)info->src.resource; 1103 struct si_texture *dst = (struct si_texture*)info->dst.resource; 1104 MAYBE_UNUSED struct si_texture *stmp; 1105 unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); 1106 unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level); 1107 enum pipe_format format = info->src.format; 1108 struct pipe_resource *tmp, templ; 1109 struct pipe_blit_info blit; 1110 1111 /* Check basic requirements for hw resolve. */ 1112 if (!(info->src.resource->nr_samples > 1 && 1113 info->dst.resource->nr_samples <= 1 && 1114 !util_format_is_pure_integer(format) && 1115 !util_format_is_depth_or_stencil(format) && 1116 util_max_layer(info->src.resource, 0) == 0)) 1117 return false; 1118 1119 /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and 1120 * the format is R16G16. Use R16A16, which does work. 1121 */ 1122 if (format == PIPE_FORMAT_R16G16_UNORM) 1123 format = PIPE_FORMAT_R16A16_UNORM; 1124 if (format == PIPE_FORMAT_R16G16_SNORM) 1125 format = PIPE_FORMAT_R16A16_SNORM; 1126 1127 /* Check the remaining requirements for hw resolve. */ 1128 if (util_max_layer(info->dst.resource, info->dst.level) == 0 && 1129 !info->scissor_enable && 1130 (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA && 1131 util_is_format_compatible(util_format_description(info->src.format), 1132 util_format_description(info->dst.format)) && 1133 dst_width == info->src.resource->width0 && 1134 dst_height == info->src.resource->height0 && 1135 info->dst.box.x == 0 && 1136 info->dst.box.y == 0 && 1137 info->dst.box.width == dst_width && 1138 info->dst.box.height == dst_height && 1139 info->dst.box.depth == 1 && 1140 info->src.box.x == 0 && 1141 info->src.box.y == 0 && 1142 info->src.box.width == dst_width && 1143 info->src.box.height == dst_height && 1144 info->src.box.depth == 1 && 1145 !dst->surface.is_linear && 1146 (!dst->cmask_buffer || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */ 1147 /* Check the last constraint. */ 1148 if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) { 1149 /* The next fast clear will switch to this mode to 1150 * get direct hw resolve next time if the mode is 1151 * different now. 1152 */ 1153 src->last_msaa_resolve_target_micro_mode = 1154 dst->surface.micro_tile_mode; 1155 goto resolve_to_temp; 1156 } 1157 1158 /* Resolving into a surface with DCC is unsupported. Since 1159 * it's being overwritten anyway, clear it to uncompressed. 1160 * This is still the fastest codepath even with this clear. 1161 */ 1162 if (vi_dcc_enabled(dst, info->dst.level)) { 1163 /* TODO: Implement per-level DCC clears for GFX9. */ 1164 if (sctx->chip_class >= GFX9 && 1165 info->dst.resource->last_level != 0) 1166 goto resolve_to_temp; 1167 1168 /* This can happen with mipmapping. */ 1169 if (sctx->chip_class == VI && 1170 !dst->surface.u.legacy.level[info->dst.level].dcc_fast_clear_size) 1171 goto resolve_to_temp; 1172 1173 vi_dcc_clear_level(sctx, dst, info->dst.level, 1174 0xFFFFFFFF); 1175 dst->dirty_level_mask &= ~(1 << info->dst.level); 1176 } 1177 1178 /* Resolve directly from src to dst. */ 1179 si_do_CB_resolve(sctx, info, info->dst.resource, 1180 info->dst.level, info->dst.box.z, format); 1181 return true; 1182 } 1183 1184resolve_to_temp: 1185 /* Shader-based resolve is VERY SLOW. Instead, resolve into 1186 * a temporary texture and blit. 1187 */ 1188 memset(&templ, 0, sizeof(templ)); 1189 templ.target = PIPE_TEXTURE_2D; 1190 templ.format = info->src.resource->format; 1191 templ.width0 = info->src.resource->width0; 1192 templ.height0 = info->src.resource->height0; 1193 templ.depth0 = 1; 1194 templ.array_size = 1; 1195 templ.usage = PIPE_USAGE_DEFAULT; 1196 templ.flags = SI_RESOURCE_FLAG_FORCE_TILING | 1197 SI_RESOURCE_FLAG_DISABLE_DCC; 1198 1199 /* The src and dst microtile modes must be the same. */ 1200 if (src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY) 1201 templ.bind = PIPE_BIND_SCANOUT; 1202 else 1203 templ.bind = 0; 1204 1205 tmp = ctx->screen->resource_create(ctx->screen, &templ); 1206 if (!tmp) 1207 return false; 1208 stmp = (struct si_texture*)tmp; 1209 1210 assert(!stmp->surface.is_linear); 1211 assert(src->surface.micro_tile_mode == stmp->surface.micro_tile_mode); 1212 1213 /* resolve */ 1214 si_do_CB_resolve(sctx, info, tmp, 0, 0, format); 1215 1216 /* blit */ 1217 blit = *info; 1218 blit.src.resource = tmp; 1219 blit.src.box.z = 0; 1220 1221 si_blitter_begin(sctx, SI_BLIT | 1222 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1223 util_blitter_blit(sctx->blitter, &blit); 1224 si_blitter_end(sctx); 1225 1226 pipe_resource_reference(&tmp, NULL); 1227 return true; 1228} 1229 1230static void si_blit(struct pipe_context *ctx, 1231 const struct pipe_blit_info *info) 1232{ 1233 struct si_context *sctx = (struct si_context*)ctx; 1234 struct si_texture *dst = (struct si_texture *)info->dst.resource; 1235 1236 if (do_hardware_msaa_resolve(ctx, info)) { 1237 return; 1238 } 1239 1240 /* Using SDMA for copying to a linear texture in GTT is much faster. 1241 * This improves DRI PRIME performance. 1242 * 1243 * resource_copy_region can't do this yet, because dma_copy calls it 1244 * on failure (recursion). 1245 */ 1246 if (dst->surface.is_linear && 1247 sctx->dma_copy && 1248 util_can_blit_via_copy_region(info, false)) { 1249 sctx->dma_copy(ctx, info->dst.resource, info->dst.level, 1250 info->dst.box.x, info->dst.box.y, 1251 info->dst.box.z, 1252 info->src.resource, info->src.level, 1253 &info->src.box); 1254 return; 1255 } 1256 1257 assert(util_blitter_is_blit_supported(sctx->blitter, info)); 1258 1259 /* The driver doesn't decompress resources automatically while 1260 * u_blitter is rendering. */ 1261 vi_disable_dcc_if_incompatible_format(sctx, info->src.resource, 1262 info->src.level, 1263 info->src.format); 1264 vi_disable_dcc_if_incompatible_format(sctx, info->dst.resource, 1265 info->dst.level, 1266 info->dst.format); 1267 si_decompress_subresource(ctx, info->src.resource, info->mask, 1268 info->src.level, 1269 info->src.box.z, 1270 info->src.box.z + info->src.box.depth - 1); 1271 1272 if (sctx->screen->debug_flags & DBG(FORCE_DMA) && 1273 util_try_blit_via_copy_region(ctx, info)) 1274 return; 1275 1276 si_blitter_begin(sctx, SI_BLIT | 1277 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1278 util_blitter_blit(sctx->blitter, info); 1279 si_blitter_end(sctx); 1280} 1281 1282static boolean si_generate_mipmap(struct pipe_context *ctx, 1283 struct pipe_resource *tex, 1284 enum pipe_format format, 1285 unsigned base_level, unsigned last_level, 1286 unsigned first_layer, unsigned last_layer) 1287{ 1288 struct si_context *sctx = (struct si_context*)ctx; 1289 struct si_texture *stex = (struct si_texture *)tex; 1290 1291 if (!util_blitter_is_copy_supported(sctx->blitter, tex, tex)) 1292 return false; 1293 1294 /* The driver doesn't decompress resources automatically while 1295 * u_blitter is rendering. */ 1296 vi_disable_dcc_if_incompatible_format(sctx, tex, base_level, 1297 format); 1298 si_decompress_subresource(ctx, tex, PIPE_MASK_RGBAZS, 1299 base_level, first_layer, last_layer); 1300 1301 /* Clear dirty_level_mask for the levels that will be overwritten. */ 1302 assert(base_level < last_level); 1303 stex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1, 1304 last_level - base_level); 1305 1306 sctx->generate_mipmap_for_depth = stex->is_depth; 1307 1308 si_blitter_begin(sctx, SI_BLIT | SI_DISABLE_RENDER_COND); 1309 util_blitter_generate_mipmap(sctx->blitter, tex, format, 1310 base_level, last_level, 1311 first_layer, last_layer); 1312 si_blitter_end(sctx); 1313 1314 sctx->generate_mipmap_for_depth = false; 1315 return true; 1316} 1317 1318static void si_flush_resource(struct pipe_context *ctx, 1319 struct pipe_resource *res) 1320{ 1321 struct si_context *sctx = (struct si_context*)ctx; 1322 struct si_texture *tex = (struct si_texture*)res; 1323 1324 assert(res->target != PIPE_BUFFER); 1325 assert(!tex->dcc_separate_buffer || tex->dcc_gather_statistics); 1326 1327 /* st/dri calls flush twice per frame (not a bug), this prevents double 1328 * decompression. */ 1329 if (tex->dcc_separate_buffer && !tex->separate_dcc_dirty) 1330 return; 1331 1332 if (!tex->is_depth && (tex->cmask_buffer || tex->dcc_offset)) { 1333 si_blit_decompress_color(sctx, tex, 0, res->last_level, 1334 0, util_max_layer(res, 0), 1335 tex->dcc_separate_buffer != NULL); 1336 } 1337 1338 /* Always do the analysis even if DCC is disabled at the moment. */ 1339 if (tex->dcc_gather_statistics) { 1340 bool separate_dcc_dirty = tex->separate_dcc_dirty; 1341 1342 /* If the color buffer hasn't been unbound and fast clear hasn't 1343 * been used, separate_dcc_dirty is false, but there may have been 1344 * new rendering. Check if the color buffer is bound and assume 1345 * it's dirty. 1346 * 1347 * Note that DRI2 never unbinds window colorbuffers, which means 1348 * the DCC pipeline statistics query would never be re-set and would 1349 * keep adding new results until all free memory is exhausted if we 1350 * didn't do this. 1351 */ 1352 if (!separate_dcc_dirty) { 1353 for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 1354 if (sctx->framebuffer.state.cbufs[i] && 1355 sctx->framebuffer.state.cbufs[i]->texture == res) { 1356 separate_dcc_dirty = true; 1357 break; 1358 } 1359 } 1360 } 1361 1362 if (separate_dcc_dirty) { 1363 tex->separate_dcc_dirty = false; 1364 vi_separate_dcc_process_and_reset_stats(ctx, tex); 1365 } 1366 } 1367} 1368 1369void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex) 1370{ 1371 if (!tex->dcc_offset) 1372 return; 1373 1374 si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level, 1375 0, util_max_layer(&tex->buffer.b.b, 0), 1376 true); 1377} 1378 1379void si_init_blit_functions(struct si_context *sctx) 1380{ 1381 sctx->b.resource_copy_region = si_resource_copy_region; 1382 sctx->b.blit = si_blit; 1383 sctx->b.flush_resource = si_flush_resource; 1384 sctx->b.generate_mipmap = si_generate_mipmap; 1385} 1386