1848b8605Smrg/* 2848b8605Smrg * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3b8e80941Smrg * Copyright 2015 Advanced Micro Devices, Inc. 4b8e80941Smrg * All Rights Reserved. 5848b8605Smrg * 6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7848b8605Smrg * copy of this software and associated documentation files (the "Software"), 8848b8605Smrg * to deal in the Software without restriction, including without limitation 9848b8605Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub 10848b8605Smrg * license, and/or sell copies of the Software, and to permit persons to whom 11848b8605Smrg * the Software is furnished to do so, subject to the following conditions: 12848b8605Smrg * 13848b8605Smrg * The above copyright notice and this permission notice (including the next 14848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the 15848b8605Smrg * Software. 16848b8605Smrg * 17848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20848b8605Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21848b8605Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22848b8605Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23848b8605Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 24848b8605Smrg */ 25848b8605Smrg 26848b8605Smrg#include "si_pipe.h" 27b8e80941Smrg#include "si_compute.h" 28848b8605Smrg#include "util/u_format.h" 29b8e80941Smrg#include "util/u_log.h" 30b8e80941Smrg#include "util/u_surface.h" 31848b8605Smrg 32b8e80941Smrgenum { 33848b8605Smrg SI_COPY = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | 34b8e80941Smrg SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND, 35848b8605Smrg 36b8e80941Smrg SI_BLIT = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | 37b8e80941Smrg SI_SAVE_FRAGMENT_STATE, 38848b8605Smrg 39b8e80941Smrg SI_DECOMPRESS = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE | 40b8e80941Smrg SI_DISABLE_RENDER_COND, 41848b8605Smrg 42b8e80941Smrg SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE 43848b8605Smrg}; 44848b8605Smrg 45b8e80941Smrgvoid si_blitter_begin(struct si_context *sctx, enum si_blitter_op op) 46848b8605Smrg{ 47b8e80941Smrg util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso); 48b8e80941Smrg util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso); 49b8e80941Smrg util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso); 50b8e80941Smrg util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso); 51b8e80941Smrg util_blitter_save_so_targets(sctx->blitter, sctx->streamout.num_targets, 52b8e80941Smrg (struct pipe_stream_output_target**)sctx->streamout.targets); 53848b8605Smrg util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer); 54b8e80941Smrg 55b8e80941Smrg if (op & SI_SAVE_FRAGMENT_STATE) { 56b8e80941Smrg util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend); 57b8e80941Smrg util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa); 58b8e80941Smrg util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state); 59b8e80941Smrg util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso); 60b8e80941Smrg util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask); 61b8e80941Smrg util_blitter_save_scissor(sctx->blitter, &sctx->scissors[0]); 62b8e80941Smrg util_blitter_save_window_rectangles(sctx->blitter, 63b8e80941Smrg sctx->window_rectangles_include, 64b8e80941Smrg sctx->num_window_rectangles, 65b8e80941Smrg sctx->window_rectangles); 66848b8605Smrg } 67848b8605Smrg 68848b8605Smrg if (op & SI_SAVE_FRAMEBUFFER) 69848b8605Smrg util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state); 70848b8605Smrg 71848b8605Smrg if (op & SI_SAVE_TEXTURES) { 72848b8605Smrg util_blitter_save_fragment_sampler_states( 73848b8605Smrg sctx->blitter, 2, 74b8e80941Smrg (void**)sctx->samplers[PIPE_SHADER_FRAGMENT].sampler_states); 75848b8605Smrg 76848b8605Smrg util_blitter_save_fragment_sampler_views(sctx->blitter, 2, 77b8e80941Smrg sctx->samplers[PIPE_SHADER_FRAGMENT].views); 78848b8605Smrg } 79848b8605Smrg 80b8e80941Smrg if (op & SI_DISABLE_RENDER_COND) 81b8e80941Smrg sctx->render_cond_force_off = true; 82b8e80941Smrg 83b8e80941Smrg if (sctx->screen->dpbb_allowed) { 84b8e80941Smrg sctx->dpbb_force_off = true; 85b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 86848b8605Smrg } 87848b8605Smrg} 88848b8605Smrg 89b8e80941Smrgvoid si_blitter_end(struct si_context *sctx) 90848b8605Smrg{ 91b8e80941Smrg if (sctx->screen->dpbb_allowed) { 92b8e80941Smrg sctx->dpbb_force_off = false; 93b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 94b8e80941Smrg } 95b8e80941Smrg 96b8e80941Smrg sctx->render_cond_force_off = false; 97b8e80941Smrg 98b8e80941Smrg /* Restore shader pointers because the VS blit shader changed all 99b8e80941Smrg * non-global VS user SGPRs. */ 100b8e80941Smrg sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX); 101b8e80941Smrg sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL; 102b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); 103848b8605Smrg} 104848b8605Smrg 105848b8605Smrgstatic unsigned u_max_sample(struct pipe_resource *r) 106848b8605Smrg{ 107848b8605Smrg return r->nr_samples ? r->nr_samples - 1 : 0; 108848b8605Smrg} 109848b8605Smrg 110b8e80941Smrgstatic unsigned 111b8e80941Smrgsi_blit_dbcb_copy(struct si_context *sctx, 112b8e80941Smrg struct si_texture *src, 113b8e80941Smrg struct si_texture *dst, 114b8e80941Smrg unsigned planes, unsigned level_mask, 115b8e80941Smrg unsigned first_layer, unsigned last_layer, 116b8e80941Smrg unsigned first_sample, unsigned last_sample) 117848b8605Smrg{ 118b8e80941Smrg struct pipe_surface surf_tmpl = {{0}}; 119b8e80941Smrg unsigned layer, sample, checked_last_layer, max_layer; 120b8e80941Smrg unsigned fully_copied_levels = 0; 121848b8605Smrg 122b8e80941Smrg if (planes & PIPE_MASK_Z) 123b8e80941Smrg sctx->dbcb_depth_copy_enabled = true; 124b8e80941Smrg if (planes & PIPE_MASK_S) 125b8e80941Smrg sctx->dbcb_stencil_copy_enabled = true; 126b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 127848b8605Smrg 128b8e80941Smrg assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled); 129848b8605Smrg 130b8e80941Smrg sctx->decompression_enabled = true; 131b8e80941Smrg 132b8e80941Smrg while (level_mask) { 133b8e80941Smrg unsigned level = u_bit_scan(&level_mask); 134848b8605Smrg 135848b8605Smrg /* The smaller the mipmap level, the less layers there are 136848b8605Smrg * as far as 3D textures are concerned. */ 137b8e80941Smrg max_layer = util_max_layer(&src->buffer.b.b, level); 138b8e80941Smrg checked_last_layer = MIN2(last_layer, max_layer); 139b8e80941Smrg 140b8e80941Smrg surf_tmpl.u.tex.level = level; 141848b8605Smrg 142848b8605Smrg for (layer = first_layer; layer <= checked_last_layer; layer++) { 143b8e80941Smrg struct pipe_surface *zsurf, *cbsurf; 144848b8605Smrg 145b8e80941Smrg surf_tmpl.format = src->buffer.b.b.format; 146b8e80941Smrg surf_tmpl.u.tex.first_layer = layer; 147b8e80941Smrg surf_tmpl.u.tex.last_layer = layer; 148848b8605Smrg 149b8e80941Smrg zsurf = sctx->b.create_surface(&sctx->b, &src->buffer.b.b, &surf_tmpl); 150848b8605Smrg 151b8e80941Smrg surf_tmpl.format = dst->buffer.b.b.format; 152b8e80941Smrg cbsurf = sctx->b.create_surface(&sctx->b, &dst->buffer.b.b, &surf_tmpl); 153848b8605Smrg 154b8e80941Smrg for (sample = first_sample; sample <= last_sample; sample++) { 155b8e80941Smrg if (sample != sctx->dbcb_copy_sample) { 156b8e80941Smrg sctx->dbcb_copy_sample = sample; 157b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 158b8e80941Smrg } 159848b8605Smrg 160b8e80941Smrg si_blitter_begin(sctx, SI_DECOMPRESS); 161b8e80941Smrg util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample, 162b8e80941Smrg sctx->custom_dsa_flush, 1.0f); 163b8e80941Smrg si_blitter_end(sctx); 164848b8605Smrg } 165848b8605Smrg 166b8e80941Smrg pipe_surface_reference(&zsurf, NULL); 167b8e80941Smrg pipe_surface_reference(&cbsurf, NULL); 168848b8605Smrg } 169b8e80941Smrg 170b8e80941Smrg if (first_layer == 0 && last_layer >= max_layer && 171b8e80941Smrg first_sample == 0 && last_sample >= u_max_sample(&src->buffer.b.b)) 172b8e80941Smrg fully_copied_levels |= 1u << level; 173848b8605Smrg } 174b8e80941Smrg 175b8e80941Smrg sctx->decompression_enabled = false; 176b8e80941Smrg sctx->dbcb_depth_copy_enabled = false; 177b8e80941Smrg sctx->dbcb_stencil_copy_enabled = false; 178b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 179b8e80941Smrg 180b8e80941Smrg return fully_copied_levels; 181b8e80941Smrg} 182b8e80941Smrg 183b8e80941Smrgvoid si_blit_decompress_depth(struct pipe_context *ctx, 184b8e80941Smrg struct si_texture *texture, 185b8e80941Smrg struct si_texture *staging, 186b8e80941Smrg unsigned first_level, unsigned last_level, 187b8e80941Smrg unsigned first_layer, unsigned last_layer, 188b8e80941Smrg unsigned first_sample, unsigned last_sample) 189b8e80941Smrg{ 190b8e80941Smrg const struct util_format_description *desc; 191b8e80941Smrg unsigned planes = 0; 192b8e80941Smrg 193b8e80941Smrg assert(staging != NULL && "use si_blit_decompress_zs_in_place instead"); 194b8e80941Smrg 195b8e80941Smrg desc = util_format_description(staging->buffer.b.b.format); 196b8e80941Smrg 197b8e80941Smrg if (util_format_has_depth(desc)) 198b8e80941Smrg planes |= PIPE_MASK_Z; 199b8e80941Smrg if (util_format_has_stencil(desc)) 200b8e80941Smrg planes |= PIPE_MASK_S; 201b8e80941Smrg 202b8e80941Smrg si_blit_dbcb_copy( 203b8e80941Smrg (struct si_context *)ctx, texture, staging, planes, 204b8e80941Smrg u_bit_consecutive(first_level, last_level - first_level + 1), 205b8e80941Smrg first_layer, last_layer, first_sample, last_sample); 206848b8605Smrg} 207848b8605Smrg 208b8e80941Smrg/* Helper function for si_blit_decompress_zs_in_place. 209b8e80941Smrg */ 210b8e80941Smrgstatic void 211b8e80941Smrgsi_blit_decompress_zs_planes_in_place(struct si_context *sctx, 212b8e80941Smrg struct si_texture *texture, 213b8e80941Smrg unsigned planes, unsigned level_mask, 214b8e80941Smrg unsigned first_layer, unsigned last_layer) 215848b8605Smrg{ 216848b8605Smrg struct pipe_surface *zsurf, surf_tmpl = {{0}}; 217b8e80941Smrg unsigned layer, max_layer, checked_last_layer; 218b8e80941Smrg unsigned fully_decompressed_mask = 0; 219848b8605Smrg 220b8e80941Smrg if (!level_mask) 221b8e80941Smrg return; 222848b8605Smrg 223b8e80941Smrg if (planes & PIPE_MASK_S) 224b8e80941Smrg sctx->db_flush_stencil_inplace = true; 225b8e80941Smrg if (planes & PIPE_MASK_Z) 226b8e80941Smrg sctx->db_flush_depth_inplace = true; 227b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 228b8e80941Smrg 229b8e80941Smrg surf_tmpl.format = texture->buffer.b.b.format; 230b8e80941Smrg 231b8e80941Smrg sctx->decompression_enabled = true; 232b8e80941Smrg 233b8e80941Smrg while (level_mask) { 234b8e80941Smrg unsigned level = u_bit_scan(&level_mask); 235848b8605Smrg 236848b8605Smrg surf_tmpl.u.tex.level = level; 237848b8605Smrg 238848b8605Smrg /* The smaller the mipmap level, the less layers there are 239848b8605Smrg * as far as 3D textures are concerned. */ 240b8e80941Smrg max_layer = util_max_layer(&texture->buffer.b.b, level); 241b8e80941Smrg checked_last_layer = MIN2(last_layer, max_layer); 242848b8605Smrg 243848b8605Smrg for (layer = first_layer; layer <= checked_last_layer; layer++) { 244848b8605Smrg surf_tmpl.u.tex.first_layer = layer; 245848b8605Smrg surf_tmpl.u.tex.last_layer = layer; 246848b8605Smrg 247b8e80941Smrg zsurf = sctx->b.create_surface(&sctx->b, &texture->buffer.b.b, &surf_tmpl); 248848b8605Smrg 249b8e80941Smrg si_blitter_begin(sctx, SI_DECOMPRESS); 250848b8605Smrg util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0, 251b8e80941Smrg sctx->custom_dsa_flush, 252848b8605Smrg 1.0f); 253b8e80941Smrg si_blitter_end(sctx); 254848b8605Smrg 255848b8605Smrg pipe_surface_reference(&zsurf, NULL); 256848b8605Smrg } 257848b8605Smrg 258848b8605Smrg /* The texture will always be dirty if some layers aren't flushed. 259848b8605Smrg * I don't think this case occurs often though. */ 260b8e80941Smrg if (first_layer == 0 && last_layer >= max_layer) { 261b8e80941Smrg fully_decompressed_mask |= 1u << level; 262b8e80941Smrg } 263b8e80941Smrg } 264b8e80941Smrg 265b8e80941Smrg if (planes & PIPE_MASK_Z) 266b8e80941Smrg texture->dirty_level_mask &= ~fully_decompressed_mask; 267b8e80941Smrg if (planes & PIPE_MASK_S) 268b8e80941Smrg texture->stencil_dirty_level_mask &= ~fully_decompressed_mask; 269b8e80941Smrg 270b8e80941Smrg sctx->decompression_enabled = false; 271b8e80941Smrg sctx->db_flush_depth_inplace = false; 272b8e80941Smrg sctx->db_flush_stencil_inplace = false; 273b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 274b8e80941Smrg} 275b8e80941Smrg 276b8e80941Smrg/* Helper function of si_flush_depth_texture: decompress the given levels 277b8e80941Smrg * of Z and/or S planes in place. 278b8e80941Smrg */ 279b8e80941Smrgstatic void 280b8e80941Smrgsi_blit_decompress_zs_in_place(struct si_context *sctx, 281b8e80941Smrg struct si_texture *texture, 282b8e80941Smrg unsigned levels_z, unsigned levels_s, 283b8e80941Smrg unsigned first_layer, unsigned last_layer) 284b8e80941Smrg{ 285b8e80941Smrg unsigned both = levels_z & levels_s; 286b8e80941Smrg 287b8e80941Smrg /* First, do combined Z & S decompresses for levels that need it. */ 288b8e80941Smrg if (both) { 289b8e80941Smrg si_blit_decompress_zs_planes_in_place( 290b8e80941Smrg sctx, texture, PIPE_MASK_Z | PIPE_MASK_S, 291b8e80941Smrg both, 292b8e80941Smrg first_layer, last_layer); 293b8e80941Smrg levels_z &= ~both; 294b8e80941Smrg levels_s &= ~both; 295b8e80941Smrg } 296b8e80941Smrg 297b8e80941Smrg /* Now do separate Z and S decompresses. */ 298b8e80941Smrg if (levels_z) { 299b8e80941Smrg si_blit_decompress_zs_planes_in_place( 300b8e80941Smrg sctx, texture, PIPE_MASK_Z, 301b8e80941Smrg levels_z, 302b8e80941Smrg first_layer, last_layer); 303b8e80941Smrg } 304b8e80941Smrg 305b8e80941Smrg if (levels_s) { 306b8e80941Smrg si_blit_decompress_zs_planes_in_place( 307b8e80941Smrg sctx, texture, PIPE_MASK_S, 308b8e80941Smrg levels_s, 309b8e80941Smrg first_layer, last_layer); 310b8e80941Smrg } 311b8e80941Smrg} 312b8e80941Smrg 313b8e80941Smrgstatic void 314b8e80941Smrgsi_decompress_depth(struct si_context *sctx, 315b8e80941Smrg struct si_texture *tex, 316b8e80941Smrg unsigned required_planes, 317b8e80941Smrg unsigned first_level, unsigned last_level, 318b8e80941Smrg unsigned first_layer, unsigned last_layer) 319b8e80941Smrg{ 320b8e80941Smrg unsigned inplace_planes = 0; 321b8e80941Smrg unsigned copy_planes = 0; 322b8e80941Smrg unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1); 323b8e80941Smrg unsigned levels_z = 0; 324b8e80941Smrg unsigned levels_s = 0; 325b8e80941Smrg 326b8e80941Smrg if (required_planes & PIPE_MASK_Z) { 327b8e80941Smrg levels_z = level_mask & tex->dirty_level_mask; 328b8e80941Smrg 329b8e80941Smrg if (levels_z) { 330b8e80941Smrg if (si_can_sample_zs(tex, false)) 331b8e80941Smrg inplace_planes |= PIPE_MASK_Z; 332b8e80941Smrg else 333b8e80941Smrg copy_planes |= PIPE_MASK_Z; 334848b8605Smrg } 335848b8605Smrg } 336b8e80941Smrg if (required_planes & PIPE_MASK_S) { 337b8e80941Smrg levels_s = level_mask & tex->stencil_dirty_level_mask; 338b8e80941Smrg 339b8e80941Smrg if (levels_s) { 340b8e80941Smrg if (si_can_sample_zs(tex, true)) 341b8e80941Smrg inplace_planes |= PIPE_MASK_S; 342b8e80941Smrg else 343b8e80941Smrg copy_planes |= PIPE_MASK_S; 344b8e80941Smrg } 345b8e80941Smrg } 346b8e80941Smrg 347b8e80941Smrg if (unlikely(sctx->log)) 348b8e80941Smrg u_log_printf(sctx->log, 349b8e80941Smrg "\n------------------------------------------------\n" 350b8e80941Smrg "Decompress Depth (levels %u - %u, levels Z: 0x%x S: 0x%x)\n\n", 351b8e80941Smrg first_level, last_level, levels_z, levels_s); 352b8e80941Smrg 353b8e80941Smrg /* We may have to allocate the flushed texture here when called from 354b8e80941Smrg * si_decompress_subresource. 355b8e80941Smrg */ 356b8e80941Smrg if (copy_planes && 357b8e80941Smrg (tex->flushed_depth_texture || 358b8e80941Smrg si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b, NULL))) { 359b8e80941Smrg struct si_texture *dst = tex->flushed_depth_texture; 360b8e80941Smrg unsigned fully_copied_levels; 361b8e80941Smrg unsigned levels = 0; 362b8e80941Smrg 363b8e80941Smrg assert(tex->flushed_depth_texture); 364b8e80941Smrg 365b8e80941Smrg if (util_format_is_depth_and_stencil(dst->buffer.b.b.format)) 366b8e80941Smrg copy_planes = PIPE_MASK_Z | PIPE_MASK_S; 367b8e80941Smrg 368b8e80941Smrg if (copy_planes & PIPE_MASK_Z) { 369b8e80941Smrg levels |= levels_z; 370b8e80941Smrg levels_z = 0; 371b8e80941Smrg } 372b8e80941Smrg if (copy_planes & PIPE_MASK_S) { 373b8e80941Smrg levels |= levels_s; 374b8e80941Smrg levels_s = 0; 375b8e80941Smrg } 376b8e80941Smrg 377b8e80941Smrg fully_copied_levels = si_blit_dbcb_copy( 378b8e80941Smrg sctx, tex, dst, copy_planes, levels, 379b8e80941Smrg first_layer, last_layer, 380b8e80941Smrg 0, u_max_sample(&tex->buffer.b.b)); 381b8e80941Smrg 382b8e80941Smrg if (copy_planes & PIPE_MASK_Z) 383b8e80941Smrg tex->dirty_level_mask &= ~fully_copied_levels; 384b8e80941Smrg if (copy_planes & PIPE_MASK_S) 385b8e80941Smrg tex->stencil_dirty_level_mask &= ~fully_copied_levels; 386b8e80941Smrg } 387b8e80941Smrg 388b8e80941Smrg if (inplace_planes) { 389b8e80941Smrg bool has_htile = si_htile_enabled(tex, first_level); 390b8e80941Smrg bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, first_level); 391b8e80941Smrg 392b8e80941Smrg /* Don't decompress if there is no HTILE or when HTILE is 393b8e80941Smrg * TC-compatible. */ 394b8e80941Smrg if (has_htile && !tc_compat_htile) { 395b8e80941Smrg si_blit_decompress_zs_in_place( 396b8e80941Smrg sctx, tex, 397b8e80941Smrg levels_z, levels_s, 398b8e80941Smrg first_layer, last_layer); 399b8e80941Smrg } else { 400b8e80941Smrg /* This is only a cache flush. 401b8e80941Smrg * 402b8e80941Smrg * Only clear the mask that we are flushing, because 403b8e80941Smrg * si_make_DB_shader_coherent() treats different levels 404b8e80941Smrg * and depth and stencil differently. 405b8e80941Smrg */ 406b8e80941Smrg if (inplace_planes & PIPE_MASK_Z) 407b8e80941Smrg tex->dirty_level_mask &= ~levels_z; 408b8e80941Smrg if (inplace_planes & PIPE_MASK_S) 409b8e80941Smrg tex->stencil_dirty_level_mask &= ~levels_s; 410b8e80941Smrg } 411b8e80941Smrg 412b8e80941Smrg /* Only in-place decompression needs to flush DB caches, or 413b8e80941Smrg * when we don't decompress but TC-compatible planes are dirty. 414b8e80941Smrg */ 415b8e80941Smrg si_make_DB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, 416b8e80941Smrg inplace_planes & PIPE_MASK_S, 417b8e80941Smrg tc_compat_htile); 418b8e80941Smrg } 419b8e80941Smrg /* set_framebuffer_state takes care of coherency for single-sample. 420b8e80941Smrg * The DB->CB copy uses CB for the final writes. 421b8e80941Smrg */ 422b8e80941Smrg if (copy_planes && tex->buffer.b.b.nr_samples > 1) 423b8e80941Smrg si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, 424b8e80941Smrg false, true /* no DCC */); 425848b8605Smrg} 426848b8605Smrg 427b8e80941Smrgstatic void 428b8e80941Smrgsi_decompress_sampler_depth_textures(struct si_context *sctx, 429b8e80941Smrg struct si_samplers *textures) 430848b8605Smrg{ 431848b8605Smrg unsigned i; 432b8e80941Smrg unsigned mask = textures->needs_depth_decompress_mask; 433848b8605Smrg 434848b8605Smrg while (mask) { 435848b8605Smrg struct pipe_sampler_view *view; 436b8e80941Smrg struct si_sampler_view *sview; 437b8e80941Smrg struct si_texture *tex; 438848b8605Smrg 439848b8605Smrg i = u_bit_scan(&mask); 440848b8605Smrg 441b8e80941Smrg view = textures->views[i]; 442848b8605Smrg assert(view); 443b8e80941Smrg sview = (struct si_sampler_view*)view; 444848b8605Smrg 445b8e80941Smrg tex = (struct si_texture *)view->texture; 446b8e80941Smrg assert(tex->db_compatible); 447848b8605Smrg 448b8e80941Smrg si_decompress_depth(sctx, tex, 449b8e80941Smrg sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z, 450b8e80941Smrg view->u.tex.first_level, view->u.tex.last_level, 451b8e80941Smrg 0, util_max_layer(&tex->buffer.b.b, view->u.tex.first_level)); 452848b8605Smrg } 453848b8605Smrg} 454848b8605Smrg 455b8e80941Smrgstatic void si_blit_decompress_color(struct si_context *sctx, 456b8e80941Smrg struct si_texture *tex, 457b8e80941Smrg unsigned first_level, unsigned last_level, 458b8e80941Smrg unsigned first_layer, unsigned last_layer, 459b8e80941Smrg bool need_dcc_decompress) 460848b8605Smrg{ 461b8e80941Smrg void* custom_blend; 462b8e80941Smrg unsigned layer, checked_last_layer, max_layer; 463b8e80941Smrg unsigned level_mask = 464b8e80941Smrg u_bit_consecutive(first_level, last_level - first_level + 1); 465b8e80941Smrg 466b8e80941Smrg if (!need_dcc_decompress) 467b8e80941Smrg level_mask &= tex->dirty_level_mask; 468b8e80941Smrg if (!level_mask) 469848b8605Smrg return; 470848b8605Smrg 471b8e80941Smrg if (unlikely(sctx->log)) 472b8e80941Smrg u_log_printf(sctx->log, 473b8e80941Smrg "\n------------------------------------------------\n" 474b8e80941Smrg "Decompress Color (levels %u - %u, mask 0x%x)\n\n", 475b8e80941Smrg first_level, last_level, level_mask); 476b8e80941Smrg 477b8e80941Smrg if (need_dcc_decompress) { 478b8e80941Smrg custom_blend = sctx->custom_blend_dcc_decompress; 479b8e80941Smrg 480b8e80941Smrg assert(tex->dcc_offset); 481b8e80941Smrg 482b8e80941Smrg /* disable levels without DCC */ 483b8e80941Smrg for (int i = first_level; i <= last_level; i++) { 484b8e80941Smrg if (!vi_dcc_enabled(tex, i)) 485b8e80941Smrg level_mask &= ~(1 << i); 486b8e80941Smrg } 487b8e80941Smrg } else if (tex->surface.fmask_size) { 488b8e80941Smrg custom_blend = sctx->custom_blend_fmask_decompress; 489b8e80941Smrg } else { 490b8e80941Smrg custom_blend = sctx->custom_blend_eliminate_fastclear; 491b8e80941Smrg } 492b8e80941Smrg 493b8e80941Smrg sctx->decompression_enabled = true; 494b8e80941Smrg 495b8e80941Smrg while (level_mask) { 496b8e80941Smrg unsigned level = u_bit_scan(&level_mask); 497848b8605Smrg 498848b8605Smrg /* The smaller the mipmap level, the less layers there are 499848b8605Smrg * as far as 3D textures are concerned. */ 500b8e80941Smrg max_layer = util_max_layer(&tex->buffer.b.b, level); 501b8e80941Smrg checked_last_layer = MIN2(last_layer, max_layer); 502848b8605Smrg 503848b8605Smrg for (layer = first_layer; layer <= checked_last_layer; layer++) { 504848b8605Smrg struct pipe_surface *cbsurf, surf_tmpl; 505848b8605Smrg 506b8e80941Smrg surf_tmpl.format = tex->buffer.b.b.format; 507848b8605Smrg surf_tmpl.u.tex.level = level; 508848b8605Smrg surf_tmpl.u.tex.first_layer = layer; 509848b8605Smrg surf_tmpl.u.tex.last_layer = layer; 510b8e80941Smrg cbsurf = sctx->b.create_surface(&sctx->b, &tex->buffer.b.b, &surf_tmpl); 511b8e80941Smrg 512b8e80941Smrg /* Required before and after FMASK and DCC_DECOMPRESS. */ 513b8e80941Smrg if (custom_blend == sctx->custom_blend_fmask_decompress || 514b8e80941Smrg custom_blend == sctx->custom_blend_dcc_decompress) 515b8e80941Smrg sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 516848b8605Smrg 517b8e80941Smrg si_blitter_begin(sctx, SI_DECOMPRESS); 518b8e80941Smrg util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend); 519b8e80941Smrg si_blitter_end(sctx); 520b8e80941Smrg 521b8e80941Smrg if (custom_blend == sctx->custom_blend_fmask_decompress || 522b8e80941Smrg custom_blend == sctx->custom_blend_dcc_decompress) 523b8e80941Smrg sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 524848b8605Smrg 525848b8605Smrg pipe_surface_reference(&cbsurf, NULL); 526848b8605Smrg } 527848b8605Smrg 528848b8605Smrg /* The texture will always be dirty if some layers aren't flushed. 529848b8605Smrg * I don't think this case occurs often though. */ 530b8e80941Smrg if (first_layer == 0 && last_layer >= max_layer) { 531b8e80941Smrg tex->dirty_level_mask &= ~(1 << level); 532848b8605Smrg } 533848b8605Smrg } 534b8e80941Smrg 535b8e80941Smrg sctx->decompression_enabled = false; 536b8e80941Smrg si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, 537b8e80941Smrg vi_dcc_enabled(tex, first_level), 538b8e80941Smrg tex->surface.u.gfx9.dcc.pipe_aligned); 539848b8605Smrg} 540848b8605Smrg 541b8e80941Smrgstatic void 542b8e80941Smrgsi_decompress_color_texture(struct si_context *sctx, struct si_texture *tex, 543b8e80941Smrg unsigned first_level, unsigned last_level) 544b8e80941Smrg{ 545b8e80941Smrg /* CMASK or DCC can be discarded and we can still end up here. */ 546b8e80941Smrg if (!tex->cmask_buffer && !tex->surface.fmask_size && !tex->dcc_offset) 547b8e80941Smrg return; 548b8e80941Smrg 549b8e80941Smrg si_blit_decompress_color(sctx, tex, first_level, last_level, 0, 550b8e80941Smrg util_max_layer(&tex->buffer.b.b, first_level), 551b8e80941Smrg false); 552b8e80941Smrg} 553b8e80941Smrg 554b8e80941Smrgstatic void 555b8e80941Smrgsi_decompress_sampler_color_textures(struct si_context *sctx, 556b8e80941Smrg struct si_samplers *textures) 557848b8605Smrg{ 558848b8605Smrg unsigned i; 559b8e80941Smrg unsigned mask = textures->needs_color_decompress_mask; 560848b8605Smrg 561848b8605Smrg while (mask) { 562848b8605Smrg struct pipe_sampler_view *view; 563b8e80941Smrg struct si_texture *tex; 564848b8605Smrg 565848b8605Smrg i = u_bit_scan(&mask); 566848b8605Smrg 567b8e80941Smrg view = textures->views[i]; 568848b8605Smrg assert(view); 569848b8605Smrg 570b8e80941Smrg tex = (struct si_texture *)view->texture; 571848b8605Smrg 572b8e80941Smrg si_decompress_color_texture(sctx, tex, view->u.tex.first_level, 573b8e80941Smrg view->u.tex.last_level); 574848b8605Smrg } 575848b8605Smrg} 576848b8605Smrg 577b8e80941Smrgstatic void 578b8e80941Smrgsi_decompress_image_color_textures(struct si_context *sctx, 579b8e80941Smrg struct si_images *images) 580848b8605Smrg{ 581b8e80941Smrg unsigned i; 582b8e80941Smrg unsigned mask = images->needs_color_decompress_mask; 583b8e80941Smrg 584b8e80941Smrg while (mask) { 585b8e80941Smrg const struct pipe_image_view *view; 586b8e80941Smrg struct si_texture *tex; 587b8e80941Smrg 588b8e80941Smrg i = u_bit_scan(&mask); 589848b8605Smrg 590b8e80941Smrg view = &images->views[i]; 591b8e80941Smrg assert(view->resource->target != PIPE_BUFFER); 592b8e80941Smrg 593b8e80941Smrg tex = (struct si_texture *)view->resource; 594b8e80941Smrg 595b8e80941Smrg si_decompress_color_texture(sctx, tex, view->u.tex.level, 596b8e80941Smrg view->u.tex.level); 597848b8605Smrg } 598b8e80941Smrg} 599848b8605Smrg 600b8e80941Smrgstatic void si_check_render_feedback_texture(struct si_context *sctx, 601b8e80941Smrg struct si_texture *tex, 602b8e80941Smrg unsigned first_level, 603b8e80941Smrg unsigned last_level, 604b8e80941Smrg unsigned first_layer, 605b8e80941Smrg unsigned last_layer) 606b8e80941Smrg{ 607b8e80941Smrg bool render_feedback = false; 608848b8605Smrg 609b8e80941Smrg if (!tex->dcc_offset) 610b8e80941Smrg return; 611848b8605Smrg 612b8e80941Smrg for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) { 613b8e80941Smrg struct si_surface * surf; 614848b8605Smrg 615b8e80941Smrg if (!sctx->framebuffer.state.cbufs[j]) 616b8e80941Smrg continue; 617b8e80941Smrg 618b8e80941Smrg surf = (struct si_surface*)sctx->framebuffer.state.cbufs[j]; 619848b8605Smrg 620b8e80941Smrg if (tex == (struct si_texture *)surf->base.texture && 621b8e80941Smrg surf->base.u.tex.level >= first_level && 622b8e80941Smrg surf->base.u.tex.level <= last_level && 623b8e80941Smrg surf->base.u.tex.first_layer <= last_layer && 624b8e80941Smrg surf->base.u.tex.last_layer >= first_layer) { 625b8e80941Smrg render_feedback = true; 626b8e80941Smrg break; 627848b8605Smrg } 628848b8605Smrg } 629848b8605Smrg 630b8e80941Smrg if (render_feedback) 631b8e80941Smrg si_texture_disable_dcc(sctx, tex); 632848b8605Smrg} 633848b8605Smrg 634b8e80941Smrgstatic void si_check_render_feedback_textures(struct si_context *sctx, 635b8e80941Smrg struct si_samplers *textures) 636848b8605Smrg{ 637b8e80941Smrg uint32_t mask = textures->enabled_mask; 638848b8605Smrg 639b8e80941Smrg while (mask) { 640b8e80941Smrg const struct pipe_sampler_view *view; 641b8e80941Smrg struct si_texture *tex; 642b8e80941Smrg 643b8e80941Smrg unsigned i = u_bit_scan(&mask); 644b8e80941Smrg 645b8e80941Smrg view = textures->views[i]; 646b8e80941Smrg if(view->texture->target == PIPE_BUFFER) 647b8e80941Smrg continue; 648b8e80941Smrg 649b8e80941Smrg tex = (struct si_texture *)view->texture; 650b8e80941Smrg 651b8e80941Smrg si_check_render_feedback_texture(sctx, tex, 652b8e80941Smrg view->u.tex.first_level, 653b8e80941Smrg view->u.tex.last_level, 654b8e80941Smrg view->u.tex.first_layer, 655b8e80941Smrg view->u.tex.last_layer); 656b8e80941Smrg } 657848b8605Smrg} 658848b8605Smrg 659b8e80941Smrgstatic void si_check_render_feedback_images(struct si_context *sctx, 660b8e80941Smrg struct si_images *images) 661848b8605Smrg{ 662b8e80941Smrg uint32_t mask = images->enabled_mask; 663b8e80941Smrg 664b8e80941Smrg while (mask) { 665b8e80941Smrg const struct pipe_image_view *view; 666b8e80941Smrg struct si_texture *tex; 667b8e80941Smrg 668b8e80941Smrg unsigned i = u_bit_scan(&mask); 669b8e80941Smrg 670b8e80941Smrg view = &images->views[i]; 671b8e80941Smrg if (view->resource->target == PIPE_BUFFER) 672b8e80941Smrg continue; 673b8e80941Smrg 674b8e80941Smrg tex = (struct si_texture *)view->resource; 675b8e80941Smrg 676b8e80941Smrg si_check_render_feedback_texture(sctx, tex, 677b8e80941Smrg view->u.tex.level, 678b8e80941Smrg view->u.tex.level, 679b8e80941Smrg view->u.tex.first_layer, 680b8e80941Smrg view->u.tex.last_layer); 681b8e80941Smrg } 682b8e80941Smrg} 683b8e80941Smrg 684b8e80941Smrgstatic void si_check_render_feedback_resident_textures(struct si_context *sctx) 685b8e80941Smrg{ 686b8e80941Smrg util_dynarray_foreach(&sctx->resident_tex_handles, 687b8e80941Smrg struct si_texture_handle *, tex_handle) { 688b8e80941Smrg struct pipe_sampler_view *view; 689b8e80941Smrg struct si_texture *tex; 690b8e80941Smrg 691b8e80941Smrg view = (*tex_handle)->view; 692b8e80941Smrg if (view->texture->target == PIPE_BUFFER) 693b8e80941Smrg continue; 694b8e80941Smrg 695b8e80941Smrg tex = (struct si_texture *)view->texture; 696b8e80941Smrg 697b8e80941Smrg si_check_render_feedback_texture(sctx, tex, 698b8e80941Smrg view->u.tex.first_level, 699b8e80941Smrg view->u.tex.last_level, 700b8e80941Smrg view->u.tex.first_layer, 701b8e80941Smrg view->u.tex.last_layer); 702b8e80941Smrg } 703b8e80941Smrg} 704b8e80941Smrg 705b8e80941Smrgstatic void si_check_render_feedback_resident_images(struct si_context *sctx) 706b8e80941Smrg{ 707b8e80941Smrg util_dynarray_foreach(&sctx->resident_img_handles, 708b8e80941Smrg struct si_image_handle *, img_handle) { 709b8e80941Smrg struct pipe_image_view *view; 710b8e80941Smrg struct si_texture *tex; 711b8e80941Smrg 712b8e80941Smrg view = &(*img_handle)->view; 713b8e80941Smrg if (view->resource->target == PIPE_BUFFER) 714b8e80941Smrg continue; 715b8e80941Smrg 716b8e80941Smrg tex = (struct si_texture *)view->resource; 717b8e80941Smrg 718b8e80941Smrg si_check_render_feedback_texture(sctx, tex, 719b8e80941Smrg view->u.tex.level, 720b8e80941Smrg view->u.tex.level, 721b8e80941Smrg view->u.tex.first_layer, 722b8e80941Smrg view->u.tex.last_layer); 723b8e80941Smrg } 724b8e80941Smrg} 725b8e80941Smrg 726b8e80941Smrgstatic void si_check_render_feedback(struct si_context *sctx) 727b8e80941Smrg{ 728b8e80941Smrg if (!sctx->need_check_render_feedback) 729b8e80941Smrg return; 730b8e80941Smrg 731b8e80941Smrg /* There is no render feedback if color writes are disabled. 732b8e80941Smrg * (e.g. a pixel shader with image stores) 733b8e80941Smrg */ 734b8e80941Smrg if (!si_get_total_colormask(sctx)) 735b8e80941Smrg return; 736b8e80941Smrg 737b8e80941Smrg for (int i = 0; i < SI_NUM_SHADERS; ++i) { 738b8e80941Smrg si_check_render_feedback_images(sctx, &sctx->images[i]); 739b8e80941Smrg si_check_render_feedback_textures(sctx, &sctx->samplers[i]); 740b8e80941Smrg } 741b8e80941Smrg 742b8e80941Smrg si_check_render_feedback_resident_images(sctx); 743b8e80941Smrg si_check_render_feedback_resident_textures(sctx); 744b8e80941Smrg 745b8e80941Smrg sctx->need_check_render_feedback = false; 746b8e80941Smrg} 747848b8605Smrg 748b8e80941Smrgstatic void si_decompress_resident_textures(struct si_context *sctx) 749b8e80941Smrg{ 750b8e80941Smrg util_dynarray_foreach(&sctx->resident_tex_needs_color_decompress, 751b8e80941Smrg struct si_texture_handle *, tex_handle) { 752b8e80941Smrg struct pipe_sampler_view *view = (*tex_handle)->view; 753b8e80941Smrg struct si_texture *tex = (struct si_texture *)view->texture; 754b8e80941Smrg 755b8e80941Smrg si_decompress_color_texture(sctx, tex, view->u.tex.first_level, 756b8e80941Smrg view->u.tex.last_level); 757b8e80941Smrg } 758b8e80941Smrg 759b8e80941Smrg util_dynarray_foreach(&sctx->resident_tex_needs_depth_decompress, 760b8e80941Smrg struct si_texture_handle *, tex_handle) { 761b8e80941Smrg struct pipe_sampler_view *view = (*tex_handle)->view; 762b8e80941Smrg struct si_sampler_view *sview = (struct si_sampler_view *)view; 763b8e80941Smrg struct si_texture *tex = (struct si_texture *)view->texture; 764b8e80941Smrg 765b8e80941Smrg si_decompress_depth(sctx, tex, 766b8e80941Smrg sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z, 767b8e80941Smrg view->u.tex.first_level, view->u.tex.last_level, 768b8e80941Smrg 0, util_max_layer(&tex->buffer.b.b, view->u.tex.first_level)); 769b8e80941Smrg } 770b8e80941Smrg} 771b8e80941Smrg 772b8e80941Smrgstatic void si_decompress_resident_images(struct si_context *sctx) 773b8e80941Smrg{ 774b8e80941Smrg util_dynarray_foreach(&sctx->resident_img_needs_color_decompress, 775b8e80941Smrg struct si_image_handle *, img_handle) { 776b8e80941Smrg struct pipe_image_view *view = &(*img_handle)->view; 777b8e80941Smrg struct si_texture *tex = (struct si_texture *)view->resource; 778b8e80941Smrg 779b8e80941Smrg si_decompress_color_texture(sctx, tex, view->u.tex.level, 780b8e80941Smrg view->u.tex.level); 781b8e80941Smrg } 782b8e80941Smrg} 783b8e80941Smrg 784b8e80941Smrgvoid si_decompress_textures(struct si_context *sctx, unsigned shader_mask) 785b8e80941Smrg{ 786b8e80941Smrg unsigned compressed_colortex_counter, mask; 787b8e80941Smrg 788b8e80941Smrg if (sctx->blitter->running) 789b8e80941Smrg return; 790b8e80941Smrg 791b8e80941Smrg /* Update the compressed_colortex_mask if necessary. */ 792b8e80941Smrg compressed_colortex_counter = p_atomic_read(&sctx->screen->compressed_colortex_counter); 793b8e80941Smrg if (compressed_colortex_counter != sctx->last_compressed_colortex_counter) { 794b8e80941Smrg sctx->last_compressed_colortex_counter = compressed_colortex_counter; 795b8e80941Smrg si_update_needs_color_decompress_masks(sctx); 796b8e80941Smrg } 797b8e80941Smrg 798b8e80941Smrg /* Decompress color & depth textures if needed. */ 799b8e80941Smrg mask = sctx->shader_needs_decompress_mask & shader_mask; 800b8e80941Smrg while (mask) { 801b8e80941Smrg unsigned i = u_bit_scan(&mask); 802b8e80941Smrg 803b8e80941Smrg if (sctx->samplers[i].needs_depth_decompress_mask) { 804b8e80941Smrg si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]); 805b8e80941Smrg } 806b8e80941Smrg if (sctx->samplers[i].needs_color_decompress_mask) { 807b8e80941Smrg si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]); 808b8e80941Smrg } 809b8e80941Smrg if (sctx->images[i].needs_color_decompress_mask) { 810b8e80941Smrg si_decompress_image_color_textures(sctx, &sctx->images[i]); 811b8e80941Smrg } 812b8e80941Smrg } 813b8e80941Smrg 814b8e80941Smrg if (shader_mask & u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)) { 815b8e80941Smrg if (sctx->uses_bindless_samplers) 816b8e80941Smrg si_decompress_resident_textures(sctx); 817b8e80941Smrg if (sctx->uses_bindless_images) 818b8e80941Smrg si_decompress_resident_images(sctx); 819b8e80941Smrg 820b8e80941Smrg if (sctx->ps_uses_fbfetch) { 821b8e80941Smrg struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0]; 822b8e80941Smrg si_decompress_color_texture(sctx, 823b8e80941Smrg (struct si_texture*)cb0->texture, 824b8e80941Smrg cb0->u.tex.first_layer, 825b8e80941Smrg cb0->u.tex.last_layer); 826b8e80941Smrg } 827b8e80941Smrg 828b8e80941Smrg si_check_render_feedback(sctx); 829b8e80941Smrg } else if (shader_mask & (1 << PIPE_SHADER_COMPUTE)) { 830b8e80941Smrg if (sctx->cs_shader_state.program->uses_bindless_samplers) 831b8e80941Smrg si_decompress_resident_textures(sctx); 832b8e80941Smrg if (sctx->cs_shader_state.program->uses_bindless_images) 833b8e80941Smrg si_decompress_resident_images(sctx); 834b8e80941Smrg } 835848b8605Smrg} 836848b8605Smrg 837848b8605Smrg/* Helper for decompressing a portion of a color or depth resource before 838848b8605Smrg * blitting if any decompression is needed. 839848b8605Smrg * The driver doesn't decompress resources automatically while u_blitter is 840848b8605Smrg * rendering. */ 841848b8605Smrgstatic void si_decompress_subresource(struct pipe_context *ctx, 842848b8605Smrg struct pipe_resource *tex, 843b8e80941Smrg unsigned planes, unsigned level, 844848b8605Smrg unsigned first_layer, unsigned last_layer) 845848b8605Smrg{ 846848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 847b8e80941Smrg struct si_texture *stex = (struct si_texture*)tex; 848b8e80941Smrg 849b8e80941Smrg if (stex->db_compatible) { 850b8e80941Smrg planes &= PIPE_MASK_Z | PIPE_MASK_S; 851b8e80941Smrg 852b8e80941Smrg if (!stex->surface.has_stencil) 853b8e80941Smrg planes &= ~PIPE_MASK_S; 854b8e80941Smrg 855b8e80941Smrg /* If we've rendered into the framebuffer and it's a blitting 856b8e80941Smrg * source, make sure the decompression pass is invoked 857b8e80941Smrg * by dirtying the framebuffer. 858b8e80941Smrg */ 859b8e80941Smrg if (sctx->framebuffer.state.zsbuf && 860b8e80941Smrg sctx->framebuffer.state.zsbuf->u.tex.level == level && 861b8e80941Smrg sctx->framebuffer.state.zsbuf->texture == tex) 862b8e80941Smrg si_update_fb_dirtiness_after_rendering(sctx); 863b8e80941Smrg 864b8e80941Smrg si_decompress_depth(sctx, stex, planes, 865b8e80941Smrg level, level, 866b8e80941Smrg first_layer, last_layer); 867b8e80941Smrg } else if (stex->surface.fmask_size || stex->cmask_buffer || stex->dcc_offset) { 868b8e80941Smrg /* If we've rendered into the framebuffer and it's a blitting 869b8e80941Smrg * source, make sure the decompression pass is invoked 870b8e80941Smrg * by dirtying the framebuffer. 871b8e80941Smrg */ 872b8e80941Smrg for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 873b8e80941Smrg if (sctx->framebuffer.state.cbufs[i] && 874b8e80941Smrg sctx->framebuffer.state.cbufs[i]->u.tex.level == level && 875b8e80941Smrg sctx->framebuffer.state.cbufs[i]->texture == tex) { 876b8e80941Smrg si_update_fb_dirtiness_after_rendering(sctx); 877b8e80941Smrg break; 878b8e80941Smrg } 879b8e80941Smrg } 880848b8605Smrg 881b8e80941Smrg si_blit_decompress_color(sctx, stex, level, level, 882b8e80941Smrg first_layer, last_layer, false); 883848b8605Smrg } 884848b8605Smrg} 885848b8605Smrg 886848b8605Smrgstruct texture_orig_info { 887848b8605Smrg unsigned format; 888848b8605Smrg unsigned width0; 889848b8605Smrg unsigned height0; 890848b8605Smrg unsigned npix_x; 891848b8605Smrg unsigned npix_y; 892848b8605Smrg unsigned npix0_x; 893848b8605Smrg unsigned npix0_y; 894848b8605Smrg}; 895848b8605Smrg 896b8e80941Smrgvoid si_resource_copy_region(struct pipe_context *ctx, 897b8e80941Smrg struct pipe_resource *dst, 898b8e80941Smrg unsigned dst_level, 899b8e80941Smrg unsigned dstx, unsigned dsty, unsigned dstz, 900b8e80941Smrg struct pipe_resource *src, 901b8e80941Smrg unsigned src_level, 902b8e80941Smrg const struct pipe_box *src_box) 903848b8605Smrg{ 904848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 905b8e80941Smrg struct si_texture *ssrc = (struct si_texture*)src; 906b8e80941Smrg struct si_texture *sdst = (struct si_texture*)dst; 907848b8605Smrg struct pipe_surface *dst_view, dst_templ; 908848b8605Smrg struct pipe_sampler_view src_templ, *src_view; 909b8e80941Smrg unsigned dst_width, dst_height, src_width0, src_height0; 910b8e80941Smrg unsigned dst_width0, dst_height0, src_force_level = 0; 911848b8605Smrg struct pipe_box sbox, dstbox; 912848b8605Smrg 913b8e80941Smrg /* Handle buffers first. */ 914848b8605Smrg if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { 915848b8605Smrg si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width); 916848b8605Smrg return; 917848b8605Smrg } 918848b8605Smrg 919b8e80941Smrg if (!util_format_is_compressed(src->format) && 920b8e80941Smrg !util_format_is_compressed(dst->format) && 921b8e80941Smrg !util_format_is_depth_or_stencil(src->format) && 922b8e80941Smrg src->nr_samples <= 1 && 923b8e80941Smrg !sdst->dcc_offset && 924b8e80941Smrg !(dst->target != src->target && 925b8e80941Smrg (src->target == PIPE_TEXTURE_1D_ARRAY || dst->target == PIPE_TEXTURE_1D_ARRAY))) { 926b8e80941Smrg si_compute_copy_image(sctx, dst, dst_level, src, src_level, dstx, dsty, dstz, src_box); 927b8e80941Smrg return; 928b8e80941Smrg } 929b8e80941Smrg 930b8e80941Smrg assert(u_max_sample(dst) == u_max_sample(src)); 931848b8605Smrg 932848b8605Smrg /* The driver doesn't decompress resources automatically while 933848b8605Smrg * u_blitter is rendering. */ 934b8e80941Smrg si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level, 935848b8605Smrg src_box->z, src_box->z + src_box->depth - 1); 936848b8605Smrg 937b8e80941Smrg dst_width = u_minify(dst->width0, dst_level); 938b8e80941Smrg dst_height = u_minify(dst->height0, dst_level); 939b8e80941Smrg dst_width0 = dst->width0; 940b8e80941Smrg dst_height0 = dst->height0; 941b8e80941Smrg src_width0 = src->width0; 942b8e80941Smrg src_height0 = src->height0; 943b8e80941Smrg 944b8e80941Smrg util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz); 945b8e80941Smrg util_blitter_default_src_texture(sctx->blitter, &src_templ, src, src_level); 946848b8605Smrg 947b8e80941Smrg if (util_format_is_compressed(src->format) || 948848b8605Smrg util_format_is_compressed(dst->format)) { 949b8e80941Smrg unsigned blocksize = ssrc->surface.bpe; 950b8e80941Smrg 951b8e80941Smrg if (blocksize == 8) 952b8e80941Smrg src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */ 953b8e80941Smrg else 954b8e80941Smrg src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */ 955b8e80941Smrg dst_templ.format = src_templ.format; 956b8e80941Smrg 957b8e80941Smrg dst_width = util_format_get_nblocksx(dst->format, dst_width); 958b8e80941Smrg dst_height = util_format_get_nblocksy(dst->format, dst_height); 959b8e80941Smrg dst_width0 = util_format_get_nblocksx(dst->format, dst_width0); 960b8e80941Smrg dst_height0 = util_format_get_nblocksy(dst->format, dst_height0); 961b8e80941Smrg src_width0 = util_format_get_nblocksx(src->format, src_width0); 962b8e80941Smrg src_height0 = util_format_get_nblocksy(src->format, src_height0); 963b8e80941Smrg 964b8e80941Smrg dstx = util_format_get_nblocksx(dst->format, dstx); 965b8e80941Smrg dsty = util_format_get_nblocksy(dst->format, dsty); 966b8e80941Smrg 967b8e80941Smrg sbox.x = util_format_get_nblocksx(src->format, src_box->x); 968b8e80941Smrg sbox.y = util_format_get_nblocksy(src->format, src_box->y); 969848b8605Smrg sbox.z = src_box->z; 970b8e80941Smrg sbox.width = util_format_get_nblocksx(src->format, src_box->width); 971b8e80941Smrg sbox.height = util_format_get_nblocksy(src->format, src_box->height); 972848b8605Smrg sbox.depth = src_box->depth; 973848b8605Smrg src_box = &sbox; 974848b8605Smrg 975b8e80941Smrg src_force_level = src_level; 976848b8605Smrg } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) { 977848b8605Smrg if (util_format_is_subsampled_422(src->format)) { 978b8e80941Smrg src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; 979b8e80941Smrg dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; 980b8e80941Smrg 981b8e80941Smrg dst_width = util_format_get_nblocksx(dst->format, dst_width); 982b8e80941Smrg dst_width0 = util_format_get_nblocksx(dst->format, dst_width0); 983b8e80941Smrg src_width0 = util_format_get_nblocksx(src->format, src_width0); 984b8e80941Smrg 985b8e80941Smrg dstx = util_format_get_nblocksx(dst->format, dstx); 986848b8605Smrg 987848b8605Smrg sbox = *src_box; 988b8e80941Smrg sbox.x = util_format_get_nblocksx(src->format, src_box->x); 989b8e80941Smrg sbox.width = util_format_get_nblocksx(src->format, src_box->width); 990848b8605Smrg src_box = &sbox; 991848b8605Smrg } else { 992b8e80941Smrg unsigned blocksize = ssrc->surface.bpe; 993848b8605Smrg 994848b8605Smrg switch (blocksize) { 995848b8605Smrg case 1: 996b8e80941Smrg dst_templ.format = PIPE_FORMAT_R8_UNORM; 997b8e80941Smrg src_templ.format = PIPE_FORMAT_R8_UNORM; 998848b8605Smrg break; 999848b8605Smrg case 2: 1000b8e80941Smrg dst_templ.format = PIPE_FORMAT_R8G8_UNORM; 1001b8e80941Smrg src_templ.format = PIPE_FORMAT_R8G8_UNORM; 1002848b8605Smrg break; 1003848b8605Smrg case 4: 1004b8e80941Smrg dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; 1005b8e80941Smrg src_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; 1006848b8605Smrg break; 1007848b8605Smrg case 8: 1008b8e80941Smrg dst_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; 1009b8e80941Smrg src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; 1010848b8605Smrg break; 1011848b8605Smrg case 16: 1012b8e80941Smrg dst_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; 1013b8e80941Smrg src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; 1014848b8605Smrg break; 1015848b8605Smrg default: 1016848b8605Smrg fprintf(stderr, "Unhandled format %s with blocksize %u\n", 1017848b8605Smrg util_format_short_name(src->format), blocksize); 1018848b8605Smrg assert(0); 1019848b8605Smrg } 1020848b8605Smrg } 1021848b8605Smrg } 1022848b8605Smrg 1023b8e80941Smrg /* SNORM8 blitting has precision issues on some chips. Use the SINT 1024b8e80941Smrg * equivalent instead, which doesn't force DCC decompression. 1025b8e80941Smrg * Note that some chips avoid this issue by using SDMA. 1026b8e80941Smrg */ 1027b8e80941Smrg if (util_format_is_snorm8(dst_templ.format)) { 1028b8e80941Smrg dst_templ.format = src_templ.format = 1029b8e80941Smrg util_format_snorm8_to_sint8(dst_templ.format); 1030b8e80941Smrg } 1031b8e80941Smrg 1032b8e80941Smrg vi_disable_dcc_if_incompatible_format(sctx, dst, dst_level, 1033b8e80941Smrg dst_templ.format); 1034b8e80941Smrg vi_disable_dcc_if_incompatible_format(sctx, src, src_level, 1035b8e80941Smrg src_templ.format); 1036b8e80941Smrg 1037848b8605Smrg /* Initialize the surface. */ 1038b8e80941Smrg dst_view = si_create_surface_custom(ctx, dst, &dst_templ, 1039b8e80941Smrg dst_width0, dst_height0, 1040b8e80941Smrg dst_width, dst_height); 1041848b8605Smrg 1042848b8605Smrg /* Initialize the sampler view. */ 1043b8e80941Smrg src_view = si_create_sampler_view_custom(ctx, src, &src_templ, 1044b8e80941Smrg src_width0, src_height0, 1045b8e80941Smrg src_force_level); 1046848b8605Smrg 1047848b8605Smrg u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height), 1048848b8605Smrg abs(src_box->depth), &dstbox); 1049848b8605Smrg 1050848b8605Smrg /* Copy. */ 1051b8e80941Smrg si_blitter_begin(sctx, SI_COPY); 1052848b8605Smrg util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox, 1053b8e80941Smrg src_view, src_box, src_width0, src_height0, 1054b8e80941Smrg PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, 1055b8e80941Smrg false); 1056b8e80941Smrg si_blitter_end(sctx); 1057848b8605Smrg 1058848b8605Smrg pipe_surface_reference(&dst_view, NULL); 1059848b8605Smrg pipe_sampler_view_reference(&src_view, NULL); 1060848b8605Smrg} 1061848b8605Smrg 1062b8e80941Smrgstatic void si_do_CB_resolve(struct si_context *sctx, 1063b8e80941Smrg const struct pipe_blit_info *info, 1064b8e80941Smrg struct pipe_resource *dst, 1065b8e80941Smrg unsigned dst_level, unsigned dst_z, 1066b8e80941Smrg enum pipe_format format) 1067848b8605Smrg{ 1068b8e80941Smrg /* Required before and after CB_RESOLVE. */ 1069b8e80941Smrg sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 1070b8e80941Smrg 1071b8e80941Smrg si_blitter_begin(sctx, SI_COLOR_RESOLVE | 1072b8e80941Smrg (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1073b8e80941Smrg util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z, 1074b8e80941Smrg info->src.resource, info->src.box.z, 1075b8e80941Smrg ~0, sctx->custom_blend_resolve, 1076b8e80941Smrg format); 1077b8e80941Smrg si_blitter_end(sctx); 1078b8e80941Smrg 1079b8e80941Smrg /* Flush caches for possible texturing. */ 1080b8e80941Smrg si_make_CB_shader_coherent(sctx, 1, false, true /* no DCC */); 1081848b8605Smrg} 1082848b8605Smrg 1083848b8605Smrgstatic bool do_hardware_msaa_resolve(struct pipe_context *ctx, 1084848b8605Smrg const struct pipe_blit_info *info) 1085848b8605Smrg{ 1086848b8605Smrg struct si_context *sctx = (struct si_context*)ctx; 1087b8e80941Smrg struct si_texture *src = (struct si_texture*)info->src.resource; 1088b8e80941Smrg struct si_texture *dst = (struct si_texture*)info->dst.resource; 1089b8e80941Smrg MAYBE_UNUSED struct si_texture *stmp; 1090848b8605Smrg unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); 1091848b8605Smrg unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level); 1092b8e80941Smrg enum pipe_format format = info->src.format; 1093b8e80941Smrg struct pipe_resource *tmp, templ; 1094b8e80941Smrg struct pipe_blit_info blit; 1095b8e80941Smrg 1096b8e80941Smrg /* Check basic requirements for hw resolve. */ 1097b8e80941Smrg if (!(info->src.resource->nr_samples > 1 && 1098b8e80941Smrg info->dst.resource->nr_samples <= 1 && 1099b8e80941Smrg !util_format_is_pure_integer(format) && 1100b8e80941Smrg !util_format_is_depth_or_stencil(format) && 1101b8e80941Smrg util_max_layer(info->src.resource, 0) == 0)) 1102b8e80941Smrg return false; 1103b8e80941Smrg 1104b8e80941Smrg /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and 1105b8e80941Smrg * the format is R16G16. Use R16A16, which does work. 1106b8e80941Smrg */ 1107b8e80941Smrg if (format == PIPE_FORMAT_R16G16_UNORM) 1108b8e80941Smrg format = PIPE_FORMAT_R16A16_UNORM; 1109b8e80941Smrg if (format == PIPE_FORMAT_R16G16_SNORM) 1110b8e80941Smrg format = PIPE_FORMAT_R16A16_SNORM; 1111b8e80941Smrg 1112b8e80941Smrg /* Check the remaining requirements for hw resolve. */ 1113b8e80941Smrg if (util_max_layer(info->dst.resource, info->dst.level) == 0 && 1114848b8605Smrg !info->scissor_enable && 1115848b8605Smrg (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA && 1116b8e80941Smrg util_is_format_compatible(util_format_description(info->src.format), 1117b8e80941Smrg util_format_description(info->dst.format)) && 1118848b8605Smrg dst_width == info->src.resource->width0 && 1119848b8605Smrg dst_height == info->src.resource->height0 && 1120848b8605Smrg info->dst.box.x == 0 && 1121848b8605Smrg info->dst.box.y == 0 && 1122848b8605Smrg info->dst.box.width == dst_width && 1123848b8605Smrg info->dst.box.height == dst_height && 1124848b8605Smrg info->dst.box.depth == 1 && 1125848b8605Smrg info->src.box.x == 0 && 1126848b8605Smrg info->src.box.y == 0 && 1127848b8605Smrg info->src.box.width == dst_width && 1128848b8605Smrg info->src.box.height == dst_height && 1129848b8605Smrg info->src.box.depth == 1 && 1130b8e80941Smrg !dst->surface.is_linear && 1131b8e80941Smrg (!dst->cmask_buffer || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */ 1132b8e80941Smrg /* Check the last constraint. */ 1133b8e80941Smrg if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) { 1134b8e80941Smrg /* The next fast clear will switch to this mode to 1135b8e80941Smrg * get direct hw resolve next time if the mode is 1136b8e80941Smrg * different now. 1137b8e80941Smrg */ 1138b8e80941Smrg src->last_msaa_resolve_target_micro_mode = 1139b8e80941Smrg dst->surface.micro_tile_mode; 1140b8e80941Smrg goto resolve_to_temp; 1141b8e80941Smrg } 1142b8e80941Smrg 1143b8e80941Smrg /* Resolving into a surface with DCC is unsupported. Since 1144b8e80941Smrg * it's being overwritten anyway, clear it to uncompressed. 1145b8e80941Smrg * This is still the fastest codepath even with this clear. 1146b8e80941Smrg */ 1147b8e80941Smrg if (vi_dcc_enabled(dst, info->dst.level)) { 1148b8e80941Smrg /* TODO: Implement per-level DCC clears for GFX9. */ 1149b8e80941Smrg if (sctx->chip_class >= GFX9 && 1150b8e80941Smrg info->dst.resource->last_level != 0) 1151b8e80941Smrg goto resolve_to_temp; 1152b8e80941Smrg 1153b8e80941Smrg /* This can happen with mipmapping. */ 1154b8e80941Smrg if (sctx->chip_class == VI && 1155b8e80941Smrg !dst->surface.u.legacy.level[info->dst.level].dcc_fast_clear_size) 1156b8e80941Smrg goto resolve_to_temp; 1157b8e80941Smrg 1158b8e80941Smrg vi_dcc_clear_level(sctx, dst, info->dst.level, 1159b8e80941Smrg 0xFFFFFFFF); 1160b8e80941Smrg dst->dirty_level_mask &= ~(1 << info->dst.level); 1161b8e80941Smrg } 1162b8e80941Smrg 1163b8e80941Smrg /* Resolve directly from src to dst. */ 1164b8e80941Smrg si_do_CB_resolve(sctx, info, info->dst.resource, 1165b8e80941Smrg info->dst.level, info->dst.box.z, format); 1166848b8605Smrg return true; 1167848b8605Smrg } 1168b8e80941Smrg 1169b8e80941Smrgresolve_to_temp: 1170b8e80941Smrg /* Shader-based resolve is VERY SLOW. Instead, resolve into 1171b8e80941Smrg * a temporary texture and blit. 1172b8e80941Smrg */ 1173b8e80941Smrg memset(&templ, 0, sizeof(templ)); 1174b8e80941Smrg templ.target = PIPE_TEXTURE_2D; 1175b8e80941Smrg templ.format = info->src.resource->format; 1176b8e80941Smrg templ.width0 = info->src.resource->width0; 1177b8e80941Smrg templ.height0 = info->src.resource->height0; 1178b8e80941Smrg templ.depth0 = 1; 1179b8e80941Smrg templ.array_size = 1; 1180b8e80941Smrg templ.usage = PIPE_USAGE_DEFAULT; 1181b8e80941Smrg templ.flags = SI_RESOURCE_FLAG_FORCE_MSAA_TILING | 1182b8e80941Smrg SI_RESOURCE_FLAG_DISABLE_DCC; 1183b8e80941Smrg 1184b8e80941Smrg /* The src and dst microtile modes must be the same. */ 1185b8e80941Smrg if (src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY) 1186b8e80941Smrg templ.bind = PIPE_BIND_SCANOUT; 1187b8e80941Smrg else 1188b8e80941Smrg templ.bind = 0; 1189b8e80941Smrg 1190b8e80941Smrg tmp = ctx->screen->resource_create(ctx->screen, &templ); 1191b8e80941Smrg if (!tmp) 1192b8e80941Smrg return false; 1193b8e80941Smrg stmp = (struct si_texture*)tmp; 1194b8e80941Smrg 1195b8e80941Smrg assert(!stmp->surface.is_linear); 1196b8e80941Smrg assert(src->surface.micro_tile_mode == stmp->surface.micro_tile_mode); 1197b8e80941Smrg 1198b8e80941Smrg /* resolve */ 1199b8e80941Smrg si_do_CB_resolve(sctx, info, tmp, 0, 0, format); 1200b8e80941Smrg 1201b8e80941Smrg /* blit */ 1202b8e80941Smrg blit = *info; 1203b8e80941Smrg blit.src.resource = tmp; 1204b8e80941Smrg blit.src.box.z = 0; 1205b8e80941Smrg 1206b8e80941Smrg si_blitter_begin(sctx, SI_BLIT | 1207b8e80941Smrg (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1208b8e80941Smrg util_blitter_blit(sctx->blitter, &blit); 1209b8e80941Smrg si_blitter_end(sctx); 1210b8e80941Smrg 1211b8e80941Smrg pipe_resource_reference(&tmp, NULL); 1212b8e80941Smrg return true; 1213848b8605Smrg} 1214848b8605Smrg 1215848b8605Smrgstatic void si_blit(struct pipe_context *ctx, 1216848b8605Smrg const struct pipe_blit_info *info) 1217848b8605Smrg{ 1218848b8605Smrg struct si_context *sctx = (struct si_context*)ctx; 1219b8e80941Smrg struct si_texture *dst = (struct si_texture *)info->dst.resource; 1220848b8605Smrg 1221848b8605Smrg if (do_hardware_msaa_resolve(ctx, info)) { 1222848b8605Smrg return; 1223848b8605Smrg } 1224848b8605Smrg 1225b8e80941Smrg /* Using SDMA for copying to a linear texture in GTT is much faster. 1226b8e80941Smrg * This improves DRI PRIME performance. 1227b8e80941Smrg * 1228b8e80941Smrg * resource_copy_region can't do this yet, because dma_copy calls it 1229b8e80941Smrg * on failure (recursion). 1230b8e80941Smrg */ 1231b8e80941Smrg if (dst->surface.is_linear && 1232b8e80941Smrg sctx->dma_copy && 1233b8e80941Smrg util_can_blit_via_copy_region(info, false)) { 1234b8e80941Smrg sctx->dma_copy(ctx, info->dst.resource, info->dst.level, 1235b8e80941Smrg info->dst.box.x, info->dst.box.y, 1236b8e80941Smrg info->dst.box.z, 1237b8e80941Smrg info->src.resource, info->src.level, 1238b8e80941Smrg &info->src.box); 1239b8e80941Smrg return; 1240b8e80941Smrg } 1241b8e80941Smrg 1242848b8605Smrg assert(util_blitter_is_blit_supported(sctx->blitter, info)); 1243848b8605Smrg 1244848b8605Smrg /* The driver doesn't decompress resources automatically while 1245848b8605Smrg * u_blitter is rendering. */ 1246b8e80941Smrg vi_disable_dcc_if_incompatible_format(sctx, info->src.resource, 1247b8e80941Smrg info->src.level, 1248b8e80941Smrg info->src.format); 1249b8e80941Smrg vi_disable_dcc_if_incompatible_format(sctx, info->dst.resource, 1250b8e80941Smrg info->dst.level, 1251b8e80941Smrg info->dst.format); 1252b8e80941Smrg si_decompress_subresource(ctx, info->src.resource, info->mask, 1253b8e80941Smrg info->src.level, 1254848b8605Smrg info->src.box.z, 1255848b8605Smrg info->src.box.z + info->src.box.depth - 1); 1256848b8605Smrg 1257b8e80941Smrg if (sctx->screen->debug_flags & DBG(FORCE_DMA) && 1258b8e80941Smrg util_try_blit_via_copy_region(ctx, info)) 1259b8e80941Smrg return; 1260b8e80941Smrg 1261b8e80941Smrg si_blitter_begin(sctx, SI_BLIT | 1262848b8605Smrg (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1263848b8605Smrg util_blitter_blit(sctx->blitter, info); 1264b8e80941Smrg si_blitter_end(sctx); 1265b8e80941Smrg} 1266b8e80941Smrg 1267b8e80941Smrgstatic boolean si_generate_mipmap(struct pipe_context *ctx, 1268b8e80941Smrg struct pipe_resource *tex, 1269b8e80941Smrg enum pipe_format format, 1270b8e80941Smrg unsigned base_level, unsigned last_level, 1271b8e80941Smrg unsigned first_layer, unsigned last_layer) 1272b8e80941Smrg{ 1273b8e80941Smrg struct si_context *sctx = (struct si_context*)ctx; 1274b8e80941Smrg struct si_texture *stex = (struct si_texture *)tex; 1275b8e80941Smrg 1276b8e80941Smrg if (!util_blitter_is_copy_supported(sctx->blitter, tex, tex)) 1277b8e80941Smrg return false; 1278b8e80941Smrg 1279b8e80941Smrg /* The driver doesn't decompress resources automatically while 1280b8e80941Smrg * u_blitter is rendering. */ 1281b8e80941Smrg vi_disable_dcc_if_incompatible_format(sctx, tex, base_level, 1282b8e80941Smrg format); 1283b8e80941Smrg si_decompress_subresource(ctx, tex, PIPE_MASK_RGBAZS, 1284b8e80941Smrg base_level, first_layer, last_layer); 1285b8e80941Smrg 1286b8e80941Smrg /* Clear dirty_level_mask for the levels that will be overwritten. */ 1287b8e80941Smrg assert(base_level < last_level); 1288b8e80941Smrg stex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1, 1289b8e80941Smrg last_level - base_level); 1290b8e80941Smrg 1291b8e80941Smrg sctx->generate_mipmap_for_depth = stex->is_depth; 1292b8e80941Smrg 1293b8e80941Smrg si_blitter_begin(sctx, SI_BLIT | SI_DISABLE_RENDER_COND); 1294b8e80941Smrg util_blitter_generate_mipmap(sctx->blitter, tex, format, 1295b8e80941Smrg base_level, last_level, 1296b8e80941Smrg first_layer, last_layer); 1297b8e80941Smrg si_blitter_end(sctx); 1298b8e80941Smrg 1299b8e80941Smrg sctx->generate_mipmap_for_depth = false; 1300b8e80941Smrg return true; 1301848b8605Smrg} 1302848b8605Smrg 1303848b8605Smrgstatic void si_flush_resource(struct pipe_context *ctx, 1304848b8605Smrg struct pipe_resource *res) 1305848b8605Smrg{ 1306b8e80941Smrg struct si_context *sctx = (struct si_context*)ctx; 1307b8e80941Smrg struct si_texture *tex = (struct si_texture*)res; 1308848b8605Smrg 1309848b8605Smrg assert(res->target != PIPE_BUFFER); 1310b8e80941Smrg assert(!tex->dcc_separate_buffer || tex->dcc_gather_statistics); 1311848b8605Smrg 1312b8e80941Smrg /* st/dri calls flush twice per frame (not a bug), this prevents double 1313b8e80941Smrg * decompression. */ 1314b8e80941Smrg if (tex->dcc_separate_buffer && !tex->separate_dcc_dirty) 1315b8e80941Smrg return; 1316b8e80941Smrg 1317b8e80941Smrg if (!tex->is_depth && (tex->cmask_buffer || tex->dcc_offset)) { 1318b8e80941Smrg si_blit_decompress_color(sctx, tex, 0, res->last_level, 1319b8e80941Smrg 0, util_max_layer(res, 0), 1320b8e80941Smrg tex->dcc_separate_buffer != NULL); 1321b8e80941Smrg 1322b8e80941Smrg if (tex->display_dcc_offset) 1323b8e80941Smrg si_retile_dcc(sctx, tex); 1324b8e80941Smrg } 1325b8e80941Smrg 1326b8e80941Smrg /* Always do the analysis even if DCC is disabled at the moment. */ 1327b8e80941Smrg if (tex->dcc_gather_statistics) { 1328b8e80941Smrg bool separate_dcc_dirty = tex->separate_dcc_dirty; 1329b8e80941Smrg 1330b8e80941Smrg /* If the color buffer hasn't been unbound and fast clear hasn't 1331b8e80941Smrg * been used, separate_dcc_dirty is false, but there may have been 1332b8e80941Smrg * new rendering. Check if the color buffer is bound and assume 1333b8e80941Smrg * it's dirty. 1334b8e80941Smrg * 1335b8e80941Smrg * Note that DRI2 never unbinds window colorbuffers, which means 1336b8e80941Smrg * the DCC pipeline statistics query would never be re-set and would 1337b8e80941Smrg * keep adding new results until all free memory is exhausted if we 1338b8e80941Smrg * didn't do this. 1339b8e80941Smrg */ 1340b8e80941Smrg if (!separate_dcc_dirty) { 1341b8e80941Smrg for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 1342b8e80941Smrg if (sctx->framebuffer.state.cbufs[i] && 1343b8e80941Smrg sctx->framebuffer.state.cbufs[i]->texture == res) { 1344b8e80941Smrg separate_dcc_dirty = true; 1345b8e80941Smrg break; 1346b8e80941Smrg } 1347b8e80941Smrg } 1348b8e80941Smrg } 1349b8e80941Smrg 1350b8e80941Smrg if (separate_dcc_dirty) { 1351b8e80941Smrg tex->separate_dcc_dirty = false; 1352b8e80941Smrg vi_separate_dcc_process_and_reset_stats(ctx, tex); 1353b8e80941Smrg } 1354848b8605Smrg } 1355848b8605Smrg} 1356848b8605Smrg 1357b8e80941Smrgvoid si_decompress_dcc(struct si_context *sctx, struct si_texture *tex) 1358b8e80941Smrg{ 1359b8e80941Smrg /* If graphics is disabled, we can't decompress DCC, but it shouldn't 1360b8e80941Smrg * be compressed either. The caller should simply discard it. 1361b8e80941Smrg */ 1362b8e80941Smrg if (!tex->dcc_offset || !sctx->has_graphics) 1363b8e80941Smrg return; 1364b8e80941Smrg 1365b8e80941Smrg si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level, 1366b8e80941Smrg 0, util_max_layer(&tex->buffer.b.b, 0), 1367b8e80941Smrg true); 1368b8e80941Smrg} 1369b8e80941Smrg 1370848b8605Smrgvoid si_init_blit_functions(struct si_context *sctx) 1371848b8605Smrg{ 1372b8e80941Smrg sctx->b.resource_copy_region = si_resource_copy_region; 1373b8e80941Smrg 1374b8e80941Smrg if (sctx->has_graphics) { 1375b8e80941Smrg sctx->b.blit = si_blit; 1376b8e80941Smrg sctx->b.flush_resource = si_flush_resource; 1377b8e80941Smrg sctx->b.generate_mipmap = si_generate_mipmap; 1378b8e80941Smrg } 1379848b8605Smrg} 1380