1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25#include "si_build_pm4.h" 26#include "si_query.h" 27#include "si_shader_internal.h" 28#include "sid.h" 29#include "util/fast_idiv_by_const.h" 30#include "util/format/u_format.h" 31#include "util/format/u_format_s3tc.h" 32#include "util/u_dual_blend.h" 33#include "util/u_helpers.h" 34#include "util/u_memory.h" 35#include "util/u_resource.h" 36#include "util/u_upload_mgr.h" 37#include "util/u_blend.h" 38 39#include "gfx10_format_table.h" 40 41static unsigned si_map_swizzle(unsigned swizzle) 42{ 43 switch (swizzle) { 44 case PIPE_SWIZZLE_Y: 45 return V_008F0C_SQ_SEL_Y; 46 case PIPE_SWIZZLE_Z: 47 return V_008F0C_SQ_SEL_Z; 48 case PIPE_SWIZZLE_W: 49 return V_008F0C_SQ_SEL_W; 50 case PIPE_SWIZZLE_0: 51 return V_008F0C_SQ_SEL_0; 52 case PIPE_SWIZZLE_1: 53 return V_008F0C_SQ_SEL_1; 54 default: /* PIPE_SWIZZLE_X */ 55 return V_008F0C_SQ_SEL_X; 56 } 57} 58 59/* 12.4 fixed-point */ 60static unsigned si_pack_float_12p4(float x) 61{ 62 return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16; 63} 64 65/* 66 * Inferred framebuffer and blender state. 67 * 68 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending 69 * if there is not enough PS outputs. 70 */ 71static void si_emit_cb_render_state(struct si_context *sctx) 72{ 73 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 74 struct si_state_blend *blend = sctx->queued.named.blend; 75 /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers, 76 * but you never know. */ 77 uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_mask; 78 unsigned i; 79 80 /* Avoid a hang that happens when dual source blending is enabled 81 * but there is not enough color outputs. This is undefined behavior, 82 * so disable color writes completely. 83 * 84 * Reproducible with Unigine Heaven 4.0 and drirc missing. 85 */ 86 if (blend->dual_src_blend && sctx->shader.ps.cso && 87 (sctx->shader.ps.cso->info.colors_written & 0x3) != 0x3) 88 cb_target_mask = 0; 89 90 /* GFX9: Flush DFSM when CB_TARGET_MASK changes. 91 * I think we don't have to do anything between IBs. 92 */ 93 if (sctx->screen->dpbb_allowed && sctx->last_cb_target_mask != cb_target_mask) { 94 sctx->last_cb_target_mask = cb_target_mask; 95 96 radeon_begin(cs); 97 radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 98 radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 99 radeon_end(); 100 } 101 102 radeon_begin(cs); 103 radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK, 104 cb_target_mask); 105 106 if (sctx->chip_class >= GFX8) { 107 /* DCC MSAA workaround. 108 * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_- 109 * COMBINER_DISABLE, but that would be more complicated. 110 */ 111 bool oc_disable = 112 blend->dcc_msaa_corruption_4bit & cb_target_mask && sctx->framebuffer.nr_samples >= 2; 113 unsigned watermark = sctx->framebuffer.dcc_overwrite_combiner_watermark; 114 115 radeon_opt_set_context_reg( 116 sctx, R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL, 117 S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(sctx->chip_class <= GFX9) | 118 S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | 119 S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) | 120 S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->screen->info.has_dcc_constant_encode)); 121 } 122 123 /* RB+ register settings. */ 124 if (sctx->screen->info.rbplus_allowed) { 125 unsigned spi_shader_col_format = 126 sctx->shader.ps.cso ? sctx->shader.ps.current->key.part.ps.epilog.spi_shader_col_format 127 : 0; 128 unsigned sx_ps_downconvert = 0; 129 unsigned sx_blend_opt_epsilon = 0; 130 unsigned sx_blend_opt_control = 0; 131 132 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 133 struct si_surface *surf = (struct si_surface *)sctx->framebuffer.state.cbufs[i]; 134 unsigned format, swap, spi_format, colormask; 135 bool has_alpha, has_rgb; 136 137 if (!surf) { 138 /* If the color buffer is not set, the driver sets 32_R 139 * as the SPI color format, because the hw doesn't allow 140 * holes between color outputs, so also set this to 141 * enable RB+. 142 */ 143 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 144 continue; 145 } 146 147 format = G_028C70_FORMAT(surf->cb_color_info); 148 swap = G_028C70_COMP_SWAP(surf->cb_color_info); 149 spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 150 colormask = (cb_target_mask >> (i * 4)) & 0xf; 151 152 /* Set if RGB and A are present. */ 153 has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib); 154 155 if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 || 156 format == V_028C70_COLOR_32) 157 has_rgb = !has_alpha; 158 else 159 has_rgb = true; 160 161 /* Check the colormask and export format. */ 162 if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 163 has_rgb = false; 164 if (!(colormask & PIPE_MASK_A)) 165 has_alpha = false; 166 167 if (spi_format == V_028714_SPI_SHADER_ZERO) { 168 has_rgb = false; 169 has_alpha = false; 170 } 171 172 /* Disable value checking for disabled channels. */ 173 if (!has_rgb) 174 sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 175 if (!has_alpha) 176 sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 177 178 /* Enable down-conversion for 32bpp and smaller formats. */ 179 switch (format) { 180 case V_028C70_COLOR_8: 181 case V_028C70_COLOR_8_8: 182 case V_028C70_COLOR_8_8_8_8: 183 /* For 1 and 2-channel formats, use the superset thereof. */ 184 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 185 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 186 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 187 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 188 sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 189 } 190 break; 191 192 case V_028C70_COLOR_5_6_5: 193 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 194 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 195 sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 196 } 197 break; 198 199 case V_028C70_COLOR_1_5_5_5: 200 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 201 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 202 sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 203 } 204 break; 205 206 case V_028C70_COLOR_4_4_4_4: 207 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 208 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 209 sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 210 } 211 break; 212 213 case V_028C70_COLOR_32: 214 if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R) 215 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 216 else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR) 217 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 218 break; 219 220 case V_028C70_COLOR_16: 221 case V_028C70_COLOR_16_16: 222 /* For 1-channel formats, use the superset thereof. */ 223 if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 224 spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 225 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 226 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 227 if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV) 228 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 229 else 230 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 231 } 232 break; 233 234 case V_028C70_COLOR_10_11_11: 235 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) 236 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 237 break; 238 239 case V_028C70_COLOR_2_10_10_10: 240 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 241 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 242 sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 243 } 244 break; 245 246 case V_028C70_COLOR_5_9_9_9: 247 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) 248 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4); 249 break; 250 } 251 } 252 253 /* If there are no color outputs, the first color export is 254 * always enabled as 32_R, so also set this to enable RB+. 255 */ 256 if (!sx_ps_downconvert) 257 sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R; 258 259 /* SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, SX_BLEND_OPT_CONTROL */ 260 radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT, 261 sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control); 262 } 263 radeon_end_update_context_roll(sctx); 264} 265 266/* 267 * Blender functions 268 */ 269 270static uint32_t si_translate_blend_function(int blend_func) 271{ 272 switch (blend_func) { 273 case PIPE_BLEND_ADD: 274 return V_028780_COMB_DST_PLUS_SRC; 275 case PIPE_BLEND_SUBTRACT: 276 return V_028780_COMB_SRC_MINUS_DST; 277 case PIPE_BLEND_REVERSE_SUBTRACT: 278 return V_028780_COMB_DST_MINUS_SRC; 279 case PIPE_BLEND_MIN: 280 return V_028780_COMB_MIN_DST_SRC; 281 case PIPE_BLEND_MAX: 282 return V_028780_COMB_MAX_DST_SRC; 283 default: 284 PRINT_ERR("Unknown blend function %d\n", blend_func); 285 assert(0); 286 break; 287 } 288 return 0; 289} 290 291static uint32_t si_translate_blend_factor(int blend_fact) 292{ 293 switch (blend_fact) { 294 case PIPE_BLENDFACTOR_ONE: 295 return V_028780_BLEND_ONE; 296 case PIPE_BLENDFACTOR_SRC_COLOR: 297 return V_028780_BLEND_SRC_COLOR; 298 case PIPE_BLENDFACTOR_SRC_ALPHA: 299 return V_028780_BLEND_SRC_ALPHA; 300 case PIPE_BLENDFACTOR_DST_ALPHA: 301 return V_028780_BLEND_DST_ALPHA; 302 case PIPE_BLENDFACTOR_DST_COLOR: 303 return V_028780_BLEND_DST_COLOR; 304 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 305 return V_028780_BLEND_SRC_ALPHA_SATURATE; 306 case PIPE_BLENDFACTOR_CONST_COLOR: 307 return V_028780_BLEND_CONSTANT_COLOR; 308 case PIPE_BLENDFACTOR_CONST_ALPHA: 309 return V_028780_BLEND_CONSTANT_ALPHA; 310 case PIPE_BLENDFACTOR_ZERO: 311 return V_028780_BLEND_ZERO; 312 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 313 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 314 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 315 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 316 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 317 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 318 case PIPE_BLENDFACTOR_INV_DST_COLOR: 319 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 320 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 321 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 322 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 323 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 324 case PIPE_BLENDFACTOR_SRC1_COLOR: 325 return V_028780_BLEND_SRC1_COLOR; 326 case PIPE_BLENDFACTOR_SRC1_ALPHA: 327 return V_028780_BLEND_SRC1_ALPHA; 328 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 329 return V_028780_BLEND_INV_SRC1_COLOR; 330 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 331 return V_028780_BLEND_INV_SRC1_ALPHA; 332 default: 333 PRINT_ERR("Bad blend factor %d not supported!\n", blend_fact); 334 assert(0); 335 break; 336 } 337 return 0; 338} 339 340static uint32_t si_translate_blend_opt_function(int blend_func) 341{ 342 switch (blend_func) { 343 case PIPE_BLEND_ADD: 344 return V_028760_OPT_COMB_ADD; 345 case PIPE_BLEND_SUBTRACT: 346 return V_028760_OPT_COMB_SUBTRACT; 347 case PIPE_BLEND_REVERSE_SUBTRACT: 348 return V_028760_OPT_COMB_REVSUBTRACT; 349 case PIPE_BLEND_MIN: 350 return V_028760_OPT_COMB_MIN; 351 case PIPE_BLEND_MAX: 352 return V_028760_OPT_COMB_MAX; 353 default: 354 return V_028760_OPT_COMB_BLEND_DISABLED; 355 } 356} 357 358static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 359{ 360 switch (blend_fact) { 361 case PIPE_BLENDFACTOR_ZERO: 362 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 363 case PIPE_BLENDFACTOR_ONE: 364 return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 365 case PIPE_BLENDFACTOR_SRC_COLOR: 366 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 367 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 368 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 369 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 370 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 371 case PIPE_BLENDFACTOR_SRC_ALPHA: 372 return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 373 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 374 return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 375 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 376 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 377 : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 378 default: 379 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 380 } 381} 382 383static void si_blend_check_commutativity(struct si_screen *sscreen, struct si_state_blend *blend, 384 enum pipe_blend_func func, enum pipe_blendfactor src, 385 enum pipe_blendfactor dst, unsigned chanmask) 386{ 387 /* Src factor is allowed when it does not depend on Dst */ 388 static const uint32_t src_allowed = 389 (1u << PIPE_BLENDFACTOR_ONE) | (1u << PIPE_BLENDFACTOR_SRC_COLOR) | 390 (1u << PIPE_BLENDFACTOR_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) | 391 (1u << PIPE_BLENDFACTOR_CONST_COLOR) | (1u << PIPE_BLENDFACTOR_CONST_ALPHA) | 392 (1u << PIPE_BLENDFACTOR_SRC1_COLOR) | (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) | 393 (1u << PIPE_BLENDFACTOR_ZERO) | (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) | 394 (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) | 395 (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) | 396 (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA); 397 398 if (dst == PIPE_BLENDFACTOR_ONE && (src_allowed & (1u << src))) { 399 /* Addition is commutative, but floating point addition isn't 400 * associative: subtle changes can be introduced via different 401 * rounding. 402 * 403 * Out-of-order is also non-deterministic, which means that 404 * this breaks OpenGL invariance requirements. So only enable 405 * out-of-order additive blending if explicitly allowed by a 406 * setting. 407 */ 408 if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN || 409 (func == PIPE_BLEND_ADD && sscreen->commutative_blend_add)) 410 blend->commutative_4bit |= chanmask; 411 } 412} 413 414/** 415 * Get rid of DST in the blend factors by commuting the operands: 416 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 417 */ 418static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, unsigned *dst_factor, 419 unsigned expected_dst, unsigned replacement_src) 420{ 421 if (*src_factor == expected_dst && *dst_factor == PIPE_BLENDFACTOR_ZERO) { 422 *src_factor = PIPE_BLENDFACTOR_ZERO; 423 *dst_factor = replacement_src; 424 425 /* Commuting the operands requires reversing subtractions. */ 426 if (*func == PIPE_BLEND_SUBTRACT) 427 *func = PIPE_BLEND_REVERSE_SUBTRACT; 428 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 429 *func = PIPE_BLEND_SUBTRACT; 430 } 431} 432 433static void *si_create_blend_state_mode(struct pipe_context *ctx, 434 const struct pipe_blend_state *state, unsigned mode) 435{ 436 struct si_context *sctx = (struct si_context *)ctx; 437 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 438 struct si_pm4_state *pm4 = &blend->pm4; 439 uint32_t sx_mrt_blend_opt[8] = {0}; 440 uint32_t color_control = 0; 441 bool logicop_enable = state->logicop_enable && state->logicop_func != PIPE_LOGICOP_COPY; 442 443 if (!blend) 444 return NULL; 445 446 blend->alpha_to_coverage = state->alpha_to_coverage; 447 blend->alpha_to_one = state->alpha_to_one; 448 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 449 blend->logicop_enable = logicop_enable; 450 blend->allows_noop_optimization = 451 state->rt[0].rgb_func == PIPE_BLEND_ADD && 452 state->rt[0].alpha_func == PIPE_BLEND_ADD && 453 state->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_DST_COLOR && 454 state->rt[0].alpha_src_factor == PIPE_BLENDFACTOR_DST_COLOR && 455 state->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ZERO && 456 state->rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_ZERO && 457 mode == V_028808_CB_NORMAL; 458 459 unsigned num_shader_outputs = state->max_rt + 1; /* estimate */ 460 if (blend->dual_src_blend) 461 num_shader_outputs = MAX2(num_shader_outputs, 2); 462 463 if (logicop_enable) { 464 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 465 } else { 466 color_control |= S_028808_ROP3(0xcc); 467 } 468 469 if (state->alpha_to_coverage && state->alpha_to_coverage_dither) { 470 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 471 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 472 S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) | 473 S_028B70_ALPHA_TO_MASK_OFFSET2(0) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 474 S_028B70_OFFSET_ROUND(1)); 475 } else { 476 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 477 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 478 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 479 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 480 S_028B70_OFFSET_ROUND(0)); 481 } 482 483 if (state->alpha_to_coverage) 484 blend->need_src_alpha_4bit |= 0xf; 485 486 blend->cb_target_mask = 0; 487 blend->cb_target_enabled_4bit = 0; 488 489 for (int i = 0; i < num_shader_outputs; i++) { 490 /* state->rt entries > 0 only written if independent blending */ 491 const int j = state->independent_blend_enable ? i : 0; 492 493 unsigned eqRGB = state->rt[j].rgb_func; 494 unsigned srcRGB = state->rt[j].rgb_src_factor; 495 unsigned dstRGB = state->rt[j].rgb_dst_factor; 496 unsigned eqA = state->rt[j].alpha_func; 497 unsigned srcA = state->rt[j].alpha_src_factor; 498 unsigned dstA = state->rt[j].alpha_dst_factor; 499 500 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 501 unsigned blend_cntl = 0; 502 503 sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 504 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 505 506 /* Only set dual source blending for MRT0 to avoid a hang. */ 507 if (i >= 1 && blend->dual_src_blend) { 508 /* Vulkan does this for dual source blending. */ 509 if (i == 1) 510 blend_cntl |= S_028780_ENABLE(1); 511 512 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 513 continue; 514 } 515 516 /* Only addition and subtraction equations are supported with 517 * dual source blending. 518 */ 519 if (blend->dual_src_blend && (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 520 eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 521 assert(!"Unsupported equation for dual source blending"); 522 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 523 continue; 524 } 525 526 /* cb_render_state will disable unused ones */ 527 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 528 if (state->rt[j].colormask) 529 blend->cb_target_enabled_4bit |= 0xf << (4 * i); 530 531 if (!state->rt[j].colormask || !state->rt[j].blend_enable) { 532 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 533 continue; 534 } 535 536 si_blend_check_commutativity(sctx->screen, blend, eqRGB, srcRGB, dstRGB, 0x7 << (4 * i)); 537 si_blend_check_commutativity(sctx->screen, blend, eqA, srcA, dstA, 0x8 << (4 * i)); 538 539 /* Blending optimizations for RB+. 540 * These transformations don't change the behavior. 541 * 542 * First, get rid of DST in the blend factors: 543 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 544 */ 545 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, PIPE_BLENDFACTOR_DST_COLOR, 546 PIPE_BLENDFACTOR_SRC_COLOR); 547 si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_COLOR, 548 PIPE_BLENDFACTOR_SRC_COLOR); 549 si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_ALPHA, 550 PIPE_BLENDFACTOR_SRC_ALPHA); 551 552 /* Look up the ideal settings from tables. */ 553 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 554 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 555 srcA_opt = si_translate_blend_opt_factor(srcA, true); 556 dstA_opt = si_translate_blend_opt_factor(dstA, true); 557 558 /* Handle interdependencies. */ 559 if (util_blend_factor_uses_dest(srcRGB, false)) 560 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 561 if (util_blend_factor_uses_dest(srcA, false)) 562 dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 563 564 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 565 (dstRGB == PIPE_BLENDFACTOR_ZERO || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 566 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 567 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 568 569 /* Set the final value. */ 570 sx_mrt_blend_opt[i] = S_028760_COLOR_SRC_OPT(srcRGB_opt) | 571 S_028760_COLOR_DST_OPT(dstRGB_opt) | 572 S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 573 S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) | 574 S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 575 576 /* Set blend state. */ 577 blend_cntl |= S_028780_ENABLE(1); 578 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 579 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 580 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 581 582 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 583 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 584 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 585 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 586 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 587 } 588 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 589 590 blend->blend_enable_4bit |= 0xfu << (i * 4); 591 592 if (sctx->chip_class >= GFX8 && sctx->chip_class <= GFX10) 593 blend->dcc_msaa_corruption_4bit |= 0xfu << (i * 4); 594 595 /* This is only important for formats without alpha. */ 596 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 597 srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 598 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 599 srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 600 blend->need_src_alpha_4bit |= 0xfu << (i * 4); 601 } 602 603 if (sctx->chip_class >= GFX8 && sctx->chip_class <= GFX10 && logicop_enable) 604 blend->dcc_msaa_corruption_4bit |= blend->cb_target_enabled_4bit; 605 606 if (blend->cb_target_mask) { 607 color_control |= S_028808_MODE(mode); 608 } else { 609 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 610 } 611 612 if (sctx->screen->info.rbplus_allowed) { 613 /* Disable RB+ blend optimizations for dual source blending. 614 * Vulkan does this. 615 */ 616 if (blend->dual_src_blend) { 617 for (int i = 0; i < num_shader_outputs; i++) { 618 sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 619 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 620 } 621 } 622 623 for (int i = 0; i < num_shader_outputs; i++) 624 si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]); 625 626 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */ 627 if (blend->dual_src_blend || logicop_enable || mode == V_028808_CB_RESOLVE) 628 color_control |= S_028808_DISABLE_DUAL_QUAD(1); 629 } 630 631 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 632 return blend; 633} 634 635static void *si_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) 636{ 637 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 638} 639 640static bool si_check_blend_dst_sampler_noop(struct si_context *sctx) 641{ 642 if (sctx->framebuffer.state.nr_cbufs == 1) { 643 struct si_shader_selector *sel = sctx->shader.ps.cso; 644 bool free_nir; 645 if (unlikely(sel->info.writes_1_if_tex_is_1 == 0xff)) { 646 struct nir_shader *nir = si_get_nir_shader(sel, NULL, &free_nir); 647 648 /* Determine if this fragment shader always writes vec4(1) if a specific texture 649 * is all 1s. 650 */ 651 float in[4] = { 1.0, 1.0, 1.0, 1.0 }; 652 float out[4]; 653 int texunit; 654 if (si_nir_is_output_const_if_tex_is_const(nir, in, out, &texunit) && 655 !memcmp(in, out, 4 * sizeof(float))) { 656 sel->info.writes_1_if_tex_is_1 = 1 + texunit; 657 } else { 658 sel->info.writes_1_if_tex_is_1 = 0; 659 } 660 661 if (free_nir) 662 ralloc_free(nir); 663 } 664 665 if (sel->info.writes_1_if_tex_is_1 && 666 sel->info.writes_1_if_tex_is_1 != 0xff) { 667 /* Now check if the texture is cleared to 1 */ 668 int unit = sctx->shader.ps.cso->info.writes_1_if_tex_is_1 - 1; 669 struct si_samplers *samp = &sctx->samplers[PIPE_SHADER_FRAGMENT]; 670 if ((1u << unit) & samp->enabled_mask) { 671 struct si_texture* tex = (struct si_texture*) samp->views[unit]->texture; 672 if (tex->is_depth && 673 tex->depth_cleared_level_mask & BITFIELD_BIT(samp->views[unit]->u.tex.first_level) && 674 tex->depth_clear_value[0] == 1) { 675 return false; 676 } 677 /* TODO: handle color textures */ 678 } 679 } 680 } 681 682 return true; 683} 684 685static void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx, 686 const struct pipe_draw_info *info, 687 unsigned drawid_offset, 688 const struct pipe_draw_indirect_info *indirect, 689 const struct pipe_draw_start_count_bias *draws, 690 unsigned num_draws) { 691 struct si_context *sctx = (struct si_context *)ctx; 692 693 if (!si_check_blend_dst_sampler_noop(sctx)) 694 return; 695 696 sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws); 697} 698 699static void si_draw_vstate_blend_dst_sampler_noop(struct pipe_context *ctx, 700 struct pipe_vertex_state *state, 701 uint32_t partial_velem_mask, 702 struct pipe_draw_vertex_state_info info, 703 const struct pipe_draw_start_count_bias *draws, 704 unsigned num_draws) { 705 struct si_context *sctx = (struct si_context *)ctx; 706 707 if (!si_check_blend_dst_sampler_noop(sctx)) 708 return; 709 710 sctx->real_draw_vertex_state(ctx, state, partial_velem_mask, info, draws, num_draws); 711} 712 713static void si_bind_blend_state(struct pipe_context *ctx, void *state) 714{ 715 struct si_context *sctx = (struct si_context *)ctx; 716 struct si_state_blend *old_blend = sctx->queued.named.blend; 717 struct si_state_blend *blend = (struct si_state_blend *)state; 718 719 if (!blend) 720 blend = (struct si_state_blend *)sctx->noop_blend; 721 722 si_pm4_bind_state(sctx, blend, blend); 723 724 if (old_blend->cb_target_mask != blend->cb_target_mask || 725 old_blend->dual_src_blend != blend->dual_src_blend || 726 (old_blend->dcc_msaa_corruption_4bit != blend->dcc_msaa_corruption_4bit && 727 sctx->framebuffer.has_dcc_msaa)) 728 si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 729 730 if (old_blend->cb_target_mask != blend->cb_target_mask || 731 old_blend->alpha_to_coverage != blend->alpha_to_coverage || 732 old_blend->alpha_to_one != blend->alpha_to_one || 733 old_blend->dual_src_blend != blend->dual_src_blend || 734 old_blend->blend_enable_4bit != blend->blend_enable_4bit || 735 old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit) { 736 si_ps_key_update_framebuffer_blend(sctx); 737 si_ps_key_update_blend_rasterizer(sctx); 738 si_update_ps_inputs_read_or_disabled(sctx); 739 sctx->do_update_shaders = true; 740 } 741 742 if (sctx->screen->dpbb_allowed && 743 (old_blend->alpha_to_coverage != blend->alpha_to_coverage || 744 old_blend->blend_enable_4bit != blend->blend_enable_4bit || 745 old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit)) 746 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 747 748 if (sctx->screen->has_out_of_order_rast && 749 ((old_blend->blend_enable_4bit != blend->blend_enable_4bit || 750 old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit || 751 old_blend->commutative_4bit != blend->commutative_4bit || 752 old_blend->logicop_enable != blend->logicop_enable))) 753 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 754 755 if (likely(!radeon_uses_secure_bos(sctx->ws))) { 756 if (unlikely(blend->allows_noop_optimization)) { 757 si_install_draw_wrapper(sctx, si_draw_blend_dst_sampler_noop, 758 si_draw_vstate_blend_dst_sampler_noop); 759 } else { 760 si_install_draw_wrapper(sctx, NULL, NULL); 761 } 762 } 763} 764 765static void si_delete_blend_state(struct pipe_context *ctx, void *state) 766{ 767 struct si_context *sctx = (struct si_context *)ctx; 768 769 if (sctx->queued.named.blend == state) 770 si_bind_blend_state(ctx, sctx->noop_blend); 771 772 si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(blend)); 773} 774 775static void si_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) 776{ 777 struct si_context *sctx = (struct si_context *)ctx; 778 static const struct pipe_blend_color zeros; 779 780 sctx->blend_color = *state; 781 sctx->blend_color_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 782 si_mark_atom_dirty(sctx, &sctx->atoms.s.blend_color); 783} 784 785static void si_emit_blend_color(struct si_context *sctx) 786{ 787 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 788 789 radeon_begin(cs); 790 radeon_set_context_reg_seq(R_028414_CB_BLEND_RED, 4); 791 radeon_emit_array((uint32_t *)sctx->blend_color.color, 4); 792 radeon_end(); 793} 794 795/* 796 * Clipping 797 */ 798 799static void si_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) 800{ 801 struct si_context *sctx = (struct si_context *)ctx; 802 struct pipe_constant_buffer cb; 803 static const struct pipe_clip_state zeros; 804 805 if (memcmp(&sctx->clip_state, state, sizeof(*state)) == 0) 806 return; 807 808 sctx->clip_state = *state; 809 sctx->clip_state_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 810 si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_state); 811 812 cb.buffer = NULL; 813 cb.user_buffer = state->ucp; 814 cb.buffer_offset = 0; 815 cb.buffer_size = 4 * 4 * 8; 816 si_set_internal_const_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb); 817} 818 819static void si_emit_clip_state(struct si_context *sctx) 820{ 821 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 822 823 radeon_begin(cs); 824 radeon_set_context_reg_seq(R_0285BC_PA_CL_UCP_0_X, 6 * 4); 825 radeon_emit_array((uint32_t *)sctx->clip_state.ucp, 6 * 4); 826 radeon_end(); 827} 828 829static void si_emit_clip_regs(struct si_context *sctx) 830{ 831 struct si_shader *vs = si_get_vs(sctx)->current; 832 struct si_shader_selector *vs_sel = vs->selector; 833 struct si_shader_info *info = &vs_sel->info; 834 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 835 bool window_space = info->stage == MESA_SHADER_VERTEX ? 836 info->base.vs.window_space_position : 0; 837 unsigned clipdist_mask = vs_sel->clipdist_mask; 838 unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS; 839 unsigned culldist_mask = vs_sel->culldist_mask; 840 841 /* Clip distances on points have no effect, so need to be implemented 842 * as cull distances. This applies for the clipvertex case as well. 843 * 844 * Setting this for primitives other than points should have no adverse 845 * effects. 846 */ 847 clipdist_mask &= rs->clip_plane_enable; 848 culldist_mask |= clipdist_mask; 849 850 unsigned pa_cl_cntl = S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 && 851 !sctx->screen->options.vrs2x2) | 852 S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) | 853 clipdist_mask | (culldist_mask << 8); 854 855 radeon_begin(&sctx->gfx_cs); 856 radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL, 857 pa_cl_cntl | vs->pa_cl_vs_out_cntl); 858 radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, 859 rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space)); 860 radeon_end_update_context_roll(sctx); 861} 862 863/* 864 * inferred state between framebuffer and rasterizer 865 */ 866static void si_update_poly_offset_state(struct si_context *sctx) 867{ 868 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 869 870 if (!rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) { 871 si_pm4_bind_state(sctx, poly_offset, NULL); 872 return; 873 } 874 875 /* Use the user format, not db_render_format, so that the polygon 876 * offset behaves as expected by applications. 877 */ 878 switch (sctx->framebuffer.state.zsbuf->texture->format) { 879 case PIPE_FORMAT_Z16_UNORM: 880 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 881 break; 882 default: /* 24-bit */ 883 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 884 break; 885 case PIPE_FORMAT_Z32_FLOAT: 886 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 887 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 888 break; 889 } 890} 891 892/* 893 * Rasterizer 894 */ 895 896static uint32_t si_translate_fill(uint32_t func) 897{ 898 switch (func) { 899 case PIPE_POLYGON_MODE_FILL: 900 return V_028814_X_DRAW_TRIANGLES; 901 case PIPE_POLYGON_MODE_LINE: 902 return V_028814_X_DRAW_LINES; 903 case PIPE_POLYGON_MODE_POINT: 904 return V_028814_X_DRAW_POINTS; 905 default: 906 assert(0); 907 return V_028814_X_DRAW_POINTS; 908 } 909} 910 911static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state) 912{ 913 struct si_screen *sscreen = ((struct si_context *)ctx)->screen; 914 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 915 struct si_pm4_state *pm4 = &rs->pm4; 916 unsigned tmp, i; 917 float psize_min, psize_max; 918 919 if (!rs) { 920 return NULL; 921 } 922 923 rs->scissor_enable = state->scissor; 924 rs->clip_halfz = state->clip_halfz; 925 rs->two_side = state->light_twoside; 926 rs->multisample_enable = state->multisample; 927 rs->force_persample_interp = state->force_persample_interp; 928 rs->clip_plane_enable = state->clip_plane_enable; 929 rs->half_pixel_center = state->half_pixel_center; 930 rs->line_stipple_enable = state->line_stipple_enable; 931 rs->poly_stipple_enable = state->poly_stipple_enable; 932 rs->line_smooth = state->line_smooth; 933 rs->line_width = state->line_width; 934 rs->poly_smooth = state->poly_smooth; 935 rs->uses_poly_offset = state->offset_point || state->offset_line || state->offset_tri; 936 rs->clamp_fragment_color = state->clamp_fragment_color; 937 rs->clamp_vertex_color = state->clamp_vertex_color; 938 rs->flatshade = state->flatshade; 939 rs->flatshade_first = state->flatshade_first; 940 rs->sprite_coord_enable = state->sprite_coord_enable; 941 rs->rasterizer_discard = state->rasterizer_discard; 942 rs->polygon_mode_is_lines = 943 (state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) || 944 (state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK)); 945 rs->polygon_mode_is_points = 946 (state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) || 947 (state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK)); 948 rs->pa_sc_line_stipple = state->line_stipple_enable 949 ? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 950 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) 951 : 0; 952 rs->pa_cl_clip_cntl = S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 953 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) | 954 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) | 955 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 956 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 957 958 if (rs->rasterizer_discard) { 959 rs->ngg_cull_flags = SI_NGG_CULL_ENABLED | 960 SI_NGG_CULL_FRONT_FACE | 961 SI_NGG_CULL_BACK_FACE; 962 rs->ngg_cull_flags_y_inverted = rs->ngg_cull_flags; 963 } else { 964 rs->ngg_cull_flags = SI_NGG_CULL_ENABLED; 965 rs->ngg_cull_flags_y_inverted = rs->ngg_cull_flags; 966 967 bool cull_front, cull_back; 968 969 if (!state->front_ccw) { 970 cull_front = !!(state->cull_face & PIPE_FACE_FRONT); 971 cull_back = !!(state->cull_face & PIPE_FACE_BACK); 972 } else { 973 cull_back = !!(state->cull_face & PIPE_FACE_FRONT); 974 cull_front = !!(state->cull_face & PIPE_FACE_BACK); 975 } 976 977 if (cull_front) { 978 rs->ngg_cull_flags |= SI_NGG_CULL_FRONT_FACE; 979 rs->ngg_cull_flags_y_inverted |= SI_NGG_CULL_BACK_FACE; 980 } 981 982 if (cull_back) { 983 rs->ngg_cull_flags |= SI_NGG_CULL_BACK_FACE; 984 rs->ngg_cull_flags_y_inverted |= SI_NGG_CULL_FRONT_FACE; 985 } 986 } 987 988 si_pm4_set_reg( 989 pm4, R_0286D4_SPI_INTERP_CONTROL_0, 990 S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) | 991 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 992 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 993 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 994 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 995 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 996 997 /* point size 12.4 fixed point */ 998 tmp = (unsigned)(state->point_size * 8.0); 999 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 1000 1001 if (state->point_size_per_vertex) { 1002 psize_min = util_get_min_point_size(state); 1003 psize_max = SI_MAX_POINT_SIZE; 1004 } else { 1005 /* Force the point size to be as if the vertex output was disabled. */ 1006 psize_min = state->point_size; 1007 psize_max = state->point_size; 1008 } 1009 rs->max_point_size = psize_max; 1010 1011 /* Divide by two, because 0.5 = 1 pixel. */ 1012 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 1013 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min / 2)) | 1014 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max / 2))); 1015 1016 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, 1017 S_028A08_WIDTH(si_pack_float_12p4(state->line_width / 2))); 1018 si_pm4_set_reg( 1019 pm4, R_028A48_PA_SC_MODE_CNTL_0, 1020 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 1021 S_028A48_MSAA_ENABLE(state->multisample || state->poly_smooth || state->line_smooth) | 1022 S_028A48_VPORT_SCISSOR_ENABLE(1) | 1023 S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.chip_class >= GFX9)); 1024 1025 bool polygon_mode_enabled = 1026 (state->fill_front != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_FRONT)) || 1027 (state->fill_back != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_BACK)); 1028 1029 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 1030 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 1031 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 1032 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 1033 S_028814_FACE(!state->front_ccw) | 1034 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 1035 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 1036 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 1037 S_028814_POLY_MODE(polygon_mode_enabled) | 1038 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 1039 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)) | 1040 /* this must be set if POLY_MODE or PERPENDICULAR_ENDCAP_ENA is set */ 1041 S_028814_KEEP_TOGETHER_ENABLE(sscreen->info.chip_class >= GFX10 ? polygon_mode_enabled : 0)); 1042 1043 if (!rs->uses_poly_offset) 1044 return rs; 1045 1046 rs->pm4_poly_offset = CALLOC(3, sizeof(struct si_pm4_state)); 1047 if (!rs->pm4_poly_offset) { 1048 FREE(rs); 1049 return NULL; 1050 } 1051 1052 /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */ 1053 for (i = 0; i < 3; i++) { 1054 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 1055 float offset_units = state->offset_units; 1056 float offset_scale = state->offset_scale * 16.0f; 1057 uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 1058 1059 if (!state->offset_units_unscaled) { 1060 switch (i) { 1061 case 0: /* 16-bit zbuffer */ 1062 offset_units *= 4.0f; 1063 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 1064 break; 1065 case 1: /* 24-bit zbuffer */ 1066 offset_units *= 2.0f; 1067 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 1068 break; 1069 case 2: /* 32-bit zbuffer */ 1070 offset_units *= 1.0f; 1071 pa_su_poly_offset_db_fmt_cntl = 1072 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 1073 break; 1074 } 1075 } 1076 1077 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, pa_su_poly_offset_db_fmt_cntl); 1078 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 1079 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, fui(offset_scale)); 1080 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units)); 1081 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, fui(offset_scale)); 1082 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units)); 1083 } 1084 1085 return rs; 1086} 1087 1088static void si_bind_rs_state(struct pipe_context *ctx, void *state) 1089{ 1090 struct si_context *sctx = (struct si_context *)ctx; 1091 struct si_state_rasterizer *old_rs = (struct si_state_rasterizer *)sctx->queued.named.rasterizer; 1092 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 1093 1094 if (!rs) 1095 rs = (struct si_state_rasterizer *)sctx->discard_rasterizer_state; 1096 1097 if (old_rs->multisample_enable != rs->multisample_enable) { 1098 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1099 1100 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1101 1102 /* Update the small primitive filter workaround if necessary. */ 1103 if (sctx->screen->info.has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1) 1104 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); 1105 1106 /* NGG cull state uses multisample_enable. */ 1107 if (sctx->screen->use_ngg_culling) 1108 si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); 1109 } 1110 1111 sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR; 1112 sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color); 1113 1114 si_pm4_bind_state(sctx, rasterizer, rs); 1115 si_update_poly_offset_state(sctx); 1116 1117 if (old_rs->scissor_enable != rs->scissor_enable) 1118 si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors); 1119 1120 if (old_rs->line_width != rs->line_width || old_rs->max_point_size != rs->max_point_size || 1121 old_rs->half_pixel_center != rs->half_pixel_center) 1122 si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband); 1123 1124 if (old_rs->clip_halfz != rs->clip_halfz) 1125 si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports); 1126 1127 if (old_rs->clip_plane_enable != rs->clip_plane_enable || 1128 old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl) 1129 si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs); 1130 1131 if (old_rs->sprite_coord_enable != rs->sprite_coord_enable || 1132 old_rs->flatshade != rs->flatshade) 1133 si_mark_atom_dirty(sctx, &sctx->atoms.s.spi_map); 1134 1135 if (old_rs->clip_plane_enable != rs->clip_plane_enable || 1136 old_rs->rasterizer_discard != rs->rasterizer_discard || 1137 old_rs->sprite_coord_enable != rs->sprite_coord_enable || 1138 old_rs->flatshade != rs->flatshade || old_rs->two_side != rs->two_side || 1139 old_rs->multisample_enable != rs->multisample_enable || 1140 old_rs->poly_stipple_enable != rs->poly_stipple_enable || 1141 old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth || 1142 old_rs->clamp_fragment_color != rs->clamp_fragment_color || 1143 old_rs->force_persample_interp != rs->force_persample_interp || 1144 old_rs->polygon_mode_is_points != rs->polygon_mode_is_points) { 1145 si_ps_key_update_blend_rasterizer(sctx); 1146 si_ps_key_update_rasterizer(sctx); 1147 si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 1148 si_update_ps_inputs_read_or_disabled(sctx); 1149 sctx->do_update_shaders = true; 1150 } 1151 1152 if (old_rs->line_smooth != rs->line_smooth || 1153 old_rs->poly_smooth != rs->poly_smooth || 1154 old_rs->poly_stipple_enable != rs->poly_stipple_enable || 1155 old_rs->flatshade != rs->flatshade) 1156 si_update_vrs_flat_shading(sctx); 1157} 1158 1159static void si_delete_rs_state(struct pipe_context *ctx, void *state) 1160{ 1161 struct si_context *sctx = (struct si_context *)ctx; 1162 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 1163 1164 if (sctx->queued.named.rasterizer == state) 1165 si_bind_rs_state(ctx, sctx->discard_rasterizer_state); 1166 1167 FREE(rs->pm4_poly_offset); 1168 si_pm4_free_state(sctx, &rs->pm4, SI_STATE_IDX(rasterizer)); 1169} 1170 1171/* 1172 * inferred state between dsa and stencil ref 1173 */ 1174static void si_emit_stencil_ref(struct si_context *sctx) 1175{ 1176 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 1177 struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; 1178 struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; 1179 1180 radeon_begin(cs); 1181 radeon_set_context_reg_seq(R_028430_DB_STENCILREFMASK, 2); 1182 radeon_emit(S_028430_STENCILTESTVAL(ref->ref_value[0]) | 1183 S_028430_STENCILMASK(dsa->valuemask[0]) | 1184 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 1185 S_028430_STENCILOPVAL(1)); 1186 radeon_emit(S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 1187 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 1188 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 1189 S_028434_STENCILOPVAL_BF(1)); 1190 radeon_end(); 1191} 1192 1193static void si_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref state) 1194{ 1195 struct si_context *sctx = (struct si_context *)ctx; 1196 1197 if (memcmp(&sctx->stencil_ref.state, &state, sizeof(state)) == 0) 1198 return; 1199 1200 sctx->stencil_ref.state = state; 1201 si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); 1202} 1203 1204/* 1205 * DSA 1206 */ 1207 1208static uint32_t si_translate_stencil_op(int s_op) 1209{ 1210 switch (s_op) { 1211 case PIPE_STENCIL_OP_KEEP: 1212 return V_02842C_STENCIL_KEEP; 1213 case PIPE_STENCIL_OP_ZERO: 1214 return V_02842C_STENCIL_ZERO; 1215 case PIPE_STENCIL_OP_REPLACE: 1216 return V_02842C_STENCIL_REPLACE_TEST; 1217 case PIPE_STENCIL_OP_INCR: 1218 return V_02842C_STENCIL_ADD_CLAMP; 1219 case PIPE_STENCIL_OP_DECR: 1220 return V_02842C_STENCIL_SUB_CLAMP; 1221 case PIPE_STENCIL_OP_INCR_WRAP: 1222 return V_02842C_STENCIL_ADD_WRAP; 1223 case PIPE_STENCIL_OP_DECR_WRAP: 1224 return V_02842C_STENCIL_SUB_WRAP; 1225 case PIPE_STENCIL_OP_INVERT: 1226 return V_02842C_STENCIL_INVERT; 1227 default: 1228 PRINT_ERR("Unknown stencil op %d", s_op); 1229 assert(0); 1230 break; 1231 } 1232 return 0; 1233} 1234 1235static bool si_order_invariant_stencil_op(enum pipe_stencil_op op) 1236{ 1237 /* REPLACE is normally order invariant, except when the stencil 1238 * reference value is written by the fragment shader. Tracking this 1239 * interaction does not seem worth the effort, so be conservative. */ 1240 return op != PIPE_STENCIL_OP_INCR && op != PIPE_STENCIL_OP_DECR && op != PIPE_STENCIL_OP_REPLACE; 1241} 1242 1243/* Compute whether, assuming Z writes are disabled, this stencil state is order 1244 * invariant in the sense that the set of passing fragments as well as the 1245 * final stencil buffer result does not depend on the order of fragments. */ 1246static bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state) 1247{ 1248 return !state->enabled || !state->writemask || 1249 /* The following assumes that Z writes are disabled. */ 1250 (state->func == PIPE_FUNC_ALWAYS && si_order_invariant_stencil_op(state->zpass_op) && 1251 si_order_invariant_stencil_op(state->zfail_op)) || 1252 (state->func == PIPE_FUNC_NEVER && si_order_invariant_stencil_op(state->fail_op)); 1253} 1254 1255static void *si_create_dsa_state(struct pipe_context *ctx, 1256 const struct pipe_depth_stencil_alpha_state *state) 1257{ 1258 struct si_context *sctx = (struct si_context *)ctx; 1259 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 1260 struct si_pm4_state *pm4 = &dsa->pm4; 1261 unsigned db_depth_control; 1262 uint32_t db_stencil_control = 0; 1263 1264 if (!dsa) { 1265 return NULL; 1266 } 1267 1268 dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 1269 dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 1270 dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 1271 dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 1272 1273 db_depth_control = 1274 S_028800_Z_ENABLE(state->depth_enabled) | S_028800_Z_WRITE_ENABLE(state->depth_writemask) | 1275 S_028800_ZFUNC(state->depth_func) | S_028800_DEPTH_BOUNDS_ENABLE(state->depth_bounds_test); 1276 1277 /* stencil */ 1278 if (state->stencil[0].enabled) { 1279 db_depth_control |= S_028800_STENCIL_ENABLE(1); 1280 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 1281 db_stencil_control |= 1282 S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 1283 db_stencil_control |= 1284 S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 1285 db_stencil_control |= 1286 S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 1287 1288 if (state->stencil[1].enabled) { 1289 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 1290 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 1291 db_stencil_control |= 1292 S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 1293 db_stencil_control |= 1294 S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 1295 db_stencil_control |= 1296 S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 1297 } 1298 } 1299 1300 /* alpha */ 1301 if (state->alpha_enabled) { 1302 dsa->alpha_func = state->alpha_func; 1303 1304 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4, 1305 fui(state->alpha_ref_value)); 1306 } else { 1307 dsa->alpha_func = PIPE_FUNC_ALWAYS; 1308 } 1309 1310 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1311 if (state->stencil[0].enabled) 1312 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 1313 if (state->depth_bounds_test) { 1314 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth_bounds_min)); 1315 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth_bounds_max)); 1316 } 1317 1318 dsa->depth_enabled = state->depth_enabled; 1319 dsa->depth_write_enabled = state->depth_enabled && state->depth_writemask; 1320 dsa->stencil_enabled = state->stencil[0].enabled; 1321 dsa->stencil_write_enabled = 1322 (util_writes_stencil(&state->stencil[0]) || util_writes_stencil(&state->stencil[1])); 1323 dsa->db_can_write = dsa->depth_write_enabled || dsa->stencil_write_enabled; 1324 1325 bool zfunc_is_ordered = 1326 state->depth_func == PIPE_FUNC_NEVER || state->depth_func == PIPE_FUNC_LESS || 1327 state->depth_func == PIPE_FUNC_LEQUAL || state->depth_func == PIPE_FUNC_GREATER || 1328 state->depth_func == PIPE_FUNC_GEQUAL; 1329 1330 bool nozwrite_and_order_invariant_stencil = 1331 !dsa->db_can_write || 1332 (!dsa->depth_write_enabled && si_order_invariant_stencil_state(&state->stencil[0]) && 1333 si_order_invariant_stencil_state(&state->stencil[1])); 1334 1335 dsa->order_invariance[1].zs = 1336 nozwrite_and_order_invariant_stencil || (!dsa->stencil_write_enabled && zfunc_is_ordered); 1337 dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered; 1338 1339 dsa->order_invariance[1].pass_set = 1340 nozwrite_and_order_invariant_stencil || 1341 (!dsa->stencil_write_enabled && 1342 (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER)); 1343 dsa->order_invariance[0].pass_set = 1344 !dsa->depth_write_enabled || 1345 (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER); 1346 1347 dsa->order_invariance[1].pass_last = sctx->screen->assume_no_z_fights && 1348 !dsa->stencil_write_enabled && dsa->depth_write_enabled && 1349 zfunc_is_ordered; 1350 dsa->order_invariance[0].pass_last = 1351 sctx->screen->assume_no_z_fights && dsa->depth_write_enabled && zfunc_is_ordered; 1352 1353 return dsa; 1354} 1355 1356static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1357{ 1358 struct si_context *sctx = (struct si_context *)ctx; 1359 struct si_state_dsa *old_dsa = sctx->queued.named.dsa; 1360 struct si_state_dsa *dsa = state; 1361 1362 if (!dsa) 1363 dsa = (struct si_state_dsa *)sctx->noop_dsa; 1364 1365 si_pm4_bind_state(sctx, dsa, dsa); 1366 1367 if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1368 sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1369 sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1370 si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); 1371 } 1372 1373 if (old_dsa->alpha_func != dsa->alpha_func) { 1374 si_ps_key_update_dsa(sctx); 1375 si_update_ps_inputs_read_or_disabled(sctx); 1376 si_update_ps_kill_enable(sctx); 1377 sctx->do_update_shaders = true; 1378 } 1379 1380 if (sctx->screen->dpbb_allowed && ((old_dsa->depth_enabled != dsa->depth_enabled || 1381 old_dsa->stencil_enabled != dsa->stencil_enabled || 1382 old_dsa->db_can_write != dsa->db_can_write))) 1383 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 1384 1385 if (sctx->screen->has_out_of_order_rast && 1386 (memcmp(old_dsa->order_invariance, dsa->order_invariance, 1387 sizeof(old_dsa->order_invariance)))) 1388 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1389} 1390 1391static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1392{ 1393 struct si_context *sctx = (struct si_context *)ctx; 1394 1395 if (sctx->queued.named.dsa == state) 1396 si_bind_dsa_state(ctx, sctx->noop_dsa); 1397 1398 si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(dsa)); 1399} 1400 1401static void *si_create_db_flush_dsa(struct si_context *sctx) 1402{ 1403 struct pipe_depth_stencil_alpha_state dsa = {}; 1404 1405 return sctx->b.create_depth_stencil_alpha_state(&sctx->b, &dsa); 1406} 1407 1408/* DB RENDER STATE */ 1409 1410static void si_set_active_query_state(struct pipe_context *ctx, bool enable) 1411{ 1412 struct si_context *sctx = (struct si_context *)ctx; 1413 1414 /* Pipeline stat & streamout queries. */ 1415 if (enable) { 1416 sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS; 1417 sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS; 1418 } else { 1419 sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS; 1420 sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS; 1421 } 1422 1423 /* Occlusion queries. */ 1424 if (sctx->occlusion_queries_disabled != !enable) { 1425 sctx->occlusion_queries_disabled = !enable; 1426 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1427 } 1428} 1429 1430void si_set_occlusion_query_state(struct si_context *sctx, bool old_perfect_enable) 1431{ 1432 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1433 1434 bool perfect_enable = sctx->num_perfect_occlusion_queries != 0; 1435 1436 if (perfect_enable != old_perfect_enable) 1437 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1438} 1439 1440void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st) 1441{ 1442 si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); 1443} 1444 1445void si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st) 1446{ 1447 sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, true, &st->saved_const0); 1448} 1449 1450static void si_emit_db_render_state(struct si_context *sctx) 1451{ 1452 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 1453 unsigned db_shader_control, db_render_control, db_count_control; 1454 1455 /* DB_RENDER_CONTROL */ 1456 if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) { 1457 db_render_control = S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 1458 S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 1459 S_028000_COPY_CENTROID(1) | S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample); 1460 } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { 1461 db_render_control = S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | 1462 S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace); 1463 } else { 1464 db_render_control = S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | 1465 S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear); 1466 } 1467 1468 /* DB_COUNT_CONTROL (occlusion queries) */ 1469 if (sctx->num_occlusion_queries > 0 && !sctx->occlusion_queries_disabled) { 1470 bool perfect = sctx->num_perfect_occlusion_queries > 0; 1471 bool gfx10_perfect = sctx->chip_class >= GFX10 && perfect; 1472 1473 if (sctx->chip_class >= GFX7) { 1474 unsigned log_sample_rate = sctx->framebuffer.log_samples; 1475 1476 db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1477 S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) | 1478 S_028004_SAMPLE_RATE(log_sample_rate) | S_028004_ZPASS_ENABLE(1) | 1479 S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1); 1480 } else { 1481 db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1482 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples); 1483 } 1484 } else { 1485 /* Disable occlusion queries. */ 1486 if (sctx->chip_class >= GFX7) { 1487 db_count_control = 0; 1488 } else { 1489 db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); 1490 } 1491 } 1492 1493 radeon_begin(&sctx->gfx_cs); 1494 radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL, 1495 db_render_control, db_count_control); 1496 1497 /* DB_RENDER_OVERRIDE2 */ 1498 radeon_opt_set_context_reg( 1499 sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2, 1500 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | 1501 S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | 1502 S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) | 1503 S_028010_CENTROID_COMPUTATION_MODE(sctx->chip_class >= GFX10_3 ? 1 : 0)); 1504 1505 db_shader_control = sctx->ps_db_shader_control; 1506 1507 /* Bug workaround for smoothing (overrasterization) on GFX6. */ 1508 if (sctx->chip_class == GFX6 && sctx->smoothing_enabled) { 1509 db_shader_control &= C_02880C_Z_ORDER; 1510 db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 1511 } 1512 1513 /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ 1514 if (!rs->multisample_enable) 1515 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 1516 1517 if (sctx->screen->info.has_rbplus && !sctx->screen->info.rbplus_allowed) 1518 db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); 1519 1520 radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL, 1521 db_shader_control); 1522 1523 if (sctx->chip_class >= GFX10_3) { 1524 if (sctx->allow_flat_shading) { 1525 radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL, 1526 SI_TRACKED_DB_VRS_OVERRIDE_CNTL, 1527 S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE( 1528 V_028064_VRS_COMB_MODE_OVERRIDE) | 1529 S_028064_VRS_OVERRIDE_RATE_X(1) | 1530 S_028064_VRS_OVERRIDE_RATE_Y(1)); 1531 } else { 1532 /* If the shader is using discard, turn off coarse shading because 1533 * discard at 2x2 pixel granularity degrades quality too much. 1534 * 1535 * MIN allows sample shading but not coarse shading. 1536 */ 1537 unsigned mode = sctx->screen->options.vrs2x2 && G_02880C_KILL_ENABLE(db_shader_control) ? 1538 V_028064_VRS_COMB_MODE_MIN : V_028064_VRS_COMB_MODE_PASSTHRU; 1539 1540 radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL, 1541 SI_TRACKED_DB_VRS_OVERRIDE_CNTL, 1542 S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | 1543 S_028064_VRS_OVERRIDE_RATE_X(0) | 1544 S_028064_VRS_OVERRIDE_RATE_Y(0)); 1545 } 1546 } 1547 radeon_end_update_context_roll(sctx); 1548} 1549 1550/* 1551 * format translation 1552 */ 1553uint32_t si_translate_colorformat(enum chip_class chip_class, 1554 enum pipe_format format) 1555{ 1556 const struct util_format_description *desc = util_format_description(format); 1557 if (!desc) 1558 return V_028C70_COLOR_INVALID; 1559 1560#define HAS_SIZE(x, y, z, w) \ 1561 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 1562 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 1563 1564 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1565 return V_028C70_COLOR_10_11_11; 1566 1567 if (chip_class >= GFX10_3 && 1568 format == PIPE_FORMAT_R9G9B9E5_FLOAT) /* isn't plain */ 1569 return V_028C70_COLOR_5_9_9_9; 1570 1571 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1572 return V_028C70_COLOR_INVALID; 1573 1574 /* hw cannot support mixed formats (except depth/stencil, since 1575 * stencil is not written to). */ 1576 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1577 return V_028C70_COLOR_INVALID; 1578 1579 switch (desc->nr_channels) { 1580 case 1: 1581 switch (desc->channel[0].size) { 1582 case 8: 1583 return V_028C70_COLOR_8; 1584 case 16: 1585 return V_028C70_COLOR_16; 1586 case 32: 1587 return V_028C70_COLOR_32; 1588 } 1589 break; 1590 case 2: 1591 if (desc->channel[0].size == desc->channel[1].size) { 1592 switch (desc->channel[0].size) { 1593 case 8: 1594 return V_028C70_COLOR_8_8; 1595 case 16: 1596 return V_028C70_COLOR_16_16; 1597 case 32: 1598 return V_028C70_COLOR_32_32; 1599 } 1600 } else if (HAS_SIZE(8, 24, 0, 0)) { 1601 return V_028C70_COLOR_24_8; 1602 } else if (HAS_SIZE(24, 8, 0, 0)) { 1603 return V_028C70_COLOR_8_24; 1604 } 1605 break; 1606 case 3: 1607 if (HAS_SIZE(5, 6, 5, 0)) { 1608 return V_028C70_COLOR_5_6_5; 1609 } else if (HAS_SIZE(32, 8, 24, 0)) { 1610 return V_028C70_COLOR_X24_8_32_FLOAT; 1611 } 1612 break; 1613 case 4: 1614 if (desc->channel[0].size == desc->channel[1].size && 1615 desc->channel[0].size == desc->channel[2].size && 1616 desc->channel[0].size == desc->channel[3].size) { 1617 switch (desc->channel[0].size) { 1618 case 4: 1619 return V_028C70_COLOR_4_4_4_4; 1620 case 8: 1621 return V_028C70_COLOR_8_8_8_8; 1622 case 16: 1623 return V_028C70_COLOR_16_16_16_16; 1624 case 32: 1625 return V_028C70_COLOR_32_32_32_32; 1626 } 1627 } else if (HAS_SIZE(5, 5, 5, 1)) { 1628 return V_028C70_COLOR_1_5_5_5; 1629 } else if (HAS_SIZE(1, 5, 5, 5)) { 1630 return V_028C70_COLOR_5_5_5_1; 1631 } else if (HAS_SIZE(10, 10, 10, 2)) { 1632 return V_028C70_COLOR_2_10_10_10; 1633 } 1634 break; 1635 } 1636 return V_028C70_COLOR_INVALID; 1637} 1638 1639static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1640{ 1641 if (SI_BIG_ENDIAN) { 1642 switch (colorformat) { 1643 /* 8-bit buffers. */ 1644 case V_028C70_COLOR_8: 1645 return V_028C70_ENDIAN_NONE; 1646 1647 /* 16-bit buffers. */ 1648 case V_028C70_COLOR_5_6_5: 1649 case V_028C70_COLOR_1_5_5_5: 1650 case V_028C70_COLOR_4_4_4_4: 1651 case V_028C70_COLOR_16: 1652 case V_028C70_COLOR_8_8: 1653 return V_028C70_ENDIAN_8IN16; 1654 1655 /* 32-bit buffers. */ 1656 case V_028C70_COLOR_8_8_8_8: 1657 case V_028C70_COLOR_2_10_10_10: 1658 case V_028C70_COLOR_8_24: 1659 case V_028C70_COLOR_24_8: 1660 case V_028C70_COLOR_16_16: 1661 return V_028C70_ENDIAN_8IN32; 1662 1663 /* 64-bit buffers. */ 1664 case V_028C70_COLOR_16_16_16_16: 1665 return V_028C70_ENDIAN_8IN16; 1666 1667 case V_028C70_COLOR_32_32: 1668 return V_028C70_ENDIAN_8IN32; 1669 1670 /* 128-bit buffers. */ 1671 case V_028C70_COLOR_32_32_32_32: 1672 return V_028C70_ENDIAN_8IN32; 1673 default: 1674 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1675 } 1676 } else { 1677 return V_028C70_ENDIAN_NONE; 1678 } 1679} 1680 1681static uint32_t si_translate_dbformat(enum pipe_format format) 1682{ 1683 switch (format) { 1684 case PIPE_FORMAT_Z16_UNORM: 1685 return V_028040_Z_16; 1686 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1687 case PIPE_FORMAT_X8Z24_UNORM: 1688 case PIPE_FORMAT_Z24X8_UNORM: 1689 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1690 return V_028040_Z_24; /* deprecated on AMD GCN */ 1691 case PIPE_FORMAT_Z32_FLOAT: 1692 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1693 return V_028040_Z_32_FLOAT; 1694 default: 1695 return V_028040_Z_INVALID; 1696 } 1697} 1698 1699/* 1700 * Texture translation 1701 */ 1702 1703static uint32_t si_translate_texformat(struct pipe_screen *screen, enum pipe_format format, 1704 const struct util_format_description *desc, 1705 int first_non_void) 1706{ 1707 struct si_screen *sscreen = (struct si_screen *)screen; 1708 bool uniform = true; 1709 int i; 1710 1711 assert(sscreen->info.chip_class <= GFX9); 1712 1713 /* Colorspace (return non-RGB formats directly). */ 1714 switch (desc->colorspace) { 1715 /* Depth stencil formats */ 1716 case UTIL_FORMAT_COLORSPACE_ZS: 1717 switch (format) { 1718 case PIPE_FORMAT_Z16_UNORM: 1719 return V_008F14_IMG_DATA_FORMAT_16; 1720 case PIPE_FORMAT_X24S8_UINT: 1721 case PIPE_FORMAT_S8X24_UINT: 1722 /* 1723 * Implemented as an 8_8_8_8 data format to fix texture 1724 * gathers in stencil sampling. This affects at least 1725 * GL45-CTS.texture_cube_map_array.sampling on GFX8. 1726 */ 1727 if (sscreen->info.chip_class <= GFX8) 1728 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1729 1730 if (format == PIPE_FORMAT_X24S8_UINT) 1731 return V_008F14_IMG_DATA_FORMAT_8_24; 1732 else 1733 return V_008F14_IMG_DATA_FORMAT_24_8; 1734 case PIPE_FORMAT_Z24X8_UNORM: 1735 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1736 return V_008F14_IMG_DATA_FORMAT_8_24; 1737 case PIPE_FORMAT_X8Z24_UNORM: 1738 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1739 return V_008F14_IMG_DATA_FORMAT_24_8; 1740 case PIPE_FORMAT_S8_UINT: 1741 return V_008F14_IMG_DATA_FORMAT_8; 1742 case PIPE_FORMAT_Z32_FLOAT: 1743 return V_008F14_IMG_DATA_FORMAT_32; 1744 case PIPE_FORMAT_X32_S8X24_UINT: 1745 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1746 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1747 default: 1748 goto out_unknown; 1749 } 1750 1751 case UTIL_FORMAT_COLORSPACE_YUV: 1752 goto out_unknown; /* TODO */ 1753 1754 case UTIL_FORMAT_COLORSPACE_SRGB: 1755 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1756 goto out_unknown; 1757 break; 1758 1759 default: 1760 break; 1761 } 1762 1763 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1764 if (!sscreen->info.has_format_bc1_through_bc7) 1765 goto out_unknown; 1766 1767 switch (format) { 1768 case PIPE_FORMAT_RGTC1_SNORM: 1769 case PIPE_FORMAT_LATC1_SNORM: 1770 case PIPE_FORMAT_RGTC1_UNORM: 1771 case PIPE_FORMAT_LATC1_UNORM: 1772 return V_008F14_IMG_DATA_FORMAT_BC4; 1773 case PIPE_FORMAT_RGTC2_SNORM: 1774 case PIPE_FORMAT_LATC2_SNORM: 1775 case PIPE_FORMAT_RGTC2_UNORM: 1776 case PIPE_FORMAT_LATC2_UNORM: 1777 return V_008F14_IMG_DATA_FORMAT_BC5; 1778 default: 1779 goto out_unknown; 1780 } 1781 } 1782 1783 if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && 1784 (sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_VEGA10 || 1785 sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2)) { 1786 switch (format) { 1787 case PIPE_FORMAT_ETC1_RGB8: 1788 case PIPE_FORMAT_ETC2_RGB8: 1789 case PIPE_FORMAT_ETC2_SRGB8: 1790 return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; 1791 case PIPE_FORMAT_ETC2_RGB8A1: 1792 case PIPE_FORMAT_ETC2_SRGB8A1: 1793 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; 1794 case PIPE_FORMAT_ETC2_RGBA8: 1795 case PIPE_FORMAT_ETC2_SRGBA8: 1796 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; 1797 case PIPE_FORMAT_ETC2_R11_UNORM: 1798 case PIPE_FORMAT_ETC2_R11_SNORM: 1799 return V_008F14_IMG_DATA_FORMAT_ETC2_R; 1800 case PIPE_FORMAT_ETC2_RG11_UNORM: 1801 case PIPE_FORMAT_ETC2_RG11_SNORM: 1802 return V_008F14_IMG_DATA_FORMAT_ETC2_RG; 1803 default: 1804 goto out_unknown; 1805 } 1806 } 1807 1808 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1809 if (!sscreen->info.has_format_bc1_through_bc7) 1810 goto out_unknown; 1811 1812 switch (format) { 1813 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1814 case PIPE_FORMAT_BPTC_SRGBA: 1815 return V_008F14_IMG_DATA_FORMAT_BC7; 1816 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1817 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1818 return V_008F14_IMG_DATA_FORMAT_BC6; 1819 default: 1820 goto out_unknown; 1821 } 1822 } 1823 1824 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1825 switch (format) { 1826 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1827 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1828 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1829 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1830 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1831 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1832 default: 1833 goto out_unknown; 1834 } 1835 } 1836 1837 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1838 if (!sscreen->info.has_format_bc1_through_bc7) 1839 goto out_unknown; 1840 1841 switch (format) { 1842 case PIPE_FORMAT_DXT1_RGB: 1843 case PIPE_FORMAT_DXT1_RGBA: 1844 case PIPE_FORMAT_DXT1_SRGB: 1845 case PIPE_FORMAT_DXT1_SRGBA: 1846 return V_008F14_IMG_DATA_FORMAT_BC1; 1847 case PIPE_FORMAT_DXT3_RGBA: 1848 case PIPE_FORMAT_DXT3_SRGBA: 1849 return V_008F14_IMG_DATA_FORMAT_BC2; 1850 case PIPE_FORMAT_DXT5_RGBA: 1851 case PIPE_FORMAT_DXT5_SRGBA: 1852 return V_008F14_IMG_DATA_FORMAT_BC3; 1853 default: 1854 goto out_unknown; 1855 } 1856 } 1857 1858 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1859 return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1860 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1861 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1862 } 1863 1864 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1865 1866 /* hw cannot support mixed formats (except depth/stencil, since only 1867 * depth is read).*/ 1868 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1869 goto out_unknown; 1870 1871 /* See whether the components are of the same size. */ 1872 for (i = 1; i < desc->nr_channels; i++) { 1873 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1874 } 1875 1876 /* Non-uniform formats. */ 1877 if (!uniform) { 1878 switch (desc->nr_channels) { 1879 case 3: 1880 if (desc->channel[0].size == 5 && desc->channel[1].size == 6 && 1881 desc->channel[2].size == 5) { 1882 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1883 } 1884 goto out_unknown; 1885 case 4: 1886 if (desc->channel[0].size == 5 && desc->channel[1].size == 5 && 1887 desc->channel[2].size == 5 && desc->channel[3].size == 1) { 1888 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1889 } 1890 if (desc->channel[0].size == 1 && desc->channel[1].size == 5 && 1891 desc->channel[2].size == 5 && desc->channel[3].size == 5) { 1892 return V_008F14_IMG_DATA_FORMAT_5_5_5_1; 1893 } 1894 if (desc->channel[0].size == 10 && desc->channel[1].size == 10 && 1895 desc->channel[2].size == 10 && desc->channel[3].size == 2) { 1896 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1897 } 1898 goto out_unknown; 1899 } 1900 goto out_unknown; 1901 } 1902 1903 if (first_non_void < 0 || first_non_void > 3) 1904 goto out_unknown; 1905 1906 /* uniform formats */ 1907 switch (desc->channel[first_non_void].size) { 1908 case 4: 1909 switch (desc->nr_channels) { 1910#if 0 /* Not supported for render targets */ 1911 case 2: 1912 return V_008F14_IMG_DATA_FORMAT_4_4; 1913#endif 1914 case 4: 1915 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1916 } 1917 break; 1918 case 8: 1919 switch (desc->nr_channels) { 1920 case 1: 1921 return V_008F14_IMG_DATA_FORMAT_8; 1922 case 2: 1923 return V_008F14_IMG_DATA_FORMAT_8_8; 1924 case 4: 1925 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1926 } 1927 break; 1928 case 16: 1929 switch (desc->nr_channels) { 1930 case 1: 1931 return V_008F14_IMG_DATA_FORMAT_16; 1932 case 2: 1933 return V_008F14_IMG_DATA_FORMAT_16_16; 1934 case 4: 1935 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1936 } 1937 break; 1938 case 32: 1939 switch (desc->nr_channels) { 1940 case 1: 1941 return V_008F14_IMG_DATA_FORMAT_32; 1942 case 2: 1943 return V_008F14_IMG_DATA_FORMAT_32_32; 1944#if 0 /* Not supported for render targets */ 1945 case 3: 1946 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1947#endif 1948 case 4: 1949 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1950 } 1951 } 1952 1953out_unknown: 1954 return ~0; 1955} 1956 1957static unsigned is_wrap_mode_legal(struct si_screen *screen, unsigned wrap) 1958{ 1959 if (!screen->info.has_3d_cube_border_color_mipmap) { 1960 switch (wrap) { 1961 case PIPE_TEX_WRAP_CLAMP: 1962 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1963 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1964 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1965 return false; 1966 } 1967 } 1968 return true; 1969} 1970 1971static unsigned si_tex_wrap(unsigned wrap) 1972{ 1973 switch (wrap) { 1974 default: 1975 case PIPE_TEX_WRAP_REPEAT: 1976 return V_008F30_SQ_TEX_WRAP; 1977 case PIPE_TEX_WRAP_CLAMP: 1978 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1979 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1980 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1981 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1982 return V_008F30_SQ_TEX_CLAMP_BORDER; 1983 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1984 return V_008F30_SQ_TEX_MIRROR; 1985 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1986 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1987 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1988 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1989 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1990 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1991 } 1992} 1993 1994static unsigned si_tex_mipfilter(unsigned filter) 1995{ 1996 switch (filter) { 1997 case PIPE_TEX_MIPFILTER_NEAREST: 1998 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1999 case PIPE_TEX_MIPFILTER_LINEAR: 2000 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 2001 default: 2002 case PIPE_TEX_MIPFILTER_NONE: 2003 return V_008F38_SQ_TEX_Z_FILTER_NONE; 2004 } 2005} 2006 2007static unsigned si_tex_compare(unsigned compare) 2008{ 2009 switch (compare) { 2010 default: 2011 case PIPE_FUNC_NEVER: 2012 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 2013 case PIPE_FUNC_LESS: 2014 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 2015 case PIPE_FUNC_EQUAL: 2016 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 2017 case PIPE_FUNC_LEQUAL: 2018 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 2019 case PIPE_FUNC_GREATER: 2020 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 2021 case PIPE_FUNC_NOTEQUAL: 2022 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 2023 case PIPE_FUNC_GEQUAL: 2024 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 2025 case PIPE_FUNC_ALWAYS: 2026 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 2027 } 2028} 2029 2030static unsigned si_tex_dim(struct si_screen *sscreen, struct si_texture *tex, unsigned view_target, 2031 unsigned nr_samples) 2032{ 2033 unsigned res_target = tex->buffer.b.b.target; 2034 2035 if (view_target == PIPE_TEXTURE_CUBE || view_target == PIPE_TEXTURE_CUBE_ARRAY) 2036 res_target = view_target; 2037 /* If interpreting cubemaps as something else, set 2D_ARRAY. */ 2038 else if (res_target == PIPE_TEXTURE_CUBE || res_target == PIPE_TEXTURE_CUBE_ARRAY) 2039 res_target = PIPE_TEXTURE_2D_ARRAY; 2040 2041 /* GFX9 allocates 1D textures as 2D. */ 2042 if ((res_target == PIPE_TEXTURE_1D || res_target == PIPE_TEXTURE_1D_ARRAY) && 2043 sscreen->info.chip_class == GFX9 && 2044 tex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) { 2045 if (res_target == PIPE_TEXTURE_1D) 2046 res_target = PIPE_TEXTURE_2D; 2047 else 2048 res_target = PIPE_TEXTURE_2D_ARRAY; 2049 } 2050 2051 switch (res_target) { 2052 default: 2053 case PIPE_TEXTURE_1D: 2054 return V_008F1C_SQ_RSRC_IMG_1D; 2055 case PIPE_TEXTURE_1D_ARRAY: 2056 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 2057 case PIPE_TEXTURE_2D: 2058 case PIPE_TEXTURE_RECT: 2059 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : V_008F1C_SQ_RSRC_IMG_2D; 2060 case PIPE_TEXTURE_2D_ARRAY: 2061 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 2062 case PIPE_TEXTURE_3D: 2063 return V_008F1C_SQ_RSRC_IMG_3D; 2064 case PIPE_TEXTURE_CUBE: 2065 case PIPE_TEXTURE_CUBE_ARRAY: 2066 return V_008F1C_SQ_RSRC_IMG_CUBE; 2067 } 2068} 2069 2070/* 2071 * Format support testing 2072 */ 2073 2074static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 2075{ 2076 struct si_screen *sscreen = (struct si_screen *)screen; 2077 2078 if (sscreen->info.chip_class >= GFX10) { 2079 const struct gfx10_format *fmt = &gfx10_format_table[format]; 2080 if (!fmt->img_format || fmt->buffers_only) 2081 return false; 2082 return true; 2083 } 2084 2085 const struct util_format_description *desc = util_format_description(format); 2086 if (!desc) 2087 return false; 2088 2089 return si_translate_texformat(screen, format, desc, 2090 util_format_get_first_non_void_channel(format)) != ~0U; 2091} 2092 2093static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 2094 const struct util_format_description *desc, 2095 int first_non_void) 2096{ 2097 int i; 2098 2099 assert(((struct si_screen *)screen)->info.chip_class <= GFX9); 2100 2101 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 2102 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 2103 2104 assert(first_non_void >= 0); 2105 2106 if (desc->nr_channels == 4 && desc->channel[0].size == 10 && desc->channel[1].size == 10 && 2107 desc->channel[2].size == 10 && desc->channel[3].size == 2) 2108 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 2109 2110 /* See whether the components are of the same size. */ 2111 for (i = 0; i < desc->nr_channels; i++) { 2112 if (desc->channel[first_non_void].size != desc->channel[i].size) 2113 return V_008F0C_BUF_DATA_FORMAT_INVALID; 2114 } 2115 2116 switch (desc->channel[first_non_void].size) { 2117 case 8: 2118 switch (desc->nr_channels) { 2119 case 1: 2120 case 3: /* 3 loads */ 2121 return V_008F0C_BUF_DATA_FORMAT_8; 2122 case 2: 2123 return V_008F0C_BUF_DATA_FORMAT_8_8; 2124 case 4: 2125 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 2126 } 2127 break; 2128 case 16: 2129 switch (desc->nr_channels) { 2130 case 1: 2131 case 3: /* 3 loads */ 2132 return V_008F0C_BUF_DATA_FORMAT_16; 2133 case 2: 2134 return V_008F0C_BUF_DATA_FORMAT_16_16; 2135 case 4: 2136 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 2137 } 2138 break; 2139 case 32: 2140 switch (desc->nr_channels) { 2141 case 1: 2142 return V_008F0C_BUF_DATA_FORMAT_32; 2143 case 2: 2144 return V_008F0C_BUF_DATA_FORMAT_32_32; 2145 case 3: 2146 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 2147 case 4: 2148 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2149 } 2150 break; 2151 case 64: 2152 /* Legacy double formats. */ 2153 switch (desc->nr_channels) { 2154 case 1: /* 1 load */ 2155 return V_008F0C_BUF_DATA_FORMAT_32_32; 2156 case 2: /* 1 load */ 2157 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2158 case 3: /* 3 loads */ 2159 return V_008F0C_BUF_DATA_FORMAT_32_32; 2160 case 4: /* 2 loads */ 2161 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2162 } 2163 break; 2164 } 2165 2166 return V_008F0C_BUF_DATA_FORMAT_INVALID; 2167} 2168 2169static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 2170 const struct util_format_description *desc, 2171 int first_non_void) 2172{ 2173 assert(((struct si_screen *)screen)->info.chip_class <= GFX9); 2174 2175 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 2176 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 2177 2178 assert(first_non_void >= 0); 2179 2180 switch (desc->channel[first_non_void].type) { 2181 case UTIL_FORMAT_TYPE_SIGNED: 2182 case UTIL_FORMAT_TYPE_FIXED: 2183 if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer) 2184 return V_008F0C_BUF_NUM_FORMAT_SINT; 2185 else if (desc->channel[first_non_void].normalized) 2186 return V_008F0C_BUF_NUM_FORMAT_SNORM; 2187 else 2188 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 2189 break; 2190 case UTIL_FORMAT_TYPE_UNSIGNED: 2191 if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer) 2192 return V_008F0C_BUF_NUM_FORMAT_UINT; 2193 else if (desc->channel[first_non_void].normalized) 2194 return V_008F0C_BUF_NUM_FORMAT_UNORM; 2195 else 2196 return V_008F0C_BUF_NUM_FORMAT_USCALED; 2197 break; 2198 case UTIL_FORMAT_TYPE_FLOAT: 2199 default: 2200 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 2201 } 2202} 2203 2204static unsigned si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format, 2205 unsigned usage) 2206{ 2207 struct si_screen *sscreen = (struct si_screen *)screen; 2208 const struct util_format_description *desc; 2209 int first_non_void; 2210 unsigned data_format; 2211 2212 assert((usage & ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_VERTEX_BUFFER)) == 2213 0); 2214 2215 desc = util_format_description(format); 2216 if (!desc) 2217 return 0; 2218 2219 /* There are no native 8_8_8 or 16_16_16 data formats, and we currently 2220 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well 2221 * for read-only access (with caveats surrounding bounds checks), but 2222 * obviously fails for write access which we have to implement for 2223 * shader images. Luckily, OpenGL doesn't expect this to be supported 2224 * anyway, and so the only impact is on PBO uploads / downloads, which 2225 * shouldn't be expected to be fast for GL_RGB anyway. 2226 */ 2227 if (desc->block.bits == 3 * 8 || desc->block.bits == 3 * 16) { 2228 if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) { 2229 usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW); 2230 if (!usage) 2231 return 0; 2232 } 2233 } 2234 2235 if (sscreen->info.chip_class >= GFX10) { 2236 const struct gfx10_format *fmt = &gfx10_format_table[format]; 2237 if (!fmt->img_format || fmt->img_format >= 128) 2238 return 0; 2239 return usage; 2240 } 2241 2242 first_non_void = util_format_get_first_non_void_channel(format); 2243 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 2244 if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID) 2245 return 0; 2246 2247 return usage; 2248} 2249 2250static bool si_is_colorbuffer_format_supported(enum chip_class chip_class, 2251 enum pipe_format format) 2252{ 2253 return si_translate_colorformat(chip_class, format) != V_028C70_COLOR_INVALID && 2254 si_translate_colorswap(format, false) != ~0U; 2255} 2256 2257static bool si_is_zs_format_supported(enum pipe_format format) 2258{ 2259 return si_translate_dbformat(format) != V_028040_Z_INVALID; 2260} 2261 2262static bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format format, 2263 enum pipe_texture_target target, unsigned sample_count, 2264 unsigned storage_sample_count, unsigned usage) 2265{ 2266 struct si_screen *sscreen = (struct si_screen *)screen; 2267 unsigned retval = 0; 2268 2269 if (target >= PIPE_MAX_TEXTURE_TYPES) { 2270 PRINT_ERR("radeonsi: unsupported texture type %d\n", target); 2271 return false; 2272 } 2273 2274 if ((target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE) && 2275 !sscreen->info.has_3d_cube_border_color_mipmap) 2276 return false; 2277 2278 if (util_format_get_num_planes(format) >= 2) 2279 return false; 2280 2281 if (MAX2(1, sample_count) < MAX2(1, storage_sample_count)) 2282 return false; 2283 2284 if (sample_count > 1) { 2285 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 2286 return false; 2287 2288 /* Only power-of-two sample counts are supported. */ 2289 if (!util_is_power_of_two_or_zero(sample_count) || 2290 !util_is_power_of_two_or_zero(storage_sample_count)) 2291 return false; 2292 2293 /* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate, 2294 * so don't expose 16 samples there. 2295 */ 2296 const unsigned max_eqaa_samples = util_bitcount(sscreen->info.enabled_rb_mask) <= 1 ? 8 : 16; 2297 const unsigned max_samples = 8; 2298 2299 /* MSAA support without framebuffer attachments. */ 2300 if (format == PIPE_FORMAT_NONE && sample_count <= max_eqaa_samples) 2301 return true; 2302 2303 if (!sscreen->info.has_eqaa_surface_allocator || util_format_is_depth_or_stencil(format)) { 2304 /* Color without EQAA or depth/stencil. */ 2305 if (sample_count > max_samples || sample_count != storage_sample_count) 2306 return false; 2307 } else { 2308 /* Color with EQAA. */ 2309 if (sample_count > max_eqaa_samples || storage_sample_count > max_samples) 2310 return false; 2311 } 2312 } 2313 2314 if (usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) { 2315 if (target == PIPE_BUFFER) { 2316 retval |= si_is_vertex_format_supported( 2317 screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)); 2318 } else { 2319 if (si_is_sampler_format_supported(screen, format)) 2320 retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE); 2321 } 2322 } 2323 2324 if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | 2325 PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) && 2326 si_is_colorbuffer_format_supported(sscreen->info.chip_class, format)) { 2327 retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | 2328 PIPE_BIND_SHARED); 2329 if (!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format)) 2330 retval |= usage & PIPE_BIND_BLENDABLE; 2331 } 2332 2333 if ((usage & PIPE_BIND_DEPTH_STENCIL) && si_is_zs_format_supported(format)) { 2334 retval |= PIPE_BIND_DEPTH_STENCIL; 2335 } 2336 2337 if (usage & PIPE_BIND_VERTEX_BUFFER) { 2338 retval |= si_is_vertex_format_supported(screen, format, PIPE_BIND_VERTEX_BUFFER); 2339 } 2340 2341 if (usage & PIPE_BIND_INDEX_BUFFER) { 2342 if (format == PIPE_FORMAT_R8_UINT || 2343 format == PIPE_FORMAT_R16_UINT || 2344 format == PIPE_FORMAT_R32_UINT) 2345 retval |= PIPE_BIND_INDEX_BUFFER; 2346 } 2347 2348 if ((usage & PIPE_BIND_LINEAR) && !util_format_is_compressed(format) && 2349 !(usage & PIPE_BIND_DEPTH_STENCIL)) 2350 retval |= PIPE_BIND_LINEAR; 2351 2352 return retval == usage; 2353} 2354 2355/* 2356 * framebuffer handling 2357 */ 2358 2359static void si_choose_spi_color_formats(struct si_surface *surf, unsigned format, unsigned swap, 2360 unsigned ntype, bool is_depth) 2361{ 2362 struct ac_spi_color_formats formats = {}; 2363 2364 ac_choose_spi_color_formats(format, swap, ntype, is_depth, true, &formats); 2365 2366 surf->spi_shader_col_format = formats.normal; 2367 surf->spi_shader_col_format_alpha = formats.alpha; 2368 surf->spi_shader_col_format_blend = formats.blend; 2369 surf->spi_shader_col_format_blend_alpha = formats.blend_alpha; 2370} 2371 2372static void si_initialize_color_surface(struct si_context *sctx, struct si_surface *surf) 2373{ 2374 struct si_texture *tex = (struct si_texture *)surf->base.texture; 2375 unsigned color_info, color_attrib; 2376 unsigned format, swap, ntype, endian; 2377 const struct util_format_description *desc; 2378 int firstchan; 2379 unsigned blend_clamp = 0, blend_bypass = 0; 2380 2381 desc = util_format_description(surf->base.format); 2382 for (firstchan = 0; firstchan < 4; firstchan++) { 2383 if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) { 2384 break; 2385 } 2386 } 2387 if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) { 2388 ntype = V_028C70_NUMBER_FLOAT; 2389 } else { 2390 ntype = V_028C70_NUMBER_UNORM; 2391 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 2392 ntype = V_028C70_NUMBER_SRGB; 2393 else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) { 2394 if (desc->channel[firstchan].pure_integer) { 2395 ntype = V_028C70_NUMBER_SINT; 2396 } else { 2397 assert(desc->channel[firstchan].normalized); 2398 ntype = V_028C70_NUMBER_SNORM; 2399 } 2400 } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) { 2401 if (desc->channel[firstchan].pure_integer) { 2402 ntype = V_028C70_NUMBER_UINT; 2403 } else { 2404 assert(desc->channel[firstchan].normalized); 2405 ntype = V_028C70_NUMBER_UNORM; 2406 } 2407 } 2408 } 2409 2410 format = si_translate_colorformat(sctx->chip_class, surf->base.format); 2411 if (format == V_028C70_COLOR_INVALID) { 2412 PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 2413 } 2414 assert(format != V_028C70_COLOR_INVALID); 2415 swap = si_translate_colorswap(surf->base.format, false); 2416 endian = si_colorformat_endian_swap(format); 2417 2418 /* blend clamp should be set for all NORM/SRGB types */ 2419 if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM || 2420 ntype == V_028C70_NUMBER_SRGB) 2421 blend_clamp = 1; 2422 2423 /* set blend bypass according to docs if SINT/UINT or 2424 8/24 COLOR variants */ 2425 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 2426 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 2427 format == V_028C70_COLOR_X24_8_32_FLOAT) { 2428 blend_clamp = 0; 2429 blend_bypass = 1; 2430 } 2431 2432 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) { 2433 if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_8_8 || 2434 format == V_028C70_COLOR_8_8_8_8) 2435 surf->color_is_int8 = true; 2436 else if (format == V_028C70_COLOR_10_10_10_2 || format == V_028C70_COLOR_2_10_10_10) 2437 surf->color_is_int10 = true; 2438 } 2439 2440 color_info = 2441 S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) | 2442 S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) | 2443 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM && 2444 ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 && 2445 format != V_028C70_COLOR_24_8) | 2446 S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian); 2447 2448 /* Intensity is implemented as Red, so treat it that way. */ 2449 color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 || 2450 util_format_is_intensity(surf->base.format)); 2451 2452 if (tex->buffer.b.b.nr_samples > 1) { 2453 unsigned log_samples = util_logbase2(tex->buffer.b.b.nr_samples); 2454 unsigned log_fragments = util_logbase2(tex->buffer.b.b.nr_storage_samples); 2455 2456 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_fragments); 2457 2458 if (tex->surface.fmask_offset) { 2459 color_info |= S_028C70_COMPRESSION(1); 2460 unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.color.fmask.bankh); 2461 2462 if (sctx->chip_class == GFX6) { 2463 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */ 2464 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 2465 } 2466 } 2467 } 2468 2469 /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and 2470 * 64 for APU because all of our APUs to date use DIMMs which have 2471 * a request granularity size of 64B while all other chips have a 2472 * 32B request size */ 2473 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B; 2474 if (!sctx->screen->info.has_dedicated_vram) 2475 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B; 2476 2477 if (sctx->chip_class >= GFX10) { 2478 surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | 2479 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) | 2480 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) | 2481 S_028C78_INDEPENDENT_64B_BLOCKS(tex->surface.u.gfx9.color.dcc.independent_64B_blocks) | 2482 S_028C78_INDEPENDENT_128B_BLOCKS(tex->surface.u.gfx9.color.dcc.independent_128B_blocks); 2483 } else if (sctx->chip_class >= GFX8) { 2484 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B; 2485 2486 if (tex->buffer.b.b.nr_storage_samples > 1) { 2487 if (tex->surface.bpe == 1) 2488 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; 2489 else if (tex->surface.bpe == 2) 2490 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B; 2491 } 2492 2493 surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | 2494 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) | 2495 S_028C78_INDEPENDENT_64B_BLOCKS(1); 2496 } 2497 2498 /* This must be set for fast clear to work without FMASK. */ 2499 if (!tex->surface.fmask_size && sctx->chip_class == GFX6) { 2500 unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh); 2501 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 2502 } 2503 2504 /* GFX10 field has the same base shift as the GFX6 field */ 2505 unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 2506 S_028C6C_SLICE_MAX_GFX10(surf->base.u.tex.last_layer); 2507 unsigned mip0_depth = util_max_layer(&tex->buffer.b.b, 0); 2508 2509 if (sctx->chip_class >= GFX10) { 2510 color_view |= S_028C6C_MIP_LEVEL_GFX10(surf->base.u.tex.level); 2511 2512 surf->cb_color_attrib3 = S_028EE0_MIP0_DEPTH(mip0_depth) | 2513 S_028EE0_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type) | 2514 S_028EE0_RESOURCE_LEVEL(1); 2515 } else if (sctx->chip_class == GFX9) { 2516 color_view |= S_028C6C_MIP_LEVEL_GFX9(surf->base.u.tex.level); 2517 color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | 2518 S_028C74_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type); 2519 } 2520 2521 if (sctx->chip_class >= GFX9) { 2522 surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(surf->width0 - 1) | 2523 S_028C68_MIP0_HEIGHT(surf->height0 - 1) | 2524 S_028C68_MAX_MIP(tex->buffer.b.b.last_level); 2525 } 2526 2527 surf->cb_color_view = color_view; 2528 surf->cb_color_info = color_info; 2529 surf->cb_color_attrib = color_attrib; 2530 2531 /* Determine pixel shader export format */ 2532 si_choose_spi_color_formats(surf, format, swap, ntype, tex->is_depth); 2533 2534 surf->color_initialized = true; 2535} 2536 2537static void si_init_depth_surface(struct si_context *sctx, struct si_surface *surf) 2538{ 2539 struct si_texture *tex = (struct si_texture *)surf->base.texture; 2540 unsigned level = surf->base.u.tex.level; 2541 unsigned format, stencil_format; 2542 uint32_t z_info, s_info; 2543 2544 format = si_translate_dbformat(tex->db_render_format); 2545 stencil_format = tex->surface.has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID; 2546 2547 assert(format != V_028040_Z_INVALID); 2548 if (format == V_028040_Z_INVALID) 2549 PRINT_ERR("Invalid DB format: %d, disabling DB.\n", tex->buffer.b.b.format); 2550 2551 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 2552 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 2553 surf->db_htile_data_base = 0; 2554 surf->db_htile_surface = 0; 2555 2556 if (sctx->chip_class >= GFX10) { 2557 surf->db_depth_view |= S_028008_SLICE_START_HI(surf->base.u.tex.first_layer >> 11) | 2558 S_028008_SLICE_MAX_HI(surf->base.u.tex.last_layer >> 11); 2559 } 2560 2561 if (sctx->chip_class >= GFX9) { 2562 assert(tex->surface.u.gfx9.surf_offset == 0); 2563 surf->db_depth_base = tex->buffer.gpu_address >> 8; 2564 surf->db_stencil_base = (tex->buffer.gpu_address + tex->surface.u.gfx9.zs.stencil_offset) >> 8; 2565 z_info = S_028038_FORMAT(format) | 2566 S_028038_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)) | 2567 S_028038_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 2568 S_028038_MAXMIP(tex->buffer.b.b.last_level); 2569 s_info = S_02803C_FORMAT(stencil_format) | 2570 S_02803C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode); 2571 2572 if (sctx->chip_class == GFX9) { 2573 surf->db_z_info2 = S_028068_EPITCH(tex->surface.u.gfx9.epitch); 2574 surf->db_stencil_info2 = S_02806C_EPITCH(tex->surface.u.gfx9.zs.stencil_epitch); 2575 } 2576 surf->db_depth_view |= S_028008_MIPID(level); 2577 surf->db_depth_size = 2578 S_02801C_X_MAX(tex->buffer.b.b.width0 - 1) | S_02801C_Y_MAX(tex->buffer.b.b.height0 - 1); 2579 2580 if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) { 2581 z_info |= S_028038_TILE_SURFACE_ENABLE(1) | S_028038_ALLOW_EXPCLEAR(1); 2582 s_info |= S_02803C_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled); 2583 2584 if (tex->surface.has_stencil && !tex->htile_stencil_disabled) { 2585 /* Stencil buffer workaround ported from the GFX6-GFX8 code. 2586 * See that for explanation. 2587 */ 2588 s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1); 2589 } 2590 2591 surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 2592 surf->db_htile_surface = 2593 S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1); 2594 if (sctx->chip_class == GFX9) { 2595 surf->db_htile_surface |= S_028ABC_RB_ALIGNED(1); 2596 } 2597 } 2598 } else { 2599 /* GFX6-GFX8 */ 2600 struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level]; 2601 2602 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 2603 2604 surf->db_depth_base = 2605 (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.level[level].offset_256B; 2606 surf->db_stencil_base = 2607 (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.zs.stencil_level[level].offset_256B; 2608 2609 z_info = 2610 S_028040_FORMAT(format) | S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)); 2611 s_info = S_028044_FORMAT(stencil_format); 2612 surf->db_depth_info = 0; 2613 2614 if (sctx->chip_class >= GFX7) { 2615 struct radeon_info *info = &sctx->screen->info; 2616 unsigned index = tex->surface.u.legacy.tiling_index[level]; 2617 unsigned stencil_index = tex->surface.u.legacy.zs.stencil_tiling_index[level]; 2618 unsigned macro_index = tex->surface.u.legacy.macro_tile_index; 2619 unsigned tile_mode = info->si_tile_mode_array[index]; 2620 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index]; 2621 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index]; 2622 2623 surf->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) | 2624 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) | 2625 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) | 2626 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) | 2627 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) | 2628 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode)); 2629 z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode)); 2630 s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode)); 2631 } else { 2632 unsigned tile_mode_index = si_tile_mode_index(tex, level, false); 2633 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 2634 tile_mode_index = si_tile_mode_index(tex, level, true); 2635 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 2636 } 2637 2638 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 2639 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 2640 surf->db_depth_slice = 2641 S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * levelinfo->nblk_y) / 64 - 1); 2642 2643 if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) { 2644 z_info |= S_028040_TILE_SURFACE_ENABLE(1) | S_028040_ALLOW_EXPCLEAR(1); 2645 s_info |= S_028044_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled); 2646 2647 if (tex->surface.has_stencil) { 2648 /* Workaround: For a not yet understood reason, the 2649 * combination of MSAA, fast stencil clear and stencil 2650 * decompress messes with subsequent stencil buffer 2651 * uses. Problem was reproduced on Verde, Bonaire, 2652 * Tonga, and Carrizo. 2653 * 2654 * Disabling EXPCLEAR works around the problem. 2655 * 2656 * Check piglit's arb_texture_multisample-stencil-clear 2657 * test if you want to try changing this. 2658 */ 2659 if (tex->buffer.b.b.nr_samples <= 1) 2660 s_info |= S_028044_ALLOW_EXPCLEAR(1); 2661 } 2662 2663 surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 2664 surf->db_htile_surface = S_028ABC_FULL_CACHE(1); 2665 } 2666 } 2667 2668 surf->db_z_info = z_info; 2669 surf->db_stencil_info = s_info; 2670 2671 surf->depth_initialized = true; 2672} 2673 2674void si_update_fb_dirtiness_after_rendering(struct si_context *sctx) 2675{ 2676 if (sctx->decompression_enabled) 2677 return; 2678 2679 if (sctx->framebuffer.state.zsbuf) { 2680 struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; 2681 struct si_texture *tex = (struct si_texture *)surf->texture; 2682 2683 tex->dirty_level_mask |= 1 << surf->u.tex.level; 2684 2685 if (tex->surface.has_stencil) 2686 tex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; 2687 } 2688 2689 unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask; 2690 while (compressed_cb_mask) { 2691 unsigned i = u_bit_scan(&compressed_cb_mask); 2692 struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; 2693 struct si_texture *tex = (struct si_texture *)surf->texture; 2694 2695 if (tex->surface.fmask_offset) { 2696 tex->dirty_level_mask |= 1 << surf->u.tex.level; 2697 tex->fmask_is_identity = false; 2698 } 2699 } 2700} 2701 2702static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) 2703{ 2704 for (int i = 0; i < state->nr_cbufs; ++i) { 2705 struct si_surface *surf = NULL; 2706 struct si_texture *tex; 2707 2708 if (!state->cbufs[i]) 2709 continue; 2710 surf = (struct si_surface *)state->cbufs[i]; 2711 tex = (struct si_texture *)surf->base.texture; 2712 2713 p_atomic_dec(&tex->framebuffers_bound); 2714 } 2715} 2716 2717void si_mark_display_dcc_dirty(struct si_context *sctx, struct si_texture *tex) 2718{ 2719 if (!tex->surface.display_dcc_offset || tex->displayable_dcc_dirty) 2720 return; 2721 2722 if (!(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) { 2723 struct hash_entry *entry = _mesa_hash_table_search(sctx->dirty_implicit_resources, tex); 2724 if (!entry) { 2725 struct pipe_resource *dummy = NULL; 2726 pipe_resource_reference(&dummy, &tex->buffer.b.b); 2727 _mesa_hash_table_insert(sctx->dirty_implicit_resources, tex, tex); 2728 } 2729 } 2730 tex->displayable_dcc_dirty = true; 2731} 2732 2733static void si_update_display_dcc_dirty(struct si_context *sctx) 2734{ 2735 const struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2736 2737 for (unsigned i = 0; i < state->nr_cbufs; i++) { 2738 if (state->cbufs[i]) 2739 si_mark_display_dcc_dirty(sctx, (struct si_texture *)state->cbufs[i]->texture); 2740 } 2741} 2742 2743static void si_set_framebuffer_state(struct pipe_context *ctx, 2744 const struct pipe_framebuffer_state *state) 2745{ 2746 struct si_context *sctx = (struct si_context *)ctx; 2747 struct si_surface *surf = NULL; 2748 struct si_texture *tex; 2749 bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; 2750 unsigned old_nr_samples = sctx->framebuffer.nr_samples; 2751 unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit; 2752 bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf; 2753 bool old_has_stencil = 2754 old_has_zsbuf && 2755 ((struct si_texture *)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil; 2756 bool unbound = false; 2757 int i; 2758 2759 /* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs 2760 * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. 2761 * We could implement the full workaround here, but it's a useless case. 2762 */ 2763 if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) { 2764 unreachable("the framebuffer shouldn't have zero area"); 2765 return; 2766 } 2767 2768 si_update_fb_dirtiness_after_rendering(sctx); 2769 2770 /* Disable DCC if the formats are incompatible. */ 2771 for (i = 0; i < state->nr_cbufs; i++) { 2772 if (!state->cbufs[i]) 2773 continue; 2774 2775 surf = (struct si_surface *)state->cbufs[i]; 2776 tex = (struct si_texture *)surf->base.texture; 2777 2778 if (!surf->dcc_incompatible) 2779 continue; 2780 2781 /* Since the DCC decompression calls back into set_framebuffer- 2782 * _state, we need to unbind the framebuffer, so that 2783 * vi_separate_dcc_stop_query isn't called twice with the same 2784 * color buffer. 2785 */ 2786 if (!unbound) { 2787 util_copy_framebuffer_state(&sctx->framebuffer.state, NULL); 2788 unbound = true; 2789 } 2790 2791 if (vi_dcc_enabled(tex, surf->base.u.tex.level)) 2792 if (!si_texture_disable_dcc(sctx, tex)) 2793 si_decompress_dcc(sctx, tex); 2794 2795 surf->dcc_incompatible = false; 2796 } 2797 2798 /* Only flush TC when changing the framebuffer state, because 2799 * the only client not using TC that can change textures is 2800 * the framebuffer. 2801 * 2802 * Wait for compute shaders because of possible transitions: 2803 * - FB write -> shader read 2804 * - shader write -> FB read 2805 * 2806 * DB caches are flushed on demand (using si_decompress_textures). 2807 * 2808 * When MSAA is enabled, CB and TC caches are flushed on demand 2809 * (after FMASK decompression). Shader write -> FB read transitions 2810 * cannot happen for MSAA textures, because MSAA shader images are 2811 * not supported. 2812 * 2813 * Only flush and wait for CB if there is actually a bound color buffer. 2814 */ 2815 if (sctx->framebuffer.uncompressed_cb_mask) { 2816 si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 2817 sctx->framebuffer.CB_has_shader_readable_metadata, 2818 sctx->framebuffer.all_DCC_pipe_aligned); 2819 } 2820 2821 sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; 2822 2823 /* u_blitter doesn't invoke depth decompression when it does multiple 2824 * blits in a row, but the only case when it matters for DB is when 2825 * doing generate_mipmap. So here we flush DB manually between 2826 * individual generate_mipmap blits. 2827 * Note that lower mipmap levels aren't compressed. 2828 */ 2829 if (sctx->generate_mipmap_for_depth) { 2830 si_make_DB_shader_coherent(sctx, 1, false, sctx->framebuffer.DB_has_shader_readable_metadata); 2831 } else if (sctx->chip_class == GFX9) { 2832 /* It appears that DB metadata "leaks" in a sequence of: 2833 * - depth clear 2834 * - DCC decompress for shader image writes (with DB disabled) 2835 * - render with DEPTH_BEFORE_SHADER=1 2836 * Flushing DB metadata works around the problem. 2837 */ 2838 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META; 2839 } 2840 2841 /* Take the maximum of the old and new count. If the new count is lower, 2842 * dirtying is needed to disable the unbound colorbuffers. 2843 */ 2844 sctx->framebuffer.dirty_cbufs |= 2845 (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; 2846 sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; 2847 2848 si_dec_framebuffer_counters(&sctx->framebuffer.state); 2849 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 2850 2851 sctx->framebuffer.colorbuf_enabled_4bit = 0; 2852 sctx->framebuffer.spi_shader_col_format = 0; 2853 sctx->framebuffer.spi_shader_col_format_alpha = 0; 2854 sctx->framebuffer.spi_shader_col_format_blend = 0; 2855 sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; 2856 sctx->framebuffer.color_is_int8 = 0; 2857 sctx->framebuffer.color_is_int10 = 0; 2858 2859 sctx->framebuffer.compressed_cb_mask = 0; 2860 sctx->framebuffer.uncompressed_cb_mask = 0; 2861 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 2862 sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples; 2863 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 2864 sctx->framebuffer.any_dst_linear = false; 2865 sctx->framebuffer.CB_has_shader_readable_metadata = false; 2866 sctx->framebuffer.DB_has_shader_readable_metadata = false; 2867 sctx->framebuffer.all_DCC_pipe_aligned = true; 2868 sctx->framebuffer.has_dcc_msaa = false; 2869 sctx->framebuffer.min_bytes_per_pixel = 0; 2870 2871 for (i = 0; i < state->nr_cbufs; i++) { 2872 if (!state->cbufs[i]) 2873 continue; 2874 2875 surf = (struct si_surface *)state->cbufs[i]; 2876 tex = (struct si_texture *)surf->base.texture; 2877 2878 if (!surf->color_initialized) { 2879 si_initialize_color_surface(sctx, surf); 2880 } 2881 2882 sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4); 2883 sctx->framebuffer.spi_shader_col_format |= surf->spi_shader_col_format << (i * 4); 2884 sctx->framebuffer.spi_shader_col_format_alpha |= surf->spi_shader_col_format_alpha << (i * 4); 2885 sctx->framebuffer.spi_shader_col_format_blend |= surf->spi_shader_col_format_blend << (i * 4); 2886 sctx->framebuffer.spi_shader_col_format_blend_alpha |= surf->spi_shader_col_format_blend_alpha 2887 << (i * 4); 2888 2889 if (surf->color_is_int8) 2890 sctx->framebuffer.color_is_int8 |= 1 << i; 2891 if (surf->color_is_int10) 2892 sctx->framebuffer.color_is_int10 |= 1 << i; 2893 2894 if (tex->surface.fmask_offset) 2895 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2896 else 2897 sctx->framebuffer.uncompressed_cb_mask |= 1 << i; 2898 2899 /* Don't update nr_color_samples for non-AA buffers. 2900 * (e.g. destination of MSAA resolve) 2901 */ 2902 if (tex->buffer.b.b.nr_samples >= 2 && 2903 tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) { 2904 sctx->framebuffer.nr_color_samples = 2905 MIN2(sctx->framebuffer.nr_color_samples, tex->buffer.b.b.nr_storage_samples); 2906 sctx->framebuffer.nr_color_samples = MAX2(1, sctx->framebuffer.nr_color_samples); 2907 } 2908 2909 if (tex->surface.is_linear) 2910 sctx->framebuffer.any_dst_linear = true; 2911 2912 if (vi_dcc_enabled(tex, surf->base.u.tex.level)) { 2913 sctx->framebuffer.CB_has_shader_readable_metadata = true; 2914 2915 if (sctx->chip_class >= GFX9 && !tex->surface.u.gfx9.color.dcc.pipe_aligned) 2916 sctx->framebuffer.all_DCC_pipe_aligned = false; 2917 2918 if (tex->buffer.b.b.nr_storage_samples >= 2) 2919 sctx->framebuffer.has_dcc_msaa = true; 2920 } 2921 2922 si_context_add_resource_size(sctx, surf->base.texture); 2923 2924 p_atomic_inc(&tex->framebuffers_bound); 2925 2926 /* Update the minimum but don't keep 0. */ 2927 if (!sctx->framebuffer.min_bytes_per_pixel || 2928 tex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel) 2929 sctx->framebuffer.min_bytes_per_pixel = tex->surface.bpe; 2930 } 2931 2932 /* For optimal DCC performance. */ 2933 if (sctx->chip_class >= GFX10) 2934 sctx->framebuffer.dcc_overwrite_combiner_watermark = 6; 2935 else 2936 sctx->framebuffer.dcc_overwrite_combiner_watermark = 4; 2937 2938 struct si_texture *zstex = NULL; 2939 2940 if (state->zsbuf) { 2941 surf = (struct si_surface *)state->zsbuf; 2942 zstex = (struct si_texture *)surf->base.texture; 2943 2944 if (!surf->depth_initialized) { 2945 si_init_depth_surface(sctx, surf); 2946 } 2947 2948 if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level, PIPE_MASK_ZS)) 2949 sctx->framebuffer.DB_has_shader_readable_metadata = true; 2950 2951 si_context_add_resource_size(sctx, surf->base.texture); 2952 2953 /* Update the minimum but don't keep 0. */ 2954 if (!sctx->framebuffer.min_bytes_per_pixel || 2955 zstex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel) 2956 sctx->framebuffer.min_bytes_per_pixel = zstex->surface.bpe; 2957 } 2958 2959 si_update_ps_colorbuf0_slot(sctx); 2960 si_update_poly_offset_state(sctx); 2961 si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 2962 si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 2963 2964 /* NGG cull state uses the sample count. */ 2965 if (sctx->screen->use_ngg_culling) 2966 si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); 2967 2968 if (sctx->screen->dpbb_allowed) 2969 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 2970 2971 if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 2972 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 2973 2974 if (sctx->screen->has_out_of_order_rast && 2975 (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit || 2976 !!sctx->framebuffer.state.zsbuf != old_has_zsbuf || 2977 (zstex && zstex->surface.has_stencil != old_has_stencil))) 2978 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 2979 2980 if (sctx->framebuffer.nr_samples != old_nr_samples) { 2981 struct pipe_constant_buffer constbuf = {0}; 2982 2983 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 2984 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 2985 2986 if (!sctx->sample_pos_buffer) { 2987 sctx->sample_pos_buffer = pipe_buffer_create_with_data(&sctx->b, 0, PIPE_USAGE_DEFAULT, 2988 sizeof(sctx->sample_positions), 2989 &sctx->sample_positions); 2990 } 2991 constbuf.buffer = sctx->sample_pos_buffer; 2992 2993 /* Set sample locations as fragment shader constants. */ 2994 switch (sctx->framebuffer.nr_samples) { 2995 case 1: 2996 constbuf.buffer_offset = 0; 2997 break; 2998 case 2: 2999 constbuf.buffer_offset = 3000 (ubyte *)sctx->sample_positions.x2 - (ubyte *)sctx->sample_positions.x1; 3001 break; 3002 case 4: 3003 constbuf.buffer_offset = 3004 (ubyte *)sctx->sample_positions.x4 - (ubyte *)sctx->sample_positions.x1; 3005 break; 3006 case 8: 3007 constbuf.buffer_offset = 3008 (ubyte *)sctx->sample_positions.x8 - (ubyte *)sctx->sample_positions.x1; 3009 break; 3010 case 16: 3011 constbuf.buffer_offset = 3012 (ubyte *)sctx->sample_positions.x16 - (ubyte *)sctx->sample_positions.x1; 3013 break; 3014 default: 3015 PRINT_ERR("Requested an invalid number of samples %i.\n", sctx->framebuffer.nr_samples); 3016 assert(0); 3017 } 3018 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 3019 si_set_internal_const_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 3020 3021 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); 3022 } 3023 3024 si_ps_key_update_framebuffer(sctx); 3025 si_ps_key_update_framebuffer_blend(sctx); 3026 si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 3027 si_update_ps_inputs_read_or_disabled(sctx); 3028 sctx->do_update_shaders = true; 3029 3030 if (!sctx->decompression_enabled) { 3031 /* Prevent textures decompression when the framebuffer state 3032 * changes come from the decompression passes themselves. 3033 */ 3034 sctx->need_check_render_feedback = true; 3035 } 3036} 3037 3038static void si_emit_framebuffer_state(struct si_context *sctx) 3039{ 3040 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 3041 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 3042 unsigned i, nr_cbufs = state->nr_cbufs; 3043 struct si_texture *tex = NULL; 3044 struct si_surface *cb = NULL; 3045 unsigned cb_color_info = 0; 3046 3047 radeon_begin(cs); 3048 3049 /* Colorbuffers. */ 3050 for (i = 0; i < nr_cbufs; i++) { 3051 uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base; 3052 unsigned cb_color_attrib; 3053 3054 if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) 3055 continue; 3056 3057 cb = (struct si_surface *)state->cbufs[i]; 3058 if (!cb) { 3059 radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 3060 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 3061 continue; 3062 } 3063 3064 tex = (struct si_texture *)cb->base.texture; 3065 radeon_add_to_buffer_list( 3066 sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC, 3067 tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER); 3068 3069 if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) { 3070 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer, 3071 RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC, 3072 RADEON_PRIO_SEPARATE_META); 3073 } 3074 3075 /* Compute mutable surface parameters. */ 3076 cb_color_base = tex->buffer.gpu_address >> 8; 3077 cb_color_fmask = 0; 3078 cb_color_cmask = tex->cmask_base_address_reg; 3079 cb_dcc_base = 0; 3080 cb_color_info = cb->cb_color_info | tex->cb_color_info; 3081 cb_color_attrib = cb->cb_color_attrib; 3082 3083 if (tex->swap_rgb_to_bgr) { 3084 /* Swap R and B channels. */ 3085 static unsigned rgb_to_bgr[4] = { 3086 [V_028C70_SWAP_STD] = V_028C70_SWAP_ALT, 3087 [V_028C70_SWAP_ALT] = V_028C70_SWAP_STD, 3088 [V_028C70_SWAP_STD_REV] = V_028C70_SWAP_ALT_REV, 3089 [V_028C70_SWAP_ALT_REV] = V_028C70_SWAP_STD_REV, 3090 }; 3091 unsigned swap = rgb_to_bgr[G_028C70_COMP_SWAP(cb_color_info)]; 3092 3093 cb_color_info &= C_028C70_COMP_SWAP; 3094 cb_color_info |= S_028C70_COMP_SWAP(swap); 3095 } 3096 3097 if (cb->base.u.tex.level > 0) 3098 cb_color_info &= C_028C70_FAST_CLEAR; 3099 3100 if (tex->surface.fmask_offset) { 3101 cb_color_fmask = (tex->buffer.gpu_address + tex->surface.fmask_offset) >> 8; 3102 cb_color_fmask |= tex->surface.fmask_tile_swizzle; 3103 } 3104 3105 /* Set up DCC. */ 3106 if (vi_dcc_enabled(tex, cb->base.u.tex.level)) { 3107 bool is_msaa_resolve_dst = state->cbufs[0] && state->cbufs[0]->texture->nr_samples > 1 && 3108 state->cbufs[1] == &cb->base && 3109 state->cbufs[1]->texture->nr_samples <= 1; 3110 3111 if (!is_msaa_resolve_dst) 3112 cb_color_info |= S_028C70_DCC_ENABLE(1); 3113 3114 cb_dcc_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 3115 3116 unsigned dcc_tile_swizzle = tex->surface.tile_swizzle; 3117 dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8; 3118 cb_dcc_base |= dcc_tile_swizzle; 3119 } 3120 3121 if (sctx->chip_class >= GFX10) { 3122 unsigned cb_color_attrib3; 3123 3124 /* Set mutable surface parameters. */ 3125 cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 3126 cb_color_base |= tex->surface.tile_swizzle; 3127 if (!tex->surface.fmask_offset) 3128 cb_color_fmask = cb_color_base; 3129 if (cb->base.u.tex.level > 0) 3130 cb_color_cmask = cb_color_base; 3131 3132 cb_color_attrib3 = cb->cb_color_attrib3 | 3133 S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 3134 S_028EE0_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 3135 S_028EE0_CMASK_PIPE_ALIGNED(1) | 3136 S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned); 3137 3138 radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 14); 3139 radeon_emit(cb_color_base); /* CB_COLOR0_BASE */ 3140 radeon_emit(0); /* hole */ 3141 radeon_emit(0); /* hole */ 3142 radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 3143 radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 3144 radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3145 radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3146 radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 3147 radeon_emit(0); /* hole */ 3148 radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 3149 radeon_emit(0); /* hole */ 3150 radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3151 radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3152 radeon_emit(cb_dcc_base); /* CB_COLOR0_DCC_BASE */ 3153 3154 radeon_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32); 3155 radeon_set_context_reg(R_028E60_CB_COLOR0_CMASK_BASE_EXT + i * 4, 3156 cb_color_cmask >> 32); 3157 radeon_set_context_reg(R_028E80_CB_COLOR0_FMASK_BASE_EXT + i * 4, 3158 cb_color_fmask >> 32); 3159 radeon_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32); 3160 radeon_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2); 3161 radeon_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3); 3162 } else if (sctx->chip_class == GFX9) { 3163 struct gfx9_surf_meta_flags meta = { 3164 .rb_aligned = 1, 3165 .pipe_aligned = 1, 3166 }; 3167 3168 if (!tex->is_depth && tex->surface.meta_offset) 3169 meta = tex->surface.u.gfx9.color.dcc; 3170 3171 /* Set mutable surface parameters. */ 3172 cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 3173 cb_color_base |= tex->surface.tile_swizzle; 3174 if (!tex->surface.fmask_offset) 3175 cb_color_fmask = cb_color_base; 3176 if (cb->base.u.tex.level > 0) 3177 cb_color_cmask = cb_color_base; 3178 cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 3179 S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 3180 S_028C74_RB_ALIGNED(meta.rb_aligned) | 3181 S_028C74_PIPE_ALIGNED(meta.pipe_aligned); 3182 3183 radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 15); 3184 radeon_emit(cb_color_base); /* CB_COLOR0_BASE */ 3185 radeon_emit(S_028C64_BASE_256B(cb_color_base >> 32)); /* CB_COLOR0_BASE_EXT */ 3186 radeon_emit(cb->cb_color_attrib2); /* CB_COLOR0_ATTRIB2 */ 3187 radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 3188 radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 3189 radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3190 radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3191 radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 3192 radeon_emit(S_028C80_BASE_256B(cb_color_cmask >> 32)); /* CB_COLOR0_CMASK_BASE_EXT */ 3193 radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 3194 radeon_emit(S_028C88_BASE_256B(cb_color_fmask >> 32)); /* CB_COLOR0_FMASK_BASE_EXT */ 3195 radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3196 radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3197 radeon_emit(cb_dcc_base); /* CB_COLOR0_DCC_BASE */ 3198 radeon_emit(S_028C98_BASE_256B(cb_dcc_base >> 32)); /* CB_COLOR0_DCC_BASE_EXT */ 3199 3200 radeon_set_context_reg(R_0287A0_CB_MRT0_EPITCH + i * 4, 3201 S_0287A0_EPITCH(tex->surface.u.gfx9.epitch)); 3202 } else { 3203 /* Compute mutable surface parameters (GFX6-GFX8). */ 3204 const struct legacy_surf_level *level_info = 3205 &tex->surface.u.legacy.level[cb->base.u.tex.level]; 3206 unsigned pitch_tile_max, slice_tile_max, tile_mode_index; 3207 unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice; 3208 3209 cb_color_base += level_info->offset_256B; 3210 /* Only macrotiled modes can set tile swizzle. */ 3211 if (level_info->mode == RADEON_SURF_MODE_2D) 3212 cb_color_base |= tex->surface.tile_swizzle; 3213 3214 if (!tex->surface.fmask_offset) 3215 cb_color_fmask = cb_color_base; 3216 if (cb->base.u.tex.level > 0) 3217 cb_color_cmask = cb_color_base; 3218 if (cb_dcc_base) 3219 cb_dcc_base += tex->surface.u.legacy.color.dcc_level[cb->base.u.tex.level].dcc_offset >> 8; 3220 3221 pitch_tile_max = level_info->nblk_x / 8 - 1; 3222 slice_tile_max = level_info->nblk_x * level_info->nblk_y / 64 - 1; 3223 tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false); 3224 3225 cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); 3226 cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); 3227 cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); 3228 3229 if (tex->surface.fmask_offset) { 3230 if (sctx->chip_class >= GFX7) 3231 cb_color_pitch |= 3232 S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.color.fmask.pitch_in_pixels / 8 - 1); 3233 cb_color_attrib |= 3234 S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.color.fmask.tiling_index); 3235 cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.color.fmask.slice_tile_max); 3236 } else { 3237 /* This must be set for fast clear to work without FMASK. */ 3238 if (sctx->chip_class >= GFX7) 3239 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); 3240 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 3241 cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); 3242 } 3243 3244 radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 3245 sctx->chip_class >= GFX8 ? 14 : 13); 3246 radeon_emit(cb_color_base); /* CB_COLOR0_BASE */ 3247 radeon_emit(cb_color_pitch); /* CB_COLOR0_PITCH */ 3248 radeon_emit(cb_color_slice); /* CB_COLOR0_SLICE */ 3249 radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 3250 radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 3251 radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3252 radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3253 radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 3254 radeon_emit(tex->surface.u.legacy.color.cmask_slice_tile_max); /* CB_COLOR0_CMASK_SLICE */ 3255 radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 3256 radeon_emit(cb_color_fmask_slice); /* CB_COLOR0_FMASK_SLICE */ 3257 radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3258 radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3259 3260 if (sctx->chip_class >= GFX8) /* R_028C94_CB_COLOR0_DCC_BASE */ 3261 radeon_emit(cb_dcc_base); 3262 } 3263 } 3264 for (; i < 8; i++) 3265 if (sctx->framebuffer.dirty_cbufs & (1 << i)) 3266 radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 3267 3268 /* ZS buffer. */ 3269 if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { 3270 struct si_surface *zb = (struct si_surface *)state->zsbuf; 3271 struct si_texture *tex = (struct si_texture *)zb->base.texture; 3272 unsigned db_z_info = zb->db_z_info; 3273 unsigned db_stencil_info = zb->db_stencil_info; 3274 unsigned db_htile_surface = zb->db_htile_surface; 3275 3276 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE, 3277 zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA 3278 : RADEON_PRIO_DEPTH_BUFFER); 3279 3280 /* Set fields dependent on tc_compatile_htile. */ 3281 if (sctx->chip_class >= GFX9 && 3282 vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) { 3283 unsigned max_zplanes = 4; 3284 3285 if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1) 3286 max_zplanes = 2; 3287 3288 db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1); 3289 3290 if (sctx->chip_class >= GFX10) { 3291 db_z_info |= S_028040_ITERATE_FLUSH(1); 3292 db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled); 3293 } else { 3294 db_z_info |= S_028038_ITERATE_FLUSH(1); 3295 db_stencil_info |= S_02803C_ITERATE_FLUSH(1); 3296 } 3297 } 3298 3299 unsigned level = zb->base.u.tex.level; 3300 3301 if (sctx->chip_class >= GFX10) { 3302 radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 3303 radeon_set_context_reg(R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size); 3304 3305 radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 7); 3306 radeon_emit(S_02803C_RESOURCE_LEVEL(1)); /* DB_DEPTH_INFO */ 3307 radeon_emit(db_z_info | /* DB_Z_INFO */ 3308 S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 3309 radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 3310 radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 3311 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3312 radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3313 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3314 3315 radeon_set_context_reg_seq(R_028068_DB_Z_READ_BASE_HI, 5); 3316 radeon_emit(zb->db_depth_base >> 32); /* DB_Z_READ_BASE_HI */ 3317 radeon_emit(zb->db_stencil_base >> 32); /* DB_STENCIL_READ_BASE_HI */ 3318 radeon_emit(zb->db_depth_base >> 32); /* DB_Z_WRITE_BASE_HI */ 3319 radeon_emit(zb->db_stencil_base >> 32); /* DB_STENCIL_WRITE_BASE_HI */ 3320 radeon_emit(zb->db_htile_data_base >> 32); /* DB_HTILE_DATA_BASE_HI */ 3321 } else if (sctx->chip_class == GFX9) { 3322 radeon_set_context_reg_seq(R_028014_DB_HTILE_DATA_BASE, 3); 3323 radeon_emit(zb->db_htile_data_base); /* DB_HTILE_DATA_BASE */ 3324 radeon_emit(S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */ 3325 radeon_emit(zb->db_depth_size); /* DB_DEPTH_SIZE */ 3326 3327 radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 10); 3328 radeon_emit(db_z_info | /* DB_Z_INFO */ 3329 S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 3330 radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 3331 radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 3332 radeon_emit(S_028044_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_READ_BASE_HI */ 3333 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3334 radeon_emit(S_02804C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_READ_BASE_HI */ 3335 radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3336 radeon_emit(S_028054_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_WRITE_BASE_HI */ 3337 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3338 radeon_emit(S_02805C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */ 3339 3340 radeon_set_context_reg_seq(R_028068_DB_Z_INFO2, 2); 3341 radeon_emit(zb->db_z_info2); /* DB_Z_INFO2 */ 3342 radeon_emit(zb->db_stencil_info2); /* DB_STENCIL_INFO2 */ 3343 } else { 3344 /* GFX6-GFX8 */ 3345 /* Set fields dependent on tc_compatile_htile. */ 3346 if (si_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) { 3347 if (tex->tc_compatible_htile) { 3348 db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); 3349 3350 /* 0 = full compression. N = only compress up to N-1 Z planes. */ 3351 if (tex->buffer.b.b.nr_samples <= 1) 3352 db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); 3353 else if (tex->buffer.b.b.nr_samples <= 4) 3354 db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); 3355 else 3356 db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); 3357 } 3358 } 3359 3360 radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 3361 3362 radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 9); 3363 radeon_emit(zb->db_depth_info | /* DB_DEPTH_INFO */ 3364 S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile)); 3365 radeon_emit(db_z_info | /* DB_Z_INFO */ 3366 S_028040_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 3367 radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 3368 radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 3369 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3370 radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3371 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3372 radeon_emit(zb->db_depth_size); /* DB_DEPTH_SIZE */ 3373 radeon_emit(zb->db_depth_slice); /* DB_DEPTH_SLICE */ 3374 } 3375 3376 radeon_set_context_reg_seq(R_028028_DB_STENCIL_CLEAR, 2); 3377 radeon_emit(tex->stencil_clear_value[level]); /* R_028028_DB_STENCIL_CLEAR */ 3378 radeon_emit(fui(tex->depth_clear_value[level])); /* R_02802C_DB_DEPTH_CLEAR */ 3379 3380 radeon_set_context_reg(R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 3381 radeon_set_context_reg(R_028ABC_DB_HTILE_SURFACE, db_htile_surface); 3382 } else if (sctx->framebuffer.dirty_zsbuf) { 3383 if (sctx->chip_class == GFX9) 3384 radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 2); 3385 else 3386 radeon_set_context_reg_seq(R_028040_DB_Z_INFO, 2); 3387 3388 radeon_emit(S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */ 3389 radeon_emit(S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */ 3390 } 3391 3392 /* Framebuffer dimensions. */ 3393 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_cs_preamble_state */ 3394 radeon_set_context_reg(R_028208_PA_SC_WINDOW_SCISSOR_BR, 3395 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 3396 3397 if (sctx->screen->dpbb_allowed) { 3398 radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 3399 radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 3400 } 3401 radeon_end(); 3402 3403 si_update_display_dcc_dirty(sctx); 3404 3405 sctx->framebuffer.dirty_cbufs = 0; 3406 sctx->framebuffer.dirty_zsbuf = false; 3407} 3408 3409static void si_emit_msaa_sample_locs(struct si_context *sctx) 3410{ 3411 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 3412 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 3413 unsigned nr_samples = sctx->framebuffer.nr_samples; 3414 bool has_msaa_sample_loc_bug = sctx->screen->info.has_msaa_sample_loc_bug; 3415 3416 /* Smoothing (only possible with nr_samples == 1) uses the same 3417 * sample locations as the MSAA it simulates. 3418 */ 3419 if (nr_samples <= 1 && sctx->smoothing_enabled) 3420 nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; 3421 3422 /* On Polaris, the small primitive filter uses the sample locations 3423 * even when MSAA is off, so we need to make sure they're set to 0. 3424 * 3425 * GFX10 uses sample locations unconditionally, so they always need 3426 * to be set up. 3427 */ 3428 if ((nr_samples >= 2 || has_msaa_sample_loc_bug || sctx->chip_class >= GFX10) && 3429 nr_samples != sctx->sample_locs_num_samples) { 3430 sctx->sample_locs_num_samples = nr_samples; 3431 si_emit_sample_locations(cs, nr_samples); 3432 } 3433 3434 radeon_begin(cs); 3435 3436 if (sctx->family >= CHIP_POLARIS10) { 3437 unsigned small_prim_filter_cntl = 3438 S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 3439 /* line bug */ 3440 S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12); 3441 3442 /* For hardware with the sample location bug, the problem is that in order to use the small 3443 * primitive filter, we need to explicitly set the sample locations to 0. But the DB doesn't 3444 * properly process the change of sample locations without a flush, and so we can end up 3445 * with incorrect Z values. 3446 * 3447 * Instead of doing a flush, just disable the small primitive filter when MSAA is 3448 * force-disabled. 3449 * 3450 * The alternative of setting sample locations to 0 would require a DB flush to avoid 3451 * Z errors, see https://bugs.freedesktop.org/show_bug.cgi?id=96908 3452 */ 3453 if (has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1 && !rs->multisample_enable) 3454 small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; 3455 3456 radeon_opt_set_context_reg(sctx, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 3457 SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl); 3458 } 3459 3460 /* The exclusion bits can be set to improve rasterization efficiency 3461 * if no sample lies on the pixel boundary (-8 sample offset). 3462 */ 3463 bool exclusion = sctx->chip_class >= GFX7 && (!rs->multisample_enable || nr_samples != 16); 3464 radeon_opt_set_context_reg( 3465 sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, 3466 S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); 3467 radeon_end(); 3468} 3469 3470static bool si_out_of_order_rasterization(struct si_context *sctx) 3471{ 3472 struct si_state_blend *blend = sctx->queued.named.blend; 3473 struct si_state_dsa *dsa = sctx->queued.named.dsa; 3474 3475 if (!sctx->screen->has_out_of_order_rast) 3476 return false; 3477 3478 unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit; 3479 3480 colormask &= blend->cb_target_enabled_4bit; 3481 3482 /* Conservative: No logic op. */ 3483 if (colormask && blend->logicop_enable) 3484 return false; 3485 3486 struct si_dsa_order_invariance dsa_order_invariant = {.zs = true, 3487 .pass_set = true, 3488 .pass_last = false}; 3489 3490 if (sctx->framebuffer.state.zsbuf) { 3491 struct si_texture *zstex = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture; 3492 bool has_stencil = zstex->surface.has_stencil; 3493 dsa_order_invariant = dsa->order_invariance[has_stencil]; 3494 if (!dsa_order_invariant.zs) 3495 return false; 3496 3497 /* The set of PS invocations is always order invariant, 3498 * except when early Z/S tests are requested. */ 3499 if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.writes_memory && 3500 sctx->shader.ps.cso->info.base.fs.early_fragment_tests && 3501 !dsa_order_invariant.pass_set) 3502 return false; 3503 3504 if (sctx->num_perfect_occlusion_queries != 0 && !dsa_order_invariant.pass_set) 3505 return false; 3506 } 3507 3508 if (!colormask) 3509 return true; 3510 3511 unsigned blendmask = colormask & blend->blend_enable_4bit; 3512 3513 if (blendmask) { 3514 /* Only commutative blending. */ 3515 if (blendmask & ~blend->commutative_4bit) 3516 return false; 3517 3518 if (!dsa_order_invariant.pass_set) 3519 return false; 3520 } 3521 3522 if (colormask & ~blendmask) { 3523 if (!dsa_order_invariant.pass_last) 3524 return false; 3525 } 3526 3527 return true; 3528} 3529 3530static void si_emit_msaa_config(struct si_context *sctx) 3531{ 3532 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 3533 unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes; 3534 /* 33% faster rendering to linear color buffers */ 3535 bool dst_is_linear = sctx->framebuffer.any_dst_linear; 3536 bool out_of_order_rast = si_out_of_order_rasterization(sctx); 3537 unsigned sc_mode_cntl_1 = 3538 S_028A4C_WALK_SIZE(dst_is_linear) | S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) | 3539 S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) | 3540 S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) | 3541 S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) | 3542 /* always 1: */ 3543 S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 3544 S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 3545 S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1); 3546 unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) | 3547 S_028804_INTERPOLATE_COMP_Z(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1); 3548 unsigned coverage_samples, color_samples, z_samples; 3549 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 3550 3551 /* S: Coverage samples (up to 16x): 3552 * - Scan conversion samples (PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES) 3553 * - CB FMASK samples (CB_COLORi_ATTRIB.NUM_SAMPLES) 3554 * 3555 * Z: Z/S samples (up to 8x, must be <= coverage samples and >= color samples): 3556 * - Value seen by DB (DB_Z_INFO.NUM_SAMPLES) 3557 * - Value seen by CB, must be correct even if Z/S is unbound (DB_EQAA.MAX_ANCHOR_SAMPLES) 3558 * # Missing samples are derived from Z planes if Z is compressed (up to 16x quality), or 3559 * # from the closest defined sample if Z is uncompressed (same quality as the number of 3560 * # Z samples). 3561 * 3562 * F: Color samples (up to 8x, must be <= coverage samples): 3563 * - CB color samples (CB_COLORi_ATTRIB.NUM_FRAGMENTS) 3564 * - PS iter samples (DB_EQAA.PS_ITER_SAMPLES) 3565 * 3566 * Can be anything between coverage and color samples: 3567 * - SampleMaskIn samples (PA_SC_AA_CONFIG.MSAA_EXPOSED_SAMPLES) 3568 * - SampleMaskOut samples (DB_EQAA.MASK_EXPORT_NUM_SAMPLES) 3569 * - Alpha-to-coverage samples (DB_EQAA.ALPHA_TO_MASK_NUM_SAMPLES) 3570 * - Occlusion query samples (DB_COUNT_CONTROL.SAMPLE_RATE) 3571 * # All are currently set the same as coverage samples. 3572 * 3573 * If color samples < coverage samples, FMASK has a higher bpp to store an "unknown" 3574 * flag for undefined color samples. A shader-based resolve must handle unknowns 3575 * or mask them out with AND. Unknowns can also be guessed from neighbors via 3576 * an edge-detect shader-based resolve, which is required to make "color samples = 1" 3577 * useful. The CB resolve always drops unknowns. 3578 * 3579 * Sensible AA configurations: 3580 * EQAA 16s 8z 8f - might look the same as 16x MSAA if Z is compressed 3581 * EQAA 16s 8z 4f - might look the same as 16x MSAA if Z is compressed 3582 * EQAA 16s 4z 4f - might look the same as 16x MSAA if Z is compressed 3583 * EQAA 8s 8z 8f = 8x MSAA 3584 * EQAA 8s 8z 4f - might look the same as 8x MSAA 3585 * EQAA 8s 8z 2f - might look the same as 8x MSAA with low-density geometry 3586 * EQAA 8s 4z 4f - might look the same as 8x MSAA if Z is compressed 3587 * EQAA 8s 4z 2f - might look the same as 8x MSAA with low-density geometry if Z is compressed 3588 * EQAA 4s 4z 4f = 4x MSAA 3589 * EQAA 4s 4z 2f - might look the same as 4x MSAA with low-density geometry 3590 * EQAA 2s 2z 2f = 2x MSAA 3591 */ 3592 coverage_samples = color_samples = z_samples = si_get_num_coverage_samples(sctx); 3593 3594 if (sctx->framebuffer.nr_samples > 1 && rs->multisample_enable) { 3595 color_samples = sctx->framebuffer.nr_color_samples; 3596 3597 if (sctx->framebuffer.state.zsbuf) { 3598 z_samples = sctx->framebuffer.state.zsbuf->texture->nr_samples; 3599 z_samples = MAX2(1, z_samples); 3600 } else { 3601 z_samples = coverage_samples; 3602 } 3603 } 3604 3605 /* The DX10 diamond test is optional in GL and decreases line rasterization 3606 * performance, so don't use it. 3607 * 3608 * TODO: We should also enable perpendicular endcaps for AA lines, 3609 * but that requires implementing line stippling in the pixel 3610 * shader. SC can only do line stippling with axis-aligned 3611 * endcaps. 3612 */ 3613 unsigned sc_line_cntl = 0; 3614 unsigned sc_aa_config = 0; 3615 3616 if (coverage_samples > 1) { 3617 /* distance from the pixel center, indexed by log2(nr_samples) */ 3618 static unsigned max_dist[] = { 3619 0, /* unused */ 3620 4, /* 2x MSAA */ 3621 6, /* 4x MSAA */ 3622 7, /* 8x MSAA */ 3623 8, /* 16x MSAA */ 3624 }; 3625 unsigned log_samples = util_logbase2(coverage_samples); 3626 unsigned log_z_samples = util_logbase2(z_samples); 3627 unsigned ps_iter_samples = si_get_ps_iter_samples(sctx); 3628 unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples); 3629 3630 sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); 3631 sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) | 3632 S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | 3633 S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | 3634 S_028BE0_COVERED_CENTROID_IS_CENTER(sctx->chip_class >= GFX10_3); 3635 3636 if (sctx->framebuffer.nr_samples > 1) { 3637 db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | 3638 S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | 3639 S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | 3640 S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples); 3641 sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); 3642 } else if (sctx->smoothing_enabled) { 3643 db_eqaa |= S_028804_OVERRASTERIZATION_AMOUNT(log_samples); 3644 } 3645 } 3646 3647 radeon_begin(cs); 3648 3649 /* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */ 3650 radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL, 3651 sc_line_cntl, sc_aa_config); 3652 /* R_028804_DB_EQAA */ 3653 radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa); 3654 /* R_028A4C_PA_SC_MODE_CNTL_1 */ 3655 radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1, 3656 sc_mode_cntl_1); 3657 radeon_end_update_context_roll(sctx); 3658} 3659 3660void si_update_ps_iter_samples(struct si_context *sctx) 3661{ 3662 if (sctx->framebuffer.nr_samples > 1) 3663 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 3664 if (sctx->screen->dpbb_allowed) 3665 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 3666} 3667 3668static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 3669{ 3670 struct si_context *sctx = (struct si_context *)ctx; 3671 3672 /* The hardware can only do sample shading with 2^n samples. */ 3673 min_samples = util_next_power_of_two(min_samples); 3674 3675 if (sctx->ps_iter_samples == min_samples) 3676 return; 3677 3678 sctx->ps_iter_samples = min_samples; 3679 3680 si_ps_key_update_sample_shading(sctx); 3681 si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 3682 sctx->do_update_shaders = true; 3683 3684 si_update_ps_iter_samples(sctx); 3685} 3686 3687/* 3688 * Samplers 3689 */ 3690 3691/** 3692 * Build the sampler view descriptor for a buffer texture. 3693 * @param state 256-bit descriptor; only the high 128 bits are filled in 3694 */ 3695void si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, 3696 enum pipe_format format, unsigned offset, unsigned size, 3697 uint32_t *state) 3698{ 3699 const struct util_format_description *desc; 3700 unsigned stride; 3701 unsigned num_records; 3702 3703 desc = util_format_description(format); 3704 stride = desc->block.bits / 8; 3705 3706 num_records = size / stride; 3707 num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); 3708 3709 /* The NUM_RECORDS field has a different meaning depending on the chip, 3710 * instruction type, STRIDE, and SWIZZLE_ENABLE. 3711 * 3712 * GFX6-7,10: 3713 * - If STRIDE == 0, it's in byte units. 3714 * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN. 3715 * 3716 * GFX8: 3717 * - For SMEM and STRIDE == 0, it's in byte units. 3718 * - For SMEM and STRIDE != 0, it's in units of STRIDE. 3719 * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units. 3720 * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE. 3721 * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_- 3722 * ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when 3723 * using SMEM. This can be done in the shader by clearing STRIDE with s_and. 3724 * That way the same descriptor can be used by both SMEM and VMEM. 3725 * 3726 * GFX9: 3727 * - For SMEM and STRIDE == 0, it's in byte units. 3728 * - For SMEM and STRIDE != 0, it's in units of STRIDE. 3729 * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units. 3730 * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE. 3731 */ 3732 if (screen->info.chip_class == GFX8) 3733 num_records *= stride; 3734 3735 state[4] = 0; 3736 state[5] = S_008F04_STRIDE(stride); 3737 state[6] = num_records; 3738 state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3739 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3740 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3741 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])); 3742 3743 if (screen->info.chip_class >= GFX10) { 3744 const struct gfx10_format *fmt = &gfx10_format_table[format]; 3745 3746 /* OOB_SELECT chooses the out-of-bounds check: 3747 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE) 3748 * - 1: index >= NUM_RECORDS 3749 * - 2: NUM_RECORDS == 0 3750 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS 3751 * else: swizzle_address >= NUM_RECORDS 3752 */ 3753 state[7] |= S_008F0C_FORMAT(fmt->img_format) | 3754 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) | 3755 S_008F0C_RESOURCE_LEVEL(1); 3756 } else { 3757 int first_non_void; 3758 unsigned num_format, data_format; 3759 3760 first_non_void = util_format_get_first_non_void_channel(format); 3761 num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void); 3762 data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void); 3763 3764 state[7] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); 3765 } 3766} 3767 3768static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4]) 3769{ 3770 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 3771 3772 if (swizzle[3] == PIPE_SWIZZLE_X) { 3773 /* For the pre-defined border color values (white, opaque 3774 * black, transparent black), the only thing that matters is 3775 * that the alpha channel winds up in the correct place 3776 * (because the RGB channels are all the same) so either of 3777 * these enumerations will work. 3778 */ 3779 if (swizzle[2] == PIPE_SWIZZLE_Y) 3780 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX; 3781 else 3782 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ; 3783 } else if (swizzle[0] == PIPE_SWIZZLE_X) { 3784 if (swizzle[1] == PIPE_SWIZZLE_Y) 3785 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 3786 else 3787 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ; 3788 } else if (swizzle[1] == PIPE_SWIZZLE_X) { 3789 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ; 3790 } else if (swizzle[2] == PIPE_SWIZZLE_X) { 3791 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW; 3792 } 3793 3794 return bc_swizzle; 3795} 3796 3797/** 3798 * Build the sampler view descriptor for a texture. 3799 */ 3800static void gfx10_make_texture_descriptor( 3801 struct si_screen *screen, struct si_texture *tex, bool sampler, enum pipe_texture_target target, 3802 enum pipe_format pipe_format, const unsigned char state_swizzle[4], unsigned first_level, 3803 unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned width, unsigned height, 3804 unsigned depth, uint32_t *state, uint32_t *fmask_state) 3805{ 3806 struct pipe_resource *res = &tex->buffer.b.b; 3807 const struct util_format_description *desc; 3808 unsigned img_format; 3809 unsigned char swizzle[4]; 3810 unsigned type; 3811 uint64_t va; 3812 3813 desc = util_format_description(pipe_format); 3814 img_format = gfx10_format_table[pipe_format].img_format; 3815 3816 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 3817 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 3818 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 3819 const unsigned char swizzle_wwww[4] = {3, 3, 3, 3}; 3820 bool is_stencil = false; 3821 3822 switch (pipe_format) { 3823 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3824 case PIPE_FORMAT_X32_S8X24_UINT: 3825 case PIPE_FORMAT_X8Z24_UNORM: 3826 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 3827 is_stencil = true; 3828 break; 3829 case PIPE_FORMAT_X24S8_UINT: 3830 /* 3831 * X24S8 is implemented as an 8_8_8_8 data format, to 3832 * fix texture gathers. This affects at least 3833 * GL45-CTS.texture_cube_map_array.sampling on GFX8. 3834 */ 3835 util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); 3836 is_stencil = true; 3837 break; 3838 default: 3839 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 3840 is_stencil = pipe_format == PIPE_FORMAT_S8_UINT; 3841 } 3842 3843 if (tex->upgraded_depth && !is_stencil) { 3844 assert(img_format == V_008F0C_GFX10_FORMAT_32_FLOAT); 3845 img_format = V_008F0C_GFX10_FORMAT_32_FLOAT_CLAMP; 3846 } 3847 } else { 3848 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 3849 } 3850 3851 if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY)) { 3852 /* For the purpose of shader images, treat cube maps as 2D 3853 * arrays. 3854 */ 3855 type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 3856 } else { 3857 type = si_tex_dim(screen, tex, target, res->nr_samples); 3858 } 3859 3860 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 3861 height = 1; 3862 depth = res->array_size; 3863 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 3864 if (sampler || res->target != PIPE_TEXTURE_3D) 3865 depth = res->array_size; 3866 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 3867 depth = res->array_size / 6; 3868 3869 state[0] = 0; 3870 state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1); 3871 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 3872 S_00A008_RESOURCE_LEVEL(1); 3873 state[3] = 3874 S_00A00C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 3875 S_00A00C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 3876 S_00A00C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 3877 S_00A00C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 3878 S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) | 3879 S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) | 3880 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type); 3881 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need 3882 * to know the total number of layers. 3883 */ 3884 state[4] = 3885 S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) | 3886 S_00A010_BASE_ARRAY(first_layer); 3887 state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) | 3888 S_00A014_MAX_MIP(res->nr_samples > 1 ? util_logbase2(res->nr_samples) 3889 : tex->buffer.b.b.last_level) | 3890 S_00A014_PERF_MOD(4); 3891 state[6] = 0; 3892 state[7] = 0; 3893 3894 if (vi_dcc_enabled(tex, first_level)) { 3895 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | 3896 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) | 3897 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); 3898 } 3899 3900 /* Initialize the sampler view for FMASK. */ 3901 if (tex->surface.fmask_offset) { 3902 uint32_t format; 3903 3904 va = tex->buffer.gpu_address + tex->surface.fmask_offset; 3905 3906#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f))) 3907 switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 3908 case FMASK(2, 1): 3909 format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F1; 3910 break; 3911 case FMASK(2, 2): 3912 format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2; 3913 break; 3914 case FMASK(4, 1): 3915 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F1; 3916 break; 3917 case FMASK(4, 2): 3918 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F2; 3919 break; 3920 case FMASK(4, 4): 3921 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4; 3922 break; 3923 case FMASK(8, 1): 3924 format = V_008F0C_GFX10_FORMAT_FMASK8_S8_F1; 3925 break; 3926 case FMASK(8, 2): 3927 format = V_008F0C_GFX10_FORMAT_FMASK16_S8_F2; 3928 break; 3929 case FMASK(8, 4): 3930 format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F4; 3931 break; 3932 case FMASK(8, 8): 3933 format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8; 3934 break; 3935 case FMASK(16, 1): 3936 format = V_008F0C_GFX10_FORMAT_FMASK16_S16_F1; 3937 break; 3938 case FMASK(16, 2): 3939 format = V_008F0C_GFX10_FORMAT_FMASK32_S16_F2; 3940 break; 3941 case FMASK(16, 4): 3942 format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F4; 3943 break; 3944 case FMASK(16, 8): 3945 format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F8; 3946 break; 3947 default: 3948 unreachable("invalid nr_samples"); 3949 } 3950#undef FMASK 3951 fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle; 3952 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) | 3953 S_00A004_WIDTH_LO(width - 1); 3954 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 3955 S_00A008_RESOURCE_LEVEL(1); 3956 fmask_state[3] = 3957 S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 3958 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 3959 S_00A00C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 3960 S_00A00C_TYPE(si_tex_dim(screen, tex, target, 0)); 3961 fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer); 3962 fmask_state[5] = 0; 3963 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1); 3964 fmask_state[7] = 0; 3965 } 3966} 3967 3968/** 3969 * Build the sampler view descriptor for a texture (SI-GFX9). 3970 */ 3971static void si_make_texture_descriptor(struct si_screen *screen, struct si_texture *tex, 3972 bool sampler, enum pipe_texture_target target, 3973 enum pipe_format pipe_format, 3974 const unsigned char state_swizzle[4], unsigned first_level, 3975 unsigned last_level, unsigned first_layer, 3976 unsigned last_layer, unsigned width, unsigned height, 3977 unsigned depth, uint32_t *state, uint32_t *fmask_state) 3978{ 3979 struct pipe_resource *res = &tex->buffer.b.b; 3980 const struct util_format_description *desc; 3981 unsigned char swizzle[4]; 3982 int first_non_void; 3983 unsigned num_format, data_format, type, num_samples; 3984 uint64_t va; 3985 3986 desc = util_format_description(pipe_format); 3987 3988 num_samples = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ? MAX2(1, res->nr_samples) 3989 : MAX2(1, res->nr_storage_samples); 3990 3991 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 3992 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 3993 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 3994 const unsigned char swizzle_wwww[4] = {3, 3, 3, 3}; 3995 3996 switch (pipe_format) { 3997 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3998 case PIPE_FORMAT_X32_S8X24_UINT: 3999 case PIPE_FORMAT_X8Z24_UNORM: 4000 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 4001 break; 4002 case PIPE_FORMAT_X24S8_UINT: 4003 /* 4004 * X24S8 is implemented as an 8_8_8_8 data format, to 4005 * fix texture gathers. This affects at least 4006 * GL45-CTS.texture_cube_map_array.sampling on GFX8. 4007 */ 4008 if (screen->info.chip_class <= GFX8) 4009 util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); 4010 else 4011 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 4012 break; 4013 default: 4014 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 4015 } 4016 } else { 4017 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 4018 } 4019 4020 first_non_void = util_format_get_first_non_void_channel(pipe_format); 4021 4022 switch (pipe_format) { 4023 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 4024 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4025 break; 4026 default: 4027 if (first_non_void < 0) { 4028 if (util_format_is_compressed(pipe_format)) { 4029 switch (pipe_format) { 4030 case PIPE_FORMAT_DXT1_SRGB: 4031 case PIPE_FORMAT_DXT1_SRGBA: 4032 case PIPE_FORMAT_DXT3_SRGBA: 4033 case PIPE_FORMAT_DXT5_SRGBA: 4034 case PIPE_FORMAT_BPTC_SRGBA: 4035 case PIPE_FORMAT_ETC2_SRGB8: 4036 case PIPE_FORMAT_ETC2_SRGB8A1: 4037 case PIPE_FORMAT_ETC2_SRGBA8: 4038 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 4039 break; 4040 case PIPE_FORMAT_RGTC1_SNORM: 4041 case PIPE_FORMAT_LATC1_SNORM: 4042 case PIPE_FORMAT_RGTC2_SNORM: 4043 case PIPE_FORMAT_LATC2_SNORM: 4044 case PIPE_FORMAT_ETC2_R11_SNORM: 4045 case PIPE_FORMAT_ETC2_RG11_SNORM: 4046 /* implies float, so use SNORM/UNORM to determine 4047 whether data is signed or not */ 4048 case PIPE_FORMAT_BPTC_RGB_FLOAT: 4049 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 4050 break; 4051 default: 4052 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4053 break; 4054 } 4055 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 4056 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4057 } else { 4058 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 4059 } 4060 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 4061 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 4062 } else { 4063 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4064 4065 switch (desc->channel[first_non_void].type) { 4066 case UTIL_FORMAT_TYPE_FLOAT: 4067 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 4068 break; 4069 case UTIL_FORMAT_TYPE_SIGNED: 4070 if (desc->channel[first_non_void].normalized) 4071 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 4072 else if (desc->channel[first_non_void].pure_integer) 4073 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 4074 else 4075 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 4076 break; 4077 case UTIL_FORMAT_TYPE_UNSIGNED: 4078 if (desc->channel[first_non_void].normalized) 4079 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4080 else if (desc->channel[first_non_void].pure_integer) 4081 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 4082 else 4083 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 4084 } 4085 } 4086 } 4087 4088 data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void); 4089 if (data_format == ~0) { 4090 data_format = 0; 4091 } 4092 4093 /* S8 with Z32 HTILE needs a special format. */ 4094 if (screen->info.chip_class == GFX9 && pipe_format == PIPE_FORMAT_S8_UINT) 4095 data_format = V_008F14_IMG_DATA_FORMAT_S8_32; 4096 4097 if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY || 4098 (screen->info.chip_class <= GFX8 && res->target == PIPE_TEXTURE_3D))) { 4099 /* For the purpose of shader images, treat cube maps and 3D 4100 * textures as 2D arrays. For 3D textures, the address 4101 * calculations for mipmaps are different, so we rely on the 4102 * caller to effectively disable mipmaps. 4103 */ 4104 type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 4105 4106 assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); 4107 } else { 4108 type = si_tex_dim(screen, tex, target, num_samples); 4109 } 4110 4111 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 4112 height = 1; 4113 depth = res->array_size; 4114 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 4115 if (sampler || res->target != PIPE_TEXTURE_3D) 4116 depth = res->array_size; 4117 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 4118 depth = res->array_size / 6; 4119 4120 state[0] = 0; 4121 state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format)); 4122 state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4)); 4123 state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 4124 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 4125 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 4126 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 4127 S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : first_level) | 4128 S_008F1C_LAST_LEVEL(num_samples > 1 ? util_logbase2(num_samples) : last_level) | 4129 S_008F1C_TYPE(type)); 4130 state[4] = 0; 4131 state[5] = S_008F24_BASE_ARRAY(first_layer); 4132 state[6] = 0; 4133 state[7] = 0; 4134 4135 if (screen->info.chip_class == GFX9) { 4136 unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle); 4137 4138 /* Depth is the the last accessible layer on Gfx9. 4139 * The hw doesn't need to know the total number of layers. 4140 */ 4141 if (type == V_008F1C_SQ_RSRC_IMG_3D) 4142 state[4] |= S_008F20_DEPTH(depth - 1); 4143 else 4144 state[4] |= S_008F20_DEPTH(last_layer); 4145 4146 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle); 4147 state[5] |= S_008F24_MAX_MIP(num_samples > 1 ? util_logbase2(num_samples) 4148 : tex->buffer.b.b.last_level); 4149 } else { 4150 state[3] |= S_008F1C_POW2_PAD(res->last_level > 0); 4151 state[4] |= S_008F20_DEPTH(depth - 1); 4152 state[5] |= S_008F24_LAST_ARRAY(last_layer); 4153 } 4154 4155 if (vi_dcc_enabled(tex, first_level)) { 4156 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); 4157 } else { 4158 /* The last dword is unused by hw. The shader uses it to clear 4159 * bits in the first dword of sampler state. 4160 */ 4161 if (screen->info.chip_class <= GFX7 && res->nr_samples <= 1) { 4162 if (first_level == last_level) 4163 state[7] = C_008F30_MAX_ANISO_RATIO; 4164 else 4165 state[7] = 0xffffffff; 4166 } 4167 } 4168 4169 /* Initialize the sampler view for FMASK. */ 4170 if (tex->surface.fmask_offset) { 4171 uint32_t data_format, num_format; 4172 4173 va = tex->buffer.gpu_address + tex->surface.fmask_offset; 4174 4175#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f))) 4176 if (screen->info.chip_class == GFX9) { 4177 data_format = V_008F14_IMG_DATA_FORMAT_FMASK; 4178 switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 4179 case FMASK(2, 1): 4180 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_1; 4181 break; 4182 case FMASK(2, 2): 4183 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2; 4184 break; 4185 case FMASK(4, 1): 4186 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_1; 4187 break; 4188 case FMASK(4, 2): 4189 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_2; 4190 break; 4191 case FMASK(4, 4): 4192 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4; 4193 break; 4194 case FMASK(8, 1): 4195 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_8_1; 4196 break; 4197 case FMASK(8, 2): 4198 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_8_2; 4199 break; 4200 case FMASK(8, 4): 4201 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_4; 4202 break; 4203 case FMASK(8, 8): 4204 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8; 4205 break; 4206 case FMASK(16, 1): 4207 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_16_1; 4208 break; 4209 case FMASK(16, 2): 4210 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_16_2; 4211 break; 4212 case FMASK(16, 4): 4213 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_4; 4214 break; 4215 case FMASK(16, 8): 4216 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_8; 4217 break; 4218 default: 4219 unreachable("invalid nr_samples"); 4220 } 4221 } else { 4222 switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 4223 case FMASK(2, 1): 4224 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1; 4225 break; 4226 case FMASK(2, 2): 4227 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 4228 break; 4229 case FMASK(4, 1): 4230 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1; 4231 break; 4232 case FMASK(4, 2): 4233 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2; 4234 break; 4235 case FMASK(4, 4): 4236 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 4237 break; 4238 case FMASK(8, 1): 4239 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1; 4240 break; 4241 case FMASK(8, 2): 4242 data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2; 4243 break; 4244 case FMASK(8, 4): 4245 data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4; 4246 break; 4247 case FMASK(8, 8): 4248 data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 4249 break; 4250 case FMASK(16, 1): 4251 data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1; 4252 break; 4253 case FMASK(16, 2): 4254 data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2; 4255 break; 4256 case FMASK(16, 4): 4257 data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4; 4258 break; 4259 case FMASK(16, 8): 4260 data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8; 4261 break; 4262 default: 4263 unreachable("invalid nr_samples"); 4264 } 4265 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 4266 } 4267#undef FMASK 4268 4269 fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle; 4270 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(data_format) | 4271 S_008F14_NUM_FORMAT(num_format); 4272 fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1); 4273 fmask_state[3] = 4274 S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 4275 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 4276 S_008F1C_TYPE(si_tex_dim(screen, tex, target, 0)); 4277 fmask_state[4] = 0; 4278 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer); 4279 fmask_state[6] = 0; 4280 fmask_state[7] = 0; 4281 4282 if (screen->info.chip_class == GFX9) { 4283 fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode); 4284 fmask_state[4] |= 4285 S_008F20_DEPTH(last_layer) | S_008F20_PITCH(tex->surface.u.gfx9.color.fmask_epitch); 4286 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | 4287 S_008F24_META_RB_ALIGNED(1); 4288 } else { 4289 fmask_state[3] |= S_008F1C_TILING_INDEX(tex->surface.u.legacy.color.fmask.tiling_index); 4290 fmask_state[4] |= S_008F20_DEPTH(depth - 1) | 4291 S_008F20_PITCH(tex->surface.u.legacy.color.fmask.pitch_in_pixels - 1); 4292 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer); 4293 } 4294 } 4295} 4296 4297/** 4298 * Create a sampler view. 4299 * 4300 * @param ctx context 4301 * @param texture texture 4302 * @param state sampler view template 4303 * @param width0 width0 override (for compressed textures as int) 4304 * @param height0 height0 override (for compressed textures as int) 4305 * @param force_level set the base address to the level (for compressed textures) 4306 */ 4307struct pipe_sampler_view *si_create_sampler_view_custom(struct pipe_context *ctx, 4308 struct pipe_resource *texture, 4309 const struct pipe_sampler_view *state, 4310 unsigned width0, unsigned height0, 4311 unsigned force_level) 4312{ 4313 struct si_context *sctx = (struct si_context *)ctx; 4314 struct si_sampler_view *view = CALLOC_STRUCT_CL(si_sampler_view); 4315 struct si_texture *tex = (struct si_texture *)texture; 4316 unsigned base_level, first_level, last_level; 4317 unsigned char state_swizzle[4]; 4318 unsigned height, depth, width; 4319 unsigned last_layer = state->u.tex.last_layer; 4320 enum pipe_format pipe_format; 4321 const struct legacy_surf_level *surflevel; 4322 4323 if (!view) 4324 return NULL; 4325 4326 /* initialize base object */ 4327 view->base = *state; 4328 view->base.texture = NULL; 4329 view->base.reference.count = 1; 4330 view->base.context = ctx; 4331 4332 assert(texture); 4333 pipe_resource_reference(&view->base.texture, texture); 4334 4335 if (state->format == PIPE_FORMAT_X24S8_UINT || state->format == PIPE_FORMAT_S8X24_UINT || 4336 state->format == PIPE_FORMAT_X32_S8X24_UINT || state->format == PIPE_FORMAT_S8_UINT) 4337 view->is_stencil_sampler = true; 4338 4339 /* Buffer resource. */ 4340 if (texture->target == PIPE_BUFFER) { 4341 si_make_buffer_descriptor(sctx->screen, si_resource(texture), state->format, 4342 state->u.buf.offset, state->u.buf.size, view->state); 4343 return &view->base; 4344 } 4345 4346 state_swizzle[0] = state->swizzle_r; 4347 state_swizzle[1] = state->swizzle_g; 4348 state_swizzle[2] = state->swizzle_b; 4349 state_swizzle[3] = state->swizzle_a; 4350 4351 base_level = 0; 4352 first_level = state->u.tex.first_level; 4353 last_level = state->u.tex.last_level; 4354 width = width0; 4355 height = height0; 4356 depth = texture->depth0; 4357 4358 if (sctx->chip_class <= GFX8 && force_level) { 4359 assert(force_level == first_level && force_level == last_level); 4360 base_level = force_level; 4361 first_level = 0; 4362 last_level = 0; 4363 width = u_minify(width, force_level); 4364 height = u_minify(height, force_level); 4365 depth = u_minify(depth, force_level); 4366 } 4367 4368 /* This is not needed if gallium frontends set last_layer correctly. */ 4369 if (state->target == PIPE_TEXTURE_1D || state->target == PIPE_TEXTURE_2D || 4370 state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE) 4371 last_layer = state->u.tex.first_layer; 4372 4373 /* Texturing with separate depth and stencil. */ 4374 pipe_format = state->format; 4375 4376 /* Depth/stencil texturing sometimes needs separate texture. */ 4377 if (tex->is_depth && !si_can_sample_zs(tex, view->is_stencil_sampler)) { 4378 if (!tex->flushed_depth_texture && !si_init_flushed_depth_texture(ctx, texture)) { 4379 pipe_resource_reference(&view->base.texture, NULL); 4380 FREE(view); 4381 return NULL; 4382 } 4383 4384 assert(tex->flushed_depth_texture); 4385 4386 /* Override format for the case where the flushed texture 4387 * contains only Z or only S. 4388 */ 4389 if (tex->flushed_depth_texture->buffer.b.b.format != tex->buffer.b.b.format) 4390 pipe_format = tex->flushed_depth_texture->buffer.b.b.format; 4391 4392 tex = tex->flushed_depth_texture; 4393 } 4394 4395 surflevel = tex->surface.u.legacy.level; 4396 4397 if (tex->db_compatible) { 4398 if (!view->is_stencil_sampler) 4399 pipe_format = tex->db_render_format; 4400 4401 switch (pipe_format) { 4402 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 4403 pipe_format = PIPE_FORMAT_Z32_FLOAT; 4404 break; 4405 case PIPE_FORMAT_X8Z24_UNORM: 4406 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 4407 /* Z24 is always stored like this for DB 4408 * compatibility. 4409 */ 4410 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 4411 break; 4412 case PIPE_FORMAT_X24S8_UINT: 4413 case PIPE_FORMAT_S8X24_UINT: 4414 case PIPE_FORMAT_X32_S8X24_UINT: 4415 pipe_format = PIPE_FORMAT_S8_UINT; 4416 surflevel = tex->surface.u.legacy.zs.stencil_level; 4417 break; 4418 default:; 4419 } 4420 } 4421 4422 view->dcc_incompatible = 4423 vi_dcc_formats_are_incompatible(texture, state->u.tex.first_level, state->format); 4424 4425 sctx->screen->make_texture_descriptor( 4426 sctx->screen, tex, true, state->target, pipe_format, state_swizzle, first_level, last_level, 4427 state->u.tex.first_layer, last_layer, width, height, depth, view->state, view->fmask_state); 4428 4429 view->base_level_info = &surflevel[base_level]; 4430 view->base_level = base_level; 4431 view->block_width = util_format_get_blockwidth(pipe_format); 4432 return &view->base; 4433} 4434 4435static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx, 4436 struct pipe_resource *texture, 4437 const struct pipe_sampler_view *state) 4438{ 4439 return si_create_sampler_view_custom(ctx, texture, state, texture ? texture->width0 : 0, 4440 texture ? texture->height0 : 0, 0); 4441} 4442 4443static void si_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state) 4444{ 4445 struct si_sampler_view *view = (struct si_sampler_view *)state; 4446 4447 pipe_resource_reference(&state->texture, NULL); 4448 FREE_CL(view); 4449} 4450 4451static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 4452{ 4453 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 4454 (linear_filter && (wrap == PIPE_TEX_WRAP_CLAMP || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 4455} 4456 4457static uint32_t si_translate_border_color(struct si_context *sctx, 4458 const struct pipe_sampler_state *state, 4459 const union pipe_color_union *color, bool is_integer) 4460{ 4461 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 4462 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 4463 4464 if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) && 4465 !wrap_mode_uses_border_color(state->wrap_t, linear_filter) && 4466 !wrap_mode_uses_border_color(state->wrap_r, linear_filter)) 4467 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 4468 4469#define simple_border_types(elt) \ 4470 do { \ 4471 if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 0) \ 4472 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); \ 4473 if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 1) \ 4474 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK); \ 4475 if (color->elt[0] == 1 && color->elt[1] == 1 && color->elt[2] == 1 && color->elt[3] == 1) \ 4476 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE); \ 4477 } while (false) 4478 4479 if (is_integer) 4480 simple_border_types(ui); 4481 else 4482 simple_border_types(f); 4483 4484#undef simple_border_types 4485 4486 int i; 4487 4488 /* Check if the border has been uploaded already. */ 4489 for (i = 0; i < sctx->border_color_count; i++) 4490 if (memcmp(&sctx->border_color_table[i], color, sizeof(*color)) == 0) 4491 break; 4492 4493 if (i >= SI_MAX_BORDER_COLORS) { 4494 /* Getting 4096 unique border colors is very unlikely. */ 4495 static bool printed; 4496 if (!printed) { 4497 fprintf(stderr, "radeonsi: The border color table is full. " 4498 "Any new border colors will be just black. " 4499 "This is a hardware limitation.\n"); 4500 printed = true; 4501 } 4502 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 4503 } 4504 4505 if (i == sctx->border_color_count) { 4506 /* Upload a new border color. */ 4507 memcpy(&sctx->border_color_table[i], color, sizeof(*color)); 4508 util_memcpy_cpu_to_le32(&sctx->border_color_map[i], color, sizeof(*color)); 4509 sctx->border_color_count++; 4510 } 4511 4512 return S_008F3C_BORDER_COLOR_PTR(i) | 4513 S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER); 4514} 4515 4516static inline int S_FIXED(float value, unsigned frac_bits) 4517{ 4518 return value * (1 << frac_bits); 4519} 4520 4521static inline unsigned si_tex_filter(unsigned filter, unsigned max_aniso) 4522{ 4523 if (filter == PIPE_TEX_FILTER_LINEAR) 4524 return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR 4525 : V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 4526 else 4527 return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT 4528 : V_008F38_SQ_TEX_XY_FILTER_POINT; 4529} 4530 4531static inline unsigned si_tex_aniso_filter(unsigned filter) 4532{ 4533 if (filter < 2) 4534 return 0; 4535 if (filter < 4) 4536 return 1; 4537 if (filter < 8) 4538 return 2; 4539 if (filter < 16) 4540 return 3; 4541 return 4; 4542} 4543 4544static void *si_create_sampler_state(struct pipe_context *ctx, 4545 const struct pipe_sampler_state *state) 4546{ 4547 struct si_context *sctx = (struct si_context *)ctx; 4548 struct si_screen *sscreen = sctx->screen; 4549 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 4550 unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy; 4551 unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso); 4552 bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST && 4553 state->mag_img_filter == PIPE_TEX_FILTER_NEAREST && 4554 state->compare_mode == PIPE_TEX_COMPARE_NONE; 4555 union pipe_color_union clamped_border_color; 4556 4557 if (!rstate) { 4558 return NULL; 4559 } 4560 4561 /* Validate inputs. */ 4562 if (!is_wrap_mode_legal(sscreen, state->wrap_s) || 4563 !is_wrap_mode_legal(sscreen, state->wrap_t) || 4564 !is_wrap_mode_legal(sscreen, state->wrap_r) || 4565 (!sscreen->info.has_3d_cube_border_color_mipmap && 4566 (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE || 4567 state->max_anisotropy > 0))) { 4568 assert(0); 4569 return NULL; 4570 } 4571 4572#ifndef NDEBUG 4573 rstate->magic = SI_SAMPLER_STATE_MAGIC; 4574#endif 4575 rstate->val[0] = 4576 (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 4577 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 4578 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 4579 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 4580 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) | 4581 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 4582 S_008F30_TRUNC_COORD(trunc_coord) | 4583 S_008F30_COMPAT_MODE(sctx->chip_class == GFX8 || sctx->chip_class == GFX9)); 4584 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 4585 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | 4586 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0)); 4587 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 4588 S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) | 4589 S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) | 4590 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | 4591 S_008F38_MIP_POINT_PRECLAMP(0)); 4592 rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, 4593 state->border_color_is_integer); 4594 4595 if (sscreen->info.chip_class >= GFX10) { 4596 rstate->val[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1); 4597 } else { 4598 rstate->val[2] |= S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= GFX8) | 4599 S_008F38_FILTER_PREC_FIX(1) | 4600 S_008F38_ANISO_OVERRIDE_GFX8(sctx->chip_class >= GFX8); 4601 } 4602 4603 /* Create sampler resource for upgraded depth textures. */ 4604 memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val)); 4605 4606 for (unsigned i = 0; i < 4; ++i) { 4607 /* Use channel 0 on purpose, so that we can use OPAQUE_WHITE 4608 * when the border color is 1.0. */ 4609 clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1); 4610 } 4611 4612 if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0) { 4613 if (sscreen->info.chip_class <= GFX9) 4614 rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1); 4615 } else { 4616 rstate->upgraded_depth_val[3] = 4617 si_translate_border_color(sctx, state, &clamped_border_color, false); 4618 } 4619 4620 return rstate; 4621} 4622 4623static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 4624{ 4625 struct si_context *sctx = (struct si_context *)ctx; 4626 4627 if (sctx->sample_mask == (uint16_t)sample_mask) 4628 return; 4629 4630 sctx->sample_mask = sample_mask; 4631 si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_mask); 4632} 4633 4634static void si_emit_sample_mask(struct si_context *sctx) 4635{ 4636 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 4637 unsigned mask = sctx->sample_mask; 4638 4639 /* Needed for line and polygon smoothing as well as for the Polaris 4640 * small primitive filter. We expect the gallium frontend to take care of 4641 * this for us. 4642 */ 4643 assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || 4644 (mask & 1 && sctx->blitter_running)); 4645 4646 radeon_begin(cs); 4647 radeon_set_context_reg_seq(R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 4648 radeon_emit(mask | (mask << 16)); 4649 radeon_emit(mask | (mask << 16)); 4650 radeon_end(); 4651} 4652 4653static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 4654{ 4655#ifndef NDEBUG 4656 struct si_sampler_state *s = state; 4657 4658 assert(s->magic == SI_SAMPLER_STATE_MAGIC); 4659 s->magic = 0; 4660#endif 4661 free(state); 4662} 4663 4664/* 4665 * Vertex elements & buffers 4666 */ 4667 4668struct si_fast_udiv_info32 si_compute_fast_udiv_info32(uint32_t D, unsigned num_bits) 4669{ 4670 struct util_fast_udiv_info info = util_compute_fast_udiv_info(D, num_bits, 32); 4671 4672 struct si_fast_udiv_info32 result = { 4673 info.multiplier, 4674 info.pre_shift, 4675 info.post_shift, 4676 info.increment, 4677 }; 4678 return result; 4679} 4680 4681static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count, 4682 const struct pipe_vertex_element *elements) 4683{ 4684 struct si_screen *sscreen = (struct si_screen *)ctx->screen; 4685 struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements); 4686 bool used[SI_NUM_VERTEX_BUFFERS] = {}; 4687 struct si_fast_udiv_info32 divisor_factors[SI_MAX_ATTRIBS] = {}; 4688 STATIC_ASSERT(sizeof(struct si_fast_udiv_info32) == 16); 4689 STATIC_ASSERT(sizeof(divisor_factors[0].multiplier) == 4); 4690 STATIC_ASSERT(sizeof(divisor_factors[0].pre_shift) == 4); 4691 STATIC_ASSERT(sizeof(divisor_factors[0].post_shift) == 4); 4692 STATIC_ASSERT(sizeof(divisor_factors[0].increment) == 4); 4693 int i; 4694 4695 assert(count <= SI_MAX_ATTRIBS); 4696 if (!v) 4697 return NULL; 4698 4699 v->count = count; 4700 4701 unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sscreen); 4702 unsigned alloc_count = 4703 count > num_vbos_in_user_sgprs ? count - num_vbos_in_user_sgprs : 0; 4704 v->vb_desc_list_alloc_size = align(alloc_count * 16, SI_CPDMA_ALIGNMENT); 4705 4706 for (i = 0; i < count; ++i) { 4707 const struct util_format_description *desc; 4708 const struct util_format_channel_description *channel; 4709 int first_non_void; 4710 unsigned vbo_index = elements[i].vertex_buffer_index; 4711 4712 if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { 4713 FREE(v); 4714 return NULL; 4715 } 4716 4717 unsigned instance_divisor = elements[i].instance_divisor; 4718 if (instance_divisor) { 4719 if (instance_divisor == 1) { 4720 v->instance_divisor_is_one |= 1u << i; 4721 } else { 4722 v->instance_divisor_is_fetched |= 1u << i; 4723 divisor_factors[i] = si_compute_fast_udiv_info32(instance_divisor, 32); 4724 } 4725 } 4726 4727 if (!used[vbo_index]) { 4728 v->first_vb_use_mask |= 1 << i; 4729 used[vbo_index] = true; 4730 } 4731 4732 desc = util_format_description(elements[i].src_format); 4733 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 4734 channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL; 4735 4736 v->format_size[i] = desc->block.bits / 8; 4737 v->src_offset[i] = elements[i].src_offset; 4738 v->vertex_buffer_index[i] = vbo_index; 4739 4740 bool always_fix = false; 4741 union si_vs_fix_fetch fix_fetch; 4742 unsigned log_hw_load_size; /* the load element size as seen by the hardware */ 4743 4744 fix_fetch.bits = 0; 4745 log_hw_load_size = MIN2(2, util_logbase2(desc->block.bits) - 3); 4746 4747 if (channel) { 4748 switch (channel->type) { 4749 case UTIL_FORMAT_TYPE_FLOAT: 4750 fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT; 4751 break; 4752 case UTIL_FORMAT_TYPE_FIXED: 4753 fix_fetch.u.format = AC_FETCH_FORMAT_FIXED; 4754 break; 4755 case UTIL_FORMAT_TYPE_SIGNED: { 4756 if (channel->pure_integer) 4757 fix_fetch.u.format = AC_FETCH_FORMAT_SINT; 4758 else if (channel->normalized) 4759 fix_fetch.u.format = AC_FETCH_FORMAT_SNORM; 4760 else 4761 fix_fetch.u.format = AC_FETCH_FORMAT_SSCALED; 4762 break; 4763 } 4764 case UTIL_FORMAT_TYPE_UNSIGNED: { 4765 if (channel->pure_integer) 4766 fix_fetch.u.format = AC_FETCH_FORMAT_UINT; 4767 else if (channel->normalized) 4768 fix_fetch.u.format = AC_FETCH_FORMAT_UNORM; 4769 else 4770 fix_fetch.u.format = AC_FETCH_FORMAT_USCALED; 4771 break; 4772 } 4773 default: 4774 unreachable("bad format type"); 4775 } 4776 } else { 4777 switch (elements[i].src_format) { 4778 case PIPE_FORMAT_R11G11B10_FLOAT: 4779 fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT; 4780 break; 4781 default: 4782 unreachable("bad other format"); 4783 } 4784 } 4785 4786 if (desc->channel[0].size == 10) { 4787 fix_fetch.u.log_size = 3; /* special encoding for 2_10_10_10 */ 4788 log_hw_load_size = 2; 4789 4790 /* The hardware always treats the 2-bit alpha channel as 4791 * unsigned, so a shader workaround is needed. The affected 4792 * chips are GFX8 and older except Stoney (GFX8.1). 4793 */ 4794 always_fix = sscreen->info.chip_class <= GFX8 && sscreen->info.family != CHIP_STONEY && 4795 channel->type == UTIL_FORMAT_TYPE_SIGNED; 4796 } else if (elements[i].src_format == PIPE_FORMAT_R11G11B10_FLOAT) { 4797 fix_fetch.u.log_size = 3; /* special encoding */ 4798 fix_fetch.u.format = AC_FETCH_FORMAT_FIXED; 4799 log_hw_load_size = 2; 4800 } else { 4801 fix_fetch.u.log_size = util_logbase2(channel->size) - 3; 4802 fix_fetch.u.num_channels_m1 = desc->nr_channels - 1; 4803 4804 /* Always fix up: 4805 * - doubles (multiple loads + truncate to float) 4806 * - 32-bit requiring a conversion 4807 */ 4808 always_fix = (fix_fetch.u.log_size == 3) || 4809 (fix_fetch.u.log_size == 2 && fix_fetch.u.format != AC_FETCH_FORMAT_FLOAT && 4810 fix_fetch.u.format != AC_FETCH_FORMAT_UINT && 4811 fix_fetch.u.format != AC_FETCH_FORMAT_SINT); 4812 4813 /* Also fixup 8_8_8 and 16_16_16. */ 4814 if (desc->nr_channels == 3 && fix_fetch.u.log_size <= 1) { 4815 always_fix = true; 4816 log_hw_load_size = fix_fetch.u.log_size; 4817 } 4818 } 4819 4820 if (desc->swizzle[0] != PIPE_SWIZZLE_X) { 4821 assert(desc->swizzle[0] == PIPE_SWIZZLE_Z && 4822 (desc->swizzle[2] == PIPE_SWIZZLE_X || desc->swizzle[2] == PIPE_SWIZZLE_0)); 4823 fix_fetch.u.reverse = 1; 4824 } 4825 4826 /* Force the workaround for unaligned access here already if the 4827 * offset relative to the vertex buffer base is unaligned. 4828 * 4829 * There is a theoretical case in which this is too conservative: 4830 * if the vertex buffer's offset is also unaligned in just the 4831 * right way, we end up with an aligned address after all. 4832 * However, this case should be extremely rare in practice (it 4833 * won't happen in well-behaved applications), and taking it 4834 * into account would complicate the fast path (where everything 4835 * is nicely aligned). 4836 */ 4837 bool check_alignment = 4838 log_hw_load_size >= 1 && 4839 (sscreen->info.chip_class == GFX6 || sscreen->info.chip_class >= GFX10); 4840 bool opencode = sscreen->options.vs_fetch_always_opencode; 4841 4842 if (check_alignment && (elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0) 4843 opencode = true; 4844 4845 if (always_fix || check_alignment || opencode) 4846 v->fix_fetch[i] = fix_fetch.bits; 4847 4848 if (opencode) 4849 v->fix_fetch_opencode |= 1 << i; 4850 if (opencode || always_fix) 4851 v->fix_fetch_always |= 1 << i; 4852 4853 if (check_alignment && !opencode) { 4854 assert(log_hw_load_size == 1 || log_hw_load_size == 2); 4855 4856 v->fix_fetch_unaligned |= 1 << i; 4857 v->hw_load_is_dword |= (log_hw_load_size - 1) << i; 4858 v->vb_alignment_check_mask |= 1 << vbo_index; 4859 } 4860 4861 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 4862 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 4863 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 4864 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])); 4865 4866 if (sscreen->info.chip_class >= GFX10) { 4867 const struct gfx10_format *fmt = &gfx10_format_table[elements[i].src_format]; 4868 assert(fmt->img_format != 0 && fmt->img_format < 128); 4869 v->rsrc_word3[i] |= S_008F0C_FORMAT(fmt->img_format) | S_008F0C_RESOURCE_LEVEL(1); 4870 } else { 4871 unsigned data_format, num_format; 4872 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 4873 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 4874 v->rsrc_word3[i] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); 4875 } 4876 } 4877 4878 if (v->instance_divisor_is_fetched) { 4879 unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched); 4880 4881 v->instance_divisor_factor_buffer = (struct si_resource *)pipe_buffer_create( 4882 &sscreen->b, 0, PIPE_USAGE_DEFAULT, num_divisors * sizeof(divisor_factors[0])); 4883 if (!v->instance_divisor_factor_buffer) { 4884 FREE(v); 4885 return NULL; 4886 } 4887 void *map = 4888 sscreen->ws->buffer_map(sscreen->ws, v->instance_divisor_factor_buffer->buf, NULL, PIPE_MAP_WRITE); 4889 memcpy(map, divisor_factors, num_divisors * sizeof(divisor_factors[0])); 4890 } 4891 return v; 4892} 4893 4894static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 4895{ 4896 struct si_context *sctx = (struct si_context *)ctx; 4897 struct si_vertex_elements *old = sctx->vertex_elements; 4898 struct si_vertex_elements *v = (struct si_vertex_elements *)state; 4899 4900 if (!v) 4901 v = sctx->no_velems_state; 4902 4903 sctx->vertex_elements = v; 4904 sctx->num_vertex_elements = v->count; 4905 4906 if (sctx->num_vertex_elements) { 4907 sctx->vertex_buffers_dirty = true; 4908 } else { 4909 sctx->vertex_buffers_dirty = false; 4910 sctx->vertex_buffer_pointer_dirty = false; 4911 sctx->vertex_buffer_user_sgprs_dirty = false; 4912 } 4913 4914 if (old->instance_divisor_is_one != v->instance_divisor_is_one || 4915 old->instance_divisor_is_fetched != v->instance_divisor_is_fetched || 4916 (old->vb_alignment_check_mask ^ v->vb_alignment_check_mask) & 4917 sctx->vertex_buffer_unaligned || 4918 ((v->vb_alignment_check_mask & sctx->vertex_buffer_unaligned) && 4919 memcmp(old->vertex_buffer_index, v->vertex_buffer_index, 4920 sizeof(v->vertex_buffer_index[0]) * MAX2(old->count, v->count))) || 4921 /* fix_fetch_{always,opencode,unaligned} and hw_load_is_dword are 4922 * functions of fix_fetch and the src_offset alignment. 4923 * If they change and fix_fetch doesn't, it must be due to different 4924 * src_offset alignment, which is reflected in fix_fetch_opencode. */ 4925 old->fix_fetch_opencode != v->fix_fetch_opencode || 4926 memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * 4927 MAX2(old->count, v->count))) { 4928 si_vs_key_update_inputs(sctx); 4929 sctx->do_update_shaders = true; 4930 } 4931 4932 if (v->instance_divisor_is_fetched) { 4933 struct pipe_constant_buffer cb; 4934 4935 cb.buffer = &v->instance_divisor_factor_buffer->b.b; 4936 cb.user_buffer = NULL; 4937 cb.buffer_offset = 0; 4938 cb.buffer_size = 0xffffffff; 4939 si_set_internal_const_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb); 4940 } 4941} 4942 4943static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 4944{ 4945 struct si_context *sctx = (struct si_context *)ctx; 4946 struct si_vertex_elements *v = (struct si_vertex_elements *)state; 4947 4948 if (sctx->vertex_elements == state) 4949 si_bind_vertex_elements(ctx, sctx->no_velems_state); 4950 4951 si_resource_reference(&v->instance_divisor_factor_buffer, NULL); 4952 FREE(state); 4953} 4954 4955static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count, 4956 unsigned unbind_num_trailing_slots, bool take_ownership, 4957 const struct pipe_vertex_buffer *buffers) 4958{ 4959 struct si_context *sctx = (struct si_context *)ctx; 4960 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 4961 unsigned updated_mask = u_bit_consecutive(start_slot, count + unbind_num_trailing_slots); 4962 uint32_t orig_unaligned = sctx->vertex_buffer_unaligned; 4963 uint32_t unaligned = 0; 4964 int i; 4965 4966 assert(start_slot + count + unbind_num_trailing_slots <= ARRAY_SIZE(sctx->vertex_buffer)); 4967 4968 if (buffers) { 4969 if (take_ownership) { 4970 for (i = 0; i < count; i++) { 4971 const struct pipe_vertex_buffer *src = buffers + i; 4972 struct pipe_vertex_buffer *dsti = dst + i; 4973 struct pipe_resource *buf = src->buffer.resource; 4974 unsigned slot_bit = 1 << (start_slot + i); 4975 4976 /* Only unreference bound vertex buffers. (take_ownership) */ 4977 pipe_resource_reference(&dsti->buffer.resource, NULL); 4978 4979 if (src->buffer_offset & 3 || src->stride & 3) 4980 unaligned |= slot_bit; 4981 4982 si_context_add_resource_size(sctx, buf); 4983 if (buf) 4984 si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; 4985 } 4986 /* take_ownership allows us to copy pipe_resource pointers without refcounting. */ 4987 memcpy(dst, buffers, count * sizeof(struct pipe_vertex_buffer)); 4988 } else { 4989 for (i = 0; i < count; i++) { 4990 const struct pipe_vertex_buffer *src = buffers + i; 4991 struct pipe_vertex_buffer *dsti = dst + i; 4992 struct pipe_resource *buf = src->buffer.resource; 4993 unsigned slot_bit = 1 << (start_slot + i); 4994 4995 pipe_resource_reference(&dsti->buffer.resource, buf); 4996 dsti->buffer_offset = src->buffer_offset; 4997 dsti->stride = src->stride; 4998 4999 if (dsti->buffer_offset & 3 || dsti->stride & 3) 5000 unaligned |= slot_bit; 5001 5002 si_context_add_resource_size(sctx, buf); 5003 if (buf) 5004 si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; 5005 } 5006 } 5007 } else { 5008 for (i = 0; i < count; i++) 5009 pipe_resource_reference(&dst[i].buffer.resource, NULL); 5010 } 5011 5012 for (i = 0; i < unbind_num_trailing_slots; i++) 5013 pipe_resource_reference(&dst[count + i].buffer.resource, NULL); 5014 5015 sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0; 5016 sctx->vertex_buffer_unaligned = (orig_unaligned & ~updated_mask) | unaligned; 5017 5018 /* Check whether alignment may have changed in a way that requires 5019 * shader changes. This check is conservative: a vertex buffer can only 5020 * trigger a shader change if the misalignment amount changes (e.g. 5021 * from byte-aligned to short-aligned), but we only keep track of 5022 * whether buffers are at least dword-aligned, since that should always 5023 * be the case in well-behaved applications anyway. 5024 */ 5025 if ((sctx->vertex_elements->vb_alignment_check_mask & 5026 (unaligned | orig_unaligned) & updated_mask)) { 5027 si_vs_key_update_inputs(sctx); 5028 sctx->do_update_shaders = true; 5029 } 5030} 5031 5032static struct pipe_vertex_state * 5033si_create_vertex_state(struct pipe_screen *screen, 5034 struct pipe_vertex_buffer *buffer, 5035 const struct pipe_vertex_element *elements, 5036 unsigned num_elements, 5037 struct pipe_resource *indexbuf, 5038 uint32_t full_velem_mask) 5039{ 5040 struct si_screen *sscreen = (struct si_screen *)screen; 5041 struct si_vertex_state *state = CALLOC_STRUCT(si_vertex_state); 5042 5043 util_init_pipe_vertex_state(screen, buffer, elements, num_elements, indexbuf, full_velem_mask, 5044 &state->b); 5045 5046 /* Initialize the vertex element state in state->element. 5047 * Do it by creating a vertex element state object and copying it there. 5048 */ 5049 struct si_context ctx = {}; 5050 ctx.b.screen = screen; 5051 struct si_vertex_elements *velems = si_create_vertex_elements(&ctx.b, num_elements, elements); 5052 state->velems = *velems; 5053 si_delete_vertex_element(&ctx.b, velems); 5054 5055 assert(!state->velems.instance_divisor_is_one); 5056 assert(!state->velems.instance_divisor_is_fetched); 5057 assert(!state->velems.fix_fetch_always); 5058 assert(buffer->stride % 4 == 0); 5059 assert(buffer->buffer_offset % 4 == 0); 5060 assert(!buffer->is_user_buffer); 5061 for (unsigned i = 0; i < num_elements; i++) { 5062 assert(elements[i].src_offset % 4 == 0); 5063 assert(!elements[i].dual_slot); 5064 } 5065 5066 for (unsigned i = 0; i < num_elements; i++) { 5067 si_set_vertex_buffer_descriptor(sscreen, &state->velems, &state->b.input.vbuffer, i, 5068 &state->descriptors[i * 4]); 5069 } 5070 5071 return &state->b; 5072} 5073 5074static void si_vertex_state_destroy(struct pipe_screen *screen, 5075 struct pipe_vertex_state *state) 5076{ 5077 pipe_vertex_buffer_unreference(&state->input.vbuffer); 5078 pipe_resource_reference(&state->input.indexbuf, NULL); 5079 FREE(state); 5080} 5081 5082static struct pipe_vertex_state * 5083si_pipe_create_vertex_state(struct pipe_screen *screen, 5084 struct pipe_vertex_buffer *buffer, 5085 const struct pipe_vertex_element *elements, 5086 unsigned num_elements, 5087 struct pipe_resource *indexbuf, 5088 uint32_t full_velem_mask) 5089{ 5090 struct si_screen *sscreen = (struct si_screen *)screen; 5091 5092 return util_vertex_state_cache_get(screen, buffer, elements, num_elements, indexbuf, 5093 full_velem_mask, &sscreen->vertex_state_cache); 5094} 5095 5096static void si_pipe_vertex_state_destroy(struct pipe_screen *screen, 5097 struct pipe_vertex_state *state) 5098{ 5099 struct si_screen *sscreen = (struct si_screen *)screen; 5100 5101 util_vertex_state_destroy(screen, &sscreen->vertex_state_cache, state); 5102} 5103 5104/* 5105 * Misc 5106 */ 5107 5108static void si_set_tess_state(struct pipe_context *ctx, const float default_outer_level[4], 5109 const float default_inner_level[2]) 5110{ 5111 struct si_context *sctx = (struct si_context *)ctx; 5112 struct pipe_constant_buffer cb; 5113 float array[8]; 5114 5115 memcpy(array, default_outer_level, sizeof(float) * 4); 5116 memcpy(array + 4, default_inner_level, sizeof(float) * 2); 5117 5118 cb.buffer = NULL; 5119 cb.user_buffer = array; 5120 cb.buffer_offset = 0; 5121 cb.buffer_size = sizeof(array); 5122 5123 si_set_internal_const_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); 5124} 5125 5126static void si_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertices) 5127{ 5128 struct si_context *sctx = (struct si_context *)ctx; 5129 5130 sctx->patch_vertices = patch_vertices; 5131} 5132 5133static void si_texture_barrier(struct pipe_context *ctx, unsigned flags) 5134{ 5135 struct si_context *sctx = (struct si_context *)ctx; 5136 5137 si_update_fb_dirtiness_after_rendering(sctx); 5138 5139 /* Multisample surfaces are flushed in si_decompress_textures. */ 5140 if (sctx->framebuffer.uncompressed_cb_mask) { 5141 si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 5142 sctx->framebuffer.CB_has_shader_readable_metadata, 5143 sctx->framebuffer.all_DCC_pipe_aligned); 5144 } 5145} 5146 5147/* This only ensures coherency for shader image/buffer stores. */ 5148static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) 5149{ 5150 struct si_context *sctx = (struct si_context *)ctx; 5151 5152 if (!(flags & ~PIPE_BARRIER_UPDATE)) 5153 return; 5154 5155 /* Subsequent commands must wait for all shader invocations to 5156 * complete. */ 5157 sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | 5158 SI_CONTEXT_PFP_SYNC_ME; 5159 5160 if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 5161 sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE; 5162 5163 if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_TEXTURE | 5164 PIPE_BARRIER_IMAGE | PIPE_BARRIER_STREAMOUT_BUFFER | PIPE_BARRIER_GLOBAL_BUFFER)) { 5165 /* As far as I can tell, L1 contents are written back to L2 5166 * automatically at end of shader, but the contents of other 5167 * L1 caches might still be stale. */ 5168 sctx->flags |= SI_CONTEXT_INV_VCACHE; 5169 } 5170 5171 if (flags & PIPE_BARRIER_INDEX_BUFFER) { 5172 /* Indices are read through TC L2 since GFX8. 5173 * L1 isn't used. 5174 */ 5175 if (sctx->screen->info.chip_class <= GFX7) 5176 sctx->flags |= SI_CONTEXT_WB_L2; 5177 } 5178 5179 /* MSAA color, any depth and any stencil are flushed in 5180 * si_decompress_textures when needed. 5181 */ 5182 if (flags & PIPE_BARRIER_FRAMEBUFFER && sctx->framebuffer.uncompressed_cb_mask) { 5183 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 5184 5185 if (sctx->chip_class <= GFX8) 5186 sctx->flags |= SI_CONTEXT_WB_L2; 5187 } 5188 5189 /* Indirect buffers use TC L2 on GFX9, but not older hw. */ 5190 if (sctx->screen->info.chip_class <= GFX8 && flags & PIPE_BARRIER_INDIRECT_BUFFER) 5191 sctx->flags |= SI_CONTEXT_WB_L2; 5192} 5193 5194static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 5195{ 5196 struct pipe_blend_state blend; 5197 5198 memset(&blend, 0, sizeof(blend)); 5199 blend.independent_blend_enable = true; 5200 blend.rt[0].colormask = 0xf; 5201 return si_create_blend_state_mode(&sctx->b, &blend, mode); 5202} 5203 5204void si_init_state_compute_functions(struct si_context *sctx) 5205{ 5206 sctx->b.create_sampler_state = si_create_sampler_state; 5207 sctx->b.delete_sampler_state = si_delete_sampler_state; 5208 sctx->b.create_sampler_view = si_create_sampler_view; 5209 sctx->b.sampler_view_destroy = si_sampler_view_destroy; 5210 sctx->b.memory_barrier = si_memory_barrier; 5211} 5212 5213void si_init_state_functions(struct si_context *sctx) 5214{ 5215 sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state; 5216 sctx->atoms.s.msaa_sample_locs.emit = si_emit_msaa_sample_locs; 5217 sctx->atoms.s.db_render_state.emit = si_emit_db_render_state; 5218 sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state; 5219 sctx->atoms.s.msaa_config.emit = si_emit_msaa_config; 5220 sctx->atoms.s.sample_mask.emit = si_emit_sample_mask; 5221 sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state; 5222 sctx->atoms.s.blend_color.emit = si_emit_blend_color; 5223 sctx->atoms.s.clip_regs.emit = si_emit_clip_regs; 5224 sctx->atoms.s.clip_state.emit = si_emit_clip_state; 5225 sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref; 5226 5227 sctx->b.create_blend_state = si_create_blend_state; 5228 sctx->b.bind_blend_state = si_bind_blend_state; 5229 sctx->b.delete_blend_state = si_delete_blend_state; 5230 sctx->b.set_blend_color = si_set_blend_color; 5231 5232 sctx->b.create_rasterizer_state = si_create_rs_state; 5233 sctx->b.bind_rasterizer_state = si_bind_rs_state; 5234 sctx->b.delete_rasterizer_state = si_delete_rs_state; 5235 5236 sctx->b.create_depth_stencil_alpha_state = si_create_dsa_state; 5237 sctx->b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 5238 sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 5239 5240 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 5241 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 5242 sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 5243 sctx->custom_blend_eliminate_fastclear = 5244 si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 5245 sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS); 5246 5247 sctx->b.set_clip_state = si_set_clip_state; 5248 sctx->b.set_stencil_ref = si_set_stencil_ref; 5249 5250 sctx->b.set_framebuffer_state = si_set_framebuffer_state; 5251 5252 sctx->b.set_sample_mask = si_set_sample_mask; 5253 5254 sctx->b.create_vertex_elements_state = si_create_vertex_elements; 5255 sctx->b.bind_vertex_elements_state = si_bind_vertex_elements; 5256 sctx->b.delete_vertex_elements_state = si_delete_vertex_element; 5257 sctx->b.set_vertex_buffers = si_set_vertex_buffers; 5258 5259 sctx->b.texture_barrier = si_texture_barrier; 5260 sctx->b.set_min_samples = si_set_min_samples; 5261 sctx->b.set_tess_state = si_set_tess_state; 5262 sctx->b.set_patch_vertices = si_set_patch_vertices; 5263 5264 sctx->b.set_active_query_state = si_set_active_query_state; 5265} 5266 5267void si_init_screen_state_functions(struct si_screen *sscreen) 5268{ 5269 sscreen->b.is_format_supported = si_is_format_supported; 5270 sscreen->b.create_vertex_state = si_pipe_create_vertex_state; 5271 sscreen->b.vertex_state_destroy = si_pipe_vertex_state_destroy; 5272 5273 if (sscreen->info.chip_class >= GFX10) { 5274 sscreen->make_texture_descriptor = gfx10_make_texture_descriptor; 5275 } else { 5276 sscreen->make_texture_descriptor = si_make_texture_descriptor; 5277 } 5278 5279 util_vertex_state_cache_init(&sscreen->vertex_state_cache, 5280 si_create_vertex_state, si_vertex_state_destroy); 5281} 5282 5283static void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value) 5284{ 5285 unsigned reg = sctx->chip_class >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX; 5286 si_pm4_set_reg(pm4, reg, value); 5287} 5288 5289static void si_set_grbm_gfx_index_se(struct si_context *sctx, struct si_pm4_state *pm4, unsigned se) 5290{ 5291 assert(se == ~0 || se < sctx->screen->info.max_se); 5292 si_set_grbm_gfx_index(sctx, pm4, 5293 (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) : S_030800_SE_INDEX(se)) | 5294 S_030800_SH_BROADCAST_WRITES(1) | 5295 S_030800_INSTANCE_BROADCAST_WRITES(1)); 5296} 5297 5298static void si_write_harvested_raster_configs(struct si_context *sctx, struct si_pm4_state *pm4, 5299 unsigned raster_config, unsigned raster_config_1) 5300{ 5301 unsigned num_se = MAX2(sctx->screen->info.max_se, 1); 5302 unsigned raster_config_se[4]; 5303 unsigned se; 5304 5305 ac_get_harvested_configs(&sctx->screen->info, raster_config, &raster_config_1, raster_config_se); 5306 5307 for (se = 0; se < num_se; se++) { 5308 si_set_grbm_gfx_index_se(sctx, pm4, se); 5309 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]); 5310 } 5311 si_set_grbm_gfx_index(sctx, pm4, ~0); 5312 5313 if (sctx->chip_class >= GFX7) { 5314 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 5315 } 5316} 5317 5318static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4) 5319{ 5320 struct si_screen *sscreen = sctx->screen; 5321 unsigned num_rb = MIN2(sscreen->info.max_render_backends, 16); 5322 unsigned rb_mask = sscreen->info.enabled_rb_mask; 5323 unsigned raster_config = sscreen->pa_sc_raster_config; 5324 unsigned raster_config_1 = sscreen->pa_sc_raster_config_1; 5325 5326 if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 5327 /* Always use the default config when all backends are enabled 5328 * (or when we failed to determine the enabled backends). 5329 */ 5330 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config); 5331 if (sctx->chip_class >= GFX7) 5332 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 5333 } else { 5334 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); 5335 } 5336} 5337 5338void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) 5339{ 5340 struct si_screen *sscreen = sctx->screen; 5341 uint64_t border_color_va = sctx->border_color_buffer->gpu_address; 5342 bool has_clear_state = sscreen->info.has_clear_state; 5343 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 5344 5345 if (!pm4) 5346 return; 5347 5348 if (!uses_reg_shadowing) { 5349 si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 5350 si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1)); 5351 si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1)); 5352 5353 if (has_clear_state) { 5354 si_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0)); 5355 si_pm4_cmd_add(pm4, 0); 5356 } 5357 } 5358 5359 /* CLEAR_STATE doesn't restore these correctly. */ 5360 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 5361 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 5362 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 5363 5364 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 5365 if (!has_clear_state) 5366 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 5367 5368 if (!has_clear_state) { 5369 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 5370 S_028230_ER_TRI(0xA) | S_028230_ER_POINT(0xA) | S_028230_ER_RECT(0xA) | 5371 /* Required by DX10_DIAMOND_TEST_ENA: */ 5372 S_028230_ER_LINE_LR(0x1A) | S_028230_ER_LINE_RL(0x26) | 5373 S_028230_ER_LINE_TB(0xA) | S_028230_ER_LINE_BT(0xA)); 5374 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 5375 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 5376 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 5377 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 5378 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); 5379 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 5380 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 5381 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 5382 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 5383 } 5384 5385 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 5386 if (sctx->chip_class >= GFX7) 5387 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); 5388 5389 if (sctx->chip_class == GFX6) { 5390 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, 5391 S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); 5392 } 5393 5394 if (sctx->chip_class <= GFX7 || !has_clear_state) { 5395 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 5396 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); 5397 5398 /* CLEAR_STATE doesn't clear these correctly on certain generations. 5399 * I don't know why. Deduced by trial and error. 5400 */ 5401 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 5402 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 5403 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 5404 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 5405 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 5406 } 5407 5408 if (sctx->chip_class >= GFX10) { 5409 si_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL, 5410 S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) | 5411 S_028038_POPS_DRAIN_PS_ON_OVERLAP(1)); 5412 } 5413 5414 unsigned cu_mask_ps = 0xffffffff; 5415 5416 /* It's wasteful to enable all CUs for PS if shader arrays have a different 5417 * number of CUs. The reason is that the hardware sends the same number of PS 5418 * waves to each shader array, so the slowest shader array limits the performance. 5419 * Disable the extra CUs for PS in other shader arrays to save power and thus 5420 * increase clocks for busy CUs. In the future, we might disable or enable this 5421 * tweak only for certain apps. 5422 */ 5423 if (sctx->chip_class >= GFX10_3) 5424 cu_mask_ps = u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa); 5425 5426 if (sctx->chip_class >= GFX7) { 5427 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 5428 S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F)); 5429 } 5430 5431 if (sctx->chip_class <= GFX8) { 5432 si_set_raster_config(sctx, pm4); 5433 5434 /* FIXME calculate these values somehow ??? */ 5435 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 5436 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 5437 5438 /* These registers, when written, also overwrite the CLEAR_STATE 5439 * context, so we can't rely on CLEAR_STATE setting them. 5440 * It would be an issue if there was another UMD changing them. 5441 */ 5442 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 5443 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 5444 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 5445 } 5446 5447 if (sscreen->info.chip_class >= GFX10) { 5448 si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, 5449 S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); 5450 si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, 5451 S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8)); 5452 } else if (sscreen->info.chip_class == GFX9) { 5453 si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, 5454 S_00B414_MEM_BASE(sscreen->info.address32_hi >> 8)); 5455 si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, 5456 S_00B214_MEM_BASE(sscreen->info.address32_hi >> 8)); 5457 } else { 5458 si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, 5459 S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); 5460 } 5461 5462 if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) { 5463 si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 5464 S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F)); 5465 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F)); 5466 si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 5467 S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F)); 5468 5469 /* If this is 0, Bonaire can hang even if GS isn't being used. 5470 * Other chips are unaffected. These are suboptimal values, 5471 * but we don't use on-chip GS. 5472 */ 5473 si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL, 5474 S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4)); 5475 } 5476 5477 if (sctx->chip_class == GFX8) { 5478 unsigned vgt_tess_distribution; 5479 5480 vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | 5481 S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16); 5482 5483 /* Testing with Unigine Heaven extreme tesselation yielded best results 5484 * with TRAP_SPLIT = 3. 5485 */ 5486 if (sctx->family == CHIP_FIJI || sctx->family >= CHIP_POLARIS10) 5487 vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); 5488 5489 si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); 5490 } 5491 5492 if (sscreen->info.chip_class <= GFX9) { 5493 si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); 5494 } 5495 5496 if (sctx->chip_class == GFX9) { 5497 si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0); 5498 si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0); 5499 si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0); 5500 5501 si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, 5502 S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | 5503 S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); 5504 } 5505 5506 if (sctx->chip_class >= GFX9) { 5507 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 5508 S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); 5509 5510 si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, 5511 S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) | 5512 S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6)); 5513 si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, 5514 S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) | 5515 S_028C48_MAX_PRIM_PER_BATCH(1023)); 5516 si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, 5517 S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); 5518 5519 si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0); 5520 si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 5521 sctx->chip_class >= GFX10 ? 0x20 : 0); 5522 } 5523 5524 if (sctx->chip_class >= GFX10) { 5525 /* Logical CUs 16 - 31 */ 5526 si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16)); 5527 si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff)); 5528 si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff)); 5529 5530 si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0); 5531 si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0); 5532 si_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0); 5533 si_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0); 5534 si_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0); 5535 si_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0); 5536 si_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0); 5537 si_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0); 5538 si_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0); 5539 si_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0); 5540 si_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0); 5541 si_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0); 5542 si_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0); 5543 si_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0); 5544 si_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0); 5545 si_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0); 5546 5547 si_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS, 5548 S_00B0C0_SOFT_GROUPING_EN(1) | 5549 S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); 5550 si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); 5551 5552 /* Enable CMASK/HTILE/DCC caching in L2 for small chips. */ 5553 unsigned meta_write_policy, meta_read_policy; 5554 if (sscreen->info.max_render_backends <= 4) { 5555 meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */ 5556 meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */ 5557 } else { 5558 meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */ 5559 meta_read_policy = V_02807C_CACHE_NOA; /* don't cache reads */ 5560 } 5561 5562 si_pm4_set_reg(pm4, R_02807C_DB_RMI_L2_CACHE_CONTROL, 5563 S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) | 5564 S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) | 5565 S_02807C_HTILE_WR_POLICY(meta_write_policy) | 5566 S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) | 5567 S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA) | 5568 S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA) | 5569 S_02807C_HTILE_RD_POLICY(meta_read_policy)); 5570 si_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL, 5571 S_028410_CMASK_WR_POLICY(meta_write_policy) | 5572 S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) | 5573 S_028410_DCC_WR_POLICY(meta_write_policy) | 5574 S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM) | 5575 S_028410_CMASK_RD_POLICY(meta_read_policy) | 5576 S_028410_FMASK_RD_POLICY(V_028410_CACHE_NOA) | 5577 S_028410_DCC_RD_POLICY(meta_read_policy) | 5578 S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA)); 5579 5580 si_pm4_set_reg(pm4, R_028428_CB_COVERAGE_OUT_CONTROL, 0); 5581 si_pm4_set_reg(pm4, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0); 5582 5583 /* Break up a pixel wave if it contains deallocs for more than 5584 * half the parameter cache. 5585 * 5586 * To avoid a deadlock where pixel waves aren't launched 5587 * because they're waiting for more pixels while the frontend 5588 * is stuck waiting for PC space, the maximum allowed value is 5589 * the size of the PC minus the largest possible allocation for 5590 * a single primitive shader subgroup. 5591 */ 5592 si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512)); 5593 /* Reuse for legacy (non-NGG) only. */ 5594 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 5595 5596 if (!has_clear_state) { 5597 si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 5598 sscreen->info.pa_sc_tile_steering_override); 5599 } 5600 5601 5602 si_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0); 5603 si_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0); 5604 si_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0); 5605 si_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0); 5606 si_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0); 5607 } 5608 5609 if (sctx->chip_class >= GFX10_3) { 5610 si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); 5611 /* The rate combiners have no effect if they are disabled like this: 5612 * VERTEX_RATE: BYPASS_VTX_RATE_COMBINER = 1 5613 * PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 1 5614 * HTILE_RATE: VRS_HTILE_ENCODING = 0 5615 * SAMPLE_ITER: PS_ITER_SAMPLE = 0 5616 * 5617 * Use OVERRIDE, which will ignore results from previous combiners. 5618 * (e.g. enabled sample shading overrides the vertex rate) 5619 */ 5620 si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL, 5621 S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) | 5622 S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE)); 5623 } 5624 5625 sctx->cs_preamble_state = pm4; 5626} 5627