1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
 */

#include "si_build_pm4.h"
#include "gfx9d.h"
#include "si_query.h"

#include "util/u_dual_blend.h"
#include "util/u_format.h"
#include "util/u_format_s3tc.h"
#include "util/u_memory.h"
#include "util/u_resource.h"
#include "util/u_upload_mgr.h"
#include "util/fast_idiv_by_const.h"

/* Translate a gallium PIPE_SWIZZLE_* component selector into the hardware
 * SQ_SEL_* encoding. Unknown values fall back to selecting X. */
static unsigned si_map_swizzle(unsigned swizzle)
{
	switch (swizzle) {
	case PIPE_SWIZZLE_Y:
		return V_008F0C_SQ_SEL_Y;
	case PIPE_SWIZZLE_Z:
		return V_008F0C_SQ_SEL_Z;
	case PIPE_SWIZZLE_W:
		return V_008F0C_SQ_SEL_W;
	case PIPE_SWIZZLE_0:
		return V_008F0C_SQ_SEL_0;
	case PIPE_SWIZZLE_1:
		return V_008F0C_SQ_SEL_1;
	default: /* PIPE_SWIZZLE_X */
		return V_008F0C_SQ_SEL_X;
	}
}

/* 12.4 fixed-point */
/* Convert a float to unsigned 12.4 fixed point, clamping to [0, 0xffff]
 * (i.e. negative values become 0, values >= 4096 saturate). */
static unsigned si_pack_float_12p4(float x)
{
	return x <= 0 ? 0 :
	       x >= 4096 ? 0xffff : x * 16;
}

/*
 * Inferred framebuffer and blender state.
 *
 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
 * if there is not enough PS outputs.
 */
static void si_emit_cb_render_state(struct si_context *sctx)
{
	struct radeon_cmdbuf *cs = sctx->gfx_cs;
	struct si_state_blend *blend = sctx->queued.named.blend;
	/* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,
	 * but you never know. */
	uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit;
	unsigned i;

	if (blend)
		cb_target_mask &= blend->cb_target_mask;

	/* Avoid a hang that happens when dual source blending is enabled
	 * but there is not enough color outputs. This is undefined behavior,
	 * so disable color writes completely.
	 *
	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
	 */
	if (blend && blend->dual_src_blend &&
	    sctx->ps_shader.cso &&
	    (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
		cb_target_mask = 0;

	/* GFX9: Flush DFSM when CB_TARGET_MASK changes.
	 * I think we don't have to do anything between IBs.
	 */
	if (sctx->screen->dfsm_allowed &&
	    sctx->last_cb_target_mask != cb_target_mask) {
		sctx->last_cb_target_mask = cb_target_mask;

		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
	}

	/* Remember the write position so we can detect below whether any
	 * register write actually went out (tracked regs may be skipped). */
	unsigned initial_cdw = cs->current.cdw;
	radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK,
				   SI_TRACKED_CB_TARGET_MASK, cb_target_mask);

	if (sctx->chip_class >= VI) {
		/* DCC MSAA workaround for blending.
		 * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_-
		 * COMBINER_DISABLE, but that would be more complicated.
		 */
		bool oc_disable = (sctx->chip_class == VI ||
				   sctx->chip_class == GFX9) &&
				  blend &&
				  blend->blend_enable_4bit & cb_target_mask &&
				  sctx->framebuffer.nr_samples >= 2;
		unsigned watermark = sctx->framebuffer.dcc_overwrite_combiner_watermark;

		radeon_opt_set_context_reg(
				sctx, R_028424_CB_DCC_CONTROL,
				SI_TRACKED_CB_DCC_CONTROL,
				S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
				S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
				S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) |
				S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->screen->has_dcc_constant_encode));
	}

	/* RB+ register settings. */
	if (sctx->screen->rbplus_allowed) {
		unsigned spi_shader_col_format =
			sctx->ps_shader.cso ?
			sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
		unsigned sx_ps_downconvert = 0;
		unsigned sx_blend_opt_epsilon = 0;
		unsigned sx_blend_opt_control = 0;

		/* Each colorbuffer uses a 4-bit field at bit position i*4 in
		 * the three SX_* accumulators built below. */
		for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
			struct si_surface *surf =
				(struct si_surface*)sctx->framebuffer.state.cbufs[i];
			unsigned format, swap, spi_format, colormask;
			bool has_alpha, has_rgb;

			if (!surf)
				continue;

			format = G_028C70_FORMAT(surf->cb_color_info);
			swap = G_028C70_COMP_SWAP(surf->cb_color_info);
			spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
			colormask = (cb_target_mask >> (i * 4)) & 0xf;

			/* Set if RGB and A are present. */
			has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);

			if (format == V_028C70_COLOR_8 ||
			    format == V_028C70_COLOR_16 ||
			    format == V_028C70_COLOR_32)
				has_rgb = !has_alpha;
			else
				has_rgb = true;

			/* Check the colormask and export format. */
			if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
				has_rgb = false;
			if (!(colormask & PIPE_MASK_A))
				has_alpha = false;

			if (spi_format == V_028714_SPI_SHADER_ZERO) {
				has_rgb = false;
				has_alpha = false;
			}

			/* Disable value checking for disabled channels. */
			if (!has_rgb)
				sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
			if (!has_alpha)
				sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);

			/* Enable down-conversion for 32bpp and smaller formats. */
			switch (format) {
			case V_028C70_COLOR_8:
			case V_028C70_COLOR_8_8:
			case V_028C70_COLOR_8_8_8_8:
				/* For 1 and 2-channel formats, use the superset thereof. */
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_5_6_5:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_1_5_5_5:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_4_4_4_4:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_32:
				if (swap == V_028C70_SWAP_STD &&
				    spi_format == V_028714_SPI_SHADER_32_R)
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
				else if (swap == V_028C70_SWAP_ALT_REV &&
					 spi_format == V_028714_SPI_SHADER_32_AR)
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
				break;

			case V_028C70_COLOR_16:
			case V_028C70_COLOR_16_16:
				/* For 1-channel formats, use the superset thereof. */
				if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
					if (swap == V_028C70_SWAP_STD ||
					    swap == V_028C70_SWAP_STD_REV)
						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
					else
						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
				}
				break;

			case V_028C70_COLOR_10_11_11:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_2_10_10_10:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
				}
				break;
			}
		}

		/* SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, SX_BLEND_OPT_CONTROL */
		radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT,
					    SI_TRACKED_SX_PS_DOWNCONVERT,
					    sx_ps_downconvert, sx_blend_opt_epsilon,
					    sx_blend_opt_control);
	}
	/* Any emitted context register invalidates preceding draw state. */
	if (initial_cdw != cs->current.cdw)
		sctx->context_roll = true;
}

/*
 * Blender functions
 */

/* Translate a gallium PIPE_BLEND_* equation to the CB_BLENDn_CONTROL
 * COMB_FCN encoding. Unknown values assert and return 0. */
static uint32_t si_translate_blend_function(int blend_func)
{
	switch (blend_func) {
	case PIPE_BLEND_ADD:
		return V_028780_COMB_DST_PLUS_SRC;
	case PIPE_BLEND_SUBTRACT:
		return V_028780_COMB_SRC_MINUS_DST;
	case PIPE_BLEND_REVERSE_SUBTRACT:
		return V_028780_COMB_DST_MINUS_SRC;
	case PIPE_BLEND_MIN:
		return V_028780_COMB_MIN_DST_SRC;
	case PIPE_BLEND_MAX:
		return V_028780_COMB_MAX_DST_SRC;
	default:
		PRINT_ERR("Unknown blend function %d\n", blend_func);
		assert(0);
		break;
	}
	return 0;
}

/* Translate a gallium PIPE_BLENDFACTOR_* factor to the CB_BLENDn_CONTROL
 * BLEND_* encoding. Unknown values assert and return 0. */
static uint32_t si_translate_blend_factor(int blend_fact)
{
	switch (blend_fact) {
	case
	PIPE_BLENDFACTOR_ONE:
		return V_028780_BLEND_ONE;
	case PIPE_BLENDFACTOR_SRC_COLOR:
		return V_028780_BLEND_SRC_COLOR;
	case PIPE_BLENDFACTOR_SRC_ALPHA:
		return V_028780_BLEND_SRC_ALPHA;
	case PIPE_BLENDFACTOR_DST_ALPHA:
		return V_028780_BLEND_DST_ALPHA;
	case PIPE_BLENDFACTOR_DST_COLOR:
		return V_028780_BLEND_DST_COLOR;
	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
		return V_028780_BLEND_SRC_ALPHA_SATURATE;
	case PIPE_BLENDFACTOR_CONST_COLOR:
		return V_028780_BLEND_CONSTANT_COLOR;
	case PIPE_BLENDFACTOR_CONST_ALPHA:
		return V_028780_BLEND_CONSTANT_ALPHA;
	case PIPE_BLENDFACTOR_ZERO:
		return V_028780_BLEND_ZERO;
	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
	case PIPE_BLENDFACTOR_INV_DST_COLOR:
		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
	case PIPE_BLENDFACTOR_SRC1_COLOR:
		return V_028780_BLEND_SRC1_COLOR;
	case PIPE_BLENDFACTOR_SRC1_ALPHA:
		return V_028780_BLEND_SRC1_ALPHA;
	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
		return V_028780_BLEND_INV_SRC1_COLOR;
	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
		return V_028780_BLEND_INV_SRC1_ALPHA;
	default:
		PRINT_ERR("Bad blend factor %d not supported!\n", blend_fact);
		assert(0);
		break;
	}
	return 0;
}

/* Translate a gallium blend equation to the RB+ SX_MRTn_BLEND_OPT
 * OPT_COMB_* encoding; unsupported equations disable the optimization. */
static uint32_t si_translate_blend_opt_function(int blend_func)
{
	switch (blend_func) {
	case PIPE_BLEND_ADD:
		return V_028760_OPT_COMB_ADD;
	case PIPE_BLEND_SUBTRACT:
		return V_028760_OPT_COMB_SUBTRACT;
	case PIPE_BLEND_REVERSE_SUBTRACT:
		return V_028760_OPT_COMB_REVSUBTRACT;
	case PIPE_BLEND_MIN:
		return V_028760_OPT_COMB_MIN;
	case PIPE_BLEND_MAX:
		return V_028760_OPT_COMB_MAX;
	default:
		return V_028760_OPT_COMB_BLEND_DISABLED;
	}
}

/* Translate a gallium blend factor to the RB+ SX_MRTn_BLEND_OPT
 * SRC/DST_OPT "preserve/ignore" encoding. is_alpha selects the alpha
 * channel variant of the color factors. */
static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
{
	switch (blend_fact) {
	case PIPE_BLENDFACTOR_ZERO:
		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
	case PIPE_BLENDFACTOR_ONE:
		return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
	case PIPE_BLENDFACTOR_SRC_COLOR:
		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
				: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
				: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
	case PIPE_BLENDFACTOR_SRC_ALPHA:
		return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
		return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
				: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
	default:
		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
	}
}

/* Record in blend->commutative_4bit which channels (chanmask) use a blend
 * configuration whose result is independent of fragment ordering, which
 * enables out-of-order rasterization for those channels. */
static void si_blend_check_commutativity(struct si_screen *sscreen,
					 struct si_state_blend *blend,
					 enum pipe_blend_func func,
					 enum pipe_blendfactor src,
					 enum pipe_blendfactor dst,
					 unsigned chanmask)
{
	/* Src factor is allowed when it does not depend on Dst */
	static const uint32_t src_allowed =
		(1u << PIPE_BLENDFACTOR_ONE) |
		(1u << PIPE_BLENDFACTOR_SRC_COLOR) |
		(1u << PIPE_BLENDFACTOR_SRC_ALPHA) |
		(1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) |
		(1u << PIPE_BLENDFACTOR_CONST_COLOR) |
		(1u << PIPE_BLENDFACTOR_CONST_ALPHA) |
		(1u << PIPE_BLENDFACTOR_SRC1_COLOR) |
		(1u << PIPE_BLENDFACTOR_SRC1_ALPHA) |
		(1u << PIPE_BLENDFACTOR_ZERO) |
		(1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) |
		(1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) |
		(1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) |
		(1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) |
		(1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) |
		(1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA);

	if (dst == PIPE_BLENDFACTOR_ONE &&
	    (src_allowed & (1u << src))) {
		/* Addition is commutative, but floating point addition isn't
		 * associative: subtle changes can be introduced via different
		 * rounding.
		 *
		 * Out-of-order is also non-deterministic, which means that
		 * this breaks OpenGL invariance requirements. So only enable
		 * out-of-order additive blending if explicitly allowed by a
		 * setting.
		 */
		if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN ||
		    (func == PIPE_BLEND_ADD && sscreen->commutative_blend_add))
			blend->commutative_4bit |= chanmask;
	}
}

/**
 * Get rid of DST in the blend factors by commuting the operands:
 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
 */
static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
				unsigned *dst_factor, unsigned expected_dst,
				unsigned replacement_src)
{
	if (*src_factor == expected_dst &&
	    *dst_factor == PIPE_BLENDFACTOR_ZERO) {
		*src_factor = PIPE_BLENDFACTOR_ZERO;
		*dst_factor = replacement_src;

		/* Commuting the operands requires reversing subtractions.
*/ 435 if (*func == PIPE_BLEND_SUBTRACT) 436 *func = PIPE_BLEND_REVERSE_SUBTRACT; 437 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 438 *func = PIPE_BLEND_SUBTRACT; 439 } 440} 441 442static bool si_blend_factor_uses_dst(unsigned factor) 443{ 444 return factor == PIPE_BLENDFACTOR_DST_COLOR || 445 factor == PIPE_BLENDFACTOR_DST_ALPHA || 446 factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 447 factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || 448 factor == PIPE_BLENDFACTOR_INV_DST_COLOR; 449} 450 451static void *si_create_blend_state_mode(struct pipe_context *ctx, 452 const struct pipe_blend_state *state, 453 unsigned mode) 454{ 455 struct si_context *sctx = (struct si_context*)ctx; 456 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 457 struct si_pm4_state *pm4 = &blend->pm4; 458 uint32_t sx_mrt_blend_opt[8] = {0}; 459 uint32_t color_control = 0; 460 461 if (!blend) 462 return NULL; 463 464 blend->alpha_to_coverage = state->alpha_to_coverage; 465 blend->alpha_to_one = state->alpha_to_one; 466 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 467 blend->logicop_enable = state->logicop_enable; 468 469 if (state->logicop_enable) { 470 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 471 } else { 472 color_control |= S_028808_ROP3(0xcc); 473 } 474 475 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 476 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 477 S_028B70_ALPHA_TO_MASK_OFFSET0(3) | 478 S_028B70_ALPHA_TO_MASK_OFFSET1(1) | 479 S_028B70_ALPHA_TO_MASK_OFFSET2(0) | 480 S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 481 S_028B70_OFFSET_ROUND(1)); 482 483 if (state->alpha_to_coverage) 484 blend->need_src_alpha_4bit |= 0xf; 485 486 blend->cb_target_mask = 0; 487 blend->cb_target_enabled_4bit = 0; 488 489 for (int i = 0; i < 8; i++) { 490 /* state->rt entries > 0 only written if independent blending */ 491 const int j = state->independent_blend_enable ? 
i : 0; 492 493 unsigned eqRGB = state->rt[j].rgb_func; 494 unsigned srcRGB = state->rt[j].rgb_src_factor; 495 unsigned dstRGB = state->rt[j].rgb_dst_factor; 496 unsigned eqA = state->rt[j].alpha_func; 497 unsigned srcA = state->rt[j].alpha_src_factor; 498 unsigned dstA = state->rt[j].alpha_dst_factor; 499 500 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 501 unsigned blend_cntl = 0; 502 503 sx_mrt_blend_opt[i] = 504 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 505 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 506 507 /* Only set dual source blending for MRT0 to avoid a hang. */ 508 if (i >= 1 && blend->dual_src_blend) { 509 /* Vulkan does this for dual source blending. */ 510 if (i == 1) 511 blend_cntl |= S_028780_ENABLE(1); 512 513 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 514 continue; 515 } 516 517 /* Only addition and subtraction equations are supported with 518 * dual source blending. 519 */ 520 if (blend->dual_src_blend && 521 (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 522 eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 523 assert(!"Unsupported equation for dual source blending"); 524 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 525 continue; 526 } 527 528 /* cb_render_state will disable unused ones */ 529 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 530 if (state->rt[j].colormask) 531 blend->cb_target_enabled_4bit |= 0xf << (4 * i); 532 533 if (!state->rt[j].colormask || !state->rt[j].blend_enable) { 534 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 535 continue; 536 } 537 538 si_blend_check_commutativity(sctx->screen, blend, 539 eqRGB, srcRGB, dstRGB, 0x7 << (4 * i)); 540 si_blend_check_commutativity(sctx->screen, blend, 541 eqA, srcA, dstA, 0x8 << (4 * i)); 542 543 /* Blending optimizations for RB+. 544 * These transformations don't change the behavior. 
545 * 546 * First, get rid of DST in the blend factors: 547 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 548 */ 549 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, 550 PIPE_BLENDFACTOR_DST_COLOR, 551 PIPE_BLENDFACTOR_SRC_COLOR); 552 si_blend_remove_dst(&eqA, &srcA, &dstA, 553 PIPE_BLENDFACTOR_DST_COLOR, 554 PIPE_BLENDFACTOR_SRC_COLOR); 555 si_blend_remove_dst(&eqA, &srcA, &dstA, 556 PIPE_BLENDFACTOR_DST_ALPHA, 557 PIPE_BLENDFACTOR_SRC_ALPHA); 558 559 /* Look up the ideal settings from tables. */ 560 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 561 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 562 srcA_opt = si_translate_blend_opt_factor(srcA, true); 563 dstA_opt = si_translate_blend_opt_factor(dstA, true); 564 565 /* Handle interdependencies. */ 566 if (si_blend_factor_uses_dst(srcRGB)) 567 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 568 if (si_blend_factor_uses_dst(srcA)) 569 dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 570 571 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 572 (dstRGB == PIPE_BLENDFACTOR_ZERO || 573 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 574 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 575 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 576 577 /* Set the final value. */ 578 sx_mrt_blend_opt[i] = 579 S_028760_COLOR_SRC_OPT(srcRGB_opt) | 580 S_028760_COLOR_DST_OPT(dstRGB_opt) | 581 S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 582 S_028760_ALPHA_SRC_OPT(srcA_opt) | 583 S_028760_ALPHA_DST_OPT(dstA_opt) | 584 S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 585 586 /* Set blend state. 
*/ 587 blend_cntl |= S_028780_ENABLE(1); 588 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 589 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 590 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 591 592 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 593 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 594 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 595 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 596 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 597 } 598 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 599 600 blend->blend_enable_4bit |= 0xfu << (i * 4); 601 602 /* This is only important for formats without alpha. */ 603 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 604 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 605 srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 606 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 607 srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || 608 dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 609 blend->need_src_alpha_4bit |= 0xfu << (i * 4); 610 } 611 612 if (blend->cb_target_mask) { 613 color_control |= S_028808_MODE(mode); 614 } else { 615 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 616 } 617 618 if (sctx->screen->rbplus_allowed) { 619 /* Disable RB+ blend optimizations for dual source blending. 620 * Vulkan does this. 621 */ 622 if (blend->dual_src_blend) { 623 for (int i = 0; i < 8; i++) { 624 sx_mrt_blend_opt[i] = 625 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 626 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 627 } 628 } 629 630 for (int i = 0; i < 8; i++) 631 si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, 632 sx_mrt_blend_opt[i]); 633 634 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. 
*/ 635 if (blend->dual_src_blend || state->logicop_enable || 636 mode == V_028808_CB_RESOLVE) 637 color_control |= S_028808_DISABLE_DUAL_QUAD(1); 638 } 639 640 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 641 return blend; 642} 643 644static void *si_create_blend_state(struct pipe_context *ctx, 645 const struct pipe_blend_state *state) 646{ 647 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 648} 649 650static void si_bind_blend_state(struct pipe_context *ctx, void *state) 651{ 652 struct si_context *sctx = (struct si_context *)ctx; 653 struct si_state_blend *old_blend = sctx->queued.named.blend; 654 struct si_state_blend *blend = (struct si_state_blend *)state; 655 656 if (!state) 657 return; 658 659 si_pm4_bind_state(sctx, blend, state); 660 661 if (!old_blend || 662 old_blend->cb_target_mask != blend->cb_target_mask || 663 old_blend->dual_src_blend != blend->dual_src_blend || 664 (old_blend->blend_enable_4bit != blend->blend_enable_4bit && 665 sctx->framebuffer.nr_samples >= 2 && 666 sctx->screen->dcc_msaa_allowed)) 667 si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 668 669 if (!old_blend || 670 old_blend->cb_target_mask != blend->cb_target_mask || 671 old_blend->alpha_to_coverage != blend->alpha_to_coverage || 672 old_blend->alpha_to_one != blend->alpha_to_one || 673 old_blend->dual_src_blend != blend->dual_src_blend || 674 old_blend->blend_enable_4bit != blend->blend_enable_4bit || 675 old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit) 676 sctx->do_update_shaders = true; 677 678 if (sctx->screen->dpbb_allowed && 679 (!old_blend || 680 old_blend->alpha_to_coverage != blend->alpha_to_coverage || 681 old_blend->blend_enable_4bit != blend->blend_enable_4bit || 682 old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit)) 683 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 684 685 if (sctx->screen->has_out_of_order_rast && 686 (!old_blend || 687 (old_blend->blend_enable_4bit != 
	      blend->blend_enable_4bit ||
	      old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
	      old_blend->commutative_4bit != blend->commutative_4bit ||
	      old_blend->logicop_enable != blend->logicop_enable)))
		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
}

/* pipe_context::delete_blend_state hook. */
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
}

/* pipe_context::set_blend_color hook: cache the color and remember whether
 * it is all-zero (lets later code skip work), then dirty the atom. */
static void si_set_blend_color(struct pipe_context *ctx,
			       const struct pipe_blend_color *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	static const struct pipe_blend_color zeros;

	sctx->blend_color.state = *state;
	sctx->blend_color.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
	si_mark_atom_dirty(sctx, &sctx->atoms.s.blend_color);
}

/* Emit the 4 CB_BLEND_RED..ALPHA constant-color registers. */
static void si_emit_blend_color(struct si_context *sctx)
{
	struct radeon_cmdbuf *cs = sctx->gfx_cs;

	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
}

/*
 * Clipping
 */

/* pipe_context::set_clip_state hook: cache the user clip planes and upload
 * them as an internal constant buffer for the vertex shader. */
static void si_set_clip_state(struct pipe_context *ctx,
			      const struct pipe_clip_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer cb;
	static const struct pipe_clip_state zeros;

	if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
		return;

	sctx->clip_state.state = *state;
	sctx->clip_state.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
	si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_state);

	/* 8 planes * 4 floats * 4 bytes. */
	cb.buffer = NULL;
	cb.user_buffer = state->ucp;
	cb.buffer_offset = 0;
	cb.buffer_size = 4*4*8;
	si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
	pipe_resource_reference(&cb.buffer, NULL);
}

/* Emit the 6 hardware user-clip-plane register sets (6 planes * 4 dwords). */
static void si_emit_clip_state(struct si_context *sctx)
{
	struct radeon_cmdbuf *cs = sctx->gfx_cs;

	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
}

/* Emit PA_CL_VS_OUT_CNTL and PA_CL_CLIP_CNTL derived from the bound VS's
 * clip/cull distance masks and the rasterizer state. */
static void si_emit_clip_regs(struct si_context *sctx)
{
	struct si_shader *vs = si_get_vs_state(sctx);
	struct si_shader_selector *vs_sel = vs->selector;
	struct tgsi_shader_info *info = &vs_sel->info;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned window_space =
		info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
	unsigned clipdist_mask = vs_sel->clipdist_mask;
	/* Legacy UCPs are only used when the shader writes no clip distances. */
	unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
	unsigned culldist_mask = vs_sel->culldist_mask;
	unsigned total_mask;

	if (vs->key.opt.clip_disable) {
		assert(!info->culldist_writemask);
		clipdist_mask = 0;
		culldist_mask = 0;
	}
	total_mask = clipdist_mask | culldist_mask;

	/* Clip distances on points have no effect, so need to be implemented
	 * as cull distances. This applies for the clipvertex case as well.
	 *
	 * Setting this for primitives other than points should have no adverse
	 * effects.
	 */
	clipdist_mask &= rs->clip_plane_enable;
	culldist_mask |= clipdist_mask;

	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
	radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
				   SI_TRACKED_PA_CL_VS_OUT_CNTL,
				   vs_sel->pa_cl_vs_out_cntl |
				   S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
				   S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
				   clipdist_mask | (culldist_mask << 8));
	radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL,
				   SI_TRACKED_PA_CL_CLIP_CNTL,
				   rs->pa_cl_clip_cntl |
				   ucp_mask |
				   S_028810_CLIP_DISABLE(window_space));

	/* If anything was actually written, the context state rolled. */
	if (initial_cdw != sctx->gfx_cs->current.cdw)
		sctx->context_roll = true;
}

/*
 * inferred state between framebuffer and rasterizer
 */
/* Bind the precomputed polygon-offset pm4 state matching the current
 * zbuffer format (16-bit / 24-bit / 32-bit float), or none at all. */
static void si_update_poly_offset_state(struct si_context *sctx)
{
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;

	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
		si_pm4_bind_state(sctx, poly_offset, NULL);
		return;
	}

	/* Use the user format, not db_render_format, so that the polygon
	 * offset behaves as expected by applications.
813 */ 814 switch (sctx->framebuffer.state.zsbuf->texture->format) { 815 case PIPE_FORMAT_Z16_UNORM: 816 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 817 break; 818 default: /* 24-bit */ 819 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 820 break; 821 case PIPE_FORMAT_Z32_FLOAT: 822 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 823 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 824 break; 825 } 826} 827 828/* 829 * Rasterizer 830 */ 831 832static uint32_t si_translate_fill(uint32_t func) 833{ 834 switch(func) { 835 case PIPE_POLYGON_MODE_FILL: 836 return V_028814_X_DRAW_TRIANGLES; 837 case PIPE_POLYGON_MODE_LINE: 838 return V_028814_X_DRAW_LINES; 839 case PIPE_POLYGON_MODE_POINT: 840 return V_028814_X_DRAW_POINTS; 841 default: 842 assert(0); 843 return V_028814_X_DRAW_POINTS; 844 } 845} 846 847static void *si_create_rs_state(struct pipe_context *ctx, 848 const struct pipe_rasterizer_state *state) 849{ 850 struct si_screen *sscreen = ((struct si_context *)ctx)->screen; 851 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 852 struct si_pm4_state *pm4 = &rs->pm4; 853 unsigned tmp, i; 854 float psize_min, psize_max; 855 856 if (!rs) { 857 return NULL; 858 } 859 860 rs->scissor_enable = state->scissor; 861 rs->clip_halfz = state->clip_halfz; 862 rs->two_side = state->light_twoside; 863 rs->multisample_enable = state->multisample; 864 rs->force_persample_interp = state->force_persample_interp; 865 rs->clip_plane_enable = state->clip_plane_enable; 866 rs->half_pixel_center = state->half_pixel_center; 867 rs->line_stipple_enable = state->line_stipple_enable; 868 rs->poly_stipple_enable = state->poly_stipple_enable; 869 rs->line_smooth = state->line_smooth; 870 rs->line_width = state->line_width; 871 rs->poly_smooth = state->poly_smooth; 872 rs->uses_poly_offset = state->offset_point || state->offset_line || 873 state->offset_tri; 874 rs->clamp_fragment_color = state->clamp_fragment_color; 875 
	rs->clamp_vertex_color = state->clamp_vertex_color;
	rs->flatshade = state->flatshade;
	rs->sprite_coord_enable = state->sprite_coord_enable;
	rs->rasterizer_discard = state->rasterizer_discard;
	rs->pa_sc_line_stipple = state->line_stipple_enable ?
					S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
					S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
	rs->pa_cl_clip_cntl =
		S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) |
		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) |
		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);

	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
		S_0286D4_FLAT_SHADE_ENA(1) |
		S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) |
		S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
		S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
		S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
		S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
		S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));

	/* point size 12.4 fixed point */
	/* NOTE(review): *8 rather than *16 presumably folds in the /2
	 * radius conversion used for MINMAX below — confirm against the
	 * register spec. */
	tmp = (unsigned)(state->point_size * 8.0);
	si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));

	if (state->point_size_per_vertex) {
		psize_min = util_get_min_point_size(state);
		psize_max = SI_MAX_POINT_SIZE;
	} else {
		/* Force the point size to be as if the vertex output was disabled. */
		psize_min = state->point_size;
		psize_max = state->point_size;
	}
	rs->max_point_size = psize_max;

	/* Divide by two, because 0.5 = 1 pixel. */
	si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
			S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
			S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));

	si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL,
		       S_028A08_WIDTH(si_pack_float_12p4(state->line_width/2)));
	si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
		       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
		       S_028A48_MSAA_ENABLE(state->multisample ||
					    state->poly_smooth ||
					    state->line_smooth) |
		       S_028A48_VPORT_SCISSOR_ENABLE(1) |
		       S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.chip_class >= GFX9));

	si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
	si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
		S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
		S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
		S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
		S_028814_FACE(!state->front_ccw) |
		S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
		S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
		S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
				   state->fill_back != PIPE_POLYGON_MODE_FILL) |
		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));

	/* No polygon offset requested: skip building the per-zbuffer states. */
	if (!rs->uses_poly_offset)
		return rs;

	rs->pm4_poly_offset = CALLOC(3, sizeof(struct si_pm4_state));
	if (!rs->pm4_poly_offset) {
		FREE(rs);
		return NULL;
	}

	/* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
	for (i = 0; i < 3; i++) {
		struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
		float offset_units = state->offset_units;
		float offset_scale = state->offset_scale * 16.0f;
		uint32_t pa_su_poly_offset_db_fmt_cntl = 0;

		if (!state->offset_units_unscaled) {
			switch (i) {
			case 0: /* 16-bit zbuffer */
				offset_units *= 4.0f;
				pa_su_poly_offset_db_fmt_cntl =
					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
				break;
			case 1: /* 24-bit zbuffer */
				offset_units *= 2.0f;
				pa_su_poly_offset_db_fmt_cntl =
					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
				break;
			case 2: /* 32-bit zbuffer */
				offset_units *= 1.0f;
				pa_su_poly_offset_db_fmt_cntl =
					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
					S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
				break;
			}
		}

		si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
			       fui(offset_scale));
		si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
			       fui(offset_units));
		si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
			       fui(offset_scale));
		si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
			       fui(offset_units));
		si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
			       pa_su_poly_offset_db_fmt_cntl);
	}

	return rs;
}

/* Bind a rasterizer CSO and mark every state atom that depends on a field
 * that changed relative to the previously bound CSO. */
static void si_bind_rs_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_state_rasterizer *old_rs =
		(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;

	if (!state)
		return;

	if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) {
		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);

		/* Update the small primitive filter workaround if necessary.
 */
		if (sctx->screen->has_msaa_sample_loc_bug &&
		    sctx->framebuffer.nr_samples > 1)
			si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
	}

	/* Propagate vertex-color clamping into the VS state word. */
	sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
	sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);

	si_pm4_bind_state(sctx, rasterizer, rs);
	si_update_poly_offset_state(sctx);

	if (!old_rs ||
	    old_rs->scissor_enable != rs->scissor_enable)
		si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);

	if (!old_rs ||
	    old_rs->line_width != rs->line_width ||
	    old_rs->max_point_size != rs->max_point_size ||
	    old_rs->half_pixel_center != rs->half_pixel_center)
		si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband);

	if (!old_rs ||
	    old_rs->clip_halfz != rs->clip_halfz)
		si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports);

	if (!old_rs ||
	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
	    old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
		si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);

	sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
		rs->line_stipple_enable;

	/* Any change to a field that shader variant selection keys on
	 * forces a shader update. */
	if (!old_rs ||
	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
	    old_rs->rasterizer_discard != rs->rasterizer_discard ||
	    old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
	    old_rs->flatshade != rs->flatshade ||
	    old_rs->two_side != rs->two_side ||
	    old_rs->multisample_enable != rs->multisample_enable ||
	    old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
	    old_rs->poly_smooth != rs->poly_smooth ||
	    old_rs->line_smooth != rs->line_smooth ||
	    old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
	    old_rs->force_persample_interp != rs->force_persample_interp)
		sctx->do_update_shaders = true;
}

/* Destroy a rasterizer CSO, unbinding its poly-offset state first if it is
 * the currently bound one. */
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;

	if (sctx->queued.named.rasterizer == state)
		si_pm4_bind_state(sctx, poly_offset, NULL);

	/* pm4_poly_offset may be NULL (no poly offset); FREE(NULL) is a no-op. */
	FREE(rs->pm4_poly_offset);
	si_pm4_delete_state(sctx, rasterizer, rs);
}

/*
 * Inferred state between dsa and stencil ref
 */
static void si_emit_stencil_ref(struct si_context *sctx)
{
	struct radeon_cmdbuf *cs = sctx->gfx_cs;
	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;

	/* DB_STENCILREFMASK combines the ref value (set_stencil_ref) with the
	 * masks taken from the bound DSA state. */
	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
	radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
			S_028430_STENCILMASK(dsa->valuemask[0]) |
			S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
			S_028430_STENCILOPVAL(1));
	radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
			S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
			S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
			S_028434_STENCILOPVAL_BF(1));
}

/* pipe_context::set_stencil_ref: store the new ref values and dirty the
 * stencil-ref atom, skipping the work when nothing changed. */
static void si_set_stencil_ref(struct pipe_context *ctx,
			       const struct pipe_stencil_ref *state)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
		return;

	sctx->stencil_ref.state = *state;
	si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
}


/*
 * DSA
 */

/* Map a gallium stencil op to the DB_STENCIL_CONTROL encoding. */
static uint32_t si_translate_stencil_op(int s_op)
{
	switch (s_op) {
	case PIPE_STENCIL_OP_KEEP:
		return V_02842C_STENCIL_KEEP;
	case PIPE_STENCIL_OP_ZERO:
		return V_02842C_STENCIL_ZERO;
	case PIPE_STENCIL_OP_REPLACE:
		return V_02842C_STENCIL_REPLACE_TEST;
	case PIPE_STENCIL_OP_INCR:
		return V_02842C_STENCIL_ADD_CLAMP;
	case PIPE_STENCIL_OP_DECR:
		return V_02842C_STENCIL_SUB_CLAMP;
	case PIPE_STENCIL_OP_INCR_WRAP:
		return
 V_02842C_STENCIL_ADD_WRAP;
	case PIPE_STENCIL_OP_DECR_WRAP:
		return V_02842C_STENCIL_SUB_WRAP;
	case PIPE_STENCIL_OP_INVERT:
		return V_02842C_STENCIL_INVERT;
	default:
		PRINT_ERR("Unknown stencil op %d", s_op);
		assert(0);
		break;
	}
	return 0;
}

/* Whether this stencil face state can modify the stencil buffer:
 * enabled, write mask non-zero, and at least one op that isn't KEEP. */
static bool si_dsa_writes_stencil(const struct pipe_stencil_state *s)
{
	return s->enabled && s->writemask &&
	       (s->fail_op != PIPE_STENCIL_OP_KEEP ||
		s->zfail_op != PIPE_STENCIL_OP_KEEP ||
		s->zpass_op != PIPE_STENCIL_OP_KEEP);
}

static bool si_order_invariant_stencil_op(enum pipe_stencil_op op)
{
	/* REPLACE is normally order invariant, except when the stencil
	 * reference value is written by the fragment shader. Tracking this
	 * interaction does not seem worth the effort, so be conservative. */
	return op != PIPE_STENCIL_OP_INCR &&
	       op != PIPE_STENCIL_OP_DECR &&
	       op != PIPE_STENCIL_OP_REPLACE;
}

/* Compute whether, assuming Z writes are disabled, this stencil state is order
 * invariant in the sense that the set of passing fragments as well as the
 * final stencil buffer result does not depend on the order of fragments. */
static bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state)
{
	return !state->enabled || !state->writemask ||
	       /* The following assumes that Z writes are disabled.
*/ 1155 (state->func == PIPE_FUNC_ALWAYS && 1156 si_order_invariant_stencil_op(state->zpass_op) && 1157 si_order_invariant_stencil_op(state->zfail_op)) || 1158 (state->func == PIPE_FUNC_NEVER && 1159 si_order_invariant_stencil_op(state->fail_op)); 1160} 1161 1162static void *si_create_dsa_state(struct pipe_context *ctx, 1163 const struct pipe_depth_stencil_alpha_state *state) 1164{ 1165 struct si_context *sctx = (struct si_context *)ctx; 1166 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 1167 struct si_pm4_state *pm4 = &dsa->pm4; 1168 unsigned db_depth_control; 1169 uint32_t db_stencil_control = 0; 1170 1171 if (!dsa) { 1172 return NULL; 1173 } 1174 1175 dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 1176 dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 1177 dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 1178 dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 1179 1180 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 1181 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 1182 S_028800_ZFUNC(state->depth.func) | 1183 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test); 1184 1185 /* stencil */ 1186 if (state->stencil[0].enabled) { 1187 db_depth_control |= S_028800_STENCIL_ENABLE(1); 1188 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 1189 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 1190 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 1191 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 1192 1193 if (state->stencil[1].enabled) { 1194 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 1195 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 1196 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 1197 db_stencil_control |= 
S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 1198 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 1199 } 1200 } 1201 1202 /* alpha */ 1203 if (state->alpha.enabled) { 1204 dsa->alpha_func = state->alpha.func; 1205 1206 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 1207 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 1208 } else { 1209 dsa->alpha_func = PIPE_FUNC_ALWAYS; 1210 } 1211 1212 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1213 if (state->stencil[0].enabled) 1214 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 1215 if (state->depth.bounds_test) { 1216 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); 1217 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); 1218 } 1219 1220 dsa->depth_enabled = state->depth.enabled; 1221 dsa->depth_write_enabled = state->depth.enabled && 1222 state->depth.writemask; 1223 dsa->stencil_enabled = state->stencil[0].enabled; 1224 dsa->stencil_write_enabled = state->stencil[0].enabled && 1225 (si_dsa_writes_stencil(&state->stencil[0]) || 1226 si_dsa_writes_stencil(&state->stencil[1])); 1227 dsa->db_can_write = dsa->depth_write_enabled || 1228 dsa->stencil_write_enabled; 1229 1230 bool zfunc_is_ordered = 1231 state->depth.func == PIPE_FUNC_NEVER || 1232 state->depth.func == PIPE_FUNC_LESS || 1233 state->depth.func == PIPE_FUNC_LEQUAL || 1234 state->depth.func == PIPE_FUNC_GREATER || 1235 state->depth.func == PIPE_FUNC_GEQUAL; 1236 1237 bool nozwrite_and_order_invariant_stencil = 1238 !dsa->db_can_write || 1239 (!dsa->depth_write_enabled && 1240 si_order_invariant_stencil_state(&state->stencil[0]) && 1241 si_order_invariant_stencil_state(&state->stencil[1])); 1242 1243 dsa->order_invariance[1].zs = 1244 nozwrite_and_order_invariant_stencil || 1245 (!dsa->stencil_write_enabled && zfunc_is_ordered); 1246 dsa->order_invariance[0].zs 
 = !dsa->depth_write_enabled || zfunc_is_ordered;

	dsa->order_invariance[1].pass_set =
		nozwrite_and_order_invariant_stencil ||
		(!dsa->stencil_write_enabled &&
		 (state->depth.func == PIPE_FUNC_ALWAYS ||
		  state->depth.func == PIPE_FUNC_NEVER));
	dsa->order_invariance[0].pass_set =
		!dsa->depth_write_enabled ||
		(state->depth.func == PIPE_FUNC_ALWAYS ||
		 state->depth.func == PIPE_FUNC_NEVER);

	/* pass_last additionally requires the app to promise no Z-fighting. */
	dsa->order_invariance[1].pass_last =
		sctx->screen->assume_no_z_fights &&
		!dsa->stencil_write_enabled &&
		dsa->depth_write_enabled && zfunc_is_ordered;
	dsa->order_invariance[0].pass_last =
		sctx->screen->assume_no_z_fights &&
		dsa->depth_write_enabled && zfunc_is_ordered;

	return dsa;
}

/* Bind a DSA CSO and dirty the atoms that depend on fields that changed. */
static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
	struct si_state_dsa *dsa = state;

	if (!state)
		return;

	si_pm4_bind_state(sctx, dsa, dsa);

	/* Stencil masks live in DB_STENCILREFMASK together with the ref value,
	 * so a mask change also re-emits the stencil-ref atom. */
	if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
		   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
		sctx->stencil_ref.dsa_part = dsa->stencil_ref;
		si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
	}

	if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
		sctx->do_update_shaders = true;

	if (sctx->screen->dpbb_allowed &&
	    (!old_dsa ||
	     (old_dsa->depth_enabled != dsa->depth_enabled ||
	      old_dsa->stencil_enabled != dsa->stencil_enabled ||
	      old_dsa->db_can_write != dsa->db_can_write)))
		si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);

	if (sctx->screen->has_out_of_order_rast &&
	    (!old_dsa ||
	     memcmp(old_dsa->order_invariance, dsa->order_invariance,
		    sizeof(old_dsa->order_invariance))))
		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
}

static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
}

/* Create the all-defaults DSA state used internally for DB flushes. */
static void *si_create_db_flush_dsa(struct si_context *sctx)
{
	struct pipe_depth_stencil_alpha_state dsa = {};

	return sctx->b.create_depth_stencil_alpha_state(&sctx->b, &dsa);
}

/* DB RENDER STATE */

/* pipe_context::set_active_query_state: pause/resume queries around
 * internal blits and similar operations. */
static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
{
	struct si_context *sctx = (struct si_context*)ctx;

	/* Pipeline stat & streamout queries. */
	if (enable) {
		sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
		sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS;
	} else {
		sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
		sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
	}

	/* Occlusion queries. */
	if (sctx->occlusion_queries_disabled != !enable) {
		sctx->occlusion_queries_disabled = !enable;
		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
	}
}

void si_set_occlusion_query_state(struct si_context *sctx,
				  bool old_perfect_enable)
{
	si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);

	bool perfect_enable = sctx->num_perfect_occlusion_queries != 0;

	/* Perfect (non-approximate) counting affects the MSAA config. */
	if (perfect_enable != old_perfect_enable)
		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
}

/* Save the compute shader, constant buffer 0, and the first 3 shader buffers
 * (with their writable bits) so query-buffer-object shaders can run. */
void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
{
	st->saved_compute = sctx->cs_shader_state.program;

	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
	si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);

	st->saved_ssbo_writable_mask = 0;

	for (unsigned i = 0; i < 3; i++) {
		if (sctx->const_and_shader_buffers[PIPE_SHADER_COMPUTE].writable_mask &
		    (1u << si_get_shaderbuf_slot(i)))
			st->saved_ssbo_writable_mask |= 1 << i;
	}
}

/* Emit DB_RENDER_CONTROL, DB_COUNT_CONTROL, DB_RENDER_OVERRIDE2 and
 * DB_SHADER_CONTROL derived from current context flags. */
static void si_emit_db_render_state(struct si_context *sctx)
{
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned db_shader_control, db_render_control, db_count_control;
	/* Remember the write position to detect whether anything was emitted. */
	unsigned initial_cdw = sctx->gfx_cs->current.cdw;

	/* DB_RENDER_CONTROL */
	if (sctx->dbcb_depth_copy_enabled ||
	    sctx->dbcb_stencil_copy_enabled) {
		db_render_control =
			S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
			S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
			S_028000_COPY_CENTROID(1) |
			S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample);
	} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
		db_render_control =
			S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
			S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace);
	} else {
		db_render_control =
			S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
			S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear);
	}

	/* DB_COUNT_CONTROL (occlusion queries) */
	if (sctx->num_occlusion_queries > 0 &&
	    !sctx->occlusion_queries_disabled) {
		bool perfect = sctx->num_perfect_occlusion_queries > 0;

		if (sctx->chip_class >= CIK) {
			unsigned log_sample_rate = sctx->framebuffer.log_samples;

			/* Stoney doesn't increment occlusion query counters
			 * if the sample rate is 16x. Use 8x sample rate instead.
			 */
			if (sctx->family == CHIP_STONEY)
				log_sample_rate = MIN2(log_sample_rate, 3);

			db_count_control =
				S_028004_PERFECT_ZPASS_COUNTS(perfect) |
				S_028004_SAMPLE_RATE(log_sample_rate) |
				S_028004_ZPASS_ENABLE(1) |
				S_028004_SLICE_EVEN_ENABLE(1) |
				S_028004_SLICE_ODD_ENABLE(1);
		} else {
			db_count_control =
				S_028004_PERFECT_ZPASS_COUNTS(perfect) |
				S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples);
		}
	} else {
		/* Disable occlusion queries. */
		if (sctx->chip_class >= CIK) {
			db_count_control = 0;
		} else {
			db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
		}
	}

	radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL,
				    SI_TRACKED_DB_RENDER_CONTROL, db_render_control,
				    db_count_control);

	/* DB_RENDER_OVERRIDE2 */
	radeon_opt_set_context_reg(sctx, R_028010_DB_RENDER_OVERRIDE2,
				   SI_TRACKED_DB_RENDER_OVERRIDE2,
				   S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
				   S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
				   S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));

	db_shader_control = sctx->ps_db_shader_control;

	/* Bug workaround for smoothing (overrasterization) on SI. */
	if (sctx->chip_class == SI && sctx->smoothing_enabled) {
		db_shader_control &= C_02880C_Z_ORDER;
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
	}

	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled.
 */
	if (!rs->multisample_enable)
		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;

	if (sctx->screen->has_rbplus &&
	    !sctx->screen->rbplus_allowed)
		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);

	radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL,
				   SI_TRACKED_DB_SHADER_CONTROL, db_shader_control);

	/* If any register was written, record a context roll. */
	if (initial_cdw != sctx->gfx_cs->current.cdw)
		sctx->context_roll = true;
}

/*
 * format translation
 */

/* Map a gallium color format to the CB_COLOR_INFO FORMAT field;
 * returns V_028C70_COLOR_INVALID for unsupported formats. */
static uint32_t si_translate_colorformat(enum pipe_format format)
{
	const struct util_format_description *desc = util_format_description(format);
	if (!desc)
		return V_028C70_COLOR_INVALID;

#define HAS_SIZE(x,y,z,w) \
	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
	 desc->channel[2].size == (z) && desc->channel[3].size == (w))

	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
		return V_028C70_COLOR_10_11_11;

	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
		return V_028C70_COLOR_INVALID;

	/* hw cannot support mixed formats (except depth/stencil, since
	 * stencil is not written to).
 */
	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
		return V_028C70_COLOR_INVALID;

	/* Dispatch on channel count, then on per-channel bit sizes. */
	switch (desc->nr_channels) {
	case 1:
		switch (desc->channel[0].size) {
		case 8:
			return V_028C70_COLOR_8;
		case 16:
			return V_028C70_COLOR_16;
		case 32:
			return V_028C70_COLOR_32;
		}
		break;
	case 2:
		if (desc->channel[0].size == desc->channel[1].size) {
			switch (desc->channel[0].size) {
			case 8:
				return V_028C70_COLOR_8_8;
			case 16:
				return V_028C70_COLOR_16_16;
			case 32:
				return V_028C70_COLOR_32_32;
			}
		} else if (HAS_SIZE(8,24,0,0)) {
			return V_028C70_COLOR_24_8;
		} else if (HAS_SIZE(24,8,0,0)) {
			return V_028C70_COLOR_8_24;
		}
		break;
	case 3:
		if (HAS_SIZE(5,6,5,0)) {
			return V_028C70_COLOR_5_6_5;
		} else if (HAS_SIZE(32,8,24,0)) {
			return V_028C70_COLOR_X24_8_32_FLOAT;
		}
		break;
	case 4:
		if (desc->channel[0].size == desc->channel[1].size &&
		    desc->channel[0].size == desc->channel[2].size &&
		    desc->channel[0].size == desc->channel[3].size) {
			switch (desc->channel[0].size) {
			case 4:
				return V_028C70_COLOR_4_4_4_4;
			case 8:
				return V_028C70_COLOR_8_8_8_8;
			case 16:
				return V_028C70_COLOR_16_16_16_16;
			case 32:
				return V_028C70_COLOR_32_32_32_32;
			}
		} else if (HAS_SIZE(5,5,5,1)) {
			return V_028C70_COLOR_1_5_5_5;
		} else if (HAS_SIZE(1,5,5,5)) {
			return V_028C70_COLOR_5_5_5_1;
		} else if (HAS_SIZE(10,10,10,2)) {
			return V_028C70_COLOR_2_10_10_10;
		}
		break;
	}
	return V_028C70_COLOR_INVALID;
}

/* Endian swap setting for a CB color format; only meaningful when the
 * driver is built for a big-endian host (SI_BIG_ENDIAN). */
static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
{
	if (SI_BIG_ENDIAN) {
		switch(colorformat) {
		/* 8-bit buffers. */
		case V_028C70_COLOR_8:
			return V_028C70_ENDIAN_NONE;

		/* 16-bit buffers.
 */
		case V_028C70_COLOR_5_6_5:
		case V_028C70_COLOR_1_5_5_5:
		case V_028C70_COLOR_4_4_4_4:
		case V_028C70_COLOR_16:
		case V_028C70_COLOR_8_8:
			return V_028C70_ENDIAN_8IN16;

		/* 32-bit buffers. */
		case V_028C70_COLOR_8_8_8_8:
		case V_028C70_COLOR_2_10_10_10:
		case V_028C70_COLOR_8_24:
		case V_028C70_COLOR_24_8:
		case V_028C70_COLOR_16_16:
			return V_028C70_ENDIAN_8IN32;

		/* 64-bit buffers. */
		case V_028C70_COLOR_16_16_16_16:
			return V_028C70_ENDIAN_8IN16;

		case V_028C70_COLOR_32_32:
			return V_028C70_ENDIAN_8IN32;

		/* 128-bit buffers. */
		case V_028C70_COLOR_32_32_32_32:
			return V_028C70_ENDIAN_8IN32;
		default:
			return V_028C70_ENDIAN_NONE; /* Unsupported. */
		}
	} else {
		return V_028C70_ENDIAN_NONE;
	}
}

/* Map a gallium Z/S format to the DB_Z_INFO FORMAT field;
 * returns V_028040_Z_INVALID for formats DB cannot render. */
static uint32_t si_translate_dbformat(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_Z16_UNORM:
		return V_028040_Z_16;
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
	case PIPE_FORMAT_X8Z24_UNORM:
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
		return V_028040_Z_24; /* deprecated on SI */
	case PIPE_FORMAT_Z32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
		return V_028040_Z_32_FLOAT;
	default:
		return V_028040_Z_INVALID;
	}
}

/*
 * Texture translation
 */

/* Map a gallium format to the IMG_DATA_FORMAT field of an image descriptor;
 * returns ~0 on failure (out_unknown). */
static uint32_t si_translate_texformat(struct pipe_screen *screen,
				       enum pipe_format format,
				       const struct util_format_description *desc,
				       int first_non_void)
{
	struct si_screen *sscreen = (struct si_screen*)screen;
	bool uniform = true;
	int i;

	/* Colorspace (return non-RGB formats directly).
*/ 1615 switch (desc->colorspace) { 1616 /* Depth stencil formats */ 1617 case UTIL_FORMAT_COLORSPACE_ZS: 1618 switch (format) { 1619 case PIPE_FORMAT_Z16_UNORM: 1620 return V_008F14_IMG_DATA_FORMAT_16; 1621 case PIPE_FORMAT_X24S8_UINT: 1622 case PIPE_FORMAT_S8X24_UINT: 1623 /* 1624 * Implemented as an 8_8_8_8 data format to fix texture 1625 * gathers in stencil sampling. This affects at least 1626 * GL45-CTS.texture_cube_map_array.sampling on VI. 1627 */ 1628 if (sscreen->info.chip_class <= VI) 1629 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1630 1631 if (format == PIPE_FORMAT_X24S8_UINT) 1632 return V_008F14_IMG_DATA_FORMAT_8_24; 1633 else 1634 return V_008F14_IMG_DATA_FORMAT_24_8; 1635 case PIPE_FORMAT_Z24X8_UNORM: 1636 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1637 return V_008F14_IMG_DATA_FORMAT_8_24; 1638 case PIPE_FORMAT_X8Z24_UNORM: 1639 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1640 return V_008F14_IMG_DATA_FORMAT_24_8; 1641 case PIPE_FORMAT_S8_UINT: 1642 return V_008F14_IMG_DATA_FORMAT_8; 1643 case PIPE_FORMAT_Z32_FLOAT: 1644 return V_008F14_IMG_DATA_FORMAT_32; 1645 case PIPE_FORMAT_X32_S8X24_UINT: 1646 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1647 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1648 default: 1649 goto out_unknown; 1650 } 1651 1652 case UTIL_FORMAT_COLORSPACE_YUV: 1653 goto out_unknown; /* TODO */ 1654 1655 case UTIL_FORMAT_COLORSPACE_SRGB: 1656 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1657 goto out_unknown; 1658 break; 1659 1660 default: 1661 break; 1662 } 1663 1664 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1665 if (!sscreen->info.has_format_bc1_through_bc7) 1666 goto out_unknown; 1667 1668 switch (format) { 1669 case PIPE_FORMAT_RGTC1_SNORM: 1670 case PIPE_FORMAT_LATC1_SNORM: 1671 case PIPE_FORMAT_RGTC1_UNORM: 1672 case PIPE_FORMAT_LATC1_UNORM: 1673 return V_008F14_IMG_DATA_FORMAT_BC4; 1674 case PIPE_FORMAT_RGTC2_SNORM: 1675 case PIPE_FORMAT_LATC2_SNORM: 1676 case PIPE_FORMAT_RGTC2_UNORM: 1677 case PIPE_FORMAT_LATC2_UNORM: 1678 return 
V_008F14_IMG_DATA_FORMAT_BC5; 1679 default: 1680 goto out_unknown; 1681 } 1682 } 1683 1684 if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && 1685 (sscreen->info.family == CHIP_STONEY || 1686 sscreen->info.family == CHIP_VEGA10 || 1687 sscreen->info.family == CHIP_RAVEN)) { 1688 switch (format) { 1689 case PIPE_FORMAT_ETC1_RGB8: 1690 case PIPE_FORMAT_ETC2_RGB8: 1691 case PIPE_FORMAT_ETC2_SRGB8: 1692 return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; 1693 case PIPE_FORMAT_ETC2_RGB8A1: 1694 case PIPE_FORMAT_ETC2_SRGB8A1: 1695 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; 1696 case PIPE_FORMAT_ETC2_RGBA8: 1697 case PIPE_FORMAT_ETC2_SRGBA8: 1698 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; 1699 case PIPE_FORMAT_ETC2_R11_UNORM: 1700 case PIPE_FORMAT_ETC2_R11_SNORM: 1701 return V_008F14_IMG_DATA_FORMAT_ETC2_R; 1702 case PIPE_FORMAT_ETC2_RG11_UNORM: 1703 case PIPE_FORMAT_ETC2_RG11_SNORM: 1704 return V_008F14_IMG_DATA_FORMAT_ETC2_RG; 1705 default: 1706 goto out_unknown; 1707 } 1708 } 1709 1710 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1711 if (!sscreen->info.has_format_bc1_through_bc7) 1712 goto out_unknown; 1713 1714 switch (format) { 1715 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1716 case PIPE_FORMAT_BPTC_SRGBA: 1717 return V_008F14_IMG_DATA_FORMAT_BC7; 1718 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1719 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1720 return V_008F14_IMG_DATA_FORMAT_BC6; 1721 default: 1722 goto out_unknown; 1723 } 1724 } 1725 1726 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1727 switch (format) { 1728 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1729 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1730 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1731 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1732 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1733 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1734 default: 1735 goto out_unknown; 1736 } 1737 } 1738 1739 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1740 if (!sscreen->info.has_format_bc1_through_bc7) 1741 goto out_unknown; 1742 1743 switch (format) { 1744 case PIPE_FORMAT_DXT1_RGB: 1745 
case PIPE_FORMAT_DXT1_RGBA: 1746 case PIPE_FORMAT_DXT1_SRGB: 1747 case PIPE_FORMAT_DXT1_SRGBA: 1748 return V_008F14_IMG_DATA_FORMAT_BC1; 1749 case PIPE_FORMAT_DXT3_RGBA: 1750 case PIPE_FORMAT_DXT3_SRGBA: 1751 return V_008F14_IMG_DATA_FORMAT_BC2; 1752 case PIPE_FORMAT_DXT5_RGBA: 1753 case PIPE_FORMAT_DXT5_SRGBA: 1754 return V_008F14_IMG_DATA_FORMAT_BC3; 1755 default: 1756 goto out_unknown; 1757 } 1758 } 1759 1760 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1761 return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1762 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1763 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1764 } 1765 1766 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1767 1768 /* hw cannot support mixed formats (except depth/stencil, since only 1769 * depth is read).*/ 1770 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1771 goto out_unknown; 1772 1773 /* See whether the components are of the same size. */ 1774 for (i = 1; i < desc->nr_channels; i++) { 1775 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1776 } 1777 1778 /* Non-uniform formats. 
*/ 1779 if (!uniform) { 1780 switch(desc->nr_channels) { 1781 case 3: 1782 if (desc->channel[0].size == 5 && 1783 desc->channel[1].size == 6 && 1784 desc->channel[2].size == 5) { 1785 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1786 } 1787 goto out_unknown; 1788 case 4: 1789 if (desc->channel[0].size == 5 && 1790 desc->channel[1].size == 5 && 1791 desc->channel[2].size == 5 && 1792 desc->channel[3].size == 1) { 1793 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1794 } 1795 if (desc->channel[0].size == 1 && 1796 desc->channel[1].size == 5 && 1797 desc->channel[2].size == 5 && 1798 desc->channel[3].size == 5) { 1799 return V_008F14_IMG_DATA_FORMAT_5_5_5_1; 1800 } 1801 if (desc->channel[0].size == 10 && 1802 desc->channel[1].size == 10 && 1803 desc->channel[2].size == 10 && 1804 desc->channel[3].size == 2) { 1805 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1806 } 1807 goto out_unknown; 1808 } 1809 goto out_unknown; 1810 } 1811 1812 if (first_non_void < 0 || first_non_void > 3) 1813 goto out_unknown; 1814 1815 /* uniform formats */ 1816 switch (desc->channel[first_non_void].size) { 1817 case 4: 1818 switch (desc->nr_channels) { 1819#if 0 /* Not supported for render targets */ 1820 case 2: 1821 return V_008F14_IMG_DATA_FORMAT_4_4; 1822#endif 1823 case 4: 1824 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1825 } 1826 break; 1827 case 8: 1828 switch (desc->nr_channels) { 1829 case 1: 1830 return V_008F14_IMG_DATA_FORMAT_8; 1831 case 2: 1832 return V_008F14_IMG_DATA_FORMAT_8_8; 1833 case 4: 1834 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1835 } 1836 break; 1837 case 16: 1838 switch (desc->nr_channels) { 1839 case 1: 1840 return V_008F14_IMG_DATA_FORMAT_16; 1841 case 2: 1842 return V_008F14_IMG_DATA_FORMAT_16_16; 1843 case 4: 1844 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1845 } 1846 break; 1847 case 32: 1848 switch (desc->nr_channels) { 1849 case 1: 1850 return V_008F14_IMG_DATA_FORMAT_32; 1851 case 2: 1852 return V_008F14_IMG_DATA_FORMAT_32_32; 1853#if 0 /* Not supported for render 
targets */
		case 3:
			return V_008F14_IMG_DATA_FORMAT_32_32_32;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
		}
	}

out_unknown:
	/* ~0 is the "no hw encoding for this format" sentinel checked by
	 * callers (e.g. si_is_sampler_format_supported compares != ~0U). */
	return ~0;
}

/* Translate a gallium PIPE_TEX_WRAP_* mode to the SQ_TEX_* wrap/clamp
 * hw enum programmed into the sampler resource word.
 * Unknown values fall back to WRAP (repeat).
 */
static unsigned si_tex_wrap(unsigned wrap)
{
	switch (wrap) {
	default:
	case PIPE_TEX_WRAP_REPEAT:
		return V_008F30_SQ_TEX_WRAP;
	case PIPE_TEX_WRAP_CLAMP:
		return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
		return V_008F30_SQ_TEX_CLAMP_BORDER;
	case PIPE_TEX_WRAP_MIRROR_REPEAT:
		return V_008F30_SQ_TEX_MIRROR;
	case PIPE_TEX_WRAP_MIRROR_CLAMP:
		return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
		return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
	}
}

/* Translate a gallium PIPE_TEX_MIPFILTER_* value to the hw Z (mip)
 * filter enum. Unknown values fall back to NONE (no mip filtering).
 */
static unsigned si_tex_mipfilter(unsigned filter)
{
	switch (filter) {
	case PIPE_TEX_MIPFILTER_NEAREST:
		return V_008F38_SQ_TEX_Z_FILTER_POINT;
	case PIPE_TEX_MIPFILTER_LINEAR:
		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
	default:
	case PIPE_TEX_MIPFILTER_NONE:
		return V_008F38_SQ_TEX_Z_FILTER_NONE;
	}
}

/* Translate a gallium PIPE_FUNC_* comparison to the hw depth-compare
 * enum used by shadow samplers. Unknown values fall back to NEVER.
 */
static unsigned si_tex_compare(unsigned compare)
{
	switch (compare) {
	default:
	case PIPE_FUNC_NEVER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
	case PIPE_FUNC_LESS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
	case PIPE_FUNC_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
	case PIPE_FUNC_LEQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
	case PIPE_FUNC_GREATER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
	case PIPE_FUNC_NOTEQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
	case PIPE_FUNC_GEQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
	case PIPE_FUNC_ALWAYS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
	}
}

/* Select the SQ_RSRC_IMG_* dimensionality for an image descriptor,
 * taking into account view retargeting (cube as 2D array) and the
 * GFX9 quirk of allocating 1D textures as 2D.
 */
static unsigned si_tex_dim(struct si_screen *sscreen, struct si_texture *tex,
			   unsigned view_target, unsigned nr_samples)
{
	unsigned res_target = tex->buffer.b.b.target;

	if (view_target == PIPE_TEXTURE_CUBE ||
	    view_target == PIPE_TEXTURE_CUBE_ARRAY)
		res_target = view_target;
	/* If interpreting cubemaps as something else, set 2D_ARRAY. */
	else if (res_target == PIPE_TEXTURE_CUBE ||
		 res_target == PIPE_TEXTURE_CUBE_ARRAY)
		res_target = PIPE_TEXTURE_2D_ARRAY;

	/* GFX9 allocates 1D textures as 2D. */
	if ((res_target == PIPE_TEXTURE_1D ||
	     res_target == PIPE_TEXTURE_1D_ARRAY) &&
	    sscreen->info.chip_class >= GFX9 &&
	    tex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) {
		if (res_target == PIPE_TEXTURE_1D)
			res_target = PIPE_TEXTURE_2D;
		else
			res_target = PIPE_TEXTURE_2D_ARRAY;
	}

	switch (res_target) {
	default:
	case PIPE_TEXTURE_1D:
		return V_008F1C_SQ_RSRC_IMG_1D;
	case PIPE_TEXTURE_1D_ARRAY:
		return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
	case PIPE_TEXTURE_2D:
	case PIPE_TEXTURE_RECT:
		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
					V_008F1C_SQ_RSRC_IMG_2D;
	case PIPE_TEXTURE_2D_ARRAY:
		return nr_samples > 1 ?
					V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
					V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
	case PIPE_TEXTURE_3D:
		return V_008F1C_SQ_RSRC_IMG_3D;
	case PIPE_TEXTURE_CUBE:
	case PIPE_TEXTURE_CUBE_ARRAY:
		return V_008F1C_SQ_RSRC_IMG_CUBE;
	}
}

/*
 * Format support testing
 */

/* Return whether the format has a hw image-descriptor encoding
 * (i.e. si_translate_texformat did not return the ~0 sentinel). */
static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
	const struct util_format_description *desc = util_format_description(format);
	if (!desc)
		return false;

	return si_translate_texformat(screen, format, desc,
				      util_format_get_first_non_void_channel(format)) != ~0U;
}

/* Map a vertex/texel-buffer format to the BUF_DATA_FORMAT_* field of a
 * buffer descriptor. Returns BUF_DATA_FORMAT_INVALID if there is no hw
 * encoding. first_non_void is the index of the first non-VOID channel.
 */
static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
					       const struct util_format_description *desc,
					       int first_non_void)
{
	int i;

	/* The only supported packed buffer formats. */
	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
		return V_008F0C_BUF_DATA_FORMAT_10_11_11;

	assert(first_non_void >= 0);

	if (desc->nr_channels == 4 &&
	    desc->channel[0].size == 10 &&
	    desc->channel[1].size == 10 &&
	    desc->channel[2].size == 10 &&
	    desc->channel[3].size == 2)
		return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;

	/* See whether the components are of the same size.
 */
	for (i = 0; i < desc->nr_channels; i++) {
		if (desc->channel[first_non_void].size != desc->channel[i].size)
			return V_008F0C_BUF_DATA_FORMAT_INVALID;
	}

	switch (desc->channel[first_non_void].size) {
	case 8:
		switch (desc->nr_channels) {
		case 1:
		case 3: /* 3 loads */
			return V_008F0C_BUF_DATA_FORMAT_8;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_8_8;
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
		}
		break;
	case 16:
		switch (desc->nr_channels) {
		case 1:
		case 3: /* 3 loads */
			return V_008F0C_BUF_DATA_FORMAT_16;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_16_16;
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
		}
		break;
	case 32:
		switch (desc->nr_channels) {
		case 1:
			return V_008F0C_BUF_DATA_FORMAT_32;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_32_32;
		case 3:
			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
		}
		break;
	case 64:
		/* Legacy double formats.
 */
		/* Doubles are fetched as pairs of 32-bit words; the comments
		 * give the number of hw loads each case expands to. */
		switch (desc->nr_channels) {
		case 1: /* 1 load */
			return V_008F0C_BUF_DATA_FORMAT_32_32;
		case 2: /* 1 load */
			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
		case 3: /* 3 loads */
			return V_008F0C_BUF_DATA_FORMAT_32_32;
		case 4: /* 2 loads */
			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
		}
		break;
	}

	return V_008F0C_BUF_DATA_FORMAT_INVALID;
}

/* Map a buffer format's channel type to the BUF_NUM_FORMAT_* field.
 * Channels of 32+ bits or pure integers use (S|U)INT; otherwise
 * normalized → (S|U)NORM, non-normalized → (S|U)SCALED.
 */
static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
					      const struct util_format_description *desc,
					      int first_non_void)
{
	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
		return V_008F0C_BUF_NUM_FORMAT_FLOAT;

	assert(first_non_void >= 0);

	switch (desc->channel[first_non_void].type) {
	case UTIL_FORMAT_TYPE_SIGNED:
	case UTIL_FORMAT_TYPE_FIXED:
		if (desc->channel[first_non_void].size >= 32 ||
		    desc->channel[first_non_void].pure_integer)
			return V_008F0C_BUF_NUM_FORMAT_SINT;
		else if (desc->channel[first_non_void].normalized)
			return V_008F0C_BUF_NUM_FORMAT_SNORM;
		else
			return V_008F0C_BUF_NUM_FORMAT_SSCALED;
		break;
	case UTIL_FORMAT_TYPE_UNSIGNED:
		if (desc->channel[first_non_void].size >= 32 ||
		    desc->channel[first_non_void].pure_integer)
			return V_008F0C_BUF_NUM_FORMAT_UINT;
		else if (desc->channel[first_non_void].normalized)
			return V_008F0C_BUF_NUM_FORMAT_UNORM;
		else
			return V_008F0C_BUF_NUM_FORMAT_USCALED;
		break;
	case UTIL_FORMAT_TYPE_FLOAT:
	default:
		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
	}
}

/* Return the subset of "usage" bits supported when "format" is accessed
 * through a buffer descriptor (vertex buffer, texel buffer, image buffer),
 * or 0 if the format has no buffer data-format encoding.
 */
static unsigned si_is_vertex_format_supported(struct pipe_screen *screen,
					      enum pipe_format format,
					      unsigned usage)
{
	const struct util_format_description *desc;
	int first_non_void;
	unsigned data_format;

	assert((usage & ~(PIPE_BIND_SHADER_IMAGE |
			  PIPE_BIND_SAMPLER_VIEW |
			  PIPE_BIND_VERTEX_BUFFER)) == 0);

	desc =
		util_format_description(format);
	if (!desc)
		return 0;

	/* There are no native 8_8_8 or 16_16_16 data formats, and we currently
	 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well
	 * for read-only access (with caveats surrounding bounds checks), but
	 * obviously fails for write access which we have to implement for
	 * shader images. Luckily, OpenGL doesn't expect this to be supported
	 * anyway, and so the only impact is on PBO uploads / downloads, which
	 * shouldn't be expected to be fast for GL_RGB anyway.
	 */
	if (desc->block.bits == 3 * 8 ||
	    desc->block.bits == 3 * 16) {
		if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) {
			usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW);
			if (!usage)
				return 0;
		}
	}

	first_non_void = util_format_get_first_non_void_channel(format);
	data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
	if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID)
		return 0;

	return usage;
}

/* A format can be a color buffer iff it has both a CB hw format and a
 * component-swap encoding. */
static bool si_is_colorbuffer_format_supported(enum pipe_format format)
{
	return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
		si_translate_colorswap(format, false) != ~0U;
}

/* A format can be a depth/stencil buffer iff it has a DB hw format. */
static bool si_is_zs_format_supported(enum pipe_format format)
{
	return si_translate_dbformat(format) != V_028040_Z_INVALID;
}

/* pipe_screen::is_format_supported implementation: accumulate the
 * supported subset of the requested "usage" bits into retval and
 * succeed only if every requested bit is supported.
 */
static boolean si_is_format_supported(struct pipe_screen *screen,
				      enum pipe_format format,
				      enum pipe_texture_target target,
				      unsigned sample_count,
				      unsigned storage_sample_count,
				      unsigned usage)
{
	struct si_screen *sscreen = (struct si_screen *)screen;
	unsigned retval = 0;

	if (target >= PIPE_MAX_TEXTURE_TYPES) {
		PRINT_ERR("radeonsi: unsupported texture type %d\n", target);
		return false;
	}

	/* Storage samples (EQAA) can never exceed coverage samples. */
	if (MAX2(1, sample_count) < MAX2(1, storage_sample_count))
		return false;

	if (sample_count > 1) {
		if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
			return false;

		if (usage & PIPE_BIND_SHADER_IMAGE)
			return false;

		/* Only power-of-two sample counts are supported. */
		if (!util_is_power_of_two_or_zero(sample_count) ||
		    !util_is_power_of_two_or_zero(storage_sample_count))
			return false;

		/* MSAA support without framebuffer attachments. */
		if (format == PIPE_FORMAT_NONE && sample_count <= 16)
			return true;

		if (!sscreen->info.has_eqaa_surface_allocator ||
		    util_format_is_depth_or_stencil(format)) {
			/* Color without EQAA or depth/stencil. */
			if (sample_count > 8 ||
			    sample_count != storage_sample_count)
				return false;
		} else {
			/* Color with EQAA. */
			if (sample_count > 16 ||
			    storage_sample_count > 8)
				return false;
		}
	}

	if (usage & (PIPE_BIND_SAMPLER_VIEW |
		     PIPE_BIND_SHADER_IMAGE)) {
		/* Buffers go through the buffer-descriptor path; images and
		 * textures through the image-descriptor path. */
		if (target == PIPE_BUFFER) {
			retval |= si_is_vertex_format_supported(
				screen, format, usage & (PIPE_BIND_SAMPLER_VIEW |
							 PIPE_BIND_SHADER_IMAGE));
		} else {
			if (si_is_sampler_format_supported(screen, format))
				retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
						   PIPE_BIND_SHADER_IMAGE);
		}
	}

	if ((usage & (PIPE_BIND_RENDER_TARGET |
		      PIPE_BIND_DISPLAY_TARGET |
		      PIPE_BIND_SCANOUT |
		      PIPE_BIND_SHARED |
		      PIPE_BIND_BLENDABLE)) &&
	    si_is_colorbuffer_format_supported(format)) {
		retval |= usage &
			  (PIPE_BIND_RENDER_TARGET |
			   PIPE_BIND_DISPLAY_TARGET |
			   PIPE_BIND_SCANOUT |
			   PIPE_BIND_SHARED);
		/* Blending is not supported on pure-integer or Z/S formats. */
		if (!util_format_is_pure_integer(format) &&
		    !util_format_is_depth_or_stencil(format))
			retval |= usage & PIPE_BIND_BLENDABLE;
	}

	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
	    si_is_zs_format_supported(format)) {
		retval |= PIPE_BIND_DEPTH_STENCIL;
	}

	if (usage & PIPE_BIND_VERTEX_BUFFER) {
		retval |= si_is_vertex_format_supported(screen, format,
							PIPE_BIND_VERTEX_BUFFER);
	}

	if ((usage & PIPE_BIND_LINEAR) &&
	    !util_format_is_compressed(format) &&
	    !(usage & PIPE_BIND_DEPTH_STENCIL))
		retval |= PIPE_BIND_LINEAR;

	/* Succeed only if every requested usage bit is supported. */
	return retval == usage;
}

/*
 * framebuffer handling
 */

/* Pick the four SPI_SHADER_COL_FORMAT variants (normal / alpha /
 * blend / blend+alpha) for a color surface. Which one is used at draw
 * time depends on whether blending and alpha-to-coverage are enabled.
 */
static void si_choose_spi_color_formats(struct si_surface *surf,
					unsigned format, unsigned swap,
					unsigned ntype, bool is_depth)
{
	/* Alpha is needed for alpha-to-coverage.
	 * Blending may be with or without alpha.
	 */
	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
	unsigned alpha = 0; /* exports alpha, but may not support blending */
	unsigned blend = 0; /* supports blending, but may not export alpha */
	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */

	/* Choose the SPI color formats. These are required values for RB+.
	 * Other chips have multiple choices, though they are not necessarily better.
2261 */ 2262 switch (format) { 2263 case V_028C70_COLOR_5_6_5: 2264 case V_028C70_COLOR_1_5_5_5: 2265 case V_028C70_COLOR_5_5_5_1: 2266 case V_028C70_COLOR_4_4_4_4: 2267 case V_028C70_COLOR_10_11_11: 2268 case V_028C70_COLOR_11_11_10: 2269 case V_028C70_COLOR_8: 2270 case V_028C70_COLOR_8_8: 2271 case V_028C70_COLOR_8_8_8_8: 2272 case V_028C70_COLOR_10_10_10_2: 2273 case V_028C70_COLOR_2_10_10_10: 2274 if (ntype == V_028C70_NUMBER_UINT) 2275 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 2276 else if (ntype == V_028C70_NUMBER_SINT) 2277 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 2278 else 2279 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 2280 break; 2281 2282 case V_028C70_COLOR_16: 2283 case V_028C70_COLOR_16_16: 2284 case V_028C70_COLOR_16_16_16_16: 2285 if (ntype == V_028C70_NUMBER_UNORM || 2286 ntype == V_028C70_NUMBER_SNORM) { 2287 /* UNORM16 and SNORM16 don't support blending */ 2288 if (ntype == V_028C70_NUMBER_UNORM) 2289 normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR; 2290 else 2291 normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR; 2292 2293 /* Use 32 bits per channel for blending. 
*/ 2294 if (format == V_028C70_COLOR_16) { 2295 if (swap == V_028C70_SWAP_STD) { /* R */ 2296 blend = V_028714_SPI_SHADER_32_R; 2297 blend_alpha = V_028714_SPI_SHADER_32_AR; 2298 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 2299 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 2300 else 2301 assert(0); 2302 } else if (format == V_028C70_COLOR_16_16) { 2303 if (swap == V_028C70_SWAP_STD) { /* RG */ 2304 blend = V_028714_SPI_SHADER_32_GR; 2305 blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2306 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 2307 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 2308 else 2309 assert(0); 2310 } else /* 16_16_16_16 */ 2311 blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2312 } else if (ntype == V_028C70_NUMBER_UINT) 2313 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 2314 else if (ntype == V_028C70_NUMBER_SINT) 2315 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 2316 else if (ntype == V_028C70_NUMBER_FLOAT) 2317 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 2318 else 2319 assert(0); 2320 break; 2321 2322 case V_028C70_COLOR_32: 2323 if (swap == V_028C70_SWAP_STD) { /* R */ 2324 blend = normal = V_028714_SPI_SHADER_32_R; 2325 alpha = blend_alpha = V_028714_SPI_SHADER_32_AR; 2326 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 2327 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 2328 else 2329 assert(0); 2330 break; 2331 2332 case V_028C70_COLOR_32_32: 2333 if (swap == V_028C70_SWAP_STD) { /* RG */ 2334 blend = normal = V_028714_SPI_SHADER_32_GR; 2335 alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2336 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 2337 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 2338 else 2339 assert(0); 2340 break; 2341 2342 case V_028C70_COLOR_32_32_32_32: 2343 case V_028C70_COLOR_8_24: 2344 case V_028C70_COLOR_24_8: 2345 case V_028C70_COLOR_X24_8_32_FLOAT: 2346 alpha = blend = blend_alpha = 
			normal = V_028714_SPI_SHADER_32_ABGR;
		break;

	default:
		assert(0);
		return;
	}

	/* The DB->CB copy needs 32_ABGR. */
	if (is_depth)
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;

	surf->spi_shader_col_format = normal;
	surf->spi_shader_col_format_alpha = alpha;
	surf->spi_shader_col_format_blend = blend;
	surf->spi_shader_col_format_blend_alpha = blend_alpha;
}

/* Compute the CB_COLOR* register values (color_info/attrib/view, DCC
 * control, SPI export formats) for a color surface and cache them in
 * "surf". Called lazily; sets surf->color_initialized when done.
 */
static void si_initialize_color_surface(struct si_context *sctx,
					struct si_surface *surf)
{
	struct si_texture *tex = (struct si_texture*)surf->base.texture;
	unsigned color_info, color_attrib;
	unsigned format, swap, ntype, endian;
	const struct util_format_description *desc;
	int firstchan;
	unsigned blend_clamp = 0, blend_bypass = 0;

	/* Derive the CB number type from the first non-void channel. */
	desc = util_format_description(surf->base.format);
	for (firstchan = 0; firstchan < 4; firstchan++) {
		if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
			break;
		}
	}
	if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
		ntype = V_028C70_NUMBER_FLOAT;
	} else {
		ntype = V_028C70_NUMBER_UNORM;
		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
			ntype = V_028C70_NUMBER_SRGB;
		else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
			if (desc->channel[firstchan].pure_integer) {
				ntype = V_028C70_NUMBER_SINT;
			} else {
				assert(desc->channel[firstchan].normalized);
				ntype = V_028C70_NUMBER_SNORM;
			}
		} else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
			if (desc->channel[firstchan].pure_integer) {
				ntype = V_028C70_NUMBER_UINT;
			} else {
				assert(desc->channel[firstchan].normalized);
				ntype = V_028C70_NUMBER_UNORM;
			}
		}
	}

	format = si_translate_colorformat(surf->base.format);
	if (format == V_028C70_COLOR_INVALID) {
		PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
	}
	assert(format != V_028C70_COLOR_INVALID);
	swap = si_translate_colorswap(surf->base.format, false);
	endian = si_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}

	/* Remember int8/int10-ness for draw-time blend state fixups. */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) {
		if (format == V_028C70_COLOR_8 ||
		    format == V_028C70_COLOR_8_8 ||
		    format == V_028C70_COLOR_8_8_8_8)
			surf->color_is_int8 = true;
		else if (format == V_028C70_COLOR_10_10_10_2 ||
			 format == V_028C70_COLOR_2_10_10_10)
			surf->color_is_int10 = true;
	}

	color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);

	/* Intensity is implemented as Red, so treat it that way.
 */
	color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
						  util_format_is_intensity(surf->base.format));

	if (tex->buffer.b.b.nr_samples > 1) {
		unsigned log_samples = util_logbase2(tex->buffer.b.b.nr_samples);
		unsigned log_fragments = util_logbase2(tex->buffer.b.b.nr_storage_samples);

		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
				S_028C74_NUM_FRAGMENTS(log_fragments);

		if (tex->surface.fmask_size) {
			color_info |= S_028C70_COMPRESSION(1);
			unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.fmask.bankh);

			if (sctx->chip_class == SI) {
				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
			}
		}
	}

	/* DCC block-size tuning (VI+ only). */
	if (sctx->chip_class >= VI) {
		unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
		unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;

		/* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
		   64 for APU because all of our APUs to date use DIMMs which have
		   a request granularity size of 64B while all other chips have a
		   32B request size */
		if (!sctx->screen->info.has_dedicated_vram)
			min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;

		if (tex->buffer.b.b.nr_storage_samples > 1) {
			if (tex->surface.bpe == 1)
				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
			else if (tex->surface.bpe == 2)
				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
		}

		surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
				       S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
				       S_028C78_INDEPENDENT_64B_BLOCKS(1);
	}

	/* This must be set for fast clear to work without FMASK.
 */
	if (!tex->surface.fmask_size && sctx->chip_class == SI) {
		unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh);
		color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}

	unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
			      S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);

	if (sctx->chip_class >= GFX9) {
		unsigned mip0_depth = util_max_layer(&tex->buffer.b.b, 0);

		color_view |= S_028C6C_MIP_LEVEL(surf->base.u.tex.level);
		color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
				S_028C74_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type);
		surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(surf->width0 - 1) |
					 S_028C68_MIP0_HEIGHT(surf->height0 - 1) |
					 S_028C68_MAX_MIP(tex->buffer.b.b.last_level);
	}

	surf->cb_color_view = color_view;
	surf->cb_color_info = color_info;
	surf->cb_color_attrib = color_attrib;

	/* Determine pixel shader export format */
	si_choose_spi_color_formats(surf, format, swap, ntype, tex->is_depth);

	surf->color_initialized = true;
}

/* Compute the DB_* register values (depth/stencil base, z_info,
 * stencil_info, HTILE setup) for a depth surface and cache them in
 * "surf". Sets surf->depth_initialized when done.
 */
static void si_init_depth_surface(struct si_context *sctx,
				  struct si_surface *surf)
{
	struct si_texture *tex = (struct si_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	unsigned format, stencil_format;
	uint32_t z_info, s_info;

	format = si_translate_dbformat(tex->db_render_format);
	stencil_format = tex->surface.has_stencil ?
				 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

	assert(format != V_028040_Z_INVALID);
	if (format == V_028040_Z_INVALID)
		PRINT_ERR("Invalid DB format: %d, disabling DB.\n", tex->buffer.b.b.format);

	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
	surf->db_htile_data_base = 0;
	surf->db_htile_surface = 0;

	if (sctx->chip_class >= GFX9) {
		/* GFX9 addresses the whole mip chain from one base. */
		assert(tex->surface.u.gfx9.surf_offset == 0);
		surf->db_depth_base = tex->buffer.gpu_address >> 8;
		surf->db_stencil_base = (tex->buffer.gpu_address +
					 tex->surface.u.gfx9.stencil_offset) >> 8;
		z_info = S_028038_FORMAT(format) |
			 S_028038_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)) |
			 S_028038_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) |
			 S_028038_MAXMIP(tex->buffer.b.b.last_level);
		s_info = S_02803C_FORMAT(stencil_format) |
			 S_02803C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode);
		surf->db_z_info2 = S_028068_EPITCH(tex->surface.u.gfx9.surf.epitch);
		surf->db_stencil_info2 = S_02806C_EPITCH(tex->surface.u.gfx9.stencil.epitch);
		surf->db_depth_view |= S_028008_MIPID(level);
		surf->db_depth_size = S_02801C_X_MAX(tex->buffer.b.b.width0 - 1) |
				      S_02801C_Y_MAX(tex->buffer.b.b.height0 - 1);

		if (si_htile_enabled(tex, level)) {
			z_info |= S_028038_TILE_SURFACE_ENABLE(1) |
				  S_028038_ALLOW_EXPCLEAR(1);

			if (tex->tc_compatible_htile) {
				unsigned max_zplanes = 4;

				if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM &&
				    tex->buffer.b.b.nr_samples > 1)
					max_zplanes = 2;

				z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
					  S_028038_ITERATE_FLUSH(1);
				s_info |= S_02803C_ITERATE_FLUSH(1);
			}

			if (tex->surface.has_stencil) {
				/* Stencil buffer workaround ported from the SI-CI-VI code.
				 * See that for explanation.
 */
				s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1);
			} else {
				/* Use all HTILE for depth if there's no stencil. */
				s_info |= S_02803C_TILE_STENCIL_DISABLE(1);
			}

			surf->db_htile_data_base = (tex->buffer.gpu_address +
						    tex->htile_offset) >> 8;
			surf->db_htile_surface = S_028ABC_FULL_CACHE(1) |
						 S_028ABC_PIPE_ALIGNED(tex->surface.u.gfx9.htile.pipe_aligned) |
						 S_028ABC_RB_ALIGNED(tex->surface.u.gfx9.htile.rb_aligned);
		}
	} else {
		/* SI-CI-VI */
		struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level];

		assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);

		surf->db_depth_base = (tex->buffer.gpu_address +
				       tex->surface.u.legacy.level[level].offset) >> 8;
		surf->db_stencil_base = (tex->buffer.gpu_address +
					 tex->surface.u.legacy.stencil_level[level].offset) >> 8;

		z_info = S_028040_FORMAT(format) |
			 S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples));
		s_info = S_028044_FORMAT(stencil_format);
		surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile);

		if (sctx->chip_class >= CIK) {
			/* CIK+ program the tiling parameters explicitly from
			 * the per-level tile-mode tables. */
			struct radeon_info *info = &sctx->screen->info;
			unsigned index = tex->surface.u.legacy.tiling_index[level];
			unsigned stencil_index = tex->surface.u.legacy.stencil_tiling_index[level];
			unsigned macro_index = tex->surface.u.legacy.macro_tile_index;
			unsigned tile_mode = info->si_tile_mode_array[index];
			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

			surf->db_depth_info |=
				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
			z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
			s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
		} else {
			/* SI uses tile-mode indices instead. */
			unsigned tile_mode_index = si_tile_mode_index(tex, level, false);
			z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
			tile_mode_index = si_tile_mode_index(tex, level, true);
			s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
		}

		surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
				      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
		surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
								levelinfo->nblk_y) / 64 - 1);

		if (si_htile_enabled(tex, level)) {
			z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
				  S_028040_ALLOW_EXPCLEAR(1);

			if (tex->surface.has_stencil) {
				/* Workaround: For a not yet understood reason, the
				 * combination of MSAA, fast stencil clear and stencil
				 * decompress messes with subsequent stencil buffer
				 * uses. Problem was reproduced on Verde, Bonaire,
				 * Tonga, and Carrizo.
				 *
				 * Disabling EXPCLEAR works around the problem.
				 *
				 * Check piglit's arb_texture_multisample-stencil-clear
				 * test if you want to try changing this.
				 */
				if (tex->buffer.b.b.nr_samples <= 1)
					s_info |= S_028044_ALLOW_EXPCLEAR(1);
			} else if (!tex->tc_compatible_htile) {
				/* Use all of the htile_buffer for depth if there's no stencil.
				 * This must not be set when TC-compatible HTILE is enabled
				 * due to a hw bug.
2662 */ 2663 s_info |= S_028044_TILE_STENCIL_DISABLE(1); 2664 } 2665 2666 surf->db_htile_data_base = (tex->buffer.gpu_address + 2667 tex->htile_offset) >> 8; 2668 surf->db_htile_surface = S_028ABC_FULL_CACHE(1); 2669 2670 if (tex->tc_compatible_htile) { 2671 surf->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); 2672 2673 /* 0 = full compression. N = only compress up to N-1 Z planes. */ 2674 if (tex->buffer.b.b.nr_samples <= 1) 2675 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); 2676 else if (tex->buffer.b.b.nr_samples <= 4) 2677 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); 2678 else 2679 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); 2680 } 2681 } 2682 } 2683 2684 surf->db_z_info = z_info; 2685 surf->db_stencil_info = s_info; 2686 2687 surf->depth_initialized = true; 2688} 2689 2690void si_update_fb_dirtiness_after_rendering(struct si_context *sctx) 2691{ 2692 if (sctx->decompression_enabled) 2693 return; 2694 2695 if (sctx->framebuffer.state.zsbuf) { 2696 struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; 2697 struct si_texture *tex = (struct si_texture *)surf->texture; 2698 2699 tex->dirty_level_mask |= 1 << surf->u.tex.level; 2700 2701 if (tex->surface.has_stencil) 2702 tex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; 2703 } 2704 2705 unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask; 2706 while (compressed_cb_mask) { 2707 unsigned i = u_bit_scan(&compressed_cb_mask); 2708 struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; 2709 struct si_texture *tex = (struct si_texture*)surf->texture; 2710 2711 if (tex->surface.fmask_size) 2712 tex->dirty_level_mask |= 1 << surf->u.tex.level; 2713 if (tex->dcc_gather_statistics) 2714 tex->separate_dcc_dirty = true; 2715 } 2716} 2717 2718static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) 2719{ 2720 for (int i = 0; i < state->nr_cbufs; ++i) { 2721 struct si_surface *surf = NULL; 2722 struct si_texture *tex; 2723 2724 if (!state->cbufs[i]) 2725 
continue; 2726 surf = (struct si_surface*)state->cbufs[i]; 2727 tex = (struct si_texture*)surf->base.texture; 2728 2729 p_atomic_dec(&tex->framebuffers_bound); 2730 } 2731} 2732 2733static void si_set_framebuffer_state(struct pipe_context *ctx, 2734 const struct pipe_framebuffer_state *state) 2735{ 2736 struct si_context *sctx = (struct si_context *)ctx; 2737 struct si_surface *surf = NULL; 2738 struct si_texture *tex; 2739 bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; 2740 unsigned old_nr_samples = sctx->framebuffer.nr_samples; 2741 unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit; 2742 bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf; 2743 bool old_has_stencil = 2744 old_has_zsbuf && 2745 ((struct si_texture*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil; 2746 bool unbound = false; 2747 int i; 2748 2749 /* Reject zero-sized framebuffers due to a hw bug on SI that occurs 2750 * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. 2751 * We could implement the full workaround here, but it's a useless case. 2752 */ 2753 if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) { 2754 unreachable("the framebuffer shouldn't have zero area"); 2755 return; 2756 } 2757 2758 si_update_fb_dirtiness_after_rendering(sctx); 2759 2760 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 2761 if (!sctx->framebuffer.state.cbufs[i]) 2762 continue; 2763 2764 tex = (struct si_texture*)sctx->framebuffer.state.cbufs[i]->texture; 2765 if (tex->dcc_gather_statistics) 2766 vi_separate_dcc_stop_query(sctx, tex); 2767 } 2768 2769 /* Disable DCC if the formats are incompatible. 
*/ 2770 for (i = 0; i < state->nr_cbufs; i++) { 2771 if (!state->cbufs[i]) 2772 continue; 2773 2774 surf = (struct si_surface*)state->cbufs[i]; 2775 tex = (struct si_texture*)surf->base.texture; 2776 2777 if (!surf->dcc_incompatible) 2778 continue; 2779 2780 /* Since the DCC decompression calls back into set_framebuffer- 2781 * _state, we need to unbind the framebuffer, so that 2782 * vi_separate_dcc_stop_query isn't called twice with the same 2783 * color buffer. 2784 */ 2785 if (!unbound) { 2786 util_copy_framebuffer_state(&sctx->framebuffer.state, NULL); 2787 unbound = true; 2788 } 2789 2790 if (vi_dcc_enabled(tex, surf->base.u.tex.level)) 2791 if (!si_texture_disable_dcc(sctx, tex)) 2792 si_decompress_dcc(sctx, tex); 2793 2794 surf->dcc_incompatible = false; 2795 } 2796 2797 /* Only flush TC when changing the framebuffer state, because 2798 * the only client not using TC that can change textures is 2799 * the framebuffer. 2800 * 2801 * Wait for compute shaders because of possible transitions: 2802 * - FB write -> shader read 2803 * - shader write -> FB read 2804 * 2805 * DB caches are flushed on demand (using si_decompress_textures). 2806 * 2807 * When MSAA is enabled, CB and TC caches are flushed on demand 2808 * (after FMASK decompression). Shader write -> FB read transitions 2809 * cannot happen for MSAA textures, because MSAA shader images are 2810 * not supported. 2811 * 2812 * Only flush and wait for CB if there is actually a bound color buffer. 2813 */ 2814 if (sctx->framebuffer.uncompressed_cb_mask) { 2815 si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 2816 sctx->framebuffer.CB_has_shader_readable_metadata, 2817 sctx->framebuffer.all_DCC_pipe_aligned); 2818 } 2819 2820 sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; 2821 2822 /* u_blitter doesn't invoke depth decompression when it does multiple 2823 * blits in a row, but the only case when it matters for DB is when 2824 * doing generate_mipmap. 
So here we flush DB manually between 2825 * individual generate_mipmap blits. 2826 * Note that lower mipmap levels aren't compressed. 2827 */ 2828 if (sctx->generate_mipmap_for_depth) { 2829 si_make_DB_shader_coherent(sctx, 1, false, 2830 sctx->framebuffer.DB_has_shader_readable_metadata); 2831 } else if (sctx->chip_class == GFX9) { 2832 /* It appears that DB metadata "leaks" in a sequence of: 2833 * - depth clear 2834 * - DCC decompress for shader image writes (with DB disabled) 2835 * - render with DEPTH_BEFORE_SHADER=1 2836 * Flushing DB metadata works around the problem. 2837 */ 2838 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META; 2839 } 2840 2841 /* Take the maximum of the old and new count. If the new count is lower, 2842 * dirtying is needed to disable the unbound colorbuffers. 2843 */ 2844 sctx->framebuffer.dirty_cbufs |= 2845 (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; 2846 sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; 2847 2848 si_dec_framebuffer_counters(&sctx->framebuffer.state); 2849 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 2850 2851 sctx->framebuffer.colorbuf_enabled_4bit = 0; 2852 sctx->framebuffer.spi_shader_col_format = 0; 2853 sctx->framebuffer.spi_shader_col_format_alpha = 0; 2854 sctx->framebuffer.spi_shader_col_format_blend = 0; 2855 sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; 2856 sctx->framebuffer.color_is_int8 = 0; 2857 sctx->framebuffer.color_is_int10 = 0; 2858 2859 sctx->framebuffer.compressed_cb_mask = 0; 2860 sctx->framebuffer.uncompressed_cb_mask = 0; 2861 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 2862 sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples; 2863 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 2864 sctx->framebuffer.any_dst_linear = false; 2865 sctx->framebuffer.CB_has_shader_readable_metadata = false; 2866 sctx->framebuffer.DB_has_shader_readable_metadata = 
false; 2867 sctx->framebuffer.all_DCC_pipe_aligned = true; 2868 unsigned num_bpp64_colorbufs = 0; 2869 2870 for (i = 0; i < state->nr_cbufs; i++) { 2871 if (!state->cbufs[i]) 2872 continue; 2873 2874 surf = (struct si_surface*)state->cbufs[i]; 2875 tex = (struct si_texture*)surf->base.texture; 2876 2877 if (!surf->color_initialized) { 2878 si_initialize_color_surface(sctx, surf); 2879 } 2880 2881 sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4); 2882 sctx->framebuffer.spi_shader_col_format |= 2883 surf->spi_shader_col_format << (i * 4); 2884 sctx->framebuffer.spi_shader_col_format_alpha |= 2885 surf->spi_shader_col_format_alpha << (i * 4); 2886 sctx->framebuffer.spi_shader_col_format_blend |= 2887 surf->spi_shader_col_format_blend << (i * 4); 2888 sctx->framebuffer.spi_shader_col_format_blend_alpha |= 2889 surf->spi_shader_col_format_blend_alpha << (i * 4); 2890 2891 if (surf->color_is_int8) 2892 sctx->framebuffer.color_is_int8 |= 1 << i; 2893 if (surf->color_is_int10) 2894 sctx->framebuffer.color_is_int10 |= 1 << i; 2895 2896 if (tex->surface.fmask_size) 2897 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2898 else 2899 sctx->framebuffer.uncompressed_cb_mask |= 1 << i; 2900 2901 /* Don't update nr_color_samples for non-AA buffers. 2902 * (e.g. 
destination of MSAA resolve) 2903 */ 2904 if (tex->buffer.b.b.nr_samples >= 2 && 2905 tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) { 2906 sctx->framebuffer.nr_color_samples = 2907 MIN2(sctx->framebuffer.nr_color_samples, 2908 tex->buffer.b.b.nr_storage_samples); 2909 sctx->framebuffer.nr_color_samples = 2910 MAX2(1, sctx->framebuffer.nr_color_samples); 2911 } 2912 2913 if (tex->surface.is_linear) 2914 sctx->framebuffer.any_dst_linear = true; 2915 if (tex->surface.bpe >= 8) 2916 num_bpp64_colorbufs++; 2917 2918 if (vi_dcc_enabled(tex, surf->base.u.tex.level)) { 2919 sctx->framebuffer.CB_has_shader_readable_metadata = true; 2920 2921 if (sctx->chip_class >= GFX9 && 2922 !tex->surface.u.gfx9.dcc.pipe_aligned) 2923 sctx->framebuffer.all_DCC_pipe_aligned = false; 2924 } 2925 2926 si_context_add_resource_size(sctx, surf->base.texture); 2927 2928 p_atomic_inc(&tex->framebuffers_bound); 2929 2930 if (tex->dcc_gather_statistics) { 2931 /* Dirty tracking must be enabled for DCC usage analysis. */ 2932 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2933 vi_separate_dcc_start_query(sctx, tex); 2934 } 2935 } 2936 2937 /* For optimal DCC performance. 
*/ 2938 if (sctx->chip_class == VI) 2939 sctx->framebuffer.dcc_overwrite_combiner_watermark = 4; 2940 else if (num_bpp64_colorbufs >= 5) 2941 sctx->framebuffer.dcc_overwrite_combiner_watermark = 8; 2942 else 2943 sctx->framebuffer.dcc_overwrite_combiner_watermark = 6; 2944 2945 struct si_texture *zstex = NULL; 2946 2947 if (state->zsbuf) { 2948 surf = (struct si_surface*)state->zsbuf; 2949 zstex = (struct si_texture*)surf->base.texture; 2950 2951 if (!surf->depth_initialized) { 2952 si_init_depth_surface(sctx, surf); 2953 } 2954 2955 if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level)) 2956 sctx->framebuffer.DB_has_shader_readable_metadata = true; 2957 2958 si_context_add_resource_size(sctx, surf->base.texture); 2959 } 2960 2961 si_update_ps_colorbuf0_slot(sctx); 2962 si_update_poly_offset_state(sctx); 2963 si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 2964 si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 2965 2966 if (sctx->screen->dpbb_allowed) 2967 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 2968 2969 if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 2970 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 2971 2972 if (sctx->screen->has_out_of_order_rast && 2973 (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit || 2974 !!sctx->framebuffer.state.zsbuf != old_has_zsbuf || 2975 (zstex && zstex->surface.has_stencil != old_has_stencil))) 2976 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 2977 2978 if (sctx->framebuffer.nr_samples != old_nr_samples) { 2979 struct pipe_constant_buffer constbuf = {0}; 2980 2981 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 2982 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 2983 2984 constbuf.buffer = sctx->sample_pos_buffer; 2985 2986 /* Set sample locations as fragment shader constants. 
*/ 2987 switch (sctx->framebuffer.nr_samples) { 2988 case 1: 2989 constbuf.buffer_offset = 0; 2990 break; 2991 case 2: 2992 constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x2 - 2993 (ubyte*)sctx->sample_positions.x1; 2994 break; 2995 case 4: 2996 constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x4 - 2997 (ubyte*)sctx->sample_positions.x1; 2998 break; 2999 case 8: 3000 constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x8 - 3001 (ubyte*)sctx->sample_positions.x1; 3002 break; 3003 case 16: 3004 constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x16 - 3005 (ubyte*)sctx->sample_positions.x1; 3006 break; 3007 default: 3008 PRINT_ERR("Requested an invalid number of samples %i.\n", 3009 sctx->framebuffer.nr_samples); 3010 assert(0); 3011 } 3012 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 3013 si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 3014 3015 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); 3016 } 3017 3018 sctx->do_update_shaders = true; 3019 3020 if (!sctx->decompression_enabled) { 3021 /* Prevent textures decompression when the framebuffer state 3022 * changes come from the decompression passes themselves. 3023 */ 3024 sctx->need_check_render_feedback = true; 3025 } 3026} 3027 3028static void si_emit_framebuffer_state(struct si_context *sctx) 3029{ 3030 struct radeon_cmdbuf *cs = sctx->gfx_cs; 3031 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 3032 unsigned i, nr_cbufs = state->nr_cbufs; 3033 struct si_texture *tex = NULL; 3034 struct si_surface *cb = NULL; 3035 unsigned cb_color_info = 0; 3036 3037 /* Colorbuffers. 
	 */
	for (i = 0; i < nr_cbufs; i++) {
		uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base;
		unsigned cb_color_attrib;

		if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
			continue;

		cb = (struct si_surface*)state->cbufs[i];
		if (!cb) {
			/* FORMAT=INVALID disables the unbound slot. */
			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
			continue;
		}

		tex = (struct si_texture *)cb->base.texture;
		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
					  &tex->buffer, RADEON_USAGE_READWRITE,
					  tex->buffer.b.b.nr_samples > 1 ?
						  RADEON_PRIO_COLOR_BUFFER_MSAA :
						  RADEON_PRIO_COLOR_BUFFER);

		if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) {
			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
						  tex->cmask_buffer, RADEON_USAGE_READWRITE,
						  RADEON_PRIO_SEPARATE_META);
		}

		if (tex->dcc_separate_buffer)
			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
						  tex->dcc_separate_buffer,
						  RADEON_USAGE_READWRITE,
						  RADEON_PRIO_SEPARATE_META);

		/* Compute mutable surface parameters. */
		cb_color_base = tex->buffer.gpu_address >> 8;
		cb_color_fmask = 0;
		cb_color_cmask = tex->cmask_base_address_reg;
		cb_dcc_base = 0;
		cb_color_info = cb->cb_color_info | tex->cb_color_info;
		cb_color_attrib = cb->cb_color_attrib;

		if (cb->base.u.tex.level > 0)
			cb_color_info &= C_028C70_FAST_CLEAR;

		if (tex->surface.fmask_size) {
			cb_color_fmask = (tex->buffer.gpu_address + tex->fmask_offset) >> 8;
			cb_color_fmask |= tex->surface.fmask_tile_swizzle;
		}

		/* Set up DCC. */
		if (vi_dcc_enabled(tex, cb->base.u.tex.level)) {
			/* NOTE(review): this pattern (cbuf0 = MSAA surface,
			 * cbuf1 = this single-sample surface) presumably
			 * identifies a resolve blit's destination — confirm
			 * against the blitter setup.
			 */
			bool is_msaa_resolve_dst = state->cbufs[0] &&
						   state->cbufs[0]->texture->nr_samples > 1 &&
						   state->cbufs[1] == &cb->base &&
						   state->cbufs[1]->texture->nr_samples <= 1;

			if (!is_msaa_resolve_dst)
				cb_color_info |= S_028C70_DCC_ENABLE(1);

			cb_dcc_base = ((!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) +
				       tex->dcc_offset) >> 8;
			cb_dcc_base |= tex->surface.tile_swizzle;
		}

		if (sctx->chip_class >= GFX9) {
			struct gfx9_surf_meta_flags meta;

			if (tex->dcc_offset)
				meta = tex->surface.u.gfx9.dcc;
			else
				meta = tex->surface.u.gfx9.cmask;

			/* Set mutable surface parameters. */
			cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
			cb_color_base |= tex->surface.tile_swizzle;
			if (!tex->surface.fmask_size)
				cb_color_fmask = cb_color_base;
			if (cb->base.u.tex.level > 0)
				cb_color_cmask = cb_color_base;
			cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) |
					   S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode) |
					   S_028C74_RB_ALIGNED(meta.rb_aligned) |
					   S_028C74_PIPE_ALIGNED(meta.pipe_aligned);

			/* GFX9 register layout: 15 consecutive dwords per CB slot. */
			radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 15);
			radeon_emit(cs, cb_color_base);		/* CB_COLOR0_BASE */
			radeon_emit(cs, S_028C64_BASE_256B(cb_color_base >> 32)); /* CB_COLOR0_BASE_EXT */
			radeon_emit(cs, cb->cb_color_attrib2);	/* CB_COLOR0_ATTRIB2 */
			radeon_emit(cs, cb->cb_color_view);	/* CB_COLOR0_VIEW */
			radeon_emit(cs, cb_color_info);		/* CB_COLOR0_INFO */
			radeon_emit(cs, cb_color_attrib);	/* CB_COLOR0_ATTRIB */
			radeon_emit(cs, cb->cb_dcc_control);	/* CB_COLOR0_DCC_CONTROL */
			radeon_emit(cs, cb_color_cmask);	/* CB_COLOR0_CMASK */
			radeon_emit(cs, S_028C80_BASE_256B(cb_color_cmask >> 32)); /* CB_COLOR0_CMASK_BASE_EXT */
			radeon_emit(cs, cb_color_fmask);	/* CB_COLOR0_FMASK */
			radeon_emit(cs, S_028C88_BASE_256B(cb_color_fmask >> 32)); /* CB_COLOR0_FMASK_BASE_EXT */
			radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */
			radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */
			radeon_emit(cs, cb_dcc_base);		/* CB_COLOR0_DCC_BASE */
			radeon_emit(cs, S_028C98_BASE_256B(cb_dcc_base >> 32)); /* CB_COLOR0_DCC_BASE_EXT */

			radeon_set_context_reg(cs, R_0287A0_CB_MRT0_EPITCH + i * 4,
					       S_0287A0_EPITCH(tex->surface.u.gfx9.surf.epitch));
		} else {
			/* Compute mutable surface parameters (SI-CI-VI). */
			const struct legacy_surf_level *level_info =
				&tex->surface.u.legacy.level[cb->base.u.tex.level];
			unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
			unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;

			cb_color_base += level_info->offset >> 8;
			/* Only macrotiled modes can set tile swizzle. */
			if (level_info->mode == RADEON_SURF_MODE_2D)
				cb_color_base |= tex->surface.tile_swizzle;

			if (!tex->surface.fmask_size)
				cb_color_fmask = cb_color_base;
			if (cb->base.u.tex.level > 0)
				cb_color_cmask = cb_color_base;
			if (cb_dcc_base)
				cb_dcc_base += level_info->dcc_offset >> 8;

			pitch_tile_max = level_info->nblk_x / 8 - 1;
			slice_tile_max = level_info->nblk_x *
					 level_info->nblk_y / 64 - 1;
			tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);

			cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
			cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
			cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);

			if (tex->surface.fmask_size) {
				if (sctx->chip_class >= CIK)
					cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.fmask.pitch_in_pixels / 8 - 1);
				cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.fmask.tiling_index);
				cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.fmask.slice_tile_max);
			} else {
				/* This must be set for fast clear to work without FMASK. */
				if (sctx->chip_class >= CIK)
					cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
				cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
				cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
			}

			/* VI has one extra dword (DCC_BASE) per CB slot. */
			radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
						   sctx->chip_class >= VI ? 14 : 13);
			radeon_emit(cs, cb_color_base);		/* CB_COLOR0_BASE */
			radeon_emit(cs, cb_color_pitch);	/* CB_COLOR0_PITCH */
			radeon_emit(cs, cb_color_slice);	/* CB_COLOR0_SLICE */
			radeon_emit(cs, cb->cb_color_view);	/* CB_COLOR0_VIEW */
			radeon_emit(cs, cb_color_info);		/* CB_COLOR0_INFO */
			radeon_emit(cs, cb_color_attrib);	/* CB_COLOR0_ATTRIB */
			radeon_emit(cs, cb->cb_dcc_control);	/* CB_COLOR0_DCC_CONTROL */
			radeon_emit(cs, cb_color_cmask);	/* CB_COLOR0_CMASK */
			radeon_emit(cs, tex->surface.u.legacy.cmask_slice_tile_max); /* CB_COLOR0_CMASK_SLICE */
			radeon_emit(cs, cb_color_fmask);	/* CB_COLOR0_FMASK */
			radeon_emit(cs, cb_color_fmask_slice);	/* CB_COLOR0_FMASK_SLICE */
			radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */
			radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */

			if (sctx->chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
				radeon_emit(cs, cb_dcc_base);
		}
	}
	/* Disable the remaining (previously bound) color buffer slots. */
	for (; i < 8 ; i++)
		if (sctx->framebuffer.dirty_cbufs & (1 << i))
			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);

	/* ZS buffer. */
	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
		struct si_surface *zb = (struct si_surface*)state->zsbuf;
		struct si_texture *tex = (struct si_texture*)zb->base.texture;

		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
					  &tex->buffer, RADEON_USAGE_READWRITE,
					  zb->base.texture->nr_samples > 1 ?
						  RADEON_PRIO_DEPTH_BUFFER_MSAA :
						  RADEON_PRIO_DEPTH_BUFFER);

		if (sctx->chip_class >= GFX9) {
			radeon_set_context_reg_seq(cs, R_028014_DB_HTILE_DATA_BASE, 3);
			radeon_emit(cs, zb->db_htile_data_base);	/* DB_HTILE_DATA_BASE */
			radeon_emit(cs, S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */
			radeon_emit(cs, zb->db_depth_size);	/* DB_DEPTH_SIZE */

			radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 10);
			radeon_emit(cs, zb->db_z_info |		/* DB_Z_INFO */
				    S_028038_ZRANGE_PRECISION(tex->depth_clear_value != 0));
			radeon_emit(cs, zb->db_stencil_info);	/* DB_STENCIL_INFO */
			radeon_emit(cs, zb->db_depth_base);	/* DB_Z_READ_BASE */
			radeon_emit(cs, S_028044_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_READ_BASE_HI */
			radeon_emit(cs, zb->db_stencil_base);	/* DB_STENCIL_READ_BASE */
			radeon_emit(cs, S_02804C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
			radeon_emit(cs, zb->db_depth_base);	/* DB_Z_WRITE_BASE */
			radeon_emit(cs, S_028054_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_WRITE_BASE_HI */
			radeon_emit(cs, zb->db_stencil_base);	/* DB_STENCIL_WRITE_BASE */
			radeon_emit(cs, S_02805C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */

			radeon_set_context_reg_seq(cs, R_028068_DB_Z_INFO2, 2);
			radeon_emit(cs, zb->db_z_info2);	/* DB_Z_INFO2 */
			radeon_emit(cs, zb->db_stencil_info2);	/* DB_STENCIL_INFO2 */
		} else {
			radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);

			radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
			radeon_emit(cs, zb->db_depth_info);	/* DB_DEPTH_INFO */
			radeon_emit(cs, zb->db_z_info |		/* DB_Z_INFO */
				    S_028040_ZRANGE_PRECISION(tex->depth_clear_value != 0));
			radeon_emit(cs, zb->db_stencil_info);	/* DB_STENCIL_INFO */
			radeon_emit(cs, zb->db_depth_base);	/* DB_Z_READ_BASE */
			radeon_emit(cs, zb->db_stencil_base);	/* DB_STENCIL_READ_BASE */
			radeon_emit(cs, zb->db_depth_base);	/* DB_Z_WRITE_BASE */
			radeon_emit(cs, zb->db_stencil_base);	/* DB_STENCIL_WRITE_BASE */
			radeon_emit(cs, zb->db_depth_size);	/* DB_DEPTH_SIZE */
			radeon_emit(cs, zb->db_depth_slice);	/* DB_DEPTH_SLICE */
		}

		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
		radeon_emit(cs, tex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
		radeon_emit(cs, fui(tex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */

		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
	} else if (sctx->framebuffer.dirty_zsbuf) {
		if (sctx->chip_class >= GFX9)
			radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 2);
		else
			radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);

		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
	}

	/* Framebuffer dimensions. */
	/* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
	radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));

	if (sctx->screen->dfsm_allowed) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
	}

	sctx->framebuffer.dirty_cbufs = 0;
	sctx->framebuffer.dirty_zsbuf = false;
}

/* Emit MSAA sample locations and small-primitive-filter state.
 * Sample locations may also be programmed without MSAA on chips with
 * has_msaa_sample_loc_bug (see below).
 */
static void si_emit_msaa_sample_locs(struct si_context *sctx)
{
	struct radeon_cmdbuf *cs = sctx->gfx_cs;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned nr_samples = sctx->framebuffer.nr_samples;
	bool has_msaa_sample_loc_bug = sctx->screen->has_msaa_sample_loc_bug;

	/* Smoothing (only possible with nr_samples == 1) uses the same
	 * sample locations as the MSAA it simulates.
	 */
	if (nr_samples <= 1 && sctx->smoothing_enabled)
		nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;

	/* On Polaris, the small primitive filter uses the sample locations
	 * even when MSAA is off, so we need to make sure they're set to 0.
	 */
	if ((nr_samples >= 2 || has_msaa_sample_loc_bug) &&
	    nr_samples != sctx->sample_locs_num_samples) {
		sctx->sample_locs_num_samples = nr_samples;
		si_emit_sample_locations(cs, nr_samples);
	}

	if (sctx->family >= CHIP_POLARIS10) {
		unsigned small_prim_filter_cntl =
			S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
			/* line bug */
			S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12);

		/* The alternative of setting sample locations to 0 would
		 * require a DB flush to avoid Z errors, see
		 * https://bugs.freedesktop.org/show_bug.cgi?id=96908
		 */
		if (has_msaa_sample_loc_bug &&
		    sctx->framebuffer.nr_samples > 1 &&
		    !rs->multisample_enable)
			small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;

		radeon_opt_set_context_reg(sctx,
					   R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
					   SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
					   small_prim_filter_cntl);
	}

	/* The exclusion bits can be set to improve rasterization efficiency
	 * if no sample lies on the pixel boundary (-8 sample offset).
	 */
	bool exclusion = sctx->chip_class >= CIK &&
			 (!rs->multisample_enable || nr_samples != 16);
	radeon_opt_set_context_reg(sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL,
				   SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
				   S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) |
				   S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
}

/* Decide whether out-of-order rasterization can be enabled for the current
 * blend/DSA/framebuffer combination without changing the rendered result.
 * Returns false conservatively whenever order invariance cannot be proven.
 */
static bool si_out_of_order_rasterization(struct si_context *sctx)
{
	struct si_state_blend *blend = sctx->queued.named.blend;
	struct si_state_dsa *dsa = sctx->queued.named.dsa;

	if (!sctx->screen->has_out_of_order_rast)
		return false;

	unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit;

	if (blend) {
		colormask &= blend->cb_target_enabled_4bit;
	} else {
		colormask = 0;
	}

	/* Conservative: No logic op. */
	if (colormask && blend->logicop_enable)
		return false;

	struct si_dsa_order_invariance dsa_order_invariant = {
		.zs = true, .pass_set = true, .pass_last = false
	};

	if (sctx->framebuffer.state.zsbuf) {
		struct si_texture *zstex =
			(struct si_texture*)sctx->framebuffer.state.zsbuf->texture;
		bool has_stencil = zstex->surface.has_stencil;
		dsa_order_invariant = dsa->order_invariance[has_stencil];
		if (!dsa_order_invariant.zs)
			return false;

		/* The set of PS invocations is always order invariant,
		 * except when early Z/S tests are requested. */
		if (sctx->ps_shader.cso &&
		    sctx->ps_shader.cso->info.writes_memory &&
		    sctx->ps_shader.cso->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] &&
		    !dsa_order_invariant.pass_set)
			return false;

		if (sctx->num_perfect_occlusion_queries != 0 &&
		    !dsa_order_invariant.pass_set)
			return false;
	}

	if (!colormask)
		return true;

	unsigned blendmask = colormask & blend->blend_enable_4bit;

	if (blendmask) {
		/* Only commutative blending. */
		if (blendmask & ~blend->commutative_4bit)
			return false;

		if (!dsa_order_invariant.pass_set)
			return false;
	}

	if (colormask & ~blendmask) {
		if (!dsa_order_invariant.pass_last)
			return false;
	}

	return true;
}

/* Emit scan-converter/DB EQAA state derived from the framebuffer sample
 * configuration (coverage/Z/color sample counts) and the rasterizer state.
 */
static void si_emit_msaa_config(struct si_context *sctx)
{
	struct radeon_cmdbuf *cs = sctx->gfx_cs;
	unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes;
	/* 33% faster rendering to linear color buffers */
	bool dst_is_linear = sctx->framebuffer.any_dst_linear;
	bool out_of_order_rast = si_out_of_order_rasterization(sctx);
	unsigned sc_mode_cntl_1 =
		S_028A4C_WALK_SIZE(dst_is_linear) |
		S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
		S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
		S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
		S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
		/* always 1: */
		S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
		S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
		S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
		S_028A4C_FORCE_EOV_REZ_ENABLE(1);
	unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
			   S_028804_INCOHERENT_EQAA_READS(1) |
			   S_028804_INTERPOLATE_COMP_Z(1) |
			   S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
	unsigned coverage_samples, color_samples, z_samples;

	/* S: Coverage samples (up to 16x):
	 * - Scan conversion samples (PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES)
	 * - CB FMASK samples (CB_COLORi_ATTRIB.NUM_SAMPLES)
	 *
	 * Z: Z/S samples (up to 8x, must be <= coverage samples and >= color samples):
	 * - Value seen by DB (DB_Z_INFO.NUM_SAMPLES)
	 * - Value seen by CB, must be correct even if Z/S is unbound (DB_EQAA.MAX_ANCHOR_SAMPLES)
	 * # Missing samples are derived from Z planes if Z is compressed (up to 16x quality), or
	 * # from the closest defined sample if Z is uncompressed (same quality as the number of
	 * # Z samples).
	 *
	 * F: Color samples (up to 8x, must be <= coverage samples):
	 * - CB color samples (CB_COLORi_ATTRIB.NUM_FRAGMENTS)
	 * - PS iter samples (DB_EQAA.PS_ITER_SAMPLES)
	 *
	 * Can be anything between coverage and color samples:
	 * - SampleMaskIn samples (PA_SC_AA_CONFIG.MSAA_EXPOSED_SAMPLES)
	 * - SampleMaskOut samples (DB_EQAA.MASK_EXPORT_NUM_SAMPLES)
	 * - Alpha-to-coverage samples (DB_EQAA.ALPHA_TO_MASK_NUM_SAMPLES)
	 * - Occlusion query samples (DB_COUNT_CONTROL.SAMPLE_RATE)
	 * # All are currently set the same as coverage samples.
	 *
	 * If color samples < coverage samples, FMASK has a higher bpp to store an "unknown"
	 * flag for undefined color samples. A shader-based resolve must handle unknowns
	 * or mask them out with AND. Unknowns can also be guessed from neighbors via
	 * an edge-detect shader-based resolve, which is required to make "color samples = 1"
	 * useful. The CB resolve always drops unknowns.
	 *
	 * Sensible AA configurations:
	 *   EQAA 16s 8z 8f - might look the same as 16x MSAA if Z is compressed
	 *   EQAA 16s 8z 4f - might look the same as 16x MSAA if Z is compressed
	 *   EQAA 16s 4z 4f - might look the same as 16x MSAA if Z is compressed
	 *   EQAA  8s 8z 8f = 8x MSAA
	 *   EQAA  8s 8z 4f - might look the same as 8x MSAA
	 *   EQAA  8s 8z 2f - might look the same as 8x MSAA with low-density geometry
	 *   EQAA  8s 4z 4f - might look the same as 8x MSAA if Z is compressed
	 *   EQAA  8s 4z 2f - might look the same as 8x MSAA with low-density geometry if Z is compressed
	 *   EQAA  4s 4z 4f = 4x MSAA
	 *   EQAA  4s 4z 2f - might look the same as 4x MSAA with low-density geometry
	 *   EQAA  2s 2z 2f = 2x MSAA
	 */
	if (sctx->framebuffer.nr_samples > 1) {
		coverage_samples = sctx->framebuffer.nr_samples;
		color_samples = sctx->framebuffer.nr_color_samples;

		if (sctx->framebuffer.state.zsbuf) {
			z_samples = sctx->framebuffer.state.zsbuf->texture->nr_samples;
			z_samples = MAX2(1, z_samples);
		} else {
			z_samples = coverage_samples;
		}
	} else if (sctx->smoothing_enabled) {
		coverage_samples = color_samples = z_samples = SI_NUM_SMOOTH_AA_SAMPLES;
	} else {
		coverage_samples = color_samples = z_samples = 1;
	}

	/* Required by OpenGL line rasterization.
	 *
	 * TODO: We should also enable perpendicular endcaps for AA lines,
	 *       but that requires implementing line stippling in the pixel
	 *       shader. SC can only do line stippling with axis-aligned
	 *       endcaps.
	 */
	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
	unsigned sc_aa_config = 0;

	if (coverage_samples > 1) {
		/* distance from the pixel center, indexed by log2(nr_samples) */
		static unsigned max_dist[] = {
			0, /* unused */
			4, /* 2x MSAA */
			6, /* 4x MSAA */
			7, /* 8x MSAA */
			8, /* 16x MSAA */
		};
		unsigned log_samples = util_logbase2(coverage_samples);
		unsigned log_z_samples = util_logbase2(z_samples);
		unsigned ps_iter_samples = si_get_ps_iter_samples(sctx);
		unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);

		sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);
		sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
			       S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
			       S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples);

		if (sctx->framebuffer.nr_samples > 1) {
			db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
				   S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
				   S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
				   S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
			sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
		} else if (sctx->smoothing_enabled) {
			db_eqaa |= S_028804_OVERRASTERIZATION_AMOUNT(log_samples);
		}
	}

	/* Remember the write pointer so we can tell below whether the
	 * radeon_opt_* calls actually emitted anything. */
	unsigned initial_cdw = cs->current.cdw;

	/* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */
	radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL,
				    SI_TRACKED_PA_SC_LINE_CNTL, sc_line_cntl,
				    sc_aa_config);
	/* R_028804_DB_EQAA */
	radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA,
				   db_eqaa);
	/* R_028A4C_PA_SC_MODE_CNTL_1 */
	radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1,
				   SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1);

	if (initial_cdw != cs->current.cdw) {
		sctx->context_roll = true;

		/* GFX9: Flush DFSM when the AA mode changes. */
		if (sctx->screen->dfsm_allowed) {
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
		}
	}
}

/* Dirty the atoms whose contents depend on ps_iter_samples. */
void si_update_ps_iter_samples(struct si_context *sctx)
{
	if (sctx->framebuffer.nr_samples > 1)
		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
	if (sctx->screen->dpbb_allowed)
		si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
}

/* pipe_context::set_min_samples implementation (sample shading rate). */
static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
{
	struct si_context *sctx = (struct si_context *)ctx;

	/* The hardware can only do sample shading with 2^n samples. */
	min_samples = util_next_power_of_two(min_samples);

	if (sctx->ps_iter_samples == min_samples)
		return;

	sctx->ps_iter_samples = min_samples;
	sctx->do_update_shaders = true;

	si_update_ps_iter_samples(sctx);
}

/*
 * Samplers
 */

/**
 * Build the sampler view descriptor for a buffer texture.
 * @param state 256-bit descriptor; only the high 128 bits are filled in
 */
void
si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf,
			  enum pipe_format format,
			  unsigned offset, unsigned size,
			  uint32_t *state)
{
	const struct util_format_description *desc;
	int first_non_void;
	unsigned stride;
	unsigned num_records;
	unsigned num_format, data_format;

	desc = util_format_description(format);
	first_non_void = util_format_get_first_non_void_channel(format);
	stride = desc->block.bits / 8;
	num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void);
	data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void);

	/* Clamp the record count so the view never extends past the end of
	 * the underlying buffer. */
	num_records = size / stride;
	num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);

	/* The NUM_RECORDS field has a different meaning depending on the chip,
	 * instruction type, STRIDE, and SWIZZLE_ENABLE.
	 *
	 * SI-CIK:
	 * - If STRIDE == 0, it's in byte units.
	 * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN.
	 *
	 * VI:
	 * - For SMEM and STRIDE == 0, it's in byte units.
	 * - For SMEM and STRIDE != 0, it's in units of STRIDE.
	 * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units.
	 * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE.
	 * NOTE: There is incompatibility between VMEM and SMEM opcodes due to
	 *       SWIZZLE_ENABLE. The workaround is to set STRIDE = 0 if
	 *       SWIZZLE_ENABLE == 0 when using SMEM. This can be done in the
	 *       shader by clearing STRIDE with s_and. That way the same
	 *       descriptor can be used by both SMEM and VMEM.
	 *
	 * GFX9:
	 * - For SMEM and STRIDE == 0, it's in byte units.
	 * - For SMEM and STRIDE != 0, it's in units of STRIDE.
	 * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units.
	 * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE.
	 */
	if (screen->info.chip_class >= GFX9 && HAVE_LLVM < 0x0800)
		/* When vindex == 0, LLVM < 8.0 sets IDXEN = 0, thus changing units
		 * from STRIDE to bytes. This works around it by setting
		 * NUM_RECORDS to at least the size of one element, so that
		 * the first element is readable when IDXEN == 0.
		 */
		num_records = num_records ? MAX2(num_records, stride) : 0;
	else if (screen->info.chip_class == VI)
		num_records *= stride;

	/* Only dwords 4-7 (the high 128 bits) are written; see @param state. */
	state[4] = 0;
	state[5] = S_008F04_STRIDE(stride);
	state[6] = num_records;
	state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
		   S_008F0C_NUM_FORMAT(num_format) |
		   S_008F0C_DATA_FORMAT(data_format);
}

/* Map a format's channel swizzle to the GFX9 border-color swizzle enum. */
static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
{
	unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

	if (swizzle[3] == PIPE_SWIZZLE_X) {
		/* For the pre-defined border color values (white, opaque
		 * black, transparent black), the only thing that matters is
		 * that the alpha channel winds up in the correct place
		 * (because the RGB channels are all the same) so either of
		 * these enumerations will work.
		 */
		if (swizzle[2] == PIPE_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
	} else if (swizzle[0] == PIPE_SWIZZLE_X) {
		if (swizzle[1] == PIPE_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
	} else if (swizzle[1] == PIPE_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
	} else if (swizzle[2] == PIPE_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
	}

	return bc_swizzle;
}

/**
 * Build the sampler view descriptor for a texture.
 *
 * Fills all 8 dwords of \p state and, when the texture has an FMASK buffer,
 * all 8 dwords of \p fmask_state.
 */
void
si_make_texture_descriptor(struct si_screen *screen,
			   struct si_texture *tex,
			   bool sampler,
			   enum pipe_texture_target target,
			   enum pipe_format pipe_format,
			   const unsigned char state_swizzle[4],
			   unsigned first_level, unsigned last_level,
			   unsigned first_layer, unsigned last_layer,
			   unsigned width, unsigned height, unsigned depth,
			   uint32_t *state,
			   uint32_t *fmask_state)
{
	struct pipe_resource *res = &tex->buffer.b.b;
	const struct util_format_description *desc;
	unsigned char swizzle[4];
	int first_non_void;
	unsigned num_format, data_format, type, num_samples;
	uint64_t va;

	desc = util_format_description(pipe_format);

	num_samples = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ?
			MAX2(1, res->nr_samples) :
			MAX2(1, res->nr_storage_samples);

	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
		/* Depth/stencil: replicate the sampled component into all
		 * channels before applying the view swizzle. */
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
		const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};

		switch (pipe_format) {
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_X8Z24_UNORM:
			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
			break;
		case PIPE_FORMAT_X24S8_UINT:
			/*
			 * X24S8 is implemented as an 8_8_8_8 data format, to
			 * fix texture gathers. This affects at least
			 * GL45-CTS.texture_cube_map_array.sampling on VI.
			 */
			if (screen->info.chip_class <= VI)
				util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
			else
				util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
			break;
		default:
			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
		}
	} else {
		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
	}

	first_non_void = util_format_get_first_non_void_channel(pipe_format);

	/* Derive the hw NUM_FORMAT from the pipe format. */
	switch (pipe_format) {
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
		break;
	default:
		if (first_non_void < 0) {
			if (util_format_is_compressed(pipe_format)) {
				switch (pipe_format) {
				case PIPE_FORMAT_DXT1_SRGB:
				case PIPE_FORMAT_DXT1_SRGBA:
				case PIPE_FORMAT_DXT3_SRGBA:
				case PIPE_FORMAT_DXT5_SRGBA:
				case PIPE_FORMAT_BPTC_SRGBA:
				case PIPE_FORMAT_ETC2_SRGB8:
				case PIPE_FORMAT_ETC2_SRGB8A1:
				case PIPE_FORMAT_ETC2_SRGBA8:
					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
					break;
				case PIPE_FORMAT_RGTC1_SNORM:
				case PIPE_FORMAT_LATC1_SNORM:
				case PIPE_FORMAT_RGTC2_SNORM:
				case PIPE_FORMAT_LATC2_SNORM:
				case PIPE_FORMAT_ETC2_R11_SNORM:
				case PIPE_FORMAT_ETC2_RG11_SNORM:
				/* implies float, so use SNORM/UNORM to determine
				   whether data is signed or not */
				case PIPE_FORMAT_BPTC_RGB_FLOAT:
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
					break;
				default:
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
					break;
				}
			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
			} else {
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
			}
		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
		} else {
			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;

			switch (desc->channel[first_non_void].type) {
			case UTIL_FORMAT_TYPE_FLOAT:
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
				break;
			case UTIL_FORMAT_TYPE_SIGNED:
				if (desc->channel[first_non_void].normalized)
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
				else if (desc->channel[first_non_void].pure_integer)
					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
				else
					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
				break;
			case UTIL_FORMAT_TYPE_UNSIGNED:
				if (desc->channel[first_non_void].normalized)
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
				else if (desc->channel[first_non_void].pure_integer)
					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
				else
					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
			}
		}
	}

	data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void);
	if (data_format == ~0) {
		data_format = 0;
	}

	/* S8 with Z32 HTILE needs a special format. */
	if (screen->info.chip_class >= GFX9 &&
	    pipe_format == PIPE_FORMAT_S8_UINT &&
	    tex->tc_compatible_htile)
		data_format = V_008F14_IMG_DATA_FORMAT_S8_32;

	if (!sampler &&
	    (res->target == PIPE_TEXTURE_CUBE ||
	     res->target == PIPE_TEXTURE_CUBE_ARRAY ||
	     (screen->info.chip_class <= VI &&
	      res->target == PIPE_TEXTURE_3D))) {
		/* For the purpose of shader images, treat cube maps and 3D
		 * textures as 2D arrays. For 3D textures, the address
		 * calculations for mipmaps are different, so we rely on the
		 * caller to effectively disable mipmaps.
		 */
		type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;

		assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
	} else {
		type = si_tex_dim(screen, tex, target, num_samples);
	}

	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		height = 1;
		depth = res->array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (sampler || res->target != PIPE_TEXTURE_3D)
			depth = res->array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = res->array_size / 6;

	state[0] = 0;
	state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
		    S_008F14_NUM_FORMAT_GFX6(num_format));
	state[2] = (S_008F18_WIDTH(width - 1) |
		    S_008F18_HEIGHT(height - 1) |
		    S_008F18_PERF_MOD(4));
	state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
		    S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
		    S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
		    S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
		    S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : first_level) |
		    S_008F1C_LAST_LEVEL(num_samples > 1 ?
					util_logbase2(num_samples) :
					last_level) |
		    S_008F1C_TYPE(type));
	state[4] = 0;
	state[5] = S_008F24_BASE_ARRAY(first_layer);
	state[6] = 0;
	state[7] = 0;

	if (screen->info.chip_class >= GFX9) {
		unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);

		/* Depth is the last accessible layer on Gfx9.
		 * The hw doesn't need to know the total number of layers.
		 */
		if (type == V_008F1C_SQ_RSRC_IMG_3D)
			state[4] |= S_008F20_DEPTH(depth - 1);
		else
			state[4] |= S_008F20_DEPTH(last_layer);

		state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
		state[5] |= S_008F24_MAX_MIP(num_samples > 1 ?
					     util_logbase2(num_samples) :
					     tex->buffer.b.b.last_level);
	} else {
		state[3] |= S_008F1C_POW2_PAD(res->last_level > 0);
		state[4] |= S_008F20_DEPTH(depth - 1);
		state[5] |= S_008F24_LAST_ARRAY(last_layer);
	}

	if (tex->dcc_offset) {
		state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(pipe_format));
	} else {
		/* The last dword is unused by hw. The shader uses it to clear
		 * bits in the first dword of sampler state.
		 */
		if (screen->info.chip_class <= CIK && res->nr_samples <= 1) {
			if (first_level == last_level)
				state[7] = C_008F30_MAX_ANISO_RATIO;
			else
				state[7] = 0xffffffff;
		}
	}

	/* Initialize the sampler view for FMASK. */
	if (tex->surface.fmask_size) {
		uint32_t data_format, num_format;

		va = tex->buffer.gpu_address + tex->fmask_offset;

		/* Encode (coverage samples, color samples) as one key. */
#define FMASK(s,f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))
		if (screen->info.chip_class >= GFX9) {
			data_format = V_008F14_IMG_DATA_FORMAT_FMASK;
			switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
			case FMASK(2,1):
				num_format = V_008F14_IMG_FMASK_8_2_1;
				break;
			case FMASK(2,2):
				num_format = V_008F14_IMG_FMASK_8_2_2;
				break;
			case FMASK(4,1):
				num_format = V_008F14_IMG_FMASK_8_4_1;
				break;
			case FMASK(4,2):
				num_format = V_008F14_IMG_FMASK_8_4_2;
				break;
			case FMASK(4,4):
				num_format = V_008F14_IMG_FMASK_8_4_4;
				break;
			case FMASK(8,1):
				num_format = V_008F14_IMG_FMASK_8_8_1;
				break;
			case FMASK(8,2):
				num_format = V_008F14_IMG_FMASK_16_8_2;
				break;
			case FMASK(8,4):
				num_format = V_008F14_IMG_FMASK_32_8_4;
				break;
			case FMASK(8,8):
				num_format = V_008F14_IMG_FMASK_32_8_8;
				break;
			case FMASK(16,1):
				num_format = V_008F14_IMG_FMASK_16_16_1;
				break;
			case FMASK(16,2):
				num_format = V_008F14_IMG_FMASK_32_16_2;
				break;
			case FMASK(16,4):
				num_format = V_008F14_IMG_FMASK_64_16_4;
				break;
			case FMASK(16,8):
				num_format = V_008F14_IMG_FMASK_64_16_8;
				break;
			default:
				unreachable("invalid nr_samples");
			}
		} else {
			switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
			case FMASK(2,1):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1;
				break;
			case FMASK(2,2):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
				break;
			case FMASK(4,1):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1;
				break;
			case FMASK(4,2):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2;
				break;
			case FMASK(4,4):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
				break;
			case FMASK(8,1):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1;
				break;
			case FMASK(8,2):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2;
				break;
			case FMASK(8,4):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4;
				break;
			case FMASK(8,8):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
				break;
			case FMASK(16,1):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1;
				break;
			case FMASK(16,2):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2;
				break;
			case FMASK(16,4):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4;
				break;
			case FMASK(16,8):
				data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8;
				break;
			default:
				unreachable("invalid nr_samples");
			}
			num_format = V_008F14_IMG_NUM_FORMAT_UINT;
		}
#undef FMASK

		fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle;
		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
				 S_008F14_DATA_FORMAT_GFX6(data_format) |
				 S_008F14_NUM_FORMAT_GFX6(num_format);
		fmask_state[2] = S_008F18_WIDTH(width - 1) |
				 S_008F18_HEIGHT(height - 1);
		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_008F1C_TYPE(si_tex_dim(screen, tex, target, 0));
		fmask_state[4] = 0;
		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
		fmask_state[6] = 0;
		fmask_state[7] = 0;

		if (screen->info.chip_class >= GFX9) {
			fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode);
			fmask_state[4] |= S_008F20_DEPTH(last_layer) |
					  S_008F20_PITCH_GFX9(tex->surface.u.gfx9.fmask.epitch);
			fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned) |
					  S_008F24_META_RB_ALIGNED(tex->surface.u.gfx9.cmask.rb_aligned);
		} else {
			fmask_state[3] |= S_008F1C_TILING_INDEX(tex->surface.u.legacy.fmask.tiling_index);
			fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
					  S_008F20_PITCH_GFX6(tex->surface.u.legacy.fmask.pitch_in_pixels - 1);
			fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
		}
	}
}

/**
 * Create a sampler view.
 *
 * @param ctx		context
 * @param texture	texture
 * @param state		sampler view template
 * @param width0	width0 override (for compressed textures as int)
 * @param height0	height0 override (for compressed textures as int)
 * @param force_level	set the base address to the level (for compressed textures)
 */
struct pipe_sampler_view *
si_create_sampler_view_custom(struct pipe_context *ctx,
			      struct pipe_resource *texture,
			      const struct pipe_sampler_view *state,
			      unsigned width0, unsigned height0,
			      unsigned force_level)
{
	struct si_context *sctx = (struct si_context*)ctx;
	struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
	struct si_texture *tex = (struct si_texture*)texture;
	unsigned base_level, first_level, last_level;
	unsigned char state_swizzle[4];
	unsigned height, depth, width;
	unsigned last_layer = state->u.tex.last_layer;
	enum pipe_format pipe_format;
	const struct legacy_surf_level *surflevel;

	if (!view)
		return NULL;

	/* initialize base object */
	view->base = *state;
	view->base.texture = NULL;
	view->base.reference.count = 1;
	view->base.context = ctx;

	assert(texture);
	pipe_resource_reference(&view->base.texture, texture);

	if (state->format == PIPE_FORMAT_X24S8_UINT ||
	    state->format == PIPE_FORMAT_S8X24_UINT ||
	    state->format == PIPE_FORMAT_X32_S8X24_UINT ||
	    state->format == PIPE_FORMAT_S8_UINT)
		view->is_stencil_sampler = true;

	/* Buffer resource.
	 */
	if (texture->target == PIPE_BUFFER) {
		si_make_buffer_descriptor(sctx->screen,
					  si_resource(texture),
					  state->format,
					  state->u.buf.offset,
					  state->u.buf.size,
					  view->state);
		return &view->base;
	}

	state_swizzle[0] = state->swizzle_r;
	state_swizzle[1] = state->swizzle_g;
	state_swizzle[2] = state->swizzle_b;
	state_swizzle[3] = state->swizzle_a;

	base_level = 0;
	first_level = state->u.tex.first_level;
	last_level = state->u.tex.last_level;
	width = width0;
	height = height0;
	depth = texture->depth0;

	if (sctx->chip_class <= VI && force_level) {
		/* Point the view at a single forced level; the descriptor then
		 * sees it as level 0. */
		assert(force_level == first_level &&
		       force_level == last_level);
		base_level = force_level;
		first_level = 0;
		last_level = 0;
		width = u_minify(width, force_level);
		height = u_minify(height, force_level);
		depth = u_minify(depth, force_level);
	}

	/* This is not needed if state trackers set last_layer correctly. */
	if (state->target == PIPE_TEXTURE_1D ||
	    state->target == PIPE_TEXTURE_2D ||
	    state->target == PIPE_TEXTURE_RECT ||
	    state->target == PIPE_TEXTURE_CUBE)
		last_layer = state->u.tex.first_layer;

	/* Texturing with separate depth and stencil. */
	pipe_format = state->format;

	/* Depth/stencil texturing sometimes needs separate texture. */
	if (tex->is_depth && !si_can_sample_zs(tex, view->is_stencil_sampler)) {
		if (!tex->flushed_depth_texture &&
		    !si_init_flushed_depth_texture(ctx, texture, NULL)) {
			pipe_resource_reference(&view->base.texture, NULL);
			FREE(view);
			return NULL;
		}

		assert(tex->flushed_depth_texture);

		/* Override format for the case where the flushed texture
		 * contains only Z or only S.
		 */
		if (tex->flushed_depth_texture->buffer.b.b.format != tex->buffer.b.b.format)
			pipe_format = tex->flushed_depth_texture->buffer.b.b.format;

		tex = tex->flushed_depth_texture;
	}

	surflevel = tex->surface.u.legacy.level;

	if (tex->db_compatible) {
		if (!view->is_stencil_sampler)
			pipe_format = tex->db_render_format;

		switch (pipe_format) {
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			pipe_format = PIPE_FORMAT_Z32_FLOAT;
			break;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			/* Z24 is always stored like this for DB
			 * compatibility.
			 */
			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
			break;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_S8X24_UINT:
		case PIPE_FORMAT_X32_S8X24_UINT:
			pipe_format = PIPE_FORMAT_S8_UINT;
			surflevel = tex->surface.u.legacy.stencil_level;
			break;
		default:;
		}
	}

	view->dcc_incompatible =
		vi_dcc_formats_are_incompatible(texture,
						state->u.tex.first_level,
						state->format);

	si_make_texture_descriptor(sctx->screen, tex, true,
				   state->target, pipe_format, state_swizzle,
				   first_level, last_level,
				   state->u.tex.first_layer, last_layer,
				   width, height, depth,
				   view->state, view->fmask_state);

	unsigned num_format = G_008F14_NUM_FORMAT_GFX6(view->state[1]);
	view->is_integer =
		num_format == V_008F14_IMG_NUM_FORMAT_USCALED ||
		num_format == V_008F14_IMG_NUM_FORMAT_SSCALED ||
		num_format == V_008F14_IMG_NUM_FORMAT_UINT ||
		num_format == V_008F14_IMG_NUM_FORMAT_SINT;
	view->base_level_info = &surflevel[base_level];
	view->base_level = base_level;
	view->block_width = util_format_get_blockwidth(pipe_format);
	return &view->base;
}

/* pipe_context::create_sampler_view hook - no size/level overrides. */
static struct pipe_sampler_view *
si_create_sampler_view(struct pipe_context *ctx,
		       struct pipe_resource *texture,
		       const struct pipe_sampler_view
		       *state)
{
	return si_create_sampler_view_custom(ctx, texture, state,
					     texture ? texture->width0 : 0,
					     texture ? texture->height0 : 0, 0);
}

/* pipe_context::sampler_view_destroy hook. */
static void si_sampler_view_destroy(struct pipe_context *ctx,
				    struct pipe_sampler_view *state)
{
	struct si_sampler_view *view = (struct si_sampler_view *)state;

	pipe_resource_reference(&state->texture, NULL);
	FREE(view);
}

/* Return whether this wrap mode can ever sample the border color. */
static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
{
	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
	       (linear_filter &&
		(wrap == PIPE_TEX_WRAP_CLAMP ||
		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
}

/* Translate a border color into the BORDER_COLOR_* fields of the sampler
 * state. Non-trivial colors are uploaded into the per-context border color
 * table and referenced by index. */
static uint32_t si_translate_border_color(struct si_context *sctx,
					  const struct pipe_sampler_state *state,
					  const union pipe_color_union *color,
					  bool is_integer)
{
	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;

	/* If no wrap mode can sample the border, any color will do. */
	if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) &&
	    !wrap_mode_uses_border_color(state->wrap_t, linear_filter) &&
	    !wrap_mode_uses_border_color(state->wrap_r, linear_filter))
		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);

	/* Recognize the three built-in border colors for the given element
	 * type (ui = integer, f = float). */
#define simple_border_types(elt) \
do { \
	if (color->elt[0] == 0 && color->elt[1] == 0 && \
	    color->elt[2] == 0 && color->elt[3] == 0) \
		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); \
	if (color->elt[0] == 0 && color->elt[1] == 0 && \
	    color->elt[2] == 0 && color->elt[3] == 1) \
		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK); \
	if (color->elt[0] == 1 && color->elt[1] == 1 && \
	    color->elt[2] == 1 && color->elt[3] == 1) \
		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE); \
} while (false)

	if (is_integer)
		simple_border_types(ui);
	else
		simple_border_types(f);

#undef simple_border_types

	int i;

	/* Check if the border has been uploaded already. */
	for (i = 0; i < sctx->border_color_count; i++)
		if (memcmp(&sctx->border_color_table[i], color,
			   sizeof(*color)) == 0)
			break;

	if (i >= SI_MAX_BORDER_COLORS) {
		/* Getting 4096 unique border colors is very unlikely. */
		fprintf(stderr, "radeonsi: The border color table is full. "
			"Any new border colors will be just black. "
			"Please file a bug.\n");
		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);
	}

	if (i == sctx->border_color_count) {
		/* Upload a new border color. */
		memcpy(&sctx->border_color_table[i], color,
		       sizeof(*color));
		util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
					color, sizeof(*color));
		sctx->border_color_count++;
	}

	return S_008F3C_BORDER_COLOR_PTR(i) |
	       S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER);
}

/* Convert a float to fixed point with the given number of fractional bits. */
static inline int S_FIXED(float value, unsigned frac_bits)
{
	return value * (1 << frac_bits);
}

/* Map a pipe XY filter + aniso setting to the hw filter enum. */
static inline unsigned si_tex_filter(unsigned filter, unsigned max_aniso)
{
	if (filter == PIPE_TEX_FILTER_LINEAR)
		return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
				     : V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
	else
		return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
				     : V_008F38_SQ_TEX_XY_FILTER_POINT;
}

/* Map max_anisotropy to the hw MAX_ANISO_RATIO field (log2 of the ratio). */
static inline unsigned si_tex_aniso_filter(unsigned filter)
{
	if (filter < 2)
		return 0;
	if (filter < 4)
		return 1;
	if (filter < 8)
		return 2;
	if (filter < 16)
		return 3;
	return 4;
}

/* pipe_context::create_sampler_state hook: pack the sampler descriptor
 * (val) plus variants for integer textures and upgraded depth textures. */
static void *si_create_sampler_state(struct pipe_context *ctx,
				     const struct pipe_sampler_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_screen *sscreen = sctx->screen;
	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
	unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso
						       : state->max_anisotropy;
	unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso);
	union pipe_color_union clamped_border_color;

	if (!rstate) {
		return NULL;
	}

#ifdef DEBUG
	rstate->magic = SI_SAMPLER_STATE_MAGIC;
#endif
	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
			  S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
			  S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			  S_008F30_ANISO_BIAS(max_aniso_ratio) |
			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
			  S_008F30_COMPAT_MODE(sctx->chip_class >= VI));
	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
			  S_008F34_PERF_MIP(max_aniso_ratio ?
					    max_aniso_ratio + 6 : 0));
	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) |
			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) |
			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
			  S_008F38_MIP_POINT_PRECLAMP(0) |
			  S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= VI) |
			  S_008F38_FILTER_PREC_FIX(1) |
			  S_008F38_ANISO_OVERRIDE(sctx->chip_class >= VI));
	rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, false);

	/* Create sampler resource for integer textures. */
	memcpy(rstate->integer_val, rstate->val, sizeof(rstate->val));
	rstate->integer_val[3] = si_translate_border_color(sctx, state, &state->border_color, true);

	/* Create sampler resource for upgraded depth textures. */
	memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val));

	for (unsigned i = 0; i < 4; ++i) {
		/* Use channel 0 on purpose, so that we can use OPAQUE_WHITE
		 * when the border color is 1.0. */
		clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1);
	}

	if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0)
		rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1);
	else
		rstate->upgraded_depth_val[3] =
			si_translate_border_color(sctx, state, &clamped_border_color, false) |
			S_008F3C_UPGRADED_DEPTH(1);

	return rstate;
}

/* pipe_context::set_sample_mask hook (mask is truncated to 16 bits). */
static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (sctx->sample_mask == (uint16_t)sample_mask)
		return;

	sctx->sample_mask = sample_mask;
	si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_mask);
}

/* Emit PA_SC_AA_MASK for all four pixels of the 2x2 quad. */
static void si_emit_sample_mask(struct si_context *sctx)
{
	struct radeon_cmdbuf *cs = sctx->gfx_cs;
	unsigned mask = sctx->sample_mask;

	/* Needed for line and polygon smoothing as well as for the Polaris
	 * small primitive filter. We expect the state tracker to take care of
	 * this for us.
	 */
	assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
	       (mask & 1 && sctx->blitter->running));

	radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
	radeon_emit(cs, mask | (mask << 16));
	radeon_emit(cs, mask | (mask << 16));
}

/* pipe_context::delete_sampler_state hook. */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
#ifdef DEBUG
	struct si_sampler_state *s = state;

	assert(s->magic == SI_SAMPLER_STATE_MAGIC);
	s->magic = 0;
#endif
	free(state);
}

/*
 * Vertex elements & buffers
 */

/* GPU-consumable layout of the fast-division constants; must stay exactly
 * 16 bytes (4 dwords), see the STATIC_ASSERTs in si_create_vertex_elements. */
struct util_fast_udiv_info32 {
	unsigned multiplier; /* the "magic number" multiplier */
	unsigned pre_shift; /* shift for the dividend before multiplying */
	unsigned post_shift; /* shift for the dividend after multiplying */
	int increment; /* 0 or 1; if set then increment the numerator, using one of
			  the two strategies */
};

/* Narrow util_compute_fast_udiv_info's 64-bit result to the 32-bit layout. */
static struct util_fast_udiv_info32
util_compute_fast_udiv_info32(uint32_t D, unsigned num_bits)
{
	struct util_fast_udiv_info info =
		util_compute_fast_udiv_info(D, num_bits, 32);

	struct util_fast_udiv_info32 result = {
		info.multiplier,
		info.pre_shift,
		info.post_shift,
		info.increment,
	};
	return result;
}

/* pipe_context::create_vertex_elements_state hook: build the vertex fetch
 * descriptors, the shader fix_fetch workarounds, and (if needed) a buffer
 * holding fast-division constants for instance divisors. */
static void *si_create_vertex_elements(struct pipe_context *ctx,
				       unsigned count,
				       const struct pipe_vertex_element *elements)
{
	struct si_screen *sscreen = (struct si_screen*)ctx->screen;
	struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements);
	bool used[SI_NUM_VERTEX_BUFFERS] = {};
	struct util_fast_udiv_info32 divisor_factors[SI_MAX_ATTRIBS] = {};
	/* The divisor buffer layout is consumed by shaders, so pin it down. */
	STATIC_ASSERT(sizeof(struct util_fast_udiv_info32) == 16);
	STATIC_ASSERT(sizeof(divisor_factors[0].multiplier) == 4);
	STATIC_ASSERT(sizeof(divisor_factors[0].pre_shift) == 4);
	STATIC_ASSERT(sizeof(divisor_factors[0].post_shift) == 4);
	STATIC_ASSERT(sizeof(divisor_factors[0].increment) == 4);
	int i;

	assert(count <= SI_MAX_ATTRIBS);
	if (!v)
		return NULL;

	v->count = count;
	v->desc_list_byte_size = align(count * 16, SI_CPDMA_ALIGNMENT);

	for (i = 0; i < count; ++i) {
		const struct util_format_description *desc;
		const struct util_format_channel_description *channel;
		unsigned data_format, num_format;
		int first_non_void;
		unsigned vbo_index = elements[i].vertex_buffer_index;
		unsigned char swizzle[4];

		if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
			FREE(v);
			return NULL;
		}

		unsigned instance_divisor = elements[i].instance_divisor;
		if (instance_divisor) {
			v->uses_instance_divisors = true;

			if (instance_divisor == 1) {
				v->instance_divisor_is_one |= 1u << i;
			} else {
				/* Non-trivial divisor: the shader divides using
				 * the precomputed magic constants. */
				v->instance_divisor_is_fetched |= 1u << i;
				divisor_factors[i] =
					util_compute_fast_udiv_info32(instance_divisor, 32);
			}
		}

		if (!used[vbo_index]) {
			v->first_vb_use_mask |= 1 << i;
			used[vbo_index] = true;
		}

		desc = util_format_description(elements[i].src_format);
		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
		channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
		memcpy(swizzle, desc->swizzle, sizeof(swizzle));

		v->format_size[i] = desc->block.bits / 8;
		v->src_offset[i] = elements[i].src_offset;
		v->vertex_buffer_index[i] = vbo_index;

		/* The hardware always treats the 2-bit alpha channel as
		 * unsigned, so a shader workaround is needed. The affected
		 * chips are VI and older except Stoney (GFX8.1).
		 */
		if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10 &&
		    sscreen->info.chip_class <= VI &&
		    sscreen->info.family != CHIP_STONEY) {
			if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
				v->fix_fetch[i] = SI_FIX_FETCH_A2_SNORM;
			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
				v->fix_fetch[i] = SI_FIX_FETCH_A2_SSCALED;
			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
				/* This isn't actually used in OpenGL. */
				v->fix_fetch[i] = SI_FIX_FETCH_A2_SINT;
			}
		} else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) {
			if (desc->swizzle[3] == PIPE_SWIZZLE_1)
				v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_FIXED;
			else
				v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_FIXED;
		} else if (channel && channel->size == 32 && !channel->pure_integer) {
			if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
				if (channel->normalized) {
					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
						v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_SNORM;
					else
						v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SNORM;
				} else {
					v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SSCALED;
				}
			} else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
				if (channel->normalized) {
					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
						v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_UNORM;
					else
						v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_UNORM;
				} else {
					v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_USCALED;
				}
			}
		} else if (channel && channel->size == 64 &&
			   channel->type == UTIL_FORMAT_TYPE_FLOAT) {
			/* Doubles are fetched as 32-bit pairs and reassembled
			 * in the shader; rewrite the swizzle accordingly. */
			switch (desc->nr_channels) {
			case 1:
			case 2:
				v->fix_fetch[i] = SI_FIX_FETCH_RG_64_FLOAT;
				swizzle[0] = PIPE_SWIZZLE_X;
				swizzle[1] = PIPE_SWIZZLE_Y;
				swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0;
				swizzle[3] = desc->nr_channels == 2 ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_0;
				break;
			case 3:
				v->fix_fetch[i] = SI_FIX_FETCH_RGB_64_FLOAT;
				swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */
				swizzle[1] = PIPE_SWIZZLE_Y;
				swizzle[2] = PIPE_SWIZZLE_0;
				swizzle[3] = PIPE_SWIZZLE_0;
				break;
			case 4:
				v->fix_fetch[i] = SI_FIX_FETCH_RGBA_64_FLOAT;
				swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */
				swizzle[1] = PIPE_SWIZZLE_Y;
				swizzle[2] = PIPE_SWIZZLE_Z;
				swizzle[3] = PIPE_SWIZZLE_W;
				break;
			default:
				assert(0);
			}
		} else if (channel && desc->nr_channels == 3) {
			assert(desc->swizzle[0] == PIPE_SWIZZLE_X);

			if (channel->size == 8) {
				if (channel->pure_integer)
					v->fix_fetch[i] = SI_FIX_FETCH_RGB_8_INT;
				else
					v->fix_fetch[i] = SI_FIX_FETCH_RGB_8;
			} else if (channel->size == 16) {
				if (channel->pure_integer)
					v->fix_fetch[i] = SI_FIX_FETCH_RGB_16_INT;
				else
					v->fix_fetch[i] = SI_FIX_FETCH_RGB_16;
			}
		}

		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
				   S_008F0C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
				   S_008F0C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
				   S_008F0C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
				   S_008F0C_NUM_FORMAT(num_format) |
				   S_008F0C_DATA_FORMAT(data_format);
	}

	if (v->instance_divisor_is_fetched) {
		unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched);

		v->instance_divisor_factor_buffer =
			(struct si_resource*)
			pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
					   num_divisors * sizeof(divisor_factors[0]));
		if (!v->instance_divisor_factor_buffer) {
			FREE(v);
			return NULL;
		}
		/* NOTE(review): the buffer is left mapped here; presumably the
		 * winsys keeps maps persistent until buffer destruction -
		 * confirm against the winsys buffer_map contract. */
		void *map = sscreen->ws->buffer_map(v->instance_divisor_factor_buffer->buf,
						    NULL, PIPE_TRANSFER_WRITE);
		memcpy(map, divisor_factors, num_divisors * sizeof(divisor_factors[0]));
	}
	return v;
}

static void si_bind_vertex_elements(struct pipe_context *ctx,
				    void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_vertex_elements *old = sctx->vertex_elements;
	struct si_vertex_elements *v = (struct si_vertex_elements*)state;

	sctx->vertex_elements = v;
	sctx->vertex_buffers_dirty = true;

	/* Shader keys depend on element count, fix-fetch workarounds, and
	 * instance-divisor usage, so any change there forces a shader update. */
	if (v &&
	    (!old ||
	     old->count != v->count ||
	     old->uses_instance_divisors != v->uses_instance_divisors ||
	     v->uses_instance_divisors || /* we don't check which divisors changed */
	     memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count)))
		sctx->do_update_shaders = true;

	if (v && v->instance_divisor_is_fetched) {
		struct pipe_constant_buffer cb;

		/* Expose the precomputed divisor magic constants to the VS. */
		cb.buffer = &v->instance_divisor_factor_buffer->b.b;
		cb.user_buffer = NULL;
		cb.buffer_offset = 0;
		cb.buffer_size = 0xffffffff;
		si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb);
	}
}

/* Destroy a vertex-elements CSO; unbind it first if it is current, and drop
 * the reference to the instance-divisor constant buffer (if any). */
static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_vertex_elements *v = (struct si_vertex_elements*)state;

	if (sctx->vertex_elements == state)
		sctx->vertex_elements = NULL;
	si_resource_reference(&v->instance_divisor_factor_buffer, NULL);
	FREE(state);
}

/* Bind (or unbind, when buffers == NULL) a range of vertex buffer slots,
 * taking references on the new buffers and marking descriptors dirty. */
static void si_set_vertex_buffers(struct pipe_context *ctx,
				  unsigned start_slot, unsigned count,
				  const struct pipe_vertex_buffer *buffers)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
	int i;

	assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer));

	if (buffers) {
		for (i = 0; i < count; i++) {
			const struct pipe_vertex_buffer *src = buffers + i;
			struct pipe_vertex_buffer *dsti = dst + i;
			struct pipe_resource *buf = src->buffer.resource;

			pipe_resource_reference(&dsti->buffer.resource, buf);
			dsti->buffer_offset = src->buffer_offset;
			dsti->stride = src->stride;
			si_context_add_resource_size(sctx, buf);
			if (buf)
				si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;
		}
	} else {
		/* NULL buffers array: release references for the whole range. */
		for (i = 0; i < count; i++) {
			pipe_resource_reference(&dst[i].buffer.resource, NULL);
		}
	}
	sctx->vertex_buffers_dirty = true;
}

/*
 * Misc
 */

/* Upload the default tess levels (4 outer + 2 inner, packed into one
 * 8-float array) into a constant buffer read by the hull shader. */
static void si_set_tess_state(struct pipe_context *ctx,
			      const float default_outer_level[4],
			      const float default_inner_level[2])
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer cb;
	float array[8];

	memcpy(array, default_outer_level, sizeof(float) * 4);
	memcpy(array+4, default_inner_level, sizeof(float) * 2);

	cb.buffer = NULL;
	cb.user_buffer = NULL;
	cb.buffer_size = sizeof(array);

	/* si_upload_const_buffer fills in cb.buffer and cb.buffer_offset. */
	si_upload_const_buffer(sctx, (struct si_resource**)&cb.buffer,
			       (void*)array, sizeof(array),
			       &cb.buffer_offset);

	si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
	pipe_resource_reference(&cb.buffer, NULL);
}

/* pipe_context::texture_barrier — make color-buffer writes visible to
 * subsequent texture fetches. */
static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
{
	struct si_context *sctx = (struct si_context *)ctx;

	si_update_fb_dirtiness_after_rendering(sctx);

	/* Multisample surfaces are flushed in si_decompress_textures. */
	if (sctx->framebuffer.uncompressed_cb_mask) {
		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
					   sctx->framebuffer.CB_has_shader_readable_metadata,
					   sctx->framebuffer.all_DCC_pipe_aligned);
	}
}

/* This only ensures coherency for shader image/buffer stores.
 */
static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
	struct si_context *sctx = (struct si_context *)ctx;

	/* Nothing to do for a pure UPDATE barrier. */
	if (!(flags & ~PIPE_BARRIER_UPDATE))
		return;

	/* Subsequent commands must wait for all shader invocations to
	 * complete. */
	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
		       SI_CONTEXT_CS_PARTIAL_FLUSH;

	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
		sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
			       SI_CONTEXT_INV_VMEM_L1;

	if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
		     PIPE_BARRIER_SHADER_BUFFER |
		     PIPE_BARRIER_TEXTURE |
		     PIPE_BARRIER_IMAGE |
		     PIPE_BARRIER_STREAMOUT_BUFFER |
		     PIPE_BARRIER_GLOBAL_BUFFER)) {
		/* As far as I can tell, L1 contents are written back to L2
		 * automatically at end of shader, but the contents of other
		 * L1 caches might still be stale. */
		sctx->flags |= SI_CONTEXT_INV_VMEM_L1;
	}

	if (flags & PIPE_BARRIER_INDEX_BUFFER) {
		/* Indices are read through TC L2 since VI.
		 * L1 isn't used.
		 */
		if (sctx->screen->info.chip_class <= CIK)
			sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
	}

	/* MSAA color, any depth and any stencil are flushed in
	 * si_decompress_textures when needed.
	 */
	if (flags & PIPE_BARRIER_FRAMEBUFFER &&
	    sctx->framebuffer.uncompressed_cb_mask) {
		sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;

		if (sctx->chip_class <= VI)
			sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
	}

	/* Indirect buffers use TC L2 on GFX9, but not older hw.
 */
	if (sctx->screen->info.chip_class <= VI &&
	    flags & PIPE_BARRIER_INDIRECT_BUFFER)
		sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}

/* Create a blend CSO that writes all RGBA channels of RT0, used for the
 * internal CB operations selected by "mode" (resolve, FMASK decompress, ...). */
static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
{
	struct pipe_blend_state blend;

	memset(&blend, 0, sizeof(blend));
	blend.independent_blend_enable = true;
	blend.rt[0].colormask = 0xf;
	return si_create_blend_state_mode(&sctx->b, &blend, mode);
}

static void si_init_config(struct si_context *sctx);

/* Hook up the state functions that compute-only contexts also need. */
void si_init_state_compute_functions(struct si_context *sctx)
{
	sctx->b.create_sampler_state = si_create_sampler_state;
	sctx->b.delete_sampler_state = si_delete_sampler_state;
	sctx->b.create_sampler_view = si_create_sampler_view;
	sctx->b.sampler_view_destroy = si_sampler_view_destroy;
	sctx->b.memory_barrier = si_memory_barrier;
}

/* Hook up all graphics state vtable entries and atom emit callbacks, create
 * the internal blend/DSA objects, and build the init config PM4 state. */
void si_init_state_functions(struct si_context *sctx)
{
	sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;
	sctx->atoms.s.msaa_sample_locs.emit = si_emit_msaa_sample_locs;
	sctx->atoms.s.db_render_state.emit = si_emit_db_render_state;
	sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state;
	sctx->atoms.s.msaa_config.emit = si_emit_msaa_config;
	sctx->atoms.s.sample_mask.emit = si_emit_sample_mask;
	sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state;
	sctx->atoms.s.blend_color.emit = si_emit_blend_color;
	sctx->atoms.s.clip_regs.emit = si_emit_clip_regs;
	sctx->atoms.s.clip_state.emit = si_emit_clip_state;
	sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref;

	sctx->b.create_blend_state = si_create_blend_state;
	sctx->b.bind_blend_state = si_bind_blend_state;
	sctx->b.delete_blend_state = si_delete_blend_state;
	sctx->b.set_blend_color = si_set_blend_color;

	sctx->b.create_rasterizer_state = si_create_rs_state;
	sctx->b.bind_rasterizer_state = si_bind_rs_state;
	sctx->b.delete_rasterizer_state = si_delete_rs_state;

	sctx->b.create_depth_stencil_alpha_state = si_create_dsa_state;
	sctx->b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
	sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state;

	/* Internal states used by blits and decompress passes. */
	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
	sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
	sctx->custom_blend_eliminate_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
	sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);

	sctx->b.set_clip_state = si_set_clip_state;
	sctx->b.set_stencil_ref = si_set_stencil_ref;

	sctx->b.set_framebuffer_state = si_set_framebuffer_state;

	sctx->b.set_sample_mask = si_set_sample_mask;

	sctx->b.create_vertex_elements_state = si_create_vertex_elements;
	sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
	sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
	sctx->b.set_vertex_buffers = si_set_vertex_buffers;

	sctx->b.texture_barrier = si_texture_barrier;
	sctx->b.set_min_samples = si_set_min_samples;
	sctx->b.set_tess_state = si_set_tess_state;

	sctx->b.set_active_query_state = si_set_active_query_state;

	si_init_config(sctx);
}

/* Hook up the screen-level (context-independent) state function(s). */
void si_init_screen_state_functions(struct si_screen *sscreen)
{
	sscreen->b.is_format_supported = si_is_format_supported;
}

/* Write GRBM_GFX_INDEX, whose register offset differs between SI and CIK+. */
static void si_set_grbm_gfx_index(struct si_context *sctx,
				  struct si_pm4_state *pm4, unsigned value)
{
	unsigned reg = sctx->chip_class >= CIK ?
		       R_030800_GRBM_GFX_INDEX :
		       R_00802C_GRBM_GFX_INDEX;
	si_pm4_set_reg(pm4, reg, value);
}

/* Select one shader engine (or all, when se == ~0) for subsequent register
 * writes, broadcasting across SHs and instances. */
static void si_set_grbm_gfx_index_se(struct si_context *sctx,
				     struct si_pm4_state *pm4, unsigned se)
{
	assert(se == ~0 || se < sctx->screen->info.max_se);
	si_set_grbm_gfx_index(sctx, pm4,
			      (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) :
					  S_030800_SE_INDEX(se)) |
			      S_030800_SH_BROADCAST_WRITES(1) |
			      S_030800_INSTANCE_BROADCAST_WRITES(1));
}

/* Program per-SE PA_SC_RASTER_CONFIG values for chips with harvested
 * (disabled) render backends, then restore broadcast mode. */
static void
si_write_harvested_raster_configs(struct si_context *sctx,
				  struct si_pm4_state *pm4,
				  unsigned raster_config,
				  unsigned raster_config_1)
{
	unsigned num_se = MAX2(sctx->screen->info.max_se, 1);
	unsigned raster_config_se[4];
	unsigned se;

	ac_get_harvested_configs(&sctx->screen->info,
				 raster_config,
				 &raster_config_1,
				 raster_config_se);

	for (se = 0; se < num_se; se++) {
		si_set_grbm_gfx_index_se(sctx, pm4, se);
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
	}
	/* Back to broadcast for all following register writes. */
	si_set_grbm_gfx_index(sctx, pm4, ~0);

	if (sctx->chip_class >= CIK) {
		si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
	}
}

/* Emit PA_SC_RASTER_CONFIG(_1): the screen defaults when every RB is
 * enabled, otherwise the per-SE harvested configuration. */
static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4)
{
	struct si_screen *sscreen = sctx->screen;
	unsigned num_rb = MIN2(sscreen->info.num_render_backends, 16);
	unsigned rb_mask = sscreen->info.enabled_rb_mask;
	unsigned raster_config = sscreen->pa_sc_raster_config;
	unsigned raster_config_1 = sscreen->pa_sc_raster_config_1;

	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
		/* Always use the default config when all backends are enabled
		 * (or when we failed to determine the enabled backends).
		 */
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
			       raster_config);
		if (sctx->chip_class >= CIK)
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
				       raster_config_1);
	} else {
		si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
	}
}

/* Build the "init config" PM4 state emitted at the start of every gfx IB:
 * CONTEXT_CONTROL, optional CLEAR_STATE, and all static register defaults
 * that CLEAR_STATE does not (or cannot) set for this chip generation. */
static void si_init_config(struct si_context *sctx)
{
	struct si_screen *sscreen = sctx->screen;
	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
	bool has_clear_state = sscreen->has_clear_state;
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);

	/* SI, radeon kernel disabled CLEAR_STATE. */
	assert(has_clear_state || sscreen->info.chip_class == SI ||
	       sscreen->info.drm_major != 3);

	if (!pm4)
		return;

	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
	si_pm4_cmd_end(pm4, false);

	/* Reset context registers to their hardware defaults where supported. */
	if (has_clear_state) {
		si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE);
		si_pm4_cmd_add(pm4, 0);
		si_pm4_cmd_end(pm4, false);
	}

	if (sctx->chip_class <= VI)
		si_set_raster_config(sctx, pm4);

	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
	if (!has_clear_state)
		si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));

	/* FIXME calculate these values somehow ???
 */
	if (sctx->chip_class <= VI) {
		si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
		si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
	}

	if (!has_clear_state) {
		si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
		si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
		si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
	}

	si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
	if (!has_clear_state)
		si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
	if (sctx->chip_class < CIK)
		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
			       S_008A14_CLIP_VTX_REORDER_ENA(1));

	/* CLEAR_STATE doesn't clear these correctly on certain generations.
	 * I don't know why. Deduced by trial and error.
	 */
	if (sctx->chip_class <= CIK) {
		si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
		si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
		si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
		si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
			       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
		si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
		si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
			       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
	}

	if (!has_clear_state) {
		si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
			       S_028230_ER_TRI(0xA) |
			       S_028230_ER_POINT(0xA) |
			       S_028230_ER_RECT(0xA) |
			       /* Required by DX10_DIAMOND_TEST_ENA: */
			       S_028230_ER_LINE_LR(0x1A) |
			       S_028230_ER_LINE_RL(0x26) |
			       S_028230_ER_LINE_TB(0xA) |
			       S_028230_ER_LINE_BT(0xA));
		si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
		si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
		si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
		si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
		si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
	}

	if (sctx->chip_class >= GFX9) {
		si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
		si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
		si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
	} else {
		/* These registers, when written, also overwrite the CLEAR_STATE
		 * context, so we can't rely on CLEAR_STATE setting them.
		 * It would be an issue if there was another UMD changing them.
		 */
		si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
		si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
		si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
	}

	if (sctx->chip_class >= CIK) {
		/* Per-stage CU enable masks and wave limits. */
		if (sctx->chip_class >= GFX9) {
			si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
				       S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
		} else {
			si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
				       S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
			si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
				       S_00B41C_WAVE_LIMIT(0x3F));
			si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
				       S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));

			/* If this is 0, Bonaire can hang even if GS isn't being used.
			 * Other chips are unaffected. These are suboptimal values,
			 * but we don't use on-chip GS.
			 */
			si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
				       S_028A44_ES_VERTS_PER_SUBGRP(64) |
				       S_028A44_GS_PRIMS_PER_SUBGRP(4));
		}
		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
			       S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));

		/* Compute LATE_ALLOC_VS.LIMIT. */
		unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh;
		unsigned late_alloc_limit; /* The limit is per SH. */

		if (sctx->family == CHIP_KABINI) {
			late_alloc_limit = 0; /* Potential hang on Kabini. */
		} else if (num_cu_per_sh <= 4) {
			/* Too few available compute units per SH. Disallowing
			 * VS to run on one CU could hurt us more than late VS
			 * allocation would help.
			 *
			 * 2 is the highest safe number that allows us to keep
			 * all CUs enabled.
			 */
			late_alloc_limit = 2;
		} else {
			/* This is a good initial value, allowing 1 late_alloc
			 * wave per SIMD on num_cu - 2.
			 */
			late_alloc_limit = (num_cu_per_sh - 2) * 4;

			/* The limit is 0-based, so 0 means 1. */
			assert(late_alloc_limit > 0 && late_alloc_limit <= 64);
			late_alloc_limit -= 1;
		}

		/* VS can't execute on one CU if the limit is > 2. */
		si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
			       S_00B118_CU_EN(late_alloc_limit > 2 ? 0xfffe : 0xffff) |
			       S_00B118_WAVE_LIMIT(0x3F));
		si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS,
			       S_00B11C_LIMIT(late_alloc_limit));
		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
			       S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
	}

	if (sctx->chip_class >= VI) {
		unsigned vgt_tess_distribution;

		vgt_tess_distribution =
			S_028B50_ACCUM_ISOLINE(32) |
			S_028B50_ACCUM_TRI(11) |
			S_028B50_ACCUM_QUAD(11) |
			S_028B50_DONUT_SPLIT(16);

		/* Testing with Unigine Heaven extreme tesselation yielded best results
		 * with TRAP_SPLIT = 3.
		 */
		if (sctx->family == CHIP_FIJI ||
		    sctx->family >= CHIP_POLARIS10)
			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);

		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
	} else if (!has_clear_state) {
		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
	}

	/* Border color buffer address (low bits; CIK+ also takes the high bits)
	 * and a BO reference so the kernel keeps it resident. */
	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
	if (sctx->chip_class >= CIK) {
		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI,
			       S_028084_ADDRESS(border_color_va >> 40));
	}
	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
		      RADEON_PRIO_BORDER_COLORS);

	if (sctx->chip_class >= GFX9) {
		unsigned num_se = sscreen->info.max_se;
		unsigned pc_lines = 0;

		/* Parameter-cache line count per family, used to size the
		 * primitive binner allocation below. */
		switch (sctx->family) {
		case CHIP_VEGA10:
		case CHIP_VEGA12:
		case CHIP_VEGA20:
			pc_lines = 4096;
			break;
		case CHIP_RAVEN:
		case CHIP_RAVEN2:
			pc_lines = 1024;
			break;
		default:
			assert(0);
		}

		si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
			       S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
			       S_028C48_MAX_PRIM_PER_BATCH(1023));
		si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
			       S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
		si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
	}

	si_pm4_upload_indirect_buffer(sctx, pm4);
	sctx->init_config = pm4;
}