1848b8605Smrg/* 2848b8605Smrg * Copyright 2012 Advanced Micro Devices, Inc. 3b8e80941Smrg * All Rights Reserved. 4848b8605Smrg * 5848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 6848b8605Smrg * copy of this software and associated documentation files (the "Software"), 7848b8605Smrg * to deal in the Software without restriction, including without limitation 8848b8605Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub 9848b8605Smrg * license, and/or sell copies of the Software, and to permit persons to whom 10848b8605Smrg * the Software is furnished to do so, subject to the following conditions: 11848b8605Smrg * 12848b8605Smrg * The above copyright notice and this permission notice (including the next 13848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the 14848b8605Smrg * Software. 15848b8605Smrg * 16848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19848b8605Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20848b8605Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21848b8605Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22848b8605Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 23848b8605Smrg */ 24848b8605Smrg 25b8e80941Smrg#include "si_build_pm4.h" 26b8e80941Smrg#include "gfx9d.h" 27b8e80941Smrg#include "si_query.h" 28848b8605Smrg 29b8e80941Smrg#include "util/u_dual_blend.h" 30848b8605Smrg#include "util/u_format.h" 31848b8605Smrg#include "util/u_format_s3tc.h" 32848b8605Smrg#include "util/u_memory.h" 33b8e80941Smrg#include "util/u_resource.h" 34b8e80941Smrg#include "util/u_upload_mgr.h" 35b8e80941Smrg#include "util/fast_idiv_by_const.h" 36848b8605Smrg 37848b8605Smrgstatic unsigned si_map_swizzle(unsigned swizzle) 38848b8605Smrg{ 39848b8605Smrg switch (swizzle) { 40b8e80941Smrg case PIPE_SWIZZLE_Y: 41848b8605Smrg return V_008F0C_SQ_SEL_Y; 42b8e80941Smrg case PIPE_SWIZZLE_Z: 43848b8605Smrg return V_008F0C_SQ_SEL_Z; 44b8e80941Smrg case PIPE_SWIZZLE_W: 45848b8605Smrg return V_008F0C_SQ_SEL_W; 46b8e80941Smrg case PIPE_SWIZZLE_0: 47848b8605Smrg return V_008F0C_SQ_SEL_0; 48b8e80941Smrg case PIPE_SWIZZLE_1: 49848b8605Smrg return V_008F0C_SQ_SEL_1; 50b8e80941Smrg default: /* PIPE_SWIZZLE_X */ 51848b8605Smrg return V_008F0C_SQ_SEL_X; 52848b8605Smrg } 53848b8605Smrg} 54848b8605Smrg 55848b8605Smrg/* 12.4 fixed-point */ 56848b8605Smrgstatic unsigned si_pack_float_12p4(float x) 57848b8605Smrg{ 58848b8605Smrg return x <= 0 ? 0 : 59848b8605Smrg x >= 4096 ? 0xffff : x * 16; 60848b8605Smrg} 61848b8605Smrg 62848b8605Smrg/* 63b8e80941Smrg * Inferred framebuffer and blender state. 64b8e80941Smrg * 65b8e80941Smrg * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending 66b8e80941Smrg * if there is not enough PS outputs. 67848b8605Smrg */ 68b8e80941Smrgstatic void si_emit_cb_render_state(struct si_context *sctx) 69848b8605Smrg{ 70b8e80941Smrg struct radeon_cmdbuf *cs = sctx->gfx_cs; 71848b8605Smrg struct si_state_blend *blend = sctx->queued.named.blend; 72b8e80941Smrg /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers, 73b8e80941Smrg * but you never know. */ 74b8e80941Smrg uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit; 75b8e80941Smrg unsigned i; 76b8e80941Smrg 77b8e80941Smrg if (blend) 78b8e80941Smrg cb_target_mask &= blend->cb_target_mask; 79b8e80941Smrg 80b8e80941Smrg /* Avoid a hang that happens when dual source blending is enabled 81b8e80941Smrg * but there is not enough color outputs. This is undefined behavior, 82b8e80941Smrg * so disable color writes completely. 83b8e80941Smrg * 84b8e80941Smrg * Reproducible with Unigine Heaven 4.0 and drirc missing. 85b8e80941Smrg */ 86b8e80941Smrg if (blend && blend->dual_src_blend && 87b8e80941Smrg sctx->ps_shader.cso && 88b8e80941Smrg (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3) 89b8e80941Smrg cb_target_mask = 0; 90b8e80941Smrg 91b8e80941Smrg /* GFX9: Flush DFSM when CB_TARGET_MASK changes. 92b8e80941Smrg * I think we don't have to do anything between IBs. 93b8e80941Smrg */ 94b8e80941Smrg if (sctx->screen->dfsm_allowed && 95b8e80941Smrg sctx->last_cb_target_mask != cb_target_mask) { 96b8e80941Smrg sctx->last_cb_target_mask = cb_target_mask; 97b8e80941Smrg 98b8e80941Smrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 99b8e80941Smrg radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); 100b8e80941Smrg } 101848b8605Smrg 102b8e80941Smrg unsigned initial_cdw = cs->current.cdw; 103b8e80941Smrg radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, 104b8e80941Smrg SI_TRACKED_CB_TARGET_MASK, cb_target_mask); 105848b8605Smrg 106b8e80941Smrg if (sctx->chip_class >= VI) { 107b8e80941Smrg /* DCC MSAA workaround for blending. 108b8e80941Smrg * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_- 109b8e80941Smrg * COMBINER_DISABLE, but that would be more complicated. 110b8e80941Smrg */ 111b8e80941Smrg bool oc_disable = (sctx->chip_class == VI || 112b8e80941Smrg sctx->chip_class == GFX9) && 113b8e80941Smrg blend && 114b8e80941Smrg blend->blend_enable_4bit & cb_target_mask && 115b8e80941Smrg sctx->framebuffer.nr_samples >= 2; 116b8e80941Smrg unsigned watermark = sctx->framebuffer.dcc_overwrite_combiner_watermark; 117b8e80941Smrg 118b8e80941Smrg radeon_opt_set_context_reg( 119b8e80941Smrg sctx, R_028424_CB_DCC_CONTROL, 120b8e80941Smrg SI_TRACKED_CB_DCC_CONTROL, 121b8e80941Smrg S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | 122b8e80941Smrg S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | 123b8e80941Smrg S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) | 124b8e80941Smrg S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->screen->has_dcc_constant_encode)); 125b8e80941Smrg } 126b8e80941Smrg 127b8e80941Smrg /* RB+ register settings. */ 128b8e80941Smrg if (sctx->screen->rbplus_allowed) { 129b8e80941Smrg unsigned spi_shader_col_format = 130b8e80941Smrg sctx->ps_shader.cso ? 131b8e80941Smrg sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0; 132b8e80941Smrg unsigned sx_ps_downconvert = 0; 133b8e80941Smrg unsigned sx_blend_opt_epsilon = 0; 134b8e80941Smrg unsigned sx_blend_opt_control = 0; 135b8e80941Smrg 136b8e80941Smrg for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 137b8e80941Smrg struct si_surface *surf = 138b8e80941Smrg (struct si_surface*)sctx->framebuffer.state.cbufs[i]; 139b8e80941Smrg unsigned format, swap, spi_format, colormask; 140b8e80941Smrg bool has_alpha, has_rgb; 141b8e80941Smrg 142b8e80941Smrg if (!surf) 143b8e80941Smrg continue; 144b8e80941Smrg 145b8e80941Smrg format = G_028C70_FORMAT(surf->cb_color_info); 146b8e80941Smrg swap = G_028C70_COMP_SWAP(surf->cb_color_info); 147b8e80941Smrg spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 148b8e80941Smrg colormask = (cb_target_mask >> (i * 4)) & 0xf; 149b8e80941Smrg 150b8e80941Smrg /* Set if RGB and A are present. */ 151b8e80941Smrg has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib); 152b8e80941Smrg 153b8e80941Smrg if (format == V_028C70_COLOR_8 || 154b8e80941Smrg format == V_028C70_COLOR_16 || 155b8e80941Smrg format == V_028C70_COLOR_32) 156b8e80941Smrg has_rgb = !has_alpha; 157b8e80941Smrg else 158b8e80941Smrg has_rgb = true; 159b8e80941Smrg 160b8e80941Smrg /* Check the colormask and export format. */ 161b8e80941Smrg if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 162b8e80941Smrg has_rgb = false; 163b8e80941Smrg if (!(colormask & PIPE_MASK_A)) 164b8e80941Smrg has_alpha = false; 165b8e80941Smrg 166b8e80941Smrg if (spi_format == V_028714_SPI_SHADER_ZERO) { 167b8e80941Smrg has_rgb = false; 168b8e80941Smrg has_alpha = false; 169b8e80941Smrg } 170b8e80941Smrg 171b8e80941Smrg /* Disable value checking for disabled channels. */ 172b8e80941Smrg if (!has_rgb) 173b8e80941Smrg sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 174b8e80941Smrg if (!has_alpha) 175b8e80941Smrg sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 176b8e80941Smrg 177b8e80941Smrg /* Enable down-conversion for 32bpp and smaller formats. */ 178b8e80941Smrg switch (format) { 179b8e80941Smrg case V_028C70_COLOR_8: 180b8e80941Smrg case V_028C70_COLOR_8_8: 181b8e80941Smrg case V_028C70_COLOR_8_8_8_8: 182b8e80941Smrg /* For 1 and 2-channel formats, use the superset thereof. */ 183b8e80941Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 184b8e80941Smrg spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 185b8e80941Smrg spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 186b8e80941Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 187b8e80941Smrg sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 188b8e80941Smrg } 189b8e80941Smrg break; 190b8e80941Smrg 191b8e80941Smrg case V_028C70_COLOR_5_6_5: 192b8e80941Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 193b8e80941Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 194b8e80941Smrg sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 195b8e80941Smrg } 196b8e80941Smrg break; 197b8e80941Smrg 198b8e80941Smrg case V_028C70_COLOR_1_5_5_5: 199b8e80941Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 200b8e80941Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 201b8e80941Smrg sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 202b8e80941Smrg } 203b8e80941Smrg break; 204b8e80941Smrg 205b8e80941Smrg case V_028C70_COLOR_4_4_4_4: 206b8e80941Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 207b8e80941Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 208b8e80941Smrg sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 209b8e80941Smrg } 210b8e80941Smrg break; 211b8e80941Smrg 212b8e80941Smrg case V_028C70_COLOR_32: 213b8e80941Smrg if (swap == V_028C70_SWAP_STD && 214b8e80941Smrg spi_format == V_028714_SPI_SHADER_32_R) 215b8e80941Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 216b8e80941Smrg else if (swap == V_028C70_SWAP_ALT_REV && 217b8e80941Smrg spi_format == V_028714_SPI_SHADER_32_AR) 218b8e80941Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 219b8e80941Smrg break; 220b8e80941Smrg 221b8e80941Smrg case V_028C70_COLOR_16: 222b8e80941Smrg case V_028C70_COLOR_16_16: 223b8e80941Smrg /* For 1-channel formats, use the superset thereof. */ 224b8e80941Smrg if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 225b8e80941Smrg spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 226b8e80941Smrg spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 227b8e80941Smrg spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 228b8e80941Smrg if (swap == V_028C70_SWAP_STD || 229b8e80941Smrg swap == V_028C70_SWAP_STD_REV) 230b8e80941Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 231b8e80941Smrg else 232b8e80941Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 233b8e80941Smrg } 234b8e80941Smrg break; 235b8e80941Smrg 236b8e80941Smrg case V_028C70_COLOR_10_11_11: 237b8e80941Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 238b8e80941Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 239b8e80941Smrg sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4); 240b8e80941Smrg } 241b8e80941Smrg break; 242848b8605Smrg 243b8e80941Smrg case V_028C70_COLOR_2_10_10_10: 244b8e80941Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 245b8e80941Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 246b8e80941Smrg sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 247b8e80941Smrg } 248b8e80941Smrg break; 249b8e80941Smrg } 250b8e80941Smrg } 251848b8605Smrg 252b8e80941Smrg /* SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, SX_BLEND_OPT_CONTROL */ 253b8e80941Smrg radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, 254b8e80941Smrg SI_TRACKED_SX_PS_DOWNCONVERT, 255b8e80941Smrg sx_ps_downconvert, sx_blend_opt_epsilon, 256b8e80941Smrg sx_blend_opt_control); 257b8e80941Smrg } 258b8e80941Smrg if (initial_cdw != cs->current.cdw) 259b8e80941Smrg sctx->context_roll = true; 260848b8605Smrg} 261848b8605Smrg 262848b8605Smrg/* 263848b8605Smrg * Blender functions 264848b8605Smrg */ 265848b8605Smrg 266848b8605Smrgstatic uint32_t si_translate_blend_function(int blend_func) 267848b8605Smrg{ 268848b8605Smrg switch (blend_func) { 269848b8605Smrg case PIPE_BLEND_ADD: 270848b8605Smrg return V_028780_COMB_DST_PLUS_SRC; 271848b8605Smrg case PIPE_BLEND_SUBTRACT: 272848b8605Smrg return V_028780_COMB_SRC_MINUS_DST; 273848b8605Smrg case PIPE_BLEND_REVERSE_SUBTRACT: 274848b8605Smrg return V_028780_COMB_DST_MINUS_SRC; 275848b8605Smrg case PIPE_BLEND_MIN: 276848b8605Smrg return V_028780_COMB_MIN_DST_SRC; 277848b8605Smrg case PIPE_BLEND_MAX: 278848b8605Smrg return V_028780_COMB_MAX_DST_SRC; 279848b8605Smrg default: 280b8e80941Smrg PRINT_ERR("Unknown blend function %d\n", blend_func); 281848b8605Smrg assert(0); 282848b8605Smrg break; 283848b8605Smrg } 284848b8605Smrg return 0; 285848b8605Smrg} 286848b8605Smrg 287848b8605Smrgstatic uint32_t si_translate_blend_factor(int blend_fact) 288848b8605Smrg{ 289848b8605Smrg switch (blend_fact) { 290848b8605Smrg case PIPE_BLENDFACTOR_ONE: 291848b8605Smrg return V_028780_BLEND_ONE; 292848b8605Smrg case PIPE_BLENDFACTOR_SRC_COLOR: 293848b8605Smrg return V_028780_BLEND_SRC_COLOR; 294848b8605Smrg case PIPE_BLENDFACTOR_SRC_ALPHA: 295848b8605Smrg return V_028780_BLEND_SRC_ALPHA; 296848b8605Smrg case PIPE_BLENDFACTOR_DST_ALPHA: 297848b8605Smrg return V_028780_BLEND_DST_ALPHA; 298848b8605Smrg case PIPE_BLENDFACTOR_DST_COLOR: 299848b8605Smrg return V_028780_BLEND_DST_COLOR; 300848b8605Smrg case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 301848b8605Smrg return V_028780_BLEND_SRC_ALPHA_SATURATE; 302848b8605Smrg case PIPE_BLENDFACTOR_CONST_COLOR: 303848b8605Smrg return V_028780_BLEND_CONSTANT_COLOR; 304848b8605Smrg case PIPE_BLENDFACTOR_CONST_ALPHA: 305848b8605Smrg return V_028780_BLEND_CONSTANT_ALPHA; 306848b8605Smrg case PIPE_BLENDFACTOR_ZERO: 307848b8605Smrg return V_028780_BLEND_ZERO; 308848b8605Smrg case PIPE_BLENDFACTOR_INV_SRC_COLOR: 309848b8605Smrg return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 310848b8605Smrg case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 311848b8605Smrg return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 312848b8605Smrg case PIPE_BLENDFACTOR_INV_DST_ALPHA: 313848b8605Smrg return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 314848b8605Smrg case PIPE_BLENDFACTOR_INV_DST_COLOR: 315848b8605Smrg return V_028780_BLEND_ONE_MINUS_DST_COLOR; 316848b8605Smrg case PIPE_BLENDFACTOR_INV_CONST_COLOR: 317848b8605Smrg return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 318848b8605Smrg case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 319848b8605Smrg return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 320848b8605Smrg case PIPE_BLENDFACTOR_SRC1_COLOR: 321848b8605Smrg return V_028780_BLEND_SRC1_COLOR; 322848b8605Smrg case PIPE_BLENDFACTOR_SRC1_ALPHA: 323848b8605Smrg return V_028780_BLEND_SRC1_ALPHA; 324848b8605Smrg case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 325848b8605Smrg return V_028780_BLEND_INV_SRC1_COLOR; 326848b8605Smrg case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 327848b8605Smrg return V_028780_BLEND_INV_SRC1_ALPHA; 328848b8605Smrg default: 329b8e80941Smrg PRINT_ERR("Bad blend factor %d not supported!\n", blend_fact); 330848b8605Smrg assert(0); 331848b8605Smrg break; 332848b8605Smrg } 333848b8605Smrg return 0; 334848b8605Smrg} 335848b8605Smrg 336b8e80941Smrgstatic uint32_t si_translate_blend_opt_function(int blend_func) 337b8e80941Smrg{ 338b8e80941Smrg switch (blend_func) { 339b8e80941Smrg case PIPE_BLEND_ADD: 340b8e80941Smrg return V_028760_OPT_COMB_ADD; 341b8e80941Smrg case PIPE_BLEND_SUBTRACT: 342b8e80941Smrg return V_028760_OPT_COMB_SUBTRACT; 343b8e80941Smrg case PIPE_BLEND_REVERSE_SUBTRACT: 344b8e80941Smrg return V_028760_OPT_COMB_REVSUBTRACT; 345b8e80941Smrg case PIPE_BLEND_MIN: 346b8e80941Smrg return V_028760_OPT_COMB_MIN; 347b8e80941Smrg case PIPE_BLEND_MAX: 348b8e80941Smrg return V_028760_OPT_COMB_MAX; 349b8e80941Smrg default: 350b8e80941Smrg return V_028760_OPT_COMB_BLEND_DISABLED; 351b8e80941Smrg } 352b8e80941Smrg} 353b8e80941Smrg 354b8e80941Smrgstatic uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 355b8e80941Smrg{ 356b8e80941Smrg switch (blend_fact) { 357b8e80941Smrg case PIPE_BLENDFACTOR_ZERO: 358b8e80941Smrg return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 359b8e80941Smrg case PIPE_BLENDFACTOR_ONE: 360b8e80941Smrg return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 361b8e80941Smrg case PIPE_BLENDFACTOR_SRC_COLOR: 362b8e80941Smrg return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 363b8e80941Smrg : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 364b8e80941Smrg case PIPE_BLENDFACTOR_INV_SRC_COLOR: 365b8e80941Smrg return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 366b8e80941Smrg : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 367b8e80941Smrg case PIPE_BLENDFACTOR_SRC_ALPHA: 368b8e80941Smrg return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 369b8e80941Smrg case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 370b8e80941Smrg return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 371b8e80941Smrg case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 372b8e80941Smrg return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 373b8e80941Smrg : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 374b8e80941Smrg default: 375b8e80941Smrg return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 376b8e80941Smrg } 377b8e80941Smrg} 378b8e80941Smrg 379b8e80941Smrgstatic void si_blend_check_commutativity(struct si_screen *sscreen, 380b8e80941Smrg struct si_state_blend *blend, 381b8e80941Smrg enum pipe_blend_func func, 382b8e80941Smrg enum pipe_blendfactor src, 383b8e80941Smrg enum pipe_blendfactor dst, 384b8e80941Smrg unsigned chanmask) 385b8e80941Smrg{ 386b8e80941Smrg /* Src factor is allowed when it does not depend on Dst */ 387b8e80941Smrg static const uint32_t src_allowed = 388b8e80941Smrg (1u << PIPE_BLENDFACTOR_ONE) | 389b8e80941Smrg (1u << PIPE_BLENDFACTOR_SRC_COLOR) | 390b8e80941Smrg (1u << PIPE_BLENDFACTOR_SRC_ALPHA) | 391b8e80941Smrg (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) | 392b8e80941Smrg (1u << PIPE_BLENDFACTOR_CONST_COLOR) | 393b8e80941Smrg (1u << PIPE_BLENDFACTOR_CONST_ALPHA) | 394b8e80941Smrg (1u << PIPE_BLENDFACTOR_SRC1_COLOR) | 395b8e80941Smrg (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) | 396b8e80941Smrg (1u << PIPE_BLENDFACTOR_ZERO) | 397b8e80941Smrg (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) | 398b8e80941Smrg (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | 399b8e80941Smrg (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) | 400b8e80941Smrg (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | 401b8e80941Smrg (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) | 402b8e80941Smrg (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA); 403b8e80941Smrg 404b8e80941Smrg if (dst == PIPE_BLENDFACTOR_ONE && 405b8e80941Smrg (src_allowed & (1u << src))) { 406b8e80941Smrg /* Addition is commutative, but floating point addition isn't 407b8e80941Smrg * associative: subtle changes can be introduced via different 408b8e80941Smrg * rounding. 409b8e80941Smrg * 410b8e80941Smrg * Out-of-order is also non-deterministic, which means that 411b8e80941Smrg * this breaks OpenGL invariance requirements. So only enable 412b8e80941Smrg * out-of-order additive blending if explicitly allowed by a 413b8e80941Smrg * setting. 414b8e80941Smrg */ 415b8e80941Smrg if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN || 416b8e80941Smrg (func == PIPE_BLEND_ADD && sscreen->commutative_blend_add)) 417b8e80941Smrg blend->commutative_4bit |= chanmask; 418b8e80941Smrg } 419b8e80941Smrg} 420b8e80941Smrg 421b8e80941Smrg/** 422b8e80941Smrg * Get rid of DST in the blend factors by commuting the operands: 423b8e80941Smrg * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 424b8e80941Smrg */ 425b8e80941Smrgstatic void si_blend_remove_dst(unsigned *func, unsigned *src_factor, 426b8e80941Smrg unsigned *dst_factor, unsigned expected_dst, 427b8e80941Smrg unsigned replacement_src) 428b8e80941Smrg{ 429b8e80941Smrg if (*src_factor == expected_dst && 430b8e80941Smrg *dst_factor == PIPE_BLENDFACTOR_ZERO) { 431b8e80941Smrg *src_factor = PIPE_BLENDFACTOR_ZERO; 432b8e80941Smrg *dst_factor = replacement_src; 433b8e80941Smrg 434b8e80941Smrg /* Commuting the operands requires reversing subtractions. */ 435b8e80941Smrg if (*func == PIPE_BLEND_SUBTRACT) 436b8e80941Smrg *func = PIPE_BLEND_REVERSE_SUBTRACT; 437b8e80941Smrg else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 438b8e80941Smrg *func = PIPE_BLEND_SUBTRACT; 439b8e80941Smrg } 440b8e80941Smrg} 441b8e80941Smrg 442b8e80941Smrgstatic bool si_blend_factor_uses_dst(unsigned factor) 443b8e80941Smrg{ 444b8e80941Smrg return factor == PIPE_BLENDFACTOR_DST_COLOR || 445b8e80941Smrg factor == PIPE_BLENDFACTOR_DST_ALPHA || 446b8e80941Smrg factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 447b8e80941Smrg factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || 448b8e80941Smrg factor == PIPE_BLENDFACTOR_INV_DST_COLOR; 449b8e80941Smrg} 450b8e80941Smrg 451848b8605Smrgstatic void *si_create_blend_state_mode(struct pipe_context *ctx, 452848b8605Smrg const struct pipe_blend_state *state, 453848b8605Smrg unsigned mode) 454848b8605Smrg{ 455b8e80941Smrg struct si_context *sctx = (struct si_context*)ctx; 456848b8605Smrg struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 457848b8605Smrg struct si_pm4_state *pm4 = &blend->pm4; 458b8e80941Smrg uint32_t sx_mrt_blend_opt[8] = {0}; 459848b8605Smrg uint32_t color_control = 0; 460848b8605Smrg 461b8e80941Smrg if (!blend) 462848b8605Smrg return NULL; 463848b8605Smrg 464b8e80941Smrg blend->alpha_to_coverage = state->alpha_to_coverage; 465848b8605Smrg blend->alpha_to_one = state->alpha_to_one; 466b8e80941Smrg blend->dual_src_blend = util_blend_state_is_dual(state, 0); 467b8e80941Smrg blend->logicop_enable = state->logicop_enable; 468848b8605Smrg 469848b8605Smrg if (state->logicop_enable) { 470848b8605Smrg color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 471848b8605Smrg } else { 472848b8605Smrg color_control |= S_028808_ROP3(0xcc); 473848b8605Smrg } 474848b8605Smrg 475848b8605Smrg si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 476848b8605Smrg S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 477b8e80941Smrg S_028B70_ALPHA_TO_MASK_OFFSET0(3) | 478b8e80941Smrg S_028B70_ALPHA_TO_MASK_OFFSET1(1) | 479b8e80941Smrg S_028B70_ALPHA_TO_MASK_OFFSET2(0) | 480b8e80941Smrg S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 481b8e80941Smrg S_028B70_OFFSET_ROUND(1)); 482b8e80941Smrg 483b8e80941Smrg if (state->alpha_to_coverage) 484b8e80941Smrg blend->need_src_alpha_4bit |= 0xf; 485848b8605Smrg 486848b8605Smrg blend->cb_target_mask = 0; 487b8e80941Smrg blend->cb_target_enabled_4bit = 0; 488b8e80941Smrg 489848b8605Smrg for (int i = 0; i < 8; i++) { 490848b8605Smrg /* state->rt entries > 0 only written if independent blending */ 491848b8605Smrg const int j = state->independent_blend_enable ? i : 0; 492848b8605Smrg 493848b8605Smrg unsigned eqRGB = state->rt[j].rgb_func; 494848b8605Smrg unsigned srcRGB = state->rt[j].rgb_src_factor; 495848b8605Smrg unsigned dstRGB = state->rt[j].rgb_dst_factor; 496848b8605Smrg unsigned eqA = state->rt[j].alpha_func; 497848b8605Smrg unsigned srcA = state->rt[j].alpha_src_factor; 498848b8605Smrg unsigned dstA = state->rt[j].alpha_dst_factor; 499848b8605Smrg 500b8e80941Smrg unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 501848b8605Smrg unsigned blend_cntl = 0; 502848b8605Smrg 503b8e80941Smrg sx_mrt_blend_opt[i] = 504b8e80941Smrg S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 505b8e80941Smrg S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 506b8e80941Smrg 507b8e80941Smrg /* Only set dual source blending for MRT0 to avoid a hang. */ 508b8e80941Smrg if (i >= 1 && blend->dual_src_blend) { 509b8e80941Smrg /* Vulkan does this for dual source blending. */ 510b8e80941Smrg if (i == 1) 511b8e80941Smrg blend_cntl |= S_028780_ENABLE(1); 512b8e80941Smrg 513b8e80941Smrg si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 514b8e80941Smrg continue; 515b8e80941Smrg } 516b8e80941Smrg 517b8e80941Smrg /* Only addition and subtraction equations are supported with 518b8e80941Smrg * dual source blending. 519b8e80941Smrg */ 520b8e80941Smrg if (blend->dual_src_blend && 521b8e80941Smrg (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 522b8e80941Smrg eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 523b8e80941Smrg assert(!"Unsupported equation for dual source blending"); 524b8e80941Smrg si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 525b8e80941Smrg continue; 526b8e80941Smrg } 527b8e80941Smrg 528b8e80941Smrg /* cb_render_state will disable unused ones */ 529b8e80941Smrg blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 530b8e80941Smrg if (state->rt[j].colormask) 531b8e80941Smrg blend->cb_target_enabled_4bit |= 0xf << (4 * i); 532848b8605Smrg 533b8e80941Smrg if (!state->rt[j].colormask || !state->rt[j].blend_enable) { 534848b8605Smrg si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 535848b8605Smrg continue; 536848b8605Smrg } 537848b8605Smrg 538b8e80941Smrg si_blend_check_commutativity(sctx->screen, blend, 539b8e80941Smrg eqRGB, srcRGB, dstRGB, 0x7 << (4 * i)); 540b8e80941Smrg si_blend_check_commutativity(sctx->screen, blend, 541b8e80941Smrg eqA, srcA, dstA, 0x8 << (4 * i)); 542b8e80941Smrg 543b8e80941Smrg /* Blending optimizations for RB+. 544b8e80941Smrg * These transformations don't change the behavior. 545b8e80941Smrg * 546b8e80941Smrg * First, get rid of DST in the blend factors: 547b8e80941Smrg * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 548b8e80941Smrg */ 549b8e80941Smrg si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, 550b8e80941Smrg PIPE_BLENDFACTOR_DST_COLOR, 551b8e80941Smrg PIPE_BLENDFACTOR_SRC_COLOR); 552b8e80941Smrg si_blend_remove_dst(&eqA, &srcA, &dstA, 553b8e80941Smrg PIPE_BLENDFACTOR_DST_COLOR, 554b8e80941Smrg PIPE_BLENDFACTOR_SRC_COLOR); 555b8e80941Smrg si_blend_remove_dst(&eqA, &srcA, &dstA, 556b8e80941Smrg PIPE_BLENDFACTOR_DST_ALPHA, 557b8e80941Smrg PIPE_BLENDFACTOR_SRC_ALPHA); 558b8e80941Smrg 559b8e80941Smrg /* Look up the ideal settings from tables. */ 560b8e80941Smrg srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 561b8e80941Smrg dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 562b8e80941Smrg srcA_opt = si_translate_blend_opt_factor(srcA, true); 563b8e80941Smrg dstA_opt = si_translate_blend_opt_factor(dstA, true); 564b8e80941Smrg 565b8e80941Smrg /* Handle interdependencies. */ 566b8e80941Smrg if (si_blend_factor_uses_dst(srcRGB)) 567b8e80941Smrg dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 568b8e80941Smrg if (si_blend_factor_uses_dst(srcA)) 569b8e80941Smrg dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 570b8e80941Smrg 571b8e80941Smrg if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 572b8e80941Smrg (dstRGB == PIPE_BLENDFACTOR_ZERO || 573b8e80941Smrg dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 574b8e80941Smrg dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 575b8e80941Smrg dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 576b8e80941Smrg 577b8e80941Smrg /* Set the final value. */ 578b8e80941Smrg sx_mrt_blend_opt[i] = 579b8e80941Smrg S_028760_COLOR_SRC_OPT(srcRGB_opt) | 580b8e80941Smrg S_028760_COLOR_DST_OPT(dstRGB_opt) | 581b8e80941Smrg S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 582b8e80941Smrg S_028760_ALPHA_SRC_OPT(srcA_opt) | 583b8e80941Smrg S_028760_ALPHA_DST_OPT(dstA_opt) | 584b8e80941Smrg S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 585b8e80941Smrg 586b8e80941Smrg /* Set blend state. */ 587848b8605Smrg blend_cntl |= S_028780_ENABLE(1); 588848b8605Smrg blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 589848b8605Smrg blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 590848b8605Smrg blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 591848b8605Smrg 592848b8605Smrg if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 593848b8605Smrg blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 594848b8605Smrg blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 595848b8605Smrg blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 596848b8605Smrg blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 597848b8605Smrg } 598848b8605Smrg si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 599b8e80941Smrg 600b8e80941Smrg blend->blend_enable_4bit |= 0xfu << (i * 4); 601b8e80941Smrg 602b8e80941Smrg /* This is only important for formats without alpha. */ 603b8e80941Smrg if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 604b8e80941Smrg dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 605b8e80941Smrg srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 606b8e80941Smrg dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 607b8e80941Smrg srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || 608b8e80941Smrg dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 609b8e80941Smrg blend->need_src_alpha_4bit |= 0xfu << (i * 4); 610848b8605Smrg } 611848b8605Smrg 612848b8605Smrg if (blend->cb_target_mask) { 613848b8605Smrg color_control |= S_028808_MODE(mode); 614848b8605Smrg } else { 615848b8605Smrg color_control |= S_028808_MODE(V_028808_CB_DISABLE); 616848b8605Smrg } 617848b8605Smrg 618b8e80941Smrg if (sctx->screen->rbplus_allowed) { 619b8e80941Smrg /* Disable RB+ blend optimizations for dual source blending. 620b8e80941Smrg * Vulkan does this. 621b8e80941Smrg */ 622b8e80941Smrg if (blend->dual_src_blend) { 623b8e80941Smrg for (int i = 0; i < 8; i++) { 624b8e80941Smrg sx_mrt_blend_opt[i] = 625b8e80941Smrg S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 626b8e80941Smrg S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 627b8e80941Smrg } 628b8e80941Smrg } 629b8e80941Smrg 630b8e80941Smrg for (int i = 0; i < 8; i++) 631b8e80941Smrg si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, 632b8e80941Smrg sx_mrt_blend_opt[i]); 633b8e80941Smrg 634b8e80941Smrg /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */ 635b8e80941Smrg if (blend->dual_src_blend || state->logicop_enable || 636b8e80941Smrg mode == V_028808_CB_RESOLVE) 637b8e80941Smrg color_control |= S_028808_DISABLE_DUAL_QUAD(1); 638b8e80941Smrg } 639b8e80941Smrg 640b8e80941Smrg si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 641848b8605Smrg return blend; 642848b8605Smrg} 643848b8605Smrg 644848b8605Smrgstatic void *si_create_blend_state(struct pipe_context *ctx, 645848b8605Smrg const struct pipe_blend_state *state) 646848b8605Smrg{ 647848b8605Smrg return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 648848b8605Smrg} 649848b8605Smrg 650848b8605Smrgstatic void si_bind_blend_state(struct pipe_context *ctx, void *state) 651848b8605Smrg{ 652848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 653b8e80941Smrg struct si_state_blend *old_blend = sctx->queued.named.blend; 654b8e80941Smrg struct si_state_blend *blend = (struct si_state_blend *)state; 655b8e80941Smrg 656b8e80941Smrg if (!state) 657b8e80941Smrg return; 658b8e80941Smrg 659b8e80941Smrg si_pm4_bind_state(sctx, blend, state); 660b8e80941Smrg 661b8e80941Smrg if (!old_blend || 662b8e80941Smrg old_blend->cb_target_mask != blend->cb_target_mask || 663b8e80941Smrg old_blend->dual_src_blend != blend->dual_src_blend || 664b8e80941Smrg (old_blend->blend_enable_4bit != blend->blend_enable_4bit && 665b8e80941Smrg sctx->framebuffer.nr_samples >= 2 && 666b8e80941Smrg sctx->screen->dcc_msaa_allowed)) 667b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 668b8e80941Smrg 669b8e80941Smrg if (!old_blend || 670b8e80941Smrg old_blend->cb_target_mask != blend->cb_target_mask || 671b8e80941Smrg old_blend->alpha_to_coverage != blend->alpha_to_coverage || 672b8e80941Smrg old_blend->alpha_to_one != blend->alpha_to_one || 673b8e80941Smrg old_blend->dual_src_blend != blend->dual_src_blend || 674b8e80941Smrg old_blend->blend_enable_4bit != blend->blend_enable_4bit || 675b8e80941Smrg old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit) 676b8e80941Smrg sctx->do_update_shaders = true; 677b8e80941Smrg 678b8e80941Smrg if (sctx->screen->dpbb_allowed && 679b8e80941Smrg (!old_blend || 680b8e80941Smrg old_blend->alpha_to_coverage != blend->alpha_to_coverage || 681b8e80941Smrg old_blend->blend_enable_4bit != blend->blend_enable_4bit || 682b8e80941Smrg old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit)) 683b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 684b8e80941Smrg 685b8e80941Smrg if (sctx->screen->has_out_of_order_rast && 686b8e80941Smrg (!old_blend || 687b8e80941Smrg (old_blend->blend_enable_4bit != blend->blend_enable_4bit || 688b8e80941Smrg old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit || 689b8e80941Smrg old_blend->commutative_4bit != blend->commutative_4bit || 690b8e80941Smrg old_blend->logicop_enable != blend->logicop_enable))) 691b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 692848b8605Smrg} 693848b8605Smrg 694848b8605Smrgstatic void si_delete_blend_state(struct pipe_context *ctx, void *state) 695848b8605Smrg{ 696848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 697848b8605Smrg si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 698848b8605Smrg} 699848b8605Smrg 700848b8605Smrgstatic void si_set_blend_color(struct pipe_context *ctx, 701848b8605Smrg const struct pipe_blend_color *state) 702848b8605Smrg{ 703848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 704b8e80941Smrg static const struct pipe_blend_color zeros; 705848b8605Smrg 706b8e80941Smrg sctx->blend_color.state = *state; 707b8e80941Smrg sctx->blend_color.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 708b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.blend_color); 709b8e80941Smrg} 710848b8605Smrg 711b8e80941Smrgstatic void si_emit_blend_color(struct si_context *sctx) 712b8e80941Smrg{ 713b8e80941Smrg struct radeon_cmdbuf *cs = sctx->gfx_cs; 714848b8605Smrg 715b8e80941Smrg radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); 716b8e80941Smrg radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4); 717848b8605Smrg} 718848b8605Smrg 719848b8605Smrg/* 720b8e80941Smrg * Clipping 721848b8605Smrg */ 722848b8605Smrg 723848b8605Smrgstatic void si_set_clip_state(struct pipe_context *ctx, 724848b8605Smrg const struct pipe_clip_state *state) 725848b8605Smrg{ 726848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 727848b8605Smrg struct pipe_constant_buffer cb; 728b8e80941Smrg static const struct pipe_clip_state zeros; 729848b8605Smrg 730b8e80941Smrg if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0) 731848b8605Smrg return; 732848b8605Smrg 733b8e80941Smrg sctx->clip_state.state = *state; 734b8e80941Smrg sctx->clip_state.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 735b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_state); 736848b8605Smrg 737848b8605Smrg cb.buffer = NULL; 738848b8605Smrg cb.user_buffer = state->ucp; 739848b8605Smrg cb.buffer_offset = 0; 740848b8605Smrg cb.buffer_size = 4*4*8; 741b8e80941Smrg si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb); 742848b8605Smrg pipe_resource_reference(&cb.buffer, NULL); 743848b8605Smrg} 744848b8605Smrg 745b8e80941Smrgstatic void si_emit_clip_state(struct si_context *sctx) 746848b8605Smrg{ 747b8e80941Smrg struct radeon_cmdbuf *cs = sctx->gfx_cs; 748848b8605Smrg 749b8e80941Smrg radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4); 750b8e80941Smrg radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4); 751848b8605Smrg} 752848b8605Smrg 753b8e80941Smrgstatic void si_emit_clip_regs(struct si_context *sctx) 754848b8605Smrg{ 755b8e80941Smrg struct si_shader *vs = si_get_vs_state(sctx); 756b8e80941Smrg struct si_shader_selector *vs_sel = vs->selector; 757b8e80941Smrg struct tgsi_shader_info *info = &vs_sel->info; 758b8e80941Smrg struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 759b8e80941Smrg unsigned window_space = 760b8e80941Smrg info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 761b8e80941Smrg unsigned clipdist_mask = vs_sel->clipdist_mask; 762b8e80941Smrg unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS; 763b8e80941Smrg unsigned culldist_mask = vs_sel->culldist_mask; 764b8e80941Smrg unsigned total_mask; 765b8e80941Smrg 766b8e80941Smrg if (vs->key.opt.clip_disable) { 767b8e80941Smrg assert(!info->culldist_writemask); 768b8e80941Smrg clipdist_mask = 0; 769b8e80941Smrg culldist_mask = 0; 770b8e80941Smrg } 771b8e80941Smrg total_mask = clipdist_mask | culldist_mask; 772b8e80941Smrg 773b8e80941Smrg /* Clip distances on points have no effect, so need to be implemented 774b8e80941Smrg * as cull distances. This applies for the clipvertex case as well. 775b8e80941Smrg * 776b8e80941Smrg * Setting this for primitives other than points should have no adverse 777b8e80941Smrg * effects. 778b8e80941Smrg */ 779b8e80941Smrg clipdist_mask &= rs->clip_plane_enable; 780b8e80941Smrg culldist_mask |= clipdist_mask; 781b8e80941Smrg 782b8e80941Smrg unsigned initial_cdw = sctx->gfx_cs->current.cdw; 783b8e80941Smrg radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, 784b8e80941Smrg SI_TRACKED_PA_CL_VS_OUT_CNTL, 785b8e80941Smrg vs_sel->pa_cl_vs_out_cntl | 786b8e80941Smrg S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | 787b8e80941Smrg S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | 788b8e80941Smrg clipdist_mask | (culldist_mask << 8)); 789b8e80941Smrg radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, 790b8e80941Smrg SI_TRACKED_PA_CL_CLIP_CNTL, 791b8e80941Smrg rs->pa_cl_clip_cntl | 792b8e80941Smrg ucp_mask | 793b8e80941Smrg S_028810_CLIP_DISABLE(window_space)); 794b8e80941Smrg 795b8e80941Smrg if (initial_cdw != sctx->gfx_cs->current.cdw) 796b8e80941Smrg sctx->context_roll = true; 797848b8605Smrg} 798848b8605Smrg 799848b8605Smrg/* 800848b8605Smrg * inferred state between framebuffer and rasterizer 801848b8605Smrg */ 802b8e80941Smrgstatic void si_update_poly_offset_state(struct si_context *sctx) 803848b8605Smrg{ 804848b8605Smrg struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 805848b8605Smrg 806b8e80941Smrg if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) { 807b8e80941Smrg si_pm4_bind_state(sctx, poly_offset, NULL); 808848b8605Smrg return; 809b8e80941Smrg } 810848b8605Smrg 811b8e80941Smrg /* Use the user format, not db_render_format, so that the polygon 812b8e80941Smrg * offset behaves as expected by applications. 813b8e80941Smrg */ 814848b8605Smrg switch (sctx->framebuffer.state.zsbuf->texture->format) { 815b8e80941Smrg case PIPE_FORMAT_Z16_UNORM: 816b8e80941Smrg si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 817b8e80941Smrg break; 818b8e80941Smrg default: /* 24-bit */ 819b8e80941Smrg si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 820848b8605Smrg break; 821848b8605Smrg case PIPE_FORMAT_Z32_FLOAT: 822848b8605Smrg case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 823b8e80941Smrg si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 824848b8605Smrg break; 825848b8605Smrg } 826848b8605Smrg} 827848b8605Smrg 828848b8605Smrg/* 829848b8605Smrg * Rasterizer 830848b8605Smrg */ 831848b8605Smrg 832848b8605Smrgstatic uint32_t si_translate_fill(uint32_t func) 833848b8605Smrg{ 834848b8605Smrg switch(func) { 835848b8605Smrg case PIPE_POLYGON_MODE_FILL: 836848b8605Smrg return V_028814_X_DRAW_TRIANGLES; 837848b8605Smrg case PIPE_POLYGON_MODE_LINE: 838848b8605Smrg return V_028814_X_DRAW_LINES; 839848b8605Smrg case PIPE_POLYGON_MODE_POINT: 840848b8605Smrg return V_028814_X_DRAW_POINTS; 841848b8605Smrg default: 842848b8605Smrg assert(0); 843848b8605Smrg return V_028814_X_DRAW_POINTS; 844848b8605Smrg } 845848b8605Smrg} 846848b8605Smrg 847848b8605Smrgstatic void *si_create_rs_state(struct pipe_context *ctx, 848848b8605Smrg const struct pipe_rasterizer_state *state) 849848b8605Smrg{ 850b8e80941Smrg struct si_screen *sscreen = ((struct si_context *)ctx)->screen; 851848b8605Smrg struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 852848b8605Smrg struct si_pm4_state *pm4 = &rs->pm4; 853b8e80941Smrg unsigned tmp, i; 854848b8605Smrg float psize_min, psize_max; 855848b8605Smrg 856b8e80941Smrg if (!rs) { 857848b8605Smrg return NULL; 858848b8605Smrg } 859848b8605Smrg 860b8e80941Smrg rs->scissor_enable = state->scissor; 861b8e80941Smrg rs->clip_halfz = state->clip_halfz; 862848b8605Smrg rs->two_side = state->light_twoside; 863848b8605Smrg rs->multisample_enable = state->multisample; 864b8e80941Smrg rs->force_persample_interp = state->force_persample_interp; 865848b8605Smrg rs->clip_plane_enable = state->clip_plane_enable; 866b8e80941Smrg rs->half_pixel_center = state->half_pixel_center; 867848b8605Smrg rs->line_stipple_enable = state->line_stipple_enable; 868b8e80941Smrg rs->poly_stipple_enable = state->poly_stipple_enable; 869b8e80941Smrg rs->line_smooth = state->line_smooth; 870b8e80941Smrg rs->line_width = state->line_width; 871b8e80941Smrg rs->poly_smooth = state->poly_smooth; 872b8e80941Smrg rs->uses_poly_offset = state->offset_point || state->offset_line || 873b8e80941Smrg state->offset_tri; 874b8e80941Smrg rs->clamp_fragment_color = state->clamp_fragment_color; 875b8e80941Smrg rs->clamp_vertex_color = state->clamp_vertex_color; 876848b8605Smrg rs->flatshade = state->flatshade; 877848b8605Smrg rs->sprite_coord_enable = state->sprite_coord_enable; 878b8e80941Smrg rs->rasterizer_discard = state->rasterizer_discard; 879848b8605Smrg rs->pa_sc_line_stipple = state->line_stipple_enable ? 880848b8605Smrg S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 881848b8605Smrg S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 882848b8605Smrg rs->pa_cl_clip_cntl = 883b8e80941Smrg S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 884b8e80941Smrg S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) | 885b8e80941Smrg S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) | 886848b8605Smrg S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 887848b8605Smrg S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 888848b8605Smrg 889b8e80941Smrg si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, 890b8e80941Smrg S_0286D4_FLAT_SHADE_ENA(1) | 891b8e80941Smrg S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) | 892b8e80941Smrg S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 893b8e80941Smrg S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 894b8e80941Smrg S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 895b8e80941Smrg S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 896b8e80941Smrg S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 897848b8605Smrg 898848b8605Smrg /* point size 12.4 fixed point */ 899848b8605Smrg tmp = (unsigned)(state->point_size * 8.0); 900848b8605Smrg si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 901848b8605Smrg 902848b8605Smrg if (state->point_size_per_vertex) { 903848b8605Smrg psize_min = util_get_min_point_size(state); 904b8e80941Smrg psize_max = SI_MAX_POINT_SIZE; 905848b8605Smrg } else { 906848b8605Smrg /* Force the point size to be as if the vertex output was disabled. */ 907848b8605Smrg psize_min = state->point_size; 908848b8605Smrg psize_max = state->point_size; 909848b8605Smrg } 910b8e80941Smrg rs->max_point_size = psize_max; 911b8e80941Smrg 912848b8605Smrg /* Divide by two, because 0.5 = 1 pixel. */ 913848b8605Smrg si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 914848b8605Smrg S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 915848b8605Smrg S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 916848b8605Smrg 917b8e80941Smrg si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, 918b8e80941Smrg S_028A08_WIDTH(si_pack_float_12p4(state->line_width/2))); 919848b8605Smrg si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 920848b8605Smrg S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 921b8e80941Smrg S_028A48_MSAA_ENABLE(state->multisample || 922b8e80941Smrg state->poly_smooth || 923b8e80941Smrg state->line_smooth) | 924b8e80941Smrg S_028A48_VPORT_SCISSOR_ENABLE(1) | 925b8e80941Smrg S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.chip_class >= GFX9)); 926848b8605Smrg 927848b8605Smrg si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 928b8e80941Smrg si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 929b8e80941Smrg S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 930b8e80941Smrg S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 931b8e80941Smrg S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 932b8e80941Smrg S_028814_FACE(!state->front_ccw) | 933b8e80941Smrg S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 934b8e80941Smrg S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 935b8e80941Smrg S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 936b8e80941Smrg S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 937b8e80941Smrg state->fill_back != PIPE_POLYGON_MODE_FILL) | 938b8e80941Smrg S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 939b8e80941Smrg S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); 940b8e80941Smrg 941b8e80941Smrg if (!rs->uses_poly_offset) 942b8e80941Smrg return rs; 943b8e80941Smrg 944b8e80941Smrg rs->pm4_poly_offset = CALLOC(3, sizeof(struct si_pm4_state)); 945b8e80941Smrg if (!rs->pm4_poly_offset) { 946b8e80941Smrg FREE(rs); 947b8e80941Smrg return NULL; 948b8e80941Smrg } 949b8e80941Smrg 950b8e80941Smrg /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */ 951b8e80941Smrg for (i = 0; i < 3; i++) { 952b8e80941Smrg struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 953b8e80941Smrg float offset_units = state->offset_units; 954b8e80941Smrg float offset_scale = state->offset_scale * 16.0f; 955b8e80941Smrg uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 956b8e80941Smrg 957b8e80941Smrg if (!state->offset_units_unscaled) { 958b8e80941Smrg switch (i) { 959b8e80941Smrg case 0: /* 16-bit zbuffer */ 960b8e80941Smrg offset_units *= 4.0f; 961b8e80941Smrg pa_su_poly_offset_db_fmt_cntl = 962b8e80941Smrg S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 963b8e80941Smrg break; 964b8e80941Smrg case 1: /* 24-bit zbuffer */ 965b8e80941Smrg offset_units *= 2.0f; 966b8e80941Smrg pa_su_poly_offset_db_fmt_cntl = 967b8e80941Smrg S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 968b8e80941Smrg break; 969b8e80941Smrg case 2: /* 32-bit zbuffer */ 970b8e80941Smrg offset_units *= 1.0f; 971b8e80941Smrg pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 972b8e80941Smrg S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 973b8e80941Smrg break; 974b8e80941Smrg } 975b8e80941Smrg } 976b8e80941Smrg 977b8e80941Smrg si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 978b8e80941Smrg fui(offset_scale)); 979b8e80941Smrg si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 980b8e80941Smrg fui(offset_units)); 981b8e80941Smrg si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 982b8e80941Smrg fui(offset_scale)); 983b8e80941Smrg si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 984b8e80941Smrg fui(offset_units)); 985b8e80941Smrg si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 986b8e80941Smrg pa_su_poly_offset_db_fmt_cntl); 987b8e80941Smrg } 988848b8605Smrg 989848b8605Smrg return rs; 990848b8605Smrg} 991848b8605Smrg 992848b8605Smrgstatic void si_bind_rs_state(struct pipe_context *ctx, void *state) 993848b8605Smrg{ 994848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 995b8e80941Smrg struct si_state_rasterizer *old_rs = 996b8e80941Smrg (struct si_state_rasterizer*)sctx->queued.named.rasterizer; 997848b8605Smrg struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 998848b8605Smrg 999b8e80941Smrg if (!state) 1000848b8605Smrg return; 1001848b8605Smrg 1002b8e80941Smrg if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) { 1003b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1004b8e80941Smrg 1005b8e80941Smrg /* Update the small primitive filter workaround if necessary. */ 1006b8e80941Smrg if (sctx->screen->has_msaa_sample_loc_bug && 1007b8e80941Smrg sctx->framebuffer.nr_samples > 1) 1008b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); 1009b8e80941Smrg } 1010b8e80941Smrg 1011b8e80941Smrg sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR; 1012b8e80941Smrg sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color); 1013848b8605Smrg 1014848b8605Smrg si_pm4_bind_state(sctx, rasterizer, rs); 1015b8e80941Smrg si_update_poly_offset_state(sctx); 1016b8e80941Smrg 1017b8e80941Smrg if (!old_rs || 1018b8e80941Smrg old_rs->scissor_enable != rs->scissor_enable) 1019b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors); 1020b8e80941Smrg 1021b8e80941Smrg if (!old_rs || 1022b8e80941Smrg old_rs->line_width != rs->line_width || 1023b8e80941Smrg old_rs->max_point_size != rs->max_point_size || 1024b8e80941Smrg old_rs->half_pixel_center != rs->half_pixel_center) 1025b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband); 1026b8e80941Smrg 1027b8e80941Smrg if (!old_rs || 1028b8e80941Smrg old_rs->clip_halfz != rs->clip_halfz) 1029b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports); 1030b8e80941Smrg 1031b8e80941Smrg if (!old_rs || 1032b8e80941Smrg old_rs->clip_plane_enable != rs->clip_plane_enable || 1033b8e80941Smrg old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl) 1034b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs); 1035b8e80941Smrg 1036b8e80941Smrg sctx->ia_multi_vgt_param_key.u.line_stipple_enabled = 1037b8e80941Smrg rs->line_stipple_enable; 1038b8e80941Smrg 1039b8e80941Smrg if (!old_rs || 1040b8e80941Smrg old_rs->clip_plane_enable != rs->clip_plane_enable || 1041b8e80941Smrg old_rs->rasterizer_discard != rs->rasterizer_discard || 1042b8e80941Smrg old_rs->sprite_coord_enable != rs->sprite_coord_enable || 1043b8e80941Smrg old_rs->flatshade != rs->flatshade || 1044b8e80941Smrg old_rs->two_side != rs->two_side || 1045b8e80941Smrg old_rs->multisample_enable != rs->multisample_enable || 1046b8e80941Smrg old_rs->poly_stipple_enable != rs->poly_stipple_enable || 1047b8e80941Smrg old_rs->poly_smooth != rs->poly_smooth || 1048b8e80941Smrg old_rs->line_smooth != rs->line_smooth || 1049b8e80941Smrg old_rs->clamp_fragment_color != rs->clamp_fragment_color || 1050b8e80941Smrg old_rs->force_persample_interp != rs->force_persample_interp) 1051b8e80941Smrg sctx->do_update_shaders = true; 1052848b8605Smrg} 1053848b8605Smrg 1054848b8605Smrgstatic void si_delete_rs_state(struct pipe_context *ctx, void *state) 1055848b8605Smrg{ 1056848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 1057b8e80941Smrg struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 1058b8e80941Smrg 1059b8e80941Smrg if (sctx->queued.named.rasterizer == state) 1060b8e80941Smrg si_pm4_bind_state(sctx, poly_offset, NULL); 1061b8e80941Smrg 1062b8e80941Smrg FREE(rs->pm4_poly_offset); 1063b8e80941Smrg si_pm4_delete_state(sctx, rasterizer, rs); 1064848b8605Smrg} 1065848b8605Smrg 1066848b8605Smrg/* 1067848b8605Smrg * infeered state between dsa and stencil ref 1068848b8605Smrg */ 1069b8e80941Smrgstatic void si_emit_stencil_ref(struct si_context *sctx) 1070848b8605Smrg{ 1071b8e80941Smrg struct radeon_cmdbuf *cs = sctx->gfx_cs; 1072b8e80941Smrg struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; 1073b8e80941Smrg struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; 1074b8e80941Smrg 1075b8e80941Smrg radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); 1076b8e80941Smrg radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) | 1077b8e80941Smrg S_028430_STENCILMASK(dsa->valuemask[0]) | 1078b8e80941Smrg S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 1079b8e80941Smrg S_028430_STENCILOPVAL(1)); 1080b8e80941Smrg radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 1081b8e80941Smrg S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 1082b8e80941Smrg S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 1083b8e80941Smrg S_028434_STENCILOPVAL_BF(1)); 1084848b8605Smrg} 1085848b8605Smrg 1086b8e80941Smrgstatic void si_set_stencil_ref(struct pipe_context *ctx, 1087b8e80941Smrg const struct pipe_stencil_ref *state) 1088848b8605Smrg{ 1089848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 1090b8e80941Smrg 1091b8e80941Smrg if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0) 1092b8e80941Smrg return; 1093b8e80941Smrg 1094b8e80941Smrg sctx->stencil_ref.state = *state; 1095b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); 1096848b8605Smrg} 1097848b8605Smrg 1098848b8605Smrg 1099848b8605Smrg/* 1100848b8605Smrg * DSA 1101848b8605Smrg */ 1102848b8605Smrg 1103848b8605Smrgstatic uint32_t si_translate_stencil_op(int s_op) 1104848b8605Smrg{ 1105848b8605Smrg switch (s_op) { 1106848b8605Smrg case PIPE_STENCIL_OP_KEEP: 1107848b8605Smrg return V_02842C_STENCIL_KEEP; 1108848b8605Smrg case PIPE_STENCIL_OP_ZERO: 1109848b8605Smrg return V_02842C_STENCIL_ZERO; 1110848b8605Smrg case PIPE_STENCIL_OP_REPLACE: 1111848b8605Smrg return V_02842C_STENCIL_REPLACE_TEST; 1112848b8605Smrg case PIPE_STENCIL_OP_INCR: 1113848b8605Smrg return V_02842C_STENCIL_ADD_CLAMP; 1114848b8605Smrg case PIPE_STENCIL_OP_DECR: 1115848b8605Smrg return V_02842C_STENCIL_SUB_CLAMP; 1116848b8605Smrg case PIPE_STENCIL_OP_INCR_WRAP: 1117848b8605Smrg return V_02842C_STENCIL_ADD_WRAP; 1118848b8605Smrg case PIPE_STENCIL_OP_DECR_WRAP: 1119848b8605Smrg return V_02842C_STENCIL_SUB_WRAP; 1120848b8605Smrg case PIPE_STENCIL_OP_INVERT: 1121848b8605Smrg return V_02842C_STENCIL_INVERT; 1122848b8605Smrg default: 1123b8e80941Smrg PRINT_ERR("Unknown stencil op %d", s_op); 1124848b8605Smrg assert(0); 1125848b8605Smrg break; 1126848b8605Smrg } 1127848b8605Smrg return 0; 1128848b8605Smrg} 1129848b8605Smrg 1130b8e80941Smrgstatic bool si_dsa_writes_stencil(const struct pipe_stencil_state *s) 1131848b8605Smrg{ 1132b8e80941Smrg return s->enabled && s->writemask && 1133b8e80941Smrg (s->fail_op != PIPE_STENCIL_OP_KEEP || 1134b8e80941Smrg s->zfail_op != PIPE_STENCIL_OP_KEEP || 1135b8e80941Smrg s->zpass_op != PIPE_STENCIL_OP_KEEP); 1136b8e80941Smrg} 1137b8e80941Smrg 1138b8e80941Smrgstatic bool si_order_invariant_stencil_op(enum pipe_stencil_op op) 1139b8e80941Smrg{ 1140b8e80941Smrg /* REPLACE is normally order invariant, except when the stencil 1141b8e80941Smrg * reference value is written by the fragment shader. Tracking this 1142b8e80941Smrg * interaction does not seem worth the effort, so be conservative. */ 1143b8e80941Smrg return op != PIPE_STENCIL_OP_INCR && 1144b8e80941Smrg op != PIPE_STENCIL_OP_DECR && 1145b8e80941Smrg op != PIPE_STENCIL_OP_REPLACE; 1146b8e80941Smrg} 1147b8e80941Smrg 1148b8e80941Smrg/* Compute whether, assuming Z writes are disabled, this stencil state is order 1149b8e80941Smrg * invariant in the sense that the set of passing fragments as well as the 1150b8e80941Smrg * final stencil buffer result does not depend on the order of fragments. */ 1151b8e80941Smrgstatic bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state) 1152b8e80941Smrg{ 1153b8e80941Smrg return !state->enabled || !state->writemask || 1154b8e80941Smrg /* The following assumes that Z writes are disabled. */ 1155b8e80941Smrg (state->func == PIPE_FUNC_ALWAYS && 1156b8e80941Smrg si_order_invariant_stencil_op(state->zpass_op) && 1157b8e80941Smrg si_order_invariant_stencil_op(state->zfail_op)) || 1158b8e80941Smrg (state->func == PIPE_FUNC_NEVER && 1159b8e80941Smrg si_order_invariant_stencil_op(state->fail_op)); 1160b8e80941Smrg} 1161b8e80941Smrg 1162b8e80941Smrgstatic void *si_create_dsa_state(struct pipe_context *ctx, 1163b8e80941Smrg const struct pipe_depth_stencil_alpha_state *state) 1164b8e80941Smrg{ 1165b8e80941Smrg struct si_context *sctx = (struct si_context *)ctx; 1166b8e80941Smrg struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 1167848b8605Smrg struct si_pm4_state *pm4 = &dsa->pm4; 1168848b8605Smrg unsigned db_depth_control; 1169848b8605Smrg uint32_t db_stencil_control = 0; 1170848b8605Smrg 1171b8e80941Smrg if (!dsa) { 1172848b8605Smrg return NULL; 1173848b8605Smrg } 1174848b8605Smrg 1175b8e80941Smrg dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 1176b8e80941Smrg dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 1177b8e80941Smrg dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 1178b8e80941Smrg dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 1179848b8605Smrg 1180848b8605Smrg db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 1181848b8605Smrg S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 1182b8e80941Smrg S_028800_ZFUNC(state->depth.func) | 1183b8e80941Smrg S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test); 1184848b8605Smrg 1185848b8605Smrg /* stencil */ 1186848b8605Smrg if (state->stencil[0].enabled) { 1187848b8605Smrg db_depth_control |= S_028800_STENCIL_ENABLE(1); 1188848b8605Smrg db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 1189848b8605Smrg db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 1190848b8605Smrg db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 1191848b8605Smrg db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 1192848b8605Smrg 1193848b8605Smrg if (state->stencil[1].enabled) { 1194848b8605Smrg db_depth_control |= S_028800_BACKFACE_ENABLE(1); 1195848b8605Smrg db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 1196848b8605Smrg db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 1197848b8605Smrg db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 1198848b8605Smrg db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 1199848b8605Smrg } 1200848b8605Smrg } 1201848b8605Smrg 1202848b8605Smrg /* alpha */ 1203848b8605Smrg if (state->alpha.enabled) { 1204848b8605Smrg dsa->alpha_func = state->alpha.func; 1205848b8605Smrg 1206848b8605Smrg si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 1207b8e80941Smrg SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 1208848b8605Smrg } else { 1209848b8605Smrg dsa->alpha_func = PIPE_FUNC_ALWAYS; 1210848b8605Smrg } 1211848b8605Smrg 1212848b8605Smrg si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1213b8e80941Smrg if (state->stencil[0].enabled) 1214b8e80941Smrg si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 1215b8e80941Smrg if (state->depth.bounds_test) { 1216b8e80941Smrg si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); 1217b8e80941Smrg si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); 1218b8e80941Smrg } 1219b8e80941Smrg 1220b8e80941Smrg dsa->depth_enabled = state->depth.enabled; 1221b8e80941Smrg dsa->depth_write_enabled = state->depth.enabled && 1222b8e80941Smrg state->depth.writemask; 1223b8e80941Smrg dsa->stencil_enabled = state->stencil[0].enabled; 1224b8e80941Smrg dsa->stencil_write_enabled = state->stencil[0].enabled && 1225b8e80941Smrg (si_dsa_writes_stencil(&state->stencil[0]) || 1226b8e80941Smrg si_dsa_writes_stencil(&state->stencil[1])); 1227b8e80941Smrg dsa->db_can_write = dsa->depth_write_enabled || 1228b8e80941Smrg dsa->stencil_write_enabled; 1229b8e80941Smrg 1230b8e80941Smrg bool zfunc_is_ordered = 1231b8e80941Smrg state->depth.func == PIPE_FUNC_NEVER || 1232b8e80941Smrg state->depth.func == PIPE_FUNC_LESS || 1233b8e80941Smrg state->depth.func == PIPE_FUNC_LEQUAL || 1234b8e80941Smrg state->depth.func == PIPE_FUNC_GREATER || 1235b8e80941Smrg state->depth.func == PIPE_FUNC_GEQUAL; 1236b8e80941Smrg 1237b8e80941Smrg bool nozwrite_and_order_invariant_stencil = 1238b8e80941Smrg !dsa->db_can_write || 1239b8e80941Smrg (!dsa->depth_write_enabled && 1240b8e80941Smrg si_order_invariant_stencil_state(&state->stencil[0]) && 1241b8e80941Smrg si_order_invariant_stencil_state(&state->stencil[1])); 1242b8e80941Smrg 1243b8e80941Smrg dsa->order_invariance[1].zs = 1244b8e80941Smrg nozwrite_and_order_invariant_stencil || 1245b8e80941Smrg (!dsa->stencil_write_enabled && zfunc_is_ordered); 1246b8e80941Smrg dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered; 1247b8e80941Smrg 1248b8e80941Smrg dsa->order_invariance[1].pass_set = 1249b8e80941Smrg nozwrite_and_order_invariant_stencil || 1250b8e80941Smrg (!dsa->stencil_write_enabled && 1251b8e80941Smrg (state->depth.func == PIPE_FUNC_ALWAYS || 1252b8e80941Smrg state->depth.func == PIPE_FUNC_NEVER)); 1253b8e80941Smrg dsa->order_invariance[0].pass_set = 1254b8e80941Smrg !dsa->depth_write_enabled || 1255b8e80941Smrg (state->depth.func == PIPE_FUNC_ALWAYS || 1256b8e80941Smrg state->depth.func == PIPE_FUNC_NEVER); 1257b8e80941Smrg 1258b8e80941Smrg dsa->order_invariance[1].pass_last = 1259b8e80941Smrg sctx->screen->assume_no_z_fights && 1260b8e80941Smrg !dsa->stencil_write_enabled && 1261b8e80941Smrg dsa->depth_write_enabled && zfunc_is_ordered; 1262b8e80941Smrg dsa->order_invariance[0].pass_last = 1263b8e80941Smrg sctx->screen->assume_no_z_fights && 1264b8e80941Smrg dsa->depth_write_enabled && zfunc_is_ordered; 1265848b8605Smrg 1266848b8605Smrg return dsa; 1267848b8605Smrg} 1268848b8605Smrg 1269848b8605Smrgstatic void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1270848b8605Smrg{ 1271848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 1272b8e80941Smrg struct si_state_dsa *old_dsa = sctx->queued.named.dsa; 1273848b8605Smrg struct si_state_dsa *dsa = state; 1274848b8605Smrg 1275b8e80941Smrg if (!state) 1276848b8605Smrg return; 1277848b8605Smrg 1278848b8605Smrg si_pm4_bind_state(sctx, dsa, dsa); 1279b8e80941Smrg 1280b8e80941Smrg if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1281b8e80941Smrg sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1282b8e80941Smrg sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1283b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); 1284b8e80941Smrg } 1285b8e80941Smrg 1286b8e80941Smrg if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func) 1287b8e80941Smrg sctx->do_update_shaders = true; 1288b8e80941Smrg 1289b8e80941Smrg if (sctx->screen->dpbb_allowed && 1290b8e80941Smrg (!old_dsa || 1291b8e80941Smrg (old_dsa->depth_enabled != dsa->depth_enabled || 1292b8e80941Smrg old_dsa->stencil_enabled != dsa->stencil_enabled || 1293b8e80941Smrg old_dsa->db_can_write != dsa->db_can_write))) 1294b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 1295b8e80941Smrg 1296b8e80941Smrg if (sctx->screen->has_out_of_order_rast && 1297b8e80941Smrg (!old_dsa || 1298b8e80941Smrg memcmp(old_dsa->order_invariance, dsa->order_invariance, 1299b8e80941Smrg sizeof(old_dsa->order_invariance)))) 1300b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1301848b8605Smrg} 1302848b8605Smrg 1303848b8605Smrgstatic void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1304848b8605Smrg{ 1305848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 1306848b8605Smrg si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 1307848b8605Smrg} 1308848b8605Smrg 1309b8e80941Smrgstatic void *si_create_db_flush_dsa(struct si_context *sctx) 1310b8e80941Smrg{ 1311b8e80941Smrg struct pipe_depth_stencil_alpha_state dsa = {}; 1312b8e80941Smrg 1313b8e80941Smrg return sctx->b.create_depth_stencil_alpha_state(&sctx->b, &dsa); 1314b8e80941Smrg} 1315b8e80941Smrg 1316b8e80941Smrg/* DB RENDER STATE */ 1317b8e80941Smrg 1318b8e80941Smrgstatic void si_set_active_query_state(struct pipe_context *ctx, boolean enable) 1319b8e80941Smrg{ 1320b8e80941Smrg struct si_context *sctx = (struct si_context*)ctx; 1321b8e80941Smrg 1322b8e80941Smrg /* Pipeline stat & streamout queries. */ 1323b8e80941Smrg if (enable) { 1324b8e80941Smrg sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS; 1325b8e80941Smrg sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS; 1326b8e80941Smrg } else { 1327b8e80941Smrg sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS; 1328b8e80941Smrg sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS; 1329b8e80941Smrg } 1330b8e80941Smrg 1331b8e80941Smrg /* Occlusion queries. */ 1332b8e80941Smrg if (sctx->occlusion_queries_disabled != !enable) { 1333b8e80941Smrg sctx->occlusion_queries_disabled = !enable; 1334b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1335b8e80941Smrg } 1336b8e80941Smrg} 1337b8e80941Smrg 1338b8e80941Smrgvoid si_set_occlusion_query_state(struct si_context *sctx, 1339b8e80941Smrg bool old_perfect_enable) 1340848b8605Smrg{ 1341b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1342848b8605Smrg 1343b8e80941Smrg bool perfect_enable = sctx->num_perfect_occlusion_queries != 0; 1344848b8605Smrg 1345b8e80941Smrg if (perfect_enable != old_perfect_enable) 1346b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1347b8e80941Smrg} 1348b8e80941Smrg 1349b8e80941Smrgvoid si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st) 1350b8e80941Smrg{ 1351b8e80941Smrg st->saved_compute = sctx->cs_shader_state.program; 1352b8e80941Smrg 1353b8e80941Smrg si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); 1354b8e80941Smrg si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo); 1355b8e80941Smrg 1356b8e80941Smrg st->saved_ssbo_writable_mask = 0; 1357b8e80941Smrg 1358b8e80941Smrg for (unsigned i = 0; i < 3; i++) { 1359b8e80941Smrg if (sctx->const_and_shader_buffers[PIPE_SHADER_COMPUTE].writable_mask & 1360b8e80941Smrg (1u << si_get_shaderbuf_slot(i))) 1361b8e80941Smrg st->saved_ssbo_writable_mask |= 1 << i; 1362b8e80941Smrg } 1363b8e80941Smrg} 1364b8e80941Smrg 1365b8e80941Smrgstatic void si_emit_db_render_state(struct si_context *sctx) 1366b8e80941Smrg{ 1367b8e80941Smrg struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 1368b8e80941Smrg unsigned db_shader_control, db_render_control, db_count_control; 1369b8e80941Smrg unsigned initial_cdw = sctx->gfx_cs->current.cdw; 1370b8e80941Smrg 1371b8e80941Smrg /* DB_RENDER_CONTROL */ 1372b8e80941Smrg if (sctx->dbcb_depth_copy_enabled || 1373b8e80941Smrg sctx->dbcb_stencil_copy_enabled) { 1374b8e80941Smrg db_render_control = 1375b8e80941Smrg S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 1376b8e80941Smrg S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 1377b8e80941Smrg S_028000_COPY_CENTROID(1) | 1378b8e80941Smrg S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample); 1379b8e80941Smrg } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { 1380b8e80941Smrg db_render_control = 1381b8e80941Smrg S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | 1382b8e80941Smrg S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace); 1383b8e80941Smrg } else { 1384b8e80941Smrg db_render_control = 1385b8e80941Smrg S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | 1386b8e80941Smrg S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear); 1387b8e80941Smrg } 1388b8e80941Smrg 1389b8e80941Smrg /* DB_COUNT_CONTROL (occlusion queries) */ 1390b8e80941Smrg if (sctx->num_occlusion_queries > 0 && 1391b8e80941Smrg !sctx->occlusion_queries_disabled) { 1392b8e80941Smrg bool perfect = sctx->num_perfect_occlusion_queries > 0; 1393b8e80941Smrg 1394b8e80941Smrg if (sctx->chip_class >= CIK) { 1395b8e80941Smrg unsigned log_sample_rate = sctx->framebuffer.log_samples; 1396b8e80941Smrg 1397b8e80941Smrg /* Stoney doesn't increment occlusion query counters 1398b8e80941Smrg * if the sample rate is 16x. Use 8x sample rate instead. 1399b8e80941Smrg */ 1400b8e80941Smrg if (sctx->family == CHIP_STONEY) 1401b8e80941Smrg log_sample_rate = MIN2(log_sample_rate, 3); 1402b8e80941Smrg 1403b8e80941Smrg db_count_control = 1404b8e80941Smrg S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1405b8e80941Smrg S_028004_SAMPLE_RATE(log_sample_rate) | 1406b8e80941Smrg S_028004_ZPASS_ENABLE(1) | 1407b8e80941Smrg S_028004_SLICE_EVEN_ENABLE(1) | 1408b8e80941Smrg S_028004_SLICE_ODD_ENABLE(1); 1409b8e80941Smrg } else { 1410b8e80941Smrg db_count_control = 1411b8e80941Smrg S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1412b8e80941Smrg S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples); 1413b8e80941Smrg } 1414848b8605Smrg } else { 1415b8e80941Smrg /* Disable occlusion queries. */ 1416b8e80941Smrg if (sctx->chip_class >= CIK) { 1417b8e80941Smrg db_count_control = 0; 1418b8e80941Smrg } else { 1419b8e80941Smrg db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); 1420b8e80941Smrg } 1421b8e80941Smrg } 1422b8e80941Smrg 1423b8e80941Smrg radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, 1424b8e80941Smrg SI_TRACKED_DB_RENDER_CONTROL, db_render_control, 1425b8e80941Smrg db_count_control); 1426b8e80941Smrg 1427b8e80941Smrg /* DB_RENDER_OVERRIDE2 */ 1428b8e80941Smrg radeon_opt_set_context_reg(sctx, R_028010_DB_RENDER_OVERRIDE2, 1429b8e80941Smrg SI_TRACKED_DB_RENDER_OVERRIDE2, 1430b8e80941Smrg S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | 1431b8e80941Smrg S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | 1432b8e80941Smrg S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4)); 1433b8e80941Smrg 1434b8e80941Smrg db_shader_control = sctx->ps_db_shader_control; 1435b8e80941Smrg 1436b8e80941Smrg /* Bug workaround for smoothing (overrasterization) on SI. */ 1437b8e80941Smrg if (sctx->chip_class == SI && sctx->smoothing_enabled) { 1438b8e80941Smrg db_shader_control &= C_02880C_Z_ORDER; 1439b8e80941Smrg db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 1440848b8605Smrg } 1441848b8605Smrg 1442b8e80941Smrg /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ 1443b8e80941Smrg if (!rs->multisample_enable) 1444b8e80941Smrg db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 1445b8e80941Smrg 1446b8e80941Smrg if (sctx->screen->has_rbplus && 1447b8e80941Smrg !sctx->screen->rbplus_allowed) 1448b8e80941Smrg db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); 1449b8e80941Smrg 1450b8e80941Smrg radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, 1451b8e80941Smrg SI_TRACKED_DB_SHADER_CONTROL, db_shader_control); 1452b8e80941Smrg 1453b8e80941Smrg if (initial_cdw != sctx->gfx_cs->current.cdw) 1454b8e80941Smrg sctx->context_roll = true; 1455848b8605Smrg} 1456848b8605Smrg 1457848b8605Smrg/* 1458848b8605Smrg * format translation 1459848b8605Smrg */ 1460848b8605Smrgstatic uint32_t si_translate_colorformat(enum pipe_format format) 1461848b8605Smrg{ 1462848b8605Smrg const struct util_format_description *desc = util_format_description(format); 1463b8e80941Smrg if (!desc) 1464b8e80941Smrg return V_028C70_COLOR_INVALID; 1465848b8605Smrg 1466848b8605Smrg#define HAS_SIZE(x,y,z,w) \ 1467848b8605Smrg (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 1468848b8605Smrg desc->channel[2].size == (z) && desc->channel[3].size == (w)) 1469848b8605Smrg 1470848b8605Smrg if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1471848b8605Smrg return V_028C70_COLOR_10_11_11; 1472848b8605Smrg 1473848b8605Smrg if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1474848b8605Smrg return V_028C70_COLOR_INVALID; 1475848b8605Smrg 1476b8e80941Smrg /* hw cannot support mixed formats (except depth/stencil, since 1477b8e80941Smrg * stencil is not written to). */ 1478b8e80941Smrg if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1479b8e80941Smrg return V_028C70_COLOR_INVALID; 1480b8e80941Smrg 1481848b8605Smrg switch (desc->nr_channels) { 1482848b8605Smrg case 1: 1483848b8605Smrg switch (desc->channel[0].size) { 1484848b8605Smrg case 8: 1485848b8605Smrg return V_028C70_COLOR_8; 1486848b8605Smrg case 16: 1487848b8605Smrg return V_028C70_COLOR_16; 1488848b8605Smrg case 32: 1489848b8605Smrg return V_028C70_COLOR_32; 1490848b8605Smrg } 1491848b8605Smrg break; 1492848b8605Smrg case 2: 1493848b8605Smrg if (desc->channel[0].size == desc->channel[1].size) { 1494848b8605Smrg switch (desc->channel[0].size) { 1495848b8605Smrg case 8: 1496848b8605Smrg return V_028C70_COLOR_8_8; 1497848b8605Smrg case 16: 1498848b8605Smrg return V_028C70_COLOR_16_16; 1499848b8605Smrg case 32: 1500848b8605Smrg return V_028C70_COLOR_32_32; 1501848b8605Smrg } 1502848b8605Smrg } else if (HAS_SIZE(8,24,0,0)) { 1503848b8605Smrg return V_028C70_COLOR_24_8; 1504848b8605Smrg } else if (HAS_SIZE(24,8,0,0)) { 1505848b8605Smrg return V_028C70_COLOR_8_24; 1506848b8605Smrg } 1507848b8605Smrg break; 1508848b8605Smrg case 3: 1509848b8605Smrg if (HAS_SIZE(5,6,5,0)) { 1510848b8605Smrg return V_028C70_COLOR_5_6_5; 1511848b8605Smrg } else if (HAS_SIZE(32,8,24,0)) { 1512848b8605Smrg return V_028C70_COLOR_X24_8_32_FLOAT; 1513848b8605Smrg } 1514848b8605Smrg break; 1515848b8605Smrg case 4: 1516848b8605Smrg if (desc->channel[0].size == desc->channel[1].size && 1517848b8605Smrg desc->channel[0].size == desc->channel[2].size && 1518848b8605Smrg desc->channel[0].size == desc->channel[3].size) { 1519848b8605Smrg switch (desc->channel[0].size) { 1520848b8605Smrg case 4: 1521848b8605Smrg return V_028C70_COLOR_4_4_4_4; 1522848b8605Smrg case 8: 1523848b8605Smrg return V_028C70_COLOR_8_8_8_8; 1524848b8605Smrg case 16: 1525848b8605Smrg return V_028C70_COLOR_16_16_16_16; 1526848b8605Smrg case 32: 1527848b8605Smrg return V_028C70_COLOR_32_32_32_32; 1528848b8605Smrg } 1529848b8605Smrg } else if (HAS_SIZE(5,5,5,1)) { 1530848b8605Smrg return V_028C70_COLOR_1_5_5_5; 1531b8e80941Smrg } else if (HAS_SIZE(1,5,5,5)) { 1532b8e80941Smrg return V_028C70_COLOR_5_5_5_1; 1533848b8605Smrg } else if (HAS_SIZE(10,10,10,2)) { 1534848b8605Smrg return V_028C70_COLOR_2_10_10_10; 1535848b8605Smrg } 1536848b8605Smrg break; 1537848b8605Smrg } 1538848b8605Smrg return V_028C70_COLOR_INVALID; 1539848b8605Smrg} 1540848b8605Smrg 1541848b8605Smrgstatic uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1542848b8605Smrg{ 1543848b8605Smrg if (SI_BIG_ENDIAN) { 1544848b8605Smrg switch(colorformat) { 1545848b8605Smrg /* 8-bit buffers. */ 1546848b8605Smrg case V_028C70_COLOR_8: 1547848b8605Smrg return V_028C70_ENDIAN_NONE; 1548848b8605Smrg 1549848b8605Smrg /* 16-bit buffers. */ 1550848b8605Smrg case V_028C70_COLOR_5_6_5: 1551848b8605Smrg case V_028C70_COLOR_1_5_5_5: 1552848b8605Smrg case V_028C70_COLOR_4_4_4_4: 1553848b8605Smrg case V_028C70_COLOR_16: 1554848b8605Smrg case V_028C70_COLOR_8_8: 1555848b8605Smrg return V_028C70_ENDIAN_8IN16; 1556848b8605Smrg 1557848b8605Smrg /* 32-bit buffers. */ 1558848b8605Smrg case V_028C70_COLOR_8_8_8_8: 1559848b8605Smrg case V_028C70_COLOR_2_10_10_10: 1560848b8605Smrg case V_028C70_COLOR_8_24: 1561848b8605Smrg case V_028C70_COLOR_24_8: 1562848b8605Smrg case V_028C70_COLOR_16_16: 1563848b8605Smrg return V_028C70_ENDIAN_8IN32; 1564848b8605Smrg 1565848b8605Smrg /* 64-bit buffers. */ 1566848b8605Smrg case V_028C70_COLOR_16_16_16_16: 1567848b8605Smrg return V_028C70_ENDIAN_8IN16; 1568848b8605Smrg 1569848b8605Smrg case V_028C70_COLOR_32_32: 1570848b8605Smrg return V_028C70_ENDIAN_8IN32; 1571848b8605Smrg 1572848b8605Smrg /* 128-bit buffers. */ 1573848b8605Smrg case V_028C70_COLOR_32_32_32_32: 1574848b8605Smrg return V_028C70_ENDIAN_8IN32; 1575848b8605Smrg default: 1576848b8605Smrg return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1577848b8605Smrg } 1578848b8605Smrg } else { 1579848b8605Smrg return V_028C70_ENDIAN_NONE; 1580848b8605Smrg } 1581848b8605Smrg} 1582848b8605Smrg 1583848b8605Smrgstatic uint32_t si_translate_dbformat(enum pipe_format format) 1584848b8605Smrg{ 1585848b8605Smrg switch (format) { 1586848b8605Smrg case PIPE_FORMAT_Z16_UNORM: 1587848b8605Smrg return V_028040_Z_16; 1588848b8605Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1589848b8605Smrg case PIPE_FORMAT_X8Z24_UNORM: 1590848b8605Smrg case PIPE_FORMAT_Z24X8_UNORM: 1591848b8605Smrg case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1592848b8605Smrg return V_028040_Z_24; /* deprecated on SI */ 1593848b8605Smrg case PIPE_FORMAT_Z32_FLOAT: 1594848b8605Smrg case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1595848b8605Smrg return V_028040_Z_32_FLOAT; 1596848b8605Smrg default: 1597848b8605Smrg return V_028040_Z_INVALID; 1598848b8605Smrg } 1599848b8605Smrg} 1600848b8605Smrg 1601848b8605Smrg/* 1602848b8605Smrg * Texture translation 1603848b8605Smrg */ 1604848b8605Smrg 1605848b8605Smrgstatic uint32_t si_translate_texformat(struct pipe_screen *screen, 1606848b8605Smrg enum pipe_format format, 1607848b8605Smrg const struct util_format_description *desc, 1608848b8605Smrg int first_non_void) 1609848b8605Smrg{ 1610848b8605Smrg struct si_screen *sscreen = (struct si_screen*)screen; 1611b8e80941Smrg bool uniform = true; 1612848b8605Smrg int i; 1613848b8605Smrg 1614848b8605Smrg /* Colorspace (return non-RGB formats directly). */ 1615848b8605Smrg switch (desc->colorspace) { 1616848b8605Smrg /* Depth stencil formats */ 1617848b8605Smrg case UTIL_FORMAT_COLORSPACE_ZS: 1618848b8605Smrg switch (format) { 1619848b8605Smrg case PIPE_FORMAT_Z16_UNORM: 1620848b8605Smrg return V_008F14_IMG_DATA_FORMAT_16; 1621848b8605Smrg case PIPE_FORMAT_X24S8_UINT: 1622b8e80941Smrg case PIPE_FORMAT_S8X24_UINT: 1623b8e80941Smrg /* 1624b8e80941Smrg * Implemented as an 8_8_8_8 data format to fix texture 1625b8e80941Smrg * gathers in stencil sampling. This affects at least 1626b8e80941Smrg * GL45-CTS.texture_cube_map_array.sampling on VI. 1627b8e80941Smrg */ 1628b8e80941Smrg if (sscreen->info.chip_class <= VI) 1629b8e80941Smrg return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1630b8e80941Smrg 1631b8e80941Smrg if (format == PIPE_FORMAT_X24S8_UINT) 1632b8e80941Smrg return V_008F14_IMG_DATA_FORMAT_8_24; 1633b8e80941Smrg else 1634b8e80941Smrg return V_008F14_IMG_DATA_FORMAT_24_8; 1635848b8605Smrg case PIPE_FORMAT_Z24X8_UNORM: 1636848b8605Smrg case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1637848b8605Smrg return V_008F14_IMG_DATA_FORMAT_8_24; 1638848b8605Smrg case PIPE_FORMAT_X8Z24_UNORM: 1639848b8605Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1640848b8605Smrg return V_008F14_IMG_DATA_FORMAT_24_8; 1641848b8605Smrg case PIPE_FORMAT_S8_UINT: 1642848b8605Smrg return V_008F14_IMG_DATA_FORMAT_8; 1643848b8605Smrg case PIPE_FORMAT_Z32_FLOAT: 1644848b8605Smrg return V_008F14_IMG_DATA_FORMAT_32; 1645848b8605Smrg case PIPE_FORMAT_X32_S8X24_UINT: 1646848b8605Smrg case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1647848b8605Smrg return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1648848b8605Smrg default: 1649848b8605Smrg goto out_unknown; 1650848b8605Smrg } 1651848b8605Smrg 1652848b8605Smrg case UTIL_FORMAT_COLORSPACE_YUV: 1653848b8605Smrg goto out_unknown; /* TODO */ 1654848b8605Smrg 1655848b8605Smrg case UTIL_FORMAT_COLORSPACE_SRGB: 1656848b8605Smrg if (desc->nr_channels != 4 && desc->nr_channels != 1) 1657848b8605Smrg goto out_unknown; 1658848b8605Smrg break; 1659848b8605Smrg 1660848b8605Smrg default: 1661848b8605Smrg break; 1662848b8605Smrg } 1663848b8605Smrg 1664848b8605Smrg if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1665b8e80941Smrg if (!sscreen->info.has_format_bc1_through_bc7) 1666848b8605Smrg goto out_unknown; 1667848b8605Smrg 1668848b8605Smrg switch (format) { 1669848b8605Smrg case PIPE_FORMAT_RGTC1_SNORM: 1670848b8605Smrg case PIPE_FORMAT_LATC1_SNORM: 1671848b8605Smrg case PIPE_FORMAT_RGTC1_UNORM: 1672848b8605Smrg case PIPE_FORMAT_LATC1_UNORM: 1673848b8605Smrg return V_008F14_IMG_DATA_FORMAT_BC4; 1674848b8605Smrg case PIPE_FORMAT_RGTC2_SNORM: 1675848b8605Smrg case PIPE_FORMAT_LATC2_SNORM: 1676848b8605Smrg case PIPE_FORMAT_RGTC2_UNORM: 1677848b8605Smrg case PIPE_FORMAT_LATC2_UNORM: 1678848b8605Smrg return V_008F14_IMG_DATA_FORMAT_BC5; 1679848b8605Smrg default: 1680848b8605Smrg goto out_unknown; 1681848b8605Smrg } 1682848b8605Smrg } 1683848b8605Smrg 1684b8e80941Smrg if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && 1685b8e80941Smrg (sscreen->info.family == CHIP_STONEY || 1686b8e80941Smrg sscreen->info.family == CHIP_VEGA10 || 1687b8e80941Smrg sscreen->info.family == CHIP_RAVEN)) { 1688b8e80941Smrg switch (format) { 1689b8e80941Smrg case PIPE_FORMAT_ETC1_RGB8: 1690b8e80941Smrg case PIPE_FORMAT_ETC2_RGB8: 1691b8e80941Smrg case PIPE_FORMAT_ETC2_SRGB8: 1692b8e80941Smrg return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; 1693b8e80941Smrg case PIPE_FORMAT_ETC2_RGB8A1: 1694b8e80941Smrg case PIPE_FORMAT_ETC2_SRGB8A1: 1695b8e80941Smrg return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; 1696b8e80941Smrg case PIPE_FORMAT_ETC2_RGBA8: 1697b8e80941Smrg case PIPE_FORMAT_ETC2_SRGBA8: 1698b8e80941Smrg return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; 1699b8e80941Smrg case PIPE_FORMAT_ETC2_R11_UNORM: 1700b8e80941Smrg case PIPE_FORMAT_ETC2_R11_SNORM: 1701b8e80941Smrg return V_008F14_IMG_DATA_FORMAT_ETC2_R; 1702b8e80941Smrg case PIPE_FORMAT_ETC2_RG11_UNORM: 1703b8e80941Smrg case PIPE_FORMAT_ETC2_RG11_SNORM: 1704b8e80941Smrg return V_008F14_IMG_DATA_FORMAT_ETC2_RG; 1705b8e80941Smrg default: 1706b8e80941Smrg goto out_unknown; 1707b8e80941Smrg } 1708b8e80941Smrg } 1709b8e80941Smrg 1710848b8605Smrg if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1711b8e80941Smrg if (!sscreen->info.has_format_bc1_through_bc7) 1712848b8605Smrg goto out_unknown; 1713848b8605Smrg 1714848b8605Smrg switch (format) { 1715848b8605Smrg case PIPE_FORMAT_BPTC_RGBA_UNORM: 1716848b8605Smrg case PIPE_FORMAT_BPTC_SRGBA: 1717848b8605Smrg return V_008F14_IMG_DATA_FORMAT_BC7; 1718848b8605Smrg case PIPE_FORMAT_BPTC_RGB_FLOAT: 1719848b8605Smrg case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1720848b8605Smrg return V_008F14_IMG_DATA_FORMAT_BC6; 1721848b8605Smrg default: 1722848b8605Smrg goto out_unknown; 1723848b8605Smrg } 1724848b8605Smrg } 1725848b8605Smrg 1726848b8605Smrg if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1727848b8605Smrg switch (format) { 1728848b8605Smrg case PIPE_FORMAT_R8G8_B8G8_UNORM: 1729848b8605Smrg case PIPE_FORMAT_G8R8_B8R8_UNORM: 1730848b8605Smrg return V_008F14_IMG_DATA_FORMAT_GB_GR; 1731848b8605Smrg case PIPE_FORMAT_G8R8_G8B8_UNORM: 1732848b8605Smrg case PIPE_FORMAT_R8G8_R8B8_UNORM: 1733848b8605Smrg return V_008F14_IMG_DATA_FORMAT_BG_RG; 1734848b8605Smrg default: 1735848b8605Smrg goto out_unknown; 1736848b8605Smrg } 1737848b8605Smrg } 1738848b8605Smrg 1739848b8605Smrg if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1740b8e80941Smrg if (!sscreen->info.has_format_bc1_through_bc7) 1741848b8605Smrg goto out_unknown; 1742848b8605Smrg 1743848b8605Smrg switch (format) { 1744848b8605Smrg case PIPE_FORMAT_DXT1_RGB: 1745848b8605Smrg case PIPE_FORMAT_DXT1_RGBA: 1746848b8605Smrg case PIPE_FORMAT_DXT1_SRGB: 1747848b8605Smrg case PIPE_FORMAT_DXT1_SRGBA: 1748848b8605Smrg return V_008F14_IMG_DATA_FORMAT_BC1; 1749848b8605Smrg case PIPE_FORMAT_DXT3_RGBA: 1750848b8605Smrg case PIPE_FORMAT_DXT3_SRGBA: 1751848b8605Smrg return V_008F14_IMG_DATA_FORMAT_BC2; 1752848b8605Smrg case PIPE_FORMAT_DXT5_RGBA: 1753848b8605Smrg case PIPE_FORMAT_DXT5_SRGBA: 1754848b8605Smrg return V_008F14_IMG_DATA_FORMAT_BC3; 1755848b8605Smrg default: 1756848b8605Smrg goto out_unknown; 1757848b8605Smrg } 1758848b8605Smrg } 1759848b8605Smrg 1760848b8605Smrg if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1761848b8605Smrg return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1762848b8605Smrg } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1763848b8605Smrg return V_008F14_IMG_DATA_FORMAT_10_11_11; 1764848b8605Smrg } 1765848b8605Smrg 1766848b8605Smrg /* R8G8Bx_SNORM - TODO CxV8U8 */ 1767848b8605Smrg 1768b8e80941Smrg /* hw cannot support mixed formats (except depth/stencil, since only 1769b8e80941Smrg * depth is read).*/ 1770b8e80941Smrg if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1771b8e80941Smrg goto out_unknown; 1772b8e80941Smrg 1773848b8605Smrg /* See whether the components are of the same size. */ 1774848b8605Smrg for (i = 1; i < desc->nr_channels; i++) { 1775848b8605Smrg uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1776848b8605Smrg } 1777848b8605Smrg 1778848b8605Smrg /* Non-uniform formats. */ 1779848b8605Smrg if (!uniform) { 1780848b8605Smrg switch(desc->nr_channels) { 1781848b8605Smrg case 3: 1782848b8605Smrg if (desc->channel[0].size == 5 && 1783848b8605Smrg desc->channel[1].size == 6 && 1784848b8605Smrg desc->channel[2].size == 5) { 1785848b8605Smrg return V_008F14_IMG_DATA_FORMAT_5_6_5; 1786848b8605Smrg } 1787848b8605Smrg goto out_unknown; 1788848b8605Smrg case 4: 1789848b8605Smrg if (desc->channel[0].size == 5 && 1790848b8605Smrg desc->channel[1].size == 5 && 1791848b8605Smrg desc->channel[2].size == 5 && 1792848b8605Smrg desc->channel[3].size == 1) { 1793848b8605Smrg return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1794848b8605Smrg } 1795b8e80941Smrg if (desc->channel[0].size == 1 && 1796b8e80941Smrg desc->channel[1].size == 5 && 1797b8e80941Smrg desc->channel[2].size == 5 && 1798b8e80941Smrg desc->channel[3].size == 5) { 1799b8e80941Smrg return V_008F14_IMG_DATA_FORMAT_5_5_5_1; 1800b8e80941Smrg } 1801848b8605Smrg if (desc->channel[0].size == 10 && 1802848b8605Smrg desc->channel[1].size == 10 && 1803848b8605Smrg desc->channel[2].size == 10 && 1804848b8605Smrg desc->channel[3].size == 2) { 1805848b8605Smrg return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1806848b8605Smrg } 1807848b8605Smrg goto out_unknown; 1808848b8605Smrg } 1809848b8605Smrg goto out_unknown; 1810848b8605Smrg } 1811848b8605Smrg 1812848b8605Smrg if (first_non_void < 0 || first_non_void > 3) 1813848b8605Smrg goto out_unknown; 1814848b8605Smrg 1815848b8605Smrg /* uniform formats */ 1816848b8605Smrg switch (desc->channel[first_non_void].size) { 1817848b8605Smrg case 4: 1818848b8605Smrg switch (desc->nr_channels) { 1819848b8605Smrg#if 0 /* Not supported for render targets */ 1820848b8605Smrg case 2: 1821848b8605Smrg return V_008F14_IMG_DATA_FORMAT_4_4; 1822848b8605Smrg#endif 1823848b8605Smrg case 4: 1824848b8605Smrg return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1825848b8605Smrg } 1826848b8605Smrg break; 1827848b8605Smrg case 8: 1828848b8605Smrg switch (desc->nr_channels) { 1829848b8605Smrg case 1: 1830848b8605Smrg return V_008F14_IMG_DATA_FORMAT_8; 1831848b8605Smrg case 2: 1832848b8605Smrg return V_008F14_IMG_DATA_FORMAT_8_8; 1833848b8605Smrg case 4: 1834848b8605Smrg return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1835848b8605Smrg } 1836848b8605Smrg break; 1837848b8605Smrg case 16: 1838848b8605Smrg switch (desc->nr_channels) { 1839848b8605Smrg case 1: 1840848b8605Smrg return V_008F14_IMG_DATA_FORMAT_16; 1841848b8605Smrg case 2: 1842848b8605Smrg return V_008F14_IMG_DATA_FORMAT_16_16; 1843848b8605Smrg case 4: 1844848b8605Smrg return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1845848b8605Smrg } 1846848b8605Smrg break; 1847848b8605Smrg case 32: 1848848b8605Smrg switch (desc->nr_channels) { 1849848b8605Smrg case 1: 1850848b8605Smrg return V_008F14_IMG_DATA_FORMAT_32; 1851848b8605Smrg case 2: 1852848b8605Smrg return V_008F14_IMG_DATA_FORMAT_32_32; 1853848b8605Smrg#if 0 /* Not supported for render targets */ 1854848b8605Smrg case 3: 1855848b8605Smrg return V_008F14_IMG_DATA_FORMAT_32_32_32; 1856848b8605Smrg#endif 1857848b8605Smrg case 4: 1858848b8605Smrg return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1859848b8605Smrg } 1860848b8605Smrg } 1861848b8605Smrg 1862848b8605Smrgout_unknown: 1863848b8605Smrg return ~0; 1864848b8605Smrg} 1865848b8605Smrg 1866848b8605Smrgstatic unsigned si_tex_wrap(unsigned wrap) 1867848b8605Smrg{ 1868848b8605Smrg switch (wrap) { 1869848b8605Smrg default: 1870848b8605Smrg case PIPE_TEX_WRAP_REPEAT: 1871848b8605Smrg return V_008F30_SQ_TEX_WRAP; 1872848b8605Smrg case PIPE_TEX_WRAP_CLAMP: 1873848b8605Smrg return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1874848b8605Smrg case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1875848b8605Smrg return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1876848b8605Smrg case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1877848b8605Smrg return V_008F30_SQ_TEX_CLAMP_BORDER; 1878848b8605Smrg case PIPE_TEX_WRAP_MIRROR_REPEAT: 1879848b8605Smrg return V_008F30_SQ_TEX_MIRROR; 1880848b8605Smrg case PIPE_TEX_WRAP_MIRROR_CLAMP: 1881848b8605Smrg return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1882848b8605Smrg case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1883848b8605Smrg return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1884848b8605Smrg case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1885848b8605Smrg return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1886848b8605Smrg } 1887848b8605Smrg} 1888848b8605Smrg 1889848b8605Smrgstatic unsigned si_tex_mipfilter(unsigned filter) 1890848b8605Smrg{ 1891848b8605Smrg switch (filter) { 1892848b8605Smrg case PIPE_TEX_MIPFILTER_NEAREST: 1893848b8605Smrg return V_008F38_SQ_TEX_Z_FILTER_POINT; 1894848b8605Smrg case PIPE_TEX_MIPFILTER_LINEAR: 1895848b8605Smrg return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1896848b8605Smrg default: 1897848b8605Smrg case PIPE_TEX_MIPFILTER_NONE: 1898848b8605Smrg return V_008F38_SQ_TEX_Z_FILTER_NONE; 1899848b8605Smrg } 1900848b8605Smrg} 1901848b8605Smrg 1902848b8605Smrgstatic unsigned si_tex_compare(unsigned compare) 1903848b8605Smrg{ 1904848b8605Smrg switch (compare) { 1905848b8605Smrg default: 1906848b8605Smrg case PIPE_FUNC_NEVER: 1907848b8605Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1908848b8605Smrg case PIPE_FUNC_LESS: 1909848b8605Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1910848b8605Smrg case PIPE_FUNC_EQUAL: 1911848b8605Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1912848b8605Smrg case PIPE_FUNC_LEQUAL: 1913848b8605Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1914848b8605Smrg case PIPE_FUNC_GREATER: 1915848b8605Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1916848b8605Smrg case PIPE_FUNC_NOTEQUAL: 1917848b8605Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1918848b8605Smrg case PIPE_FUNC_GEQUAL: 1919848b8605Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1920848b8605Smrg case PIPE_FUNC_ALWAYS: 1921848b8605Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1922848b8605Smrg } 1923848b8605Smrg} 1924848b8605Smrg 1925b8e80941Smrgstatic unsigned si_tex_dim(struct si_screen *sscreen, struct si_texture *tex, 1926b8e80941Smrg unsigned view_target, unsigned nr_samples) 1927848b8605Smrg{ 1928b8e80941Smrg unsigned res_target = tex->buffer.b.b.target; 1929b8e80941Smrg 1930b8e80941Smrg if (view_target == PIPE_TEXTURE_CUBE || 1931b8e80941Smrg view_target == PIPE_TEXTURE_CUBE_ARRAY) 1932b8e80941Smrg res_target = view_target; 1933b8e80941Smrg /* If interpreting cubemaps as something else, set 2D_ARRAY. */ 1934b8e80941Smrg else if (res_target == PIPE_TEXTURE_CUBE || 1935b8e80941Smrg res_target == PIPE_TEXTURE_CUBE_ARRAY) 1936b8e80941Smrg res_target = PIPE_TEXTURE_2D_ARRAY; 1937b8e80941Smrg 1938b8e80941Smrg /* GFX9 allocates 1D textures as 2D. */ 1939b8e80941Smrg if ((res_target == PIPE_TEXTURE_1D || 1940b8e80941Smrg res_target == PIPE_TEXTURE_1D_ARRAY) && 1941b8e80941Smrg sscreen->info.chip_class >= GFX9 && 1942b8e80941Smrg tex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) { 1943b8e80941Smrg if (res_target == PIPE_TEXTURE_1D) 1944b8e80941Smrg res_target = PIPE_TEXTURE_2D; 1945b8e80941Smrg else 1946b8e80941Smrg res_target = PIPE_TEXTURE_2D_ARRAY; 1947b8e80941Smrg } 1948b8e80941Smrg 1949b8e80941Smrg switch (res_target) { 1950848b8605Smrg default: 1951848b8605Smrg case PIPE_TEXTURE_1D: 1952848b8605Smrg return V_008F1C_SQ_RSRC_IMG_1D; 1953848b8605Smrg case PIPE_TEXTURE_1D_ARRAY: 1954848b8605Smrg return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1955848b8605Smrg case PIPE_TEXTURE_2D: 1956848b8605Smrg case PIPE_TEXTURE_RECT: 1957848b8605Smrg return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1958848b8605Smrg V_008F1C_SQ_RSRC_IMG_2D; 1959848b8605Smrg case PIPE_TEXTURE_2D_ARRAY: 1960848b8605Smrg return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1961848b8605Smrg V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1962848b8605Smrg case PIPE_TEXTURE_3D: 1963848b8605Smrg return V_008F1C_SQ_RSRC_IMG_3D; 1964848b8605Smrg case PIPE_TEXTURE_CUBE: 1965848b8605Smrg case PIPE_TEXTURE_CUBE_ARRAY: 1966848b8605Smrg return V_008F1C_SQ_RSRC_IMG_CUBE; 1967848b8605Smrg } 1968848b8605Smrg} 1969848b8605Smrg 1970848b8605Smrg/* 1971848b8605Smrg * Format support testing 1972848b8605Smrg */ 1973848b8605Smrg 1974848b8605Smrgstatic bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1975848b8605Smrg{ 1976b8e80941Smrg const struct util_format_description *desc = util_format_description(format); 1977b8e80941Smrg if (!desc) 1978b8e80941Smrg return false; 1979b8e80941Smrg 1980b8e80941Smrg return si_translate_texformat(screen, format, desc, 1981848b8605Smrg util_format_get_first_non_void_channel(format)) != ~0U; 1982848b8605Smrg} 1983848b8605Smrg 1984848b8605Smrgstatic uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1985848b8605Smrg const struct util_format_description *desc, 1986848b8605Smrg int first_non_void) 1987848b8605Smrg{ 1988848b8605Smrg int i; 1989848b8605Smrg 1990848b8605Smrg if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1991848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1992848b8605Smrg 1993b8e80941Smrg assert(first_non_void >= 0); 1994b8e80941Smrg 1995848b8605Smrg if (desc->nr_channels == 4 && 1996848b8605Smrg desc->channel[0].size == 10 && 1997848b8605Smrg desc->channel[1].size == 10 && 1998848b8605Smrg desc->channel[2].size == 10 && 1999848b8605Smrg desc->channel[3].size == 2) 2000848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 2001848b8605Smrg 2002848b8605Smrg /* See whether the components are of the same size. */ 2003848b8605Smrg for (i = 0; i < desc->nr_channels; i++) { 2004848b8605Smrg if (desc->channel[first_non_void].size != desc->channel[i].size) 2005848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_INVALID; 2006848b8605Smrg } 2007848b8605Smrg 2008848b8605Smrg switch (desc->channel[first_non_void].size) { 2009848b8605Smrg case 8: 2010848b8605Smrg switch (desc->nr_channels) { 2011848b8605Smrg case 1: 2012b8e80941Smrg case 3: /* 3 loads */ 2013848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_8; 2014848b8605Smrg case 2: 2015848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_8_8; 2016848b8605Smrg case 4: 2017848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 2018848b8605Smrg } 2019848b8605Smrg break; 2020848b8605Smrg case 16: 2021848b8605Smrg switch (desc->nr_channels) { 2022848b8605Smrg case 1: 2023b8e80941Smrg case 3: /* 3 loads */ 2024848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_16; 2025848b8605Smrg case 2: 2026848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_16_16; 2027848b8605Smrg case 4: 2028848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 2029848b8605Smrg } 2030848b8605Smrg break; 2031848b8605Smrg case 32: 2032848b8605Smrg switch (desc->nr_channels) { 2033848b8605Smrg case 1: 2034848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_32; 2035848b8605Smrg case 2: 2036848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_32_32; 2037848b8605Smrg case 3: 2038848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_32_32_32; 2039848b8605Smrg case 4: 2040848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2041848b8605Smrg } 2042848b8605Smrg break; 2043b8e80941Smrg case 64: 2044b8e80941Smrg /* Legacy double formats. */ 2045b8e80941Smrg switch (desc->nr_channels) { 2046b8e80941Smrg case 1: /* 1 load */ 2047b8e80941Smrg return V_008F0C_BUF_DATA_FORMAT_32_32; 2048b8e80941Smrg case 2: /* 1 load */ 2049b8e80941Smrg return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2050b8e80941Smrg case 3: /* 3 loads */ 2051b8e80941Smrg return V_008F0C_BUF_DATA_FORMAT_32_32; 2052b8e80941Smrg case 4: /* 2 loads */ 2053b8e80941Smrg return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2054b8e80941Smrg } 2055b8e80941Smrg break; 2056848b8605Smrg } 2057848b8605Smrg 2058848b8605Smrg return V_008F0C_BUF_DATA_FORMAT_INVALID; 2059848b8605Smrg} 2060848b8605Smrg 2061848b8605Smrgstatic uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 2062848b8605Smrg const struct util_format_description *desc, 2063848b8605Smrg int first_non_void) 2064848b8605Smrg{ 2065848b8605Smrg if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 2066848b8605Smrg return V_008F0C_BUF_NUM_FORMAT_FLOAT; 2067848b8605Smrg 2068b8e80941Smrg assert(first_non_void >= 0); 2069b8e80941Smrg 2070848b8605Smrg switch (desc->channel[first_non_void].type) { 2071848b8605Smrg case UTIL_FORMAT_TYPE_SIGNED: 2072b8e80941Smrg case UTIL_FORMAT_TYPE_FIXED: 2073b8e80941Smrg if (desc->channel[first_non_void].size >= 32 || 2074b8e80941Smrg desc->channel[first_non_void].pure_integer) 2075848b8605Smrg return V_008F0C_BUF_NUM_FORMAT_SINT; 2076b8e80941Smrg else if (desc->channel[first_non_void].normalized) 2077b8e80941Smrg return V_008F0C_BUF_NUM_FORMAT_SNORM; 2078848b8605Smrg else 2079848b8605Smrg return V_008F0C_BUF_NUM_FORMAT_SSCALED; 2080848b8605Smrg break; 2081848b8605Smrg case UTIL_FORMAT_TYPE_UNSIGNED: 2082b8e80941Smrg if (desc->channel[first_non_void].size >= 32 || 2083b8e80941Smrg desc->channel[first_non_void].pure_integer) 2084848b8605Smrg return V_008F0C_BUF_NUM_FORMAT_UINT; 2085b8e80941Smrg else if (desc->channel[first_non_void].normalized) 2086b8e80941Smrg return V_008F0C_BUF_NUM_FORMAT_UNORM; 2087848b8605Smrg else 2088848b8605Smrg return V_008F0C_BUF_NUM_FORMAT_USCALED; 2089848b8605Smrg break; 2090848b8605Smrg case UTIL_FORMAT_TYPE_FLOAT: 2091848b8605Smrg default: 2092848b8605Smrg return V_008F0C_BUF_NUM_FORMAT_FLOAT; 2093848b8605Smrg } 2094848b8605Smrg} 2095848b8605Smrg 2096b8e80941Smrgstatic unsigned si_is_vertex_format_supported(struct pipe_screen *screen, 2097b8e80941Smrg enum pipe_format format, 2098b8e80941Smrg unsigned usage) 2099848b8605Smrg{ 2100848b8605Smrg const struct util_format_description *desc; 2101848b8605Smrg int first_non_void; 2102848b8605Smrg unsigned data_format; 2103848b8605Smrg 2104b8e80941Smrg assert((usage & ~(PIPE_BIND_SHADER_IMAGE | 2105b8e80941Smrg PIPE_BIND_SAMPLER_VIEW | 2106b8e80941Smrg PIPE_BIND_VERTEX_BUFFER)) == 0); 2107b8e80941Smrg 2108848b8605Smrg desc = util_format_description(format); 2109b8e80941Smrg if (!desc) 2110b8e80941Smrg return 0; 2111b8e80941Smrg 2112b8e80941Smrg /* There are no native 8_8_8 or 16_16_16 data formats, and we currently 2113b8e80941Smrg * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well 2114b8e80941Smrg * for read-only access (with caveats surrounding bounds checks), but 2115b8e80941Smrg * obviously fails for write access which we have to implement for 2116b8e80941Smrg * shader images. Luckily, OpenGL doesn't expect this to be supported 2117b8e80941Smrg * anyway, and so the only impact is on PBO uploads / downloads, which 2118b8e80941Smrg * shouldn't be expected to be fast for GL_RGB anyway. 2119b8e80941Smrg */ 2120b8e80941Smrg if (desc->block.bits == 3 * 8 || 2121b8e80941Smrg desc->block.bits == 3 * 16) { 2122b8e80941Smrg if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) { 2123b8e80941Smrg usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW); 2124b8e80941Smrg if (!usage) 2125b8e80941Smrg return 0; 2126b8e80941Smrg } 2127b8e80941Smrg } 2128b8e80941Smrg 2129848b8605Smrg first_non_void = util_format_get_first_non_void_channel(format); 2130848b8605Smrg data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 2131b8e80941Smrg if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID) 2132b8e80941Smrg return 0; 2133b8e80941Smrg 2134b8e80941Smrg return usage; 2135848b8605Smrg} 2136848b8605Smrg 2137848b8605Smrgstatic bool si_is_colorbuffer_format_supported(enum pipe_format format) 2138848b8605Smrg{ 2139848b8605Smrg return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 2140b8e80941Smrg si_translate_colorswap(format, false) != ~0U; 2141848b8605Smrg} 2142848b8605Smrg 2143848b8605Smrgstatic bool si_is_zs_format_supported(enum pipe_format format) 2144848b8605Smrg{ 2145848b8605Smrg return si_translate_dbformat(format) != V_028040_Z_INVALID; 2146848b8605Smrg} 2147848b8605Smrg 2148b8e80941Smrgstatic boolean si_is_format_supported(struct pipe_screen *screen, 2149b8e80941Smrg enum pipe_format format, 2150b8e80941Smrg enum pipe_texture_target target, 2151b8e80941Smrg unsigned sample_count, 2152b8e80941Smrg unsigned storage_sample_count, 2153b8e80941Smrg unsigned usage) 2154848b8605Smrg{ 2155848b8605Smrg struct si_screen *sscreen = (struct si_screen *)screen; 2156848b8605Smrg unsigned retval = 0; 2157848b8605Smrg 2158848b8605Smrg if (target >= PIPE_MAX_TEXTURE_TYPES) { 2159b8e80941Smrg PRINT_ERR("radeonsi: unsupported texture type %d\n", target); 2160b8e80941Smrg return false; 2161848b8605Smrg } 2162848b8605Smrg 2163b8e80941Smrg if (MAX2(1, sample_count) < MAX2(1, storage_sample_count)) 2164b8e80941Smrg return false; 2165848b8605Smrg 2166848b8605Smrg if (sample_count > 1) { 2167b8e80941Smrg if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 2168b8e80941Smrg return false; 2169b8e80941Smrg 2170b8e80941Smrg if (usage & PIPE_BIND_SHADER_IMAGE) 2171b8e80941Smrg return false; 2172b8e80941Smrg 2173b8e80941Smrg /* Only power-of-two sample counts are supported. */ 2174b8e80941Smrg if (!util_is_power_of_two_or_zero(sample_count) || 2175b8e80941Smrg !util_is_power_of_two_or_zero(storage_sample_count)) 2176b8e80941Smrg return false; 2177b8e80941Smrg 2178b8e80941Smrg /* MSAA support without framebuffer attachments. */ 2179b8e80941Smrg if (format == PIPE_FORMAT_NONE && sample_count <= 16) 2180b8e80941Smrg return true; 2181b8e80941Smrg 2182b8e80941Smrg if (!sscreen->info.has_eqaa_surface_allocator || 2183b8e80941Smrg util_format_is_depth_or_stencil(format)) { 2184b8e80941Smrg /* Color without EQAA or depth/stencil. */ 2185b8e80941Smrg if (sample_count > 8 || 2186b8e80941Smrg sample_count != storage_sample_count) 2187b8e80941Smrg return false; 2188b8e80941Smrg } else { 2189b8e80941Smrg /* Color with EQAA. */ 2190b8e80941Smrg if (sample_count > 16 || 2191b8e80941Smrg storage_sample_count > 8) 2192b8e80941Smrg return false; 2193848b8605Smrg } 2194848b8605Smrg } 2195848b8605Smrg 2196b8e80941Smrg if (usage & (PIPE_BIND_SAMPLER_VIEW | 2197b8e80941Smrg PIPE_BIND_SHADER_IMAGE)) { 2198848b8605Smrg if (target == PIPE_BUFFER) { 2199b8e80941Smrg retval |= si_is_vertex_format_supported( 2200b8e80941Smrg screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | 2201b8e80941Smrg PIPE_BIND_SHADER_IMAGE)); 2202848b8605Smrg } else { 2203848b8605Smrg if (si_is_sampler_format_supported(screen, format)) 2204b8e80941Smrg retval |= usage & (PIPE_BIND_SAMPLER_VIEW | 2205b8e80941Smrg PIPE_BIND_SHADER_IMAGE); 2206848b8605Smrg } 2207848b8605Smrg } 2208848b8605Smrg 2209848b8605Smrg if ((usage & (PIPE_BIND_RENDER_TARGET | 2210848b8605Smrg PIPE_BIND_DISPLAY_TARGET | 2211848b8605Smrg PIPE_BIND_SCANOUT | 2212b8e80941Smrg PIPE_BIND_SHARED | 2213b8e80941Smrg PIPE_BIND_BLENDABLE)) && 2214848b8605Smrg si_is_colorbuffer_format_supported(format)) { 2215848b8605Smrg retval |= usage & 2216848b8605Smrg (PIPE_BIND_RENDER_TARGET | 2217848b8605Smrg PIPE_BIND_DISPLAY_TARGET | 2218848b8605Smrg PIPE_BIND_SCANOUT | 2219848b8605Smrg PIPE_BIND_SHARED); 2220b8e80941Smrg if (!util_format_is_pure_integer(format) && 2221b8e80941Smrg !util_format_is_depth_or_stencil(format)) 2222b8e80941Smrg retval |= usage & PIPE_BIND_BLENDABLE; 2223848b8605Smrg } 2224848b8605Smrg 2225848b8605Smrg if ((usage & PIPE_BIND_DEPTH_STENCIL) && 2226848b8605Smrg si_is_zs_format_supported(format)) { 2227848b8605Smrg retval |= PIPE_BIND_DEPTH_STENCIL; 2228848b8605Smrg } 2229848b8605Smrg 2230b8e80941Smrg if (usage & PIPE_BIND_VERTEX_BUFFER) { 2231b8e80941Smrg retval |= si_is_vertex_format_supported(screen, format, 2232b8e80941Smrg PIPE_BIND_VERTEX_BUFFER); 2233848b8605Smrg } 2234848b8605Smrg 2235b8e80941Smrg if ((usage & PIPE_BIND_LINEAR) && 2236b8e80941Smrg !util_format_is_compressed(format) && 2237b8e80941Smrg !(usage & PIPE_BIND_DEPTH_STENCIL)) 2238b8e80941Smrg retval |= PIPE_BIND_LINEAR; 2239848b8605Smrg 2240848b8605Smrg return retval == usage; 2241848b8605Smrg} 2242848b8605Smrg 2243b8e80941Smrg/* 2244b8e80941Smrg * framebuffer handling 2245b8e80941Smrg */ 2246b8e80941Smrg 2247b8e80941Smrgstatic void si_choose_spi_color_formats(struct si_surface *surf, 2248b8e80941Smrg unsigned format, unsigned swap, 2249b8e80941Smrg unsigned ntype, bool is_depth) 2250848b8605Smrg{ 2251b8e80941Smrg /* Alpha is needed for alpha-to-coverage. 2252b8e80941Smrg * Blending may be with or without alpha. 2253b8e80941Smrg */ 2254b8e80941Smrg unsigned normal = 0; /* most optimal, may not support blending or export alpha */ 2255b8e80941Smrg unsigned alpha = 0; /* exports alpha, but may not support blending */ 2256b8e80941Smrg unsigned blend = 0; /* supports blending, but may not export alpha */ 2257b8e80941Smrg unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */ 2258b8e80941Smrg 2259b8e80941Smrg /* Choose the SPI color formats. These are required values for RB+. 2260b8e80941Smrg * Other chips have multiple choices, though they are not necessarily better. 2261b8e80941Smrg */ 2262b8e80941Smrg switch (format) { 2263b8e80941Smrg case V_028C70_COLOR_5_6_5: 2264b8e80941Smrg case V_028C70_COLOR_1_5_5_5: 2265b8e80941Smrg case V_028C70_COLOR_5_5_5_1: 2266b8e80941Smrg case V_028C70_COLOR_4_4_4_4: 2267b8e80941Smrg case V_028C70_COLOR_10_11_11: 2268b8e80941Smrg case V_028C70_COLOR_11_11_10: 2269b8e80941Smrg case V_028C70_COLOR_8: 2270b8e80941Smrg case V_028C70_COLOR_8_8: 2271b8e80941Smrg case V_028C70_COLOR_8_8_8_8: 2272b8e80941Smrg case V_028C70_COLOR_10_10_10_2: 2273b8e80941Smrg case V_028C70_COLOR_2_10_10_10: 2274b8e80941Smrg if (ntype == V_028C70_NUMBER_UINT) 2275b8e80941Smrg alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 2276b8e80941Smrg else if (ntype == V_028C70_NUMBER_SINT) 2277b8e80941Smrg alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 2278b8e80941Smrg else 2279b8e80941Smrg alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 2280b8e80941Smrg break; 2281848b8605Smrg 2282b8e80941Smrg case V_028C70_COLOR_16: 2283b8e80941Smrg case V_028C70_COLOR_16_16: 2284b8e80941Smrg case V_028C70_COLOR_16_16_16_16: 2285b8e80941Smrg if (ntype == V_028C70_NUMBER_UNORM || 2286b8e80941Smrg ntype == V_028C70_NUMBER_SNORM) { 2287b8e80941Smrg /* UNORM16 and SNORM16 don't support blending */ 2288b8e80941Smrg if (ntype == V_028C70_NUMBER_UNORM) 2289b8e80941Smrg normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR; 2290b8e80941Smrg else 2291b8e80941Smrg normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR; 2292b8e80941Smrg 2293b8e80941Smrg /* Use 32 bits per channel for blending. */ 2294b8e80941Smrg if (format == V_028C70_COLOR_16) { 2295b8e80941Smrg if (swap == V_028C70_SWAP_STD) { /* R */ 2296b8e80941Smrg blend = V_028714_SPI_SHADER_32_R; 2297b8e80941Smrg blend_alpha = V_028714_SPI_SHADER_32_AR; 2298b8e80941Smrg } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 2299b8e80941Smrg blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 2300b8e80941Smrg else 2301b8e80941Smrg assert(0); 2302b8e80941Smrg } else if (format == V_028C70_COLOR_16_16) { 2303b8e80941Smrg if (swap == V_028C70_SWAP_STD) { /* RG */ 2304b8e80941Smrg blend = V_028714_SPI_SHADER_32_GR; 2305b8e80941Smrg blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2306b8e80941Smrg } else if (swap == V_028C70_SWAP_ALT) /* RA */ 2307b8e80941Smrg blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 2308b8e80941Smrg else 2309b8e80941Smrg assert(0); 2310b8e80941Smrg } else /* 16_16_16_16 */ 2311b8e80941Smrg blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2312b8e80941Smrg } else if (ntype == V_028C70_NUMBER_UINT) 2313b8e80941Smrg alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 2314b8e80941Smrg else if (ntype == V_028C70_NUMBER_SINT) 2315b8e80941Smrg alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 2316b8e80941Smrg else if (ntype == V_028C70_NUMBER_FLOAT) 2317b8e80941Smrg alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 2318b8e80941Smrg else 2319b8e80941Smrg assert(0); 2320b8e80941Smrg break; 2321b8e80941Smrg 2322b8e80941Smrg case V_028C70_COLOR_32: 2323b8e80941Smrg if (swap == V_028C70_SWAP_STD) { /* R */ 2324b8e80941Smrg blend = normal = V_028714_SPI_SHADER_32_R; 2325b8e80941Smrg alpha = blend_alpha = V_028714_SPI_SHADER_32_AR; 2326b8e80941Smrg } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 2327b8e80941Smrg alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 2328b8e80941Smrg else 2329b8e80941Smrg assert(0); 2330b8e80941Smrg break; 2331b8e80941Smrg 2332b8e80941Smrg case V_028C70_COLOR_32_32: 2333b8e80941Smrg if (swap == V_028C70_SWAP_STD) { /* RG */ 2334b8e80941Smrg blend = normal = V_028714_SPI_SHADER_32_GR; 2335b8e80941Smrg alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2336b8e80941Smrg } else if (swap == V_028C70_SWAP_ALT) /* RA */ 2337b8e80941Smrg alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 2338b8e80941Smrg else 2339b8e80941Smrg assert(0); 2340b8e80941Smrg break; 2341b8e80941Smrg 2342b8e80941Smrg case V_028C70_COLOR_32_32_32_32: 2343b8e80941Smrg case V_028C70_COLOR_8_24: 2344b8e80941Smrg case V_028C70_COLOR_24_8: 2345b8e80941Smrg case V_028C70_COLOR_X24_8_32_FLOAT: 2346b8e80941Smrg alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; 2347b8e80941Smrg break; 2348b8e80941Smrg 2349b8e80941Smrg default: 2350b8e80941Smrg assert(0); 2351b8e80941Smrg return; 2352848b8605Smrg } 2353848b8605Smrg 2354b8e80941Smrg /* The DB->CB copy needs 32_ABGR. */ 2355b8e80941Smrg if (is_depth) 2356b8e80941Smrg alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; 2357b8e80941Smrg 2358b8e80941Smrg surf->spi_shader_col_format = normal; 2359b8e80941Smrg surf->spi_shader_col_format_alpha = alpha; 2360b8e80941Smrg surf->spi_shader_col_format_blend = blend; 2361b8e80941Smrg surf->spi_shader_col_format_blend_alpha = blend_alpha; 2362b8e80941Smrg} 2363848b8605Smrg 2364848b8605Smrgstatic void si_initialize_color_surface(struct si_context *sctx, 2365b8e80941Smrg struct si_surface *surf) 2366848b8605Smrg{ 2367b8e80941Smrg struct si_texture *tex = (struct si_texture*)surf->base.texture; 2368b8e80941Smrg unsigned color_info, color_attrib; 2369848b8605Smrg unsigned format, swap, ntype, endian; 2370848b8605Smrg const struct util_format_description *desc; 2371b8e80941Smrg int firstchan; 2372848b8605Smrg unsigned blend_clamp = 0, blend_bypass = 0; 2373848b8605Smrg 2374848b8605Smrg desc = util_format_description(surf->base.format); 2375b8e80941Smrg for (firstchan = 0; firstchan < 4; firstchan++) { 2376b8e80941Smrg if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) { 2377848b8605Smrg break; 2378848b8605Smrg } 2379848b8605Smrg } 2380b8e80941Smrg if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) { 2381848b8605Smrg ntype = V_028C70_NUMBER_FLOAT; 2382848b8605Smrg } else { 2383848b8605Smrg ntype = V_028C70_NUMBER_UNORM; 2384848b8605Smrg if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 2385848b8605Smrg ntype = V_028C70_NUMBER_SRGB; 2386b8e80941Smrg else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) { 2387b8e80941Smrg if (desc->channel[firstchan].pure_integer) { 2388848b8605Smrg ntype = V_028C70_NUMBER_SINT; 2389848b8605Smrg } else { 2390b8e80941Smrg assert(desc->channel[firstchan].normalized); 2391848b8605Smrg ntype = V_028C70_NUMBER_SNORM; 2392848b8605Smrg } 2393b8e80941Smrg } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) { 2394b8e80941Smrg if (desc->channel[firstchan].pure_integer) { 2395848b8605Smrg ntype = V_028C70_NUMBER_UINT; 2396848b8605Smrg } else { 2397b8e80941Smrg assert(desc->channel[firstchan].normalized); 2398848b8605Smrg ntype = V_028C70_NUMBER_UNORM; 2399848b8605Smrg } 2400848b8605Smrg } 2401848b8605Smrg } 2402848b8605Smrg 2403848b8605Smrg format = si_translate_colorformat(surf->base.format); 2404848b8605Smrg if (format == V_028C70_COLOR_INVALID) { 2405b8e80941Smrg PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 2406848b8605Smrg } 2407848b8605Smrg assert(format != V_028C70_COLOR_INVALID); 2408b8e80941Smrg swap = si_translate_colorswap(surf->base.format, false); 2409b8e80941Smrg endian = si_colorformat_endian_swap(format); 2410848b8605Smrg 2411848b8605Smrg /* blend clamp should be set for all NORM/SRGB types */ 2412848b8605Smrg if (ntype == V_028C70_NUMBER_UNORM || 2413848b8605Smrg ntype == V_028C70_NUMBER_SNORM || 2414848b8605Smrg ntype == V_028C70_NUMBER_SRGB) 2415848b8605Smrg blend_clamp = 1; 2416848b8605Smrg 2417848b8605Smrg /* set blend bypass according to docs if SINT/UINT or 2418848b8605Smrg 8/24 COLOR variants */ 2419848b8605Smrg if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 2420848b8605Smrg format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 2421848b8605Smrg format == V_028C70_COLOR_X24_8_32_FLOAT) { 2422848b8605Smrg blend_clamp = 0; 2423848b8605Smrg blend_bypass = 1; 2424848b8605Smrg } 2425848b8605Smrg 2426b8e80941Smrg if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) { 2427b8e80941Smrg if (format == V_028C70_COLOR_8 || 2428b8e80941Smrg format == V_028C70_COLOR_8_8 || 2429b8e80941Smrg format == V_028C70_COLOR_8_8_8_8) 2430b8e80941Smrg surf->color_is_int8 = true; 2431b8e80941Smrg else if (format == V_028C70_COLOR_10_10_10_2 || 2432b8e80941Smrg format == V_028C70_COLOR_2_10_10_10) 2433b8e80941Smrg surf->color_is_int10 = true; 2434b8e80941Smrg } 2435b8e80941Smrg 2436848b8605Smrg color_info = S_028C70_FORMAT(format) | 2437848b8605Smrg S_028C70_COMP_SWAP(swap) | 2438848b8605Smrg S_028C70_BLEND_CLAMP(blend_clamp) | 2439848b8605Smrg S_028C70_BLEND_BYPASS(blend_bypass) | 2440b8e80941Smrg S_028C70_SIMPLE_FLOAT(1) | 2441b8e80941Smrg S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && 2442b8e80941Smrg ntype != V_028C70_NUMBER_SNORM && 2443b8e80941Smrg ntype != V_028C70_NUMBER_SRGB && 2444b8e80941Smrg format != V_028C70_COLOR_8_24 && 2445b8e80941Smrg format != V_028C70_COLOR_24_8) | 2446848b8605Smrg S_028C70_NUMBER_TYPE(ntype) | 2447848b8605Smrg S_028C70_ENDIAN(endian); 2448848b8605Smrg 2449b8e80941Smrg /* Intensity is implemented as Red, so treat it that way. */ 2450b8e80941Smrg color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 || 2451b8e80941Smrg util_format_is_intensity(surf->base.format)); 2452848b8605Smrg 2453b8e80941Smrg if (tex->buffer.b.b.nr_samples > 1) { 2454b8e80941Smrg unsigned log_samples = util_logbase2(tex->buffer.b.b.nr_samples); 2455b8e80941Smrg unsigned log_fragments = util_logbase2(tex->buffer.b.b.nr_storage_samples); 2456848b8605Smrg 2457848b8605Smrg color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 2458b8e80941Smrg S_028C74_NUM_FRAGMENTS(log_fragments); 2459848b8605Smrg 2460b8e80941Smrg if (tex->surface.fmask_size) { 2461848b8605Smrg color_info |= S_028C70_COMPRESSION(1); 2462b8e80941Smrg unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.fmask.bankh); 2463848b8605Smrg 2464b8e80941Smrg if (sctx->chip_class == SI) { 2465848b8605Smrg /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ 2466848b8605Smrg color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 2467848b8605Smrg } 2468848b8605Smrg } 2469848b8605Smrg } 2470848b8605Smrg 2471b8e80941Smrg if (sctx->chip_class >= VI) { 2472b8e80941Smrg unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B; 2473b8e80941Smrg unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B; 2474b8e80941Smrg 2475b8e80941Smrg /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and 2476b8e80941Smrg 64 for APU because all of our APUs to date use DIMMs which have 2477b8e80941Smrg a request granularity size of 64B while all other chips have a 2478b8e80941Smrg 32B request size */ 2479b8e80941Smrg if (!sctx->screen->info.has_dedicated_vram) 2480b8e80941Smrg min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B; 2481b8e80941Smrg 2482b8e80941Smrg if (tex->buffer.b.b.nr_storage_samples > 1) { 2483b8e80941Smrg if (tex->surface.bpe == 1) 2484b8e80941Smrg max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; 2485b8e80941Smrg else if (tex->surface.bpe == 2) 2486b8e80941Smrg max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B; 2487b8e80941Smrg } 2488848b8605Smrg 2489b8e80941Smrg surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | 2490b8e80941Smrg S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) | 2491b8e80941Smrg S_028C78_INDEPENDENT_64B_BLOCKS(1); 2492b8e80941Smrg } 2493848b8605Smrg 2494b8e80941Smrg /* This must be set for fast clear to work without FMASK. */ 2495b8e80941Smrg if (!tex->surface.fmask_size && sctx->chip_class == SI) { 2496b8e80941Smrg unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh); 2497b8e80941Smrg color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 2498b8e80941Smrg } 2499848b8605Smrg 2500b8e80941Smrg unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 2501b8e80941Smrg S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer); 2502848b8605Smrg 2503b8e80941Smrg if (sctx->chip_class >= GFX9) { 2504b8e80941Smrg unsigned mip0_depth = util_max_layer(&tex->buffer.b.b, 0); 2505b8e80941Smrg 2506b8e80941Smrg color_view |= S_028C6C_MIP_LEVEL(surf->base.u.tex.level); 2507b8e80941Smrg color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | 2508b8e80941Smrg S_028C74_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type); 2509b8e80941Smrg surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(surf->width0 - 1) | 2510b8e80941Smrg S_028C68_MIP0_HEIGHT(surf->height0 - 1) | 2511b8e80941Smrg S_028C68_MAX_MIP(tex->buffer.b.b.last_level); 2512848b8605Smrg } 2513848b8605Smrg 2514b8e80941Smrg surf->cb_color_view = color_view; 2515b8e80941Smrg surf->cb_color_info = color_info; 2516b8e80941Smrg surf->cb_color_attrib = color_attrib; 2517b8e80941Smrg 2518848b8605Smrg /* Determine pixel shader export format */ 2519b8e80941Smrg si_choose_spi_color_formats(surf, format, swap, ntype, tex->is_depth); 2520848b8605Smrg 2521848b8605Smrg surf->color_initialized = true; 2522848b8605Smrg} 2523848b8605Smrg 2524848b8605Smrgstatic void si_init_depth_surface(struct si_context *sctx, 2525b8e80941Smrg struct si_surface *surf) 2526848b8605Smrg{ 2527b8e80941Smrg struct si_texture *tex = (struct si_texture*)surf->base.texture; 2528848b8605Smrg unsigned level = surf->base.u.tex.level; 2529b8e80941Smrg unsigned format, stencil_format; 2530b8e80941Smrg uint32_t z_info, s_info; 2531848b8605Smrg 2532b8e80941Smrg format = si_translate_dbformat(tex->db_render_format); 2533b8e80941Smrg stencil_format = tex->surface.has_stencil ? 2534b8e80941Smrg V_028044_STENCIL_8 : V_028044_STENCIL_INVALID; 2535848b8605Smrg 2536b8e80941Smrg assert(format != V_028040_Z_INVALID); 2537b8e80941Smrg if (format == V_028040_Z_INVALID) 2538b8e80941Smrg PRINT_ERR("Invalid DB format: %d, disabling DB.\n", tex->buffer.b.b.format); 2539848b8605Smrg 2540b8e80941Smrg surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 2541b8e80941Smrg S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 2542b8e80941Smrg surf->db_htile_data_base = 0; 2543b8e80941Smrg surf->db_htile_surface = 0; 2544b8e80941Smrg 2545b8e80941Smrg if (sctx->chip_class >= GFX9) { 2546b8e80941Smrg assert(tex->surface.u.gfx9.surf_offset == 0); 2547b8e80941Smrg surf->db_depth_base = tex->buffer.gpu_address >> 8; 2548b8e80941Smrg surf->db_stencil_base = (tex->buffer.gpu_address + 2549b8e80941Smrg tex->surface.u.gfx9.stencil_offset) >> 8; 2550b8e80941Smrg z_info = S_028038_FORMAT(format) | 2551b8e80941Smrg S_028038_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)) | 2552b8e80941Smrg S_028038_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) | 2553b8e80941Smrg S_028038_MAXMIP(tex->buffer.b.b.last_level); 2554b8e80941Smrg s_info = S_02803C_FORMAT(stencil_format) | 2555b8e80941Smrg S_02803C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode); 2556b8e80941Smrg surf->db_z_info2 = S_028068_EPITCH(tex->surface.u.gfx9.surf.epitch); 2557b8e80941Smrg surf->db_stencil_info2 = S_02806C_EPITCH(tex->surface.u.gfx9.stencil.epitch); 2558b8e80941Smrg surf->db_depth_view |= S_028008_MIPID(level); 2559b8e80941Smrg surf->db_depth_size = S_02801C_X_MAX(tex->buffer.b.b.width0 - 1) | 2560b8e80941Smrg S_02801C_Y_MAX(tex->buffer.b.b.height0 - 1); 2561b8e80941Smrg 2562b8e80941Smrg if (si_htile_enabled(tex, level)) { 2563b8e80941Smrg z_info |= S_028038_TILE_SURFACE_ENABLE(1) | 2564b8e80941Smrg S_028038_ALLOW_EXPCLEAR(1); 2565b8e80941Smrg 2566b8e80941Smrg if (tex->tc_compatible_htile) { 2567b8e80941Smrg unsigned max_zplanes = 4; 2568b8e80941Smrg 2569b8e80941Smrg if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && 2570b8e80941Smrg tex->buffer.b.b.nr_samples > 1) 2571b8e80941Smrg max_zplanes = 2; 2572b8e80941Smrg 2573b8e80941Smrg z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) | 2574b8e80941Smrg S_028038_ITERATE_FLUSH(1); 2575b8e80941Smrg s_info |= S_02803C_ITERATE_FLUSH(1); 2576b8e80941Smrg } 2577848b8605Smrg 2578b8e80941Smrg if (tex->surface.has_stencil) { 2579b8e80941Smrg /* Stencil buffer workaround ported from the SI-CI-VI code. 2580b8e80941Smrg * See that for explanation. 2581b8e80941Smrg */ 2582b8e80941Smrg s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1); 2583b8e80941Smrg } else { 2584b8e80941Smrg /* Use all HTILE for depth if there's no stencil. */ 2585b8e80941Smrg s_info |= S_02803C_TILE_STENCIL_DISABLE(1); 2586b8e80941Smrg } 2587848b8605Smrg 2588b8e80941Smrg surf->db_htile_data_base = (tex->buffer.gpu_address + 2589b8e80941Smrg tex->htile_offset) >> 8; 2590b8e80941Smrg surf->db_htile_surface = S_028ABC_FULL_CACHE(1) | 2591b8e80941Smrg S_028ABC_PIPE_ALIGNED(tex->surface.u.gfx9.htile.pipe_aligned) | 2592b8e80941Smrg S_028ABC_RB_ALIGNED(tex->surface.u.gfx9.htile.rb_aligned); 2593b8e80941Smrg } 2594b8e80941Smrg } else { 2595b8e80941Smrg /* SI-CI-VI */ 2596b8e80941Smrg struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level]; 2597b8e80941Smrg 2598b8e80941Smrg assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 2599b8e80941Smrg 2600b8e80941Smrg surf->db_depth_base = (tex->buffer.gpu_address + 2601b8e80941Smrg tex->surface.u.legacy.level[level].offset) >> 8; 2602b8e80941Smrg surf->db_stencil_base = (tex->buffer.gpu_address + 2603b8e80941Smrg tex->surface.u.legacy.stencil_level[level].offset) >> 8; 2604b8e80941Smrg 2605b8e80941Smrg z_info = S_028040_FORMAT(format) | 2606b8e80941Smrg S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)); 2607b8e80941Smrg s_info = S_028044_FORMAT(stencil_format); 2608b8e80941Smrg surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile); 2609b8e80941Smrg 2610b8e80941Smrg if (sctx->chip_class >= CIK) { 2611b8e80941Smrg struct radeon_info *info = &sctx->screen->info; 2612b8e80941Smrg unsigned index = tex->surface.u.legacy.tiling_index[level]; 2613b8e80941Smrg unsigned stencil_index = tex->surface.u.legacy.stencil_tiling_index[level]; 2614b8e80941Smrg unsigned macro_index = tex->surface.u.legacy.macro_tile_index; 2615b8e80941Smrg unsigned tile_mode = info->si_tile_mode_array[index]; 2616b8e80941Smrg unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index]; 2617b8e80941Smrg unsigned macro_mode = info->cik_macrotile_mode_array[macro_index]; 2618b8e80941Smrg 2619b8e80941Smrg surf->db_depth_info |= 2620b8e80941Smrg S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) | 2621b8e80941Smrg S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) | 2622b8e80941Smrg S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) | 2623b8e80941Smrg S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) | 2624b8e80941Smrg S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) | 2625b8e80941Smrg S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode)); 2626b8e80941Smrg z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode)); 2627b8e80941Smrg s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode)); 2628b8e80941Smrg } else { 2629b8e80941Smrg unsigned tile_mode_index = si_tile_mode_index(tex, level, false); 2630b8e80941Smrg z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 2631b8e80941Smrg tile_mode_index = si_tile_mode_index(tex, level, true); 2632b8e80941Smrg s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 2633b8e80941Smrg } 2634848b8605Smrg 2635b8e80941Smrg surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 2636b8e80941Smrg S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 2637b8e80941Smrg surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * 2638b8e80941Smrg levelinfo->nblk_y) / 64 - 1); 2639b8e80941Smrg 2640b8e80941Smrg if (si_htile_enabled(tex, level)) { 2641b8e80941Smrg z_info |= S_028040_TILE_SURFACE_ENABLE(1) | 2642b8e80941Smrg S_028040_ALLOW_EXPCLEAR(1); 2643b8e80941Smrg 2644b8e80941Smrg if (tex->surface.has_stencil) { 2645b8e80941Smrg /* Workaround: For a not yet understood reason, the 2646b8e80941Smrg * combination of MSAA, fast stencil clear and stencil 2647b8e80941Smrg * decompress messes with subsequent stencil buffer 2648b8e80941Smrg * uses. Problem was reproduced on Verde, Bonaire, 2649b8e80941Smrg * Tonga, and Carrizo. 2650b8e80941Smrg * 2651b8e80941Smrg * Disabling EXPCLEAR works around the problem. 2652b8e80941Smrg * 2653b8e80941Smrg * Check piglit's arb_texture_multisample-stencil-clear 2654b8e80941Smrg * test if you want to try changing this. 2655b8e80941Smrg */ 2656b8e80941Smrg if (tex->buffer.b.b.nr_samples <= 1) 2657b8e80941Smrg s_info |= S_028044_ALLOW_EXPCLEAR(1); 2658b8e80941Smrg } else if (!tex->tc_compatible_htile) { 2659b8e80941Smrg /* Use all of the htile_buffer for depth if there's no stencil. 2660b8e80941Smrg * This must not be set when TC-compatible HTILE is enabled 2661b8e80941Smrg * due to a hw bug. 2662b8e80941Smrg */ 2663b8e80941Smrg s_info |= S_028044_TILE_STENCIL_DISABLE(1); 2664b8e80941Smrg } 2665848b8605Smrg 2666b8e80941Smrg surf->db_htile_data_base = (tex->buffer.gpu_address + 2667b8e80941Smrg tex->htile_offset) >> 8; 2668b8e80941Smrg surf->db_htile_surface = S_028ABC_FULL_CACHE(1); 2669848b8605Smrg 2670b8e80941Smrg if (tex->tc_compatible_htile) { 2671b8e80941Smrg surf->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); 2672848b8605Smrg 2673b8e80941Smrg /* 0 = full compression. N = only compress up to N-1 Z planes. */ 2674b8e80941Smrg if (tex->buffer.b.b.nr_samples <= 1) 2675b8e80941Smrg z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); 2676b8e80941Smrg else if (tex->buffer.b.b.nr_samples <= 4) 2677b8e80941Smrg z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); 2678b8e80941Smrg else 2679b8e80941Smrg z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); 2680b8e80941Smrg } 2681848b8605Smrg } 2682848b8605Smrg } 2683848b8605Smrg 2684b8e80941Smrg surf->db_z_info = z_info; 2685b8e80941Smrg surf->db_stencil_info = s_info; 2686848b8605Smrg 2687b8e80941Smrg surf->depth_initialized = true; 2688b8e80941Smrg} 2689848b8605Smrg 2690b8e80941Smrgvoid si_update_fb_dirtiness_after_rendering(struct si_context *sctx) 2691b8e80941Smrg{ 2692b8e80941Smrg if (sctx->decompression_enabled) 2693b8e80941Smrg return; 2694848b8605Smrg 2695b8e80941Smrg if (sctx->framebuffer.state.zsbuf) { 2696b8e80941Smrg struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; 2697b8e80941Smrg struct si_texture *tex = (struct si_texture *)surf->texture; 2698848b8605Smrg 2699b8e80941Smrg tex->dirty_level_mask |= 1 << surf->u.tex.level; 2700b8e80941Smrg 2701b8e80941Smrg if (tex->surface.has_stencil) 2702b8e80941Smrg tex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; 2703848b8605Smrg } 2704848b8605Smrg 2705b8e80941Smrg unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask; 2706b8e80941Smrg while (compressed_cb_mask) { 2707b8e80941Smrg unsigned i = u_bit_scan(&compressed_cb_mask); 2708b8e80941Smrg struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; 2709b8e80941Smrg struct si_texture *tex = (struct si_texture*)surf->texture; 2710848b8605Smrg 2711b8e80941Smrg if (tex->surface.fmask_size) 2712b8e80941Smrg tex->dirty_level_mask |= 1 << surf->u.tex.level; 2713b8e80941Smrg if (tex->dcc_gather_statistics) 2714b8e80941Smrg tex->separate_dcc_dirty = true; 2715b8e80941Smrg } 2716b8e80941Smrg} 2717b8e80941Smrg 2718b8e80941Smrgstatic void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) 2719b8e80941Smrg{ 2720b8e80941Smrg for (int i = 0; i < state->nr_cbufs; ++i) { 2721b8e80941Smrg struct si_surface *surf = NULL; 2722b8e80941Smrg struct si_texture *tex; 2723b8e80941Smrg 2724b8e80941Smrg if (!state->cbufs[i]) 2725b8e80941Smrg continue; 2726b8e80941Smrg surf = (struct si_surface*)state->cbufs[i]; 2727b8e80941Smrg tex = (struct si_texture*)surf->base.texture; 2728b8e80941Smrg 2729b8e80941Smrg p_atomic_dec(&tex->framebuffers_bound); 2730b8e80941Smrg } 2731848b8605Smrg} 2732848b8605Smrg 2733848b8605Smrgstatic void si_set_framebuffer_state(struct pipe_context *ctx, 2734848b8605Smrg const struct pipe_framebuffer_state *state) 2735848b8605Smrg{ 2736848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 2737b8e80941Smrg struct si_surface *surf = NULL; 2738b8e80941Smrg struct si_texture *tex; 2739b8e80941Smrg bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; 2740b8e80941Smrg unsigned old_nr_samples = sctx->framebuffer.nr_samples; 2741b8e80941Smrg unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit; 2742b8e80941Smrg bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf; 2743b8e80941Smrg bool old_has_stencil = 2744b8e80941Smrg old_has_zsbuf && 2745b8e80941Smrg ((struct si_texture*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil; 2746b8e80941Smrg bool unbound = false; 2747848b8605Smrg int i; 2748848b8605Smrg 2749b8e80941Smrg /* Reject zero-sized framebuffers due to a hw bug on SI that occurs 2750b8e80941Smrg * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. 2751b8e80941Smrg * We could implement the full workaround here, but it's a useless case. 2752b8e80941Smrg */ 2753b8e80941Smrg if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) { 2754b8e80941Smrg unreachable("the framebuffer shouldn't have zero area"); 2755b8e80941Smrg return; 2756848b8605Smrg } 2757b8e80941Smrg 2758b8e80941Smrg si_update_fb_dirtiness_after_rendering(sctx); 2759b8e80941Smrg 2760b8e80941Smrg for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 2761b8e80941Smrg if (!sctx->framebuffer.state.cbufs[i]) 2762b8e80941Smrg continue; 2763b8e80941Smrg 2764b8e80941Smrg tex = (struct si_texture*)sctx->framebuffer.state.cbufs[i]->texture; 2765b8e80941Smrg if (tex->dcc_gather_statistics) 2766b8e80941Smrg vi_separate_dcc_stop_query(sctx, tex); 2767b8e80941Smrg } 2768b8e80941Smrg 2769b8e80941Smrg /* Disable DCC if the formats are incompatible. */ 2770b8e80941Smrg for (i = 0; i < state->nr_cbufs; i++) { 2771b8e80941Smrg if (!state->cbufs[i]) 2772b8e80941Smrg continue; 2773b8e80941Smrg 2774b8e80941Smrg surf = (struct si_surface*)state->cbufs[i]; 2775b8e80941Smrg tex = (struct si_texture*)surf->base.texture; 2776b8e80941Smrg 2777b8e80941Smrg if (!surf->dcc_incompatible) 2778b8e80941Smrg continue; 2779b8e80941Smrg 2780b8e80941Smrg /* Since the DCC decompression calls back into set_framebuffer- 2781b8e80941Smrg * _state, we need to unbind the framebuffer, so that 2782b8e80941Smrg * vi_separate_dcc_stop_query isn't called twice with the same 2783b8e80941Smrg * color buffer. 2784b8e80941Smrg */ 2785b8e80941Smrg if (!unbound) { 2786b8e80941Smrg util_copy_framebuffer_state(&sctx->framebuffer.state, NULL); 2787b8e80941Smrg unbound = true; 2788b8e80941Smrg } 2789b8e80941Smrg 2790b8e80941Smrg if (vi_dcc_enabled(tex, surf->base.u.tex.level)) 2791b8e80941Smrg if (!si_texture_disable_dcc(sctx, tex)) 2792b8e80941Smrg si_decompress_dcc(sctx, tex); 2793b8e80941Smrg 2794b8e80941Smrg surf->dcc_incompatible = false; 2795b8e80941Smrg } 2796b8e80941Smrg 2797b8e80941Smrg /* Only flush TC when changing the framebuffer state, because 2798b8e80941Smrg * the only client not using TC that can change textures is 2799b8e80941Smrg * the framebuffer. 2800b8e80941Smrg * 2801b8e80941Smrg * Wait for compute shaders because of possible transitions: 2802b8e80941Smrg * - FB write -> shader read 2803b8e80941Smrg * - shader write -> FB read 2804b8e80941Smrg * 2805b8e80941Smrg * DB caches are flushed on demand (using si_decompress_textures). 2806b8e80941Smrg * 2807b8e80941Smrg * When MSAA is enabled, CB and TC caches are flushed on demand 2808b8e80941Smrg * (after FMASK decompression). Shader write -> FB read transitions 2809b8e80941Smrg * cannot happen for MSAA textures, because MSAA shader images are 2810b8e80941Smrg * not supported. 2811b8e80941Smrg * 2812b8e80941Smrg * Only flush and wait for CB if there is actually a bound color buffer. 2813b8e80941Smrg */ 2814b8e80941Smrg if (sctx->framebuffer.uncompressed_cb_mask) { 2815b8e80941Smrg si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 2816b8e80941Smrg sctx->framebuffer.CB_has_shader_readable_metadata, 2817b8e80941Smrg sctx->framebuffer.all_DCC_pipe_aligned); 2818b8e80941Smrg } 2819b8e80941Smrg 2820b8e80941Smrg sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; 2821b8e80941Smrg 2822b8e80941Smrg /* u_blitter doesn't invoke depth decompression when it does multiple 2823b8e80941Smrg * blits in a row, but the only case when it matters for DB is when 2824b8e80941Smrg * doing generate_mipmap. So here we flush DB manually between 2825b8e80941Smrg * individual generate_mipmap blits. 2826b8e80941Smrg * Note that lower mipmap levels aren't compressed. 2827b8e80941Smrg */ 2828b8e80941Smrg if (sctx->generate_mipmap_for_depth) { 2829b8e80941Smrg si_make_DB_shader_coherent(sctx, 1, false, 2830b8e80941Smrg sctx->framebuffer.DB_has_shader_readable_metadata); 2831b8e80941Smrg } else if (sctx->chip_class == GFX9) { 2832b8e80941Smrg /* It appears that DB metadata "leaks" in a sequence of: 2833b8e80941Smrg * - depth clear 2834b8e80941Smrg * - DCC decompress for shader image writes (with DB disabled) 2835b8e80941Smrg * - render with DEPTH_BEFORE_SHADER=1 2836b8e80941Smrg * Flushing DB metadata works around the problem. 2837b8e80941Smrg */ 2838b8e80941Smrg sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META; 2839848b8605Smrg } 2840848b8605Smrg 2841b8e80941Smrg /* Take the maximum of the old and new count. If the new count is lower, 2842b8e80941Smrg * dirtying is needed to disable the unbound colorbuffers. 2843b8e80941Smrg */ 2844b8e80941Smrg sctx->framebuffer.dirty_cbufs |= 2845b8e80941Smrg (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; 2846b8e80941Smrg sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; 2847b8e80941Smrg 2848b8e80941Smrg si_dec_framebuffer_counters(&sctx->framebuffer.state); 2849848b8605Smrg util_copy_framebuffer_state(&sctx->framebuffer.state, state); 2850848b8605Smrg 2851b8e80941Smrg sctx->framebuffer.colorbuf_enabled_4bit = 0; 2852b8e80941Smrg sctx->framebuffer.spi_shader_col_format = 0; 2853b8e80941Smrg sctx->framebuffer.spi_shader_col_format_alpha = 0; 2854b8e80941Smrg sctx->framebuffer.spi_shader_col_format_blend = 0; 2855b8e80941Smrg sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; 2856b8e80941Smrg sctx->framebuffer.color_is_int8 = 0; 2857b8e80941Smrg sctx->framebuffer.color_is_int10 = 0; 2858b8e80941Smrg 2859848b8605Smrg sctx->framebuffer.compressed_cb_mask = 0; 2860b8e80941Smrg sctx->framebuffer.uncompressed_cb_mask = 0; 2861848b8605Smrg sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 2862b8e80941Smrg sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples; 2863848b8605Smrg sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 2864b8e80941Smrg sctx->framebuffer.any_dst_linear = false; 2865b8e80941Smrg sctx->framebuffer.CB_has_shader_readable_metadata = false; 2866b8e80941Smrg sctx->framebuffer.DB_has_shader_readable_metadata = false; 2867b8e80941Smrg sctx->framebuffer.all_DCC_pipe_aligned = true; 2868b8e80941Smrg unsigned num_bpp64_colorbufs = 0; 2869848b8605Smrg 2870848b8605Smrg for (i = 0; i < state->nr_cbufs; i++) { 2871848b8605Smrg if (!state->cbufs[i]) 2872848b8605Smrg continue; 2873848b8605Smrg 2874b8e80941Smrg surf = (struct si_surface*)state->cbufs[i]; 2875b8e80941Smrg tex = (struct si_texture*)surf->base.texture; 2876848b8605Smrg 2877848b8605Smrg if (!surf->color_initialized) { 2878848b8605Smrg si_initialize_color_surface(sctx, surf); 2879848b8605Smrg } 2880848b8605Smrg 2881b8e80941Smrg sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4); 2882b8e80941Smrg sctx->framebuffer.spi_shader_col_format |= 2883b8e80941Smrg surf->spi_shader_col_format << (i * 4); 2884b8e80941Smrg sctx->framebuffer.spi_shader_col_format_alpha |= 2885b8e80941Smrg surf->spi_shader_col_format_alpha << (i * 4); 2886b8e80941Smrg sctx->framebuffer.spi_shader_col_format_blend |= 2887b8e80941Smrg surf->spi_shader_col_format_blend << (i * 4); 2888b8e80941Smrg sctx->framebuffer.spi_shader_col_format_blend_alpha |= 2889b8e80941Smrg surf->spi_shader_col_format_blend_alpha << (i * 4); 2890b8e80941Smrg 2891b8e80941Smrg if (surf->color_is_int8) 2892b8e80941Smrg sctx->framebuffer.color_is_int8 |= 1 << i; 2893b8e80941Smrg if (surf->color_is_int10) 2894b8e80941Smrg sctx->framebuffer.color_is_int10 |= 1 << i; 2895b8e80941Smrg 2896b8e80941Smrg if (tex->surface.fmask_size) 2897b8e80941Smrg sctx->framebuffer.compressed_cb_mask |= 1 << i; 2898b8e80941Smrg else 2899b8e80941Smrg sctx->framebuffer.uncompressed_cb_mask |= 1 << i; 2900b8e80941Smrg 2901b8e80941Smrg /* Don't update nr_color_samples for non-AA buffers. 2902b8e80941Smrg * (e.g. destination of MSAA resolve) 2903b8e80941Smrg */ 2904b8e80941Smrg if (tex->buffer.b.b.nr_samples >= 2 && 2905b8e80941Smrg tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) { 2906b8e80941Smrg sctx->framebuffer.nr_color_samples = 2907b8e80941Smrg MIN2(sctx->framebuffer.nr_color_samples, 2908b8e80941Smrg tex->buffer.b.b.nr_storage_samples); 2909b8e80941Smrg sctx->framebuffer.nr_color_samples = 2910b8e80941Smrg MAX2(1, sctx->framebuffer.nr_color_samples); 2911b8e80941Smrg } 2912b8e80941Smrg 2913b8e80941Smrg if (tex->surface.is_linear) 2914b8e80941Smrg sctx->framebuffer.any_dst_linear = true; 2915b8e80941Smrg if (tex->surface.bpe >= 8) 2916b8e80941Smrg num_bpp64_colorbufs++; 2917b8e80941Smrg 2918b8e80941Smrg if (vi_dcc_enabled(tex, surf->base.u.tex.level)) { 2919b8e80941Smrg sctx->framebuffer.CB_has_shader_readable_metadata = true; 2920b8e80941Smrg 2921b8e80941Smrg if (sctx->chip_class >= GFX9 && 2922b8e80941Smrg !tex->surface.u.gfx9.dcc.pipe_aligned) 2923b8e80941Smrg sctx->framebuffer.all_DCC_pipe_aligned = false; 2924848b8605Smrg } 2925848b8605Smrg 2926b8e80941Smrg si_context_add_resource_size(sctx, surf->base.texture); 2927b8e80941Smrg 2928b8e80941Smrg p_atomic_inc(&tex->framebuffers_bound); 2929b8e80941Smrg 2930b8e80941Smrg if (tex->dcc_gather_statistics) { 2931b8e80941Smrg /* Dirty tracking must be enabled for DCC usage analysis. */ 2932848b8605Smrg sctx->framebuffer.compressed_cb_mask |= 1 << i; 2933b8e80941Smrg vi_separate_dcc_start_query(sctx, tex); 2934848b8605Smrg } 2935848b8605Smrg } 2936848b8605Smrg 2937b8e80941Smrg /* For optimal DCC performance. */ 2938b8e80941Smrg if (sctx->chip_class == VI) 2939b8e80941Smrg sctx->framebuffer.dcc_overwrite_combiner_watermark = 4; 2940b8e80941Smrg else if (num_bpp64_colorbufs >= 5) 2941b8e80941Smrg sctx->framebuffer.dcc_overwrite_combiner_watermark = 8; 2942b8e80941Smrg else 2943b8e80941Smrg sctx->framebuffer.dcc_overwrite_combiner_watermark = 6; 2944b8e80941Smrg 2945b8e80941Smrg struct si_texture *zstex = NULL; 2946848b8605Smrg 2947848b8605Smrg if (state->zsbuf) { 2948b8e80941Smrg surf = (struct si_surface*)state->zsbuf; 2949b8e80941Smrg zstex = (struct si_texture*)surf->base.texture; 2950848b8605Smrg 2951848b8605Smrg if (!surf->depth_initialized) { 2952848b8605Smrg si_init_depth_surface(sctx, surf); 2953848b8605Smrg } 2954b8e80941Smrg 2955b8e80941Smrg if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level)) 2956b8e80941Smrg sctx->framebuffer.DB_has_shader_readable_metadata = true; 2957b8e80941Smrg 2958b8e80941Smrg si_context_add_resource_size(sctx, surf->base.texture); 2959848b8605Smrg } 2960848b8605Smrg 2961b8e80941Smrg si_update_ps_colorbuf0_slot(sctx); 2962b8e80941Smrg si_update_poly_offset_state(sctx); 2963b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 2964b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 2965848b8605Smrg 2966b8e80941Smrg if (sctx->screen->dpbb_allowed) 2967b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 2968848b8605Smrg 2969b8e80941Smrg if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 2970b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 2971b8e80941Smrg 2972b8e80941Smrg if (sctx->screen->has_out_of_order_rast && 2973b8e80941Smrg (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit || 2974b8e80941Smrg !!sctx->framebuffer.state.zsbuf != old_has_zsbuf || 2975b8e80941Smrg (zstex && zstex->surface.has_stencil != old_has_stencil))) 2976b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 2977b8e80941Smrg 2978b8e80941Smrg if (sctx->framebuffer.nr_samples != old_nr_samples) { 2979b8e80941Smrg struct pipe_constant_buffer constbuf = {0}; 2980b8e80941Smrg 2981b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 2982b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 2983b8e80941Smrg 2984b8e80941Smrg constbuf.buffer = sctx->sample_pos_buffer; 2985b8e80941Smrg 2986b8e80941Smrg /* Set sample locations as fragment shader constants. */ 2987b8e80941Smrg switch (sctx->framebuffer.nr_samples) { 2988b8e80941Smrg case 1: 2989b8e80941Smrg constbuf.buffer_offset = 0; 2990b8e80941Smrg break; 2991b8e80941Smrg case 2: 2992b8e80941Smrg constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x2 - 2993b8e80941Smrg (ubyte*)sctx->sample_positions.x1; 2994b8e80941Smrg break; 2995b8e80941Smrg case 4: 2996b8e80941Smrg constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x4 - 2997b8e80941Smrg (ubyte*)sctx->sample_positions.x1; 2998b8e80941Smrg break; 2999b8e80941Smrg case 8: 3000b8e80941Smrg constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x8 - 3001b8e80941Smrg (ubyte*)sctx->sample_positions.x1; 3002b8e80941Smrg break; 3003b8e80941Smrg case 16: 3004b8e80941Smrg constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x16 - 3005b8e80941Smrg (ubyte*)sctx->sample_positions.x1; 3006b8e80941Smrg break; 3007b8e80941Smrg default: 3008b8e80941Smrg PRINT_ERR("Requested an invalid number of samples %i.\n", 3009b8e80941Smrg sctx->framebuffer.nr_samples); 3010b8e80941Smrg assert(0); 3011b8e80941Smrg } 3012b8e80941Smrg constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 3013b8e80941Smrg si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 3014b8e80941Smrg 3015b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); 3016b8e80941Smrg } 3017b8e80941Smrg 3018b8e80941Smrg sctx->do_update_shaders = true; 3019b8e80941Smrg 3020b8e80941Smrg if (!sctx->decompression_enabled) { 3021b8e80941Smrg /* Prevent textures decompression when the framebuffer state 3022b8e80941Smrg * changes come from the decompression passes themselves. 3023b8e80941Smrg */ 3024b8e80941Smrg sctx->need_check_render_feedback = true; 3025848b8605Smrg } 3026848b8605Smrg} 3027848b8605Smrg 3028b8e80941Smrgstatic void si_emit_framebuffer_state(struct si_context *sctx) 3029848b8605Smrg{ 3030b8e80941Smrg struct radeon_cmdbuf *cs = sctx->gfx_cs; 3031848b8605Smrg struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 3032848b8605Smrg unsigned i, nr_cbufs = state->nr_cbufs; 3033b8e80941Smrg struct si_texture *tex = NULL; 3034b8e80941Smrg struct si_surface *cb = NULL; 3035b8e80941Smrg unsigned cb_color_info = 0; 3036848b8605Smrg 3037848b8605Smrg /* Colorbuffers. */ 3038848b8605Smrg for (i = 0; i < nr_cbufs; i++) { 3039b8e80941Smrg uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base; 3040b8e80941Smrg unsigned cb_color_attrib; 3041b8e80941Smrg 3042b8e80941Smrg if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) 3043b8e80941Smrg continue; 3044b8e80941Smrg 3045b8e80941Smrg cb = (struct si_surface*)state->cbufs[i]; 3046848b8605Smrg if (!cb) { 3047b8e80941Smrg radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 3048848b8605Smrg S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 3049848b8605Smrg continue; 3050848b8605Smrg } 3051848b8605Smrg 3052b8e80941Smrg tex = (struct si_texture *)cb->base.texture; 3053b8e80941Smrg radeon_add_to_buffer_list(sctx, sctx->gfx_cs, 3054b8e80941Smrg &tex->buffer, RADEON_USAGE_READWRITE, 3055b8e80941Smrg tex->buffer.b.b.nr_samples > 1 ? 3056848b8605Smrg RADEON_PRIO_COLOR_BUFFER_MSAA : 3057848b8605Smrg RADEON_PRIO_COLOR_BUFFER); 3058848b8605Smrg 3059b8e80941Smrg if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) { 3060b8e80941Smrg radeon_add_to_buffer_list(sctx, sctx->gfx_cs, 3061848b8605Smrg tex->cmask_buffer, RADEON_USAGE_READWRITE, 3062b8e80941Smrg RADEON_PRIO_SEPARATE_META); 3063b8e80941Smrg } 3064b8e80941Smrg 3065b8e80941Smrg if (tex->dcc_separate_buffer) 3066b8e80941Smrg radeon_add_to_buffer_list(sctx, sctx->gfx_cs, 3067b8e80941Smrg tex->dcc_separate_buffer, 3068b8e80941Smrg RADEON_USAGE_READWRITE, 3069b8e80941Smrg RADEON_PRIO_SEPARATE_META); 3070b8e80941Smrg 3071b8e80941Smrg /* Compute mutable surface parameters. */ 3072b8e80941Smrg cb_color_base = tex->buffer.gpu_address >> 8; 3073b8e80941Smrg cb_color_fmask = 0; 3074b8e80941Smrg cb_color_cmask = tex->cmask_base_address_reg; 3075b8e80941Smrg cb_dcc_base = 0; 3076b8e80941Smrg cb_color_info = cb->cb_color_info | tex->cb_color_info; 3077b8e80941Smrg cb_color_attrib = cb->cb_color_attrib; 3078b8e80941Smrg 3079b8e80941Smrg if (cb->base.u.tex.level > 0) 3080b8e80941Smrg cb_color_info &= C_028C70_FAST_CLEAR; 3081b8e80941Smrg 3082b8e80941Smrg if (tex->surface.fmask_size) { 3083b8e80941Smrg cb_color_fmask = (tex->buffer.gpu_address + tex->fmask_offset) >> 8; 3084b8e80941Smrg cb_color_fmask |= tex->surface.fmask_tile_swizzle; 3085b8e80941Smrg } 3086b8e80941Smrg 3087b8e80941Smrg /* Set up DCC. */ 3088b8e80941Smrg if (vi_dcc_enabled(tex, cb->base.u.tex.level)) { 3089b8e80941Smrg bool is_msaa_resolve_dst = state->cbufs[0] && 3090b8e80941Smrg state->cbufs[0]->texture->nr_samples > 1 && 3091b8e80941Smrg state->cbufs[1] == &cb->base && 3092b8e80941Smrg state->cbufs[1]->texture->nr_samples <= 1; 3093b8e80941Smrg 3094b8e80941Smrg if (!is_msaa_resolve_dst) 3095b8e80941Smrg cb_color_info |= S_028C70_DCC_ENABLE(1); 3096b8e80941Smrg 3097b8e80941Smrg cb_dcc_base = ((!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) + 3098b8e80941Smrg tex->dcc_offset) >> 8; 3099b8e80941Smrg cb_dcc_base |= tex->surface.tile_swizzle; 3100b8e80941Smrg } 3101b8e80941Smrg 3102b8e80941Smrg if (sctx->chip_class >= GFX9) { 3103b8e80941Smrg struct gfx9_surf_meta_flags meta; 3104b8e80941Smrg 3105b8e80941Smrg if (tex->dcc_offset) 3106b8e80941Smrg meta = tex->surface.u.gfx9.dcc; 3107b8e80941Smrg else 3108b8e80941Smrg meta = tex->surface.u.gfx9.cmask; 3109b8e80941Smrg 3110b8e80941Smrg /* Set mutable surface parameters. */ 3111b8e80941Smrg cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 3112b8e80941Smrg cb_color_base |= tex->surface.tile_swizzle; 3113b8e80941Smrg if (!tex->surface.fmask_size) 3114b8e80941Smrg cb_color_fmask = cb_color_base; 3115b8e80941Smrg if (cb->base.u.tex.level > 0) 3116b8e80941Smrg cb_color_cmask = cb_color_base; 3117b8e80941Smrg cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) | 3118b8e80941Smrg S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode) | 3119b8e80941Smrg S_028C74_RB_ALIGNED(meta.rb_aligned) | 3120b8e80941Smrg S_028C74_PIPE_ALIGNED(meta.pipe_aligned); 3121b8e80941Smrg 3122b8e80941Smrg radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 15); 3123b8e80941Smrg radeon_emit(cs, cb_color_base); /* CB_COLOR0_BASE */ 3124b8e80941Smrg radeon_emit(cs, S_028C64_BASE_256B(cb_color_base >> 32)); /* CB_COLOR0_BASE_EXT */ 3125b8e80941Smrg radeon_emit(cs, cb->cb_color_attrib2); /* CB_COLOR0_ATTRIB2 */ 3126b8e80941Smrg radeon_emit(cs, cb->cb_color_view); /* CB_COLOR0_VIEW */ 3127b8e80941Smrg radeon_emit(cs, cb_color_info); /* CB_COLOR0_INFO */ 3128b8e80941Smrg radeon_emit(cs, cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3129b8e80941Smrg radeon_emit(cs, cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3130b8e80941Smrg radeon_emit(cs, cb_color_cmask); /* CB_COLOR0_CMASK */ 3131b8e80941Smrg radeon_emit(cs, S_028C80_BASE_256B(cb_color_cmask >> 32)); /* CB_COLOR0_CMASK_BASE_EXT */ 3132b8e80941Smrg radeon_emit(cs, cb_color_fmask); /* CB_COLOR0_FMASK */ 3133b8e80941Smrg radeon_emit(cs, S_028C88_BASE_256B(cb_color_fmask >> 32)); /* CB_COLOR0_FMASK_BASE_EXT */ 3134b8e80941Smrg radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3135b8e80941Smrg radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3136b8e80941Smrg radeon_emit(cs, cb_dcc_base); /* CB_COLOR0_DCC_BASE */ 3137b8e80941Smrg radeon_emit(cs, S_028C98_BASE_256B(cb_dcc_base >> 32)); /* CB_COLOR0_DCC_BASE_EXT */ 3138b8e80941Smrg 3139b8e80941Smrg radeon_set_context_reg(cs, R_0287A0_CB_MRT0_EPITCH + i * 4, 3140b8e80941Smrg S_0287A0_EPITCH(tex->surface.u.gfx9.surf.epitch)); 3141b8e80941Smrg } else { 3142b8e80941Smrg /* Compute mutable surface parameters (SI-CI-VI). */ 3143b8e80941Smrg const struct legacy_surf_level *level_info = 3144b8e80941Smrg &tex->surface.u.legacy.level[cb->base.u.tex.level]; 3145b8e80941Smrg unsigned pitch_tile_max, slice_tile_max, tile_mode_index; 3146b8e80941Smrg unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice; 3147b8e80941Smrg 3148b8e80941Smrg cb_color_base += level_info->offset >> 8; 3149b8e80941Smrg /* Only macrotiled modes can set tile swizzle. */ 3150b8e80941Smrg if (level_info->mode == RADEON_SURF_MODE_2D) 3151b8e80941Smrg cb_color_base |= tex->surface.tile_swizzle; 3152b8e80941Smrg 3153b8e80941Smrg if (!tex->surface.fmask_size) 3154b8e80941Smrg cb_color_fmask = cb_color_base; 3155b8e80941Smrg if (cb->base.u.tex.level > 0) 3156b8e80941Smrg cb_color_cmask = cb_color_base; 3157b8e80941Smrg if (cb_dcc_base) 3158b8e80941Smrg cb_dcc_base += level_info->dcc_offset >> 8; 3159b8e80941Smrg 3160b8e80941Smrg pitch_tile_max = level_info->nblk_x / 8 - 1; 3161b8e80941Smrg slice_tile_max = level_info->nblk_x * 3162b8e80941Smrg level_info->nblk_y / 64 - 1; 3163b8e80941Smrg tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false); 3164b8e80941Smrg 3165b8e80941Smrg cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); 3166b8e80941Smrg cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); 3167b8e80941Smrg cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); 3168b8e80941Smrg 3169b8e80941Smrg if (tex->surface.fmask_size) { 3170b8e80941Smrg if (sctx->chip_class >= CIK) 3171b8e80941Smrg cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.fmask.pitch_in_pixels / 8 - 1); 3172b8e80941Smrg cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.fmask.tiling_index); 3173b8e80941Smrg cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.fmask.slice_tile_max); 3174b8e80941Smrg } else { 3175b8e80941Smrg /* This must be set for fast clear to work without FMASK. */ 3176b8e80941Smrg if (sctx->chip_class >= CIK) 3177b8e80941Smrg cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); 3178b8e80941Smrg cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 3179b8e80941Smrg cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); 3180b8e80941Smrg } 3181b8e80941Smrg 3182b8e80941Smrg radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 3183b8e80941Smrg sctx->chip_class >= VI ? 14 : 13); 3184b8e80941Smrg radeon_emit(cs, cb_color_base); /* CB_COLOR0_BASE */ 3185b8e80941Smrg radeon_emit(cs, cb_color_pitch); /* CB_COLOR0_PITCH */ 3186b8e80941Smrg radeon_emit(cs, cb_color_slice); /* CB_COLOR0_SLICE */ 3187b8e80941Smrg radeon_emit(cs, cb->cb_color_view); /* CB_COLOR0_VIEW */ 3188b8e80941Smrg radeon_emit(cs, cb_color_info); /* CB_COLOR0_INFO */ 3189b8e80941Smrg radeon_emit(cs, cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3190b8e80941Smrg radeon_emit(cs, cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3191b8e80941Smrg radeon_emit(cs, cb_color_cmask); /* CB_COLOR0_CMASK */ 3192b8e80941Smrg radeon_emit(cs, tex->surface.u.legacy.cmask_slice_tile_max); /* CB_COLOR0_CMASK_SLICE */ 3193b8e80941Smrg radeon_emit(cs, cb_color_fmask); /* CB_COLOR0_FMASK */ 3194b8e80941Smrg radeon_emit(cs, cb_color_fmask_slice); /* CB_COLOR0_FMASK_SLICE */ 3195b8e80941Smrg radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3196b8e80941Smrg radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3197b8e80941Smrg 3198b8e80941Smrg if (sctx->chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */ 3199b8e80941Smrg radeon_emit(cs, cb_dcc_base); 3200b8e80941Smrg } 3201848b8605Smrg } 3202b8e80941Smrg for (; i < 8 ; i++) 3203b8e80941Smrg if (sctx->framebuffer.dirty_cbufs & (1 << i)) 3204b8e80941Smrg radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 3205848b8605Smrg 3206848b8605Smrg /* ZS buffer. */ 3207b8e80941Smrg if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { 3208b8e80941Smrg struct si_surface *zb = (struct si_surface*)state->zsbuf; 3209b8e80941Smrg struct si_texture *tex = (struct si_texture*)zb->base.texture; 3210848b8605Smrg 3211b8e80941Smrg radeon_add_to_buffer_list(sctx, sctx->gfx_cs, 3212b8e80941Smrg &tex->buffer, RADEON_USAGE_READWRITE, 3213848b8605Smrg zb->base.texture->nr_samples > 1 ? 3214848b8605Smrg RADEON_PRIO_DEPTH_BUFFER_MSAA : 3215848b8605Smrg RADEON_PRIO_DEPTH_BUFFER); 3216848b8605Smrg 3217b8e80941Smrg if (sctx->chip_class >= GFX9) { 3218b8e80941Smrg radeon_set_context_reg_seq(cs, R_028014_DB_HTILE_DATA_BASE, 3); 3219b8e80941Smrg radeon_emit(cs, zb->db_htile_data_base); /* DB_HTILE_DATA_BASE */ 3220b8e80941Smrg radeon_emit(cs, S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */ 3221b8e80941Smrg radeon_emit(cs, zb->db_depth_size); /* DB_DEPTH_SIZE */ 3222b8e80941Smrg 3223b8e80941Smrg radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 10); 3224b8e80941Smrg radeon_emit(cs, zb->db_z_info | /* DB_Z_INFO */ 3225b8e80941Smrg S_028038_ZRANGE_PRECISION(tex->depth_clear_value != 0)); 3226b8e80941Smrg radeon_emit(cs, zb->db_stencil_info); /* DB_STENCIL_INFO */ 3227b8e80941Smrg radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */ 3228b8e80941Smrg radeon_emit(cs, S_028044_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_READ_BASE_HI */ 3229b8e80941Smrg radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3230b8e80941Smrg radeon_emit(cs, S_02804C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_READ_BASE_HI */ 3231b8e80941Smrg radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3232b8e80941Smrg radeon_emit(cs, S_028054_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_WRITE_BASE_HI */ 3233b8e80941Smrg radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3234b8e80941Smrg radeon_emit(cs, S_02805C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */ 3235b8e80941Smrg 3236b8e80941Smrg radeon_set_context_reg_seq(cs, R_028068_DB_Z_INFO2, 2); 3237b8e80941Smrg radeon_emit(cs, zb->db_z_info2); /* DB_Z_INFO2 */ 3238b8e80941Smrg radeon_emit(cs, zb->db_stencil_info2); /* DB_STENCIL_INFO2 */ 3239b8e80941Smrg } else { 3240b8e80941Smrg radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 3241b8e80941Smrg 3242b8e80941Smrg radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 3243b8e80941Smrg radeon_emit(cs, zb->db_depth_info); /* DB_DEPTH_INFO */ 3244b8e80941Smrg radeon_emit(cs, zb->db_z_info | /* DB_Z_INFO */ 3245b8e80941Smrg S_028040_ZRANGE_PRECISION(tex->depth_clear_value != 0)); 3246b8e80941Smrg radeon_emit(cs, zb->db_stencil_info); /* DB_STENCIL_INFO */ 3247b8e80941Smrg radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */ 3248b8e80941Smrg radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3249b8e80941Smrg radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3250b8e80941Smrg radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3251b8e80941Smrg radeon_emit(cs, zb->db_depth_size); /* DB_DEPTH_SIZE */ 3252b8e80941Smrg radeon_emit(cs, zb->db_depth_slice); /* DB_DEPTH_SLICE */ 3253b8e80941Smrg } 3254b8e80941Smrg 3255b8e80941Smrg radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); 3256b8e80941Smrg radeon_emit(cs, tex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */ 3257b8e80941Smrg radeon_emit(cs, fui(tex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */ 3258b8e80941Smrg 3259b8e80941Smrg radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 3260b8e80941Smrg radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 3261b8e80941Smrg } else if (sctx->framebuffer.dirty_zsbuf) { 3262b8e80941Smrg if (sctx->chip_class >= GFX9) 3263b8e80941Smrg radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 2); 3264b8e80941Smrg else 3265b8e80941Smrg radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 3266b8e80941Smrg 3267b8e80941Smrg radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */ 3268b8e80941Smrg radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */ 3269848b8605Smrg } 3270848b8605Smrg 3271848b8605Smrg /* Framebuffer dimensions. */ 3272848b8605Smrg /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 3273b8e80941Smrg radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 3274848b8605Smrg S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 3275848b8605Smrg 3276b8e80941Smrg if (sctx->screen->dfsm_allowed) { 3277b8e80941Smrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 3278b8e80941Smrg radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 3279b8e80941Smrg } 3280848b8605Smrg 3281b8e80941Smrg sctx->framebuffer.dirty_cbufs = 0; 3282b8e80941Smrg sctx->framebuffer.dirty_zsbuf = false; 3283848b8605Smrg} 3284848b8605Smrg 3285b8e80941Smrgstatic void si_emit_msaa_sample_locs(struct si_context *sctx) 3286848b8605Smrg{ 3287b8e80941Smrg struct radeon_cmdbuf *cs = sctx->gfx_cs; 3288b8e80941Smrg struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 3289b8e80941Smrg unsigned nr_samples = sctx->framebuffer.nr_samples; 3290b8e80941Smrg bool has_msaa_sample_loc_bug = sctx->screen->has_msaa_sample_loc_bug; 3291b8e80941Smrg 3292b8e80941Smrg /* Smoothing (only possible with nr_samples == 1) uses the same 3293b8e80941Smrg * sample locations as the MSAA it simulates. 3294b8e80941Smrg */ 3295b8e80941Smrg if (nr_samples <= 1 && sctx->smoothing_enabled) 3296b8e80941Smrg nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; 3297b8e80941Smrg 3298b8e80941Smrg /* On Polaris, the small primitive filter uses the sample locations 3299b8e80941Smrg * even when MSAA is off, so we need to make sure they're set to 0. 3300b8e80941Smrg */ 3301b8e80941Smrg if ((nr_samples >= 2 || has_msaa_sample_loc_bug) && 3302b8e80941Smrg nr_samples != sctx->sample_locs_num_samples) { 3303b8e80941Smrg sctx->sample_locs_num_samples = nr_samples; 3304b8e80941Smrg si_emit_sample_locations(cs, nr_samples); 3305b8e80941Smrg } 3306848b8605Smrg 3307b8e80941Smrg if (sctx->family >= CHIP_POLARIS10) { 3308b8e80941Smrg unsigned small_prim_filter_cntl = 3309b8e80941Smrg S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 3310b8e80941Smrg /* line bug */ 3311b8e80941Smrg S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12); 3312848b8605Smrg 3313b8e80941Smrg /* The alternative of setting sample locations to 0 would 3314b8e80941Smrg * require a DB flush to avoid Z errors, see 3315b8e80941Smrg * https://bugs.freedesktop.org/show_bug.cgi?id=96908 3316b8e80941Smrg */ 3317b8e80941Smrg if (has_msaa_sample_loc_bug && 3318b8e80941Smrg sctx->framebuffer.nr_samples > 1 && 3319b8e80941Smrg !rs->multisample_enable) 3320b8e80941Smrg small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; 3321b8e80941Smrg 3322b8e80941Smrg radeon_opt_set_context_reg(sctx, 3323b8e80941Smrg R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 3324b8e80941Smrg SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, 3325b8e80941Smrg small_prim_filter_cntl); 3326b8e80941Smrg } 3327848b8605Smrg 3328b8e80941Smrg /* The exclusion bits can be set to improve rasterization efficiency 3329b8e80941Smrg * if no sample lies on the pixel boundary (-8 sample offset). 3330b8e80941Smrg */ 3331b8e80941Smrg bool exclusion = sctx->chip_class >= CIK && 3332b8e80941Smrg (!rs->multisample_enable || nr_samples != 16); 3333b8e80941Smrg radeon_opt_set_context_reg(sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, 3334b8e80941Smrg SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, 3335b8e80941Smrg S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | 3336b8e80941Smrg S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); 3337848b8605Smrg} 3338848b8605Smrg 3339b8e80941Smrgstatic bool si_out_of_order_rasterization(struct si_context *sctx) 3340848b8605Smrg{ 3341b8e80941Smrg struct si_state_blend *blend = sctx->queued.named.blend; 3342b8e80941Smrg struct si_state_dsa *dsa = sctx->queued.named.dsa; 3343848b8605Smrg 3344b8e80941Smrg if (!sctx->screen->has_out_of_order_rast) 3345b8e80941Smrg return false; 3346848b8605Smrg 3347b8e80941Smrg unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit; 3348848b8605Smrg 3349b8e80941Smrg if (blend) { 3350b8e80941Smrg colormask &= blend->cb_target_enabled_4bit; 3351b8e80941Smrg } else { 3352b8e80941Smrg colormask = 0; 3353848b8605Smrg } 3354848b8605Smrg 3355b8e80941Smrg /* Conservative: No logic op. */ 3356b8e80941Smrg if (colormask && blend->logicop_enable) 3357b8e80941Smrg return false; 3358848b8605Smrg 3359b8e80941Smrg struct si_dsa_order_invariance dsa_order_invariant = { 3360b8e80941Smrg .zs = true, .pass_set = true, .pass_last = false 3361b8e80941Smrg }; 3362848b8605Smrg 3363b8e80941Smrg if (sctx->framebuffer.state.zsbuf) { 3364b8e80941Smrg struct si_texture *zstex = 3365b8e80941Smrg (struct si_texture*)sctx->framebuffer.state.zsbuf->texture; 3366b8e80941Smrg bool has_stencil = zstex->surface.has_stencil; 3367b8e80941Smrg dsa_order_invariant = dsa->order_invariance[has_stencil]; 3368b8e80941Smrg if (!dsa_order_invariant.zs) 3369b8e80941Smrg return false; 3370b8e80941Smrg 3371b8e80941Smrg /* The set of PS invocations is always order invariant, 3372b8e80941Smrg * except when early Z/S tests are requested. */ 3373b8e80941Smrg if (sctx->ps_shader.cso && 3374b8e80941Smrg sctx->ps_shader.cso->info.writes_memory && 3375b8e80941Smrg sctx->ps_shader.cso->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] && 3376b8e80941Smrg !dsa_order_invariant.pass_set) 3377b8e80941Smrg return false; 3378b8e80941Smrg 3379b8e80941Smrg if (sctx->num_perfect_occlusion_queries != 0 && 3380b8e80941Smrg !dsa_order_invariant.pass_set) 3381b8e80941Smrg return false; 3382848b8605Smrg } 3383848b8605Smrg 3384b8e80941Smrg if (!colormask) 3385b8e80941Smrg return true; 3386848b8605Smrg 3387b8e80941Smrg unsigned blendmask = colormask & blend->blend_enable_4bit; 3388848b8605Smrg 3389b8e80941Smrg if (blendmask) { 3390b8e80941Smrg /* Only commutative blending. */ 3391b8e80941Smrg if (blendmask & ~blend->commutative_4bit) 3392b8e80941Smrg return false; 3393848b8605Smrg 3394b8e80941Smrg if (!dsa_order_invariant.pass_set) 3395b8e80941Smrg return false; 3396848b8605Smrg } 3397848b8605Smrg 3398b8e80941Smrg if (colormask & ~blendmask) { 3399b8e80941Smrg if (!dsa_order_invariant.pass_last) 3400b8e80941Smrg return false; 3401848b8605Smrg } 3402848b8605Smrg 3403b8e80941Smrg return true; 3404848b8605Smrg} 3405848b8605Smrg 3406b8e80941Smrgstatic void si_emit_msaa_config(struct si_context *sctx) 3407848b8605Smrg{ 3408b8e80941Smrg struct radeon_cmdbuf *cs = sctx->gfx_cs; 3409b8e80941Smrg unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes; 3410b8e80941Smrg /* 33% faster rendering to linear color buffers */ 3411b8e80941Smrg bool dst_is_linear = sctx->framebuffer.any_dst_linear; 3412b8e80941Smrg bool out_of_order_rast = si_out_of_order_rasterization(sctx); 3413b8e80941Smrg unsigned sc_mode_cntl_1 = 3414b8e80941Smrg S_028A4C_WALK_SIZE(dst_is_linear) | 3415b8e80941Smrg S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) | 3416b8e80941Smrg S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) | 3417b8e80941Smrg S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) | 3418b8e80941Smrg S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) | 3419b8e80941Smrg /* always 1: */ 3420b8e80941Smrg S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | 3421b8e80941Smrg S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 3422b8e80941Smrg S_028A4C_TILE_WALK_ORDER_ENABLE(1) | 3423b8e80941Smrg S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 3424b8e80941Smrg S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 3425b8e80941Smrg S_028A4C_FORCE_EOV_REZ_ENABLE(1); 3426b8e80941Smrg unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 3427b8e80941Smrg S_028804_INCOHERENT_EQAA_READS(1) | 3428b8e80941Smrg S_028804_INTERPOLATE_COMP_Z(1) | 3429b8e80941Smrg S_028804_STATIC_ANCHOR_ASSOCIATIONS(1); 3430b8e80941Smrg unsigned coverage_samples, color_samples, z_samples; 3431b8e80941Smrg 3432b8e80941Smrg /* S: Coverage samples (up to 16x): 3433b8e80941Smrg * - Scan conversion samples (PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES) 3434b8e80941Smrg * - CB FMASK samples (CB_COLORi_ATTRIB.NUM_SAMPLES) 3435b8e80941Smrg * 3436b8e80941Smrg * Z: Z/S samples (up to 8x, must be <= coverage samples and >= color samples): 3437b8e80941Smrg * - Value seen by DB (DB_Z_INFO.NUM_SAMPLES) 3438b8e80941Smrg * - Value seen by CB, must be correct even if Z/S is unbound (DB_EQAA.MAX_ANCHOR_SAMPLES) 3439b8e80941Smrg * # Missing samples are derived from Z planes if Z is compressed (up to 16x quality), or 3440b8e80941Smrg * # from the closest defined sample if Z is uncompressed (same quality as the number of 3441b8e80941Smrg * # Z samples). 3442b8e80941Smrg * 3443b8e80941Smrg * F: Color samples (up to 8x, must be <= coverage samples): 3444b8e80941Smrg * - CB color samples (CB_COLORi_ATTRIB.NUM_FRAGMENTS) 3445b8e80941Smrg * - PS iter samples (DB_EQAA.PS_ITER_SAMPLES) 3446b8e80941Smrg * 3447b8e80941Smrg * Can be anything between coverage and color samples: 3448b8e80941Smrg * - SampleMaskIn samples (PA_SC_AA_CONFIG.MSAA_EXPOSED_SAMPLES) 3449b8e80941Smrg * - SampleMaskOut samples (DB_EQAA.MASK_EXPORT_NUM_SAMPLES) 3450b8e80941Smrg * - Alpha-to-coverage samples (DB_EQAA.ALPHA_TO_MASK_NUM_SAMPLES) 3451b8e80941Smrg * - Occlusion query samples (DB_COUNT_CONTROL.SAMPLE_RATE) 3452b8e80941Smrg * # All are currently set the same as coverage samples. 3453b8e80941Smrg * 3454b8e80941Smrg * If color samples < coverage samples, FMASK has a higher bpp to store an "unknown" 3455b8e80941Smrg * flag for undefined color samples. A shader-based resolve must handle unknowns 3456b8e80941Smrg * or mask them out with AND. Unknowns can also be guessed from neighbors via 3457b8e80941Smrg * an edge-detect shader-based resolve, which is required to make "color samples = 1" 3458b8e80941Smrg * useful. The CB resolve always drops unknowns. 3459b8e80941Smrg * 3460b8e80941Smrg * Sensible AA configurations: 3461b8e80941Smrg * EQAA 16s 8z 8f - might look the same as 16x MSAA if Z is compressed 3462b8e80941Smrg * EQAA 16s 8z 4f - might look the same as 16x MSAA if Z is compressed 3463b8e80941Smrg * EQAA 16s 4z 4f - might look the same as 16x MSAA if Z is compressed 3464b8e80941Smrg * EQAA 8s 8z 8f = 8x MSAA 3465b8e80941Smrg * EQAA 8s 8z 4f - might look the same as 8x MSAA 3466b8e80941Smrg * EQAA 8s 8z 2f - might look the same as 8x MSAA with low-density geometry 3467b8e80941Smrg * EQAA 8s 4z 4f - might look the same as 8x MSAA if Z is compressed 3468b8e80941Smrg * EQAA 8s 4z 2f - might look the same as 8x MSAA with low-density geometry if Z is compressed 3469b8e80941Smrg * EQAA 4s 4z 4f = 4x MSAA 3470b8e80941Smrg * EQAA 4s 4z 2f - might look the same as 4x MSAA with low-density geometry 3471b8e80941Smrg * EQAA 2s 2z 2f = 2x MSAA 3472b8e80941Smrg */ 3473b8e80941Smrg if (sctx->framebuffer.nr_samples > 1) { 3474b8e80941Smrg coverage_samples = sctx->framebuffer.nr_samples; 3475b8e80941Smrg color_samples = sctx->framebuffer.nr_color_samples; 3476b8e80941Smrg 3477b8e80941Smrg if (sctx->framebuffer.state.zsbuf) { 3478b8e80941Smrg z_samples = sctx->framebuffer.state.zsbuf->texture->nr_samples; 3479b8e80941Smrg z_samples = MAX2(1, z_samples); 3480b8e80941Smrg } else { 3481b8e80941Smrg z_samples = coverage_samples; 3482b8e80941Smrg } 3483b8e80941Smrg } else if (sctx->smoothing_enabled) { 3484b8e80941Smrg coverage_samples = color_samples = z_samples = SI_NUM_SMOOTH_AA_SAMPLES; 3485b8e80941Smrg } else { 3486b8e80941Smrg coverage_samples = color_samples = z_samples = 1; 3487b8e80941Smrg } 3488848b8605Smrg 3489b8e80941Smrg /* Required by OpenGL line rasterization. 3490b8e80941Smrg * 3491b8e80941Smrg * TODO: We should also enable perpendicular endcaps for AA lines, 3492b8e80941Smrg * but that requires implementing line stippling in the pixel 3493b8e80941Smrg * shader. SC can only do line stippling with axis-aligned 3494b8e80941Smrg * endcaps. 3495b8e80941Smrg */ 3496b8e80941Smrg unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1); 3497b8e80941Smrg unsigned sc_aa_config = 0; 3498b8e80941Smrg 3499b8e80941Smrg if (coverage_samples > 1) { 3500b8e80941Smrg /* distance from the pixel center, indexed by log2(nr_samples) */ 3501b8e80941Smrg static unsigned max_dist[] = { 3502b8e80941Smrg 0, /* unused */ 3503b8e80941Smrg 4, /* 2x MSAA */ 3504b8e80941Smrg 6, /* 4x MSAA */ 3505b8e80941Smrg 7, /* 8x MSAA */ 3506b8e80941Smrg 8, /* 16x MSAA */ 3507b8e80941Smrg }; 3508b8e80941Smrg unsigned log_samples = util_logbase2(coverage_samples); 3509b8e80941Smrg unsigned log_z_samples = util_logbase2(z_samples); 3510b8e80941Smrg unsigned ps_iter_samples = si_get_ps_iter_samples(sctx); 3511b8e80941Smrg unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples); 3512b8e80941Smrg 3513b8e80941Smrg sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); 3514b8e80941Smrg sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) | 3515b8e80941Smrg S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | 3516b8e80941Smrg S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples); 3517b8e80941Smrg 3518b8e80941Smrg if (sctx->framebuffer.nr_samples > 1) { 3519b8e80941Smrg db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | 3520b8e80941Smrg S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | 3521b8e80941Smrg S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | 3522b8e80941Smrg S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples); 3523b8e80941Smrg sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); 3524b8e80941Smrg } else if (sctx->smoothing_enabled) { 3525b8e80941Smrg db_eqaa |= S_028804_OVERRASTERIZATION_AMOUNT(log_samples); 3526b8e80941Smrg } 3527b8e80941Smrg } 3528848b8605Smrg 3529b8e80941Smrg unsigned initial_cdw = cs->current.cdw; 3530b8e80941Smrg 3531b8e80941Smrg /* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */ 3532b8e80941Smrg radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, 3533b8e80941Smrg SI_TRACKED_PA_SC_LINE_CNTL, sc_line_cntl, 3534b8e80941Smrg sc_aa_config); 3535b8e80941Smrg /* R_028804_DB_EQAA */ 3536b8e80941Smrg radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, 3537b8e80941Smrg db_eqaa); 3538b8e80941Smrg /* R_028A4C_PA_SC_MODE_CNTL_1 */ 3539b8e80941Smrg radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, 3540b8e80941Smrg SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1); 3541b8e80941Smrg 3542b8e80941Smrg if (initial_cdw != cs->current.cdw) { 3543b8e80941Smrg sctx->context_roll = true; 3544b8e80941Smrg 3545b8e80941Smrg /* GFX9: Flush DFSM when the AA mode changes. */ 3546b8e80941Smrg if (sctx->screen->dfsm_allowed) { 3547b8e80941Smrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 3548b8e80941Smrg radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); 3549b8e80941Smrg } 3550b8e80941Smrg } 3551848b8605Smrg} 3552848b8605Smrg 3553b8e80941Smrgvoid si_update_ps_iter_samples(struct si_context *sctx) 3554848b8605Smrg{ 3555b8e80941Smrg if (sctx->framebuffer.nr_samples > 1) 3556b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 3557b8e80941Smrg if (sctx->screen->dpbb_allowed) 3558b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 3559848b8605Smrg} 3560848b8605Smrg 3561b8e80941Smrgstatic void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 3562848b8605Smrg{ 3563848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 3564848b8605Smrg 3565b8e80941Smrg /* The hardware can only do sample shading with 2^n samples. */ 3566b8e80941Smrg min_samples = util_next_power_of_two(min_samples); 3567848b8605Smrg 3568b8e80941Smrg if (sctx->ps_iter_samples == min_samples) 3569848b8605Smrg return; 3570848b8605Smrg 3571b8e80941Smrg sctx->ps_iter_samples = min_samples; 3572b8e80941Smrg sctx->do_update_shaders = true; 3573848b8605Smrg 3574b8e80941Smrg si_update_ps_iter_samples(sctx); 3575848b8605Smrg} 3576848b8605Smrg 3577b8e80941Smrg/* 3578b8e80941Smrg * Samplers 3579b8e80941Smrg */ 3580848b8605Smrg 3581b8e80941Smrg/** 3582b8e80941Smrg * Build the sampler view descriptor for a buffer texture. 3583b8e80941Smrg * @param state 256-bit descriptor; only the high 128 bits are filled in 3584b8e80941Smrg */ 3585b8e80941Smrgvoid 3586b8e80941Smrgsi_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, 3587b8e80941Smrg enum pipe_format format, 3588b8e80941Smrg unsigned offset, unsigned size, 3589b8e80941Smrg uint32_t *state) 3590848b8605Smrg{ 3591b8e80941Smrg const struct util_format_description *desc; 3592b8e80941Smrg int first_non_void; 3593b8e80941Smrg unsigned stride; 3594b8e80941Smrg unsigned num_records; 3595b8e80941Smrg unsigned num_format, data_format; 3596848b8605Smrg 3597b8e80941Smrg desc = util_format_description(format); 3598b8e80941Smrg first_non_void = util_format_get_first_non_void_channel(format); 3599b8e80941Smrg stride = desc->block.bits / 8; 3600b8e80941Smrg num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void); 3601b8e80941Smrg data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void); 3602b8e80941Smrg 3603b8e80941Smrg num_records = size / stride; 3604b8e80941Smrg num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); 3605b8e80941Smrg 3606b8e80941Smrg /* The NUM_RECORDS field has a different meaning depending on the chip, 3607b8e80941Smrg * instruction type, STRIDE, and SWIZZLE_ENABLE. 3608b8e80941Smrg * 3609b8e80941Smrg * SI-CIK: 3610b8e80941Smrg * - If STRIDE == 0, it's in byte units. 3611b8e80941Smrg * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN. 3612b8e80941Smrg * 3613b8e80941Smrg * VI: 3614b8e80941Smrg * - For SMEM and STRIDE == 0, it's in byte units. 3615b8e80941Smrg * - For SMEM and STRIDE != 0, it's in units of STRIDE. 3616b8e80941Smrg * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units. 3617b8e80941Smrg * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE. 3618b8e80941Smrg * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_- 3619b8e80941Smrg * ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when 3620b8e80941Smrg * using SMEM. This can be done in the shader by clearing STRIDE with s_and. 3621b8e80941Smrg * That way the same descriptor can be used by both SMEM and VMEM. 3622b8e80941Smrg * 3623b8e80941Smrg * GFX9: 3624b8e80941Smrg * - For SMEM and STRIDE == 0, it's in byte units. 3625b8e80941Smrg * - For SMEM and STRIDE != 0, it's in units of STRIDE. 3626b8e80941Smrg * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units. 3627b8e80941Smrg * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE. 3628b8e80941Smrg */ 3629b8e80941Smrg if (screen->info.chip_class >= GFX9 && HAVE_LLVM < 0x0800) 3630b8e80941Smrg /* When vindex == 0, LLVM < 8.0 sets IDXEN = 0, thus changing units 3631b8e80941Smrg * from STRIDE to bytes. This works around it by setting 3632b8e80941Smrg * NUM_RECORDS to at least the size of one element, so that 3633b8e80941Smrg * the first element is readable when IDXEN == 0. 3634b8e80941Smrg */ 3635b8e80941Smrg num_records = num_records ? MAX2(num_records, stride) : 0; 3636b8e80941Smrg else if (screen->info.chip_class == VI) 3637b8e80941Smrg num_records *= stride; 3638b8e80941Smrg 3639b8e80941Smrg state[4] = 0; 3640b8e80941Smrg state[5] = S_008F04_STRIDE(stride); 3641b8e80941Smrg state[6] = num_records; 3642b8e80941Smrg state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3643b8e80941Smrg S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3644b8e80941Smrg S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3645b8e80941Smrg S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 3646b8e80941Smrg S_008F0C_NUM_FORMAT(num_format) | 3647b8e80941Smrg S_008F0C_DATA_FORMAT(data_format); 3648848b8605Smrg} 3649848b8605Smrg 3650b8e80941Smrgstatic unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4]) 3651848b8605Smrg{ 3652b8e80941Smrg unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 3653b8e80941Smrg 3654b8e80941Smrg if (swizzle[3] == PIPE_SWIZZLE_X) { 3655b8e80941Smrg /* For the pre-defined border color values (white, opaque 3656b8e80941Smrg * black, transparent black), the only thing that matters is 3657b8e80941Smrg * that the alpha channel winds up in the correct place 3658b8e80941Smrg * (because the RGB channels are all the same) so either of 3659b8e80941Smrg * these enumerations will work. 3660b8e80941Smrg */ 3661b8e80941Smrg if (swizzle[2] == PIPE_SWIZZLE_Y) 3662b8e80941Smrg bc_swizzle = V_008F20_BC_SWIZZLE_WZYX; 3663b8e80941Smrg else 3664b8e80941Smrg bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ; 3665b8e80941Smrg } else if (swizzle[0] == PIPE_SWIZZLE_X) { 3666b8e80941Smrg if (swizzle[1] == PIPE_SWIZZLE_Y) 3667b8e80941Smrg bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 3668b8e80941Smrg else 3669b8e80941Smrg bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ; 3670b8e80941Smrg } else if (swizzle[1] == PIPE_SWIZZLE_X) { 3671b8e80941Smrg bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ; 3672b8e80941Smrg } else if (swizzle[2] == PIPE_SWIZZLE_X) { 3673b8e80941Smrg bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW; 3674848b8605Smrg } 3675848b8605Smrg 3676b8e80941Smrg return bc_swizzle; 3677848b8605Smrg} 3678848b8605Smrg 3679b8e80941Smrg/** 3680b8e80941Smrg * Build the sampler view descriptor for a texture. 3681848b8605Smrg */ 3682b8e80941Smrgvoid 3683b8e80941Smrgsi_make_texture_descriptor(struct si_screen *screen, 3684b8e80941Smrg struct si_texture *tex, 3685b8e80941Smrg bool sampler, 3686b8e80941Smrg enum pipe_texture_target target, 3687b8e80941Smrg enum pipe_format pipe_format, 3688b8e80941Smrg const unsigned char state_swizzle[4], 3689b8e80941Smrg unsigned first_level, unsigned last_level, 3690b8e80941Smrg unsigned first_layer, unsigned last_layer, 3691b8e80941Smrg unsigned width, unsigned height, unsigned depth, 3692b8e80941Smrg uint32_t *state, 3693b8e80941Smrg uint32_t *fmask_state) 3694848b8605Smrg{ 3695b8e80941Smrg struct pipe_resource *res = &tex->buffer.b.b; 3696848b8605Smrg const struct util_format_description *desc; 3697b8e80941Smrg unsigned char swizzle[4]; 3698848b8605Smrg int first_non_void; 3699b8e80941Smrg unsigned num_format, data_format, type, num_samples; 3700848b8605Smrg uint64_t va; 3701848b8605Smrg 3702848b8605Smrg desc = util_format_description(pipe_format); 3703848b8605Smrg 3704b8e80941Smrg num_samples = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ? 3705b8e80941Smrg MAX2(1, res->nr_samples) : 3706b8e80941Smrg MAX2(1, res->nr_storage_samples); 3707b8e80941Smrg 3708848b8605Smrg if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 3709848b8605Smrg const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 3710848b8605Smrg const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 3711b8e80941Smrg const unsigned char swizzle_wwww[4] = {3, 3, 3, 3}; 3712848b8605Smrg 3713848b8605Smrg switch (pipe_format) { 3714848b8605Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3715848b8605Smrg case PIPE_FORMAT_X32_S8X24_UINT: 3716848b8605Smrg case PIPE_FORMAT_X8Z24_UNORM: 3717848b8605Smrg util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 3718848b8605Smrg break; 3719b8e80941Smrg case PIPE_FORMAT_X24S8_UINT: 3720b8e80941Smrg /* 3721b8e80941Smrg * X24S8 is implemented as an 8_8_8_8 data format, to 3722b8e80941Smrg * fix texture gathers. This affects at least 3723b8e80941Smrg * GL45-CTS.texture_cube_map_array.sampling on VI. 3724b8e80941Smrg */ 3725b8e80941Smrg if (screen->info.chip_class <= VI) 3726b8e80941Smrg util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); 3727b8e80941Smrg else 3728b8e80941Smrg util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 3729b8e80941Smrg break; 3730848b8605Smrg default: 3731848b8605Smrg util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 3732848b8605Smrg } 3733848b8605Smrg } else { 3734848b8605Smrg util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 3735848b8605Smrg } 3736848b8605Smrg 3737848b8605Smrg first_non_void = util_format_get_first_non_void_channel(pipe_format); 3738848b8605Smrg 3739848b8605Smrg switch (pipe_format) { 3740848b8605Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3741848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 3742848b8605Smrg break; 3743848b8605Smrg default: 3744848b8605Smrg if (first_non_void < 0) { 3745848b8605Smrg if (util_format_is_compressed(pipe_format)) { 3746848b8605Smrg switch (pipe_format) { 3747848b8605Smrg case PIPE_FORMAT_DXT1_SRGB: 3748848b8605Smrg case PIPE_FORMAT_DXT1_SRGBA: 3749848b8605Smrg case PIPE_FORMAT_DXT3_SRGBA: 3750848b8605Smrg case PIPE_FORMAT_DXT5_SRGBA: 3751848b8605Smrg case PIPE_FORMAT_BPTC_SRGBA: 3752b8e80941Smrg case PIPE_FORMAT_ETC2_SRGB8: 3753b8e80941Smrg case PIPE_FORMAT_ETC2_SRGB8A1: 3754b8e80941Smrg case PIPE_FORMAT_ETC2_SRGBA8: 3755848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 3756848b8605Smrg break; 3757848b8605Smrg case PIPE_FORMAT_RGTC1_SNORM: 3758848b8605Smrg case PIPE_FORMAT_LATC1_SNORM: 3759848b8605Smrg case PIPE_FORMAT_RGTC2_SNORM: 3760848b8605Smrg case PIPE_FORMAT_LATC2_SNORM: 3761b8e80941Smrg case PIPE_FORMAT_ETC2_R11_SNORM: 3762b8e80941Smrg case PIPE_FORMAT_ETC2_RG11_SNORM: 3763848b8605Smrg /* implies float, so use SNORM/UNORM to determine 3764848b8605Smrg whether data is signed or not */ 3765848b8605Smrg case PIPE_FORMAT_BPTC_RGB_FLOAT: 3766848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 3767848b8605Smrg break; 3768848b8605Smrg default: 3769848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 3770848b8605Smrg break; 3771848b8605Smrg } 3772848b8605Smrg } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 3773848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 3774848b8605Smrg } else { 3775848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 3776848b8605Smrg } 3777848b8605Smrg } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 3778848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 3779848b8605Smrg } else { 3780848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 3781848b8605Smrg 3782848b8605Smrg switch (desc->channel[first_non_void].type) { 3783848b8605Smrg case UTIL_FORMAT_TYPE_FLOAT: 3784848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 3785848b8605Smrg break; 3786848b8605Smrg case UTIL_FORMAT_TYPE_SIGNED: 3787848b8605Smrg if (desc->channel[first_non_void].normalized) 3788848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 3789848b8605Smrg else if (desc->channel[first_non_void].pure_integer) 3790848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_SINT; 3791848b8605Smrg else 3792848b8605Smrg num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 3793848b8605Smrg break; 3794b8e80941Smrg case UTIL_FORMAT_TYPE_UNSIGNED: 3795b8e80941Smrg if (desc->channel[first_non_void].normalized) 3796b8e80941Smrg num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 3797b8e80941Smrg else if (desc->channel[first_non_void].pure_integer) 3798b8e80941Smrg num_format = V_008F14_IMG_NUM_FORMAT_UINT; 3799b8e80941Smrg else 3800b8e80941Smrg num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 3801b8e80941Smrg } 3802b8e80941Smrg } 3803b8e80941Smrg } 3804b8e80941Smrg 3805b8e80941Smrg data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void); 3806b8e80941Smrg if (data_format == ~0) { 3807b8e80941Smrg data_format = 0; 3808b8e80941Smrg } 3809b8e80941Smrg 3810b8e80941Smrg /* S8 with Z32 HTILE needs a special format. */ 3811b8e80941Smrg if (screen->info.chip_class >= GFX9 && 3812b8e80941Smrg pipe_format == PIPE_FORMAT_S8_UINT && 3813b8e80941Smrg tex->tc_compatible_htile) 3814b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_S8_32; 3815b8e80941Smrg 3816b8e80941Smrg if (!sampler && 3817b8e80941Smrg (res->target == PIPE_TEXTURE_CUBE || 3818b8e80941Smrg res->target == PIPE_TEXTURE_CUBE_ARRAY || 3819b8e80941Smrg (screen->info.chip_class <= VI && 3820b8e80941Smrg res->target == PIPE_TEXTURE_3D))) { 3821b8e80941Smrg /* For the purpose of shader images, treat cube maps and 3D 3822b8e80941Smrg * textures as 2D arrays. For 3D textures, the address 3823b8e80941Smrg * calculations for mipmaps are different, so we rely on the 3824b8e80941Smrg * caller to effectively disable mipmaps. 3825b8e80941Smrg */ 3826b8e80941Smrg type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 3827b8e80941Smrg 3828b8e80941Smrg assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); 3829b8e80941Smrg } else { 3830b8e80941Smrg type = si_tex_dim(screen, tex, target, num_samples); 3831b8e80941Smrg } 3832b8e80941Smrg 3833b8e80941Smrg if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 3834b8e80941Smrg height = 1; 3835b8e80941Smrg depth = res->array_size; 3836b8e80941Smrg } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || 3837b8e80941Smrg type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 3838b8e80941Smrg if (sampler || res->target != PIPE_TEXTURE_3D) 3839b8e80941Smrg depth = res->array_size; 3840b8e80941Smrg } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 3841b8e80941Smrg depth = res->array_size / 6; 3842b8e80941Smrg 3843b8e80941Smrg state[0] = 0; 3844b8e80941Smrg state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) | 3845b8e80941Smrg S_008F14_NUM_FORMAT_GFX6(num_format)); 3846b8e80941Smrg state[2] = (S_008F18_WIDTH(width - 1) | 3847b8e80941Smrg S_008F18_HEIGHT(height - 1) | 3848b8e80941Smrg S_008F18_PERF_MOD(4)); 3849b8e80941Smrg state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 3850b8e80941Smrg S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 3851b8e80941Smrg S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 3852b8e80941Smrg S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 3853b8e80941Smrg S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : first_level) | 3854b8e80941Smrg S_008F1C_LAST_LEVEL(num_samples > 1 ? 3855b8e80941Smrg util_logbase2(num_samples) : 3856b8e80941Smrg last_level) | 3857b8e80941Smrg S_008F1C_TYPE(type)); 3858b8e80941Smrg state[4] = 0; 3859b8e80941Smrg state[5] = S_008F24_BASE_ARRAY(first_layer); 3860b8e80941Smrg state[6] = 0; 3861b8e80941Smrg state[7] = 0; 3862b8e80941Smrg 3863b8e80941Smrg if (screen->info.chip_class >= GFX9) { 3864b8e80941Smrg unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle); 3865b8e80941Smrg 3866b8e80941Smrg /* Depth is the the last accessible layer on Gfx9. 3867b8e80941Smrg * The hw doesn't need to know the total number of layers. 3868b8e80941Smrg */ 3869b8e80941Smrg if (type == V_008F1C_SQ_RSRC_IMG_3D) 3870b8e80941Smrg state[4] |= S_008F20_DEPTH(depth - 1); 3871b8e80941Smrg else 3872b8e80941Smrg state[4] |= S_008F20_DEPTH(last_layer); 3873b8e80941Smrg 3874b8e80941Smrg state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle); 3875b8e80941Smrg state[5] |= S_008F24_MAX_MIP(num_samples > 1 ? 3876b8e80941Smrg util_logbase2(num_samples) : 3877b8e80941Smrg tex->buffer.b.b.last_level); 3878b8e80941Smrg } else { 3879b8e80941Smrg state[3] |= S_008F1C_POW2_PAD(res->last_level > 0); 3880b8e80941Smrg state[4] |= S_008F20_DEPTH(depth - 1); 3881b8e80941Smrg state[5] |= S_008F24_LAST_ARRAY(last_layer); 3882b8e80941Smrg } 3883b8e80941Smrg 3884b8e80941Smrg if (tex->dcc_offset) { 3885b8e80941Smrg state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(pipe_format)); 3886b8e80941Smrg } else { 3887b8e80941Smrg /* The last dword is unused by hw. The shader uses it to clear 3888b8e80941Smrg * bits in the first dword of sampler state. 3889b8e80941Smrg */ 3890b8e80941Smrg if (screen->info.chip_class <= CIK && res->nr_samples <= 1) { 3891b8e80941Smrg if (first_level == last_level) 3892b8e80941Smrg state[7] = C_008F30_MAX_ANISO_RATIO; 3893b8e80941Smrg else 3894b8e80941Smrg state[7] = 0xffffffff; 3895b8e80941Smrg } 3896b8e80941Smrg } 3897b8e80941Smrg 3898b8e80941Smrg /* Initialize the sampler view for FMASK. */ 3899b8e80941Smrg if (tex->surface.fmask_size) { 3900b8e80941Smrg uint32_t data_format, num_format; 3901b8e80941Smrg 3902b8e80941Smrg va = tex->buffer.gpu_address + tex->fmask_offset; 3903b8e80941Smrg 3904b8e80941Smrg#define FMASK(s,f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f))) 3905b8e80941Smrg if (screen->info.chip_class >= GFX9) { 3906b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK; 3907b8e80941Smrg switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 3908b8e80941Smrg case FMASK(2,1): 3909b8e80941Smrg num_format = V_008F14_IMG_FMASK_8_2_1; 3910b8e80941Smrg break; 3911b8e80941Smrg case FMASK(2,2): 3912b8e80941Smrg num_format = V_008F14_IMG_FMASK_8_2_2; 3913b8e80941Smrg break; 3914b8e80941Smrg case FMASK(4,1): 3915b8e80941Smrg num_format = V_008F14_IMG_FMASK_8_4_1; 3916b8e80941Smrg break; 3917b8e80941Smrg case FMASK(4,2): 3918b8e80941Smrg num_format = V_008F14_IMG_FMASK_8_4_2; 3919b8e80941Smrg break; 3920b8e80941Smrg case FMASK(4,4): 3921b8e80941Smrg num_format = V_008F14_IMG_FMASK_8_4_4; 3922b8e80941Smrg break; 3923b8e80941Smrg case FMASK(8,1): 3924b8e80941Smrg num_format = V_008F14_IMG_FMASK_8_8_1; 3925b8e80941Smrg break; 3926b8e80941Smrg case FMASK(8,2): 3927b8e80941Smrg num_format = V_008F14_IMG_FMASK_16_8_2; 3928b8e80941Smrg break; 3929b8e80941Smrg case FMASK(8,4): 3930b8e80941Smrg num_format = V_008F14_IMG_FMASK_32_8_4; 3931b8e80941Smrg break; 3932b8e80941Smrg case FMASK(8,8): 3933b8e80941Smrg num_format = V_008F14_IMG_FMASK_32_8_8; 3934b8e80941Smrg break; 3935b8e80941Smrg case FMASK(16,1): 3936b8e80941Smrg num_format = V_008F14_IMG_FMASK_16_16_1; 3937b8e80941Smrg break; 3938b8e80941Smrg case FMASK(16,2): 3939b8e80941Smrg num_format = V_008F14_IMG_FMASK_32_16_2; 3940b8e80941Smrg break; 3941b8e80941Smrg case FMASK(16,4): 3942b8e80941Smrg num_format = V_008F14_IMG_FMASK_64_16_4; 3943b8e80941Smrg break; 3944b8e80941Smrg case FMASK(16,8): 3945b8e80941Smrg num_format = V_008F14_IMG_FMASK_64_16_8; 3946b8e80941Smrg break; 3947b8e80941Smrg default: 3948b8e80941Smrg unreachable("invalid nr_samples"); 3949b8e80941Smrg } 3950b8e80941Smrg } else { 3951b8e80941Smrg switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 3952b8e80941Smrg case FMASK(2,1): 3953b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1; 3954b8e80941Smrg break; 3955b8e80941Smrg case FMASK(2,2): 3956b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 3957b8e80941Smrg break; 3958b8e80941Smrg case FMASK(4,1): 3959b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1; 3960b8e80941Smrg break; 3961b8e80941Smrg case FMASK(4,2): 3962b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2; 3963b8e80941Smrg break; 3964b8e80941Smrg case FMASK(4,4): 3965b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 3966b8e80941Smrg break; 3967b8e80941Smrg case FMASK(8,1): 3968b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1; 3969b8e80941Smrg break; 3970b8e80941Smrg case FMASK(8,2): 3971b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2; 3972b8e80941Smrg break; 3973b8e80941Smrg case FMASK(8,4): 3974b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4; 3975b8e80941Smrg break; 3976b8e80941Smrg case FMASK(8,8): 3977b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 3978b8e80941Smrg break; 3979b8e80941Smrg case FMASK(16,1): 3980b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1; 3981b8e80941Smrg break; 3982b8e80941Smrg case FMASK(16,2): 3983b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2; 3984b8e80941Smrg break; 3985b8e80941Smrg case FMASK(16,4): 3986b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4; 3987b8e80941Smrg break; 3988b8e80941Smrg case FMASK(16,8): 3989b8e80941Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8; 3990b8e80941Smrg break; 3991b8e80941Smrg default: 3992b8e80941Smrg unreachable("invalid nr_samples"); 3993848b8605Smrg } 3994b8e80941Smrg num_format = V_008F14_IMG_NUM_FORMAT_UINT; 3995b8e80941Smrg } 3996b8e80941Smrg#undef FMASK 3997b8e80941Smrg 3998b8e80941Smrg fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle; 3999b8e80941Smrg fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 4000b8e80941Smrg S_008F14_DATA_FORMAT_GFX6(data_format) | 4001b8e80941Smrg S_008F14_NUM_FORMAT_GFX6(num_format); 4002b8e80941Smrg fmask_state[2] = S_008F18_WIDTH(width - 1) | 4003b8e80941Smrg S_008F18_HEIGHT(height - 1); 4004b8e80941Smrg fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 4005b8e80941Smrg S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 4006b8e80941Smrg S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 4007b8e80941Smrg S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 4008b8e80941Smrg S_008F1C_TYPE(si_tex_dim(screen, tex, target, 0)); 4009b8e80941Smrg fmask_state[4] = 0; 4010b8e80941Smrg fmask_state[5] = S_008F24_BASE_ARRAY(first_layer); 4011b8e80941Smrg fmask_state[6] = 0; 4012b8e80941Smrg fmask_state[7] = 0; 4013b8e80941Smrg 4014b8e80941Smrg if (screen->info.chip_class >= GFX9) { 4015b8e80941Smrg fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode); 4016b8e80941Smrg fmask_state[4] |= S_008F20_DEPTH(last_layer) | 4017b8e80941Smrg S_008F20_PITCH_GFX9(tex->surface.u.gfx9.fmask.epitch); 4018b8e80941Smrg fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned) | 4019b8e80941Smrg S_008F24_META_RB_ALIGNED(tex->surface.u.gfx9.cmask.rb_aligned); 4020b8e80941Smrg } else { 4021b8e80941Smrg fmask_state[3] |= S_008F1C_TILING_INDEX(tex->surface.u.legacy.fmask.tiling_index); 4022b8e80941Smrg fmask_state[4] |= S_008F20_DEPTH(depth - 1) | 4023b8e80941Smrg S_008F20_PITCH_GFX6(tex->surface.u.legacy.fmask.pitch_in_pixels - 1); 4024b8e80941Smrg fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer); 4025848b8605Smrg } 4026848b8605Smrg } 4027b8e80941Smrg} 4028b8e80941Smrg 4029b8e80941Smrg/** 4030b8e80941Smrg * Create a sampler view. 4031b8e80941Smrg * 4032b8e80941Smrg * @param ctx context 4033b8e80941Smrg * @param texture texture 4034b8e80941Smrg * @param state sampler view template 4035b8e80941Smrg * @param width0 width0 override (for compressed textures as int) 4036b8e80941Smrg * @param height0 height0 override (for compressed textures as int) 4037b8e80941Smrg * @param force_level set the base address to the level (for compressed textures) 4038b8e80941Smrg */ 4039b8e80941Smrgstruct pipe_sampler_view * 4040b8e80941Smrgsi_create_sampler_view_custom(struct pipe_context *ctx, 4041b8e80941Smrg struct pipe_resource *texture, 4042b8e80941Smrg const struct pipe_sampler_view *state, 4043b8e80941Smrg unsigned width0, unsigned height0, 4044b8e80941Smrg unsigned force_level) 4045b8e80941Smrg{ 4046b8e80941Smrg struct si_context *sctx = (struct si_context*)ctx; 4047b8e80941Smrg struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); 4048b8e80941Smrg struct si_texture *tex = (struct si_texture*)texture; 4049b8e80941Smrg unsigned base_level, first_level, last_level; 4050b8e80941Smrg unsigned char state_swizzle[4]; 4051b8e80941Smrg unsigned height, depth, width; 4052b8e80941Smrg unsigned last_layer = state->u.tex.last_layer; 4053b8e80941Smrg enum pipe_format pipe_format; 4054b8e80941Smrg const struct legacy_surf_level *surflevel; 4055b8e80941Smrg 4056b8e80941Smrg if (!view) 4057b8e80941Smrg return NULL; 4058b8e80941Smrg 4059b8e80941Smrg /* initialize base object */ 4060b8e80941Smrg view->base = *state; 4061b8e80941Smrg view->base.texture = NULL; 4062b8e80941Smrg view->base.reference.count = 1; 4063b8e80941Smrg view->base.context = ctx; 4064b8e80941Smrg 4065b8e80941Smrg assert(texture); 4066b8e80941Smrg pipe_resource_reference(&view->base.texture, texture); 4067b8e80941Smrg 4068b8e80941Smrg if (state->format == PIPE_FORMAT_X24S8_UINT || 4069b8e80941Smrg state->format == PIPE_FORMAT_S8X24_UINT || 4070b8e80941Smrg state->format == PIPE_FORMAT_X32_S8X24_UINT || 4071b8e80941Smrg state->format == PIPE_FORMAT_S8_UINT) 4072b8e80941Smrg view->is_stencil_sampler = true; 4073b8e80941Smrg 4074b8e80941Smrg /* Buffer resource. */ 4075b8e80941Smrg if (texture->target == PIPE_BUFFER) { 4076b8e80941Smrg si_make_buffer_descriptor(sctx->screen, 4077b8e80941Smrg si_resource(texture), 4078b8e80941Smrg state->format, 4079b8e80941Smrg state->u.buf.offset, 4080b8e80941Smrg state->u.buf.size, 4081b8e80941Smrg view->state); 4082b8e80941Smrg return &view->base; 4083b8e80941Smrg } 4084b8e80941Smrg 4085b8e80941Smrg state_swizzle[0] = state->swizzle_r; 4086b8e80941Smrg state_swizzle[1] = state->swizzle_g; 4087b8e80941Smrg state_swizzle[2] = state->swizzle_b; 4088b8e80941Smrg state_swizzle[3] = state->swizzle_a; 4089848b8605Smrg 4090b8e80941Smrg base_level = 0; 4091b8e80941Smrg first_level = state->u.tex.first_level; 4092b8e80941Smrg last_level = state->u.tex.last_level; 4093b8e80941Smrg width = width0; 4094b8e80941Smrg height = height0; 4095b8e80941Smrg depth = texture->depth0; 4096b8e80941Smrg 4097b8e80941Smrg if (sctx->chip_class <= VI && force_level) { 4098b8e80941Smrg assert(force_level == first_level && 4099b8e80941Smrg force_level == last_level); 4100b8e80941Smrg base_level = force_level; 4101b8e80941Smrg first_level = 0; 4102b8e80941Smrg last_level = 0; 4103b8e80941Smrg width = u_minify(width, force_level); 4104b8e80941Smrg height = u_minify(height, force_level); 4105b8e80941Smrg depth = u_minify(depth, force_level); 4106848b8605Smrg } 4107848b8605Smrg 4108b8e80941Smrg /* This is not needed if state trackers set last_layer correctly. */ 4109b8e80941Smrg if (state->target == PIPE_TEXTURE_1D || 4110b8e80941Smrg state->target == PIPE_TEXTURE_2D || 4111b8e80941Smrg state->target == PIPE_TEXTURE_RECT || 4112b8e80941Smrg state->target == PIPE_TEXTURE_CUBE) 4113b8e80941Smrg last_layer = state->u.tex.first_layer; 4114848b8605Smrg 4115b8e80941Smrg /* Texturing with separate depth and stencil. */ 4116b8e80941Smrg pipe_format = state->format; 4117b8e80941Smrg 4118b8e80941Smrg /* Depth/stencil texturing sometimes needs separate texture. */ 4119b8e80941Smrg if (tex->is_depth && !si_can_sample_zs(tex, view->is_stencil_sampler)) { 4120b8e80941Smrg if (!tex->flushed_depth_texture && 4121b8e80941Smrg !si_init_flushed_depth_texture(ctx, texture, NULL)) { 4122b8e80941Smrg pipe_resource_reference(&view->base.texture, NULL); 4123b8e80941Smrg FREE(view); 4124b8e80941Smrg return NULL; 4125b8e80941Smrg } 4126848b8605Smrg 4127b8e80941Smrg assert(tex->flushed_depth_texture); 4128848b8605Smrg 4129b8e80941Smrg /* Override format for the case where the flushed texture 4130b8e80941Smrg * contains only Z or only S. 4131b8e80941Smrg */ 4132b8e80941Smrg if (tex->flushed_depth_texture->buffer.b.b.format != tex->buffer.b.b.format) 4133b8e80941Smrg pipe_format = tex->flushed_depth_texture->buffer.b.b.format; 4134848b8605Smrg 4135b8e80941Smrg tex = tex->flushed_depth_texture; 4136b8e80941Smrg } 4137b8e80941Smrg 4138b8e80941Smrg surflevel = tex->surface.u.legacy.level; 4139b8e80941Smrg 4140b8e80941Smrg if (tex->db_compatible) { 4141b8e80941Smrg if (!view->is_stencil_sampler) 4142b8e80941Smrg pipe_format = tex->db_render_format; 4143b8e80941Smrg 4144b8e80941Smrg switch (pipe_format) { 4145b8e80941Smrg case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 4146b8e80941Smrg pipe_format = PIPE_FORMAT_Z32_FLOAT; 4147848b8605Smrg break; 4148b8e80941Smrg case PIPE_FORMAT_X8Z24_UNORM: 4149b8e80941Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 4150b8e80941Smrg /* Z24 is always stored like this for DB 4151b8e80941Smrg * compatibility. 4152b8e80941Smrg */ 4153b8e80941Smrg pipe_format = PIPE_FORMAT_Z24X8_UNORM; 4154848b8605Smrg break; 4155b8e80941Smrg case PIPE_FORMAT_X24S8_UINT: 4156b8e80941Smrg case PIPE_FORMAT_S8X24_UINT: 4157b8e80941Smrg case PIPE_FORMAT_X32_S8X24_UINT: 4158b8e80941Smrg pipe_format = PIPE_FORMAT_S8_UINT; 4159b8e80941Smrg surflevel = tex->surface.u.legacy.stencil_level; 4160848b8605Smrg break; 4161b8e80941Smrg default:; 4162b8e80941Smrg } 4163848b8605Smrg } 4164848b8605Smrg 4165b8e80941Smrg view->dcc_incompatible = 4166b8e80941Smrg vi_dcc_formats_are_incompatible(texture, 4167b8e80941Smrg state->u.tex.first_level, 4168b8e80941Smrg state->format); 4169b8e80941Smrg 4170b8e80941Smrg si_make_texture_descriptor(sctx->screen, tex, true, 4171b8e80941Smrg state->target, pipe_format, state_swizzle, 4172b8e80941Smrg first_level, last_level, 4173b8e80941Smrg state->u.tex.first_layer, last_layer, 4174b8e80941Smrg width, height, depth, 4175b8e80941Smrg view->state, view->fmask_state); 4176b8e80941Smrg 4177b8e80941Smrg unsigned num_format = G_008F14_NUM_FORMAT_GFX6(view->state[1]); 4178b8e80941Smrg view->is_integer = 4179b8e80941Smrg num_format == V_008F14_IMG_NUM_FORMAT_USCALED || 4180b8e80941Smrg num_format == V_008F14_IMG_NUM_FORMAT_SSCALED || 4181b8e80941Smrg num_format == V_008F14_IMG_NUM_FORMAT_UINT || 4182b8e80941Smrg num_format == V_008F14_IMG_NUM_FORMAT_SINT; 4183b8e80941Smrg view->base_level_info = &surflevel[base_level]; 4184b8e80941Smrg view->base_level = base_level; 4185b8e80941Smrg view->block_width = util_format_get_blockwidth(pipe_format); 4186848b8605Smrg return &view->base; 4187848b8605Smrg} 4188848b8605Smrg 4189b8e80941Smrgstatic struct pipe_sampler_view * 4190b8e80941Smrgsi_create_sampler_view(struct pipe_context *ctx, 4191b8e80941Smrg struct pipe_resource *texture, 4192b8e80941Smrg const struct pipe_sampler_view *state) 4193b8e80941Smrg{ 4194b8e80941Smrg return si_create_sampler_view_custom(ctx, texture, state, 4195b8e80941Smrg texture ? texture->width0 : 0, 4196b8e80941Smrg texture ? texture->height0 : 0, 0); 4197b8e80941Smrg} 4198b8e80941Smrg 4199848b8605Smrgstatic void si_sampler_view_destroy(struct pipe_context *ctx, 4200848b8605Smrg struct pipe_sampler_view *state) 4201848b8605Smrg{ 4202b8e80941Smrg struct si_sampler_view *view = (struct si_sampler_view *)state; 4203848b8605Smrg 4204848b8605Smrg pipe_resource_reference(&state->texture, NULL); 4205848b8605Smrg FREE(view); 4206848b8605Smrg} 4207848b8605Smrg 4208848b8605Smrgstatic bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 4209848b8605Smrg{ 4210848b8605Smrg return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 4211848b8605Smrg wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 4212848b8605Smrg (linear_filter && 4213848b8605Smrg (wrap == PIPE_TEX_WRAP_CLAMP || 4214848b8605Smrg wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 4215848b8605Smrg} 4216848b8605Smrg 4217b8e80941Smrgstatic uint32_t si_translate_border_color(struct si_context *sctx, 4218b8e80941Smrg const struct pipe_sampler_state *state, 4219b8e80941Smrg const union pipe_color_union *color, 4220b8e80941Smrg bool is_integer) 4221848b8605Smrg{ 4222848b8605Smrg bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 4223848b8605Smrg state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 4224848b8605Smrg 4225b8e80941Smrg if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) && 4226b8e80941Smrg !wrap_mode_uses_border_color(state->wrap_t, linear_filter) && 4227b8e80941Smrg !wrap_mode_uses_border_color(state->wrap_r, linear_filter)) 4228b8e80941Smrg return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 4229b8e80941Smrg 4230b8e80941Smrg#define simple_border_types(elt) \ 4231b8e80941Smrgdo { \ 4232b8e80941Smrg if (color->elt[0] == 0 && color->elt[1] == 0 && \ 4233b8e80941Smrg color->elt[2] == 0 && color->elt[3] == 0) \ 4234b8e80941Smrg return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); \ 4235b8e80941Smrg if (color->elt[0] == 0 && color->elt[1] == 0 && \ 4236b8e80941Smrg color->elt[2] == 0 && color->elt[3] == 1) \ 4237b8e80941Smrg return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK); \ 4238b8e80941Smrg if (color->elt[0] == 1 && color->elt[1] == 1 && \ 4239b8e80941Smrg color->elt[2] == 1 && color->elt[3] == 1) \ 4240b8e80941Smrg return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE); \ 4241b8e80941Smrg} while (false) 4242b8e80941Smrg 4243b8e80941Smrg if (is_integer) 4244b8e80941Smrg simple_border_types(ui); 4245b8e80941Smrg else 4246b8e80941Smrg simple_border_types(f); 4247b8e80941Smrg 4248b8e80941Smrg#undef simple_border_types 4249b8e80941Smrg 4250b8e80941Smrg int i; 4251b8e80941Smrg 4252b8e80941Smrg /* Check if the border has been uploaded already. */ 4253b8e80941Smrg for (i = 0; i < sctx->border_color_count; i++) 4254b8e80941Smrg if (memcmp(&sctx->border_color_table[i], color, 4255b8e80941Smrg sizeof(*color)) == 0) 4256b8e80941Smrg break; 4257b8e80941Smrg 4258b8e80941Smrg if (i >= SI_MAX_BORDER_COLORS) { 4259b8e80941Smrg /* Getting 4096 unique border colors is very unlikely. */ 4260b8e80941Smrg fprintf(stderr, "radeonsi: The border color table is full. " 4261b8e80941Smrg "Any new border colors will be just black. " 4262b8e80941Smrg "Please file a bug.\n"); 4263b8e80941Smrg return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 4264b8e80941Smrg } 4265b8e80941Smrg 4266b8e80941Smrg if (i == sctx->border_color_count) { 4267b8e80941Smrg /* Upload a new border color. */ 4268b8e80941Smrg memcpy(&sctx->border_color_table[i], color, 4269b8e80941Smrg sizeof(*color)); 4270b8e80941Smrg util_memcpy_cpu_to_le32(&sctx->border_color_map[i], 4271b8e80941Smrg color, sizeof(*color)); 4272b8e80941Smrg sctx->border_color_count++; 4273b8e80941Smrg } 4274b8e80941Smrg 4275b8e80941Smrg return S_008F3C_BORDER_COLOR_PTR(i) | 4276b8e80941Smrg S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER); 4277b8e80941Smrg} 4278b8e80941Smrg 4279b8e80941Smrgstatic inline int S_FIXED(float value, unsigned frac_bits) 4280b8e80941Smrg{ 4281b8e80941Smrg return value * (1 << frac_bits); 4282b8e80941Smrg} 4283b8e80941Smrg 4284b8e80941Smrgstatic inline unsigned si_tex_filter(unsigned filter, unsigned max_aniso) 4285b8e80941Smrg{ 4286b8e80941Smrg if (filter == PIPE_TEX_FILTER_LINEAR) 4287b8e80941Smrg return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR 4288b8e80941Smrg : V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 4289b8e80941Smrg else 4290b8e80941Smrg return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT 4291b8e80941Smrg : V_008F38_SQ_TEX_XY_FILTER_POINT; 4292b8e80941Smrg} 4293b8e80941Smrg 4294b8e80941Smrgstatic inline unsigned si_tex_aniso_filter(unsigned filter) 4295b8e80941Smrg{ 4296b8e80941Smrg if (filter < 2) 4297b8e80941Smrg return 0; 4298b8e80941Smrg if (filter < 4) 4299b8e80941Smrg return 1; 4300b8e80941Smrg if (filter < 8) 4301b8e80941Smrg return 2; 4302b8e80941Smrg if (filter < 16) 4303b8e80941Smrg return 3; 4304b8e80941Smrg return 4; 4305848b8605Smrg} 4306848b8605Smrg 4307848b8605Smrgstatic void *si_create_sampler_state(struct pipe_context *ctx, 4308848b8605Smrg const struct pipe_sampler_state *state) 4309848b8605Smrg{ 4310b8e80941Smrg struct si_context *sctx = (struct si_context *)ctx; 4311b8e80941Smrg struct si_screen *sscreen = sctx->screen; 4312b8e80941Smrg struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 4313b8e80941Smrg unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso 4314b8e80941Smrg : state->max_anisotropy; 4315b8e80941Smrg unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso); 4316b8e80941Smrg union pipe_color_union clamped_border_color; 4317848b8605Smrg 4318b8e80941Smrg if (!rstate) { 4319848b8605Smrg return NULL; 4320848b8605Smrg } 4321848b8605Smrg 4322b8e80941Smrg#ifdef DEBUG 4323b8e80941Smrg rstate->magic = SI_SAMPLER_STATE_MAGIC; 4324b8e80941Smrg#endif 4325848b8605Smrg rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 4326848b8605Smrg S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 4327848b8605Smrg S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 4328b8e80941Smrg S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 4329848b8605Smrg S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 4330848b8605Smrg S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 4331b8e80941Smrg S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | 4332b8e80941Smrg S_008F30_ANISO_BIAS(max_aniso_ratio) | 4333b8e80941Smrg S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 4334b8e80941Smrg S_008F30_COMPAT_MODE(sctx->chip_class >= VI)); 4335848b8605Smrg rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 4336b8e80941Smrg S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | 4337b8e80941Smrg S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0)); 4338848b8605Smrg rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 4339b8e80941Smrg S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) | 4340b8e80941Smrg S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) | 4341b8e80941Smrg S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | 4342b8e80941Smrg S_008F38_MIP_POINT_PRECLAMP(0) | 4343b8e80941Smrg S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= VI) | 4344b8e80941Smrg S_008F38_FILTER_PREC_FIX(1) | 4345b8e80941Smrg S_008F38_ANISO_OVERRIDE(sctx->chip_class >= VI)); 4346b8e80941Smrg rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, false); 4347b8e80941Smrg 4348b8e80941Smrg /* Create sampler resource for integer textures. */ 4349b8e80941Smrg memcpy(rstate->integer_val, rstate->val, sizeof(rstate->val)); 4350b8e80941Smrg rstate->integer_val[3] = si_translate_border_color(sctx, state, &state->border_color, true); 4351b8e80941Smrg 4352b8e80941Smrg /* Create sampler resource for upgraded depth textures. */ 4353b8e80941Smrg memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val)); 4354b8e80941Smrg 4355b8e80941Smrg for (unsigned i = 0; i < 4; ++i) { 4356b8e80941Smrg /* Use channel 0 on purpose, so that we can use OPAQUE_WHITE 4357b8e80941Smrg * when the border color is 1.0. */ 4358b8e80941Smrg clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1); 4359848b8605Smrg } 4360848b8605Smrg 4361b8e80941Smrg if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0) 4362b8e80941Smrg rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1); 4363b8e80941Smrg else 4364b8e80941Smrg rstate->upgraded_depth_val[3] = 4365b8e80941Smrg si_translate_border_color(sctx, state, &clamped_border_color, false) | 4366b8e80941Smrg S_008F3C_UPGRADED_DEPTH(1); 4367848b8605Smrg 4368b8e80941Smrg return rstate; 4369848b8605Smrg} 4370848b8605Smrg 4371b8e80941Smrgstatic void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 4372848b8605Smrg{ 4373848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 4374848b8605Smrg 4375b8e80941Smrg if (sctx->sample_mask == (uint16_t)sample_mask) 4376848b8605Smrg return; 4377848b8605Smrg 4378b8e80941Smrg sctx->sample_mask = sample_mask; 4379b8e80941Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_mask); 4380848b8605Smrg} 4381848b8605Smrg 4382b8e80941Smrgstatic void si_emit_sample_mask(struct si_context *sctx) 4383848b8605Smrg{ 4384b8e80941Smrg struct radeon_cmdbuf *cs = sctx->gfx_cs; 4385b8e80941Smrg unsigned mask = sctx->sample_mask; 4386b8e80941Smrg 4387b8e80941Smrg /* Needed for line and polygon smoothing as well as for the Polaris 4388b8e80941Smrg * small primitive filter. We expect the state tracker to take care of 4389b8e80941Smrg * this for us. 4390b8e80941Smrg */ 4391b8e80941Smrg assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || 4392b8e80941Smrg (mask & 1 && sctx->blitter->running)); 4393b8e80941Smrg 4394b8e80941Smrg radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 4395b8e80941Smrg radeon_emit(cs, mask | (mask << 16)); 4396b8e80941Smrg radeon_emit(cs, mask | (mask << 16)); 4397848b8605Smrg} 4398848b8605Smrg 4399848b8605Smrgstatic void si_delete_sampler_state(struct pipe_context *ctx, void *state) 4400848b8605Smrg{ 4401b8e80941Smrg#ifdef DEBUG 4402b8e80941Smrg struct si_sampler_state *s = state; 4403b8e80941Smrg 4404b8e80941Smrg assert(s->magic == SI_SAMPLER_STATE_MAGIC); 4405b8e80941Smrg s->magic = 0; 4406b8e80941Smrg#endif 4407848b8605Smrg free(state); 4408848b8605Smrg} 4409848b8605Smrg 4410848b8605Smrg/* 4411848b8605Smrg * Vertex elements & buffers 4412848b8605Smrg */ 4413848b8605Smrg 4414b8e80941Smrgstruct util_fast_udiv_info32 { 4415b8e80941Smrg unsigned multiplier; /* the "magic number" multiplier */ 4416b8e80941Smrg unsigned pre_shift; /* shift for the dividend before multiplying */ 4417b8e80941Smrg unsigned post_shift; /* shift for the dividend after multiplying */ 4418b8e80941Smrg int increment; /* 0 or 1; if set then increment the numerator, using one of 4419b8e80941Smrg the two strategies */ 4420b8e80941Smrg}; 4421b8e80941Smrg 4422b8e80941Smrgstatic struct util_fast_udiv_info32 4423b8e80941Smrgutil_compute_fast_udiv_info32(uint32_t D, unsigned num_bits) 4424b8e80941Smrg{ 4425b8e80941Smrg struct util_fast_udiv_info info = 4426b8e80941Smrg util_compute_fast_udiv_info(D, num_bits, 32); 4427b8e80941Smrg 4428b8e80941Smrg struct util_fast_udiv_info32 result = { 4429b8e80941Smrg info.multiplier, 4430b8e80941Smrg info.pre_shift, 4431b8e80941Smrg info.post_shift, 4432b8e80941Smrg info.increment, 4433b8e80941Smrg }; 4434b8e80941Smrg return result; 4435b8e80941Smrg} 4436b8e80941Smrg 4437848b8605Smrgstatic void *si_create_vertex_elements(struct pipe_context *ctx, 4438848b8605Smrg unsigned count, 4439848b8605Smrg const struct pipe_vertex_element *elements) 4440848b8605Smrg{ 4441b8e80941Smrg struct si_screen *sscreen = (struct si_screen*)ctx->screen; 4442b8e80941Smrg struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements); 4443b8e80941Smrg bool used[SI_NUM_VERTEX_BUFFERS] = {}; 4444b8e80941Smrg struct util_fast_udiv_info32 divisor_factors[SI_MAX_ATTRIBS] = {}; 4445b8e80941Smrg STATIC_ASSERT(sizeof(struct util_fast_udiv_info32) == 16); 4446b8e80941Smrg STATIC_ASSERT(sizeof(divisor_factors[0].multiplier) == 4); 4447b8e80941Smrg STATIC_ASSERT(sizeof(divisor_factors[0].pre_shift) == 4); 4448b8e80941Smrg STATIC_ASSERT(sizeof(divisor_factors[0].post_shift) == 4); 4449b8e80941Smrg STATIC_ASSERT(sizeof(divisor_factors[0].increment) == 4); 4450848b8605Smrg int i; 4451848b8605Smrg 4452b8e80941Smrg assert(count <= SI_MAX_ATTRIBS); 4453848b8605Smrg if (!v) 4454848b8605Smrg return NULL; 4455848b8605Smrg 4456848b8605Smrg v->count = count; 4457b8e80941Smrg v->desc_list_byte_size = align(count * 16, SI_CPDMA_ALIGNMENT); 4458b8e80941Smrg 4459848b8605Smrg for (i = 0; i < count; ++i) { 4460848b8605Smrg const struct util_format_description *desc; 4461b8e80941Smrg const struct util_format_channel_description *channel; 4462848b8605Smrg unsigned data_format, num_format; 4463848b8605Smrg int first_non_void; 4464b8e80941Smrg unsigned vbo_index = elements[i].vertex_buffer_index; 4465b8e80941Smrg unsigned char swizzle[4]; 4466b8e80941Smrg 4467b8e80941Smrg if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { 4468b8e80941Smrg FREE(v); 4469b8e80941Smrg return NULL; 4470b8e80941Smrg } 4471b8e80941Smrg 4472b8e80941Smrg unsigned instance_divisor = elements[i].instance_divisor; 4473b8e80941Smrg if (instance_divisor) { 4474b8e80941Smrg v->uses_instance_divisors = true; 4475b8e80941Smrg 4476b8e80941Smrg if (instance_divisor == 1) { 4477b8e80941Smrg v->instance_divisor_is_one |= 1u << i; 4478b8e80941Smrg } else { 4479b8e80941Smrg v->instance_divisor_is_fetched |= 1u << i; 4480b8e80941Smrg divisor_factors[i] = 4481b8e80941Smrg util_compute_fast_udiv_info32(instance_divisor, 32); 4482b8e80941Smrg } 4483b8e80941Smrg } 4484b8e80941Smrg 4485b8e80941Smrg if (!used[vbo_index]) { 4486b8e80941Smrg v->first_vb_use_mask |= 1 << i; 4487b8e80941Smrg used[vbo_index] = true; 4488b8e80941Smrg } 4489848b8605Smrg 4490848b8605Smrg desc = util_format_description(elements[i].src_format); 4491848b8605Smrg first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 4492848b8605Smrg data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 4493848b8605Smrg num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 4494b8e80941Smrg channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL; 4495b8e80941Smrg memcpy(swizzle, desc->swizzle, sizeof(swizzle)); 4496b8e80941Smrg 4497b8e80941Smrg v->format_size[i] = desc->block.bits / 8; 4498b8e80941Smrg v->src_offset[i] = elements[i].src_offset; 4499b8e80941Smrg v->vertex_buffer_index[i] = vbo_index; 4500b8e80941Smrg 4501b8e80941Smrg /* The hardware always treats the 2-bit alpha channel as 4502b8e80941Smrg * unsigned, so a shader workaround is needed. The affected 4503b8e80941Smrg * chips are VI and older except Stoney (GFX8.1). 4504b8e80941Smrg */ 4505b8e80941Smrg if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10 && 4506b8e80941Smrg sscreen->info.chip_class <= VI && 4507b8e80941Smrg sscreen->info.family != CHIP_STONEY) { 4508b8e80941Smrg if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { 4509b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_A2_SNORM; 4510b8e80941Smrg } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) { 4511b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_A2_SSCALED; 4512b8e80941Smrg } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) { 4513b8e80941Smrg /* This isn't actually used in OpenGL. */ 4514b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_A2_SINT; 4515b8e80941Smrg } 4516b8e80941Smrg } else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) { 4517b8e80941Smrg if (desc->swizzle[3] == PIPE_SWIZZLE_1) 4518b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_FIXED; 4519b8e80941Smrg else 4520b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_FIXED; 4521b8e80941Smrg } else if (channel && channel->size == 32 && !channel->pure_integer) { 4522b8e80941Smrg if (channel->type == UTIL_FORMAT_TYPE_SIGNED) { 4523b8e80941Smrg if (channel->normalized) { 4524b8e80941Smrg if (desc->swizzle[3] == PIPE_SWIZZLE_1) 4525b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_SNORM; 4526b8e80941Smrg else 4527b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SNORM; 4528b8e80941Smrg } else { 4529b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SSCALED; 4530b8e80941Smrg } 4531b8e80941Smrg } else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) { 4532b8e80941Smrg if (channel->normalized) { 4533b8e80941Smrg if (desc->swizzle[3] == PIPE_SWIZZLE_1) 4534b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_UNORM; 4535b8e80941Smrg else 4536b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_UNORM; 4537b8e80941Smrg } else { 4538b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_USCALED; 4539b8e80941Smrg } 4540b8e80941Smrg } 4541b8e80941Smrg } else if (channel && channel->size == 64 && 4542b8e80941Smrg channel->type == UTIL_FORMAT_TYPE_FLOAT) { 4543b8e80941Smrg switch (desc->nr_channels) { 4544b8e80941Smrg case 1: 4545b8e80941Smrg case 2: 4546b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RG_64_FLOAT; 4547b8e80941Smrg swizzle[0] = PIPE_SWIZZLE_X; 4548b8e80941Smrg swizzle[1] = PIPE_SWIZZLE_Y; 4549b8e80941Smrg swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0; 4550b8e80941Smrg swizzle[3] = desc->nr_channels == 2 ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_0; 4551b8e80941Smrg break; 4552b8e80941Smrg case 3: 4553b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGB_64_FLOAT; 4554b8e80941Smrg swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */ 4555b8e80941Smrg swizzle[1] = PIPE_SWIZZLE_Y; 4556b8e80941Smrg swizzle[2] = PIPE_SWIZZLE_0; 4557b8e80941Smrg swizzle[3] = PIPE_SWIZZLE_0; 4558b8e80941Smrg break; 4559b8e80941Smrg case 4: 4560b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGBA_64_FLOAT; 4561b8e80941Smrg swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */ 4562b8e80941Smrg swizzle[1] = PIPE_SWIZZLE_Y; 4563b8e80941Smrg swizzle[2] = PIPE_SWIZZLE_Z; 4564b8e80941Smrg swizzle[3] = PIPE_SWIZZLE_W; 4565b8e80941Smrg break; 4566b8e80941Smrg default: 4567b8e80941Smrg assert(0); 4568b8e80941Smrg } 4569b8e80941Smrg } else if (channel && desc->nr_channels == 3) { 4570b8e80941Smrg assert(desc->swizzle[0] == PIPE_SWIZZLE_X); 4571b8e80941Smrg 4572b8e80941Smrg if (channel->size == 8) { 4573b8e80941Smrg if (channel->pure_integer) 4574b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGB_8_INT; 4575b8e80941Smrg else 4576b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGB_8; 4577b8e80941Smrg } else if (channel->size == 16) { 4578b8e80941Smrg if (channel->pure_integer) 4579b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGB_16_INT; 4580b8e80941Smrg else 4581b8e80941Smrg v->fix_fetch[i] = SI_FIX_FETCH_RGB_16; 4582b8e80941Smrg } 4583b8e80941Smrg } 4584848b8605Smrg 4585b8e80941Smrg v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 4586b8e80941Smrg S_008F0C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 4587b8e80941Smrg S_008F0C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 4588b8e80941Smrg S_008F0C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 4589848b8605Smrg S_008F0C_NUM_FORMAT(num_format) | 4590848b8605Smrg S_008F0C_DATA_FORMAT(data_format); 4591848b8605Smrg } 4592848b8605Smrg 4593b8e80941Smrg if (v->instance_divisor_is_fetched) { 4594b8e80941Smrg unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched); 4595b8e80941Smrg 4596b8e80941Smrg v->instance_divisor_factor_buffer = 4597b8e80941Smrg (struct si_resource*) 4598b8e80941Smrg pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT, 4599b8e80941Smrg num_divisors * sizeof(divisor_factors[0])); 4600b8e80941Smrg if (!v->instance_divisor_factor_buffer) { 4601b8e80941Smrg FREE(v); 4602b8e80941Smrg return NULL; 4603b8e80941Smrg } 4604b8e80941Smrg void *map = sscreen->ws->buffer_map(v->instance_divisor_factor_buffer->buf, 4605b8e80941Smrg NULL, PIPE_TRANSFER_WRITE); 4606b8e80941Smrg memcpy(map , divisor_factors, num_divisors * sizeof(divisor_factors[0])); 4607b8e80941Smrg } 4608848b8605Smrg return v; 4609848b8605Smrg} 4610848b8605Smrg 4611848b8605Smrgstatic void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 4612848b8605Smrg{ 4613848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 4614b8e80941Smrg struct si_vertex_elements *old = sctx->vertex_elements; 4615b8e80941Smrg struct si_vertex_elements *v = (struct si_vertex_elements*)state; 4616848b8605Smrg 4617848b8605Smrg sctx->vertex_elements = v; 4618848b8605Smrg sctx->vertex_buffers_dirty = true; 4619b8e80941Smrg 4620b8e80941Smrg if (v && 4621b8e80941Smrg (!old || 4622b8e80941Smrg old->count != v->count || 4623b8e80941Smrg old->uses_instance_divisors != v->uses_instance_divisors || 4624b8e80941Smrg v->uses_instance_divisors || /* we don't check which divisors changed */ 4625b8e80941Smrg memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count))) 4626b8e80941Smrg sctx->do_update_shaders = true; 4627b8e80941Smrg 4628b8e80941Smrg if (v && v->instance_divisor_is_fetched) { 4629b8e80941Smrg struct pipe_constant_buffer cb; 4630b8e80941Smrg 4631b8e80941Smrg cb.buffer = &v->instance_divisor_factor_buffer->b.b; 4632b8e80941Smrg cb.user_buffer = NULL; 4633b8e80941Smrg cb.buffer_offset = 0; 4634b8e80941Smrg cb.buffer_size = 0xffffffff; 4635b8e80941Smrg si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb); 4636b8e80941Smrg } 4637848b8605Smrg} 4638848b8605Smrg 4639848b8605Smrgstatic void si_delete_vertex_element(struct pipe_context *ctx, void *state) 4640848b8605Smrg{ 4641848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 4642b8e80941Smrg struct si_vertex_elements *v = (struct si_vertex_elements*)state; 4643848b8605Smrg 4644848b8605Smrg if (sctx->vertex_elements == state) 4645848b8605Smrg sctx->vertex_elements = NULL; 4646b8e80941Smrg si_resource_reference(&v->instance_divisor_factor_buffer, NULL); 4647848b8605Smrg FREE(state); 4648848b8605Smrg} 4649848b8605Smrg 4650848b8605Smrgstatic void si_set_vertex_buffers(struct pipe_context *ctx, 4651848b8605Smrg unsigned start_slot, unsigned count, 4652848b8605Smrg const struct pipe_vertex_buffer *buffers) 4653848b8605Smrg{ 4654848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 4655848b8605Smrg struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 4656848b8605Smrg int i; 4657848b8605Smrg 4658b8e80941Smrg assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer)); 4659848b8605Smrg 4660848b8605Smrg if (buffers) { 4661848b8605Smrg for (i = 0; i < count; i++) { 4662848b8605Smrg const struct pipe_vertex_buffer *src = buffers + i; 4663848b8605Smrg struct pipe_vertex_buffer *dsti = dst + i; 4664b8e80941Smrg struct pipe_resource *buf = src->buffer.resource; 4665848b8605Smrg 4666b8e80941Smrg pipe_resource_reference(&dsti->buffer.resource, buf); 4667848b8605Smrg dsti->buffer_offset = src->buffer_offset; 4668848b8605Smrg dsti->stride = src->stride; 4669b8e80941Smrg si_context_add_resource_size(sctx, buf); 4670b8e80941Smrg if (buf) 4671b8e80941Smrg si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; 4672848b8605Smrg } 4673848b8605Smrg } else { 4674848b8605Smrg for (i = 0; i < count; i++) { 4675b8e80941Smrg pipe_resource_reference(&dst[i].buffer.resource, NULL); 4676848b8605Smrg } 4677848b8605Smrg } 4678848b8605Smrg sctx->vertex_buffers_dirty = true; 4679848b8605Smrg} 4680848b8605Smrg 4681b8e80941Smrg/* 4682b8e80941Smrg * Misc 4683b8e80941Smrg */ 4684b8e80941Smrg 4685b8e80941Smrgstatic void si_set_tess_state(struct pipe_context *ctx, 4686b8e80941Smrg const float default_outer_level[4], 4687b8e80941Smrg const float default_inner_level[2]) 4688848b8605Smrg{ 4689848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 4690b8e80941Smrg struct pipe_constant_buffer cb; 4691b8e80941Smrg float array[8]; 4692848b8605Smrg 4693b8e80941Smrg memcpy(array, default_outer_level, sizeof(float) * 4); 4694b8e80941Smrg memcpy(array+4, default_inner_level, sizeof(float) * 2); 4695b8e80941Smrg 4696b8e80941Smrg cb.buffer = NULL; 4697b8e80941Smrg cb.user_buffer = NULL; 4698b8e80941Smrg cb.buffer_size = sizeof(array); 4699b8e80941Smrg 4700b8e80941Smrg si_upload_const_buffer(sctx, (struct si_resource**)&cb.buffer, 4701b8e80941Smrg (void*)array, sizeof(array), 4702b8e80941Smrg &cb.buffer_offset); 4703b8e80941Smrg 4704b8e80941Smrg si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); 4705b8e80941Smrg pipe_resource_reference(&cb.buffer, NULL); 4706848b8605Smrg} 4707848b8605Smrg 4708b8e80941Smrgstatic void si_texture_barrier(struct pipe_context *ctx, unsigned flags) 4709848b8605Smrg{ 4710b8e80941Smrg struct si_context *sctx = (struct si_context *)ctx; 4711b8e80941Smrg 4712b8e80941Smrg si_update_fb_dirtiness_after_rendering(sctx); 4713b8e80941Smrg 4714b8e80941Smrg /* Multisample surfaces are flushed in si_decompress_textures. */ 4715b8e80941Smrg if (sctx->framebuffer.uncompressed_cb_mask) { 4716b8e80941Smrg si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 4717b8e80941Smrg sctx->framebuffer.CB_has_shader_readable_metadata, 4718b8e80941Smrg sctx->framebuffer.all_DCC_pipe_aligned); 4719b8e80941Smrg } 4720848b8605Smrg} 4721848b8605Smrg 4722b8e80941Smrg/* This only ensures coherency for shader image/buffer stores. */ 4723b8e80941Smrgstatic void si_memory_barrier(struct pipe_context *ctx, unsigned flags) 4724848b8605Smrg{ 4725848b8605Smrg struct si_context *sctx = (struct si_context *)ctx; 4726848b8605Smrg 4727b8e80941Smrg if (!(flags & ~PIPE_BARRIER_UPDATE)) 4728b8e80941Smrg return; 4729b8e80941Smrg 4730b8e80941Smrg /* Subsequent commands must wait for all shader invocations to 4731b8e80941Smrg * complete. */ 4732b8e80941Smrg sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | 4733b8e80941Smrg SI_CONTEXT_CS_PARTIAL_FLUSH; 4734b8e80941Smrg 4735b8e80941Smrg if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 4736b8e80941Smrg sctx->flags |= SI_CONTEXT_INV_SMEM_L1 | 4737b8e80941Smrg SI_CONTEXT_INV_VMEM_L1; 4738b8e80941Smrg 4739b8e80941Smrg if (flags & (PIPE_BARRIER_VERTEX_BUFFER | 4740b8e80941Smrg PIPE_BARRIER_SHADER_BUFFER | 4741b8e80941Smrg PIPE_BARRIER_TEXTURE | 4742b8e80941Smrg PIPE_BARRIER_IMAGE | 4743b8e80941Smrg PIPE_BARRIER_STREAMOUT_BUFFER | 4744b8e80941Smrg PIPE_BARRIER_GLOBAL_BUFFER)) { 4745b8e80941Smrg /* As far as I can tell, L1 contents are written back to L2 4746b8e80941Smrg * automatically at end of shader, but the contents of other 4747b8e80941Smrg * L1 caches might still be stale. */ 4748b8e80941Smrg sctx->flags |= SI_CONTEXT_INV_VMEM_L1; 4749b8e80941Smrg } 4750b8e80941Smrg 4751b8e80941Smrg if (flags & PIPE_BARRIER_INDEX_BUFFER) { 4752b8e80941Smrg /* Indices are read through TC L2 since VI. 4753b8e80941Smrg * L1 isn't used. 4754b8e80941Smrg */ 4755b8e80941Smrg if (sctx->screen->info.chip_class <= CIK) 4756b8e80941Smrg sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 4757b8e80941Smrg } 4758b8e80941Smrg 4759b8e80941Smrg /* MSAA color, any depth and any stencil are flushed in 4760b8e80941Smrg * si_decompress_textures when needed. 4761b8e80941Smrg */ 4762b8e80941Smrg if (flags & PIPE_BARRIER_FRAMEBUFFER && 4763b8e80941Smrg sctx->framebuffer.uncompressed_cb_mask) { 4764b8e80941Smrg sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 4765b8e80941Smrg 4766b8e80941Smrg if (sctx->chip_class <= VI) 4767b8e80941Smrg sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 4768b8e80941Smrg } 4769b8e80941Smrg 4770b8e80941Smrg /* Indirect buffers use TC L2 on GFX9, but not older hw. */ 4771b8e80941Smrg if (sctx->screen->info.chip_class <= VI && 4772b8e80941Smrg flags & PIPE_BARRIER_INDIRECT_BUFFER) 4773b8e80941Smrg sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 4774848b8605Smrg} 4775848b8605Smrg 4776848b8605Smrgstatic void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 4777848b8605Smrg{ 4778848b8605Smrg struct pipe_blend_state blend; 4779848b8605Smrg 4780848b8605Smrg memset(&blend, 0, sizeof(blend)); 4781848b8605Smrg blend.independent_blend_enable = true; 4782848b8605Smrg blend.rt[0].colormask = 0xf; 4783b8e80941Smrg return si_create_blend_state_mode(&sctx->b, &blend, mode); 4784848b8605Smrg} 4785848b8605Smrg 4786b8e80941Smrgstatic void si_init_config(struct si_context *sctx); 4787848b8605Smrg 4788b8e80941Smrgvoid si_init_state_compute_functions(struct si_context *sctx) 4789848b8605Smrg{ 4790b8e80941Smrg sctx->b.create_sampler_state = si_create_sampler_state; 4791b8e80941Smrg sctx->b.delete_sampler_state = si_delete_sampler_state; 4792b8e80941Smrg sctx->b.create_sampler_view = si_create_sampler_view; 4793b8e80941Smrg sctx->b.sampler_view_destroy = si_sampler_view_destroy; 4794b8e80941Smrg sctx->b.memory_barrier = si_memory_barrier; 4795848b8605Smrg} 4796848b8605Smrg 4797848b8605Smrgvoid si_init_state_functions(struct si_context *sctx) 4798848b8605Smrg{ 4799b8e80941Smrg sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state; 4800b8e80941Smrg sctx->atoms.s.msaa_sample_locs.emit = si_emit_msaa_sample_locs; 4801b8e80941Smrg sctx->atoms.s.db_render_state.emit = si_emit_db_render_state; 4802b8e80941Smrg sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state; 4803b8e80941Smrg sctx->atoms.s.msaa_config.emit = si_emit_msaa_config; 4804b8e80941Smrg sctx->atoms.s.sample_mask.emit = si_emit_sample_mask; 4805b8e80941Smrg sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state; 4806b8e80941Smrg sctx->atoms.s.blend_color.emit = si_emit_blend_color; 4807b8e80941Smrg sctx->atoms.s.clip_regs.emit = si_emit_clip_regs; 4808b8e80941Smrg sctx->atoms.s.clip_state.emit = si_emit_clip_state; 4809b8e80941Smrg sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref; 4810b8e80941Smrg 4811b8e80941Smrg sctx->b.create_blend_state = si_create_blend_state; 4812b8e80941Smrg sctx->b.bind_blend_state = si_bind_blend_state; 4813b8e80941Smrg sctx->b.delete_blend_state = si_delete_blend_state; 4814b8e80941Smrg sctx->b.set_blend_color = si_set_blend_color; 4815b8e80941Smrg 4816b8e80941Smrg sctx->b.create_rasterizer_state = si_create_rs_state; 4817b8e80941Smrg sctx->b.bind_rasterizer_state = si_bind_rs_state; 4818b8e80941Smrg sctx->b.delete_rasterizer_state = si_delete_rs_state; 4819b8e80941Smrg 4820b8e80941Smrg sctx->b.create_depth_stencil_alpha_state = si_create_dsa_state; 4821b8e80941Smrg sctx->b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 4822b8e80941Smrg sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 4823b8e80941Smrg 4824b8e80941Smrg sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 4825b8e80941Smrg sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 4826b8e80941Smrg sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 4827b8e80941Smrg sctx->custom_blend_eliminate_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 4828b8e80941Smrg sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS); 4829848b8605Smrg 4830b8e80941Smrg sctx->b.set_clip_state = si_set_clip_state; 4831b8e80941Smrg sctx->b.set_stencil_ref = si_set_stencil_ref; 4832848b8605Smrg 4833b8e80941Smrg sctx->b.set_framebuffer_state = si_set_framebuffer_state; 4834848b8605Smrg 4835b8e80941Smrg sctx->b.set_sample_mask = si_set_sample_mask; 4836848b8605Smrg 4837b8e80941Smrg sctx->b.create_vertex_elements_state = si_create_vertex_elements; 4838b8e80941Smrg sctx->b.bind_vertex_elements_state = si_bind_vertex_elements; 4839b8e80941Smrg sctx->b.delete_vertex_elements_state = si_delete_vertex_element; 4840b8e80941Smrg sctx->b.set_vertex_buffers = si_set_vertex_buffers; 4841848b8605Smrg 4842b8e80941Smrg sctx->b.texture_barrier = si_texture_barrier; 4843b8e80941Smrg sctx->b.set_min_samples = si_set_min_samples; 4844b8e80941Smrg sctx->b.set_tess_state = si_set_tess_state; 4845848b8605Smrg 4846b8e80941Smrg sctx->b.set_active_query_state = si_set_active_query_state; 4847848b8605Smrg 4848b8e80941Smrg si_init_config(sctx); 4849b8e80941Smrg} 4850848b8605Smrg 4851b8e80941Smrgvoid si_init_screen_state_functions(struct si_screen *sscreen) 4852b8e80941Smrg{ 4853b8e80941Smrg sscreen->b.is_format_supported = si_is_format_supported; 4854b8e80941Smrg} 4855848b8605Smrg 4856b8e80941Smrgstatic void si_set_grbm_gfx_index(struct si_context *sctx, 4857b8e80941Smrg struct si_pm4_state *pm4, unsigned value) 4858b8e80941Smrg{ 4859b8e80941Smrg unsigned reg = sctx->chip_class >= CIK ? R_030800_GRBM_GFX_INDEX : 4860b8e80941Smrg R_00802C_GRBM_GFX_INDEX; 4861b8e80941Smrg si_pm4_set_reg(pm4, reg, value); 4862b8e80941Smrg} 4863848b8605Smrg 4864b8e80941Smrgstatic void si_set_grbm_gfx_index_se(struct si_context *sctx, 4865b8e80941Smrg struct si_pm4_state *pm4, unsigned se) 4866b8e80941Smrg{ 4867b8e80941Smrg assert(se == ~0 || se < sctx->screen->info.max_se); 4868b8e80941Smrg si_set_grbm_gfx_index(sctx, pm4, 4869b8e80941Smrg (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) : 4870b8e80941Smrg S_030800_SE_INDEX(se)) | 4871b8e80941Smrg S_030800_SH_BROADCAST_WRITES(1) | 4872b8e80941Smrg S_030800_INSTANCE_BROADCAST_WRITES(1)); 4873b8e80941Smrg} 4874848b8605Smrg 4875b8e80941Smrgstatic void 4876b8e80941Smrgsi_write_harvested_raster_configs(struct si_context *sctx, 4877b8e80941Smrg struct si_pm4_state *pm4, 4878b8e80941Smrg unsigned raster_config, 4879b8e80941Smrg unsigned raster_config_1) 4880b8e80941Smrg{ 4881b8e80941Smrg unsigned num_se = MAX2(sctx->screen->info.max_se, 1); 4882b8e80941Smrg unsigned raster_config_se[4]; 4883b8e80941Smrg unsigned se; 4884b8e80941Smrg 4885b8e80941Smrg ac_get_harvested_configs(&sctx->screen->info, 4886b8e80941Smrg raster_config, 4887b8e80941Smrg &raster_config_1, 4888b8e80941Smrg raster_config_se); 4889b8e80941Smrg 4890b8e80941Smrg for (se = 0; se < num_se; se++) { 4891b8e80941Smrg si_set_grbm_gfx_index_se(sctx, pm4, se); 4892b8e80941Smrg si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]); 4893b8e80941Smrg } 4894b8e80941Smrg si_set_grbm_gfx_index(sctx, pm4, ~0); 4895848b8605Smrg 4896b8e80941Smrg if (sctx->chip_class >= CIK) { 4897b8e80941Smrg si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 4898b8e80941Smrg } 4899b8e80941Smrg} 4900848b8605Smrg 4901b8e80941Smrgstatic void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4) 4902b8e80941Smrg{ 4903b8e80941Smrg struct si_screen *sscreen = sctx->screen; 4904b8e80941Smrg unsigned num_rb = MIN2(sscreen->info.num_render_backends, 16); 4905b8e80941Smrg unsigned rb_mask = sscreen->info.enabled_rb_mask; 4906b8e80941Smrg unsigned raster_config = sscreen->pa_sc_raster_config; 4907b8e80941Smrg unsigned raster_config_1 = sscreen->pa_sc_raster_config_1; 4908b8e80941Smrg 4909b8e80941Smrg if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 4910b8e80941Smrg /* Always use the default config when all backends are enabled 4911b8e80941Smrg * (or when we failed to determine the enabled backends). 4912b8e80941Smrg */ 4913b8e80941Smrg si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 4914b8e80941Smrg raster_config); 4915b8e80941Smrg if (sctx->chip_class >= CIK) 4916b8e80941Smrg si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 4917b8e80941Smrg raster_config_1); 4918b8e80941Smrg } else { 4919b8e80941Smrg si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); 4920b8e80941Smrg } 4921848b8605Smrg} 4922848b8605Smrg 4923b8e80941Smrgstatic void si_init_config(struct si_context *sctx) 4924848b8605Smrg{ 4925b8e80941Smrg struct si_screen *sscreen = sctx->screen; 4926b8e80941Smrg uint64_t border_color_va = sctx->border_color_buffer->gpu_address; 4927b8e80941Smrg bool has_clear_state = sscreen->has_clear_state; 4928b8e80941Smrg struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 4929b8e80941Smrg 4930b8e80941Smrg /* SI, radeon kernel disabled CLEAR_STATE. */ 4931b8e80941Smrg assert(has_clear_state || sscreen->info.chip_class == SI || 4932b8e80941Smrg sscreen->info.drm_major != 3); 4933848b8605Smrg 4934b8e80941Smrg if (!pm4) 4935848b8605Smrg return; 4936848b8605Smrg 4937b8e80941Smrg si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL); 4938b8e80941Smrg si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1)); 4939b8e80941Smrg si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1)); 4940b8e80941Smrg si_pm4_cmd_end(pm4, false); 4941b8e80941Smrg 4942b8e80941Smrg if (has_clear_state) { 4943b8e80941Smrg si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE); 4944b8e80941Smrg si_pm4_cmd_add(pm4, 0); 4945b8e80941Smrg si_pm4_cmd_end(pm4, false); 4946b8e80941Smrg } 4947b8e80941Smrg 4948b8e80941Smrg if (sctx->chip_class <= VI) 4949b8e80941Smrg si_set_raster_config(sctx, pm4); 4950b8e80941Smrg 4951b8e80941Smrg si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 4952b8e80941Smrg if (!has_clear_state) 4953b8e80941Smrg si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 4954848b8605Smrg 4955848b8605Smrg /* FIXME calculate these values somehow ??? */ 4956b8e80941Smrg if (sctx->chip_class <= VI) { 4957b8e80941Smrg si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 4958b8e80941Smrg si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 4959b8e80941Smrg } 4960b8e80941Smrg 4961b8e80941Smrg if (!has_clear_state) { 4962b8e80941Smrg si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 4963b8e80941Smrg si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 4964b8e80941Smrg si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 4965b8e80941Smrg } 4966b8e80941Smrg 4967b8e80941Smrg si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); 4968b8e80941Smrg if (!has_clear_state) 4969b8e80941Smrg si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 4970b8e80941Smrg if (sctx->chip_class < CIK) 4971848b8605Smrg si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 4972848b8605Smrg S_008A14_CLIP_VTX_REORDER_ENA(1)); 4973848b8605Smrg 4974b8e80941Smrg /* CLEAR_STATE doesn't clear these correctly on certain generations. 4975b8e80941Smrg * I don't know why. Deduced by trial and error. 4976b8e80941Smrg */ 4977b8e80941Smrg if (sctx->chip_class <= CIK) { 4978b8e80941Smrg si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 4979b8e80941Smrg si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 4980b8e80941Smrg si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 4981b8e80941Smrg si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 4982b8e80941Smrg S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 4983b8e80941Smrg si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 4984b8e80941Smrg si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 4985b8e80941Smrg S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 4986b8e80941Smrg } 4987848b8605Smrg 4988b8e80941Smrg if (!has_clear_state) { 4989b8e80941Smrg si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 4990b8e80941Smrg S_028230_ER_TRI(0xA) | 4991b8e80941Smrg S_028230_ER_POINT(0xA) | 4992b8e80941Smrg S_028230_ER_RECT(0xA) | 4993b8e80941Smrg /* Required by DX10_DIAMOND_TEST_ENA: */ 4994b8e80941Smrg S_028230_ER_LINE_LR(0x1A) | 4995b8e80941Smrg S_028230_ER_LINE_RL(0x26) | 4996b8e80941Smrg S_028230_ER_LINE_TB(0xA) | 4997b8e80941Smrg S_028230_ER_LINE_BT(0xA)); 4998b8e80941Smrg si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 4999b8e80941Smrg si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 5000b8e80941Smrg si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 5001b8e80941Smrg si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 5002b8e80941Smrg si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); 5003b8e80941Smrg } 5004848b8605Smrg 5005b8e80941Smrg if (sctx->chip_class >= GFX9) { 5006b8e80941Smrg si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0); 5007b8e80941Smrg si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0); 5008b8e80941Smrg si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0); 5009848b8605Smrg } else { 5010b8e80941Smrg /* These registers, when written, also overwrite the CLEAR_STATE 5011b8e80941Smrg * context, so we can't rely on CLEAR_STATE setting them. 5012b8e80941Smrg * It would be an issue if there was another UMD changing them. 5013b8e80941Smrg */ 5014b8e80941Smrg si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 5015b8e80941Smrg si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 5016b8e80941Smrg si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 5017b8e80941Smrg } 5018b8e80941Smrg 5019b8e80941Smrg if (sctx->chip_class >= CIK) { 5020b8e80941Smrg if (sctx->chip_class >= GFX9) { 5021b8e80941Smrg si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 5022b8e80941Smrg S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); 5023b8e80941Smrg } else { 5024b8e80941Smrg si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 5025b8e80941Smrg S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F)); 5026b8e80941Smrg si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 5027b8e80941Smrg S_00B41C_WAVE_LIMIT(0x3F)); 5028b8e80941Smrg si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 5029b8e80941Smrg S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F)); 5030b8e80941Smrg 5031b8e80941Smrg /* If this is 0, Bonaire can hang even if GS isn't being used. 5032b8e80941Smrg * Other chips are unaffected. These are suboptimal values, 5033b8e80941Smrg * but we don't use on-chip GS. 5034b8e80941Smrg */ 5035b8e80941Smrg si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL, 5036b8e80941Smrg S_028A44_ES_VERTS_PER_SUBGRP(64) | 5037b8e80941Smrg S_028A44_GS_PRIMS_PER_SUBGRP(4)); 5038b8e80941Smrg } 5039b8e80941Smrg si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 5040b8e80941Smrg S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F)); 5041b8e80941Smrg 5042b8e80941Smrg /* Compute LATE_ALLOC_VS.LIMIT. */ 5043b8e80941Smrg unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh; 5044b8e80941Smrg unsigned late_alloc_limit; /* The limit is per SH. */ 5045b8e80941Smrg 5046b8e80941Smrg if (sctx->family == CHIP_KABINI) { 5047b8e80941Smrg late_alloc_limit = 0; /* Potential hang on Kabini. */ 5048b8e80941Smrg } else if (num_cu_per_sh <= 4) { 5049b8e80941Smrg /* Too few available compute units per SH. Disallowing 5050b8e80941Smrg * VS to run on one CU could hurt us more than late VS 5051b8e80941Smrg * allocation would help. 5052b8e80941Smrg * 5053b8e80941Smrg * 2 is the highest safe number that allows us to keep 5054b8e80941Smrg * all CUs enabled. 5055b8e80941Smrg */ 5056b8e80941Smrg late_alloc_limit = 2; 5057b8e80941Smrg } else { 5058b8e80941Smrg /* This is a good initial value, allowing 1 late_alloc 5059b8e80941Smrg * wave per SIMD on num_cu - 2. 5060b8e80941Smrg */ 5061b8e80941Smrg late_alloc_limit = (num_cu_per_sh - 2) * 4; 5062b8e80941Smrg 5063b8e80941Smrg /* The limit is 0-based, so 0 means 1. */ 5064b8e80941Smrg assert(late_alloc_limit > 0 && late_alloc_limit <= 64); 5065b8e80941Smrg late_alloc_limit -= 1; 5066b8e80941Smrg } 5067b8e80941Smrg 5068b8e80941Smrg /* VS can't execute on one CU if the limit is > 2. */ 5069b8e80941Smrg si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 5070b8e80941Smrg S_00B118_CU_EN(late_alloc_limit > 2 ? 0xfffe : 0xffff) | 5071b8e80941Smrg S_00B118_WAVE_LIMIT(0x3F)); 5072b8e80941Smrg si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 5073b8e80941Smrg S_00B11C_LIMIT(late_alloc_limit)); 5074b8e80941Smrg si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 5075b8e80941Smrg S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F)); 5076b8e80941Smrg } 5077b8e80941Smrg 5078b8e80941Smrg if (sctx->chip_class >= VI) { 5079b8e80941Smrg unsigned vgt_tess_distribution; 5080b8e80941Smrg 5081b8e80941Smrg vgt_tess_distribution = 5082b8e80941Smrg S_028B50_ACCUM_ISOLINE(32) | 5083b8e80941Smrg S_028B50_ACCUM_TRI(11) | 5084b8e80941Smrg S_028B50_ACCUM_QUAD(11) | 5085b8e80941Smrg S_028B50_DONUT_SPLIT(16); 5086b8e80941Smrg 5087b8e80941Smrg /* Testing with Unigine Heaven extreme tesselation yielded best results 5088b8e80941Smrg * with TRAP_SPLIT = 3. 5089b8e80941Smrg */ 5090b8e80941Smrg if (sctx->family == CHIP_FIJI || 5091b8e80941Smrg sctx->family >= CHIP_POLARIS10) 5092b8e80941Smrg vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); 5093b8e80941Smrg 5094b8e80941Smrg si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); 5095b8e80941Smrg } else if (!has_clear_state) { 5096b8e80941Smrg si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 5097b8e80941Smrg si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); 5098b8e80941Smrg } 5099b8e80941Smrg 5100b8e80941Smrg si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 5101b8e80941Smrg if (sctx->chip_class >= CIK) { 5102b8e80941Smrg si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, 5103b8e80941Smrg S_028084_ADDRESS(border_color_va >> 40)); 5104b8e80941Smrg } 5105b8e80941Smrg si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, 5106b8e80941Smrg RADEON_PRIO_BORDER_COLORS); 5107b8e80941Smrg 5108b8e80941Smrg if (sctx->chip_class >= GFX9) { 5109b8e80941Smrg unsigned num_se = sscreen->info.max_se; 5110b8e80941Smrg unsigned pc_lines = 0; 5111b8e80941Smrg 5112b8e80941Smrg switch (sctx->family) { 5113b8e80941Smrg case CHIP_VEGA10: 5114b8e80941Smrg case CHIP_VEGA12: 5115b8e80941Smrg case CHIP_VEGA20: 5116b8e80941Smrg pc_lines = 4096; 5117848b8605Smrg break; 5118b8e80941Smrg case CHIP_RAVEN: 5119b8e80941Smrg case CHIP_RAVEN2: 5120b8e80941Smrg pc_lines = 1024; 5121848b8605Smrg break; 5122848b8605Smrg default: 5123b8e80941Smrg assert(0); 5124848b8605Smrg } 5125b8e80941Smrg 5126b8e80941Smrg si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, 5127b8e80941Smrg S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) | 5128b8e80941Smrg S_028C48_MAX_PRIM_PER_BATCH(1023)); 5129b8e80941Smrg si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, 5130b8e80941Smrg S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); 5131b8e80941Smrg si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0); 5132848b8605Smrg } 5133848b8605Smrg 5134b8e80941Smrg si_pm4_upload_indirect_buffer(sctx, pm4); 5135b8e80941Smrg sctx->init_config = pm4; 5136848b8605Smrg} 5137