1af69d88dSmrg/* 2af69d88dSmrg * Copyright 2012 Advanced Micro Devices, Inc. 301e04c3fSmrg * All Rights Reserved. 4af69d88dSmrg * 5af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 6af69d88dSmrg * copy of this software and associated documentation files (the "Software"), 7af69d88dSmrg * to deal in the Software without restriction, including without limitation 8af69d88dSmrg * on the rights to use, copy, modify, merge, publish, distribute, sub 9af69d88dSmrg * license, and/or sell copies of the Software, and to permit persons to whom 10af69d88dSmrg * the Software is furnished to do so, subject to the following conditions: 11af69d88dSmrg * 12af69d88dSmrg * The above copyright notice and this permission notice (including the next 13af69d88dSmrg * paragraph) shall be included in all copies or substantial portions of the 14af69d88dSmrg * Software. 15af69d88dSmrg * 16af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17af69d88dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18af69d88dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19af69d88dSmrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20af69d88dSmrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21af69d88dSmrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22af69d88dSmrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 23af69d88dSmrg */ 24af69d88dSmrg 2501e04c3fSmrg#include "si_build_pm4.h" 2601e04c3fSmrg#include "si_query.h" 277ec681f3Smrg#include "si_shader_internal.h" 287ec681f3Smrg#include "sid.h" 297ec681f3Smrg#include "util/fast_idiv_by_const.h" 307ec681f3Smrg#include "util/format/u_format.h" 317ec681f3Smrg#include "util/format/u_format_s3tc.h" 3201e04c3fSmrg#include "util/u_dual_blend.h" 337ec681f3Smrg#include "util/u_helpers.h" 34af69d88dSmrg#include "util/u_memory.h" 3501e04c3fSmrg#include "util/u_resource.h" 3601e04c3fSmrg#include "util/u_upload_mgr.h" 377ec681f3Smrg#include "util/u_blend.h" 387ec681f3Smrg 397ec681f3Smrg#include "gfx10_format_table.h" 40af69d88dSmrg 41af69d88dSmrgstatic unsigned si_map_swizzle(unsigned swizzle) 42af69d88dSmrg{ 437ec681f3Smrg switch (swizzle) { 447ec681f3Smrg case PIPE_SWIZZLE_Y: 457ec681f3Smrg return V_008F0C_SQ_SEL_Y; 467ec681f3Smrg case PIPE_SWIZZLE_Z: 477ec681f3Smrg return V_008F0C_SQ_SEL_Z; 487ec681f3Smrg case PIPE_SWIZZLE_W: 497ec681f3Smrg return V_008F0C_SQ_SEL_W; 507ec681f3Smrg case PIPE_SWIZZLE_0: 517ec681f3Smrg return V_008F0C_SQ_SEL_0; 527ec681f3Smrg case PIPE_SWIZZLE_1: 537ec681f3Smrg return V_008F0C_SQ_SEL_1; 547ec681f3Smrg default: /* PIPE_SWIZZLE_X */ 557ec681f3Smrg return V_008F0C_SQ_SEL_X; 567ec681f3Smrg } 57af69d88dSmrg} 58af69d88dSmrg 59af69d88dSmrg/* 12.4 fixed-point */ 60af69d88dSmrgstatic unsigned si_pack_float_12p4(float x) 61af69d88dSmrg{ 627ec681f3Smrg return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16; 63af69d88dSmrg} 64af69d88dSmrg 65af69d88dSmrg/* 6601e04c3fSmrg * Inferred framebuffer and blender state. 6701e04c3fSmrg * 6801e04c3fSmrg * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending 6901e04c3fSmrg * if there is not enough PS outputs. 70af69d88dSmrg */ 7101e04c3fSmrgstatic void si_emit_cb_render_state(struct si_context *sctx) 72af69d88dSmrg{ 737ec681f3Smrg struct radeon_cmdbuf *cs = &sctx->gfx_cs; 747ec681f3Smrg struct si_state_blend *blend = sctx->queued.named.blend; 757ec681f3Smrg /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers, 767ec681f3Smrg * but you never know. */ 777ec681f3Smrg uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_mask; 787ec681f3Smrg unsigned i; 797ec681f3Smrg 807ec681f3Smrg /* Avoid a hang that happens when dual source blending is enabled 817ec681f3Smrg * but there is not enough color outputs. This is undefined behavior, 827ec681f3Smrg * so disable color writes completely. 837ec681f3Smrg * 847ec681f3Smrg * Reproducible with Unigine Heaven 4.0 and drirc missing. 857ec681f3Smrg */ 867ec681f3Smrg if (blend->dual_src_blend && sctx->shader.ps.cso && 877ec681f3Smrg (sctx->shader.ps.cso->info.colors_written & 0x3) != 0x3) 887ec681f3Smrg cb_target_mask = 0; 897ec681f3Smrg 907ec681f3Smrg /* GFX9: Flush DFSM when CB_TARGET_MASK changes. 917ec681f3Smrg * I think we don't have to do anything between IBs. 927ec681f3Smrg */ 937ec681f3Smrg if (sctx->screen->dpbb_allowed && sctx->last_cb_target_mask != cb_target_mask) { 947ec681f3Smrg sctx->last_cb_target_mask = cb_target_mask; 957ec681f3Smrg 967ec681f3Smrg radeon_begin(cs); 977ec681f3Smrg radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 987ec681f3Smrg radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 997ec681f3Smrg radeon_end(); 1007ec681f3Smrg } 1017ec681f3Smrg 1027ec681f3Smrg radeon_begin(cs); 1037ec681f3Smrg radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK, 1047ec681f3Smrg cb_target_mask); 1057ec681f3Smrg 1067ec681f3Smrg if (sctx->chip_class >= GFX8) { 1077ec681f3Smrg /* DCC MSAA workaround. 1087ec681f3Smrg * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_- 1097ec681f3Smrg * COMBINER_DISABLE, but that would be more complicated. 1107ec681f3Smrg */ 1117ec681f3Smrg bool oc_disable = 1127ec681f3Smrg blend->dcc_msaa_corruption_4bit & cb_target_mask && sctx->framebuffer.nr_samples >= 2; 1137ec681f3Smrg unsigned watermark = sctx->framebuffer.dcc_overwrite_combiner_watermark; 1147ec681f3Smrg 1157ec681f3Smrg radeon_opt_set_context_reg( 1167ec681f3Smrg sctx, R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL, 1177ec681f3Smrg S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(sctx->chip_class <= GFX9) | 1187ec681f3Smrg S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | 1197ec681f3Smrg S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) | 1207ec681f3Smrg S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->screen->info.has_dcc_constant_encode)); 1217ec681f3Smrg } 1227ec681f3Smrg 1237ec681f3Smrg /* RB+ register settings. */ 1247ec681f3Smrg if (sctx->screen->info.rbplus_allowed) { 1257ec681f3Smrg unsigned spi_shader_col_format = 1267ec681f3Smrg sctx->shader.ps.cso ? sctx->shader.ps.current->key.part.ps.epilog.spi_shader_col_format 1277ec681f3Smrg : 0; 1287ec681f3Smrg unsigned sx_ps_downconvert = 0; 1297ec681f3Smrg unsigned sx_blend_opt_epsilon = 0; 1307ec681f3Smrg unsigned sx_blend_opt_control = 0; 1317ec681f3Smrg 1327ec681f3Smrg for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 1337ec681f3Smrg struct si_surface *surf = (struct si_surface *)sctx->framebuffer.state.cbufs[i]; 1347ec681f3Smrg unsigned format, swap, spi_format, colormask; 1357ec681f3Smrg bool has_alpha, has_rgb; 1367ec681f3Smrg 1377ec681f3Smrg if (!surf) { 1387ec681f3Smrg /* If the color buffer is not set, the driver sets 32_R 1397ec681f3Smrg * as the SPI color format, because the hw doesn't allow 1407ec681f3Smrg * holes between color outputs, so also set this to 1417ec681f3Smrg * enable RB+. 1427ec681f3Smrg */ 1437ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 1447ec681f3Smrg continue; 1457ec681f3Smrg } 1467ec681f3Smrg 1477ec681f3Smrg format = G_028C70_FORMAT(surf->cb_color_info); 1487ec681f3Smrg swap = G_028C70_COMP_SWAP(surf->cb_color_info); 1497ec681f3Smrg spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 1507ec681f3Smrg colormask = (cb_target_mask >> (i * 4)) & 0xf; 1517ec681f3Smrg 1527ec681f3Smrg /* Set if RGB and A are present. */ 1537ec681f3Smrg has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib); 1547ec681f3Smrg 1557ec681f3Smrg if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 || 1567ec681f3Smrg format == V_028C70_COLOR_32) 1577ec681f3Smrg has_rgb = !has_alpha; 1587ec681f3Smrg else 1597ec681f3Smrg has_rgb = true; 1607ec681f3Smrg 1617ec681f3Smrg /* Check the colormask and export format. */ 1627ec681f3Smrg if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 1637ec681f3Smrg has_rgb = false; 1647ec681f3Smrg if (!(colormask & PIPE_MASK_A)) 1657ec681f3Smrg has_alpha = false; 1667ec681f3Smrg 1677ec681f3Smrg if (spi_format == V_028714_SPI_SHADER_ZERO) { 1687ec681f3Smrg has_rgb = false; 1697ec681f3Smrg has_alpha = false; 1707ec681f3Smrg } 1717ec681f3Smrg 1727ec681f3Smrg /* Disable value checking for disabled channels. */ 1737ec681f3Smrg if (!has_rgb) 1747ec681f3Smrg sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 1757ec681f3Smrg if (!has_alpha) 1767ec681f3Smrg sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 1777ec681f3Smrg 1787ec681f3Smrg /* Enable down-conversion for 32bpp and smaller formats. */ 1797ec681f3Smrg switch (format) { 1807ec681f3Smrg case V_028C70_COLOR_8: 1817ec681f3Smrg case V_028C70_COLOR_8_8: 1827ec681f3Smrg case V_028C70_COLOR_8_8_8_8: 1837ec681f3Smrg /* For 1 and 2-channel formats, use the superset thereof. */ 1847ec681f3Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 1857ec681f3Smrg spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 1867ec681f3Smrg spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 1877ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 1887ec681f3Smrg sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 1897ec681f3Smrg } 1907ec681f3Smrg break; 1917ec681f3Smrg 1927ec681f3Smrg case V_028C70_COLOR_5_6_5: 1937ec681f3Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 1947ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 1957ec681f3Smrg sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 1967ec681f3Smrg } 1977ec681f3Smrg break; 1987ec681f3Smrg 1997ec681f3Smrg case V_028C70_COLOR_1_5_5_5: 2007ec681f3Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 2017ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 2027ec681f3Smrg sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 2037ec681f3Smrg } 2047ec681f3Smrg break; 2057ec681f3Smrg 2067ec681f3Smrg case V_028C70_COLOR_4_4_4_4: 2077ec681f3Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 2087ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 2097ec681f3Smrg sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 2107ec681f3Smrg } 2117ec681f3Smrg break; 2127ec681f3Smrg 2137ec681f3Smrg case V_028C70_COLOR_32: 2147ec681f3Smrg if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R) 2157ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 2167ec681f3Smrg else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR) 2177ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 2187ec681f3Smrg break; 2197ec681f3Smrg 2207ec681f3Smrg case V_028C70_COLOR_16: 2217ec681f3Smrg case V_028C70_COLOR_16_16: 2227ec681f3Smrg /* For 1-channel formats, use the superset thereof. */ 2237ec681f3Smrg if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 2247ec681f3Smrg spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 2257ec681f3Smrg spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 2267ec681f3Smrg spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 2277ec681f3Smrg if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV) 2287ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 2297ec681f3Smrg else 2307ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 2317ec681f3Smrg } 2327ec681f3Smrg break; 2337ec681f3Smrg 2347ec681f3Smrg case V_028C70_COLOR_10_11_11: 2357ec681f3Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) 2367ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 2377ec681f3Smrg break; 2387ec681f3Smrg 2397ec681f3Smrg case V_028C70_COLOR_2_10_10_10: 2407ec681f3Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 2417ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 2427ec681f3Smrg sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 2437ec681f3Smrg } 2447ec681f3Smrg break; 2457ec681f3Smrg 2467ec681f3Smrg case V_028C70_COLOR_5_9_9_9: 2477ec681f3Smrg if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) 2487ec681f3Smrg sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4); 2497ec681f3Smrg break; 2507ec681f3Smrg } 2517ec681f3Smrg } 2527ec681f3Smrg 2537ec681f3Smrg /* If there are no color outputs, the first color export is 2547ec681f3Smrg * always enabled as 32_R, so also set this to enable RB+. 2557ec681f3Smrg */ 2567ec681f3Smrg if (!sx_ps_downconvert) 2577ec681f3Smrg sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R; 2587ec681f3Smrg 2597ec681f3Smrg /* SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, SX_BLEND_OPT_CONTROL */ 2607ec681f3Smrg radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT, 2617ec681f3Smrg sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control); 2627ec681f3Smrg } 2637ec681f3Smrg radeon_end_update_context_roll(sctx); 264af69d88dSmrg} 265af69d88dSmrg 266af69d88dSmrg/* 267af69d88dSmrg * Blender functions 268af69d88dSmrg */ 269af69d88dSmrg 270af69d88dSmrgstatic uint32_t si_translate_blend_function(int blend_func) 271af69d88dSmrg{ 2727ec681f3Smrg switch (blend_func) { 2737ec681f3Smrg case PIPE_BLEND_ADD: 2747ec681f3Smrg return V_028780_COMB_DST_PLUS_SRC; 2757ec681f3Smrg case PIPE_BLEND_SUBTRACT: 2767ec681f3Smrg return V_028780_COMB_SRC_MINUS_DST; 2777ec681f3Smrg case PIPE_BLEND_REVERSE_SUBTRACT: 2787ec681f3Smrg return V_028780_COMB_DST_MINUS_SRC; 2797ec681f3Smrg case PIPE_BLEND_MIN: 2807ec681f3Smrg return V_028780_COMB_MIN_DST_SRC; 2817ec681f3Smrg case PIPE_BLEND_MAX: 2827ec681f3Smrg return V_028780_COMB_MAX_DST_SRC; 2837ec681f3Smrg default: 2847ec681f3Smrg PRINT_ERR("Unknown blend function %d\n", blend_func); 2857ec681f3Smrg assert(0); 2867ec681f3Smrg break; 2877ec681f3Smrg } 2887ec681f3Smrg return 0; 289af69d88dSmrg} 290af69d88dSmrg 291af69d88dSmrgstatic uint32_t si_translate_blend_factor(int blend_fact) 292af69d88dSmrg{ 2937ec681f3Smrg switch (blend_fact) { 2947ec681f3Smrg case PIPE_BLENDFACTOR_ONE: 2957ec681f3Smrg return V_028780_BLEND_ONE; 2967ec681f3Smrg case PIPE_BLENDFACTOR_SRC_COLOR: 2977ec681f3Smrg return V_028780_BLEND_SRC_COLOR; 2987ec681f3Smrg case PIPE_BLENDFACTOR_SRC_ALPHA: 2997ec681f3Smrg return V_028780_BLEND_SRC_ALPHA; 3007ec681f3Smrg case PIPE_BLENDFACTOR_DST_ALPHA: 3017ec681f3Smrg return V_028780_BLEND_DST_ALPHA; 3027ec681f3Smrg case PIPE_BLENDFACTOR_DST_COLOR: 3037ec681f3Smrg return V_028780_BLEND_DST_COLOR; 3047ec681f3Smrg case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 3057ec681f3Smrg return V_028780_BLEND_SRC_ALPHA_SATURATE; 3067ec681f3Smrg case PIPE_BLENDFACTOR_CONST_COLOR: 3077ec681f3Smrg return V_028780_BLEND_CONSTANT_COLOR; 3087ec681f3Smrg case PIPE_BLENDFACTOR_CONST_ALPHA: 3097ec681f3Smrg return V_028780_BLEND_CONSTANT_ALPHA; 3107ec681f3Smrg case PIPE_BLENDFACTOR_ZERO: 3117ec681f3Smrg return V_028780_BLEND_ZERO; 3127ec681f3Smrg case PIPE_BLENDFACTOR_INV_SRC_COLOR: 3137ec681f3Smrg return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 3147ec681f3Smrg case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 3157ec681f3Smrg return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 3167ec681f3Smrg case PIPE_BLENDFACTOR_INV_DST_ALPHA: 3177ec681f3Smrg return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 3187ec681f3Smrg case PIPE_BLENDFACTOR_INV_DST_COLOR: 3197ec681f3Smrg return V_028780_BLEND_ONE_MINUS_DST_COLOR; 3207ec681f3Smrg case PIPE_BLENDFACTOR_INV_CONST_COLOR: 3217ec681f3Smrg return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 3227ec681f3Smrg case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 3237ec681f3Smrg return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 3247ec681f3Smrg case PIPE_BLENDFACTOR_SRC1_COLOR: 3257ec681f3Smrg return V_028780_BLEND_SRC1_COLOR; 3267ec681f3Smrg case PIPE_BLENDFACTOR_SRC1_ALPHA: 3277ec681f3Smrg return V_028780_BLEND_SRC1_ALPHA; 3287ec681f3Smrg case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 3297ec681f3Smrg return V_028780_BLEND_INV_SRC1_COLOR; 3307ec681f3Smrg case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 3317ec681f3Smrg return V_028780_BLEND_INV_SRC1_ALPHA; 3327ec681f3Smrg default: 3337ec681f3Smrg PRINT_ERR("Bad blend factor %d not supported!\n", blend_fact); 3347ec681f3Smrg assert(0); 3357ec681f3Smrg break; 3367ec681f3Smrg } 3377ec681f3Smrg return 0; 338af69d88dSmrg} 339af69d88dSmrg 34001e04c3fSmrgstatic uint32_t si_translate_blend_opt_function(int blend_func) 34101e04c3fSmrg{ 3427ec681f3Smrg switch (blend_func) { 3437ec681f3Smrg case PIPE_BLEND_ADD: 3447ec681f3Smrg return V_028760_OPT_COMB_ADD; 3457ec681f3Smrg case PIPE_BLEND_SUBTRACT: 3467ec681f3Smrg return V_028760_OPT_COMB_SUBTRACT; 3477ec681f3Smrg case PIPE_BLEND_REVERSE_SUBTRACT: 3487ec681f3Smrg return V_028760_OPT_COMB_REVSUBTRACT; 3497ec681f3Smrg case PIPE_BLEND_MIN: 3507ec681f3Smrg return V_028760_OPT_COMB_MIN; 3517ec681f3Smrg case PIPE_BLEND_MAX: 3527ec681f3Smrg return V_028760_OPT_COMB_MAX; 3537ec681f3Smrg default: 3547ec681f3Smrg return V_028760_OPT_COMB_BLEND_DISABLED; 3557ec681f3Smrg } 35601e04c3fSmrg} 35701e04c3fSmrg 35801e04c3fSmrgstatic uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 35901e04c3fSmrg{ 3607ec681f3Smrg switch (blend_fact) { 3617ec681f3Smrg case PIPE_BLENDFACTOR_ZERO: 3627ec681f3Smrg return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 3637ec681f3Smrg case PIPE_BLENDFACTOR_ONE: 3647ec681f3Smrg return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 3657ec681f3Smrg case PIPE_BLENDFACTOR_SRC_COLOR: 3667ec681f3Smrg return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 3677ec681f3Smrg : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 3687ec681f3Smrg case PIPE_BLENDFACTOR_INV_SRC_COLOR: 3697ec681f3Smrg return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 3707ec681f3Smrg : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 3717ec681f3Smrg case PIPE_BLENDFACTOR_SRC_ALPHA: 3727ec681f3Smrg return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 3737ec681f3Smrg case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 3747ec681f3Smrg return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 3757ec681f3Smrg case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 3767ec681f3Smrg return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 3777ec681f3Smrg : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 3787ec681f3Smrg default: 3797ec681f3Smrg return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 3807ec681f3Smrg } 38101e04c3fSmrg} 38201e04c3fSmrg 3837ec681f3Smrgstatic void si_blend_check_commutativity(struct si_screen *sscreen, struct si_state_blend *blend, 3847ec681f3Smrg enum pipe_blend_func func, enum pipe_blendfactor src, 3857ec681f3Smrg enum pipe_blendfactor dst, unsigned chanmask) 38601e04c3fSmrg{ 3877ec681f3Smrg /* Src factor is allowed when it does not depend on Dst */ 3887ec681f3Smrg static const uint32_t src_allowed = 3897ec681f3Smrg (1u << PIPE_BLENDFACTOR_ONE) | (1u << PIPE_BLENDFACTOR_SRC_COLOR) | 3907ec681f3Smrg (1u << PIPE_BLENDFACTOR_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) | 3917ec681f3Smrg (1u << PIPE_BLENDFACTOR_CONST_COLOR) | (1u << PIPE_BLENDFACTOR_CONST_ALPHA) | 3927ec681f3Smrg (1u << PIPE_BLENDFACTOR_SRC1_COLOR) | (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) | 3937ec681f3Smrg (1u << PIPE_BLENDFACTOR_ZERO) | (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) | 3947ec681f3Smrg (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) | 3957ec681f3Smrg (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) | 3967ec681f3Smrg (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA); 3977ec681f3Smrg 3987ec681f3Smrg if (dst == PIPE_BLENDFACTOR_ONE && (src_allowed & (1u << src))) { 3997ec681f3Smrg /* Addition is commutative, but floating point addition isn't 4007ec681f3Smrg * associative: subtle changes can be introduced via different 4017ec681f3Smrg * rounding. 4027ec681f3Smrg * 4037ec681f3Smrg * Out-of-order is also non-deterministic, which means that 4047ec681f3Smrg * this breaks OpenGL invariance requirements. So only enable 4057ec681f3Smrg * out-of-order additive blending if explicitly allowed by a 4067ec681f3Smrg * setting. 4077ec681f3Smrg */ 4087ec681f3Smrg if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN || 4097ec681f3Smrg (func == PIPE_BLEND_ADD && sscreen->commutative_blend_add)) 4107ec681f3Smrg blend->commutative_4bit |= chanmask; 4117ec681f3Smrg } 41201e04c3fSmrg} 41301e04c3fSmrg 41401e04c3fSmrg/** 41501e04c3fSmrg * Get rid of DST in the blend factors by commuting the operands: 41601e04c3fSmrg * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 41701e04c3fSmrg */ 4187ec681f3Smrgstatic void si_blend_remove_dst(unsigned *func, unsigned *src_factor, unsigned *dst_factor, 4197ec681f3Smrg unsigned expected_dst, unsigned replacement_src) 42001e04c3fSmrg{ 4217ec681f3Smrg if (*src_factor == expected_dst && *dst_factor == PIPE_BLENDFACTOR_ZERO) { 4227ec681f3Smrg *src_factor = PIPE_BLENDFACTOR_ZERO; 4237ec681f3Smrg *dst_factor = replacement_src; 4247ec681f3Smrg 4257ec681f3Smrg /* Commuting the operands requires reversing subtractions. */ 4267ec681f3Smrg if (*func == PIPE_BLEND_SUBTRACT) 4277ec681f3Smrg *func = PIPE_BLEND_REVERSE_SUBTRACT; 4287ec681f3Smrg else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 4297ec681f3Smrg *func = PIPE_BLEND_SUBTRACT; 4307ec681f3Smrg } 43101e04c3fSmrg} 43201e04c3fSmrg 4337ec681f3Smrgstatic void *si_create_blend_state_mode(struct pipe_context *ctx, 4347ec681f3Smrg const struct pipe_blend_state *state, unsigned mode) 43501e04c3fSmrg{ 4367ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 4377ec681f3Smrg struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 4387ec681f3Smrg struct si_pm4_state *pm4 = &blend->pm4; 4397ec681f3Smrg uint32_t sx_mrt_blend_opt[8] = {0}; 4407ec681f3Smrg uint32_t color_control = 0; 4417ec681f3Smrg bool logicop_enable = state->logicop_enable && state->logicop_func != PIPE_LOGICOP_COPY; 4427ec681f3Smrg 4437ec681f3Smrg if (!blend) 4447ec681f3Smrg return NULL; 4457ec681f3Smrg 4467ec681f3Smrg blend->alpha_to_coverage = state->alpha_to_coverage; 4477ec681f3Smrg blend->alpha_to_one = state->alpha_to_one; 4487ec681f3Smrg blend->dual_src_blend = util_blend_state_is_dual(state, 0); 4497ec681f3Smrg blend->logicop_enable = logicop_enable; 4507ec681f3Smrg blend->allows_noop_optimization = 4517ec681f3Smrg state->rt[0].rgb_func == PIPE_BLEND_ADD && 4527ec681f3Smrg state->rt[0].alpha_func == PIPE_BLEND_ADD && 4537ec681f3Smrg state->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_DST_COLOR && 4547ec681f3Smrg state->rt[0].alpha_src_factor == PIPE_BLENDFACTOR_DST_COLOR && 4557ec681f3Smrg state->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ZERO && 4567ec681f3Smrg state->rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_ZERO && 4577ec681f3Smrg mode == V_028808_CB_NORMAL; 4587ec681f3Smrg 4597ec681f3Smrg unsigned num_shader_outputs = state->max_rt + 1; /* estimate */ 4607ec681f3Smrg if (blend->dual_src_blend) 4617ec681f3Smrg num_shader_outputs = MAX2(num_shader_outputs, 2); 4627ec681f3Smrg 4637ec681f3Smrg if (logicop_enable) { 4647ec681f3Smrg color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 4657ec681f3Smrg } else { 4667ec681f3Smrg color_control |= S_028808_ROP3(0xcc); 4677ec681f3Smrg } 4687ec681f3Smrg 4697ec681f3Smrg if (state->alpha_to_coverage && state->alpha_to_coverage_dither) { 4707ec681f3Smrg si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 4717ec681f3Smrg S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 4727ec681f3Smrg S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) | 4737ec681f3Smrg S_028B70_ALPHA_TO_MASK_OFFSET2(0) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 4747ec681f3Smrg S_028B70_OFFSET_ROUND(1)); 4757ec681f3Smrg } else { 4767ec681f3Smrg si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 4777ec681f3Smrg S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 4787ec681f3Smrg S_028B70_ALPHA_TO_MASK_OFFSET0(2) | S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 4797ec681f3Smrg S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 4807ec681f3Smrg S_028B70_OFFSET_ROUND(0)); 4817ec681f3Smrg } 4827ec681f3Smrg 4837ec681f3Smrg if (state->alpha_to_coverage) 4847ec681f3Smrg blend->need_src_alpha_4bit |= 0xf; 4857ec681f3Smrg 4867ec681f3Smrg blend->cb_target_mask = 0; 4877ec681f3Smrg blend->cb_target_enabled_4bit = 0; 4887ec681f3Smrg 4897ec681f3Smrg for (int i = 0; i < num_shader_outputs; i++) { 4907ec681f3Smrg /* state->rt entries > 0 only written if independent blending */ 4917ec681f3Smrg const int j = state->independent_blend_enable ? i : 0; 4927ec681f3Smrg 4937ec681f3Smrg unsigned eqRGB = state->rt[j].rgb_func; 4947ec681f3Smrg unsigned srcRGB = state->rt[j].rgb_src_factor; 4957ec681f3Smrg unsigned dstRGB = state->rt[j].rgb_dst_factor; 4967ec681f3Smrg unsigned eqA = state->rt[j].alpha_func; 4977ec681f3Smrg unsigned srcA = state->rt[j].alpha_src_factor; 4987ec681f3Smrg unsigned dstA = state->rt[j].alpha_dst_factor; 4997ec681f3Smrg 5007ec681f3Smrg unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 5017ec681f3Smrg unsigned blend_cntl = 0; 5027ec681f3Smrg 5037ec681f3Smrg sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 5047ec681f3Smrg S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 5057ec681f3Smrg 5067ec681f3Smrg /* Only set dual source blending for MRT0 to avoid a hang. */ 5077ec681f3Smrg if (i >= 1 && blend->dual_src_blend) { 5087ec681f3Smrg /* Vulkan does this for dual source blending. */ 5097ec681f3Smrg if (i == 1) 5107ec681f3Smrg blend_cntl |= S_028780_ENABLE(1); 5117ec681f3Smrg 5127ec681f3Smrg si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 5137ec681f3Smrg continue; 5147ec681f3Smrg } 5157ec681f3Smrg 5167ec681f3Smrg /* Only addition and subtraction equations are supported with 5177ec681f3Smrg * dual source blending. 5187ec681f3Smrg */ 5197ec681f3Smrg if (blend->dual_src_blend && (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 5207ec681f3Smrg eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 5217ec681f3Smrg assert(!"Unsupported equation for dual source blending"); 5227ec681f3Smrg si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 5237ec681f3Smrg continue; 5247ec681f3Smrg } 5257ec681f3Smrg 5267ec681f3Smrg /* cb_render_state will disable unused ones */ 5277ec681f3Smrg blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 5287ec681f3Smrg if (state->rt[j].colormask) 5297ec681f3Smrg blend->cb_target_enabled_4bit |= 0xf << (4 * i); 5307ec681f3Smrg 5317ec681f3Smrg if (!state->rt[j].colormask || !state->rt[j].blend_enable) { 5327ec681f3Smrg si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 5337ec681f3Smrg continue; 5347ec681f3Smrg } 5357ec681f3Smrg 5367ec681f3Smrg si_blend_check_commutativity(sctx->screen, blend, eqRGB, srcRGB, dstRGB, 0x7 << (4 * i)); 5377ec681f3Smrg si_blend_check_commutativity(sctx->screen, blend, eqA, srcA, dstA, 0x8 << (4 * i)); 5387ec681f3Smrg 5397ec681f3Smrg /* Blending optimizations for RB+. 5407ec681f3Smrg * These transformations don't change the behavior. 5417ec681f3Smrg * 5427ec681f3Smrg * First, get rid of DST in the blend factors: 5437ec681f3Smrg * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 5447ec681f3Smrg */ 5457ec681f3Smrg si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, PIPE_BLENDFACTOR_DST_COLOR, 5467ec681f3Smrg PIPE_BLENDFACTOR_SRC_COLOR); 5477ec681f3Smrg si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_COLOR, 5487ec681f3Smrg PIPE_BLENDFACTOR_SRC_COLOR); 5497ec681f3Smrg si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_ALPHA, 5507ec681f3Smrg PIPE_BLENDFACTOR_SRC_ALPHA); 5517ec681f3Smrg 5527ec681f3Smrg /* Look up the ideal settings from tables. */ 5537ec681f3Smrg srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 5547ec681f3Smrg dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 5557ec681f3Smrg srcA_opt = si_translate_blend_opt_factor(srcA, true); 5567ec681f3Smrg dstA_opt = si_translate_blend_opt_factor(dstA, true); 5577ec681f3Smrg 5587ec681f3Smrg /* Handle interdependencies. */ 5597ec681f3Smrg if (util_blend_factor_uses_dest(srcRGB, false)) 5607ec681f3Smrg dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 5617ec681f3Smrg if (util_blend_factor_uses_dest(srcA, false)) 5627ec681f3Smrg dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 5637ec681f3Smrg 5647ec681f3Smrg if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 5657ec681f3Smrg (dstRGB == PIPE_BLENDFACTOR_ZERO || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 5667ec681f3Smrg dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 5677ec681f3Smrg dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 5687ec681f3Smrg 5697ec681f3Smrg /* Set the final value. */ 5707ec681f3Smrg sx_mrt_blend_opt[i] = S_028760_COLOR_SRC_OPT(srcRGB_opt) | 5717ec681f3Smrg S_028760_COLOR_DST_OPT(dstRGB_opt) | 5727ec681f3Smrg S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 5737ec681f3Smrg S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) | 5747ec681f3Smrg S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 5757ec681f3Smrg 5767ec681f3Smrg /* Set blend state. */ 5777ec681f3Smrg blend_cntl |= S_028780_ENABLE(1); 5787ec681f3Smrg blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 5797ec681f3Smrg blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 5807ec681f3Smrg blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 5817ec681f3Smrg 5827ec681f3Smrg if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 5837ec681f3Smrg blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 5847ec681f3Smrg blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 5857ec681f3Smrg blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 5867ec681f3Smrg blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 5877ec681f3Smrg } 5887ec681f3Smrg si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 5897ec681f3Smrg 5907ec681f3Smrg blend->blend_enable_4bit |= 0xfu << (i * 4); 5917ec681f3Smrg 5927ec681f3Smrg if (sctx->chip_class >= GFX8 && sctx->chip_class <= GFX10) 5937ec681f3Smrg blend->dcc_msaa_corruption_4bit |= 0xfu << (i * 4); 5947ec681f3Smrg 5957ec681f3Smrg /* This is only important for formats without alpha. */ 5967ec681f3Smrg if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 5977ec681f3Smrg srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 5987ec681f3Smrg dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 5997ec681f3Smrg srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 6007ec681f3Smrg blend->need_src_alpha_4bit |= 0xfu << (i * 4); 6017ec681f3Smrg } 6027ec681f3Smrg 6037ec681f3Smrg if (sctx->chip_class >= GFX8 && sctx->chip_class <= GFX10 && logicop_enable) 6047ec681f3Smrg blend->dcc_msaa_corruption_4bit |= blend->cb_target_enabled_4bit; 6057ec681f3Smrg 6067ec681f3Smrg if (blend->cb_target_mask) { 6077ec681f3Smrg color_control |= S_028808_MODE(mode); 6087ec681f3Smrg } else { 6097ec681f3Smrg color_control |= S_028808_MODE(V_028808_CB_DISABLE); 6107ec681f3Smrg } 6117ec681f3Smrg 6127ec681f3Smrg if (sctx->screen->info.rbplus_allowed) { 6137ec681f3Smrg /* Disable RB+ blend optimizations for dual source blending. 6147ec681f3Smrg * Vulkan does this. 6157ec681f3Smrg */ 6167ec681f3Smrg if (blend->dual_src_blend) { 6177ec681f3Smrg for (int i = 0; i < num_shader_outputs; i++) { 6187ec681f3Smrg sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 6197ec681f3Smrg S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 6207ec681f3Smrg } 6217ec681f3Smrg } 6227ec681f3Smrg 6237ec681f3Smrg for (int i = 0; i < num_shader_outputs; i++) 6247ec681f3Smrg si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]); 6257ec681f3Smrg 6267ec681f3Smrg /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */ 6277ec681f3Smrg if (blend->dual_src_blend || logicop_enable || mode == V_028808_CB_RESOLVE) 6287ec681f3Smrg color_control |= S_028808_DISABLE_DUAL_QUAD(1); 6297ec681f3Smrg } 6307ec681f3Smrg 6317ec681f3Smrg si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 6327ec681f3Smrg return blend; 63301e04c3fSmrg} 63401e04c3fSmrg 6357ec681f3Smrgstatic void *si_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) 636af69d88dSmrg{ 6377ec681f3Smrg return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 638af69d88dSmrg} 639af69d88dSmrg 6407ec681f3Smrgstatic bool si_check_blend_dst_sampler_noop(struct si_context *sctx) 641af69d88dSmrg{ 6427ec681f3Smrg if (sctx->framebuffer.state.nr_cbufs == 1) { 6437ec681f3Smrg struct si_shader_selector *sel = sctx->shader.ps.cso; 6447ec681f3Smrg bool free_nir; 6457ec681f3Smrg if (unlikely(sel->info.writes_1_if_tex_is_1 == 0xff)) { 6467ec681f3Smrg struct nir_shader *nir = si_get_nir_shader(sel, NULL, &free_nir); 6477ec681f3Smrg 6487ec681f3Smrg /* Determine if this fragment shader always writes vec4(1) if a specific texture 6497ec681f3Smrg * is all 1s. 6507ec681f3Smrg */ 6517ec681f3Smrg float in[4] = { 1.0, 1.0, 1.0, 1.0 }; 6527ec681f3Smrg float out[4]; 6537ec681f3Smrg int texunit; 6547ec681f3Smrg if (si_nir_is_output_const_if_tex_is_const(nir, in, out, &texunit) && 6557ec681f3Smrg !memcmp(in, out, 4 * sizeof(float))) { 6567ec681f3Smrg sel->info.writes_1_if_tex_is_1 = 1 + texunit; 6577ec681f3Smrg } else { 6587ec681f3Smrg sel->info.writes_1_if_tex_is_1 = 0; 6597ec681f3Smrg } 6607ec681f3Smrg 6617ec681f3Smrg if (free_nir) 6627ec681f3Smrg ralloc_free(nir); 6637ec681f3Smrg } 6647ec681f3Smrg 6657ec681f3Smrg if (sel->info.writes_1_if_tex_is_1 && 6667ec681f3Smrg sel->info.writes_1_if_tex_is_1 != 0xff) { 6677ec681f3Smrg /* Now check if the texture is cleared to 1 */ 6687ec681f3Smrg int unit = sctx->shader.ps.cso->info.writes_1_if_tex_is_1 - 1; 6697ec681f3Smrg struct si_samplers *samp = &sctx->samplers[PIPE_SHADER_FRAGMENT]; 6707ec681f3Smrg if ((1u << unit) & samp->enabled_mask) { 6717ec681f3Smrg struct si_texture* tex = (struct si_texture*) samp->views[unit]->texture; 6727ec681f3Smrg if (tex->is_depth && 6737ec681f3Smrg tex->depth_cleared_level_mask & BITFIELD_BIT(samp->views[unit]->u.tex.first_level) && 6747ec681f3Smrg tex->depth_clear_value[0] == 1) { 6757ec681f3Smrg return false; 6767ec681f3Smrg } 6777ec681f3Smrg /* TODO: handle color textures */ 6787ec681f3Smrg } 6797ec681f3Smrg } 6807ec681f3Smrg } 6817ec681f3Smrg 6827ec681f3Smrg return true; 6837ec681f3Smrg} 6847ec681f3Smrg 6857ec681f3Smrgstatic void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx, 6867ec681f3Smrg const struct pipe_draw_info *info, 6877ec681f3Smrg unsigned drawid_offset, 6887ec681f3Smrg const struct pipe_draw_indirect_info *indirect, 6897ec681f3Smrg const struct pipe_draw_start_count_bias *draws, 6907ec681f3Smrg unsigned num_draws) { 6917ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 6927ec681f3Smrg 6937ec681f3Smrg if (!si_check_blend_dst_sampler_noop(sctx)) 6947ec681f3Smrg return; 6957ec681f3Smrg 6967ec681f3Smrg sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws); 6977ec681f3Smrg} 6987ec681f3Smrg 6997ec681f3Smrgstatic void si_draw_vstate_blend_dst_sampler_noop(struct pipe_context *ctx, 7007ec681f3Smrg struct pipe_vertex_state *state, 7017ec681f3Smrg uint32_t partial_velem_mask, 7027ec681f3Smrg struct pipe_draw_vertex_state_info info, 7037ec681f3Smrg const struct pipe_draw_start_count_bias *draws, 7047ec681f3Smrg unsigned num_draws) { 7057ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 7067ec681f3Smrg 7077ec681f3Smrg if (!si_check_blend_dst_sampler_noop(sctx)) 7087ec681f3Smrg return; 7097ec681f3Smrg 7107ec681f3Smrg sctx->real_draw_vertex_state(ctx, state, partial_velem_mask, info, draws, num_draws); 711af69d88dSmrg} 712af69d88dSmrg 713af69d88dSmrgstatic void si_bind_blend_state(struct pipe_context *ctx, void *state) 714af69d88dSmrg{ 7157ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 7167ec681f3Smrg struct si_state_blend *old_blend = sctx->queued.named.blend; 7177ec681f3Smrg struct si_state_blend *blend = (struct si_state_blend *)state; 7187ec681f3Smrg 7197ec681f3Smrg if (!blend) 7207ec681f3Smrg blend = (struct si_state_blend *)sctx->noop_blend; 7217ec681f3Smrg 7227ec681f3Smrg si_pm4_bind_state(sctx, blend, blend); 7237ec681f3Smrg 7247ec681f3Smrg if (old_blend->cb_target_mask != blend->cb_target_mask || 7257ec681f3Smrg old_blend->dual_src_blend != blend->dual_src_blend || 7267ec681f3Smrg (old_blend->dcc_msaa_corruption_4bit != blend->dcc_msaa_corruption_4bit && 7277ec681f3Smrg sctx->framebuffer.has_dcc_msaa)) 7287ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 7297ec681f3Smrg 7307ec681f3Smrg if (old_blend->cb_target_mask != blend->cb_target_mask || 7317ec681f3Smrg old_blend->alpha_to_coverage != blend->alpha_to_coverage || 7327ec681f3Smrg old_blend->alpha_to_one != blend->alpha_to_one || 7337ec681f3Smrg old_blend->dual_src_blend != blend->dual_src_blend || 7347ec681f3Smrg old_blend->blend_enable_4bit != blend->blend_enable_4bit || 7357ec681f3Smrg old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit) { 7367ec681f3Smrg si_ps_key_update_framebuffer_blend(sctx); 7377ec681f3Smrg si_ps_key_update_blend_rasterizer(sctx); 7387ec681f3Smrg si_update_ps_inputs_read_or_disabled(sctx); 7397ec681f3Smrg sctx->do_update_shaders = true; 7407ec681f3Smrg } 7417ec681f3Smrg 7427ec681f3Smrg if (sctx->screen->dpbb_allowed && 7437ec681f3Smrg (old_blend->alpha_to_coverage != blend->alpha_to_coverage || 7447ec681f3Smrg old_blend->blend_enable_4bit != blend->blend_enable_4bit || 7457ec681f3Smrg old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit)) 7467ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 7477ec681f3Smrg 7487ec681f3Smrg if (sctx->screen->has_out_of_order_rast && 7497ec681f3Smrg ((old_blend->blend_enable_4bit != blend->blend_enable_4bit || 7507ec681f3Smrg old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit || 7517ec681f3Smrg old_blend->commutative_4bit != blend->commutative_4bit || 7527ec681f3Smrg old_blend->logicop_enable != blend->logicop_enable))) 7537ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 7547ec681f3Smrg 7557ec681f3Smrg if (likely(!radeon_uses_secure_bos(sctx->ws))) { 7567ec681f3Smrg if (unlikely(blend->allows_noop_optimization)) { 7577ec681f3Smrg si_install_draw_wrapper(sctx, si_draw_blend_dst_sampler_noop, 7587ec681f3Smrg si_draw_vstate_blend_dst_sampler_noop); 7597ec681f3Smrg } else { 7607ec681f3Smrg si_install_draw_wrapper(sctx, NULL, NULL); 7617ec681f3Smrg } 7627ec681f3Smrg } 763af69d88dSmrg} 764af69d88dSmrg 765af69d88dSmrgstatic void si_delete_blend_state(struct pipe_context *ctx, void *state) 766af69d88dSmrg{ 7677ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 7687ec681f3Smrg 7697ec681f3Smrg if (sctx->queued.named.blend == state) 7707ec681f3Smrg si_bind_blend_state(ctx, sctx->noop_blend); 7717ec681f3Smrg 7727ec681f3Smrg si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(blend)); 773af69d88dSmrg} 774af69d88dSmrg 7757ec681f3Smrgstatic void si_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) 776af69d88dSmrg{ 7777ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 7787ec681f3Smrg static const struct pipe_blend_color zeros; 779af69d88dSmrg 7807ec681f3Smrg sctx->blend_color = *state; 7817ec681f3Smrg sctx->blend_color_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 7827ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.blend_color); 78301e04c3fSmrg} 784af69d88dSmrg 78501e04c3fSmrgstatic void si_emit_blend_color(struct si_context *sctx) 78601e04c3fSmrg{ 7877ec681f3Smrg struct radeon_cmdbuf *cs = &sctx->gfx_cs; 788af69d88dSmrg 7897ec681f3Smrg radeon_begin(cs); 7907ec681f3Smrg radeon_set_context_reg_seq(R_028414_CB_BLEND_RED, 4); 7917ec681f3Smrg radeon_emit_array((uint32_t *)sctx->blend_color.color, 4); 7927ec681f3Smrg radeon_end(); 793af69d88dSmrg} 794af69d88dSmrg 795af69d88dSmrg/* 79601e04c3fSmrg * Clipping 797af69d88dSmrg */ 798af69d88dSmrg 7997ec681f3Smrgstatic void si_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) 800af69d88dSmrg{ 8017ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 8027ec681f3Smrg struct pipe_constant_buffer cb; 8037ec681f3Smrg static const struct pipe_clip_state zeros; 8047ec681f3Smrg 8057ec681f3Smrg if (memcmp(&sctx->clip_state, state, sizeof(*state)) == 0) 8067ec681f3Smrg return; 8077ec681f3Smrg 8087ec681f3Smrg sctx->clip_state = *state; 8097ec681f3Smrg sctx->clip_state_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 8107ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_state); 8117ec681f3Smrg 8127ec681f3Smrg cb.buffer = NULL; 8137ec681f3Smrg cb.user_buffer = state->ucp; 8147ec681f3Smrg cb.buffer_offset = 0; 8157ec681f3Smrg cb.buffer_size = 4 * 4 * 8; 8167ec681f3Smrg si_set_internal_const_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb); 817af69d88dSmrg} 818af69d88dSmrg 81901e04c3fSmrgstatic void si_emit_clip_state(struct si_context *sctx) 820af69d88dSmrg{ 8217ec681f3Smrg struct radeon_cmdbuf *cs = &sctx->gfx_cs; 822af69d88dSmrg 8237ec681f3Smrg radeon_begin(cs); 8247ec681f3Smrg radeon_set_context_reg_seq(R_0285BC_PA_CL_UCP_0_X, 6 * 4); 8257ec681f3Smrg radeon_emit_array((uint32_t *)sctx->clip_state.ucp, 6 * 4); 8267ec681f3Smrg radeon_end(); 827af69d88dSmrg} 828af69d88dSmrg 82901e04c3fSmrgstatic void si_emit_clip_regs(struct si_context *sctx) 830af69d88dSmrg{ 8317ec681f3Smrg struct si_shader *vs = si_get_vs(sctx)->current; 8327ec681f3Smrg struct si_shader_selector *vs_sel = vs->selector; 8337ec681f3Smrg struct si_shader_info *info = &vs_sel->info; 8347ec681f3Smrg struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 8357ec681f3Smrg bool window_space = info->stage == MESA_SHADER_VERTEX ? 8367ec681f3Smrg info->base.vs.window_space_position : 0; 8377ec681f3Smrg unsigned clipdist_mask = vs_sel->clipdist_mask; 8387ec681f3Smrg unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS; 8397ec681f3Smrg unsigned culldist_mask = vs_sel->culldist_mask; 8407ec681f3Smrg 8417ec681f3Smrg /* Clip distances on points have no effect, so need to be implemented 8427ec681f3Smrg * as cull distances. This applies for the clipvertex case as well. 8437ec681f3Smrg * 8447ec681f3Smrg * Setting this for primitives other than points should have no adverse 8457ec681f3Smrg * effects. 8467ec681f3Smrg */ 8477ec681f3Smrg clipdist_mask &= rs->clip_plane_enable; 8487ec681f3Smrg culldist_mask |= clipdist_mask; 8497ec681f3Smrg 8507ec681f3Smrg unsigned pa_cl_cntl = S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 && 8517ec681f3Smrg !sctx->screen->options.vrs2x2) | 8527ec681f3Smrg S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) | 8537ec681f3Smrg clipdist_mask | (culldist_mask << 8); 8547ec681f3Smrg 8557ec681f3Smrg radeon_begin(&sctx->gfx_cs); 8567ec681f3Smrg radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL, 8577ec681f3Smrg pa_cl_cntl | vs->pa_cl_vs_out_cntl); 8587ec681f3Smrg radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, 8597ec681f3Smrg rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space)); 8607ec681f3Smrg radeon_end_update_context_roll(sctx); 861af69d88dSmrg} 862af69d88dSmrg 863af69d88dSmrg/* 864af69d88dSmrg * inferred state between framebuffer and rasterizer 865af69d88dSmrg */ 86601e04c3fSmrgstatic void si_update_poly_offset_state(struct si_context *sctx) 867af69d88dSmrg{ 8687ec681f3Smrg struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 8697ec681f3Smrg 8707ec681f3Smrg if (!rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) { 8717ec681f3Smrg si_pm4_bind_state(sctx, poly_offset, NULL); 8727ec681f3Smrg return; 8737ec681f3Smrg } 8747ec681f3Smrg 8757ec681f3Smrg /* Use the user format, not db_render_format, so that the polygon 8767ec681f3Smrg * offset behaves as expected by applications. 8777ec681f3Smrg */ 8787ec681f3Smrg switch (sctx->framebuffer.state.zsbuf->texture->format) { 8797ec681f3Smrg case PIPE_FORMAT_Z16_UNORM: 8807ec681f3Smrg si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 8817ec681f3Smrg break; 8827ec681f3Smrg default: /* 24-bit */ 8837ec681f3Smrg si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 8847ec681f3Smrg break; 8857ec681f3Smrg case PIPE_FORMAT_Z32_FLOAT: 8867ec681f3Smrg case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 8877ec681f3Smrg si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 8887ec681f3Smrg break; 8897ec681f3Smrg } 890af69d88dSmrg} 891af69d88dSmrg 892af69d88dSmrg/* 893af69d88dSmrg * Rasterizer 894af69d88dSmrg */ 895af69d88dSmrg 896af69d88dSmrgstatic uint32_t si_translate_fill(uint32_t func) 897af69d88dSmrg{ 8987ec681f3Smrg switch (func) { 8997ec681f3Smrg case PIPE_POLYGON_MODE_FILL: 9007ec681f3Smrg return V_028814_X_DRAW_TRIANGLES; 9017ec681f3Smrg case PIPE_POLYGON_MODE_LINE: 9027ec681f3Smrg return V_028814_X_DRAW_LINES; 9037ec681f3Smrg case PIPE_POLYGON_MODE_POINT: 9047ec681f3Smrg return V_028814_X_DRAW_POINTS; 9057ec681f3Smrg default: 9067ec681f3Smrg assert(0); 9077ec681f3Smrg return V_028814_X_DRAW_POINTS; 9087ec681f3Smrg } 909af69d88dSmrg} 910af69d88dSmrg 9117ec681f3Smrgstatic void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state) 912af69d88dSmrg{ 9137ec681f3Smrg struct si_screen *sscreen = ((struct si_context *)ctx)->screen; 9147ec681f3Smrg struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 9157ec681f3Smrg struct si_pm4_state *pm4 = &rs->pm4; 9167ec681f3Smrg unsigned tmp, i; 9177ec681f3Smrg float psize_min, psize_max; 9187ec681f3Smrg 9197ec681f3Smrg if (!rs) { 9207ec681f3Smrg return NULL; 9217ec681f3Smrg } 9227ec681f3Smrg 9237ec681f3Smrg rs->scissor_enable = state->scissor; 9247ec681f3Smrg rs->clip_halfz = state->clip_halfz; 9257ec681f3Smrg rs->two_side = state->light_twoside; 9267ec681f3Smrg rs->multisample_enable = state->multisample; 9277ec681f3Smrg rs->force_persample_interp = state->force_persample_interp; 9287ec681f3Smrg rs->clip_plane_enable = state->clip_plane_enable; 9297ec681f3Smrg rs->half_pixel_center = state->half_pixel_center; 9307ec681f3Smrg rs->line_stipple_enable = state->line_stipple_enable; 9317ec681f3Smrg rs->poly_stipple_enable = state->poly_stipple_enable; 9327ec681f3Smrg rs->line_smooth = state->line_smooth; 9337ec681f3Smrg rs->line_width = state->line_width; 9347ec681f3Smrg rs->poly_smooth = state->poly_smooth; 9357ec681f3Smrg rs->uses_poly_offset = state->offset_point || state->offset_line || state->offset_tri; 9367ec681f3Smrg rs->clamp_fragment_color = state->clamp_fragment_color; 9377ec681f3Smrg rs->clamp_vertex_color = state->clamp_vertex_color; 9387ec681f3Smrg rs->flatshade = state->flatshade; 9397ec681f3Smrg rs->flatshade_first = state->flatshade_first; 9407ec681f3Smrg rs->sprite_coord_enable = state->sprite_coord_enable; 9417ec681f3Smrg rs->rasterizer_discard = state->rasterizer_discard; 9427ec681f3Smrg rs->polygon_mode_is_lines = 9437ec681f3Smrg (state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) || 9447ec681f3Smrg (state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK)); 9457ec681f3Smrg rs->polygon_mode_is_points = 9467ec681f3Smrg (state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) || 9477ec681f3Smrg (state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK)); 9487ec681f3Smrg rs->pa_sc_line_stipple = state->line_stipple_enable 9497ec681f3Smrg ? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 9507ec681f3Smrg S_028A0C_REPEAT_COUNT(state->line_stipple_factor) 9517ec681f3Smrg : 0; 9527ec681f3Smrg rs->pa_cl_clip_cntl = S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 9537ec681f3Smrg S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) | 9547ec681f3Smrg S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) | 9557ec681f3Smrg S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 9567ec681f3Smrg S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 9577ec681f3Smrg 9587ec681f3Smrg if (rs->rasterizer_discard) { 9597ec681f3Smrg rs->ngg_cull_flags = SI_NGG_CULL_ENABLED | 9607ec681f3Smrg SI_NGG_CULL_FRONT_FACE | 9617ec681f3Smrg SI_NGG_CULL_BACK_FACE; 9627ec681f3Smrg rs->ngg_cull_flags_y_inverted = rs->ngg_cull_flags; 9637ec681f3Smrg } else { 9647ec681f3Smrg rs->ngg_cull_flags = SI_NGG_CULL_ENABLED; 9657ec681f3Smrg rs->ngg_cull_flags_y_inverted = rs->ngg_cull_flags; 9667ec681f3Smrg 9677ec681f3Smrg bool cull_front, cull_back; 9687ec681f3Smrg 9697ec681f3Smrg if (!state->front_ccw) { 9707ec681f3Smrg cull_front = !!(state->cull_face & PIPE_FACE_FRONT); 9717ec681f3Smrg cull_back = !!(state->cull_face & PIPE_FACE_BACK); 9727ec681f3Smrg } else { 9737ec681f3Smrg cull_back = !!(state->cull_face & PIPE_FACE_FRONT); 9747ec681f3Smrg cull_front = !!(state->cull_face & PIPE_FACE_BACK); 9757ec681f3Smrg } 9767ec681f3Smrg 9777ec681f3Smrg if (cull_front) { 9787ec681f3Smrg rs->ngg_cull_flags |= SI_NGG_CULL_FRONT_FACE; 9797ec681f3Smrg rs->ngg_cull_flags_y_inverted |= SI_NGG_CULL_BACK_FACE; 9807ec681f3Smrg } 9817ec681f3Smrg 9827ec681f3Smrg if (cull_back) { 9837ec681f3Smrg rs->ngg_cull_flags |= SI_NGG_CULL_BACK_FACE; 9847ec681f3Smrg rs->ngg_cull_flags_y_inverted |= SI_NGG_CULL_FRONT_FACE; 9857ec681f3Smrg } 9867ec681f3Smrg } 9877ec681f3Smrg 9887ec681f3Smrg si_pm4_set_reg( 9897ec681f3Smrg pm4, R_0286D4_SPI_INTERP_CONTROL_0, 9907ec681f3Smrg S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) | 9917ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 9927ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 9937ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 9947ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 9957ec681f3Smrg S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 9967ec681f3Smrg 9977ec681f3Smrg /* point size 12.4 fixed point */ 9987ec681f3Smrg tmp = (unsigned)(state->point_size * 8.0); 9997ec681f3Smrg si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 10007ec681f3Smrg 10017ec681f3Smrg if (state->point_size_per_vertex) { 10027ec681f3Smrg psize_min = util_get_min_point_size(state); 10037ec681f3Smrg psize_max = SI_MAX_POINT_SIZE; 10047ec681f3Smrg } else { 10057ec681f3Smrg /* Force the point size to be as if the vertex output was disabled. */ 10067ec681f3Smrg psize_min = state->point_size; 10077ec681f3Smrg psize_max = state->point_size; 10087ec681f3Smrg } 10097ec681f3Smrg rs->max_point_size = psize_max; 10107ec681f3Smrg 10117ec681f3Smrg /* Divide by two, because 0.5 = 1 pixel. */ 10127ec681f3Smrg si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 10137ec681f3Smrg S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min / 2)) | 10147ec681f3Smrg S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max / 2))); 10157ec681f3Smrg 10167ec681f3Smrg si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, 10177ec681f3Smrg S_028A08_WIDTH(si_pack_float_12p4(state->line_width / 2))); 10187ec681f3Smrg si_pm4_set_reg( 10197ec681f3Smrg pm4, R_028A48_PA_SC_MODE_CNTL_0, 10207ec681f3Smrg S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 10217ec681f3Smrg S_028A48_MSAA_ENABLE(state->multisample || state->poly_smooth || state->line_smooth) | 10227ec681f3Smrg S_028A48_VPORT_SCISSOR_ENABLE(1) | 10237ec681f3Smrg S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.chip_class >= GFX9)); 10247ec681f3Smrg 10257ec681f3Smrg bool polygon_mode_enabled = 10267ec681f3Smrg (state->fill_front != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_FRONT)) || 10277ec681f3Smrg (state->fill_back != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_BACK)); 10287ec681f3Smrg 10297ec681f3Smrg si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 10307ec681f3Smrg S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 10317ec681f3Smrg S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 10327ec681f3Smrg S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 10337ec681f3Smrg S_028814_FACE(!state->front_ccw) | 10347ec681f3Smrg S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 10357ec681f3Smrg S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 10367ec681f3Smrg S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 10377ec681f3Smrg S_028814_POLY_MODE(polygon_mode_enabled) | 10387ec681f3Smrg S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 10397ec681f3Smrg S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)) | 10407ec681f3Smrg /* this must be set if POLY_MODE or PERPENDICULAR_ENDCAP_ENA is set */ 10417ec681f3Smrg S_028814_KEEP_TOGETHER_ENABLE(sscreen->info.chip_class >= GFX10 ? polygon_mode_enabled : 0)); 10427ec681f3Smrg 10437ec681f3Smrg if (!rs->uses_poly_offset) 10447ec681f3Smrg return rs; 10457ec681f3Smrg 10467ec681f3Smrg rs->pm4_poly_offset = CALLOC(3, sizeof(struct si_pm4_state)); 10477ec681f3Smrg if (!rs->pm4_poly_offset) { 10487ec681f3Smrg FREE(rs); 10497ec681f3Smrg return NULL; 10507ec681f3Smrg } 10517ec681f3Smrg 10527ec681f3Smrg /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */ 10537ec681f3Smrg for (i = 0; i < 3; i++) { 10547ec681f3Smrg struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 10557ec681f3Smrg float offset_units = state->offset_units; 10567ec681f3Smrg float offset_scale = state->offset_scale * 16.0f; 10577ec681f3Smrg uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 10587ec681f3Smrg 10597ec681f3Smrg if (!state->offset_units_unscaled) { 10607ec681f3Smrg switch (i) { 10617ec681f3Smrg case 0: /* 16-bit zbuffer */ 10627ec681f3Smrg offset_units *= 4.0f; 10637ec681f3Smrg pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 10647ec681f3Smrg break; 10657ec681f3Smrg case 1: /* 24-bit zbuffer */ 10667ec681f3Smrg offset_units *= 2.0f; 10677ec681f3Smrg pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 10687ec681f3Smrg break; 10697ec681f3Smrg case 2: /* 32-bit zbuffer */ 10707ec681f3Smrg offset_units *= 1.0f; 10717ec681f3Smrg pa_su_poly_offset_db_fmt_cntl = 10727ec681f3Smrg S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 10737ec681f3Smrg break; 10747ec681f3Smrg } 10757ec681f3Smrg } 10767ec681f3Smrg 10777ec681f3Smrg si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, pa_su_poly_offset_db_fmt_cntl); 10787ec681f3Smrg si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 10797ec681f3Smrg si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, fui(offset_scale)); 10807ec681f3Smrg si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units)); 10817ec681f3Smrg si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, fui(offset_scale)); 10827ec681f3Smrg si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units)); 10837ec681f3Smrg } 10847ec681f3Smrg 10857ec681f3Smrg return rs; 1086af69d88dSmrg} 1087af69d88dSmrg 1088af69d88dSmrgstatic void si_bind_rs_state(struct pipe_context *ctx, void *state) 1089af69d88dSmrg{ 10907ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 10917ec681f3Smrg struct si_state_rasterizer *old_rs = (struct si_state_rasterizer *)sctx->queued.named.rasterizer; 10927ec681f3Smrg struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 10937ec681f3Smrg 10947ec681f3Smrg if (!rs) 10957ec681f3Smrg rs = (struct si_state_rasterizer *)sctx->discard_rasterizer_state; 10967ec681f3Smrg 10977ec681f3Smrg if (old_rs->multisample_enable != rs->multisample_enable) { 10987ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 10997ec681f3Smrg 11007ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 11017ec681f3Smrg 11027ec681f3Smrg /* Update the small primitive filter workaround if necessary. */ 11037ec681f3Smrg if (sctx->screen->info.has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1) 11047ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); 11057ec681f3Smrg 11067ec681f3Smrg /* NGG cull state uses multisample_enable. */ 11077ec681f3Smrg if (sctx->screen->use_ngg_culling) 11087ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); 11097ec681f3Smrg } 11107ec681f3Smrg 11117ec681f3Smrg sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR; 11127ec681f3Smrg sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color); 11137ec681f3Smrg 11147ec681f3Smrg si_pm4_bind_state(sctx, rasterizer, rs); 11157ec681f3Smrg si_update_poly_offset_state(sctx); 11167ec681f3Smrg 11177ec681f3Smrg if (old_rs->scissor_enable != rs->scissor_enable) 11187ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors); 11197ec681f3Smrg 11207ec681f3Smrg if (old_rs->line_width != rs->line_width || old_rs->max_point_size != rs->max_point_size || 11217ec681f3Smrg old_rs->half_pixel_center != rs->half_pixel_center) 11227ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband); 11237ec681f3Smrg 11247ec681f3Smrg if (old_rs->clip_halfz != rs->clip_halfz) 11257ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports); 11267ec681f3Smrg 11277ec681f3Smrg if (old_rs->clip_plane_enable != rs->clip_plane_enable || 11287ec681f3Smrg old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl) 11297ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs); 11307ec681f3Smrg 11317ec681f3Smrg if (old_rs->sprite_coord_enable != rs->sprite_coord_enable || 11327ec681f3Smrg old_rs->flatshade != rs->flatshade) 11337ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.spi_map); 11347ec681f3Smrg 11357ec681f3Smrg if (old_rs->clip_plane_enable != rs->clip_plane_enable || 11367ec681f3Smrg old_rs->rasterizer_discard != rs->rasterizer_discard || 11377ec681f3Smrg old_rs->sprite_coord_enable != rs->sprite_coord_enable || 11387ec681f3Smrg old_rs->flatshade != rs->flatshade || old_rs->two_side != rs->two_side || 11397ec681f3Smrg old_rs->multisample_enable != rs->multisample_enable || 11407ec681f3Smrg old_rs->poly_stipple_enable != rs->poly_stipple_enable || 11417ec681f3Smrg old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth || 11427ec681f3Smrg old_rs->clamp_fragment_color != rs->clamp_fragment_color || 11437ec681f3Smrg old_rs->force_persample_interp != rs->force_persample_interp || 11447ec681f3Smrg old_rs->polygon_mode_is_points != rs->polygon_mode_is_points) { 11457ec681f3Smrg si_ps_key_update_blend_rasterizer(sctx); 11467ec681f3Smrg si_ps_key_update_rasterizer(sctx); 11477ec681f3Smrg si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 11487ec681f3Smrg si_update_ps_inputs_read_or_disabled(sctx); 11497ec681f3Smrg sctx->do_update_shaders = true; 11507ec681f3Smrg } 11517ec681f3Smrg 11527ec681f3Smrg if (old_rs->line_smooth != rs->line_smooth || 11537ec681f3Smrg old_rs->poly_smooth != rs->poly_smooth || 11547ec681f3Smrg old_rs->poly_stipple_enable != rs->poly_stipple_enable || 11557ec681f3Smrg old_rs->flatshade != rs->flatshade) 11567ec681f3Smrg si_update_vrs_flat_shading(sctx); 1157af69d88dSmrg} 1158af69d88dSmrg 1159af69d88dSmrgstatic void si_delete_rs_state(struct pipe_context *ctx, void *state) 1160af69d88dSmrg{ 11617ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 11627ec681f3Smrg struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 116301e04c3fSmrg 11647ec681f3Smrg if (sctx->queued.named.rasterizer == state) 11657ec681f3Smrg si_bind_rs_state(ctx, sctx->discard_rasterizer_state); 116601e04c3fSmrg 11677ec681f3Smrg FREE(rs->pm4_poly_offset); 11687ec681f3Smrg si_pm4_free_state(sctx, &rs->pm4, SI_STATE_IDX(rasterizer)); 1169af69d88dSmrg} 1170af69d88dSmrg 1171af69d88dSmrg/* 11727ec681f3Smrg * inferred state between dsa and stencil ref 1173af69d88dSmrg */ 117401e04c3fSmrgstatic void si_emit_stencil_ref(struct si_context *sctx) 1175af69d88dSmrg{ 11767ec681f3Smrg struct radeon_cmdbuf *cs = &sctx->gfx_cs; 11777ec681f3Smrg struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; 11787ec681f3Smrg struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; 11797ec681f3Smrg 11807ec681f3Smrg radeon_begin(cs); 11817ec681f3Smrg radeon_set_context_reg_seq(R_028430_DB_STENCILREFMASK, 2); 11827ec681f3Smrg radeon_emit(S_028430_STENCILTESTVAL(ref->ref_value[0]) | 11837ec681f3Smrg S_028430_STENCILMASK(dsa->valuemask[0]) | 11847ec681f3Smrg S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 11857ec681f3Smrg S_028430_STENCILOPVAL(1)); 11867ec681f3Smrg radeon_emit(S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 11877ec681f3Smrg S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 11887ec681f3Smrg S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 11897ec681f3Smrg S_028434_STENCILOPVAL_BF(1)); 11907ec681f3Smrg radeon_end(); 1191af69d88dSmrg} 1192af69d88dSmrg 11937ec681f3Smrgstatic void si_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref state) 1194af69d88dSmrg{ 11957ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 119601e04c3fSmrg 11977ec681f3Smrg if (memcmp(&sctx->stencil_ref.state, &state, sizeof(state)) == 0) 11987ec681f3Smrg return; 119901e04c3fSmrg 12007ec681f3Smrg sctx->stencil_ref.state = state; 12017ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); 1202af69d88dSmrg} 1203af69d88dSmrg 1204af69d88dSmrg/* 1205af69d88dSmrg * DSA 1206af69d88dSmrg */ 1207af69d88dSmrg 1208af69d88dSmrgstatic uint32_t si_translate_stencil_op(int s_op) 1209af69d88dSmrg{ 12107ec681f3Smrg switch (s_op) { 12117ec681f3Smrg case PIPE_STENCIL_OP_KEEP: 12127ec681f3Smrg return V_02842C_STENCIL_KEEP; 12137ec681f3Smrg case PIPE_STENCIL_OP_ZERO: 12147ec681f3Smrg return V_02842C_STENCIL_ZERO; 12157ec681f3Smrg case PIPE_STENCIL_OP_REPLACE: 12167ec681f3Smrg return V_02842C_STENCIL_REPLACE_TEST; 12177ec681f3Smrg case PIPE_STENCIL_OP_INCR: 12187ec681f3Smrg return V_02842C_STENCIL_ADD_CLAMP; 12197ec681f3Smrg case PIPE_STENCIL_OP_DECR: 12207ec681f3Smrg return V_02842C_STENCIL_SUB_CLAMP; 12217ec681f3Smrg case PIPE_STENCIL_OP_INCR_WRAP: 12227ec681f3Smrg return V_02842C_STENCIL_ADD_WRAP; 12237ec681f3Smrg case PIPE_STENCIL_OP_DECR_WRAP: 12247ec681f3Smrg return V_02842C_STENCIL_SUB_WRAP; 12257ec681f3Smrg case PIPE_STENCIL_OP_INVERT: 12267ec681f3Smrg return V_02842C_STENCIL_INVERT; 12277ec681f3Smrg default: 12287ec681f3Smrg PRINT_ERR("Unknown stencil op %d", s_op); 12297ec681f3Smrg assert(0); 12307ec681f3Smrg break; 12317ec681f3Smrg } 12327ec681f3Smrg return 0; 123301e04c3fSmrg} 123401e04c3fSmrg 123501e04c3fSmrgstatic bool si_order_invariant_stencil_op(enum pipe_stencil_op op) 123601e04c3fSmrg{ 12377ec681f3Smrg /* REPLACE is normally order invariant, except when the stencil 12387ec681f3Smrg * reference value is written by the fragment shader. Tracking this 12397ec681f3Smrg * interaction does not seem worth the effort, so be conservative. */ 12407ec681f3Smrg return op != PIPE_STENCIL_OP_INCR && op != PIPE_STENCIL_OP_DECR && op != PIPE_STENCIL_OP_REPLACE; 124101e04c3fSmrg} 124201e04c3fSmrg 124301e04c3fSmrg/* Compute whether, assuming Z writes are disabled, this stencil state is order 124401e04c3fSmrg * invariant in the sense that the set of passing fragments as well as the 124501e04c3fSmrg * final stencil buffer result does not depend on the order of fragments. */ 124601e04c3fSmrgstatic bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state) 124701e04c3fSmrg{ 12487ec681f3Smrg return !state->enabled || !state->writemask || 12497ec681f3Smrg /* The following assumes that Z writes are disabled. */ 12507ec681f3Smrg (state->func == PIPE_FUNC_ALWAYS && si_order_invariant_stencil_op(state->zpass_op) && 12517ec681f3Smrg si_order_invariant_stencil_op(state->zfail_op)) || 12527ec681f3Smrg (state->func == PIPE_FUNC_NEVER && si_order_invariant_stencil_op(state->fail_op)); 125301e04c3fSmrg} 125401e04c3fSmrg 1255af69d88dSmrgstatic void *si_create_dsa_state(struct pipe_context *ctx, 12567ec681f3Smrg const struct pipe_depth_stencil_alpha_state *state) 1257af69d88dSmrg{ 12587ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 12597ec681f3Smrg struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 12607ec681f3Smrg struct si_pm4_state *pm4 = &dsa->pm4; 12617ec681f3Smrg unsigned db_depth_control; 12627ec681f3Smrg uint32_t db_stencil_control = 0; 12637ec681f3Smrg 12647ec681f3Smrg if (!dsa) { 12657ec681f3Smrg return NULL; 12667ec681f3Smrg } 12677ec681f3Smrg 12687ec681f3Smrg dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 12697ec681f3Smrg dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 12707ec681f3Smrg dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 12717ec681f3Smrg dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 12727ec681f3Smrg 12737ec681f3Smrg db_depth_control = 12747ec681f3Smrg S_028800_Z_ENABLE(state->depth_enabled) | S_028800_Z_WRITE_ENABLE(state->depth_writemask) | 12757ec681f3Smrg S_028800_ZFUNC(state->depth_func) | S_028800_DEPTH_BOUNDS_ENABLE(state->depth_bounds_test); 12767ec681f3Smrg 12777ec681f3Smrg /* stencil */ 12787ec681f3Smrg if (state->stencil[0].enabled) { 12797ec681f3Smrg db_depth_control |= S_028800_STENCIL_ENABLE(1); 12807ec681f3Smrg db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 12817ec681f3Smrg db_stencil_control |= 12827ec681f3Smrg S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 12837ec681f3Smrg db_stencil_control |= 12847ec681f3Smrg S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 12857ec681f3Smrg db_stencil_control |= 12867ec681f3Smrg S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 12877ec681f3Smrg 12887ec681f3Smrg if (state->stencil[1].enabled) { 12897ec681f3Smrg db_depth_control |= S_028800_BACKFACE_ENABLE(1); 12907ec681f3Smrg db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 12917ec681f3Smrg db_stencil_control |= 12927ec681f3Smrg S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 12937ec681f3Smrg db_stencil_control |= 12947ec681f3Smrg S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 12957ec681f3Smrg db_stencil_control |= 12967ec681f3Smrg S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 12977ec681f3Smrg } 12987ec681f3Smrg } 12997ec681f3Smrg 13007ec681f3Smrg /* alpha */ 13017ec681f3Smrg if (state->alpha_enabled) { 13027ec681f3Smrg dsa->alpha_func = state->alpha_func; 13037ec681f3Smrg 13047ec681f3Smrg si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4, 13057ec681f3Smrg fui(state->alpha_ref_value)); 13067ec681f3Smrg } else { 13077ec681f3Smrg dsa->alpha_func = PIPE_FUNC_ALWAYS; 13087ec681f3Smrg } 13097ec681f3Smrg 13107ec681f3Smrg si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 13117ec681f3Smrg if (state->stencil[0].enabled) 13127ec681f3Smrg si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 13137ec681f3Smrg if (state->depth_bounds_test) { 13147ec681f3Smrg si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth_bounds_min)); 13157ec681f3Smrg si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth_bounds_max)); 13167ec681f3Smrg } 13177ec681f3Smrg 13187ec681f3Smrg dsa->depth_enabled = state->depth_enabled; 13197ec681f3Smrg dsa->depth_write_enabled = state->depth_enabled && state->depth_writemask; 13207ec681f3Smrg dsa->stencil_enabled = state->stencil[0].enabled; 13217ec681f3Smrg dsa->stencil_write_enabled = 13227ec681f3Smrg (util_writes_stencil(&state->stencil[0]) || util_writes_stencil(&state->stencil[1])); 13237ec681f3Smrg dsa->db_can_write = dsa->depth_write_enabled || dsa->stencil_write_enabled; 13247ec681f3Smrg 13257ec681f3Smrg bool zfunc_is_ordered = 13267ec681f3Smrg state->depth_func == PIPE_FUNC_NEVER || state->depth_func == PIPE_FUNC_LESS || 13277ec681f3Smrg state->depth_func == PIPE_FUNC_LEQUAL || state->depth_func == PIPE_FUNC_GREATER || 13287ec681f3Smrg state->depth_func == PIPE_FUNC_GEQUAL; 13297ec681f3Smrg 13307ec681f3Smrg bool nozwrite_and_order_invariant_stencil = 13317ec681f3Smrg !dsa->db_can_write || 13327ec681f3Smrg (!dsa->depth_write_enabled && si_order_invariant_stencil_state(&state->stencil[0]) && 13337ec681f3Smrg si_order_invariant_stencil_state(&state->stencil[1])); 13347ec681f3Smrg 13357ec681f3Smrg dsa->order_invariance[1].zs = 13367ec681f3Smrg nozwrite_and_order_invariant_stencil || (!dsa->stencil_write_enabled && zfunc_is_ordered); 13377ec681f3Smrg dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered; 13387ec681f3Smrg 13397ec681f3Smrg dsa->order_invariance[1].pass_set = 13407ec681f3Smrg nozwrite_and_order_invariant_stencil || 13417ec681f3Smrg (!dsa->stencil_write_enabled && 13427ec681f3Smrg (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER)); 13437ec681f3Smrg dsa->order_invariance[0].pass_set = 13447ec681f3Smrg !dsa->depth_write_enabled || 13457ec681f3Smrg (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER); 13467ec681f3Smrg 13477ec681f3Smrg dsa->order_invariance[1].pass_last = sctx->screen->assume_no_z_fights && 13487ec681f3Smrg !dsa->stencil_write_enabled && dsa->depth_write_enabled && 13497ec681f3Smrg zfunc_is_ordered; 13507ec681f3Smrg dsa->order_invariance[0].pass_last = 13517ec681f3Smrg sctx->screen->assume_no_z_fights && dsa->depth_write_enabled && zfunc_is_ordered; 13527ec681f3Smrg 13537ec681f3Smrg return dsa; 1354af69d88dSmrg} 1355af69d88dSmrg 1356af69d88dSmrgstatic void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1357af69d88dSmrg{ 13587ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 13597ec681f3Smrg struct si_state_dsa *old_dsa = sctx->queued.named.dsa; 13607ec681f3Smrg struct si_state_dsa *dsa = state; 13617ec681f3Smrg 13627ec681f3Smrg if (!dsa) 13637ec681f3Smrg dsa = (struct si_state_dsa *)sctx->noop_dsa; 13647ec681f3Smrg 13657ec681f3Smrg si_pm4_bind_state(sctx, dsa, dsa); 13667ec681f3Smrg 13677ec681f3Smrg if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 13687ec681f3Smrg sizeof(struct si_dsa_stencil_ref_part)) != 0) { 13697ec681f3Smrg sctx->stencil_ref.dsa_part = dsa->stencil_ref; 13707ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); 13717ec681f3Smrg } 13727ec681f3Smrg 13737ec681f3Smrg if (old_dsa->alpha_func != dsa->alpha_func) { 13747ec681f3Smrg si_ps_key_update_dsa(sctx); 13757ec681f3Smrg si_update_ps_inputs_read_or_disabled(sctx); 13767ec681f3Smrg si_update_ps_kill_enable(sctx); 13777ec681f3Smrg sctx->do_update_shaders = true; 13787ec681f3Smrg } 13797ec681f3Smrg 13807ec681f3Smrg if (sctx->screen->dpbb_allowed && ((old_dsa->depth_enabled != dsa->depth_enabled || 13817ec681f3Smrg old_dsa->stencil_enabled != dsa->stencil_enabled || 13827ec681f3Smrg old_dsa->db_can_write != dsa->db_can_write))) 13837ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 13847ec681f3Smrg 13857ec681f3Smrg if (sctx->screen->has_out_of_order_rast && 13867ec681f3Smrg (memcmp(old_dsa->order_invariance, dsa->order_invariance, 13877ec681f3Smrg sizeof(old_dsa->order_invariance)))) 13887ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1389af69d88dSmrg} 1390af69d88dSmrg 1391af69d88dSmrgstatic void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1392af69d88dSmrg{ 13937ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 13947ec681f3Smrg 13957ec681f3Smrg if (sctx->queued.named.dsa == state) 13967ec681f3Smrg si_bind_dsa_state(ctx, sctx->noop_dsa); 13977ec681f3Smrg 13987ec681f3Smrg si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(dsa)); 1399af69d88dSmrg} 1400af69d88dSmrg 140101e04c3fSmrgstatic void *si_create_db_flush_dsa(struct si_context *sctx) 140201e04c3fSmrg{ 14037ec681f3Smrg struct pipe_depth_stencil_alpha_state dsa = {}; 140401e04c3fSmrg 14057ec681f3Smrg return sctx->b.create_depth_stencil_alpha_state(&sctx->b, &dsa); 140601e04c3fSmrg} 140701e04c3fSmrg 140801e04c3fSmrg/* DB RENDER STATE */ 140901e04c3fSmrg 14107ec681f3Smrgstatic void si_set_active_query_state(struct pipe_context *ctx, bool enable) 141101e04c3fSmrg{ 14127ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 14137ec681f3Smrg 14147ec681f3Smrg /* Pipeline stat & streamout queries. */ 14157ec681f3Smrg if (enable) { 14167ec681f3Smrg sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS; 14177ec681f3Smrg sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS; 14187ec681f3Smrg } else { 14197ec681f3Smrg sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS; 14207ec681f3Smrg sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS; 14217ec681f3Smrg } 14227ec681f3Smrg 14237ec681f3Smrg /* Occlusion queries. */ 14247ec681f3Smrg if (sctx->occlusion_queries_disabled != !enable) { 14257ec681f3Smrg sctx->occlusion_queries_disabled = !enable; 14267ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 14277ec681f3Smrg } 142801e04c3fSmrg} 142901e04c3fSmrg 14307ec681f3Smrgvoid si_set_occlusion_query_state(struct si_context *sctx, bool old_perfect_enable) 1431af69d88dSmrg{ 14327ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1433af69d88dSmrg 14347ec681f3Smrg bool perfect_enable = sctx->num_perfect_occlusion_queries != 0; 1435af69d88dSmrg 14367ec681f3Smrg if (perfect_enable != old_perfect_enable) 14377ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 143801e04c3fSmrg} 143901e04c3fSmrg 144001e04c3fSmrgvoid si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st) 144101e04c3fSmrg{ 14427ec681f3Smrg si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); 14437ec681f3Smrg} 14449f464c52Smaya 14457ec681f3Smrgvoid si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st) 14467ec681f3Smrg{ 14477ec681f3Smrg sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, true, &st->saved_const0); 144801e04c3fSmrg} 144901e04c3fSmrg 145001e04c3fSmrgstatic void si_emit_db_render_state(struct si_context *sctx) 145101e04c3fSmrg{ 14527ec681f3Smrg struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 14537ec681f3Smrg unsigned db_shader_control, db_render_control, db_count_control; 14547ec681f3Smrg 14557ec681f3Smrg /* DB_RENDER_CONTROL */ 14567ec681f3Smrg if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) { 14577ec681f3Smrg db_render_control = S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 14587ec681f3Smrg S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 14597ec681f3Smrg S_028000_COPY_CENTROID(1) | S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample); 14607ec681f3Smrg } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { 14617ec681f3Smrg db_render_control = S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | 14627ec681f3Smrg S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace); 14637ec681f3Smrg } else { 14647ec681f3Smrg db_render_control = S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | 14657ec681f3Smrg S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear); 14667ec681f3Smrg } 14677ec681f3Smrg 14687ec681f3Smrg /* DB_COUNT_CONTROL (occlusion queries) */ 14697ec681f3Smrg if (sctx->num_occlusion_queries > 0 && !sctx->occlusion_queries_disabled) { 14707ec681f3Smrg bool perfect = sctx->num_perfect_occlusion_queries > 0; 14717ec681f3Smrg bool gfx10_perfect = sctx->chip_class >= GFX10 && perfect; 14727ec681f3Smrg 14737ec681f3Smrg if (sctx->chip_class >= GFX7) { 14747ec681f3Smrg unsigned log_sample_rate = sctx->framebuffer.log_samples; 14757ec681f3Smrg 14767ec681f3Smrg db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) | 14777ec681f3Smrg S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) | 14787ec681f3Smrg S_028004_SAMPLE_RATE(log_sample_rate) | S_028004_ZPASS_ENABLE(1) | 14797ec681f3Smrg S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1); 14807ec681f3Smrg } else { 14817ec681f3Smrg db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) | 14827ec681f3Smrg S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples); 14837ec681f3Smrg } 14847ec681f3Smrg } else { 14857ec681f3Smrg /* Disable occlusion queries. */ 14867ec681f3Smrg if (sctx->chip_class >= GFX7) { 14877ec681f3Smrg db_count_control = 0; 14887ec681f3Smrg } else { 14897ec681f3Smrg db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); 14907ec681f3Smrg } 14917ec681f3Smrg } 14927ec681f3Smrg 14937ec681f3Smrg radeon_begin(&sctx->gfx_cs); 14947ec681f3Smrg radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL, 14957ec681f3Smrg db_render_control, db_count_control); 14967ec681f3Smrg 14977ec681f3Smrg /* DB_RENDER_OVERRIDE2 */ 14987ec681f3Smrg radeon_opt_set_context_reg( 14997ec681f3Smrg sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2, 15007ec681f3Smrg S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | 15017ec681f3Smrg S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | 15027ec681f3Smrg S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) | 15037ec681f3Smrg S_028010_CENTROID_COMPUTATION_MODE(sctx->chip_class >= GFX10_3 ? 1 : 0)); 15047ec681f3Smrg 15057ec681f3Smrg db_shader_control = sctx->ps_db_shader_control; 15067ec681f3Smrg 15077ec681f3Smrg /* Bug workaround for smoothing (overrasterization) on GFX6. */ 15087ec681f3Smrg if (sctx->chip_class == GFX6 && sctx->smoothing_enabled) { 15097ec681f3Smrg db_shader_control &= C_02880C_Z_ORDER; 15107ec681f3Smrg db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 15117ec681f3Smrg } 15127ec681f3Smrg 15137ec681f3Smrg /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ 15147ec681f3Smrg if (!rs->multisample_enable) 15157ec681f3Smrg db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 15167ec681f3Smrg 15177ec681f3Smrg if (sctx->screen->info.has_rbplus && !sctx->screen->info.rbplus_allowed) 15187ec681f3Smrg db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); 15197ec681f3Smrg 15207ec681f3Smrg radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL, 15217ec681f3Smrg db_shader_control); 15227ec681f3Smrg 15237ec681f3Smrg if (sctx->chip_class >= GFX10_3) { 15247ec681f3Smrg if (sctx->allow_flat_shading) { 15257ec681f3Smrg radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL, 15267ec681f3Smrg SI_TRACKED_DB_VRS_OVERRIDE_CNTL, 15277ec681f3Smrg S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE( 15287ec681f3Smrg V_028064_VRS_COMB_MODE_OVERRIDE) | 15297ec681f3Smrg S_028064_VRS_OVERRIDE_RATE_X(1) | 15307ec681f3Smrg S_028064_VRS_OVERRIDE_RATE_Y(1)); 15317ec681f3Smrg } else { 15327ec681f3Smrg /* If the shader is using discard, turn off coarse shading because 15337ec681f3Smrg * discard at 2x2 pixel granularity degrades quality too much. 15347ec681f3Smrg * 15357ec681f3Smrg * MIN allows sample shading but not coarse shading. 15367ec681f3Smrg */ 15377ec681f3Smrg unsigned mode = sctx->screen->options.vrs2x2 && G_02880C_KILL_ENABLE(db_shader_control) ? 15387ec681f3Smrg V_028064_VRS_COMB_MODE_MIN : V_028064_VRS_COMB_MODE_PASSTHRU; 15397ec681f3Smrg 15407ec681f3Smrg radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL, 15417ec681f3Smrg SI_TRACKED_DB_VRS_OVERRIDE_CNTL, 15427ec681f3Smrg S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | 15437ec681f3Smrg S_028064_VRS_OVERRIDE_RATE_X(0) | 15447ec681f3Smrg S_028064_VRS_OVERRIDE_RATE_Y(0)); 15457ec681f3Smrg } 15467ec681f3Smrg } 15477ec681f3Smrg radeon_end_update_context_roll(sctx); 1548af69d88dSmrg} 1549af69d88dSmrg 1550af69d88dSmrg/* 1551af69d88dSmrg * format translation 1552af69d88dSmrg */ 15537ec681f3Smrguint32_t si_translate_colorformat(enum chip_class chip_class, 15547ec681f3Smrg enum pipe_format format) 1555af69d88dSmrg{ 15567ec681f3Smrg const struct util_format_description *desc = util_format_description(format); 15577ec681f3Smrg if (!desc) 15587ec681f3Smrg return V_028C70_COLOR_INVALID; 15597ec681f3Smrg 15607ec681f3Smrg#define HAS_SIZE(x, y, z, w) \ 15617ec681f3Smrg (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 15627ec681f3Smrg desc->channel[2].size == (z) && desc->channel[3].size == (w)) 15637ec681f3Smrg 15647ec681f3Smrg if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 15657ec681f3Smrg return V_028C70_COLOR_10_11_11; 15667ec681f3Smrg 15677ec681f3Smrg if (chip_class >= GFX10_3 && 15687ec681f3Smrg format == PIPE_FORMAT_R9G9B9E5_FLOAT) /* isn't plain */ 15697ec681f3Smrg return V_028C70_COLOR_5_9_9_9; 15707ec681f3Smrg 15717ec681f3Smrg if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 15727ec681f3Smrg return V_028C70_COLOR_INVALID; 15737ec681f3Smrg 15747ec681f3Smrg /* hw cannot support mixed formats (except depth/stencil, since 15757ec681f3Smrg * stencil is not written to). */ 15767ec681f3Smrg if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 15777ec681f3Smrg return V_028C70_COLOR_INVALID; 15787ec681f3Smrg 15797ec681f3Smrg switch (desc->nr_channels) { 15807ec681f3Smrg case 1: 15817ec681f3Smrg switch (desc->channel[0].size) { 15827ec681f3Smrg case 8: 15837ec681f3Smrg return V_028C70_COLOR_8; 15847ec681f3Smrg case 16: 15857ec681f3Smrg return V_028C70_COLOR_16; 15867ec681f3Smrg case 32: 15877ec681f3Smrg return V_028C70_COLOR_32; 15887ec681f3Smrg } 15897ec681f3Smrg break; 15907ec681f3Smrg case 2: 15917ec681f3Smrg if (desc->channel[0].size == desc->channel[1].size) { 15927ec681f3Smrg switch (desc->channel[0].size) { 15937ec681f3Smrg case 8: 15947ec681f3Smrg return V_028C70_COLOR_8_8; 15957ec681f3Smrg case 16: 15967ec681f3Smrg return V_028C70_COLOR_16_16; 15977ec681f3Smrg case 32: 15987ec681f3Smrg return V_028C70_COLOR_32_32; 15997ec681f3Smrg } 16007ec681f3Smrg } else if (HAS_SIZE(8, 24, 0, 0)) { 16017ec681f3Smrg return V_028C70_COLOR_24_8; 16027ec681f3Smrg } else if (HAS_SIZE(24, 8, 0, 0)) { 16037ec681f3Smrg return V_028C70_COLOR_8_24; 16047ec681f3Smrg } 16057ec681f3Smrg break; 16067ec681f3Smrg case 3: 16077ec681f3Smrg if (HAS_SIZE(5, 6, 5, 0)) { 16087ec681f3Smrg return V_028C70_COLOR_5_6_5; 16097ec681f3Smrg } else if (HAS_SIZE(32, 8, 24, 0)) { 16107ec681f3Smrg return V_028C70_COLOR_X24_8_32_FLOAT; 16117ec681f3Smrg } 16127ec681f3Smrg break; 16137ec681f3Smrg case 4: 16147ec681f3Smrg if (desc->channel[0].size == desc->channel[1].size && 16157ec681f3Smrg desc->channel[0].size == desc->channel[2].size && 16167ec681f3Smrg desc->channel[0].size == desc->channel[3].size) { 16177ec681f3Smrg switch (desc->channel[0].size) { 16187ec681f3Smrg case 4: 16197ec681f3Smrg return V_028C70_COLOR_4_4_4_4; 16207ec681f3Smrg case 8: 16217ec681f3Smrg return V_028C70_COLOR_8_8_8_8; 16227ec681f3Smrg case 16: 16237ec681f3Smrg return V_028C70_COLOR_16_16_16_16; 16247ec681f3Smrg case 32: 16257ec681f3Smrg return V_028C70_COLOR_32_32_32_32; 16267ec681f3Smrg } 16277ec681f3Smrg } else if (HAS_SIZE(5, 5, 5, 1)) { 16287ec681f3Smrg return V_028C70_COLOR_1_5_5_5; 16297ec681f3Smrg } else if (HAS_SIZE(1, 5, 5, 5)) { 16307ec681f3Smrg return V_028C70_COLOR_5_5_5_1; 16317ec681f3Smrg } else if (HAS_SIZE(10, 10, 10, 2)) { 16327ec681f3Smrg return V_028C70_COLOR_2_10_10_10; 16337ec681f3Smrg } 16347ec681f3Smrg break; 16357ec681f3Smrg } 16367ec681f3Smrg return V_028C70_COLOR_INVALID; 1637af69d88dSmrg} 1638af69d88dSmrg 1639af69d88dSmrgstatic uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1640af69d88dSmrg{ 16417ec681f3Smrg if (SI_BIG_ENDIAN) { 16427ec681f3Smrg switch (colorformat) { 16437ec681f3Smrg /* 8-bit buffers. */ 16447ec681f3Smrg case V_028C70_COLOR_8: 16457ec681f3Smrg return V_028C70_ENDIAN_NONE; 16467ec681f3Smrg 16477ec681f3Smrg /* 16-bit buffers. */ 16487ec681f3Smrg case V_028C70_COLOR_5_6_5: 16497ec681f3Smrg case V_028C70_COLOR_1_5_5_5: 16507ec681f3Smrg case V_028C70_COLOR_4_4_4_4: 16517ec681f3Smrg case V_028C70_COLOR_16: 16527ec681f3Smrg case V_028C70_COLOR_8_8: 16537ec681f3Smrg return V_028C70_ENDIAN_8IN16; 16547ec681f3Smrg 16557ec681f3Smrg /* 32-bit buffers. */ 16567ec681f3Smrg case V_028C70_COLOR_8_8_8_8: 16577ec681f3Smrg case V_028C70_COLOR_2_10_10_10: 16587ec681f3Smrg case V_028C70_COLOR_8_24: 16597ec681f3Smrg case V_028C70_COLOR_24_8: 16607ec681f3Smrg case V_028C70_COLOR_16_16: 16617ec681f3Smrg return V_028C70_ENDIAN_8IN32; 16627ec681f3Smrg 16637ec681f3Smrg /* 64-bit buffers. */ 16647ec681f3Smrg case V_028C70_COLOR_16_16_16_16: 16657ec681f3Smrg return V_028C70_ENDIAN_8IN16; 16667ec681f3Smrg 16677ec681f3Smrg case V_028C70_COLOR_32_32: 16687ec681f3Smrg return V_028C70_ENDIAN_8IN32; 16697ec681f3Smrg 16707ec681f3Smrg /* 128-bit buffers. */ 16717ec681f3Smrg case V_028C70_COLOR_32_32_32_32: 16727ec681f3Smrg return V_028C70_ENDIAN_8IN32; 16737ec681f3Smrg default: 16747ec681f3Smrg return V_028C70_ENDIAN_NONE; /* Unsupported. */ 16757ec681f3Smrg } 16767ec681f3Smrg } else { 16777ec681f3Smrg return V_028C70_ENDIAN_NONE; 16787ec681f3Smrg } 1679af69d88dSmrg} 1680af69d88dSmrg 1681af69d88dSmrgstatic uint32_t si_translate_dbformat(enum pipe_format format) 1682af69d88dSmrg{ 16837ec681f3Smrg switch (format) { 16847ec681f3Smrg case PIPE_FORMAT_Z16_UNORM: 16857ec681f3Smrg return V_028040_Z_16; 16867ec681f3Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 16877ec681f3Smrg case PIPE_FORMAT_X8Z24_UNORM: 16887ec681f3Smrg case PIPE_FORMAT_Z24X8_UNORM: 16897ec681f3Smrg case PIPE_FORMAT_Z24_UNORM_S8_UINT: 16907ec681f3Smrg return V_028040_Z_24; /* deprecated on AMD GCN */ 16917ec681f3Smrg case PIPE_FORMAT_Z32_FLOAT: 16927ec681f3Smrg case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 16937ec681f3Smrg return V_028040_Z_32_FLOAT; 16947ec681f3Smrg default: 16957ec681f3Smrg return V_028040_Z_INVALID; 16967ec681f3Smrg } 1697af69d88dSmrg} 1698af69d88dSmrg 1699af69d88dSmrg/* 1700af69d88dSmrg * Texture translation 1701af69d88dSmrg */ 1702af69d88dSmrg 17037ec681f3Smrgstatic uint32_t si_translate_texformat(struct pipe_screen *screen, enum pipe_format format, 17047ec681f3Smrg const struct util_format_description *desc, 17057ec681f3Smrg int first_non_void) 1706af69d88dSmrg{ 17077ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 17087ec681f3Smrg bool uniform = true; 17097ec681f3Smrg int i; 17107ec681f3Smrg 17117ec681f3Smrg assert(sscreen->info.chip_class <= GFX9); 17127ec681f3Smrg 17137ec681f3Smrg /* Colorspace (return non-RGB formats directly). */ 17147ec681f3Smrg switch (desc->colorspace) { 17157ec681f3Smrg /* Depth stencil formats */ 17167ec681f3Smrg case UTIL_FORMAT_COLORSPACE_ZS: 17177ec681f3Smrg switch (format) { 17187ec681f3Smrg case PIPE_FORMAT_Z16_UNORM: 17197ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_16; 17207ec681f3Smrg case PIPE_FORMAT_X24S8_UINT: 17217ec681f3Smrg case PIPE_FORMAT_S8X24_UINT: 17227ec681f3Smrg /* 17237ec681f3Smrg * Implemented as an 8_8_8_8 data format to fix texture 17247ec681f3Smrg * gathers in stencil sampling. This affects at least 17257ec681f3Smrg * GL45-CTS.texture_cube_map_array.sampling on GFX8. 17267ec681f3Smrg */ 17277ec681f3Smrg if (sscreen->info.chip_class <= GFX8) 17287ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 17297ec681f3Smrg 17307ec681f3Smrg if (format == PIPE_FORMAT_X24S8_UINT) 17317ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_8_24; 17327ec681f3Smrg else 17337ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_24_8; 17347ec681f3Smrg case PIPE_FORMAT_Z24X8_UNORM: 17357ec681f3Smrg case PIPE_FORMAT_Z24_UNORM_S8_UINT: 17367ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_8_24; 17377ec681f3Smrg case PIPE_FORMAT_X8Z24_UNORM: 17387ec681f3Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 17397ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_24_8; 17407ec681f3Smrg case PIPE_FORMAT_S8_UINT: 17417ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_8; 17427ec681f3Smrg case PIPE_FORMAT_Z32_FLOAT: 17437ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_32; 17447ec681f3Smrg case PIPE_FORMAT_X32_S8X24_UINT: 17457ec681f3Smrg case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 17467ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_X24_8_32; 17477ec681f3Smrg default: 17487ec681f3Smrg goto out_unknown; 17497ec681f3Smrg } 17507ec681f3Smrg 17517ec681f3Smrg case UTIL_FORMAT_COLORSPACE_YUV: 17527ec681f3Smrg goto out_unknown; /* TODO */ 17537ec681f3Smrg 17547ec681f3Smrg case UTIL_FORMAT_COLORSPACE_SRGB: 17557ec681f3Smrg if (desc->nr_channels != 4 && desc->nr_channels != 1) 17567ec681f3Smrg goto out_unknown; 17577ec681f3Smrg break; 17587ec681f3Smrg 17597ec681f3Smrg default: 17607ec681f3Smrg break; 17617ec681f3Smrg } 17627ec681f3Smrg 17637ec681f3Smrg if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 17647ec681f3Smrg if (!sscreen->info.has_format_bc1_through_bc7) 17657ec681f3Smrg goto out_unknown; 17667ec681f3Smrg 17677ec681f3Smrg switch (format) { 17687ec681f3Smrg case PIPE_FORMAT_RGTC1_SNORM: 17697ec681f3Smrg case PIPE_FORMAT_LATC1_SNORM: 17707ec681f3Smrg case PIPE_FORMAT_RGTC1_UNORM: 17717ec681f3Smrg case PIPE_FORMAT_LATC1_UNORM: 17727ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_BC4; 17737ec681f3Smrg case PIPE_FORMAT_RGTC2_SNORM: 17747ec681f3Smrg case PIPE_FORMAT_LATC2_SNORM: 17757ec681f3Smrg case PIPE_FORMAT_RGTC2_UNORM: 17767ec681f3Smrg case PIPE_FORMAT_LATC2_UNORM: 17777ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_BC5; 17787ec681f3Smrg default: 17797ec681f3Smrg goto out_unknown; 17807ec681f3Smrg } 17817ec681f3Smrg } 17827ec681f3Smrg 17837ec681f3Smrg if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && 17847ec681f3Smrg (sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_VEGA10 || 17857ec681f3Smrg sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2)) { 17867ec681f3Smrg switch (format) { 17877ec681f3Smrg case PIPE_FORMAT_ETC1_RGB8: 17887ec681f3Smrg case PIPE_FORMAT_ETC2_RGB8: 17897ec681f3Smrg case PIPE_FORMAT_ETC2_SRGB8: 17907ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; 17917ec681f3Smrg case PIPE_FORMAT_ETC2_RGB8A1: 17927ec681f3Smrg case PIPE_FORMAT_ETC2_SRGB8A1: 17937ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; 17947ec681f3Smrg case PIPE_FORMAT_ETC2_RGBA8: 17957ec681f3Smrg case PIPE_FORMAT_ETC2_SRGBA8: 17967ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; 17977ec681f3Smrg case PIPE_FORMAT_ETC2_R11_UNORM: 17987ec681f3Smrg case PIPE_FORMAT_ETC2_R11_SNORM: 17997ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_ETC2_R; 18007ec681f3Smrg case PIPE_FORMAT_ETC2_RG11_UNORM: 18017ec681f3Smrg case PIPE_FORMAT_ETC2_RG11_SNORM: 18027ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_ETC2_RG; 18037ec681f3Smrg default: 18047ec681f3Smrg goto out_unknown; 18057ec681f3Smrg } 18067ec681f3Smrg } 18077ec681f3Smrg 18087ec681f3Smrg if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 18097ec681f3Smrg if (!sscreen->info.has_format_bc1_through_bc7) 18107ec681f3Smrg goto out_unknown; 18117ec681f3Smrg 18127ec681f3Smrg switch (format) { 18137ec681f3Smrg case PIPE_FORMAT_BPTC_RGBA_UNORM: 18147ec681f3Smrg case PIPE_FORMAT_BPTC_SRGBA: 18157ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_BC7; 18167ec681f3Smrg case PIPE_FORMAT_BPTC_RGB_FLOAT: 18177ec681f3Smrg case PIPE_FORMAT_BPTC_RGB_UFLOAT: 18187ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_BC6; 18197ec681f3Smrg default: 18207ec681f3Smrg goto out_unknown; 18217ec681f3Smrg } 18227ec681f3Smrg } 18237ec681f3Smrg 18247ec681f3Smrg if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 18257ec681f3Smrg switch (format) { 18267ec681f3Smrg case PIPE_FORMAT_R8G8_B8G8_UNORM: 18277ec681f3Smrg case PIPE_FORMAT_G8R8_B8R8_UNORM: 18287ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_GB_GR; 18297ec681f3Smrg case PIPE_FORMAT_G8R8_G8B8_UNORM: 18307ec681f3Smrg case PIPE_FORMAT_R8G8_R8B8_UNORM: 18317ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_BG_RG; 18327ec681f3Smrg default: 18337ec681f3Smrg goto out_unknown; 18347ec681f3Smrg } 18357ec681f3Smrg } 18367ec681f3Smrg 18377ec681f3Smrg if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 18387ec681f3Smrg if (!sscreen->info.has_format_bc1_through_bc7) 18397ec681f3Smrg goto out_unknown; 18407ec681f3Smrg 18417ec681f3Smrg switch (format) { 18427ec681f3Smrg case PIPE_FORMAT_DXT1_RGB: 18437ec681f3Smrg case PIPE_FORMAT_DXT1_RGBA: 18447ec681f3Smrg case PIPE_FORMAT_DXT1_SRGB: 18457ec681f3Smrg case PIPE_FORMAT_DXT1_SRGBA: 18467ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_BC1; 18477ec681f3Smrg case PIPE_FORMAT_DXT3_RGBA: 18487ec681f3Smrg case PIPE_FORMAT_DXT3_SRGBA: 18497ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_BC2; 18507ec681f3Smrg case PIPE_FORMAT_DXT5_RGBA: 18517ec681f3Smrg case PIPE_FORMAT_DXT5_SRGBA: 18527ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_BC3; 18537ec681f3Smrg default: 18547ec681f3Smrg goto out_unknown; 18557ec681f3Smrg } 18567ec681f3Smrg } 18577ec681f3Smrg 18587ec681f3Smrg if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 18597ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 18607ec681f3Smrg } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 18617ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_10_11_11; 18627ec681f3Smrg } 18637ec681f3Smrg 18647ec681f3Smrg /* R8G8Bx_SNORM - TODO CxV8U8 */ 18657ec681f3Smrg 18667ec681f3Smrg /* hw cannot support mixed formats (except depth/stencil, since only 18677ec681f3Smrg * depth is read).*/ 18687ec681f3Smrg if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 18697ec681f3Smrg goto out_unknown; 18707ec681f3Smrg 18717ec681f3Smrg /* See whether the components are of the same size. */ 18727ec681f3Smrg for (i = 1; i < desc->nr_channels; i++) { 18737ec681f3Smrg uniform = uniform && desc->channel[0].size == desc->channel[i].size; 18747ec681f3Smrg } 18757ec681f3Smrg 18767ec681f3Smrg /* Non-uniform formats. */ 18777ec681f3Smrg if (!uniform) { 18787ec681f3Smrg switch (desc->nr_channels) { 18797ec681f3Smrg case 3: 18807ec681f3Smrg if (desc->channel[0].size == 5 && desc->channel[1].size == 6 && 18817ec681f3Smrg desc->channel[2].size == 5) { 18827ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_5_6_5; 18837ec681f3Smrg } 18847ec681f3Smrg goto out_unknown; 18857ec681f3Smrg case 4: 18867ec681f3Smrg if (desc->channel[0].size == 5 && desc->channel[1].size == 5 && 18877ec681f3Smrg desc->channel[2].size == 5 && desc->channel[3].size == 1) { 18887ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 18897ec681f3Smrg } 18907ec681f3Smrg if (desc->channel[0].size == 1 && desc->channel[1].size == 5 && 18917ec681f3Smrg desc->channel[2].size == 5 && desc->channel[3].size == 5) { 18927ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_5_5_5_1; 18937ec681f3Smrg } 18947ec681f3Smrg if (desc->channel[0].size == 10 && desc->channel[1].size == 10 && 18957ec681f3Smrg desc->channel[2].size == 10 && desc->channel[3].size == 2) { 18967ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 18977ec681f3Smrg } 18987ec681f3Smrg goto out_unknown; 18997ec681f3Smrg } 19007ec681f3Smrg goto out_unknown; 19017ec681f3Smrg } 19027ec681f3Smrg 19037ec681f3Smrg if (first_non_void < 0 || first_non_void > 3) 19047ec681f3Smrg goto out_unknown; 19057ec681f3Smrg 19067ec681f3Smrg /* uniform formats */ 19077ec681f3Smrg switch (desc->channel[first_non_void].size) { 19087ec681f3Smrg case 4: 19097ec681f3Smrg switch (desc->nr_channels) { 1910af69d88dSmrg#if 0 /* Not supported for render targets */ 19117ec681f3Smrg case 2: 19127ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_4_4; 1913af69d88dSmrg#endif 19147ec681f3Smrg case 4: 19157ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 19167ec681f3Smrg } 19177ec681f3Smrg break; 19187ec681f3Smrg case 8: 19197ec681f3Smrg switch (desc->nr_channels) { 19207ec681f3Smrg case 1: 19217ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_8; 19227ec681f3Smrg case 2: 19237ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_8_8; 19247ec681f3Smrg case 4: 19257ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 19267ec681f3Smrg } 19277ec681f3Smrg break; 19287ec681f3Smrg case 16: 19297ec681f3Smrg switch (desc->nr_channels) { 19307ec681f3Smrg case 1: 19317ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_16; 19327ec681f3Smrg case 2: 19337ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_16_16; 19347ec681f3Smrg case 4: 19357ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 19367ec681f3Smrg } 19377ec681f3Smrg break; 19387ec681f3Smrg case 32: 19397ec681f3Smrg switch (desc->nr_channels) { 19407ec681f3Smrg case 1: 19417ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_32; 19427ec681f3Smrg case 2: 19437ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_32_32; 1944af69d88dSmrg#if 0 /* Not supported for render targets */ 19457ec681f3Smrg case 3: 19467ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_32_32_32; 1947af69d88dSmrg#endif 19487ec681f3Smrg case 4: 19497ec681f3Smrg return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 19507ec681f3Smrg } 19517ec681f3Smrg } 1952af69d88dSmrg 1953af69d88dSmrgout_unknown: 19547ec681f3Smrg return ~0; 19557ec681f3Smrg} 19567ec681f3Smrg 19577ec681f3Smrgstatic unsigned is_wrap_mode_legal(struct si_screen *screen, unsigned wrap) 19587ec681f3Smrg{ 19597ec681f3Smrg if (!screen->info.has_3d_cube_border_color_mipmap) { 19607ec681f3Smrg switch (wrap) { 19617ec681f3Smrg case PIPE_TEX_WRAP_CLAMP: 19627ec681f3Smrg case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 19637ec681f3Smrg case PIPE_TEX_WRAP_MIRROR_CLAMP: 19647ec681f3Smrg case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 19657ec681f3Smrg return false; 19667ec681f3Smrg } 19677ec681f3Smrg } 19687ec681f3Smrg return true; 1969af69d88dSmrg} 1970af69d88dSmrg 1971af69d88dSmrgstatic unsigned si_tex_wrap(unsigned wrap) 1972af69d88dSmrg{ 19737ec681f3Smrg switch (wrap) { 19747ec681f3Smrg default: 19757ec681f3Smrg case PIPE_TEX_WRAP_REPEAT: 19767ec681f3Smrg return V_008F30_SQ_TEX_WRAP; 19777ec681f3Smrg case PIPE_TEX_WRAP_CLAMP: 19787ec681f3Smrg return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 19797ec681f3Smrg case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 19807ec681f3Smrg return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 19817ec681f3Smrg case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 19827ec681f3Smrg return V_008F30_SQ_TEX_CLAMP_BORDER; 19837ec681f3Smrg case PIPE_TEX_WRAP_MIRROR_REPEAT: 19847ec681f3Smrg return V_008F30_SQ_TEX_MIRROR; 19857ec681f3Smrg case PIPE_TEX_WRAP_MIRROR_CLAMP: 19867ec681f3Smrg return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 19877ec681f3Smrg case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 19887ec681f3Smrg return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 19897ec681f3Smrg case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 19907ec681f3Smrg return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 19917ec681f3Smrg } 1992af69d88dSmrg} 1993af69d88dSmrg 1994af69d88dSmrgstatic unsigned si_tex_mipfilter(unsigned filter) 1995af69d88dSmrg{ 19967ec681f3Smrg switch (filter) { 19977ec681f3Smrg case PIPE_TEX_MIPFILTER_NEAREST: 19987ec681f3Smrg return V_008F38_SQ_TEX_Z_FILTER_POINT; 19997ec681f3Smrg case PIPE_TEX_MIPFILTER_LINEAR: 20007ec681f3Smrg return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 20017ec681f3Smrg default: 20027ec681f3Smrg case PIPE_TEX_MIPFILTER_NONE: 20037ec681f3Smrg return V_008F38_SQ_TEX_Z_FILTER_NONE; 20047ec681f3Smrg } 2005af69d88dSmrg} 2006af69d88dSmrg 2007af69d88dSmrgstatic unsigned si_tex_compare(unsigned compare) 2008af69d88dSmrg{ 20097ec681f3Smrg switch (compare) { 20107ec681f3Smrg default: 20117ec681f3Smrg case PIPE_FUNC_NEVER: 20127ec681f3Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 20137ec681f3Smrg case PIPE_FUNC_LESS: 20147ec681f3Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 20157ec681f3Smrg case PIPE_FUNC_EQUAL: 20167ec681f3Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 20177ec681f3Smrg case PIPE_FUNC_LEQUAL: 20187ec681f3Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 20197ec681f3Smrg case PIPE_FUNC_GREATER: 20207ec681f3Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 20217ec681f3Smrg case PIPE_FUNC_NOTEQUAL: 20227ec681f3Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 20237ec681f3Smrg case PIPE_FUNC_GEQUAL: 20247ec681f3Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 20257ec681f3Smrg case PIPE_FUNC_ALWAYS: 20267ec681f3Smrg return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 20277ec681f3Smrg } 2028af69d88dSmrg} 2029af69d88dSmrg 20307ec681f3Smrgstatic unsigned si_tex_dim(struct si_screen *sscreen, struct si_texture *tex, unsigned view_target, 20317ec681f3Smrg unsigned nr_samples) 2032af69d88dSmrg{ 20337ec681f3Smrg unsigned res_target = tex->buffer.b.b.target; 20347ec681f3Smrg 20357ec681f3Smrg if (view_target == PIPE_TEXTURE_CUBE || view_target == PIPE_TEXTURE_CUBE_ARRAY) 20367ec681f3Smrg res_target = view_target; 20377ec681f3Smrg /* If interpreting cubemaps as something else, set 2D_ARRAY. */ 20387ec681f3Smrg else if (res_target == PIPE_TEXTURE_CUBE || res_target == PIPE_TEXTURE_CUBE_ARRAY) 20397ec681f3Smrg res_target = PIPE_TEXTURE_2D_ARRAY; 20407ec681f3Smrg 20417ec681f3Smrg /* GFX9 allocates 1D textures as 2D. */ 20427ec681f3Smrg if ((res_target == PIPE_TEXTURE_1D || res_target == PIPE_TEXTURE_1D_ARRAY) && 20437ec681f3Smrg sscreen->info.chip_class == GFX9 && 20447ec681f3Smrg tex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) { 20457ec681f3Smrg if (res_target == PIPE_TEXTURE_1D) 20467ec681f3Smrg res_target = PIPE_TEXTURE_2D; 20477ec681f3Smrg else 20487ec681f3Smrg res_target = PIPE_TEXTURE_2D_ARRAY; 20497ec681f3Smrg } 20507ec681f3Smrg 20517ec681f3Smrg switch (res_target) { 20527ec681f3Smrg default: 20537ec681f3Smrg case PIPE_TEXTURE_1D: 20547ec681f3Smrg return V_008F1C_SQ_RSRC_IMG_1D; 20557ec681f3Smrg case PIPE_TEXTURE_1D_ARRAY: 20567ec681f3Smrg return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 20577ec681f3Smrg case PIPE_TEXTURE_2D: 20587ec681f3Smrg case PIPE_TEXTURE_RECT: 20597ec681f3Smrg return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : V_008F1C_SQ_RSRC_IMG_2D; 20607ec681f3Smrg case PIPE_TEXTURE_2D_ARRAY: 20617ec681f3Smrg return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 20627ec681f3Smrg case PIPE_TEXTURE_3D: 20637ec681f3Smrg return V_008F1C_SQ_RSRC_IMG_3D; 20647ec681f3Smrg case PIPE_TEXTURE_CUBE: 20657ec681f3Smrg case PIPE_TEXTURE_CUBE_ARRAY: 20667ec681f3Smrg return V_008F1C_SQ_RSRC_IMG_CUBE; 20677ec681f3Smrg } 2068af69d88dSmrg} 2069af69d88dSmrg 2070af69d88dSmrg/* 2071af69d88dSmrg * Format support testing 2072af69d88dSmrg */ 2073af69d88dSmrg 2074af69d88dSmrgstatic bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 2075af69d88dSmrg{ 20767ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 20777ec681f3Smrg 20787ec681f3Smrg if (sscreen->info.chip_class >= GFX10) { 20797ec681f3Smrg const struct gfx10_format *fmt = &gfx10_format_table[format]; 20807ec681f3Smrg if (!fmt->img_format || fmt->buffers_only) 20817ec681f3Smrg return false; 20827ec681f3Smrg return true; 20837ec681f3Smrg } 208401e04c3fSmrg 20857ec681f3Smrg const struct util_format_description *desc = util_format_description(format); 20867ec681f3Smrg if (!desc) 20877ec681f3Smrg return false; 20887ec681f3Smrg 20897ec681f3Smrg return si_translate_texformat(screen, format, desc, 20907ec681f3Smrg util_format_get_first_non_void_channel(format)) != ~0U; 2091af69d88dSmrg} 2092af69d88dSmrg 2093af69d88dSmrgstatic uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 20947ec681f3Smrg const struct util_format_description *desc, 20957ec681f3Smrg int first_non_void) 2096af69d88dSmrg{ 20977ec681f3Smrg int i; 20987ec681f3Smrg 20997ec681f3Smrg assert(((struct si_screen *)screen)->info.chip_class <= GFX9); 21007ec681f3Smrg 21017ec681f3Smrg if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 21027ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_10_11_11; 21037ec681f3Smrg 21047ec681f3Smrg assert(first_non_void >= 0); 21057ec681f3Smrg 21067ec681f3Smrg if (desc->nr_channels == 4 && desc->channel[0].size == 10 && desc->channel[1].size == 10 && 21077ec681f3Smrg desc->channel[2].size == 10 && desc->channel[3].size == 2) 21087ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 21097ec681f3Smrg 21107ec681f3Smrg /* See whether the components are of the same size. */ 21117ec681f3Smrg for (i = 0; i < desc->nr_channels; i++) { 21127ec681f3Smrg if (desc->channel[first_non_void].size != desc->channel[i].size) 21137ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_INVALID; 21147ec681f3Smrg } 21157ec681f3Smrg 21167ec681f3Smrg switch (desc->channel[first_non_void].size) { 21177ec681f3Smrg case 8: 21187ec681f3Smrg switch (desc->nr_channels) { 21197ec681f3Smrg case 1: 21207ec681f3Smrg case 3: /* 3 loads */ 21217ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_8; 21227ec681f3Smrg case 2: 21237ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_8_8; 21247ec681f3Smrg case 4: 21257ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 21267ec681f3Smrg } 21277ec681f3Smrg break; 21287ec681f3Smrg case 16: 21297ec681f3Smrg switch (desc->nr_channels) { 21307ec681f3Smrg case 1: 21317ec681f3Smrg case 3: /* 3 loads */ 21327ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_16; 21337ec681f3Smrg case 2: 21347ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_16_16; 21357ec681f3Smrg case 4: 21367ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 21377ec681f3Smrg } 21387ec681f3Smrg break; 21397ec681f3Smrg case 32: 21407ec681f3Smrg switch (desc->nr_channels) { 21417ec681f3Smrg case 1: 21427ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_32; 21437ec681f3Smrg case 2: 21447ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_32_32; 21457ec681f3Smrg case 3: 21467ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_32_32_32; 21477ec681f3Smrg case 4: 21487ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 21497ec681f3Smrg } 21507ec681f3Smrg break; 21517ec681f3Smrg case 64: 21527ec681f3Smrg /* Legacy double formats. */ 21537ec681f3Smrg switch (desc->nr_channels) { 21547ec681f3Smrg case 1: /* 1 load */ 21557ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_32_32; 21567ec681f3Smrg case 2: /* 1 load */ 21577ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 21587ec681f3Smrg case 3: /* 3 loads */ 21597ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_32_32; 21607ec681f3Smrg case 4: /* 2 loads */ 21617ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 21627ec681f3Smrg } 21637ec681f3Smrg break; 21647ec681f3Smrg } 21657ec681f3Smrg 21667ec681f3Smrg return V_008F0C_BUF_DATA_FORMAT_INVALID; 2167af69d88dSmrg} 2168af69d88dSmrg 2169af69d88dSmrgstatic uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 21707ec681f3Smrg const struct util_format_description *desc, 21717ec681f3Smrg int first_non_void) 2172af69d88dSmrg{ 21737ec681f3Smrg assert(((struct si_screen *)screen)->info.chip_class <= GFX9); 21747ec681f3Smrg 21757ec681f3Smrg if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 21767ec681f3Smrg return V_008F0C_BUF_NUM_FORMAT_FLOAT; 21777ec681f3Smrg 21787ec681f3Smrg assert(first_non_void >= 0); 21797ec681f3Smrg 21807ec681f3Smrg switch (desc->channel[first_non_void].type) { 21817ec681f3Smrg case UTIL_FORMAT_TYPE_SIGNED: 21827ec681f3Smrg case UTIL_FORMAT_TYPE_FIXED: 21837ec681f3Smrg if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer) 21847ec681f3Smrg return V_008F0C_BUF_NUM_FORMAT_SINT; 21857ec681f3Smrg else if (desc->channel[first_non_void].normalized) 21867ec681f3Smrg return V_008F0C_BUF_NUM_FORMAT_SNORM; 21877ec681f3Smrg else 21887ec681f3Smrg return V_008F0C_BUF_NUM_FORMAT_SSCALED; 21897ec681f3Smrg break; 21907ec681f3Smrg case UTIL_FORMAT_TYPE_UNSIGNED: 21917ec681f3Smrg if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer) 21927ec681f3Smrg return V_008F0C_BUF_NUM_FORMAT_UINT; 21937ec681f3Smrg else if (desc->channel[first_non_void].normalized) 21947ec681f3Smrg return V_008F0C_BUF_NUM_FORMAT_UNORM; 21957ec681f3Smrg else 21967ec681f3Smrg return V_008F0C_BUF_NUM_FORMAT_USCALED; 21977ec681f3Smrg break; 21987ec681f3Smrg case UTIL_FORMAT_TYPE_FLOAT: 21997ec681f3Smrg default: 22007ec681f3Smrg return V_008F0C_BUF_NUM_FORMAT_FLOAT; 22017ec681f3Smrg } 2202af69d88dSmrg} 2203af69d88dSmrg 22047ec681f3Smrgstatic unsigned si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format, 22057ec681f3Smrg unsigned usage) 2206af69d88dSmrg{ 22077ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 22087ec681f3Smrg const struct util_format_description *desc; 22097ec681f3Smrg int first_non_void; 22107ec681f3Smrg unsigned data_format; 22117ec681f3Smrg 22127ec681f3Smrg assert((usage & ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_VERTEX_BUFFER)) == 22137ec681f3Smrg 0); 22147ec681f3Smrg 22157ec681f3Smrg desc = util_format_description(format); 22167ec681f3Smrg if (!desc) 22177ec681f3Smrg return 0; 22187ec681f3Smrg 22197ec681f3Smrg /* There are no native 8_8_8 or 16_16_16 data formats, and we currently 22207ec681f3Smrg * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well 22217ec681f3Smrg * for read-only access (with caveats surrounding bounds checks), but 22227ec681f3Smrg * obviously fails for write access which we have to implement for 22237ec681f3Smrg * shader images. Luckily, OpenGL doesn't expect this to be supported 22247ec681f3Smrg * anyway, and so the only impact is on PBO uploads / downloads, which 22257ec681f3Smrg * shouldn't be expected to be fast for GL_RGB anyway. 22267ec681f3Smrg */ 22277ec681f3Smrg if (desc->block.bits == 3 * 8 || desc->block.bits == 3 * 16) { 22287ec681f3Smrg if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) { 22297ec681f3Smrg usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW); 22307ec681f3Smrg if (!usage) 22317ec681f3Smrg return 0; 22327ec681f3Smrg } 22337ec681f3Smrg } 22347ec681f3Smrg 22357ec681f3Smrg if (sscreen->info.chip_class >= GFX10) { 22367ec681f3Smrg const struct gfx10_format *fmt = &gfx10_format_table[format]; 22377ec681f3Smrg if (!fmt->img_format || fmt->img_format >= 128) 22387ec681f3Smrg return 0; 22397ec681f3Smrg return usage; 22407ec681f3Smrg } 22417ec681f3Smrg 22427ec681f3Smrg first_non_void = util_format_get_first_non_void_channel(format); 22437ec681f3Smrg data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 22447ec681f3Smrg if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID) 22457ec681f3Smrg return 0; 22467ec681f3Smrg 22477ec681f3Smrg return usage; 2248af69d88dSmrg} 2249af69d88dSmrg 22507ec681f3Smrgstatic bool si_is_colorbuffer_format_supported(enum chip_class chip_class, 22517ec681f3Smrg enum pipe_format format) 2252af69d88dSmrg{ 22537ec681f3Smrg return si_translate_colorformat(chip_class, format) != V_028C70_COLOR_INVALID && 22547ec681f3Smrg si_translate_colorswap(format, false) != ~0U; 2255af69d88dSmrg} 2256af69d88dSmrg 2257af69d88dSmrgstatic bool si_is_zs_format_supported(enum pipe_format format) 2258af69d88dSmrg{ 22597ec681f3Smrg return si_translate_dbformat(format) != V_028040_Z_INVALID; 2260af69d88dSmrg} 2261af69d88dSmrg 22627ec681f3Smrgstatic bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format format, 22637ec681f3Smrg enum pipe_texture_target target, unsigned sample_count, 22647ec681f3Smrg unsigned storage_sample_count, unsigned usage) 2265af69d88dSmrg{ 22667ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 22677ec681f3Smrg unsigned retval = 0; 22687ec681f3Smrg 22697ec681f3Smrg if (target >= PIPE_MAX_TEXTURE_TYPES) { 22707ec681f3Smrg PRINT_ERR("radeonsi: unsupported texture type %d\n", target); 22717ec681f3Smrg return false; 22727ec681f3Smrg } 22737ec681f3Smrg 22747ec681f3Smrg if ((target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE) && 22757ec681f3Smrg !sscreen->info.has_3d_cube_border_color_mipmap) 22767ec681f3Smrg return false; 22777ec681f3Smrg 22787ec681f3Smrg if (util_format_get_num_planes(format) >= 2) 22797ec681f3Smrg return false; 22807ec681f3Smrg 22817ec681f3Smrg if (MAX2(1, sample_count) < MAX2(1, storage_sample_count)) 22827ec681f3Smrg return false; 22837ec681f3Smrg 22847ec681f3Smrg if (sample_count > 1) { 22857ec681f3Smrg if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 22867ec681f3Smrg return false; 22877ec681f3Smrg 22887ec681f3Smrg /* Only power-of-two sample counts are supported. */ 22897ec681f3Smrg if (!util_is_power_of_two_or_zero(sample_count) || 22907ec681f3Smrg !util_is_power_of_two_or_zero(storage_sample_count)) 22917ec681f3Smrg return false; 22927ec681f3Smrg 22937ec681f3Smrg /* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate, 22947ec681f3Smrg * so don't expose 16 samples there. 22957ec681f3Smrg */ 22967ec681f3Smrg const unsigned max_eqaa_samples = util_bitcount(sscreen->info.enabled_rb_mask) <= 1 ? 8 : 16; 22977ec681f3Smrg const unsigned max_samples = 8; 22987ec681f3Smrg 22997ec681f3Smrg /* MSAA support without framebuffer attachments. */ 23007ec681f3Smrg if (format == PIPE_FORMAT_NONE && sample_count <= max_eqaa_samples) 23017ec681f3Smrg return true; 23027ec681f3Smrg 23037ec681f3Smrg if (!sscreen->info.has_eqaa_surface_allocator || util_format_is_depth_or_stencil(format)) { 23047ec681f3Smrg /* Color without EQAA or depth/stencil. */ 23057ec681f3Smrg if (sample_count > max_samples || sample_count != storage_sample_count) 23067ec681f3Smrg return false; 23077ec681f3Smrg } else { 23087ec681f3Smrg /* Color with EQAA. */ 23097ec681f3Smrg if (sample_count > max_eqaa_samples || storage_sample_count > max_samples) 23107ec681f3Smrg return false; 23117ec681f3Smrg } 23127ec681f3Smrg } 23137ec681f3Smrg 23147ec681f3Smrg if (usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) { 23157ec681f3Smrg if (target == PIPE_BUFFER) { 23167ec681f3Smrg retval |= si_is_vertex_format_supported( 23177ec681f3Smrg screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)); 23187ec681f3Smrg } else { 23197ec681f3Smrg if (si_is_sampler_format_supported(screen, format)) 23207ec681f3Smrg retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE); 23217ec681f3Smrg } 23227ec681f3Smrg } 23237ec681f3Smrg 23247ec681f3Smrg if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | 23257ec681f3Smrg PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) && 23267ec681f3Smrg si_is_colorbuffer_format_supported(sscreen->info.chip_class, format)) { 23277ec681f3Smrg retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | 23287ec681f3Smrg PIPE_BIND_SHARED); 23297ec681f3Smrg if (!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format)) 23307ec681f3Smrg retval |= usage & PIPE_BIND_BLENDABLE; 23317ec681f3Smrg } 23327ec681f3Smrg 23337ec681f3Smrg if ((usage & PIPE_BIND_DEPTH_STENCIL) && si_is_zs_format_supported(format)) { 23347ec681f3Smrg retval |= PIPE_BIND_DEPTH_STENCIL; 23357ec681f3Smrg } 23367ec681f3Smrg 23377ec681f3Smrg if (usage & PIPE_BIND_VERTEX_BUFFER) { 23387ec681f3Smrg retval |= si_is_vertex_format_supported(screen, format, PIPE_BIND_VERTEX_BUFFER); 23397ec681f3Smrg } 23407ec681f3Smrg 23417ec681f3Smrg if (usage & PIPE_BIND_INDEX_BUFFER) { 23427ec681f3Smrg if (format == PIPE_FORMAT_R8_UINT || 23437ec681f3Smrg format == PIPE_FORMAT_R16_UINT || 23447ec681f3Smrg format == PIPE_FORMAT_R32_UINT) 23457ec681f3Smrg retval |= PIPE_BIND_INDEX_BUFFER; 23467ec681f3Smrg } 23477ec681f3Smrg 23487ec681f3Smrg if ((usage & PIPE_BIND_LINEAR) && !util_format_is_compressed(format) && 23497ec681f3Smrg !(usage & PIPE_BIND_DEPTH_STENCIL)) 23507ec681f3Smrg retval |= PIPE_BIND_LINEAR; 23517ec681f3Smrg 23527ec681f3Smrg return retval == usage; 2353af69d88dSmrg} 2354af69d88dSmrg 235501e04c3fSmrg/* 235601e04c3fSmrg * framebuffer handling 235701e04c3fSmrg */ 235801e04c3fSmrg 23597ec681f3Smrgstatic void si_choose_spi_color_formats(struct si_surface *surf, unsigned format, unsigned swap, 23607ec681f3Smrg unsigned ntype, bool is_depth) 2361af69d88dSmrg{ 23627ec681f3Smrg struct ac_spi_color_formats formats = {}; 23637ec681f3Smrg 23647ec681f3Smrg ac_choose_spi_color_formats(format, swap, ntype, is_depth, true, &formats); 23657ec681f3Smrg 23667ec681f3Smrg surf->spi_shader_col_format = formats.normal; 23677ec681f3Smrg surf->spi_shader_col_format_alpha = formats.alpha; 23687ec681f3Smrg surf->spi_shader_col_format_blend = formats.blend; 23697ec681f3Smrg surf->spi_shader_col_format_blend_alpha = formats.blend_alpha; 237001e04c3fSmrg} 2371af69d88dSmrg 23727ec681f3Smrgstatic void si_initialize_color_surface(struct si_context *sctx, struct si_surface *surf) 2373af69d88dSmrg{ 23747ec681f3Smrg struct si_texture *tex = (struct si_texture *)surf->base.texture; 23757ec681f3Smrg unsigned color_info, color_attrib; 23767ec681f3Smrg unsigned format, swap, ntype, endian; 23777ec681f3Smrg const struct util_format_description *desc; 23787ec681f3Smrg int firstchan; 23797ec681f3Smrg unsigned blend_clamp = 0, blend_bypass = 0; 23807ec681f3Smrg 23817ec681f3Smrg desc = util_format_description(surf->base.format); 23827ec681f3Smrg for (firstchan = 0; firstchan < 4; firstchan++) { 23837ec681f3Smrg if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) { 23847ec681f3Smrg break; 23857ec681f3Smrg } 23867ec681f3Smrg } 23877ec681f3Smrg if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) { 23887ec681f3Smrg ntype = V_028C70_NUMBER_FLOAT; 23897ec681f3Smrg } else { 23907ec681f3Smrg ntype = V_028C70_NUMBER_UNORM; 23917ec681f3Smrg if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 23927ec681f3Smrg ntype = V_028C70_NUMBER_SRGB; 23937ec681f3Smrg else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) { 23947ec681f3Smrg if (desc->channel[firstchan].pure_integer) { 23957ec681f3Smrg ntype = V_028C70_NUMBER_SINT; 23967ec681f3Smrg } else { 23977ec681f3Smrg assert(desc->channel[firstchan].normalized); 23987ec681f3Smrg ntype = V_028C70_NUMBER_SNORM; 23997ec681f3Smrg } 24007ec681f3Smrg } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) { 24017ec681f3Smrg if (desc->channel[firstchan].pure_integer) { 24027ec681f3Smrg ntype = V_028C70_NUMBER_UINT; 24037ec681f3Smrg } else { 24047ec681f3Smrg assert(desc->channel[firstchan].normalized); 24057ec681f3Smrg ntype = V_028C70_NUMBER_UNORM; 24067ec681f3Smrg } 24077ec681f3Smrg } 24087ec681f3Smrg } 24097ec681f3Smrg 24107ec681f3Smrg format = si_translate_colorformat(sctx->chip_class, surf->base.format); 24117ec681f3Smrg if (format == V_028C70_COLOR_INVALID) { 24127ec681f3Smrg PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 24137ec681f3Smrg } 24147ec681f3Smrg assert(format != V_028C70_COLOR_INVALID); 24157ec681f3Smrg swap = si_translate_colorswap(surf->base.format, false); 24167ec681f3Smrg endian = si_colorformat_endian_swap(format); 24177ec681f3Smrg 24187ec681f3Smrg /* blend clamp should be set for all NORM/SRGB types */ 24197ec681f3Smrg if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM || 24207ec681f3Smrg ntype == V_028C70_NUMBER_SRGB) 24217ec681f3Smrg blend_clamp = 1; 24227ec681f3Smrg 24237ec681f3Smrg /* set blend bypass according to docs if SINT/UINT or 24247ec681f3Smrg 8/24 COLOR variants */ 24257ec681f3Smrg if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 24267ec681f3Smrg format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 24277ec681f3Smrg format == V_028C70_COLOR_X24_8_32_FLOAT) { 24287ec681f3Smrg blend_clamp = 0; 24297ec681f3Smrg blend_bypass = 1; 24307ec681f3Smrg } 24317ec681f3Smrg 24327ec681f3Smrg if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) { 24337ec681f3Smrg if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_8_8 || 24347ec681f3Smrg format == V_028C70_COLOR_8_8_8_8) 24357ec681f3Smrg surf->color_is_int8 = true; 24367ec681f3Smrg else if (format == V_028C70_COLOR_10_10_10_2 || format == V_028C70_COLOR_2_10_10_10) 24377ec681f3Smrg surf->color_is_int10 = true; 24387ec681f3Smrg } 24397ec681f3Smrg 24407ec681f3Smrg color_info = 24417ec681f3Smrg S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) | 24427ec681f3Smrg S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) | 24437ec681f3Smrg S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM && 24447ec681f3Smrg ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 && 24457ec681f3Smrg format != V_028C70_COLOR_24_8) | 24467ec681f3Smrg S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian); 24477ec681f3Smrg 24487ec681f3Smrg /* Intensity is implemented as Red, so treat it that way. */ 24497ec681f3Smrg color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 || 24507ec681f3Smrg util_format_is_intensity(surf->base.format)); 24517ec681f3Smrg 24527ec681f3Smrg if (tex->buffer.b.b.nr_samples > 1) { 24537ec681f3Smrg unsigned log_samples = util_logbase2(tex->buffer.b.b.nr_samples); 24547ec681f3Smrg unsigned log_fragments = util_logbase2(tex->buffer.b.b.nr_storage_samples); 24557ec681f3Smrg 24567ec681f3Smrg color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_fragments); 24577ec681f3Smrg 24587ec681f3Smrg if (tex->surface.fmask_offset) { 24597ec681f3Smrg color_info |= S_028C70_COMPRESSION(1); 24607ec681f3Smrg unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.color.fmask.bankh); 24617ec681f3Smrg 24627ec681f3Smrg if (sctx->chip_class == GFX6) { 24637ec681f3Smrg /* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */ 24647ec681f3Smrg color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 24657ec681f3Smrg } 24667ec681f3Smrg } 24677ec681f3Smrg } 24687ec681f3Smrg 24697ec681f3Smrg /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and 24707ec681f3Smrg * 64 for APU because all of our APUs to date use DIMMs which have 24717ec681f3Smrg * a request granularity size of 64B while all other chips have a 24727ec681f3Smrg * 32B request size */ 24737ec681f3Smrg unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B; 24747ec681f3Smrg if (!sctx->screen->info.has_dedicated_vram) 24757ec681f3Smrg min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B; 24767ec681f3Smrg 24777ec681f3Smrg if (sctx->chip_class >= GFX10) { 24787ec681f3Smrg surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | 24797ec681f3Smrg S_028C78_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) | 24807ec681f3Smrg S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) | 24817ec681f3Smrg S_028C78_INDEPENDENT_64B_BLOCKS(tex->surface.u.gfx9.color.dcc.independent_64B_blocks) | 24827ec681f3Smrg S_028C78_INDEPENDENT_128B_BLOCKS(tex->surface.u.gfx9.color.dcc.independent_128B_blocks); 24837ec681f3Smrg } else if (sctx->chip_class >= GFX8) { 24847ec681f3Smrg unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B; 24857ec681f3Smrg 24867ec681f3Smrg if (tex->buffer.b.b.nr_storage_samples > 1) { 24877ec681f3Smrg if (tex->surface.bpe == 1) 24887ec681f3Smrg max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; 24897ec681f3Smrg else if (tex->surface.bpe == 2) 24907ec681f3Smrg max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B; 24917ec681f3Smrg } 24927ec681f3Smrg 24937ec681f3Smrg surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | 24947ec681f3Smrg S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) | 24957ec681f3Smrg S_028C78_INDEPENDENT_64B_BLOCKS(1); 24967ec681f3Smrg } 24977ec681f3Smrg 24987ec681f3Smrg /* This must be set for fast clear to work without FMASK. */ 24997ec681f3Smrg if (!tex->surface.fmask_size && sctx->chip_class == GFX6) { 25007ec681f3Smrg unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh); 25017ec681f3Smrg color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 25027ec681f3Smrg } 25037ec681f3Smrg 25047ec681f3Smrg /* GFX10 field has the same base shift as the GFX6 field */ 25057ec681f3Smrg unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 25067ec681f3Smrg S_028C6C_SLICE_MAX_GFX10(surf->base.u.tex.last_layer); 25077ec681f3Smrg unsigned mip0_depth = util_max_layer(&tex->buffer.b.b, 0); 25087ec681f3Smrg 25097ec681f3Smrg if (sctx->chip_class >= GFX10) { 25107ec681f3Smrg color_view |= S_028C6C_MIP_LEVEL_GFX10(surf->base.u.tex.level); 25117ec681f3Smrg 25127ec681f3Smrg surf->cb_color_attrib3 = S_028EE0_MIP0_DEPTH(mip0_depth) | 25137ec681f3Smrg S_028EE0_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type) | 25147ec681f3Smrg S_028EE0_RESOURCE_LEVEL(1); 25157ec681f3Smrg } else if (sctx->chip_class == GFX9) { 25167ec681f3Smrg color_view |= S_028C6C_MIP_LEVEL_GFX9(surf->base.u.tex.level); 25177ec681f3Smrg color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | 25187ec681f3Smrg S_028C74_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type); 25197ec681f3Smrg } 25207ec681f3Smrg 25217ec681f3Smrg if (sctx->chip_class >= GFX9) { 25227ec681f3Smrg surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(surf->width0 - 1) | 25237ec681f3Smrg S_028C68_MIP0_HEIGHT(surf->height0 - 1) | 25247ec681f3Smrg S_028C68_MAX_MIP(tex->buffer.b.b.last_level); 25257ec681f3Smrg } 25267ec681f3Smrg 25277ec681f3Smrg surf->cb_color_view = color_view; 25287ec681f3Smrg surf->cb_color_info = color_info; 25297ec681f3Smrg surf->cb_color_attrib = color_attrib; 25307ec681f3Smrg 25317ec681f3Smrg /* Determine pixel shader export format */ 25327ec681f3Smrg si_choose_spi_color_formats(surf, format, swap, ntype, tex->is_depth); 25337ec681f3Smrg 25347ec681f3Smrg surf->color_initialized = true; 2535af69d88dSmrg} 2536af69d88dSmrg 25377ec681f3Smrgstatic void si_init_depth_surface(struct si_context *sctx, struct si_surface *surf) 2538af69d88dSmrg{ 25397ec681f3Smrg struct si_texture *tex = (struct si_texture *)surf->base.texture; 25407ec681f3Smrg unsigned level = surf->base.u.tex.level; 25417ec681f3Smrg unsigned format, stencil_format; 25427ec681f3Smrg uint32_t z_info, s_info; 25437ec681f3Smrg 25447ec681f3Smrg format = si_translate_dbformat(tex->db_render_format); 25457ec681f3Smrg stencil_format = tex->surface.has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID; 25467ec681f3Smrg 25477ec681f3Smrg assert(format != V_028040_Z_INVALID); 25487ec681f3Smrg if (format == V_028040_Z_INVALID) 25497ec681f3Smrg PRINT_ERR("Invalid DB format: %d, disabling DB.\n", tex->buffer.b.b.format); 25507ec681f3Smrg 25517ec681f3Smrg surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 25527ec681f3Smrg S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 25537ec681f3Smrg surf->db_htile_data_base = 0; 25547ec681f3Smrg surf->db_htile_surface = 0; 25557ec681f3Smrg 25567ec681f3Smrg if (sctx->chip_class >= GFX10) { 25577ec681f3Smrg surf->db_depth_view |= S_028008_SLICE_START_HI(surf->base.u.tex.first_layer >> 11) | 25587ec681f3Smrg S_028008_SLICE_MAX_HI(surf->base.u.tex.last_layer >> 11); 25597ec681f3Smrg } 25607ec681f3Smrg 25617ec681f3Smrg if (sctx->chip_class >= GFX9) { 25627ec681f3Smrg assert(tex->surface.u.gfx9.surf_offset == 0); 25637ec681f3Smrg surf->db_depth_base = tex->buffer.gpu_address >> 8; 25647ec681f3Smrg surf->db_stencil_base = (tex->buffer.gpu_address + tex->surface.u.gfx9.zs.stencil_offset) >> 8; 25657ec681f3Smrg z_info = S_028038_FORMAT(format) | 25667ec681f3Smrg S_028038_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)) | 25677ec681f3Smrg S_028038_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 25687ec681f3Smrg S_028038_MAXMIP(tex->buffer.b.b.last_level); 25697ec681f3Smrg s_info = S_02803C_FORMAT(stencil_format) | 25707ec681f3Smrg S_02803C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode); 25717ec681f3Smrg 25727ec681f3Smrg if (sctx->chip_class == GFX9) { 25737ec681f3Smrg surf->db_z_info2 = S_028068_EPITCH(tex->surface.u.gfx9.epitch); 25747ec681f3Smrg surf->db_stencil_info2 = S_02806C_EPITCH(tex->surface.u.gfx9.zs.stencil_epitch); 25757ec681f3Smrg } 25767ec681f3Smrg surf->db_depth_view |= S_028008_MIPID(level); 25777ec681f3Smrg surf->db_depth_size = 25787ec681f3Smrg S_02801C_X_MAX(tex->buffer.b.b.width0 - 1) | S_02801C_Y_MAX(tex->buffer.b.b.height0 - 1); 25797ec681f3Smrg 25807ec681f3Smrg if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) { 25817ec681f3Smrg z_info |= S_028038_TILE_SURFACE_ENABLE(1) | S_028038_ALLOW_EXPCLEAR(1); 25827ec681f3Smrg s_info |= S_02803C_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled); 25837ec681f3Smrg 25847ec681f3Smrg if (tex->surface.has_stencil && !tex->htile_stencil_disabled) { 25857ec681f3Smrg /* Stencil buffer workaround ported from the GFX6-GFX8 code. 25867ec681f3Smrg * See that for explanation. 25877ec681f3Smrg */ 25887ec681f3Smrg s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1); 25897ec681f3Smrg } 25907ec681f3Smrg 25917ec681f3Smrg surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 25927ec681f3Smrg surf->db_htile_surface = 25937ec681f3Smrg S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1); 25947ec681f3Smrg if (sctx->chip_class == GFX9) { 25957ec681f3Smrg surf->db_htile_surface |= S_028ABC_RB_ALIGNED(1); 25967ec681f3Smrg } 25977ec681f3Smrg } 25987ec681f3Smrg } else { 25997ec681f3Smrg /* GFX6-GFX8 */ 26007ec681f3Smrg struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level]; 26017ec681f3Smrg 26027ec681f3Smrg assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 26037ec681f3Smrg 26047ec681f3Smrg surf->db_depth_base = 26057ec681f3Smrg (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.level[level].offset_256B; 26067ec681f3Smrg surf->db_stencil_base = 26077ec681f3Smrg (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.zs.stencil_level[level].offset_256B; 26087ec681f3Smrg 26097ec681f3Smrg z_info = 26107ec681f3Smrg S_028040_FORMAT(format) | S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)); 26117ec681f3Smrg s_info = S_028044_FORMAT(stencil_format); 26127ec681f3Smrg surf->db_depth_info = 0; 26137ec681f3Smrg 26147ec681f3Smrg if (sctx->chip_class >= GFX7) { 26157ec681f3Smrg struct radeon_info *info = &sctx->screen->info; 26167ec681f3Smrg unsigned index = tex->surface.u.legacy.tiling_index[level]; 26177ec681f3Smrg unsigned stencil_index = tex->surface.u.legacy.zs.stencil_tiling_index[level]; 26187ec681f3Smrg unsigned macro_index = tex->surface.u.legacy.macro_tile_index; 26197ec681f3Smrg unsigned tile_mode = info->si_tile_mode_array[index]; 26207ec681f3Smrg unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index]; 26217ec681f3Smrg unsigned macro_mode = info->cik_macrotile_mode_array[macro_index]; 26227ec681f3Smrg 26237ec681f3Smrg surf->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) | 26247ec681f3Smrg S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) | 26257ec681f3Smrg S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) | 26267ec681f3Smrg S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) | 26277ec681f3Smrg S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) | 26287ec681f3Smrg S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode)); 26297ec681f3Smrg z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode)); 26307ec681f3Smrg s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode)); 26317ec681f3Smrg } else { 26327ec681f3Smrg unsigned tile_mode_index = si_tile_mode_index(tex, level, false); 26337ec681f3Smrg z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 26347ec681f3Smrg tile_mode_index = si_tile_mode_index(tex, level, true); 26357ec681f3Smrg s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 26367ec681f3Smrg } 26377ec681f3Smrg 26387ec681f3Smrg surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 26397ec681f3Smrg S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 26407ec681f3Smrg surf->db_depth_slice = 26417ec681f3Smrg S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * levelinfo->nblk_y) / 64 - 1); 26427ec681f3Smrg 26437ec681f3Smrg if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) { 26447ec681f3Smrg z_info |= S_028040_TILE_SURFACE_ENABLE(1) | S_028040_ALLOW_EXPCLEAR(1); 26457ec681f3Smrg s_info |= S_028044_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled); 26467ec681f3Smrg 26477ec681f3Smrg if (tex->surface.has_stencil) { 26487ec681f3Smrg /* Workaround: For a not yet understood reason, the 26497ec681f3Smrg * combination of MSAA, fast stencil clear and stencil 26507ec681f3Smrg * decompress messes with subsequent stencil buffer 26517ec681f3Smrg * uses. Problem was reproduced on Verde, Bonaire, 26527ec681f3Smrg * Tonga, and Carrizo. 26537ec681f3Smrg * 26547ec681f3Smrg * Disabling EXPCLEAR works around the problem. 26557ec681f3Smrg * 26567ec681f3Smrg * Check piglit's arb_texture_multisample-stencil-clear 26577ec681f3Smrg * test if you want to try changing this. 26587ec681f3Smrg */ 26597ec681f3Smrg if (tex->buffer.b.b.nr_samples <= 1) 26607ec681f3Smrg s_info |= S_028044_ALLOW_EXPCLEAR(1); 26617ec681f3Smrg } 26627ec681f3Smrg 26637ec681f3Smrg surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 26647ec681f3Smrg surf->db_htile_surface = S_028ABC_FULL_CACHE(1); 26657ec681f3Smrg } 26667ec681f3Smrg } 26677ec681f3Smrg 26687ec681f3Smrg surf->db_z_info = z_info; 26697ec681f3Smrg surf->db_stencil_info = s_info; 26707ec681f3Smrg 26717ec681f3Smrg surf->depth_initialized = true; 2672af69d88dSmrg} 2673af69d88dSmrg 267401e04c3fSmrgvoid si_update_fb_dirtiness_after_rendering(struct si_context *sctx) 267501e04c3fSmrg{ 26767ec681f3Smrg if (sctx->decompression_enabled) 26777ec681f3Smrg return; 26787ec681f3Smrg 26797ec681f3Smrg if (sctx->framebuffer.state.zsbuf) { 26807ec681f3Smrg struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; 26817ec681f3Smrg struct si_texture *tex = (struct si_texture *)surf->texture; 26827ec681f3Smrg 26837ec681f3Smrg tex->dirty_level_mask |= 1 << surf->u.tex.level; 26847ec681f3Smrg 26857ec681f3Smrg if (tex->surface.has_stencil) 26867ec681f3Smrg tex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; 26877ec681f3Smrg } 26887ec681f3Smrg 26897ec681f3Smrg unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask; 26907ec681f3Smrg while (compressed_cb_mask) { 26917ec681f3Smrg unsigned i = u_bit_scan(&compressed_cb_mask); 26927ec681f3Smrg struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; 26937ec681f3Smrg struct si_texture *tex = (struct si_texture *)surf->texture; 26947ec681f3Smrg 26957ec681f3Smrg if (tex->surface.fmask_offset) { 26967ec681f3Smrg tex->dirty_level_mask |= 1 << surf->u.tex.level; 26977ec681f3Smrg tex->fmask_is_identity = false; 26987ec681f3Smrg } 26997ec681f3Smrg } 270001e04c3fSmrg} 270101e04c3fSmrg 270201e04c3fSmrgstatic void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) 270301e04c3fSmrg{ 27047ec681f3Smrg for (int i = 0; i < state->nr_cbufs; ++i) { 27057ec681f3Smrg struct si_surface *surf = NULL; 27067ec681f3Smrg struct si_texture *tex; 27077ec681f3Smrg 27087ec681f3Smrg if (!state->cbufs[i]) 27097ec681f3Smrg continue; 27107ec681f3Smrg surf = (struct si_surface *)state->cbufs[i]; 27117ec681f3Smrg tex = (struct si_texture *)surf->base.texture; 27127ec681f3Smrg 27137ec681f3Smrg p_atomic_dec(&tex->framebuffers_bound); 27147ec681f3Smrg } 27157ec681f3Smrg} 27167ec681f3Smrg 27177ec681f3Smrgvoid si_mark_display_dcc_dirty(struct si_context *sctx, struct si_texture *tex) 27187ec681f3Smrg{ 27197ec681f3Smrg if (!tex->surface.display_dcc_offset || tex->displayable_dcc_dirty) 27207ec681f3Smrg return; 27217ec681f3Smrg 27227ec681f3Smrg if (!(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) { 27237ec681f3Smrg struct hash_entry *entry = _mesa_hash_table_search(sctx->dirty_implicit_resources, tex); 27247ec681f3Smrg if (!entry) { 27257ec681f3Smrg struct pipe_resource *dummy = NULL; 27267ec681f3Smrg pipe_resource_reference(&dummy, &tex->buffer.b.b); 27277ec681f3Smrg _mesa_hash_table_insert(sctx->dirty_implicit_resources, tex, tex); 27287ec681f3Smrg } 27297ec681f3Smrg } 27307ec681f3Smrg tex->displayable_dcc_dirty = true; 27317ec681f3Smrg} 273201e04c3fSmrg 27337ec681f3Smrgstatic void si_update_display_dcc_dirty(struct si_context *sctx) 27347ec681f3Smrg{ 27357ec681f3Smrg const struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 273601e04c3fSmrg 27377ec681f3Smrg for (unsigned i = 0; i < state->nr_cbufs; i++) { 27387ec681f3Smrg if (state->cbufs[i]) 27397ec681f3Smrg si_mark_display_dcc_dirty(sctx, (struct si_texture *)state->cbufs[i]->texture); 27407ec681f3Smrg } 274101e04c3fSmrg} 274201e04c3fSmrg 2743af69d88dSmrgstatic void si_set_framebuffer_state(struct pipe_context *ctx, 27447ec681f3Smrg const struct pipe_framebuffer_state *state) 2745af69d88dSmrg{ 27467ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 27477ec681f3Smrg struct si_surface *surf = NULL; 27487ec681f3Smrg struct si_texture *tex; 27497ec681f3Smrg bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; 27507ec681f3Smrg unsigned old_nr_samples = sctx->framebuffer.nr_samples; 27517ec681f3Smrg unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit; 27527ec681f3Smrg bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf; 27537ec681f3Smrg bool old_has_stencil = 27547ec681f3Smrg old_has_zsbuf && 27557ec681f3Smrg ((struct si_texture *)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil; 27567ec681f3Smrg bool unbound = false; 27577ec681f3Smrg int i; 27587ec681f3Smrg 27597ec681f3Smrg /* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs 27607ec681f3Smrg * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. 27617ec681f3Smrg * We could implement the full workaround here, but it's a useless case. 27627ec681f3Smrg */ 27637ec681f3Smrg if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) { 27647ec681f3Smrg unreachable("the framebuffer shouldn't have zero area"); 27657ec681f3Smrg return; 27667ec681f3Smrg } 27677ec681f3Smrg 27687ec681f3Smrg si_update_fb_dirtiness_after_rendering(sctx); 27697ec681f3Smrg 27707ec681f3Smrg /* Disable DCC if the formats are incompatible. */ 27717ec681f3Smrg for (i = 0; i < state->nr_cbufs; i++) { 27727ec681f3Smrg if (!state->cbufs[i]) 27737ec681f3Smrg continue; 27747ec681f3Smrg 27757ec681f3Smrg surf = (struct si_surface *)state->cbufs[i]; 27767ec681f3Smrg tex = (struct si_texture *)surf->base.texture; 27777ec681f3Smrg 27787ec681f3Smrg if (!surf->dcc_incompatible) 27797ec681f3Smrg continue; 27807ec681f3Smrg 27817ec681f3Smrg /* Since the DCC decompression calls back into set_framebuffer- 27827ec681f3Smrg * _state, we need to unbind the framebuffer, so that 27837ec681f3Smrg * vi_separate_dcc_stop_query isn't called twice with the same 27847ec681f3Smrg * color buffer. 27857ec681f3Smrg */ 27867ec681f3Smrg if (!unbound) { 27877ec681f3Smrg util_copy_framebuffer_state(&sctx->framebuffer.state, NULL); 27887ec681f3Smrg unbound = true; 27897ec681f3Smrg } 27907ec681f3Smrg 27917ec681f3Smrg if (vi_dcc_enabled(tex, surf->base.u.tex.level)) 27927ec681f3Smrg if (!si_texture_disable_dcc(sctx, tex)) 27937ec681f3Smrg si_decompress_dcc(sctx, tex); 27947ec681f3Smrg 27957ec681f3Smrg surf->dcc_incompatible = false; 27967ec681f3Smrg } 27977ec681f3Smrg 27987ec681f3Smrg /* Only flush TC when changing the framebuffer state, because 27997ec681f3Smrg * the only client not using TC that can change textures is 28007ec681f3Smrg * the framebuffer. 28017ec681f3Smrg * 28027ec681f3Smrg * Wait for compute shaders because of possible transitions: 28037ec681f3Smrg * - FB write -> shader read 28047ec681f3Smrg * - shader write -> FB read 28057ec681f3Smrg * 28067ec681f3Smrg * DB caches are flushed on demand (using si_decompress_textures). 28077ec681f3Smrg * 28087ec681f3Smrg * When MSAA is enabled, CB and TC caches are flushed on demand 28097ec681f3Smrg * (after FMASK decompression). Shader write -> FB read transitions 28107ec681f3Smrg * cannot happen for MSAA textures, because MSAA shader images are 28117ec681f3Smrg * not supported. 28127ec681f3Smrg * 28137ec681f3Smrg * Only flush and wait for CB if there is actually a bound color buffer. 28147ec681f3Smrg */ 28157ec681f3Smrg if (sctx->framebuffer.uncompressed_cb_mask) { 28167ec681f3Smrg si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 28177ec681f3Smrg sctx->framebuffer.CB_has_shader_readable_metadata, 28187ec681f3Smrg sctx->framebuffer.all_DCC_pipe_aligned); 28197ec681f3Smrg } 28207ec681f3Smrg 28217ec681f3Smrg sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; 28227ec681f3Smrg 28237ec681f3Smrg /* u_blitter doesn't invoke depth decompression when it does multiple 28247ec681f3Smrg * blits in a row, but the only case when it matters for DB is when 28257ec681f3Smrg * doing generate_mipmap. So here we flush DB manually between 28267ec681f3Smrg * individual generate_mipmap blits. 28277ec681f3Smrg * Note that lower mipmap levels aren't compressed. 28287ec681f3Smrg */ 28297ec681f3Smrg if (sctx->generate_mipmap_for_depth) { 28307ec681f3Smrg si_make_DB_shader_coherent(sctx, 1, false, sctx->framebuffer.DB_has_shader_readable_metadata); 28317ec681f3Smrg } else if (sctx->chip_class == GFX9) { 28327ec681f3Smrg /* It appears that DB metadata "leaks" in a sequence of: 28337ec681f3Smrg * - depth clear 28347ec681f3Smrg * - DCC decompress for shader image writes (with DB disabled) 28357ec681f3Smrg * - render with DEPTH_BEFORE_SHADER=1 28367ec681f3Smrg * Flushing DB metadata works around the problem. 28377ec681f3Smrg */ 28387ec681f3Smrg sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META; 28397ec681f3Smrg } 28407ec681f3Smrg 28417ec681f3Smrg /* Take the maximum of the old and new count. If the new count is lower, 28427ec681f3Smrg * dirtying is needed to disable the unbound colorbuffers. 28437ec681f3Smrg */ 28447ec681f3Smrg sctx->framebuffer.dirty_cbufs |= 28457ec681f3Smrg (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; 28467ec681f3Smrg sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; 28477ec681f3Smrg 28487ec681f3Smrg si_dec_framebuffer_counters(&sctx->framebuffer.state); 28497ec681f3Smrg util_copy_framebuffer_state(&sctx->framebuffer.state, state); 28507ec681f3Smrg 28517ec681f3Smrg sctx->framebuffer.colorbuf_enabled_4bit = 0; 28527ec681f3Smrg sctx->framebuffer.spi_shader_col_format = 0; 28537ec681f3Smrg sctx->framebuffer.spi_shader_col_format_alpha = 0; 28547ec681f3Smrg sctx->framebuffer.spi_shader_col_format_blend = 0; 28557ec681f3Smrg sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; 28567ec681f3Smrg sctx->framebuffer.color_is_int8 = 0; 28577ec681f3Smrg sctx->framebuffer.color_is_int10 = 0; 28587ec681f3Smrg 28597ec681f3Smrg sctx->framebuffer.compressed_cb_mask = 0; 28607ec681f3Smrg sctx->framebuffer.uncompressed_cb_mask = 0; 28617ec681f3Smrg sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 28627ec681f3Smrg sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples; 28637ec681f3Smrg sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 28647ec681f3Smrg sctx->framebuffer.any_dst_linear = false; 28657ec681f3Smrg sctx->framebuffer.CB_has_shader_readable_metadata = false; 28667ec681f3Smrg sctx->framebuffer.DB_has_shader_readable_metadata = false; 28677ec681f3Smrg sctx->framebuffer.all_DCC_pipe_aligned = true; 28687ec681f3Smrg sctx->framebuffer.has_dcc_msaa = false; 28697ec681f3Smrg sctx->framebuffer.min_bytes_per_pixel = 0; 28707ec681f3Smrg 28717ec681f3Smrg for (i = 0; i < state->nr_cbufs; i++) { 28727ec681f3Smrg if (!state->cbufs[i]) 28737ec681f3Smrg continue; 28747ec681f3Smrg 28757ec681f3Smrg surf = (struct si_surface *)state->cbufs[i]; 28767ec681f3Smrg tex = (struct si_texture *)surf->base.texture; 28777ec681f3Smrg 28787ec681f3Smrg if (!surf->color_initialized) { 28797ec681f3Smrg si_initialize_color_surface(sctx, surf); 28807ec681f3Smrg } 28817ec681f3Smrg 28827ec681f3Smrg sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4); 28837ec681f3Smrg sctx->framebuffer.spi_shader_col_format |= surf->spi_shader_col_format << (i * 4); 28847ec681f3Smrg sctx->framebuffer.spi_shader_col_format_alpha |= surf->spi_shader_col_format_alpha << (i * 4); 28857ec681f3Smrg sctx->framebuffer.spi_shader_col_format_blend |= surf->spi_shader_col_format_blend << (i * 4); 28867ec681f3Smrg sctx->framebuffer.spi_shader_col_format_blend_alpha |= surf->spi_shader_col_format_blend_alpha 28877ec681f3Smrg << (i * 4); 28887ec681f3Smrg 28897ec681f3Smrg if (surf->color_is_int8) 28907ec681f3Smrg sctx->framebuffer.color_is_int8 |= 1 << i; 28917ec681f3Smrg if (surf->color_is_int10) 28927ec681f3Smrg sctx->framebuffer.color_is_int10 |= 1 << i; 28937ec681f3Smrg 28947ec681f3Smrg if (tex->surface.fmask_offset) 28957ec681f3Smrg sctx->framebuffer.compressed_cb_mask |= 1 << i; 28967ec681f3Smrg else 28977ec681f3Smrg sctx->framebuffer.uncompressed_cb_mask |= 1 << i; 28987ec681f3Smrg 28997ec681f3Smrg /* Don't update nr_color_samples for non-AA buffers. 29007ec681f3Smrg * (e.g. destination of MSAA resolve) 29017ec681f3Smrg */ 29027ec681f3Smrg if (tex->buffer.b.b.nr_samples >= 2 && 29037ec681f3Smrg tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) { 29047ec681f3Smrg sctx->framebuffer.nr_color_samples = 29057ec681f3Smrg MIN2(sctx->framebuffer.nr_color_samples, tex->buffer.b.b.nr_storage_samples); 29067ec681f3Smrg sctx->framebuffer.nr_color_samples = MAX2(1, sctx->framebuffer.nr_color_samples); 29077ec681f3Smrg } 29087ec681f3Smrg 29097ec681f3Smrg if (tex->surface.is_linear) 29107ec681f3Smrg sctx->framebuffer.any_dst_linear = true; 29117ec681f3Smrg 29127ec681f3Smrg if (vi_dcc_enabled(tex, surf->base.u.tex.level)) { 29137ec681f3Smrg sctx->framebuffer.CB_has_shader_readable_metadata = true; 29147ec681f3Smrg 29157ec681f3Smrg if (sctx->chip_class >= GFX9 && !tex->surface.u.gfx9.color.dcc.pipe_aligned) 29167ec681f3Smrg sctx->framebuffer.all_DCC_pipe_aligned = false; 29177ec681f3Smrg 29187ec681f3Smrg if (tex->buffer.b.b.nr_storage_samples >= 2) 29197ec681f3Smrg sctx->framebuffer.has_dcc_msaa = true; 29207ec681f3Smrg } 29217ec681f3Smrg 29227ec681f3Smrg si_context_add_resource_size(sctx, surf->base.texture); 29237ec681f3Smrg 29247ec681f3Smrg p_atomic_inc(&tex->framebuffers_bound); 29257ec681f3Smrg 29267ec681f3Smrg /* Update the minimum but don't keep 0. */ 29277ec681f3Smrg if (!sctx->framebuffer.min_bytes_per_pixel || 29287ec681f3Smrg tex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel) 29297ec681f3Smrg sctx->framebuffer.min_bytes_per_pixel = tex->surface.bpe; 29307ec681f3Smrg } 29317ec681f3Smrg 29327ec681f3Smrg /* For optimal DCC performance. */ 29337ec681f3Smrg if (sctx->chip_class >= GFX10) 29347ec681f3Smrg sctx->framebuffer.dcc_overwrite_combiner_watermark = 6; 29357ec681f3Smrg else 29367ec681f3Smrg sctx->framebuffer.dcc_overwrite_combiner_watermark = 4; 29377ec681f3Smrg 29387ec681f3Smrg struct si_texture *zstex = NULL; 29397ec681f3Smrg 29407ec681f3Smrg if (state->zsbuf) { 29417ec681f3Smrg surf = (struct si_surface *)state->zsbuf; 29427ec681f3Smrg zstex = (struct si_texture *)surf->base.texture; 29437ec681f3Smrg 29447ec681f3Smrg if (!surf->depth_initialized) { 29457ec681f3Smrg si_init_depth_surface(sctx, surf); 29467ec681f3Smrg } 29477ec681f3Smrg 29487ec681f3Smrg if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level, PIPE_MASK_ZS)) 29497ec681f3Smrg sctx->framebuffer.DB_has_shader_readable_metadata = true; 29507ec681f3Smrg 29517ec681f3Smrg si_context_add_resource_size(sctx, surf->base.texture); 29527ec681f3Smrg 29537ec681f3Smrg /* Update the minimum but don't keep 0. */ 29547ec681f3Smrg if (!sctx->framebuffer.min_bytes_per_pixel || 29557ec681f3Smrg zstex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel) 29567ec681f3Smrg sctx->framebuffer.min_bytes_per_pixel = zstex->surface.bpe; 29577ec681f3Smrg } 29587ec681f3Smrg 29597ec681f3Smrg si_update_ps_colorbuf0_slot(sctx); 29607ec681f3Smrg si_update_poly_offset_state(sctx); 29617ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 29627ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 29637ec681f3Smrg 29647ec681f3Smrg /* NGG cull state uses the sample count. */ 29657ec681f3Smrg if (sctx->screen->use_ngg_culling) 29667ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); 29677ec681f3Smrg 29687ec681f3Smrg if (sctx->screen->dpbb_allowed) 29697ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 29707ec681f3Smrg 29717ec681f3Smrg if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 29727ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 29737ec681f3Smrg 29747ec681f3Smrg if (sctx->screen->has_out_of_order_rast && 29757ec681f3Smrg (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit || 29767ec681f3Smrg !!sctx->framebuffer.state.zsbuf != old_has_zsbuf || 29777ec681f3Smrg (zstex && zstex->surface.has_stencil != old_has_stencil))) 29787ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 29797ec681f3Smrg 29807ec681f3Smrg if (sctx->framebuffer.nr_samples != old_nr_samples) { 29817ec681f3Smrg struct pipe_constant_buffer constbuf = {0}; 29827ec681f3Smrg 29837ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 29847ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 29857ec681f3Smrg 29867ec681f3Smrg if (!sctx->sample_pos_buffer) { 29877ec681f3Smrg sctx->sample_pos_buffer = pipe_buffer_create_with_data(&sctx->b, 0, PIPE_USAGE_DEFAULT, 29887ec681f3Smrg sizeof(sctx->sample_positions), 29897ec681f3Smrg &sctx->sample_positions); 29907ec681f3Smrg } 29917ec681f3Smrg constbuf.buffer = sctx->sample_pos_buffer; 29927ec681f3Smrg 29937ec681f3Smrg /* Set sample locations as fragment shader constants. */ 29947ec681f3Smrg switch (sctx->framebuffer.nr_samples) { 29957ec681f3Smrg case 1: 29967ec681f3Smrg constbuf.buffer_offset = 0; 29977ec681f3Smrg break; 29987ec681f3Smrg case 2: 29997ec681f3Smrg constbuf.buffer_offset = 30007ec681f3Smrg (ubyte *)sctx->sample_positions.x2 - (ubyte *)sctx->sample_positions.x1; 30017ec681f3Smrg break; 30027ec681f3Smrg case 4: 30037ec681f3Smrg constbuf.buffer_offset = 30047ec681f3Smrg (ubyte *)sctx->sample_positions.x4 - (ubyte *)sctx->sample_positions.x1; 30057ec681f3Smrg break; 30067ec681f3Smrg case 8: 30077ec681f3Smrg constbuf.buffer_offset = 30087ec681f3Smrg (ubyte *)sctx->sample_positions.x8 - (ubyte *)sctx->sample_positions.x1; 30097ec681f3Smrg break; 30107ec681f3Smrg case 16: 30117ec681f3Smrg constbuf.buffer_offset = 30127ec681f3Smrg (ubyte *)sctx->sample_positions.x16 - (ubyte *)sctx->sample_positions.x1; 30137ec681f3Smrg break; 30147ec681f3Smrg default: 30157ec681f3Smrg PRINT_ERR("Requested an invalid number of samples %i.\n", sctx->framebuffer.nr_samples); 30167ec681f3Smrg assert(0); 30177ec681f3Smrg } 30187ec681f3Smrg constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 30197ec681f3Smrg si_set_internal_const_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 30207ec681f3Smrg 30217ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); 30227ec681f3Smrg } 30237ec681f3Smrg 30247ec681f3Smrg si_ps_key_update_framebuffer(sctx); 30257ec681f3Smrg si_ps_key_update_framebuffer_blend(sctx); 30267ec681f3Smrg si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 30277ec681f3Smrg si_update_ps_inputs_read_or_disabled(sctx); 30287ec681f3Smrg sctx->do_update_shaders = true; 30297ec681f3Smrg 30307ec681f3Smrg if (!sctx->decompression_enabled) { 30317ec681f3Smrg /* Prevent textures decompression when the framebuffer state 30327ec681f3Smrg * changes come from the decompression passes themselves. 30337ec681f3Smrg */ 30347ec681f3Smrg sctx->need_check_render_feedback = true; 30357ec681f3Smrg } 3036af69d88dSmrg} 3037af69d88dSmrg 303801e04c3fSmrgstatic void si_emit_framebuffer_state(struct si_context *sctx) 3039af69d88dSmrg{ 30407ec681f3Smrg struct radeon_cmdbuf *cs = &sctx->gfx_cs; 30417ec681f3Smrg struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 30427ec681f3Smrg unsigned i, nr_cbufs = state->nr_cbufs; 30437ec681f3Smrg struct si_texture *tex = NULL; 30447ec681f3Smrg struct si_surface *cb = NULL; 30457ec681f3Smrg unsigned cb_color_info = 0; 30467ec681f3Smrg 30477ec681f3Smrg radeon_begin(cs); 30487ec681f3Smrg 30497ec681f3Smrg /* Colorbuffers. */ 30507ec681f3Smrg for (i = 0; i < nr_cbufs; i++) { 30517ec681f3Smrg uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base; 30527ec681f3Smrg unsigned cb_color_attrib; 30537ec681f3Smrg 30547ec681f3Smrg if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) 30557ec681f3Smrg continue; 30567ec681f3Smrg 30577ec681f3Smrg cb = (struct si_surface *)state->cbufs[i]; 30587ec681f3Smrg if (!cb) { 30597ec681f3Smrg radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 30607ec681f3Smrg S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 30617ec681f3Smrg continue; 30627ec681f3Smrg } 30637ec681f3Smrg 30647ec681f3Smrg tex = (struct si_texture *)cb->base.texture; 30657ec681f3Smrg radeon_add_to_buffer_list( 30667ec681f3Smrg sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC, 30677ec681f3Smrg tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER); 30687ec681f3Smrg 30697ec681f3Smrg if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) { 30707ec681f3Smrg radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer, 30717ec681f3Smrg RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC, 30727ec681f3Smrg RADEON_PRIO_SEPARATE_META); 30737ec681f3Smrg } 30747ec681f3Smrg 30757ec681f3Smrg /* Compute mutable surface parameters. */ 30767ec681f3Smrg cb_color_base = tex->buffer.gpu_address >> 8; 30777ec681f3Smrg cb_color_fmask = 0; 30787ec681f3Smrg cb_color_cmask = tex->cmask_base_address_reg; 30797ec681f3Smrg cb_dcc_base = 0; 30807ec681f3Smrg cb_color_info = cb->cb_color_info | tex->cb_color_info; 30817ec681f3Smrg cb_color_attrib = cb->cb_color_attrib; 30827ec681f3Smrg 30837ec681f3Smrg if (tex->swap_rgb_to_bgr) { 30847ec681f3Smrg /* Swap R and B channels. */ 30857ec681f3Smrg static unsigned rgb_to_bgr[4] = { 30867ec681f3Smrg [V_028C70_SWAP_STD] = V_028C70_SWAP_ALT, 30877ec681f3Smrg [V_028C70_SWAP_ALT] = V_028C70_SWAP_STD, 30887ec681f3Smrg [V_028C70_SWAP_STD_REV] = V_028C70_SWAP_ALT_REV, 30897ec681f3Smrg [V_028C70_SWAP_ALT_REV] = V_028C70_SWAP_STD_REV, 30907ec681f3Smrg }; 30917ec681f3Smrg unsigned swap = rgb_to_bgr[G_028C70_COMP_SWAP(cb_color_info)]; 30927ec681f3Smrg 30937ec681f3Smrg cb_color_info &= C_028C70_COMP_SWAP; 30947ec681f3Smrg cb_color_info |= S_028C70_COMP_SWAP(swap); 30957ec681f3Smrg } 30967ec681f3Smrg 30977ec681f3Smrg if (cb->base.u.tex.level > 0) 30987ec681f3Smrg cb_color_info &= C_028C70_FAST_CLEAR; 30997ec681f3Smrg 31007ec681f3Smrg if (tex->surface.fmask_offset) { 31017ec681f3Smrg cb_color_fmask = (tex->buffer.gpu_address + tex->surface.fmask_offset) >> 8; 31027ec681f3Smrg cb_color_fmask |= tex->surface.fmask_tile_swizzle; 31037ec681f3Smrg } 31047ec681f3Smrg 31057ec681f3Smrg /* Set up DCC. */ 31067ec681f3Smrg if (vi_dcc_enabled(tex, cb->base.u.tex.level)) { 31077ec681f3Smrg bool is_msaa_resolve_dst = state->cbufs[0] && state->cbufs[0]->texture->nr_samples > 1 && 31087ec681f3Smrg state->cbufs[1] == &cb->base && 31097ec681f3Smrg state->cbufs[1]->texture->nr_samples <= 1; 31107ec681f3Smrg 31117ec681f3Smrg if (!is_msaa_resolve_dst) 31127ec681f3Smrg cb_color_info |= S_028C70_DCC_ENABLE(1); 31137ec681f3Smrg 31147ec681f3Smrg cb_dcc_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 31157ec681f3Smrg 31167ec681f3Smrg unsigned dcc_tile_swizzle = tex->surface.tile_swizzle; 31177ec681f3Smrg dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8; 31187ec681f3Smrg cb_dcc_base |= dcc_tile_swizzle; 31197ec681f3Smrg } 31207ec681f3Smrg 31217ec681f3Smrg if (sctx->chip_class >= GFX10) { 31227ec681f3Smrg unsigned cb_color_attrib3; 31237ec681f3Smrg 31247ec681f3Smrg /* Set mutable surface parameters. */ 31257ec681f3Smrg cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 31267ec681f3Smrg cb_color_base |= tex->surface.tile_swizzle; 31277ec681f3Smrg if (!tex->surface.fmask_offset) 31287ec681f3Smrg cb_color_fmask = cb_color_base; 31297ec681f3Smrg if (cb->base.u.tex.level > 0) 31307ec681f3Smrg cb_color_cmask = cb_color_base; 31317ec681f3Smrg 31327ec681f3Smrg cb_color_attrib3 = cb->cb_color_attrib3 | 31337ec681f3Smrg S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 31347ec681f3Smrg S_028EE0_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 31357ec681f3Smrg S_028EE0_CMASK_PIPE_ALIGNED(1) | 31367ec681f3Smrg S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned); 31377ec681f3Smrg 31387ec681f3Smrg radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 14); 31397ec681f3Smrg radeon_emit(cb_color_base); /* CB_COLOR0_BASE */ 31407ec681f3Smrg radeon_emit(0); /* hole */ 31417ec681f3Smrg radeon_emit(0); /* hole */ 31427ec681f3Smrg radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 31437ec681f3Smrg radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 31447ec681f3Smrg radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 31457ec681f3Smrg radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 31467ec681f3Smrg radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 31477ec681f3Smrg radeon_emit(0); /* hole */ 31487ec681f3Smrg radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 31497ec681f3Smrg radeon_emit(0); /* hole */ 31507ec681f3Smrg radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 31517ec681f3Smrg radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 31527ec681f3Smrg radeon_emit(cb_dcc_base); /* CB_COLOR0_DCC_BASE */ 31537ec681f3Smrg 31547ec681f3Smrg radeon_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32); 31557ec681f3Smrg radeon_set_context_reg(R_028E60_CB_COLOR0_CMASK_BASE_EXT + i * 4, 31567ec681f3Smrg cb_color_cmask >> 32); 31577ec681f3Smrg radeon_set_context_reg(R_028E80_CB_COLOR0_FMASK_BASE_EXT + i * 4, 31587ec681f3Smrg cb_color_fmask >> 32); 31597ec681f3Smrg radeon_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32); 31607ec681f3Smrg radeon_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2); 31617ec681f3Smrg radeon_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3); 31627ec681f3Smrg } else if (sctx->chip_class == GFX9) { 31637ec681f3Smrg struct gfx9_surf_meta_flags meta = { 31647ec681f3Smrg .rb_aligned = 1, 31657ec681f3Smrg .pipe_aligned = 1, 31667ec681f3Smrg }; 31677ec681f3Smrg 31687ec681f3Smrg if (!tex->is_depth && tex->surface.meta_offset) 31697ec681f3Smrg meta = tex->surface.u.gfx9.color.dcc; 31707ec681f3Smrg 31717ec681f3Smrg /* Set mutable surface parameters. */ 31727ec681f3Smrg cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 31737ec681f3Smrg cb_color_base |= tex->surface.tile_swizzle; 31747ec681f3Smrg if (!tex->surface.fmask_offset) 31757ec681f3Smrg cb_color_fmask = cb_color_base; 31767ec681f3Smrg if (cb->base.u.tex.level > 0) 31777ec681f3Smrg cb_color_cmask = cb_color_base; 31787ec681f3Smrg cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 31797ec681f3Smrg S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 31807ec681f3Smrg S_028C74_RB_ALIGNED(meta.rb_aligned) | 31817ec681f3Smrg S_028C74_PIPE_ALIGNED(meta.pipe_aligned); 31827ec681f3Smrg 31837ec681f3Smrg radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 15); 31847ec681f3Smrg radeon_emit(cb_color_base); /* CB_COLOR0_BASE */ 31857ec681f3Smrg radeon_emit(S_028C64_BASE_256B(cb_color_base >> 32)); /* CB_COLOR0_BASE_EXT */ 31867ec681f3Smrg radeon_emit(cb->cb_color_attrib2); /* CB_COLOR0_ATTRIB2 */ 31877ec681f3Smrg radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 31887ec681f3Smrg radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 31897ec681f3Smrg radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 31907ec681f3Smrg radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 31917ec681f3Smrg radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 31927ec681f3Smrg radeon_emit(S_028C80_BASE_256B(cb_color_cmask >> 32)); /* CB_COLOR0_CMASK_BASE_EXT */ 31937ec681f3Smrg radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 31947ec681f3Smrg radeon_emit(S_028C88_BASE_256B(cb_color_fmask >> 32)); /* CB_COLOR0_FMASK_BASE_EXT */ 31957ec681f3Smrg radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 31967ec681f3Smrg radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 31977ec681f3Smrg radeon_emit(cb_dcc_base); /* CB_COLOR0_DCC_BASE */ 31987ec681f3Smrg radeon_emit(S_028C98_BASE_256B(cb_dcc_base >> 32)); /* CB_COLOR0_DCC_BASE_EXT */ 31997ec681f3Smrg 32007ec681f3Smrg radeon_set_context_reg(R_0287A0_CB_MRT0_EPITCH + i * 4, 32017ec681f3Smrg S_0287A0_EPITCH(tex->surface.u.gfx9.epitch)); 32027ec681f3Smrg } else { 32037ec681f3Smrg /* Compute mutable surface parameters (GFX6-GFX8). */ 32047ec681f3Smrg const struct legacy_surf_level *level_info = 32057ec681f3Smrg &tex->surface.u.legacy.level[cb->base.u.tex.level]; 32067ec681f3Smrg unsigned pitch_tile_max, slice_tile_max, tile_mode_index; 32077ec681f3Smrg unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice; 32087ec681f3Smrg 32097ec681f3Smrg cb_color_base += level_info->offset_256B; 32107ec681f3Smrg /* Only macrotiled modes can set tile swizzle. */ 32117ec681f3Smrg if (level_info->mode == RADEON_SURF_MODE_2D) 32127ec681f3Smrg cb_color_base |= tex->surface.tile_swizzle; 32137ec681f3Smrg 32147ec681f3Smrg if (!tex->surface.fmask_offset) 32157ec681f3Smrg cb_color_fmask = cb_color_base; 32167ec681f3Smrg if (cb->base.u.tex.level > 0) 32177ec681f3Smrg cb_color_cmask = cb_color_base; 32187ec681f3Smrg if (cb_dcc_base) 32197ec681f3Smrg cb_dcc_base += tex->surface.u.legacy.color.dcc_level[cb->base.u.tex.level].dcc_offset >> 8; 32207ec681f3Smrg 32217ec681f3Smrg pitch_tile_max = level_info->nblk_x / 8 - 1; 32227ec681f3Smrg slice_tile_max = level_info->nblk_x * level_info->nblk_y / 64 - 1; 32237ec681f3Smrg tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false); 32247ec681f3Smrg 32257ec681f3Smrg cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); 32267ec681f3Smrg cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); 32277ec681f3Smrg cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); 32287ec681f3Smrg 32297ec681f3Smrg if (tex->surface.fmask_offset) { 32307ec681f3Smrg if (sctx->chip_class >= GFX7) 32317ec681f3Smrg cb_color_pitch |= 32327ec681f3Smrg S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.color.fmask.pitch_in_pixels / 8 - 1); 32337ec681f3Smrg cb_color_attrib |= 32347ec681f3Smrg S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.color.fmask.tiling_index); 32357ec681f3Smrg cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.color.fmask.slice_tile_max); 32367ec681f3Smrg } else { 32377ec681f3Smrg /* This must be set for fast clear to work without FMASK. */ 32387ec681f3Smrg if (sctx->chip_class >= GFX7) 32397ec681f3Smrg cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); 32407ec681f3Smrg cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 32417ec681f3Smrg cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); 32427ec681f3Smrg } 32437ec681f3Smrg 32447ec681f3Smrg radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 32457ec681f3Smrg sctx->chip_class >= GFX8 ? 14 : 13); 32467ec681f3Smrg radeon_emit(cb_color_base); /* CB_COLOR0_BASE */ 32477ec681f3Smrg radeon_emit(cb_color_pitch); /* CB_COLOR0_PITCH */ 32487ec681f3Smrg radeon_emit(cb_color_slice); /* CB_COLOR0_SLICE */ 32497ec681f3Smrg radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 32507ec681f3Smrg radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 32517ec681f3Smrg radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 32527ec681f3Smrg radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 32537ec681f3Smrg radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 32547ec681f3Smrg radeon_emit(tex->surface.u.legacy.color.cmask_slice_tile_max); /* CB_COLOR0_CMASK_SLICE */ 32557ec681f3Smrg radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 32567ec681f3Smrg radeon_emit(cb_color_fmask_slice); /* CB_COLOR0_FMASK_SLICE */ 32577ec681f3Smrg radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 32587ec681f3Smrg radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 32597ec681f3Smrg 32607ec681f3Smrg if (sctx->chip_class >= GFX8) /* R_028C94_CB_COLOR0_DCC_BASE */ 32617ec681f3Smrg radeon_emit(cb_dcc_base); 32627ec681f3Smrg } 32637ec681f3Smrg } 32647ec681f3Smrg for (; i < 8; i++) 32657ec681f3Smrg if (sctx->framebuffer.dirty_cbufs & (1 << i)) 32667ec681f3Smrg radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 32677ec681f3Smrg 32687ec681f3Smrg /* ZS buffer. */ 32697ec681f3Smrg if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { 32707ec681f3Smrg struct si_surface *zb = (struct si_surface *)state->zsbuf; 32717ec681f3Smrg struct si_texture *tex = (struct si_texture *)zb->base.texture; 32727ec681f3Smrg unsigned db_z_info = zb->db_z_info; 32737ec681f3Smrg unsigned db_stencil_info = zb->db_stencil_info; 32747ec681f3Smrg unsigned db_htile_surface = zb->db_htile_surface; 32757ec681f3Smrg 32767ec681f3Smrg radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE, 32777ec681f3Smrg zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA 32787ec681f3Smrg : RADEON_PRIO_DEPTH_BUFFER); 32797ec681f3Smrg 32807ec681f3Smrg /* Set fields dependent on tc_compatile_htile. */ 32817ec681f3Smrg if (sctx->chip_class >= GFX9 && 32827ec681f3Smrg vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) { 32837ec681f3Smrg unsigned max_zplanes = 4; 32847ec681f3Smrg 32857ec681f3Smrg if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1) 32867ec681f3Smrg max_zplanes = 2; 32877ec681f3Smrg 32887ec681f3Smrg db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1); 32897ec681f3Smrg 32907ec681f3Smrg if (sctx->chip_class >= GFX10) { 32917ec681f3Smrg db_z_info |= S_028040_ITERATE_FLUSH(1); 32927ec681f3Smrg db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled); 32937ec681f3Smrg } else { 32947ec681f3Smrg db_z_info |= S_028038_ITERATE_FLUSH(1); 32957ec681f3Smrg db_stencil_info |= S_02803C_ITERATE_FLUSH(1); 32967ec681f3Smrg } 32977ec681f3Smrg } 32987ec681f3Smrg 32997ec681f3Smrg unsigned level = zb->base.u.tex.level; 33007ec681f3Smrg 33017ec681f3Smrg if (sctx->chip_class >= GFX10) { 33027ec681f3Smrg radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 33037ec681f3Smrg radeon_set_context_reg(R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size); 33047ec681f3Smrg 33057ec681f3Smrg radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 7); 33067ec681f3Smrg radeon_emit(S_02803C_RESOURCE_LEVEL(1)); /* DB_DEPTH_INFO */ 33077ec681f3Smrg radeon_emit(db_z_info | /* DB_Z_INFO */ 33087ec681f3Smrg S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 33097ec681f3Smrg radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 33107ec681f3Smrg radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 33117ec681f3Smrg radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 33127ec681f3Smrg radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 33137ec681f3Smrg radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 33147ec681f3Smrg 33157ec681f3Smrg radeon_set_context_reg_seq(R_028068_DB_Z_READ_BASE_HI, 5); 33167ec681f3Smrg radeon_emit(zb->db_depth_base >> 32); /* DB_Z_READ_BASE_HI */ 33177ec681f3Smrg radeon_emit(zb->db_stencil_base >> 32); /* DB_STENCIL_READ_BASE_HI */ 33187ec681f3Smrg radeon_emit(zb->db_depth_base >> 32); /* DB_Z_WRITE_BASE_HI */ 33197ec681f3Smrg radeon_emit(zb->db_stencil_base >> 32); /* DB_STENCIL_WRITE_BASE_HI */ 33207ec681f3Smrg radeon_emit(zb->db_htile_data_base >> 32); /* DB_HTILE_DATA_BASE_HI */ 33217ec681f3Smrg } else if (sctx->chip_class == GFX9) { 33227ec681f3Smrg radeon_set_context_reg_seq(R_028014_DB_HTILE_DATA_BASE, 3); 33237ec681f3Smrg radeon_emit(zb->db_htile_data_base); /* DB_HTILE_DATA_BASE */ 33247ec681f3Smrg radeon_emit(S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */ 33257ec681f3Smrg radeon_emit(zb->db_depth_size); /* DB_DEPTH_SIZE */ 33267ec681f3Smrg 33277ec681f3Smrg radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 10); 33287ec681f3Smrg radeon_emit(db_z_info | /* DB_Z_INFO */ 33297ec681f3Smrg S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 33307ec681f3Smrg radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 33317ec681f3Smrg radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 33327ec681f3Smrg radeon_emit(S_028044_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_READ_BASE_HI */ 33337ec681f3Smrg radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 33347ec681f3Smrg radeon_emit(S_02804C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_READ_BASE_HI */ 33357ec681f3Smrg radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 33367ec681f3Smrg radeon_emit(S_028054_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_WRITE_BASE_HI */ 33377ec681f3Smrg radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 33387ec681f3Smrg radeon_emit(S_02805C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */ 33397ec681f3Smrg 33407ec681f3Smrg radeon_set_context_reg_seq(R_028068_DB_Z_INFO2, 2); 33417ec681f3Smrg radeon_emit(zb->db_z_info2); /* DB_Z_INFO2 */ 33427ec681f3Smrg radeon_emit(zb->db_stencil_info2); /* DB_STENCIL_INFO2 */ 33437ec681f3Smrg } else { 33447ec681f3Smrg /* GFX6-GFX8 */ 33457ec681f3Smrg /* Set fields dependent on tc_compatile_htile. */ 33467ec681f3Smrg if (si_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) { 33477ec681f3Smrg if (tex->tc_compatible_htile) { 33487ec681f3Smrg db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); 33497ec681f3Smrg 33507ec681f3Smrg /* 0 = full compression. N = only compress up to N-1 Z planes. */ 33517ec681f3Smrg if (tex->buffer.b.b.nr_samples <= 1) 33527ec681f3Smrg db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); 33537ec681f3Smrg else if (tex->buffer.b.b.nr_samples <= 4) 33547ec681f3Smrg db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); 33557ec681f3Smrg else 33567ec681f3Smrg db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); 33577ec681f3Smrg } 33587ec681f3Smrg } 33597ec681f3Smrg 33607ec681f3Smrg radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 33617ec681f3Smrg 33627ec681f3Smrg radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 9); 33637ec681f3Smrg radeon_emit(zb->db_depth_info | /* DB_DEPTH_INFO */ 33647ec681f3Smrg S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile)); 33657ec681f3Smrg radeon_emit(db_z_info | /* DB_Z_INFO */ 33667ec681f3Smrg S_028040_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 33677ec681f3Smrg radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 33687ec681f3Smrg radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 33697ec681f3Smrg radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 33707ec681f3Smrg radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 33717ec681f3Smrg radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 33727ec681f3Smrg radeon_emit(zb->db_depth_size); /* DB_DEPTH_SIZE */ 33737ec681f3Smrg radeon_emit(zb->db_depth_slice); /* DB_DEPTH_SLICE */ 33747ec681f3Smrg } 33757ec681f3Smrg 33767ec681f3Smrg radeon_set_context_reg_seq(R_028028_DB_STENCIL_CLEAR, 2); 33777ec681f3Smrg radeon_emit(tex->stencil_clear_value[level]); /* R_028028_DB_STENCIL_CLEAR */ 33787ec681f3Smrg radeon_emit(fui(tex->depth_clear_value[level])); /* R_02802C_DB_DEPTH_CLEAR */ 33797ec681f3Smrg 33807ec681f3Smrg radeon_set_context_reg(R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 33817ec681f3Smrg radeon_set_context_reg(R_028ABC_DB_HTILE_SURFACE, db_htile_surface); 33827ec681f3Smrg } else if (sctx->framebuffer.dirty_zsbuf) { 33837ec681f3Smrg if (sctx->chip_class == GFX9) 33847ec681f3Smrg radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 2); 33857ec681f3Smrg else 33867ec681f3Smrg radeon_set_context_reg_seq(R_028040_DB_Z_INFO, 2); 33877ec681f3Smrg 33887ec681f3Smrg radeon_emit(S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */ 33897ec681f3Smrg radeon_emit(S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */ 33907ec681f3Smrg } 33917ec681f3Smrg 33927ec681f3Smrg /* Framebuffer dimensions. */ 33937ec681f3Smrg /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_cs_preamble_state */ 33947ec681f3Smrg radeon_set_context_reg(R_028208_PA_SC_WINDOW_SCISSOR_BR, 33957ec681f3Smrg S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 33967ec681f3Smrg 33977ec681f3Smrg if (sctx->screen->dpbb_allowed) { 33987ec681f3Smrg radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 33997ec681f3Smrg radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 34007ec681f3Smrg } 34017ec681f3Smrg radeon_end(); 34027ec681f3Smrg 34037ec681f3Smrg si_update_display_dcc_dirty(sctx); 34047ec681f3Smrg 34057ec681f3Smrg sctx->framebuffer.dirty_cbufs = 0; 34067ec681f3Smrg sctx->framebuffer.dirty_zsbuf = false; 3407af69d88dSmrg} 3408af69d88dSmrg 340901e04c3fSmrgstatic void si_emit_msaa_sample_locs(struct si_context *sctx) 3410af69d88dSmrg{ 34117ec681f3Smrg struct radeon_cmdbuf *cs = &sctx->gfx_cs; 34127ec681f3Smrg struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 34137ec681f3Smrg unsigned nr_samples = sctx->framebuffer.nr_samples; 34147ec681f3Smrg bool has_msaa_sample_loc_bug = sctx->screen->info.has_msaa_sample_loc_bug; 34157ec681f3Smrg 34167ec681f3Smrg /* Smoothing (only possible with nr_samples == 1) uses the same 34177ec681f3Smrg * sample locations as the MSAA it simulates. 34187ec681f3Smrg */ 34197ec681f3Smrg if (nr_samples <= 1 && sctx->smoothing_enabled) 34207ec681f3Smrg nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; 34217ec681f3Smrg 34227ec681f3Smrg /* On Polaris, the small primitive filter uses the sample locations 34237ec681f3Smrg * even when MSAA is off, so we need to make sure they're set to 0. 34247ec681f3Smrg * 34257ec681f3Smrg * GFX10 uses sample locations unconditionally, so they always need 34267ec681f3Smrg * to be set up. 34277ec681f3Smrg */ 34287ec681f3Smrg if ((nr_samples >= 2 || has_msaa_sample_loc_bug || sctx->chip_class >= GFX10) && 34297ec681f3Smrg nr_samples != sctx->sample_locs_num_samples) { 34307ec681f3Smrg sctx->sample_locs_num_samples = nr_samples; 34317ec681f3Smrg si_emit_sample_locations(cs, nr_samples); 34327ec681f3Smrg } 34337ec681f3Smrg 34347ec681f3Smrg radeon_begin(cs); 34357ec681f3Smrg 34367ec681f3Smrg if (sctx->family >= CHIP_POLARIS10) { 34377ec681f3Smrg unsigned small_prim_filter_cntl = 34387ec681f3Smrg S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 34397ec681f3Smrg /* line bug */ 34407ec681f3Smrg S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12); 34417ec681f3Smrg 34427ec681f3Smrg /* For hardware with the sample location bug, the problem is that in order to use the small 34437ec681f3Smrg * primitive filter, we need to explicitly set the sample locations to 0. But the DB doesn't 34447ec681f3Smrg * properly process the change of sample locations without a flush, and so we can end up 34457ec681f3Smrg * with incorrect Z values. 34467ec681f3Smrg * 34477ec681f3Smrg * Instead of doing a flush, just disable the small primitive filter when MSAA is 34487ec681f3Smrg * force-disabled. 34497ec681f3Smrg * 34507ec681f3Smrg * The alternative of setting sample locations to 0 would require a DB flush to avoid 34517ec681f3Smrg * Z errors, see https://bugs.freedesktop.org/show_bug.cgi?id=96908 34527ec681f3Smrg */ 34537ec681f3Smrg if (has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1 && !rs->multisample_enable) 34547ec681f3Smrg small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; 34557ec681f3Smrg 34567ec681f3Smrg radeon_opt_set_context_reg(sctx, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 34577ec681f3Smrg SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl); 34587ec681f3Smrg } 34597ec681f3Smrg 34607ec681f3Smrg /* The exclusion bits can be set to improve rasterization efficiency 34617ec681f3Smrg * if no sample lies on the pixel boundary (-8 sample offset). 34627ec681f3Smrg */ 34637ec681f3Smrg bool exclusion = sctx->chip_class >= GFX7 && (!rs->multisample_enable || nr_samples != 16); 34647ec681f3Smrg radeon_opt_set_context_reg( 34657ec681f3Smrg sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, 34667ec681f3Smrg S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); 34677ec681f3Smrg radeon_end(); 3468af69d88dSmrg} 3469af69d88dSmrg 347001e04c3fSmrgstatic bool si_out_of_order_rasterization(struct si_context *sctx) 3471af69d88dSmrg{ 34727ec681f3Smrg struct si_state_blend *blend = sctx->queued.named.blend; 34737ec681f3Smrg struct si_state_dsa *dsa = sctx->queued.named.dsa; 34747ec681f3Smrg 34757ec681f3Smrg if (!sctx->screen->has_out_of_order_rast) 34767ec681f3Smrg return false; 34777ec681f3Smrg 34787ec681f3Smrg unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit; 34797ec681f3Smrg 34807ec681f3Smrg colormask &= blend->cb_target_enabled_4bit; 34817ec681f3Smrg 34827ec681f3Smrg /* Conservative: No logic op. */ 34837ec681f3Smrg if (colormask && blend->logicop_enable) 34847ec681f3Smrg return false; 34857ec681f3Smrg 34867ec681f3Smrg struct si_dsa_order_invariance dsa_order_invariant = {.zs = true, 34877ec681f3Smrg .pass_set = true, 34887ec681f3Smrg .pass_last = false}; 34897ec681f3Smrg 34907ec681f3Smrg if (sctx->framebuffer.state.zsbuf) { 34917ec681f3Smrg struct si_texture *zstex = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture; 34927ec681f3Smrg bool has_stencil = zstex->surface.has_stencil; 34937ec681f3Smrg dsa_order_invariant = dsa->order_invariance[has_stencil]; 34947ec681f3Smrg if (!dsa_order_invariant.zs) 34957ec681f3Smrg return false; 34967ec681f3Smrg 34977ec681f3Smrg /* The set of PS invocations is always order invariant, 34987ec681f3Smrg * except when early Z/S tests are requested. */ 34997ec681f3Smrg if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.writes_memory && 35007ec681f3Smrg sctx->shader.ps.cso->info.base.fs.early_fragment_tests && 35017ec681f3Smrg !dsa_order_invariant.pass_set) 35027ec681f3Smrg return false; 35037ec681f3Smrg 35047ec681f3Smrg if (sctx->num_perfect_occlusion_queries != 0 && !dsa_order_invariant.pass_set) 35057ec681f3Smrg return false; 35067ec681f3Smrg } 35077ec681f3Smrg 35087ec681f3Smrg if (!colormask) 35097ec681f3Smrg return true; 35107ec681f3Smrg 35117ec681f3Smrg unsigned blendmask = colormask & blend->blend_enable_4bit; 35127ec681f3Smrg 35137ec681f3Smrg if (blendmask) { 35147ec681f3Smrg /* Only commutative blending. */ 35157ec681f3Smrg if (blendmask & ~blend->commutative_4bit) 35167ec681f3Smrg return false; 35177ec681f3Smrg 35187ec681f3Smrg if (!dsa_order_invariant.pass_set) 35197ec681f3Smrg return false; 35207ec681f3Smrg } 35217ec681f3Smrg 35227ec681f3Smrg if (colormask & ~blendmask) { 35237ec681f3Smrg if (!dsa_order_invariant.pass_last) 35247ec681f3Smrg return false; 35257ec681f3Smrg } 35267ec681f3Smrg 35277ec681f3Smrg return true; 3528af69d88dSmrg} 3529af69d88dSmrg 353001e04c3fSmrgstatic void si_emit_msaa_config(struct si_context *sctx) 3531af69d88dSmrg{ 35327ec681f3Smrg struct radeon_cmdbuf *cs = &sctx->gfx_cs; 35337ec681f3Smrg unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes; 35347ec681f3Smrg /* 33% faster rendering to linear color buffers */ 35357ec681f3Smrg bool dst_is_linear = sctx->framebuffer.any_dst_linear; 35367ec681f3Smrg bool out_of_order_rast = si_out_of_order_rasterization(sctx); 35377ec681f3Smrg unsigned sc_mode_cntl_1 = 35387ec681f3Smrg S_028A4C_WALK_SIZE(dst_is_linear) | S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) | 35397ec681f3Smrg S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) | 35407ec681f3Smrg S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) | 35417ec681f3Smrg S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) | 35427ec681f3Smrg /* always 1: */ 35437ec681f3Smrg S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 35447ec681f3Smrg S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 35457ec681f3Smrg S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1); 35467ec681f3Smrg unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) | 35477ec681f3Smrg S_028804_INTERPOLATE_COMP_Z(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1); 35487ec681f3Smrg unsigned coverage_samples, color_samples, z_samples; 35497ec681f3Smrg struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 35507ec681f3Smrg 35517ec681f3Smrg /* S: Coverage samples (up to 16x): 35527ec681f3Smrg * - Scan conversion samples (PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES) 35537ec681f3Smrg * - CB FMASK samples (CB_COLORi_ATTRIB.NUM_SAMPLES) 35547ec681f3Smrg * 35557ec681f3Smrg * Z: Z/S samples (up to 8x, must be <= coverage samples and >= color samples): 35567ec681f3Smrg * - Value seen by DB (DB_Z_INFO.NUM_SAMPLES) 35577ec681f3Smrg * - Value seen by CB, must be correct even if Z/S is unbound (DB_EQAA.MAX_ANCHOR_SAMPLES) 35587ec681f3Smrg * # Missing samples are derived from Z planes if Z is compressed (up to 16x quality), or 35597ec681f3Smrg * # from the closest defined sample if Z is uncompressed (same quality as the number of 35607ec681f3Smrg * # Z samples). 35617ec681f3Smrg * 35627ec681f3Smrg * F: Color samples (up to 8x, must be <= coverage samples): 35637ec681f3Smrg * - CB color samples (CB_COLORi_ATTRIB.NUM_FRAGMENTS) 35647ec681f3Smrg * - PS iter samples (DB_EQAA.PS_ITER_SAMPLES) 35657ec681f3Smrg * 35667ec681f3Smrg * Can be anything between coverage and color samples: 35677ec681f3Smrg * - SampleMaskIn samples (PA_SC_AA_CONFIG.MSAA_EXPOSED_SAMPLES) 35687ec681f3Smrg * - SampleMaskOut samples (DB_EQAA.MASK_EXPORT_NUM_SAMPLES) 35697ec681f3Smrg * - Alpha-to-coverage samples (DB_EQAA.ALPHA_TO_MASK_NUM_SAMPLES) 35707ec681f3Smrg * - Occlusion query samples (DB_COUNT_CONTROL.SAMPLE_RATE) 35717ec681f3Smrg * # All are currently set the same as coverage samples. 35727ec681f3Smrg * 35737ec681f3Smrg * If color samples < coverage samples, FMASK has a higher bpp to store an "unknown" 35747ec681f3Smrg * flag for undefined color samples. A shader-based resolve must handle unknowns 35757ec681f3Smrg * or mask them out with AND. Unknowns can also be guessed from neighbors via 35767ec681f3Smrg * an edge-detect shader-based resolve, which is required to make "color samples = 1" 35777ec681f3Smrg * useful. The CB resolve always drops unknowns. 35787ec681f3Smrg * 35797ec681f3Smrg * Sensible AA configurations: 35807ec681f3Smrg * EQAA 16s 8z 8f - might look the same as 16x MSAA if Z is compressed 35817ec681f3Smrg * EQAA 16s 8z 4f - might look the same as 16x MSAA if Z is compressed 35827ec681f3Smrg * EQAA 16s 4z 4f - might look the same as 16x MSAA if Z is compressed 35837ec681f3Smrg * EQAA 8s 8z 8f = 8x MSAA 35847ec681f3Smrg * EQAA 8s 8z 4f - might look the same as 8x MSAA 35857ec681f3Smrg * EQAA 8s 8z 2f - might look the same as 8x MSAA with low-density geometry 35867ec681f3Smrg * EQAA 8s 4z 4f - might look the same as 8x MSAA if Z is compressed 35877ec681f3Smrg * EQAA 8s 4z 2f - might look the same as 8x MSAA with low-density geometry if Z is compressed 35887ec681f3Smrg * EQAA 4s 4z 4f = 4x MSAA 35897ec681f3Smrg * EQAA 4s 4z 2f - might look the same as 4x MSAA with low-density geometry 35907ec681f3Smrg * EQAA 2s 2z 2f = 2x MSAA 35917ec681f3Smrg */ 35927ec681f3Smrg coverage_samples = color_samples = z_samples = si_get_num_coverage_samples(sctx); 35937ec681f3Smrg 35947ec681f3Smrg if (sctx->framebuffer.nr_samples > 1 && rs->multisample_enable) { 35957ec681f3Smrg color_samples = sctx->framebuffer.nr_color_samples; 35967ec681f3Smrg 35977ec681f3Smrg if (sctx->framebuffer.state.zsbuf) { 35987ec681f3Smrg z_samples = sctx->framebuffer.state.zsbuf->texture->nr_samples; 35997ec681f3Smrg z_samples = MAX2(1, z_samples); 36007ec681f3Smrg } else { 36017ec681f3Smrg z_samples = coverage_samples; 36027ec681f3Smrg } 36037ec681f3Smrg } 36047ec681f3Smrg 36057ec681f3Smrg /* The DX10 diamond test is optional in GL and decreases line rasterization 36067ec681f3Smrg * performance, so don't use it. 36077ec681f3Smrg * 36087ec681f3Smrg * TODO: We should also enable perpendicular endcaps for AA lines, 36097ec681f3Smrg * but that requires implementing line stippling in the pixel 36107ec681f3Smrg * shader. SC can only do line stippling with axis-aligned 36117ec681f3Smrg * endcaps. 36127ec681f3Smrg */ 36137ec681f3Smrg unsigned sc_line_cntl = 0; 36147ec681f3Smrg unsigned sc_aa_config = 0; 36157ec681f3Smrg 36167ec681f3Smrg if (coverage_samples > 1) { 36177ec681f3Smrg /* distance from the pixel center, indexed by log2(nr_samples) */ 36187ec681f3Smrg static unsigned max_dist[] = { 36197ec681f3Smrg 0, /* unused */ 36207ec681f3Smrg 4, /* 2x MSAA */ 36217ec681f3Smrg 6, /* 4x MSAA */ 36227ec681f3Smrg 7, /* 8x MSAA */ 36237ec681f3Smrg 8, /* 16x MSAA */ 36247ec681f3Smrg }; 36257ec681f3Smrg unsigned log_samples = util_logbase2(coverage_samples); 36267ec681f3Smrg unsigned log_z_samples = util_logbase2(z_samples); 36277ec681f3Smrg unsigned ps_iter_samples = si_get_ps_iter_samples(sctx); 36287ec681f3Smrg unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples); 36297ec681f3Smrg 36307ec681f3Smrg sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); 36317ec681f3Smrg sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) | 36327ec681f3Smrg S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | 36337ec681f3Smrg S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | 36347ec681f3Smrg S_028BE0_COVERED_CENTROID_IS_CENTER(sctx->chip_class >= GFX10_3); 36357ec681f3Smrg 36367ec681f3Smrg if (sctx->framebuffer.nr_samples > 1) { 36377ec681f3Smrg db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | 36387ec681f3Smrg S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | 36397ec681f3Smrg S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | 36407ec681f3Smrg S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples); 36417ec681f3Smrg sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); 36427ec681f3Smrg } else if (sctx->smoothing_enabled) { 36437ec681f3Smrg db_eqaa |= S_028804_OVERRASTERIZATION_AMOUNT(log_samples); 36447ec681f3Smrg } 36457ec681f3Smrg } 36467ec681f3Smrg 36477ec681f3Smrg radeon_begin(cs); 36487ec681f3Smrg 36497ec681f3Smrg /* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */ 36507ec681f3Smrg radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL, 36517ec681f3Smrg sc_line_cntl, sc_aa_config); 36527ec681f3Smrg /* R_028804_DB_EQAA */ 36537ec681f3Smrg radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa); 36547ec681f3Smrg /* R_028A4C_PA_SC_MODE_CNTL_1 */ 36557ec681f3Smrg radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1, 36567ec681f3Smrg sc_mode_cntl_1); 36577ec681f3Smrg radeon_end_update_context_roll(sctx); 3658af69d88dSmrg} 3659af69d88dSmrg 366001e04c3fSmrgvoid si_update_ps_iter_samples(struct si_context *sctx) 3661af69d88dSmrg{ 36627ec681f3Smrg if (sctx->framebuffer.nr_samples > 1) 36637ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 36647ec681f3Smrg if (sctx->screen->dpbb_allowed) 36657ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 3666af69d88dSmrg} 3667af69d88dSmrg 366801e04c3fSmrgstatic void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 3669af69d88dSmrg{ 36707ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 36717ec681f3Smrg 36727ec681f3Smrg /* The hardware can only do sample shading with 2^n samples. */ 36737ec681f3Smrg min_samples = util_next_power_of_two(min_samples); 3674af69d88dSmrg 36757ec681f3Smrg if (sctx->ps_iter_samples == min_samples) 36767ec681f3Smrg return; 3677af69d88dSmrg 36787ec681f3Smrg sctx->ps_iter_samples = min_samples; 3679af69d88dSmrg 36807ec681f3Smrg si_ps_key_update_sample_shading(sctx); 36817ec681f3Smrg si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 36827ec681f3Smrg sctx->do_update_shaders = true; 3683af69d88dSmrg 36847ec681f3Smrg si_update_ps_iter_samples(sctx); 3685af69d88dSmrg} 3686af69d88dSmrg 368701e04c3fSmrg/* 368801e04c3fSmrg * Samplers 368901e04c3fSmrg */ 3690af69d88dSmrg 369101e04c3fSmrg/** 369201e04c3fSmrg * Build the sampler view descriptor for a buffer texture. 369301e04c3fSmrg * @param state 256-bit descriptor; only the high 128 bits are filled in 369401e04c3fSmrg */ 36957ec681f3Smrgvoid si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, 36967ec681f3Smrg enum pipe_format format, unsigned offset, unsigned size, 36977ec681f3Smrg uint32_t *state) 3698af69d88dSmrg{ 36997ec681f3Smrg const struct util_format_description *desc; 37007ec681f3Smrg unsigned stride; 37017ec681f3Smrg unsigned num_records; 37027ec681f3Smrg 37037ec681f3Smrg desc = util_format_description(format); 37047ec681f3Smrg stride = desc->block.bits / 8; 37057ec681f3Smrg 37067ec681f3Smrg num_records = size / stride; 37077ec681f3Smrg num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); 37087ec681f3Smrg 37097ec681f3Smrg /* The NUM_RECORDS field has a different meaning depending on the chip, 37107ec681f3Smrg * instruction type, STRIDE, and SWIZZLE_ENABLE. 37117ec681f3Smrg * 37127ec681f3Smrg * GFX6-7,10: 37137ec681f3Smrg * - If STRIDE == 0, it's in byte units. 37147ec681f3Smrg * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN. 37157ec681f3Smrg * 37167ec681f3Smrg * GFX8: 37177ec681f3Smrg * - For SMEM and STRIDE == 0, it's in byte units. 37187ec681f3Smrg * - For SMEM and STRIDE != 0, it's in units of STRIDE. 37197ec681f3Smrg * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units. 37207ec681f3Smrg * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE. 37217ec681f3Smrg * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_- 37227ec681f3Smrg * ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when 37237ec681f3Smrg * using SMEM. This can be done in the shader by clearing STRIDE with s_and. 37247ec681f3Smrg * That way the same descriptor can be used by both SMEM and VMEM. 37257ec681f3Smrg * 37267ec681f3Smrg * GFX9: 37277ec681f3Smrg * - For SMEM and STRIDE == 0, it's in byte units. 37287ec681f3Smrg * - For SMEM and STRIDE != 0, it's in units of STRIDE. 37297ec681f3Smrg * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units. 37307ec681f3Smrg * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE. 37317ec681f3Smrg */ 37327ec681f3Smrg if (screen->info.chip_class == GFX8) 37337ec681f3Smrg num_records *= stride; 37347ec681f3Smrg 37357ec681f3Smrg state[4] = 0; 37367ec681f3Smrg state[5] = S_008F04_STRIDE(stride); 37377ec681f3Smrg state[6] = num_records; 37387ec681f3Smrg state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 37397ec681f3Smrg S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 37407ec681f3Smrg S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 37417ec681f3Smrg S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])); 37427ec681f3Smrg 37437ec681f3Smrg if (screen->info.chip_class >= GFX10) { 37447ec681f3Smrg const struct gfx10_format *fmt = &gfx10_format_table[format]; 37457ec681f3Smrg 37467ec681f3Smrg /* OOB_SELECT chooses the out-of-bounds check: 37477ec681f3Smrg * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE) 37487ec681f3Smrg * - 1: index >= NUM_RECORDS 37497ec681f3Smrg * - 2: NUM_RECORDS == 0 37507ec681f3Smrg * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS 37517ec681f3Smrg * else: swizzle_address >= NUM_RECORDS 37527ec681f3Smrg */ 37537ec681f3Smrg state[7] |= S_008F0C_FORMAT(fmt->img_format) | 37547ec681f3Smrg S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) | 37557ec681f3Smrg S_008F0C_RESOURCE_LEVEL(1); 37567ec681f3Smrg } else { 37577ec681f3Smrg int first_non_void; 37587ec681f3Smrg unsigned num_format, data_format; 37597ec681f3Smrg 37607ec681f3Smrg first_non_void = util_format_get_first_non_void_channel(format); 37617ec681f3Smrg num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void); 37627ec681f3Smrg data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void); 37637ec681f3Smrg 37647ec681f3Smrg state[7] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); 37657ec681f3Smrg } 3766af69d88dSmrg} 3767af69d88dSmrg 376801e04c3fSmrgstatic unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4]) 3769af69d88dSmrg{ 37707ec681f3Smrg unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 37717ec681f3Smrg 37727ec681f3Smrg if (swizzle[3] == PIPE_SWIZZLE_X) { 37737ec681f3Smrg /* For the pre-defined border color values (white, opaque 37747ec681f3Smrg * black, transparent black), the only thing that matters is 37757ec681f3Smrg * that the alpha channel winds up in the correct place 37767ec681f3Smrg * (because the RGB channels are all the same) so either of 37777ec681f3Smrg * these enumerations will work. 37787ec681f3Smrg */ 37797ec681f3Smrg if (swizzle[2] == PIPE_SWIZZLE_Y) 37807ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_WZYX; 37817ec681f3Smrg else 37827ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ; 37837ec681f3Smrg } else if (swizzle[0] == PIPE_SWIZZLE_X) { 37847ec681f3Smrg if (swizzle[1] == PIPE_SWIZZLE_Y) 37857ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 37867ec681f3Smrg else 37877ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ; 37887ec681f3Smrg } else if (swizzle[1] == PIPE_SWIZZLE_X) { 37897ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ; 37907ec681f3Smrg } else if (swizzle[2] == PIPE_SWIZZLE_X) { 37917ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW; 37927ec681f3Smrg } 37937ec681f3Smrg 37947ec681f3Smrg return bc_swizzle; 3795af69d88dSmrg} 3796af69d88dSmrg 379701e04c3fSmrg/** 379801e04c3fSmrg * Build the sampler view descriptor for a texture. 3799af69d88dSmrg */ 38007ec681f3Smrgstatic void gfx10_make_texture_descriptor( 38017ec681f3Smrg struct si_screen *screen, struct si_texture *tex, bool sampler, enum pipe_texture_target target, 38027ec681f3Smrg enum pipe_format pipe_format, const unsigned char state_swizzle[4], unsigned first_level, 38037ec681f3Smrg unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned width, unsigned height, 38047ec681f3Smrg unsigned depth, uint32_t *state, uint32_t *fmask_state) 38057ec681f3Smrg{ 38067ec681f3Smrg struct pipe_resource *res = &tex->buffer.b.b; 38077ec681f3Smrg const struct util_format_description *desc; 38087ec681f3Smrg unsigned img_format; 38097ec681f3Smrg unsigned char swizzle[4]; 38107ec681f3Smrg unsigned type; 38117ec681f3Smrg uint64_t va; 38127ec681f3Smrg 38137ec681f3Smrg desc = util_format_description(pipe_format); 38147ec681f3Smrg img_format = gfx10_format_table[pipe_format].img_format; 38157ec681f3Smrg 38167ec681f3Smrg if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 38177ec681f3Smrg const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 38187ec681f3Smrg const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 38197ec681f3Smrg const unsigned char swizzle_wwww[4] = {3, 3, 3, 3}; 38207ec681f3Smrg bool is_stencil = false; 38217ec681f3Smrg 38227ec681f3Smrg switch (pipe_format) { 38237ec681f3Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 38247ec681f3Smrg case PIPE_FORMAT_X32_S8X24_UINT: 38257ec681f3Smrg case PIPE_FORMAT_X8Z24_UNORM: 38267ec681f3Smrg util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 38277ec681f3Smrg is_stencil = true; 38287ec681f3Smrg break; 38297ec681f3Smrg case PIPE_FORMAT_X24S8_UINT: 38307ec681f3Smrg /* 38317ec681f3Smrg * X24S8 is implemented as an 8_8_8_8 data format, to 38327ec681f3Smrg * fix texture gathers. This affects at least 38337ec681f3Smrg * GL45-CTS.texture_cube_map_array.sampling on GFX8. 38347ec681f3Smrg */ 38357ec681f3Smrg util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); 38367ec681f3Smrg is_stencil = true; 38377ec681f3Smrg break; 38387ec681f3Smrg default: 38397ec681f3Smrg util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 38407ec681f3Smrg is_stencil = pipe_format == PIPE_FORMAT_S8_UINT; 38417ec681f3Smrg } 38427ec681f3Smrg 38437ec681f3Smrg if (tex->upgraded_depth && !is_stencil) { 38447ec681f3Smrg assert(img_format == V_008F0C_GFX10_FORMAT_32_FLOAT); 38457ec681f3Smrg img_format = V_008F0C_GFX10_FORMAT_32_FLOAT_CLAMP; 38467ec681f3Smrg } 38477ec681f3Smrg } else { 38487ec681f3Smrg util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 38497ec681f3Smrg } 38507ec681f3Smrg 38517ec681f3Smrg if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY)) { 38527ec681f3Smrg /* For the purpose of shader images, treat cube maps as 2D 38537ec681f3Smrg * arrays. 38547ec681f3Smrg */ 38557ec681f3Smrg type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 38567ec681f3Smrg } else { 38577ec681f3Smrg type = si_tex_dim(screen, tex, target, res->nr_samples); 38587ec681f3Smrg } 38597ec681f3Smrg 38607ec681f3Smrg if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 38617ec681f3Smrg height = 1; 38627ec681f3Smrg depth = res->array_size; 38637ec681f3Smrg } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 38647ec681f3Smrg if (sampler || res->target != PIPE_TEXTURE_3D) 38657ec681f3Smrg depth = res->array_size; 38667ec681f3Smrg } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 38677ec681f3Smrg depth = res->array_size / 6; 38687ec681f3Smrg 38697ec681f3Smrg state[0] = 0; 38707ec681f3Smrg state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1); 38717ec681f3Smrg state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 38727ec681f3Smrg S_00A008_RESOURCE_LEVEL(1); 38737ec681f3Smrg state[3] = 38747ec681f3Smrg S_00A00C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 38757ec681f3Smrg S_00A00C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 38767ec681f3Smrg S_00A00C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 38777ec681f3Smrg S_00A00C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 38787ec681f3Smrg S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) | 38797ec681f3Smrg S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) | 38807ec681f3Smrg S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type); 38817ec681f3Smrg /* Depth is the the last accessible layer on gfx9+. The hw doesn't need 38827ec681f3Smrg * to know the total number of layers. 38837ec681f3Smrg */ 38847ec681f3Smrg state[4] = 38857ec681f3Smrg S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) | 38867ec681f3Smrg S_00A010_BASE_ARRAY(first_layer); 38877ec681f3Smrg state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) | 38887ec681f3Smrg S_00A014_MAX_MIP(res->nr_samples > 1 ? util_logbase2(res->nr_samples) 38897ec681f3Smrg : tex->buffer.b.b.last_level) | 38907ec681f3Smrg S_00A014_PERF_MOD(4); 38917ec681f3Smrg state[6] = 0; 38927ec681f3Smrg state[7] = 0; 38937ec681f3Smrg 38947ec681f3Smrg if (vi_dcc_enabled(tex, first_level)) { 38957ec681f3Smrg state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | 38967ec681f3Smrg S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) | 38977ec681f3Smrg S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); 38987ec681f3Smrg } 38997ec681f3Smrg 39007ec681f3Smrg /* Initialize the sampler view for FMASK. */ 39017ec681f3Smrg if (tex->surface.fmask_offset) { 39027ec681f3Smrg uint32_t format; 39037ec681f3Smrg 39047ec681f3Smrg va = tex->buffer.gpu_address + tex->surface.fmask_offset; 39057ec681f3Smrg 39067ec681f3Smrg#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f))) 39077ec681f3Smrg switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 39087ec681f3Smrg case FMASK(2, 1): 39097ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F1; 39107ec681f3Smrg break; 39117ec681f3Smrg case FMASK(2, 2): 39127ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2; 39137ec681f3Smrg break; 39147ec681f3Smrg case FMASK(4, 1): 39157ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F1; 39167ec681f3Smrg break; 39177ec681f3Smrg case FMASK(4, 2): 39187ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F2; 39197ec681f3Smrg break; 39207ec681f3Smrg case FMASK(4, 4): 39217ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4; 39227ec681f3Smrg break; 39237ec681f3Smrg case FMASK(8, 1): 39247ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK8_S8_F1; 39257ec681f3Smrg break; 39267ec681f3Smrg case FMASK(8, 2): 39277ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK16_S8_F2; 39287ec681f3Smrg break; 39297ec681f3Smrg case FMASK(8, 4): 39307ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F4; 39317ec681f3Smrg break; 39327ec681f3Smrg case FMASK(8, 8): 39337ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8; 39347ec681f3Smrg break; 39357ec681f3Smrg case FMASK(16, 1): 39367ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK16_S16_F1; 39377ec681f3Smrg break; 39387ec681f3Smrg case FMASK(16, 2): 39397ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK32_S16_F2; 39407ec681f3Smrg break; 39417ec681f3Smrg case FMASK(16, 4): 39427ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F4; 39437ec681f3Smrg break; 39447ec681f3Smrg case FMASK(16, 8): 39457ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F8; 39467ec681f3Smrg break; 39477ec681f3Smrg default: 39487ec681f3Smrg unreachable("invalid nr_samples"); 39497ec681f3Smrg } 39507ec681f3Smrg#undef FMASK 39517ec681f3Smrg fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle; 39527ec681f3Smrg fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) | 39537ec681f3Smrg S_00A004_WIDTH_LO(width - 1); 39547ec681f3Smrg fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 39557ec681f3Smrg S_00A008_RESOURCE_LEVEL(1); 39567ec681f3Smrg fmask_state[3] = 39577ec681f3Smrg S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 39587ec681f3Smrg S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 39597ec681f3Smrg S_00A00C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 39607ec681f3Smrg S_00A00C_TYPE(si_tex_dim(screen, tex, target, 0)); 39617ec681f3Smrg fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer); 39627ec681f3Smrg fmask_state[5] = 0; 39637ec681f3Smrg fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1); 39647ec681f3Smrg fmask_state[7] = 0; 39657ec681f3Smrg } 39667ec681f3Smrg} 39677ec681f3Smrg 39687ec681f3Smrg/** 39697ec681f3Smrg * Build the sampler view descriptor for a texture (SI-GFX9). 39707ec681f3Smrg */ 39717ec681f3Smrgstatic void si_make_texture_descriptor(struct si_screen *screen, struct si_texture *tex, 39727ec681f3Smrg bool sampler, enum pipe_texture_target target, 39737ec681f3Smrg enum pipe_format pipe_format, 39747ec681f3Smrg const unsigned char state_swizzle[4], unsigned first_level, 39757ec681f3Smrg unsigned last_level, unsigned first_layer, 39767ec681f3Smrg unsigned last_layer, unsigned width, unsigned height, 39777ec681f3Smrg unsigned depth, uint32_t *state, uint32_t *fmask_state) 3978af69d88dSmrg{ 39797ec681f3Smrg struct pipe_resource *res = &tex->buffer.b.b; 39807ec681f3Smrg const struct util_format_description *desc; 39817ec681f3Smrg unsigned char swizzle[4]; 39827ec681f3Smrg int first_non_void; 39837ec681f3Smrg unsigned num_format, data_format, type, num_samples; 39847ec681f3Smrg uint64_t va; 39857ec681f3Smrg 39867ec681f3Smrg desc = util_format_description(pipe_format); 39877ec681f3Smrg 39887ec681f3Smrg num_samples = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ? MAX2(1, res->nr_samples) 39897ec681f3Smrg : MAX2(1, res->nr_storage_samples); 39907ec681f3Smrg 39917ec681f3Smrg if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 39927ec681f3Smrg const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 39937ec681f3Smrg const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 39947ec681f3Smrg const unsigned char swizzle_wwww[4] = {3, 3, 3, 3}; 39957ec681f3Smrg 39967ec681f3Smrg switch (pipe_format) { 39977ec681f3Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 39987ec681f3Smrg case PIPE_FORMAT_X32_S8X24_UINT: 39997ec681f3Smrg case PIPE_FORMAT_X8Z24_UNORM: 40007ec681f3Smrg util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 40017ec681f3Smrg break; 40027ec681f3Smrg case PIPE_FORMAT_X24S8_UINT: 40037ec681f3Smrg /* 40047ec681f3Smrg * X24S8 is implemented as an 8_8_8_8 data format, to 40057ec681f3Smrg * fix texture gathers. This affects at least 40067ec681f3Smrg * GL45-CTS.texture_cube_map_array.sampling on GFX8. 40077ec681f3Smrg */ 40087ec681f3Smrg if (screen->info.chip_class <= GFX8) 40097ec681f3Smrg util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); 40107ec681f3Smrg else 40117ec681f3Smrg util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 40127ec681f3Smrg break; 40137ec681f3Smrg default: 40147ec681f3Smrg util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 40157ec681f3Smrg } 40167ec681f3Smrg } else { 40177ec681f3Smrg util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 40187ec681f3Smrg } 40197ec681f3Smrg 40207ec681f3Smrg first_non_void = util_format_get_first_non_void_channel(pipe_format); 40217ec681f3Smrg 40227ec681f3Smrg switch (pipe_format) { 40237ec681f3Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 40247ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 40257ec681f3Smrg break; 40267ec681f3Smrg default: 40277ec681f3Smrg if (first_non_void < 0) { 40287ec681f3Smrg if (util_format_is_compressed(pipe_format)) { 40297ec681f3Smrg switch (pipe_format) { 40307ec681f3Smrg case PIPE_FORMAT_DXT1_SRGB: 40317ec681f3Smrg case PIPE_FORMAT_DXT1_SRGBA: 40327ec681f3Smrg case PIPE_FORMAT_DXT3_SRGBA: 40337ec681f3Smrg case PIPE_FORMAT_DXT5_SRGBA: 40347ec681f3Smrg case PIPE_FORMAT_BPTC_SRGBA: 40357ec681f3Smrg case PIPE_FORMAT_ETC2_SRGB8: 40367ec681f3Smrg case PIPE_FORMAT_ETC2_SRGB8A1: 40377ec681f3Smrg case PIPE_FORMAT_ETC2_SRGBA8: 40387ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 40397ec681f3Smrg break; 40407ec681f3Smrg case PIPE_FORMAT_RGTC1_SNORM: 40417ec681f3Smrg case PIPE_FORMAT_LATC1_SNORM: 40427ec681f3Smrg case PIPE_FORMAT_RGTC2_SNORM: 40437ec681f3Smrg case PIPE_FORMAT_LATC2_SNORM: 40447ec681f3Smrg case PIPE_FORMAT_ETC2_R11_SNORM: 40457ec681f3Smrg case PIPE_FORMAT_ETC2_RG11_SNORM: 40467ec681f3Smrg /* implies float, so use SNORM/UNORM to determine 40477ec681f3Smrg whether data is signed or not */ 40487ec681f3Smrg case PIPE_FORMAT_BPTC_RGB_FLOAT: 40497ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 40507ec681f3Smrg break; 40517ec681f3Smrg default: 40527ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 40537ec681f3Smrg break; 40547ec681f3Smrg } 40557ec681f3Smrg } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 40567ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 40577ec681f3Smrg } else { 40587ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 40597ec681f3Smrg } 40607ec681f3Smrg } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 40617ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 40627ec681f3Smrg } else { 40637ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 40647ec681f3Smrg 40657ec681f3Smrg switch (desc->channel[first_non_void].type) { 40667ec681f3Smrg case UTIL_FORMAT_TYPE_FLOAT: 40677ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 40687ec681f3Smrg break; 40697ec681f3Smrg case UTIL_FORMAT_TYPE_SIGNED: 40707ec681f3Smrg if (desc->channel[first_non_void].normalized) 40717ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 40727ec681f3Smrg else if (desc->channel[first_non_void].pure_integer) 40737ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_SINT; 40747ec681f3Smrg else 40757ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 40767ec681f3Smrg break; 40777ec681f3Smrg case UTIL_FORMAT_TYPE_UNSIGNED: 40787ec681f3Smrg if (desc->channel[first_non_void].normalized) 40797ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 40807ec681f3Smrg else if (desc->channel[first_non_void].pure_integer) 40817ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_UINT; 40827ec681f3Smrg else 40837ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 40847ec681f3Smrg } 40857ec681f3Smrg } 40867ec681f3Smrg } 40877ec681f3Smrg 40887ec681f3Smrg data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void); 40897ec681f3Smrg if (data_format == ~0) { 40907ec681f3Smrg data_format = 0; 40917ec681f3Smrg } 40927ec681f3Smrg 40937ec681f3Smrg /* S8 with Z32 HTILE needs a special format. */ 40947ec681f3Smrg if (screen->info.chip_class == GFX9 && pipe_format == PIPE_FORMAT_S8_UINT) 40957ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_S8_32; 40967ec681f3Smrg 40977ec681f3Smrg if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY || 40987ec681f3Smrg (screen->info.chip_class <= GFX8 && res->target == PIPE_TEXTURE_3D))) { 40997ec681f3Smrg /* For the purpose of shader images, treat cube maps and 3D 41007ec681f3Smrg * textures as 2D arrays. For 3D textures, the address 41017ec681f3Smrg * calculations for mipmaps are different, so we rely on the 41027ec681f3Smrg * caller to effectively disable mipmaps. 41037ec681f3Smrg */ 41047ec681f3Smrg type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 41057ec681f3Smrg 41067ec681f3Smrg assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); 41077ec681f3Smrg } else { 41087ec681f3Smrg type = si_tex_dim(screen, tex, target, num_samples); 41097ec681f3Smrg } 41107ec681f3Smrg 41117ec681f3Smrg if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 41127ec681f3Smrg height = 1; 41137ec681f3Smrg depth = res->array_size; 41147ec681f3Smrg } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 41157ec681f3Smrg if (sampler || res->target != PIPE_TEXTURE_3D) 41167ec681f3Smrg depth = res->array_size; 41177ec681f3Smrg } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 41187ec681f3Smrg depth = res->array_size / 6; 41197ec681f3Smrg 41207ec681f3Smrg state[0] = 0; 41217ec681f3Smrg state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format)); 41227ec681f3Smrg state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4)); 41237ec681f3Smrg state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 41247ec681f3Smrg S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 41257ec681f3Smrg S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 41267ec681f3Smrg S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 41277ec681f3Smrg S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : first_level) | 41287ec681f3Smrg S_008F1C_LAST_LEVEL(num_samples > 1 ? util_logbase2(num_samples) : last_level) | 41297ec681f3Smrg S_008F1C_TYPE(type)); 41307ec681f3Smrg state[4] = 0; 41317ec681f3Smrg state[5] = S_008F24_BASE_ARRAY(first_layer); 41327ec681f3Smrg state[6] = 0; 41337ec681f3Smrg state[7] = 0; 41347ec681f3Smrg 41357ec681f3Smrg if (screen->info.chip_class == GFX9) { 41367ec681f3Smrg unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle); 41377ec681f3Smrg 41387ec681f3Smrg /* Depth is the the last accessible layer on Gfx9. 41397ec681f3Smrg * The hw doesn't need to know the total number of layers. 41407ec681f3Smrg */ 41417ec681f3Smrg if (type == V_008F1C_SQ_RSRC_IMG_3D) 41427ec681f3Smrg state[4] |= S_008F20_DEPTH(depth - 1); 41437ec681f3Smrg else 41447ec681f3Smrg state[4] |= S_008F20_DEPTH(last_layer); 41457ec681f3Smrg 41467ec681f3Smrg state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle); 41477ec681f3Smrg state[5] |= S_008F24_MAX_MIP(num_samples > 1 ? util_logbase2(num_samples) 41487ec681f3Smrg : tex->buffer.b.b.last_level); 41497ec681f3Smrg } else { 41507ec681f3Smrg state[3] |= S_008F1C_POW2_PAD(res->last_level > 0); 41517ec681f3Smrg state[4] |= S_008F20_DEPTH(depth - 1); 41527ec681f3Smrg state[5] |= S_008F24_LAST_ARRAY(last_layer); 41537ec681f3Smrg } 41547ec681f3Smrg 41557ec681f3Smrg if (vi_dcc_enabled(tex, first_level)) { 41567ec681f3Smrg state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); 41577ec681f3Smrg } else { 41587ec681f3Smrg /* The last dword is unused by hw. The shader uses it to clear 41597ec681f3Smrg * bits in the first dword of sampler state. 41607ec681f3Smrg */ 41617ec681f3Smrg if (screen->info.chip_class <= GFX7 && res->nr_samples <= 1) { 41627ec681f3Smrg if (first_level == last_level) 41637ec681f3Smrg state[7] = C_008F30_MAX_ANISO_RATIO; 41647ec681f3Smrg else 41657ec681f3Smrg state[7] = 0xffffffff; 41667ec681f3Smrg } 41677ec681f3Smrg } 41687ec681f3Smrg 41697ec681f3Smrg /* Initialize the sampler view for FMASK. */ 41707ec681f3Smrg if (tex->surface.fmask_offset) { 41717ec681f3Smrg uint32_t data_format, num_format; 41727ec681f3Smrg 41737ec681f3Smrg va = tex->buffer.gpu_address + tex->surface.fmask_offset; 41747ec681f3Smrg 41757ec681f3Smrg#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f))) 41767ec681f3Smrg if (screen->info.chip_class == GFX9) { 41777ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK; 41787ec681f3Smrg switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 41797ec681f3Smrg case FMASK(2, 1): 41807ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_1; 41817ec681f3Smrg break; 41827ec681f3Smrg case FMASK(2, 2): 41837ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2; 41847ec681f3Smrg break; 41857ec681f3Smrg case FMASK(4, 1): 41867ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_1; 41877ec681f3Smrg break; 41887ec681f3Smrg case FMASK(4, 2): 41897ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_2; 41907ec681f3Smrg break; 41917ec681f3Smrg case FMASK(4, 4): 41927ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4; 41937ec681f3Smrg break; 41947ec681f3Smrg case FMASK(8, 1): 41957ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_8_1; 41967ec681f3Smrg break; 41977ec681f3Smrg case FMASK(8, 2): 41987ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_8_2; 41997ec681f3Smrg break; 42007ec681f3Smrg case FMASK(8, 4): 42017ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_4; 42027ec681f3Smrg break; 42037ec681f3Smrg case FMASK(8, 8): 42047ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8; 42057ec681f3Smrg break; 42067ec681f3Smrg case FMASK(16, 1): 42077ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_16_1; 42087ec681f3Smrg break; 42097ec681f3Smrg case FMASK(16, 2): 42107ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_16_2; 42117ec681f3Smrg break; 42127ec681f3Smrg case FMASK(16, 4): 42137ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_4; 42147ec681f3Smrg break; 42157ec681f3Smrg case FMASK(16, 8): 42167ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_8; 42177ec681f3Smrg break; 42187ec681f3Smrg default: 42197ec681f3Smrg unreachable("invalid nr_samples"); 42207ec681f3Smrg } 42217ec681f3Smrg } else { 42227ec681f3Smrg switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 42237ec681f3Smrg case FMASK(2, 1): 42247ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1; 42257ec681f3Smrg break; 42267ec681f3Smrg case FMASK(2, 2): 42277ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 42287ec681f3Smrg break; 42297ec681f3Smrg case FMASK(4, 1): 42307ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1; 42317ec681f3Smrg break; 42327ec681f3Smrg case FMASK(4, 2): 42337ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2; 42347ec681f3Smrg break; 42357ec681f3Smrg case FMASK(4, 4): 42367ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 42377ec681f3Smrg break; 42387ec681f3Smrg case FMASK(8, 1): 42397ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1; 42407ec681f3Smrg break; 42417ec681f3Smrg case FMASK(8, 2): 42427ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2; 42437ec681f3Smrg break; 42447ec681f3Smrg case FMASK(8, 4): 42457ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4; 42467ec681f3Smrg break; 42477ec681f3Smrg case FMASK(8, 8): 42487ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 42497ec681f3Smrg break; 42507ec681f3Smrg case FMASK(16, 1): 42517ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1; 42527ec681f3Smrg break; 42537ec681f3Smrg case FMASK(16, 2): 42547ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2; 42557ec681f3Smrg break; 42567ec681f3Smrg case FMASK(16, 4): 42577ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4; 42587ec681f3Smrg break; 42597ec681f3Smrg case FMASK(16, 8): 42607ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8; 42617ec681f3Smrg break; 42627ec681f3Smrg default: 42637ec681f3Smrg unreachable("invalid nr_samples"); 42647ec681f3Smrg } 42657ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_UINT; 42667ec681f3Smrg } 426701e04c3fSmrg#undef FMASK 426801e04c3fSmrg 42697ec681f3Smrg fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle; 42707ec681f3Smrg fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(data_format) | 42717ec681f3Smrg S_008F14_NUM_FORMAT(num_format); 42727ec681f3Smrg fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1); 42737ec681f3Smrg fmask_state[3] = 42747ec681f3Smrg S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 42757ec681f3Smrg S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 42767ec681f3Smrg S_008F1C_TYPE(si_tex_dim(screen, tex, target, 0)); 42777ec681f3Smrg fmask_state[4] = 0; 42787ec681f3Smrg fmask_state[5] = S_008F24_BASE_ARRAY(first_layer); 42797ec681f3Smrg fmask_state[6] = 0; 42807ec681f3Smrg fmask_state[7] = 0; 42817ec681f3Smrg 42827ec681f3Smrg if (screen->info.chip_class == GFX9) { 42837ec681f3Smrg fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode); 42847ec681f3Smrg fmask_state[4] |= 42857ec681f3Smrg S_008F20_DEPTH(last_layer) | S_008F20_PITCH(tex->surface.u.gfx9.color.fmask_epitch); 42867ec681f3Smrg fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | 42877ec681f3Smrg S_008F24_META_RB_ALIGNED(1); 42887ec681f3Smrg } else { 42897ec681f3Smrg fmask_state[3] |= S_008F1C_TILING_INDEX(tex->surface.u.legacy.color.fmask.tiling_index); 42907ec681f3Smrg fmask_state[4] |= S_008F20_DEPTH(depth - 1) | 42917ec681f3Smrg S_008F20_PITCH(tex->surface.u.legacy.color.fmask.pitch_in_pixels - 1); 42927ec681f3Smrg fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer); 42937ec681f3Smrg } 42947ec681f3Smrg } 429501e04c3fSmrg} 429601e04c3fSmrg 429701e04c3fSmrg/** 429801e04c3fSmrg * Create a sampler view. 429901e04c3fSmrg * 430001e04c3fSmrg * @param ctx context 430101e04c3fSmrg * @param texture texture 430201e04c3fSmrg * @param state sampler view template 430301e04c3fSmrg * @param width0 width0 override (for compressed textures as int) 430401e04c3fSmrg * @param height0 height0 override (for compressed textures as int) 430501e04c3fSmrg * @param force_level set the base address to the level (for compressed textures) 430601e04c3fSmrg */ 43077ec681f3Smrgstruct pipe_sampler_view *si_create_sampler_view_custom(struct pipe_context *ctx, 43087ec681f3Smrg struct pipe_resource *texture, 43097ec681f3Smrg const struct pipe_sampler_view *state, 43107ec681f3Smrg unsigned width0, unsigned height0, 43117ec681f3Smrg unsigned force_level) 431201e04c3fSmrg{ 43137ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 43147ec681f3Smrg struct si_sampler_view *view = CALLOC_STRUCT_CL(si_sampler_view); 43157ec681f3Smrg struct si_texture *tex = (struct si_texture *)texture; 43167ec681f3Smrg unsigned base_level, first_level, last_level; 43177ec681f3Smrg unsigned char state_swizzle[4]; 43187ec681f3Smrg unsigned height, depth, width; 43197ec681f3Smrg unsigned last_layer = state->u.tex.last_layer; 43207ec681f3Smrg enum pipe_format pipe_format; 43217ec681f3Smrg const struct legacy_surf_level *surflevel; 43227ec681f3Smrg 43237ec681f3Smrg if (!view) 43247ec681f3Smrg return NULL; 43257ec681f3Smrg 43267ec681f3Smrg /* initialize base object */ 43277ec681f3Smrg view->base = *state; 43287ec681f3Smrg view->base.texture = NULL; 43297ec681f3Smrg view->base.reference.count = 1; 43307ec681f3Smrg view->base.context = ctx; 43317ec681f3Smrg 43327ec681f3Smrg assert(texture); 43337ec681f3Smrg pipe_resource_reference(&view->base.texture, texture); 43347ec681f3Smrg 43357ec681f3Smrg if (state->format == PIPE_FORMAT_X24S8_UINT || state->format == PIPE_FORMAT_S8X24_UINT || 43367ec681f3Smrg state->format == PIPE_FORMAT_X32_S8X24_UINT || state->format == PIPE_FORMAT_S8_UINT) 43377ec681f3Smrg view->is_stencil_sampler = true; 43387ec681f3Smrg 43397ec681f3Smrg /* Buffer resource. */ 43407ec681f3Smrg if (texture->target == PIPE_BUFFER) { 43417ec681f3Smrg si_make_buffer_descriptor(sctx->screen, si_resource(texture), state->format, 43427ec681f3Smrg state->u.buf.offset, state->u.buf.size, view->state); 43437ec681f3Smrg return &view->base; 43447ec681f3Smrg } 43457ec681f3Smrg 43467ec681f3Smrg state_swizzle[0] = state->swizzle_r; 43477ec681f3Smrg state_swizzle[1] = state->swizzle_g; 43487ec681f3Smrg state_swizzle[2] = state->swizzle_b; 43497ec681f3Smrg state_swizzle[3] = state->swizzle_a; 43507ec681f3Smrg 43517ec681f3Smrg base_level = 0; 43527ec681f3Smrg first_level = state->u.tex.first_level; 43537ec681f3Smrg last_level = state->u.tex.last_level; 43547ec681f3Smrg width = width0; 43557ec681f3Smrg height = height0; 43567ec681f3Smrg depth = texture->depth0; 43577ec681f3Smrg 43587ec681f3Smrg if (sctx->chip_class <= GFX8 && force_level) { 43597ec681f3Smrg assert(force_level == first_level && force_level == last_level); 43607ec681f3Smrg base_level = force_level; 43617ec681f3Smrg first_level = 0; 43627ec681f3Smrg last_level = 0; 43637ec681f3Smrg width = u_minify(width, force_level); 43647ec681f3Smrg height = u_minify(height, force_level); 43657ec681f3Smrg depth = u_minify(depth, force_level); 43667ec681f3Smrg } 43677ec681f3Smrg 43687ec681f3Smrg /* This is not needed if gallium frontends set last_layer correctly. */ 43697ec681f3Smrg if (state->target == PIPE_TEXTURE_1D || state->target == PIPE_TEXTURE_2D || 43707ec681f3Smrg state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE) 43717ec681f3Smrg last_layer = state->u.tex.first_layer; 43727ec681f3Smrg 43737ec681f3Smrg /* Texturing with separate depth and stencil. */ 43747ec681f3Smrg pipe_format = state->format; 43757ec681f3Smrg 43767ec681f3Smrg /* Depth/stencil texturing sometimes needs separate texture. */ 43777ec681f3Smrg if (tex->is_depth && !si_can_sample_zs(tex, view->is_stencil_sampler)) { 43787ec681f3Smrg if (!tex->flushed_depth_texture && !si_init_flushed_depth_texture(ctx, texture)) { 43797ec681f3Smrg pipe_resource_reference(&view->base.texture, NULL); 43807ec681f3Smrg FREE(view); 43817ec681f3Smrg return NULL; 43827ec681f3Smrg } 43837ec681f3Smrg 43847ec681f3Smrg assert(tex->flushed_depth_texture); 43857ec681f3Smrg 43867ec681f3Smrg /* Override format for the case where the flushed texture 43877ec681f3Smrg * contains only Z or only S. 43887ec681f3Smrg */ 43897ec681f3Smrg if (tex->flushed_depth_texture->buffer.b.b.format != tex->buffer.b.b.format) 43907ec681f3Smrg pipe_format = tex->flushed_depth_texture->buffer.b.b.format; 43917ec681f3Smrg 43927ec681f3Smrg tex = tex->flushed_depth_texture; 43937ec681f3Smrg } 43947ec681f3Smrg 43957ec681f3Smrg surflevel = tex->surface.u.legacy.level; 43967ec681f3Smrg 43977ec681f3Smrg if (tex->db_compatible) { 43987ec681f3Smrg if (!view->is_stencil_sampler) 43997ec681f3Smrg pipe_format = tex->db_render_format; 44007ec681f3Smrg 44017ec681f3Smrg switch (pipe_format) { 44027ec681f3Smrg case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 44037ec681f3Smrg pipe_format = PIPE_FORMAT_Z32_FLOAT; 44047ec681f3Smrg break; 44057ec681f3Smrg case PIPE_FORMAT_X8Z24_UNORM: 44067ec681f3Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 44077ec681f3Smrg /* Z24 is always stored like this for DB 44087ec681f3Smrg * compatibility. 44097ec681f3Smrg */ 44107ec681f3Smrg pipe_format = PIPE_FORMAT_Z24X8_UNORM; 44117ec681f3Smrg break; 44127ec681f3Smrg case PIPE_FORMAT_X24S8_UINT: 44137ec681f3Smrg case PIPE_FORMAT_S8X24_UINT: 44147ec681f3Smrg case PIPE_FORMAT_X32_S8X24_UINT: 44157ec681f3Smrg pipe_format = PIPE_FORMAT_S8_UINT; 44167ec681f3Smrg surflevel = tex->surface.u.legacy.zs.stencil_level; 44177ec681f3Smrg break; 44187ec681f3Smrg default:; 44197ec681f3Smrg } 44207ec681f3Smrg } 44217ec681f3Smrg 44227ec681f3Smrg view->dcc_incompatible = 44237ec681f3Smrg vi_dcc_formats_are_incompatible(texture, state->u.tex.first_level, state->format); 44247ec681f3Smrg 44257ec681f3Smrg sctx->screen->make_texture_descriptor( 44267ec681f3Smrg sctx->screen, tex, true, state->target, pipe_format, state_swizzle, first_level, last_level, 44277ec681f3Smrg state->u.tex.first_layer, last_layer, width, height, depth, view->state, view->fmask_state); 44287ec681f3Smrg 44297ec681f3Smrg view->base_level_info = &surflevel[base_level]; 44307ec681f3Smrg view->base_level = base_level; 44317ec681f3Smrg view->block_width = util_format_get_blockwidth(pipe_format); 44327ec681f3Smrg return &view->base; 4433af69d88dSmrg} 4434af69d88dSmrg 44357ec681f3Smrgstatic struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx, 44367ec681f3Smrg struct pipe_resource *texture, 44377ec681f3Smrg const struct pipe_sampler_view *state) 443801e04c3fSmrg{ 44397ec681f3Smrg return si_create_sampler_view_custom(ctx, texture, state, texture ? texture->width0 : 0, 44407ec681f3Smrg texture ? texture->height0 : 0, 0); 444101e04c3fSmrg} 444201e04c3fSmrg 44437ec681f3Smrgstatic void si_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state) 4444af69d88dSmrg{ 44457ec681f3Smrg struct si_sampler_view *view = (struct si_sampler_view *)state; 4446af69d88dSmrg 44477ec681f3Smrg pipe_resource_reference(&state->texture, NULL); 44487ec681f3Smrg FREE_CL(view); 4449af69d88dSmrg} 4450af69d88dSmrg 4451af69d88dSmrgstatic bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 4452af69d88dSmrg{ 44537ec681f3Smrg return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 44547ec681f3Smrg (linear_filter && (wrap == PIPE_TEX_WRAP_CLAMP || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 4455af69d88dSmrg} 4456af69d88dSmrg 445701e04c3fSmrgstatic uint32_t si_translate_border_color(struct si_context *sctx, 44587ec681f3Smrg const struct pipe_sampler_state *state, 44597ec681f3Smrg const union pipe_color_union *color, bool is_integer) 4460af69d88dSmrg{ 44617ec681f3Smrg bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 44627ec681f3Smrg state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 44637ec681f3Smrg 44647ec681f3Smrg if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) && 44657ec681f3Smrg !wrap_mode_uses_border_color(state->wrap_t, linear_filter) && 44667ec681f3Smrg !wrap_mode_uses_border_color(state->wrap_r, linear_filter)) 44677ec681f3Smrg return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 44687ec681f3Smrg 44697ec681f3Smrg#define simple_border_types(elt) \ 44707ec681f3Smrg do { \ 44717ec681f3Smrg if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 0) \ 44727ec681f3Smrg return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); \ 44737ec681f3Smrg if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 1) \ 44747ec681f3Smrg return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK); \ 44757ec681f3Smrg if (color->elt[0] == 1 && color->elt[1] == 1 && color->elt[2] == 1 && color->elt[3] == 1) \ 44767ec681f3Smrg return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE); \ 44777ec681f3Smrg } while (false) 44787ec681f3Smrg 44797ec681f3Smrg if (is_integer) 44807ec681f3Smrg simple_border_types(ui); 44817ec681f3Smrg else 44827ec681f3Smrg simple_border_types(f); 448301e04c3fSmrg 448401e04c3fSmrg#undef simple_border_types 448501e04c3fSmrg 44867ec681f3Smrg int i; 44877ec681f3Smrg 44887ec681f3Smrg /* Check if the border has been uploaded already. */ 44897ec681f3Smrg for (i = 0; i < sctx->border_color_count; i++) 44907ec681f3Smrg if (memcmp(&sctx->border_color_table[i], color, sizeof(*color)) == 0) 44917ec681f3Smrg break; 44927ec681f3Smrg 44937ec681f3Smrg if (i >= SI_MAX_BORDER_COLORS) { 44947ec681f3Smrg /* Getting 4096 unique border colors is very unlikely. */ 44957ec681f3Smrg static bool printed; 44967ec681f3Smrg if (!printed) { 44977ec681f3Smrg fprintf(stderr, "radeonsi: The border color table is full. " 44987ec681f3Smrg "Any new border colors will be just black. " 44997ec681f3Smrg "This is a hardware limitation.\n"); 45007ec681f3Smrg printed = true; 45017ec681f3Smrg } 45027ec681f3Smrg return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 45037ec681f3Smrg } 45047ec681f3Smrg 45057ec681f3Smrg if (i == sctx->border_color_count) { 45067ec681f3Smrg /* Upload a new border color. */ 45077ec681f3Smrg memcpy(&sctx->border_color_table[i], color, sizeof(*color)); 45087ec681f3Smrg util_memcpy_cpu_to_le32(&sctx->border_color_map[i], color, sizeof(*color)); 45097ec681f3Smrg sctx->border_color_count++; 45107ec681f3Smrg } 45117ec681f3Smrg 45127ec681f3Smrg return S_008F3C_BORDER_COLOR_PTR(i) | 45137ec681f3Smrg S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER); 451401e04c3fSmrg} 451501e04c3fSmrg 451601e04c3fSmrgstatic inline int S_FIXED(float value, unsigned frac_bits) 451701e04c3fSmrg{ 45187ec681f3Smrg return value * (1 << frac_bits); 451901e04c3fSmrg} 452001e04c3fSmrg 452101e04c3fSmrgstatic inline unsigned si_tex_filter(unsigned filter, unsigned max_aniso) 452201e04c3fSmrg{ 45237ec681f3Smrg if (filter == PIPE_TEX_FILTER_LINEAR) 45247ec681f3Smrg return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR 45257ec681f3Smrg : V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 45267ec681f3Smrg else 45277ec681f3Smrg return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT 45287ec681f3Smrg : V_008F38_SQ_TEX_XY_FILTER_POINT; 452901e04c3fSmrg} 453001e04c3fSmrg 453101e04c3fSmrgstatic inline unsigned si_tex_aniso_filter(unsigned filter) 453201e04c3fSmrg{ 45337ec681f3Smrg if (filter < 2) 45347ec681f3Smrg return 0; 45357ec681f3Smrg if (filter < 4) 45367ec681f3Smrg return 1; 45377ec681f3Smrg if (filter < 8) 45387ec681f3Smrg return 2; 45397ec681f3Smrg if (filter < 16) 45407ec681f3Smrg return 3; 45417ec681f3Smrg return 4; 4542af69d88dSmrg} 4543af69d88dSmrg 4544af69d88dSmrgstatic void *si_create_sampler_state(struct pipe_context *ctx, 45457ec681f3Smrg const struct pipe_sampler_state *state) 4546af69d88dSmrg{ 45477ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 45487ec681f3Smrg struct si_screen *sscreen = sctx->screen; 45497ec681f3Smrg struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 45507ec681f3Smrg unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy; 45517ec681f3Smrg unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso); 45527ec681f3Smrg bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST && 45537ec681f3Smrg state->mag_img_filter == PIPE_TEX_FILTER_NEAREST && 45547ec681f3Smrg state->compare_mode == PIPE_TEX_COMPARE_NONE; 45557ec681f3Smrg union pipe_color_union clamped_border_color; 45567ec681f3Smrg 45577ec681f3Smrg if (!rstate) { 45587ec681f3Smrg return NULL; 45597ec681f3Smrg } 45607ec681f3Smrg 45617ec681f3Smrg /* Validate inputs. */ 45627ec681f3Smrg if (!is_wrap_mode_legal(sscreen, state->wrap_s) || 45637ec681f3Smrg !is_wrap_mode_legal(sscreen, state->wrap_t) || 45647ec681f3Smrg !is_wrap_mode_legal(sscreen, state->wrap_r) || 45657ec681f3Smrg (!sscreen->info.has_3d_cube_border_color_mipmap && 45667ec681f3Smrg (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE || 45677ec681f3Smrg state->max_anisotropy > 0))) { 45687ec681f3Smrg assert(0); 45697ec681f3Smrg return NULL; 45707ec681f3Smrg } 45717ec681f3Smrg 45727ec681f3Smrg#ifndef NDEBUG 45737ec681f3Smrg rstate->magic = SI_SAMPLER_STATE_MAGIC; 457401e04c3fSmrg#endif 45757ec681f3Smrg rstate->val[0] = 45767ec681f3Smrg (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 45777ec681f3Smrg S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 45787ec681f3Smrg S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 45797ec681f3Smrg S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 45807ec681f3Smrg S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) | 45817ec681f3Smrg S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 45827ec681f3Smrg S_008F30_TRUNC_COORD(trunc_coord) | 45837ec681f3Smrg S_008F30_COMPAT_MODE(sctx->chip_class == GFX8 || sctx->chip_class == GFX9)); 45847ec681f3Smrg rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 45857ec681f3Smrg S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | 45867ec681f3Smrg S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0)); 45877ec681f3Smrg rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 45887ec681f3Smrg S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) | 45897ec681f3Smrg S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) | 45907ec681f3Smrg S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | 45917ec681f3Smrg S_008F38_MIP_POINT_PRECLAMP(0)); 45927ec681f3Smrg rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, 45937ec681f3Smrg state->border_color_is_integer); 45947ec681f3Smrg 45957ec681f3Smrg if (sscreen->info.chip_class >= GFX10) { 45967ec681f3Smrg rstate->val[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1); 45977ec681f3Smrg } else { 45987ec681f3Smrg rstate->val[2] |= S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= GFX8) | 45997ec681f3Smrg S_008F38_FILTER_PREC_FIX(1) | 46007ec681f3Smrg S_008F38_ANISO_OVERRIDE_GFX8(sctx->chip_class >= GFX8); 46017ec681f3Smrg } 46027ec681f3Smrg 46037ec681f3Smrg /* Create sampler resource for upgraded depth textures. */ 46047ec681f3Smrg memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val)); 46057ec681f3Smrg 46067ec681f3Smrg for (unsigned i = 0; i < 4; ++i) { 46077ec681f3Smrg /* Use channel 0 on purpose, so that we can use OPAQUE_WHITE 46087ec681f3Smrg * when the border color is 1.0. */ 46097ec681f3Smrg clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1); 46107ec681f3Smrg } 46117ec681f3Smrg 46127ec681f3Smrg if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0) { 46137ec681f3Smrg if (sscreen->info.chip_class <= GFX9) 46147ec681f3Smrg rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1); 46157ec681f3Smrg } else { 46167ec681f3Smrg rstate->upgraded_depth_val[3] = 46177ec681f3Smrg si_translate_border_color(sctx, state, &clamped_border_color, false); 46187ec681f3Smrg } 46197ec681f3Smrg 46207ec681f3Smrg return rstate; 4621af69d88dSmrg} 4622af69d88dSmrg 462301e04c3fSmrgstatic void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 4624af69d88dSmrg{ 46257ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 4626af69d88dSmrg 46277ec681f3Smrg if (sctx->sample_mask == (uint16_t)sample_mask) 46287ec681f3Smrg return; 4629af69d88dSmrg 46307ec681f3Smrg sctx->sample_mask = sample_mask; 46317ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_mask); 4632af69d88dSmrg} 4633af69d88dSmrg 463401e04c3fSmrgstatic void si_emit_sample_mask(struct si_context *sctx) 4635af69d88dSmrg{ 46367ec681f3Smrg struct radeon_cmdbuf *cs = &sctx->gfx_cs; 46377ec681f3Smrg unsigned mask = sctx->sample_mask; 46387ec681f3Smrg 46397ec681f3Smrg /* Needed for line and polygon smoothing as well as for the Polaris 46407ec681f3Smrg * small primitive filter. We expect the gallium frontend to take care of 46417ec681f3Smrg * this for us. 46427ec681f3Smrg */ 46437ec681f3Smrg assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || 46447ec681f3Smrg (mask & 1 && sctx->blitter_running)); 46457ec681f3Smrg 46467ec681f3Smrg radeon_begin(cs); 46477ec681f3Smrg radeon_set_context_reg_seq(R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 46487ec681f3Smrg radeon_emit(mask | (mask << 16)); 46497ec681f3Smrg radeon_emit(mask | (mask << 16)); 46507ec681f3Smrg radeon_end(); 4651af69d88dSmrg} 4652af69d88dSmrg 4653af69d88dSmrgstatic void si_delete_sampler_state(struct pipe_context *ctx, void *state) 4654af69d88dSmrg{ 46557ec681f3Smrg#ifndef NDEBUG 46567ec681f3Smrg struct si_sampler_state *s = state; 465701e04c3fSmrg 46587ec681f3Smrg assert(s->magic == SI_SAMPLER_STATE_MAGIC); 46597ec681f3Smrg s->magic = 0; 466001e04c3fSmrg#endif 46617ec681f3Smrg free(state); 4662af69d88dSmrg} 4663af69d88dSmrg 4664af69d88dSmrg/* 4665af69d88dSmrg * Vertex elements & buffers 4666af69d88dSmrg */ 4667af69d88dSmrg 46687ec681f3Smrgstruct si_fast_udiv_info32 si_compute_fast_udiv_info32(uint32_t D, unsigned num_bits) 466901e04c3fSmrg{ 46707ec681f3Smrg struct util_fast_udiv_info info = util_compute_fast_udiv_info(D, num_bits, 32); 46717ec681f3Smrg 46727ec681f3Smrg struct si_fast_udiv_info32 result = { 46737ec681f3Smrg info.multiplier, 46747ec681f3Smrg info.pre_shift, 46757ec681f3Smrg info.post_shift, 46767ec681f3Smrg info.increment, 46777ec681f3Smrg }; 46787ec681f3Smrg return result; 467901e04c3fSmrg} 468001e04c3fSmrg 46817ec681f3Smrgstatic void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count, 46827ec681f3Smrg const struct pipe_vertex_element *elements) 4683af69d88dSmrg{ 46847ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)ctx->screen; 46857ec681f3Smrg struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements); 46867ec681f3Smrg bool used[SI_NUM_VERTEX_BUFFERS] = {}; 46877ec681f3Smrg struct si_fast_udiv_info32 divisor_factors[SI_MAX_ATTRIBS] = {}; 46887ec681f3Smrg STATIC_ASSERT(sizeof(struct si_fast_udiv_info32) == 16); 46897ec681f3Smrg STATIC_ASSERT(sizeof(divisor_factors[0].multiplier) == 4); 46907ec681f3Smrg STATIC_ASSERT(sizeof(divisor_factors[0].pre_shift) == 4); 46917ec681f3Smrg STATIC_ASSERT(sizeof(divisor_factors[0].post_shift) == 4); 46927ec681f3Smrg STATIC_ASSERT(sizeof(divisor_factors[0].increment) == 4); 46937ec681f3Smrg int i; 46947ec681f3Smrg 46957ec681f3Smrg assert(count <= SI_MAX_ATTRIBS); 46967ec681f3Smrg if (!v) 46977ec681f3Smrg return NULL; 46987ec681f3Smrg 46997ec681f3Smrg v->count = count; 47007ec681f3Smrg 47017ec681f3Smrg unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sscreen); 47027ec681f3Smrg unsigned alloc_count = 47037ec681f3Smrg count > num_vbos_in_user_sgprs ? count - num_vbos_in_user_sgprs : 0; 47047ec681f3Smrg v->vb_desc_list_alloc_size = align(alloc_count * 16, SI_CPDMA_ALIGNMENT); 47057ec681f3Smrg 47067ec681f3Smrg for (i = 0; i < count; ++i) { 47077ec681f3Smrg const struct util_format_description *desc; 47087ec681f3Smrg const struct util_format_channel_description *channel; 47097ec681f3Smrg int first_non_void; 47107ec681f3Smrg unsigned vbo_index = elements[i].vertex_buffer_index; 47117ec681f3Smrg 47127ec681f3Smrg if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { 47137ec681f3Smrg FREE(v); 47147ec681f3Smrg return NULL; 47157ec681f3Smrg } 47167ec681f3Smrg 47177ec681f3Smrg unsigned instance_divisor = elements[i].instance_divisor; 47187ec681f3Smrg if (instance_divisor) { 47197ec681f3Smrg if (instance_divisor == 1) { 47207ec681f3Smrg v->instance_divisor_is_one |= 1u << i; 47217ec681f3Smrg } else { 47227ec681f3Smrg v->instance_divisor_is_fetched |= 1u << i; 47237ec681f3Smrg divisor_factors[i] = si_compute_fast_udiv_info32(instance_divisor, 32); 47247ec681f3Smrg } 47257ec681f3Smrg } 47267ec681f3Smrg 47277ec681f3Smrg if (!used[vbo_index]) { 47287ec681f3Smrg v->first_vb_use_mask |= 1 << i; 47297ec681f3Smrg used[vbo_index] = true; 47307ec681f3Smrg } 47317ec681f3Smrg 47327ec681f3Smrg desc = util_format_description(elements[i].src_format); 47337ec681f3Smrg first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 47347ec681f3Smrg channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL; 47357ec681f3Smrg 47367ec681f3Smrg v->format_size[i] = desc->block.bits / 8; 47377ec681f3Smrg v->src_offset[i] = elements[i].src_offset; 47387ec681f3Smrg v->vertex_buffer_index[i] = vbo_index; 47397ec681f3Smrg 47407ec681f3Smrg bool always_fix = false; 47417ec681f3Smrg union si_vs_fix_fetch fix_fetch; 47427ec681f3Smrg unsigned log_hw_load_size; /* the load element size as seen by the hardware */ 47437ec681f3Smrg 47447ec681f3Smrg fix_fetch.bits = 0; 47457ec681f3Smrg log_hw_load_size = MIN2(2, util_logbase2(desc->block.bits) - 3); 47467ec681f3Smrg 47477ec681f3Smrg if (channel) { 47487ec681f3Smrg switch (channel->type) { 47497ec681f3Smrg case UTIL_FORMAT_TYPE_FLOAT: 47507ec681f3Smrg fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT; 47517ec681f3Smrg break; 47527ec681f3Smrg case UTIL_FORMAT_TYPE_FIXED: 47537ec681f3Smrg fix_fetch.u.format = AC_FETCH_FORMAT_FIXED; 47547ec681f3Smrg break; 47557ec681f3Smrg case UTIL_FORMAT_TYPE_SIGNED: { 47567ec681f3Smrg if (channel->pure_integer) 47577ec681f3Smrg fix_fetch.u.format = AC_FETCH_FORMAT_SINT; 47587ec681f3Smrg else if (channel->normalized) 47597ec681f3Smrg fix_fetch.u.format = AC_FETCH_FORMAT_SNORM; 47607ec681f3Smrg else 47617ec681f3Smrg fix_fetch.u.format = AC_FETCH_FORMAT_SSCALED; 47627ec681f3Smrg break; 47637ec681f3Smrg } 47647ec681f3Smrg case UTIL_FORMAT_TYPE_UNSIGNED: { 47657ec681f3Smrg if (channel->pure_integer) 47667ec681f3Smrg fix_fetch.u.format = AC_FETCH_FORMAT_UINT; 47677ec681f3Smrg else if (channel->normalized) 47687ec681f3Smrg fix_fetch.u.format = AC_FETCH_FORMAT_UNORM; 47697ec681f3Smrg else 47707ec681f3Smrg fix_fetch.u.format = AC_FETCH_FORMAT_USCALED; 47717ec681f3Smrg break; 47727ec681f3Smrg } 47737ec681f3Smrg default: 47747ec681f3Smrg unreachable("bad format type"); 47757ec681f3Smrg } 47767ec681f3Smrg } else { 47777ec681f3Smrg switch (elements[i].src_format) { 47787ec681f3Smrg case PIPE_FORMAT_R11G11B10_FLOAT: 47797ec681f3Smrg fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT; 47807ec681f3Smrg break; 47817ec681f3Smrg default: 47827ec681f3Smrg unreachable("bad other format"); 47837ec681f3Smrg } 47847ec681f3Smrg } 47857ec681f3Smrg 47867ec681f3Smrg if (desc->channel[0].size == 10) { 47877ec681f3Smrg fix_fetch.u.log_size = 3; /* special encoding for 2_10_10_10 */ 47887ec681f3Smrg log_hw_load_size = 2; 47897ec681f3Smrg 47907ec681f3Smrg /* The hardware always treats the 2-bit alpha channel as 47917ec681f3Smrg * unsigned, so a shader workaround is needed. The affected 47927ec681f3Smrg * chips are GFX8 and older except Stoney (GFX8.1). 47937ec681f3Smrg */ 47947ec681f3Smrg always_fix = sscreen->info.chip_class <= GFX8 && sscreen->info.family != CHIP_STONEY && 47957ec681f3Smrg channel->type == UTIL_FORMAT_TYPE_SIGNED; 47967ec681f3Smrg } else if (elements[i].src_format == PIPE_FORMAT_R11G11B10_FLOAT) { 47977ec681f3Smrg fix_fetch.u.log_size = 3; /* special encoding */ 47987ec681f3Smrg fix_fetch.u.format = AC_FETCH_FORMAT_FIXED; 47997ec681f3Smrg log_hw_load_size = 2; 48007ec681f3Smrg } else { 48017ec681f3Smrg fix_fetch.u.log_size = util_logbase2(channel->size) - 3; 48027ec681f3Smrg fix_fetch.u.num_channels_m1 = desc->nr_channels - 1; 48037ec681f3Smrg 48047ec681f3Smrg /* Always fix up: 48057ec681f3Smrg * - doubles (multiple loads + truncate to float) 48067ec681f3Smrg * - 32-bit requiring a conversion 48077ec681f3Smrg */ 48087ec681f3Smrg always_fix = (fix_fetch.u.log_size == 3) || 48097ec681f3Smrg (fix_fetch.u.log_size == 2 && fix_fetch.u.format != AC_FETCH_FORMAT_FLOAT && 48107ec681f3Smrg fix_fetch.u.format != AC_FETCH_FORMAT_UINT && 48117ec681f3Smrg fix_fetch.u.format != AC_FETCH_FORMAT_SINT); 48127ec681f3Smrg 48137ec681f3Smrg /* Also fixup 8_8_8 and 16_16_16. */ 48147ec681f3Smrg if (desc->nr_channels == 3 && fix_fetch.u.log_size <= 1) { 48157ec681f3Smrg always_fix = true; 48167ec681f3Smrg log_hw_load_size = fix_fetch.u.log_size; 48177ec681f3Smrg } 48187ec681f3Smrg } 48197ec681f3Smrg 48207ec681f3Smrg if (desc->swizzle[0] != PIPE_SWIZZLE_X) { 48217ec681f3Smrg assert(desc->swizzle[0] == PIPE_SWIZZLE_Z && 48227ec681f3Smrg (desc->swizzle[2] == PIPE_SWIZZLE_X || desc->swizzle[2] == PIPE_SWIZZLE_0)); 48237ec681f3Smrg fix_fetch.u.reverse = 1; 48247ec681f3Smrg } 48257ec681f3Smrg 48267ec681f3Smrg /* Force the workaround for unaligned access here already if the 48277ec681f3Smrg * offset relative to the vertex buffer base is unaligned. 48287ec681f3Smrg * 48297ec681f3Smrg * There is a theoretical case in which this is too conservative: 48307ec681f3Smrg * if the vertex buffer's offset is also unaligned in just the 48317ec681f3Smrg * right way, we end up with an aligned address after all. 48327ec681f3Smrg * However, this case should be extremely rare in practice (it 48337ec681f3Smrg * won't happen in well-behaved applications), and taking it 48347ec681f3Smrg * into account would complicate the fast path (where everything 48357ec681f3Smrg * is nicely aligned). 48367ec681f3Smrg */ 48377ec681f3Smrg bool check_alignment = 48387ec681f3Smrg log_hw_load_size >= 1 && 48397ec681f3Smrg (sscreen->info.chip_class == GFX6 || sscreen->info.chip_class >= GFX10); 48407ec681f3Smrg bool opencode = sscreen->options.vs_fetch_always_opencode; 48417ec681f3Smrg 48427ec681f3Smrg if (check_alignment && (elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0) 48437ec681f3Smrg opencode = true; 48447ec681f3Smrg 48457ec681f3Smrg if (always_fix || check_alignment || opencode) 48467ec681f3Smrg v->fix_fetch[i] = fix_fetch.bits; 48477ec681f3Smrg 48487ec681f3Smrg if (opencode) 48497ec681f3Smrg v->fix_fetch_opencode |= 1 << i; 48507ec681f3Smrg if (opencode || always_fix) 48517ec681f3Smrg v->fix_fetch_always |= 1 << i; 48527ec681f3Smrg 48537ec681f3Smrg if (check_alignment && !opencode) { 48547ec681f3Smrg assert(log_hw_load_size == 1 || log_hw_load_size == 2); 48557ec681f3Smrg 48567ec681f3Smrg v->fix_fetch_unaligned |= 1 << i; 48577ec681f3Smrg v->hw_load_is_dword |= (log_hw_load_size - 1) << i; 48587ec681f3Smrg v->vb_alignment_check_mask |= 1 << vbo_index; 48597ec681f3Smrg } 48607ec681f3Smrg 48617ec681f3Smrg v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 48627ec681f3Smrg S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 48637ec681f3Smrg S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 48647ec681f3Smrg S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])); 48657ec681f3Smrg 48667ec681f3Smrg if (sscreen->info.chip_class >= GFX10) { 48677ec681f3Smrg const struct gfx10_format *fmt = &gfx10_format_table[elements[i].src_format]; 48687ec681f3Smrg assert(fmt->img_format != 0 && fmt->img_format < 128); 48697ec681f3Smrg v->rsrc_word3[i] |= S_008F0C_FORMAT(fmt->img_format) | S_008F0C_RESOURCE_LEVEL(1); 48707ec681f3Smrg } else { 48717ec681f3Smrg unsigned data_format, num_format; 48727ec681f3Smrg data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 48737ec681f3Smrg num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 48747ec681f3Smrg v->rsrc_word3[i] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); 48757ec681f3Smrg } 48767ec681f3Smrg } 48777ec681f3Smrg 48787ec681f3Smrg if (v->instance_divisor_is_fetched) { 48797ec681f3Smrg unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched); 48807ec681f3Smrg 48817ec681f3Smrg v->instance_divisor_factor_buffer = (struct si_resource *)pipe_buffer_create( 48827ec681f3Smrg &sscreen->b, 0, PIPE_USAGE_DEFAULT, num_divisors * sizeof(divisor_factors[0])); 48837ec681f3Smrg if (!v->instance_divisor_factor_buffer) { 48847ec681f3Smrg FREE(v); 48857ec681f3Smrg return NULL; 48867ec681f3Smrg } 48877ec681f3Smrg void *map = 48887ec681f3Smrg sscreen->ws->buffer_map(sscreen->ws, v->instance_divisor_factor_buffer->buf, NULL, PIPE_MAP_WRITE); 48897ec681f3Smrg memcpy(map, divisor_factors, num_divisors * sizeof(divisor_factors[0])); 48907ec681f3Smrg } 48917ec681f3Smrg return v; 4892af69d88dSmrg} 4893af69d88dSmrg 4894af69d88dSmrgstatic void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 4895af69d88dSmrg{ 48967ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 48977ec681f3Smrg struct si_vertex_elements *old = sctx->vertex_elements; 48987ec681f3Smrg struct si_vertex_elements *v = (struct si_vertex_elements *)state; 48997ec681f3Smrg 49007ec681f3Smrg if (!v) 49017ec681f3Smrg v = sctx->no_velems_state; 49027ec681f3Smrg 49037ec681f3Smrg sctx->vertex_elements = v; 49047ec681f3Smrg sctx->num_vertex_elements = v->count; 49057ec681f3Smrg 49067ec681f3Smrg if (sctx->num_vertex_elements) { 49077ec681f3Smrg sctx->vertex_buffers_dirty = true; 49087ec681f3Smrg } else { 49097ec681f3Smrg sctx->vertex_buffers_dirty = false; 49107ec681f3Smrg sctx->vertex_buffer_pointer_dirty = false; 49117ec681f3Smrg sctx->vertex_buffer_user_sgprs_dirty = false; 49127ec681f3Smrg } 49137ec681f3Smrg 49147ec681f3Smrg if (old->instance_divisor_is_one != v->instance_divisor_is_one || 49157ec681f3Smrg old->instance_divisor_is_fetched != v->instance_divisor_is_fetched || 49167ec681f3Smrg (old->vb_alignment_check_mask ^ v->vb_alignment_check_mask) & 49177ec681f3Smrg sctx->vertex_buffer_unaligned || 49187ec681f3Smrg ((v->vb_alignment_check_mask & sctx->vertex_buffer_unaligned) && 49197ec681f3Smrg memcmp(old->vertex_buffer_index, v->vertex_buffer_index, 49207ec681f3Smrg sizeof(v->vertex_buffer_index[0]) * MAX2(old->count, v->count))) || 49217ec681f3Smrg /* fix_fetch_{always,opencode,unaligned} and hw_load_is_dword are 49227ec681f3Smrg * functions of fix_fetch and the src_offset alignment. 49237ec681f3Smrg * If they change and fix_fetch doesn't, it must be due to different 49247ec681f3Smrg * src_offset alignment, which is reflected in fix_fetch_opencode. */ 49257ec681f3Smrg old->fix_fetch_opencode != v->fix_fetch_opencode || 49267ec681f3Smrg memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * 49277ec681f3Smrg MAX2(old->count, v->count))) { 49287ec681f3Smrg si_vs_key_update_inputs(sctx); 49297ec681f3Smrg sctx->do_update_shaders = true; 49307ec681f3Smrg } 49317ec681f3Smrg 49327ec681f3Smrg if (v->instance_divisor_is_fetched) { 49337ec681f3Smrg struct pipe_constant_buffer cb; 49347ec681f3Smrg 49357ec681f3Smrg cb.buffer = &v->instance_divisor_factor_buffer->b.b; 49367ec681f3Smrg cb.user_buffer = NULL; 49377ec681f3Smrg cb.buffer_offset = 0; 49387ec681f3Smrg cb.buffer_size = 0xffffffff; 49397ec681f3Smrg si_set_internal_const_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb); 49407ec681f3Smrg } 4941af69d88dSmrg} 4942af69d88dSmrg 4943af69d88dSmrgstatic void si_delete_vertex_element(struct pipe_context *ctx, void *state) 4944af69d88dSmrg{ 49457ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 49467ec681f3Smrg struct si_vertex_elements *v = (struct si_vertex_elements *)state; 49477ec681f3Smrg 49487ec681f3Smrg if (sctx->vertex_elements == state) 49497ec681f3Smrg si_bind_vertex_elements(ctx, sctx->no_velems_state); 49507ec681f3Smrg 49517ec681f3Smrg si_resource_reference(&v->instance_divisor_factor_buffer, NULL); 49527ec681f3Smrg FREE(state); 49537ec681f3Smrg} 49547ec681f3Smrg 49557ec681f3Smrgstatic void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count, 49567ec681f3Smrg unsigned unbind_num_trailing_slots, bool take_ownership, 49577ec681f3Smrg const struct pipe_vertex_buffer *buffers) 49587ec681f3Smrg{ 49597ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 49607ec681f3Smrg struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 49617ec681f3Smrg unsigned updated_mask = u_bit_consecutive(start_slot, count + unbind_num_trailing_slots); 49627ec681f3Smrg uint32_t orig_unaligned = sctx->vertex_buffer_unaligned; 49637ec681f3Smrg uint32_t unaligned = 0; 49647ec681f3Smrg int i; 49657ec681f3Smrg 49667ec681f3Smrg assert(start_slot + count + unbind_num_trailing_slots <= ARRAY_SIZE(sctx->vertex_buffer)); 49677ec681f3Smrg 49687ec681f3Smrg if (buffers) { 49697ec681f3Smrg if (take_ownership) { 49707ec681f3Smrg for (i = 0; i < count; i++) { 49717ec681f3Smrg const struct pipe_vertex_buffer *src = buffers + i; 49727ec681f3Smrg struct pipe_vertex_buffer *dsti = dst + i; 49737ec681f3Smrg struct pipe_resource *buf = src->buffer.resource; 49747ec681f3Smrg unsigned slot_bit = 1 << (start_slot + i); 49757ec681f3Smrg 49767ec681f3Smrg /* Only unreference bound vertex buffers. (take_ownership) */ 49777ec681f3Smrg pipe_resource_reference(&dsti->buffer.resource, NULL); 49787ec681f3Smrg 49797ec681f3Smrg if (src->buffer_offset & 3 || src->stride & 3) 49807ec681f3Smrg unaligned |= slot_bit; 49817ec681f3Smrg 49827ec681f3Smrg si_context_add_resource_size(sctx, buf); 49837ec681f3Smrg if (buf) 49847ec681f3Smrg si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; 49857ec681f3Smrg } 49867ec681f3Smrg /* take_ownership allows us to copy pipe_resource pointers without refcounting. */ 49877ec681f3Smrg memcpy(dst, buffers, count * sizeof(struct pipe_vertex_buffer)); 49887ec681f3Smrg } else { 49897ec681f3Smrg for (i = 0; i < count; i++) { 49907ec681f3Smrg const struct pipe_vertex_buffer *src = buffers + i; 49917ec681f3Smrg struct pipe_vertex_buffer *dsti = dst + i; 49927ec681f3Smrg struct pipe_resource *buf = src->buffer.resource; 49937ec681f3Smrg unsigned slot_bit = 1 << (start_slot + i); 49947ec681f3Smrg 49957ec681f3Smrg pipe_resource_reference(&dsti->buffer.resource, buf); 49967ec681f3Smrg dsti->buffer_offset = src->buffer_offset; 49977ec681f3Smrg dsti->stride = src->stride; 49987ec681f3Smrg 49997ec681f3Smrg if (dsti->buffer_offset & 3 || dsti->stride & 3) 50007ec681f3Smrg unaligned |= slot_bit; 50017ec681f3Smrg 50027ec681f3Smrg si_context_add_resource_size(sctx, buf); 50037ec681f3Smrg if (buf) 50047ec681f3Smrg si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; 50057ec681f3Smrg } 50067ec681f3Smrg } 50077ec681f3Smrg } else { 50087ec681f3Smrg for (i = 0; i < count; i++) 50097ec681f3Smrg pipe_resource_reference(&dst[i].buffer.resource, NULL); 50107ec681f3Smrg } 50117ec681f3Smrg 50127ec681f3Smrg for (i = 0; i < unbind_num_trailing_slots; i++) 50137ec681f3Smrg pipe_resource_reference(&dst[count + i].buffer.resource, NULL); 50147ec681f3Smrg 50157ec681f3Smrg sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0; 50167ec681f3Smrg sctx->vertex_buffer_unaligned = (orig_unaligned & ~updated_mask) | unaligned; 50177ec681f3Smrg 50187ec681f3Smrg /* Check whether alignment may have changed in a way that requires 50197ec681f3Smrg * shader changes. This check is conservative: a vertex buffer can only 50207ec681f3Smrg * trigger a shader change if the misalignment amount changes (e.g. 50217ec681f3Smrg * from byte-aligned to short-aligned), but we only keep track of 50227ec681f3Smrg * whether buffers are at least dword-aligned, since that should always 50237ec681f3Smrg * be the case in well-behaved applications anyway. 50247ec681f3Smrg */ 50257ec681f3Smrg if ((sctx->vertex_elements->vb_alignment_check_mask & 50267ec681f3Smrg (unaligned | orig_unaligned) & updated_mask)) { 50277ec681f3Smrg si_vs_key_update_inputs(sctx); 50287ec681f3Smrg sctx->do_update_shaders = true; 50297ec681f3Smrg } 50307ec681f3Smrg} 5031af69d88dSmrg 50327ec681f3Smrgstatic struct pipe_vertex_state * 50337ec681f3Smrgsi_create_vertex_state(struct pipe_screen *screen, 50347ec681f3Smrg struct pipe_vertex_buffer *buffer, 50357ec681f3Smrg const struct pipe_vertex_element *elements, 50367ec681f3Smrg unsigned num_elements, 50377ec681f3Smrg struct pipe_resource *indexbuf, 50387ec681f3Smrg uint32_t full_velem_mask) 50397ec681f3Smrg{ 50407ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 50417ec681f3Smrg struct si_vertex_state *state = CALLOC_STRUCT(si_vertex_state); 50427ec681f3Smrg 50437ec681f3Smrg util_init_pipe_vertex_state(screen, buffer, elements, num_elements, indexbuf, full_velem_mask, 50447ec681f3Smrg &state->b); 50457ec681f3Smrg 50467ec681f3Smrg /* Initialize the vertex element state in state->element. 50477ec681f3Smrg * Do it by creating a vertex element state object and copying it there. 50487ec681f3Smrg */ 50497ec681f3Smrg struct si_context ctx = {}; 50507ec681f3Smrg ctx.b.screen = screen; 50517ec681f3Smrg struct si_vertex_elements *velems = si_create_vertex_elements(&ctx.b, num_elements, elements); 50527ec681f3Smrg state->velems = *velems; 50537ec681f3Smrg si_delete_vertex_element(&ctx.b, velems); 50547ec681f3Smrg 50557ec681f3Smrg assert(!state->velems.instance_divisor_is_one); 50567ec681f3Smrg assert(!state->velems.instance_divisor_is_fetched); 50577ec681f3Smrg assert(!state->velems.fix_fetch_always); 50587ec681f3Smrg assert(buffer->stride % 4 == 0); 50597ec681f3Smrg assert(buffer->buffer_offset % 4 == 0); 50607ec681f3Smrg assert(!buffer->is_user_buffer); 50617ec681f3Smrg for (unsigned i = 0; i < num_elements; i++) { 50627ec681f3Smrg assert(elements[i].src_offset % 4 == 0); 50637ec681f3Smrg assert(!elements[i].dual_slot); 50647ec681f3Smrg } 50657ec681f3Smrg 50667ec681f3Smrg for (unsigned i = 0; i < num_elements; i++) { 50677ec681f3Smrg si_set_vertex_buffer_descriptor(sscreen, &state->velems, &state->b.input.vbuffer, i, 50687ec681f3Smrg &state->descriptors[i * 4]); 50697ec681f3Smrg } 50707ec681f3Smrg 50717ec681f3Smrg return &state->b; 5072af69d88dSmrg} 5073af69d88dSmrg 50747ec681f3Smrgstatic void si_vertex_state_destroy(struct pipe_screen *screen, 50757ec681f3Smrg struct pipe_vertex_state *state) 5076af69d88dSmrg{ 50777ec681f3Smrg pipe_vertex_buffer_unreference(&state->input.vbuffer); 50787ec681f3Smrg pipe_resource_reference(&state->input.indexbuf, NULL); 50797ec681f3Smrg FREE(state); 50807ec681f3Smrg} 50817ec681f3Smrg 50827ec681f3Smrgstatic struct pipe_vertex_state * 50837ec681f3Smrgsi_pipe_create_vertex_state(struct pipe_screen *screen, 50847ec681f3Smrg struct pipe_vertex_buffer *buffer, 50857ec681f3Smrg const struct pipe_vertex_element *elements, 50867ec681f3Smrg unsigned num_elements, 50877ec681f3Smrg struct pipe_resource *indexbuf, 50887ec681f3Smrg uint32_t full_velem_mask) 50897ec681f3Smrg{ 50907ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 50917ec681f3Smrg 50927ec681f3Smrg return util_vertex_state_cache_get(screen, buffer, elements, num_elements, indexbuf, 50937ec681f3Smrg full_velem_mask, &sscreen->vertex_state_cache); 50947ec681f3Smrg} 50957ec681f3Smrg 50967ec681f3Smrgstatic void si_pipe_vertex_state_destroy(struct pipe_screen *screen, 50977ec681f3Smrg struct pipe_vertex_state *state) 50987ec681f3Smrg{ 50997ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 51007ec681f3Smrg 51017ec681f3Smrg util_vertex_state_destroy(screen, &sscreen->vertex_state_cache, state); 5102af69d88dSmrg} 5103af69d88dSmrg 510401e04c3fSmrg/* 510501e04c3fSmrg * Misc 510601e04c3fSmrg */ 510701e04c3fSmrg 51087ec681f3Smrgstatic void si_set_tess_state(struct pipe_context *ctx, const float default_outer_level[4], 51097ec681f3Smrg const float default_inner_level[2]) 5110af69d88dSmrg{ 51117ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 51127ec681f3Smrg struct pipe_constant_buffer cb; 51137ec681f3Smrg float array[8]; 5114af69d88dSmrg 51157ec681f3Smrg memcpy(array, default_outer_level, sizeof(float) * 4); 51167ec681f3Smrg memcpy(array + 4, default_inner_level, sizeof(float) * 2); 511701e04c3fSmrg 51187ec681f3Smrg cb.buffer = NULL; 51197ec681f3Smrg cb.user_buffer = array; 51207ec681f3Smrg cb.buffer_offset = 0; 51217ec681f3Smrg cb.buffer_size = sizeof(array); 51227ec681f3Smrg 51237ec681f3Smrg si_set_internal_const_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); 51247ec681f3Smrg} 512501e04c3fSmrg 51267ec681f3Smrgstatic void si_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertices) 51277ec681f3Smrg{ 51287ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 512901e04c3fSmrg 51307ec681f3Smrg sctx->patch_vertices = patch_vertices; 5131af69d88dSmrg} 5132af69d88dSmrg 513301e04c3fSmrgstatic void si_texture_barrier(struct pipe_context *ctx, unsigned flags) 5134af69d88dSmrg{ 51357ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 513601e04c3fSmrg 51377ec681f3Smrg si_update_fb_dirtiness_after_rendering(sctx); 513801e04c3fSmrg 51397ec681f3Smrg /* Multisample surfaces are flushed in si_decompress_textures. */ 51407ec681f3Smrg if (sctx->framebuffer.uncompressed_cb_mask) { 51417ec681f3Smrg si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 51427ec681f3Smrg sctx->framebuffer.CB_has_shader_readable_metadata, 51437ec681f3Smrg sctx->framebuffer.all_DCC_pipe_aligned); 51447ec681f3Smrg } 5145af69d88dSmrg} 5146af69d88dSmrg 514701e04c3fSmrg/* This only ensures coherency for shader image/buffer stores. */ 514801e04c3fSmrgstatic void si_memory_barrier(struct pipe_context *ctx, unsigned flags) 5149af69d88dSmrg{ 51507ec681f3Smrg struct si_context *sctx = (struct si_context *)ctx; 51517ec681f3Smrg 51527ec681f3Smrg if (!(flags & ~PIPE_BARRIER_UPDATE)) 51537ec681f3Smrg return; 51547ec681f3Smrg 51557ec681f3Smrg /* Subsequent commands must wait for all shader invocations to 51567ec681f3Smrg * complete. */ 51577ec681f3Smrg sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | 51587ec681f3Smrg SI_CONTEXT_PFP_SYNC_ME; 51597ec681f3Smrg 51607ec681f3Smrg if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 51617ec681f3Smrg sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE; 51627ec681f3Smrg 51637ec681f3Smrg if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_TEXTURE | 51647ec681f3Smrg PIPE_BARRIER_IMAGE | PIPE_BARRIER_STREAMOUT_BUFFER | PIPE_BARRIER_GLOBAL_BUFFER)) { 51657ec681f3Smrg /* As far as I can tell, L1 contents are written back to L2 51667ec681f3Smrg * automatically at end of shader, but the contents of other 51677ec681f3Smrg * L1 caches might still be stale. */ 51687ec681f3Smrg sctx->flags |= SI_CONTEXT_INV_VCACHE; 51697ec681f3Smrg } 51707ec681f3Smrg 51717ec681f3Smrg if (flags & PIPE_BARRIER_INDEX_BUFFER) { 51727ec681f3Smrg /* Indices are read through TC L2 since GFX8. 51737ec681f3Smrg * L1 isn't used. 51747ec681f3Smrg */ 51757ec681f3Smrg if (sctx->screen->info.chip_class <= GFX7) 51767ec681f3Smrg sctx->flags |= SI_CONTEXT_WB_L2; 51777ec681f3Smrg } 51787ec681f3Smrg 51797ec681f3Smrg /* MSAA color, any depth and any stencil are flushed in 51807ec681f3Smrg * si_decompress_textures when needed. 51817ec681f3Smrg */ 51827ec681f3Smrg if (flags & PIPE_BARRIER_FRAMEBUFFER && sctx->framebuffer.uncompressed_cb_mask) { 51837ec681f3Smrg sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 51847ec681f3Smrg 51857ec681f3Smrg if (sctx->chip_class <= GFX8) 51867ec681f3Smrg sctx->flags |= SI_CONTEXT_WB_L2; 51877ec681f3Smrg } 51887ec681f3Smrg 51897ec681f3Smrg /* Indirect buffers use TC L2 on GFX9, but not older hw. */ 51907ec681f3Smrg if (sctx->screen->info.chip_class <= GFX8 && flags & PIPE_BARRIER_INDIRECT_BUFFER) 51917ec681f3Smrg sctx->flags |= SI_CONTEXT_WB_L2; 5192af69d88dSmrg} 5193af69d88dSmrg 5194af69d88dSmrgstatic void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 5195af69d88dSmrg{ 51967ec681f3Smrg struct pipe_blend_state blend; 5197af69d88dSmrg 51987ec681f3Smrg memset(&blend, 0, sizeof(blend)); 51997ec681f3Smrg blend.independent_blend_enable = true; 52007ec681f3Smrg blend.rt[0].colormask = 0xf; 52017ec681f3Smrg return si_create_blend_state_mode(&sctx->b, &blend, mode); 5202af69d88dSmrg} 5203af69d88dSmrg 52049f464c52Smayavoid si_init_state_compute_functions(struct si_context *sctx) 52059f464c52Smaya{ 52067ec681f3Smrg sctx->b.create_sampler_state = si_create_sampler_state; 52077ec681f3Smrg sctx->b.delete_sampler_state = si_delete_sampler_state; 52087ec681f3Smrg sctx->b.create_sampler_view = si_create_sampler_view; 52097ec681f3Smrg sctx->b.sampler_view_destroy = si_sampler_view_destroy; 52107ec681f3Smrg sctx->b.memory_barrier = si_memory_barrier; 52119f464c52Smaya} 52129f464c52Smaya 5213af69d88dSmrgvoid si_init_state_functions(struct si_context *sctx) 5214af69d88dSmrg{ 52157ec681f3Smrg sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state; 52167ec681f3Smrg sctx->atoms.s.msaa_sample_locs.emit = si_emit_msaa_sample_locs; 52177ec681f3Smrg sctx->atoms.s.db_render_state.emit = si_emit_db_render_state; 52187ec681f3Smrg sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state; 52197ec681f3Smrg sctx->atoms.s.msaa_config.emit = si_emit_msaa_config; 52207ec681f3Smrg sctx->atoms.s.sample_mask.emit = si_emit_sample_mask; 52217ec681f3Smrg sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state; 52227ec681f3Smrg sctx->atoms.s.blend_color.emit = si_emit_blend_color; 52237ec681f3Smrg sctx->atoms.s.clip_regs.emit = si_emit_clip_regs; 52247ec681f3Smrg sctx->atoms.s.clip_state.emit = si_emit_clip_state; 52257ec681f3Smrg sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref; 52267ec681f3Smrg 52277ec681f3Smrg sctx->b.create_blend_state = si_create_blend_state; 52287ec681f3Smrg sctx->b.bind_blend_state = si_bind_blend_state; 52297ec681f3Smrg sctx->b.delete_blend_state = si_delete_blend_state; 52307ec681f3Smrg sctx->b.set_blend_color = si_set_blend_color; 52317ec681f3Smrg 52327ec681f3Smrg sctx->b.create_rasterizer_state = si_create_rs_state; 52337ec681f3Smrg sctx->b.bind_rasterizer_state = si_bind_rs_state; 52347ec681f3Smrg sctx->b.delete_rasterizer_state = si_delete_rs_state; 52357ec681f3Smrg 52367ec681f3Smrg sctx->b.create_depth_stencil_alpha_state = si_create_dsa_state; 52377ec681f3Smrg sctx->b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 52387ec681f3Smrg sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 52397ec681f3Smrg 52407ec681f3Smrg sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 52417ec681f3Smrg sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 52427ec681f3Smrg sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 52437ec681f3Smrg sctx->custom_blend_eliminate_fastclear = 52447ec681f3Smrg si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 52457ec681f3Smrg sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS); 52467ec681f3Smrg 52477ec681f3Smrg sctx->b.set_clip_state = si_set_clip_state; 52487ec681f3Smrg sctx->b.set_stencil_ref = si_set_stencil_ref; 52497ec681f3Smrg 52507ec681f3Smrg sctx->b.set_framebuffer_state = si_set_framebuffer_state; 52517ec681f3Smrg 52527ec681f3Smrg sctx->b.set_sample_mask = si_set_sample_mask; 52537ec681f3Smrg 52547ec681f3Smrg sctx->b.create_vertex_elements_state = si_create_vertex_elements; 52557ec681f3Smrg sctx->b.bind_vertex_elements_state = si_bind_vertex_elements; 52567ec681f3Smrg sctx->b.delete_vertex_elements_state = si_delete_vertex_element; 52577ec681f3Smrg sctx->b.set_vertex_buffers = si_set_vertex_buffers; 52587ec681f3Smrg 52597ec681f3Smrg sctx->b.texture_barrier = si_texture_barrier; 52607ec681f3Smrg sctx->b.set_min_samples = si_set_min_samples; 52617ec681f3Smrg sctx->b.set_tess_state = si_set_tess_state; 52627ec681f3Smrg sctx->b.set_patch_vertices = si_set_patch_vertices; 52637ec681f3Smrg 52647ec681f3Smrg sctx->b.set_active_query_state = si_set_active_query_state; 526501e04c3fSmrg} 5266af69d88dSmrg 526701e04c3fSmrgvoid si_init_screen_state_functions(struct si_screen *sscreen) 526801e04c3fSmrg{ 52697ec681f3Smrg sscreen->b.is_format_supported = si_is_format_supported; 52707ec681f3Smrg sscreen->b.create_vertex_state = si_pipe_create_vertex_state; 52717ec681f3Smrg sscreen->b.vertex_state_destroy = si_pipe_vertex_state_destroy; 52727ec681f3Smrg 52737ec681f3Smrg if (sscreen->info.chip_class >= GFX10) { 52747ec681f3Smrg sscreen->make_texture_descriptor = gfx10_make_texture_descriptor; 52757ec681f3Smrg } else { 52767ec681f3Smrg sscreen->make_texture_descriptor = si_make_texture_descriptor; 52777ec681f3Smrg } 52787ec681f3Smrg 52797ec681f3Smrg util_vertex_state_cache_init(&sscreen->vertex_state_cache, 52807ec681f3Smrg si_create_vertex_state, si_vertex_state_destroy); 528101e04c3fSmrg} 5282af69d88dSmrg 52837ec681f3Smrgstatic void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value) 528401e04c3fSmrg{ 52857ec681f3Smrg unsigned reg = sctx->chip_class >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX; 52867ec681f3Smrg si_pm4_set_reg(pm4, reg, value); 528701e04c3fSmrg} 5288af69d88dSmrg 52897ec681f3Smrgstatic void si_set_grbm_gfx_index_se(struct si_context *sctx, struct si_pm4_state *pm4, unsigned se) 529001e04c3fSmrg{ 52917ec681f3Smrg assert(se == ~0 || se < sctx->screen->info.max_se); 52927ec681f3Smrg si_set_grbm_gfx_index(sctx, pm4, 52937ec681f3Smrg (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) : S_030800_SE_INDEX(se)) | 52947ec681f3Smrg S_030800_SH_BROADCAST_WRITES(1) | 52957ec681f3Smrg S_030800_INSTANCE_BROADCAST_WRITES(1)); 529601e04c3fSmrg} 5297af69d88dSmrg 52987ec681f3Smrgstatic void si_write_harvested_raster_configs(struct si_context *sctx, struct si_pm4_state *pm4, 52997ec681f3Smrg unsigned raster_config, unsigned raster_config_1) 530001e04c3fSmrg{ 53017ec681f3Smrg unsigned num_se = MAX2(sctx->screen->info.max_se, 1); 53027ec681f3Smrg unsigned raster_config_se[4]; 53037ec681f3Smrg unsigned se; 53047ec681f3Smrg 53057ec681f3Smrg ac_get_harvested_configs(&sctx->screen->info, raster_config, &raster_config_1, raster_config_se); 53067ec681f3Smrg 53077ec681f3Smrg for (se = 0; se < num_se; se++) { 53087ec681f3Smrg si_set_grbm_gfx_index_se(sctx, pm4, se); 53097ec681f3Smrg si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]); 53107ec681f3Smrg } 53117ec681f3Smrg si_set_grbm_gfx_index(sctx, pm4, ~0); 53127ec681f3Smrg 53137ec681f3Smrg if (sctx->chip_class >= GFX7) { 53147ec681f3Smrg si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 53157ec681f3Smrg } 531601e04c3fSmrg} 5317af69d88dSmrg 531801e04c3fSmrgstatic void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4) 531901e04c3fSmrg{ 53207ec681f3Smrg struct si_screen *sscreen = sctx->screen; 53217ec681f3Smrg unsigned num_rb = MIN2(sscreen->info.max_render_backends, 16); 53227ec681f3Smrg unsigned rb_mask = sscreen->info.enabled_rb_mask; 53237ec681f3Smrg unsigned raster_config = sscreen->pa_sc_raster_config; 53247ec681f3Smrg unsigned raster_config_1 = sscreen->pa_sc_raster_config_1; 53257ec681f3Smrg 53267ec681f3Smrg if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 53277ec681f3Smrg /* Always use the default config when all backends are enabled 53287ec681f3Smrg * (or when we failed to determine the enabled backends). 53297ec681f3Smrg */ 53307ec681f3Smrg si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config); 53317ec681f3Smrg if (sctx->chip_class >= GFX7) 53327ec681f3Smrg si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 53337ec681f3Smrg } else { 53347ec681f3Smrg si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); 53357ec681f3Smrg } 5336af69d88dSmrg} 5337af69d88dSmrg 53387ec681f3Smrgvoid si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) 5339af69d88dSmrg{ 53407ec681f3Smrg struct si_screen *sscreen = sctx->screen; 53417ec681f3Smrg uint64_t border_color_va = sctx->border_color_buffer->gpu_address; 53427ec681f3Smrg bool has_clear_state = sscreen->info.has_clear_state; 53437ec681f3Smrg struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 53447ec681f3Smrg 53457ec681f3Smrg if (!pm4) 53467ec681f3Smrg return; 53477ec681f3Smrg 53487ec681f3Smrg if (!uses_reg_shadowing) { 53497ec681f3Smrg si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 53507ec681f3Smrg si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1)); 53517ec681f3Smrg si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1)); 53527ec681f3Smrg 53537ec681f3Smrg if (has_clear_state) { 53547ec681f3Smrg si_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0)); 53557ec681f3Smrg si_pm4_cmd_add(pm4, 0); 53567ec681f3Smrg } 53577ec681f3Smrg } 53587ec681f3Smrg 53597ec681f3Smrg /* CLEAR_STATE doesn't restore these correctly. */ 53607ec681f3Smrg si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 53617ec681f3Smrg si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 53627ec681f3Smrg S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 53637ec681f3Smrg 53647ec681f3Smrg si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 53657ec681f3Smrg if (!has_clear_state) 53667ec681f3Smrg si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 53677ec681f3Smrg 53687ec681f3Smrg if (!has_clear_state) { 53697ec681f3Smrg si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 53707ec681f3Smrg S_028230_ER_TRI(0xA) | S_028230_ER_POINT(0xA) | S_028230_ER_RECT(0xA) | 53717ec681f3Smrg /* Required by DX10_DIAMOND_TEST_ENA: */ 53727ec681f3Smrg S_028230_ER_LINE_LR(0x1A) | S_028230_ER_LINE_RL(0x26) | 53737ec681f3Smrg S_028230_ER_LINE_TB(0xA) | S_028230_ER_LINE_BT(0xA)); 53747ec681f3Smrg si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 53757ec681f3Smrg si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 53767ec681f3Smrg si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 53777ec681f3Smrg si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 53787ec681f3Smrg si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); 53797ec681f3Smrg si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 53807ec681f3Smrg si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 53817ec681f3Smrg si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 53827ec681f3Smrg si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 53837ec681f3Smrg } 53847ec681f3Smrg 53857ec681f3Smrg si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 53867ec681f3Smrg if (sctx->chip_class >= GFX7) 53877ec681f3Smrg si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); 53887ec681f3Smrg 53897ec681f3Smrg if (sctx->chip_class == GFX6) { 53907ec681f3Smrg si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, 53917ec681f3Smrg S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); 53927ec681f3Smrg } 53937ec681f3Smrg 53947ec681f3Smrg if (sctx->chip_class <= GFX7 || !has_clear_state) { 53957ec681f3Smrg si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 53967ec681f3Smrg si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); 53977ec681f3Smrg 53987ec681f3Smrg /* CLEAR_STATE doesn't clear these correctly on certain generations. 53997ec681f3Smrg * I don't know why. Deduced by trial and error. 54007ec681f3Smrg */ 54017ec681f3Smrg si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 54027ec681f3Smrg si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 54037ec681f3Smrg si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 54047ec681f3Smrg si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 54057ec681f3Smrg S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 54067ec681f3Smrg } 54077ec681f3Smrg 54087ec681f3Smrg if (sctx->chip_class >= GFX10) { 54097ec681f3Smrg si_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL, 54107ec681f3Smrg S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) | 54117ec681f3Smrg S_028038_POPS_DRAIN_PS_ON_OVERLAP(1)); 54127ec681f3Smrg } 54137ec681f3Smrg 54147ec681f3Smrg unsigned cu_mask_ps = 0xffffffff; 54157ec681f3Smrg 54167ec681f3Smrg /* It's wasteful to enable all CUs for PS if shader arrays have a different 54177ec681f3Smrg * number of CUs. The reason is that the hardware sends the same number of PS 54187ec681f3Smrg * waves to each shader array, so the slowest shader array limits the performance. 54197ec681f3Smrg * Disable the extra CUs for PS in other shader arrays to save power and thus 54207ec681f3Smrg * increase clocks for busy CUs. In the future, we might disable or enable this 54217ec681f3Smrg * tweak only for certain apps. 54227ec681f3Smrg */ 54237ec681f3Smrg if (sctx->chip_class >= GFX10_3) 54247ec681f3Smrg cu_mask_ps = u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa); 54257ec681f3Smrg 54267ec681f3Smrg if (sctx->chip_class >= GFX7) { 54277ec681f3Smrg si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 54287ec681f3Smrg S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F)); 54297ec681f3Smrg } 54307ec681f3Smrg 54317ec681f3Smrg if (sctx->chip_class <= GFX8) { 54327ec681f3Smrg si_set_raster_config(sctx, pm4); 54337ec681f3Smrg 54347ec681f3Smrg /* FIXME calculate these values somehow ??? */ 54357ec681f3Smrg si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 54367ec681f3Smrg si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 54377ec681f3Smrg 54387ec681f3Smrg /* These registers, when written, also overwrite the CLEAR_STATE 54397ec681f3Smrg * context, so we can't rely on CLEAR_STATE setting them. 54407ec681f3Smrg * It would be an issue if there was another UMD changing them. 54417ec681f3Smrg */ 54427ec681f3Smrg si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 54437ec681f3Smrg si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 54447ec681f3Smrg si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 54457ec681f3Smrg } 54467ec681f3Smrg 54477ec681f3Smrg if (sscreen->info.chip_class >= GFX10) { 54487ec681f3Smrg si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, 54497ec681f3Smrg S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); 54507ec681f3Smrg si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, 54517ec681f3Smrg S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8)); 54527ec681f3Smrg } else if (sscreen->info.chip_class == GFX9) { 54537ec681f3Smrg si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, 54547ec681f3Smrg S_00B414_MEM_BASE(sscreen->info.address32_hi >> 8)); 54557ec681f3Smrg si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, 54567ec681f3Smrg S_00B214_MEM_BASE(sscreen->info.address32_hi >> 8)); 54577ec681f3Smrg } else { 54587ec681f3Smrg si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, 54597ec681f3Smrg S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); 54607ec681f3Smrg } 54617ec681f3Smrg 54627ec681f3Smrg if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) { 54637ec681f3Smrg si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 54647ec681f3Smrg S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F)); 54657ec681f3Smrg si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F)); 54667ec681f3Smrg si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 54677ec681f3Smrg S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F)); 54687ec681f3Smrg 54697ec681f3Smrg /* If this is 0, Bonaire can hang even if GS isn't being used. 54707ec681f3Smrg * Other chips are unaffected. These are suboptimal values, 54717ec681f3Smrg * but we don't use on-chip GS. 54727ec681f3Smrg */ 54737ec681f3Smrg si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL, 54747ec681f3Smrg S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4)); 54757ec681f3Smrg } 54767ec681f3Smrg 54777ec681f3Smrg if (sctx->chip_class == GFX8) { 54787ec681f3Smrg unsigned vgt_tess_distribution; 54797ec681f3Smrg 54807ec681f3Smrg vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | 54817ec681f3Smrg S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16); 54827ec681f3Smrg 54837ec681f3Smrg /* Testing with Unigine Heaven extreme tesselation yielded best results 54847ec681f3Smrg * with TRAP_SPLIT = 3. 54857ec681f3Smrg */ 54867ec681f3Smrg if (sctx->family == CHIP_FIJI || sctx->family >= CHIP_POLARIS10) 54877ec681f3Smrg vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); 54887ec681f3Smrg 54897ec681f3Smrg si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); 54907ec681f3Smrg } 54917ec681f3Smrg 54927ec681f3Smrg if (sscreen->info.chip_class <= GFX9) { 54937ec681f3Smrg si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); 54947ec681f3Smrg } 54957ec681f3Smrg 54967ec681f3Smrg if (sctx->chip_class == GFX9) { 54977ec681f3Smrg si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0); 54987ec681f3Smrg si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0); 54997ec681f3Smrg si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0); 55007ec681f3Smrg 55017ec681f3Smrg si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, 55027ec681f3Smrg S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | 55037ec681f3Smrg S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); 55047ec681f3Smrg } 55057ec681f3Smrg 55067ec681f3Smrg if (sctx->chip_class >= GFX9) { 55077ec681f3Smrg si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 55087ec681f3Smrg S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); 55097ec681f3Smrg 55107ec681f3Smrg si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, 55117ec681f3Smrg S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) | 55127ec681f3Smrg S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6)); 55137ec681f3Smrg si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, 55147ec681f3Smrg S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) | 55157ec681f3Smrg S_028C48_MAX_PRIM_PER_BATCH(1023)); 55167ec681f3Smrg si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, 55177ec681f3Smrg S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); 55187ec681f3Smrg 55197ec681f3Smrg si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0); 55207ec681f3Smrg si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 55217ec681f3Smrg sctx->chip_class >= GFX10 ? 0x20 : 0); 55227ec681f3Smrg } 55237ec681f3Smrg 55247ec681f3Smrg if (sctx->chip_class >= GFX10) { 55257ec681f3Smrg /* Logical CUs 16 - 31 */ 55267ec681f3Smrg si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16)); 55277ec681f3Smrg si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff)); 55287ec681f3Smrg si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff)); 55297ec681f3Smrg 55307ec681f3Smrg si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0); 55317ec681f3Smrg si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0); 55327ec681f3Smrg si_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0); 55337ec681f3Smrg si_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0); 55347ec681f3Smrg si_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0); 55357ec681f3Smrg si_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0); 55367ec681f3Smrg si_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0); 55377ec681f3Smrg si_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0); 55387ec681f3Smrg si_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0); 55397ec681f3Smrg si_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0); 55407ec681f3Smrg si_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0); 55417ec681f3Smrg si_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0); 55427ec681f3Smrg si_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0); 55437ec681f3Smrg si_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0); 55447ec681f3Smrg si_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0); 55457ec681f3Smrg si_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0); 55467ec681f3Smrg 55477ec681f3Smrg si_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS, 55487ec681f3Smrg S_00B0C0_SOFT_GROUPING_EN(1) | 55497ec681f3Smrg S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); 55507ec681f3Smrg si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); 55517ec681f3Smrg 55527ec681f3Smrg /* Enable CMASK/HTILE/DCC caching in L2 for small chips. */ 55537ec681f3Smrg unsigned meta_write_policy, meta_read_policy; 55547ec681f3Smrg if (sscreen->info.max_render_backends <= 4) { 55557ec681f3Smrg meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */ 55567ec681f3Smrg meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */ 55577ec681f3Smrg } else { 55587ec681f3Smrg meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */ 55597ec681f3Smrg meta_read_policy = V_02807C_CACHE_NOA; /* don't cache reads */ 55607ec681f3Smrg } 55617ec681f3Smrg 55627ec681f3Smrg si_pm4_set_reg(pm4, R_02807C_DB_RMI_L2_CACHE_CONTROL, 55637ec681f3Smrg S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) | 55647ec681f3Smrg S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) | 55657ec681f3Smrg S_02807C_HTILE_WR_POLICY(meta_write_policy) | 55667ec681f3Smrg S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) | 55677ec681f3Smrg S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA) | 55687ec681f3Smrg S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA) | 55697ec681f3Smrg S_02807C_HTILE_RD_POLICY(meta_read_policy)); 55707ec681f3Smrg si_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL, 55717ec681f3Smrg S_028410_CMASK_WR_POLICY(meta_write_policy) | 55727ec681f3Smrg S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) | 55737ec681f3Smrg S_028410_DCC_WR_POLICY(meta_write_policy) | 55747ec681f3Smrg S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM) | 55757ec681f3Smrg S_028410_CMASK_RD_POLICY(meta_read_policy) | 55767ec681f3Smrg S_028410_FMASK_RD_POLICY(V_028410_CACHE_NOA) | 55777ec681f3Smrg S_028410_DCC_RD_POLICY(meta_read_policy) | 55787ec681f3Smrg S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA)); 55797ec681f3Smrg 55807ec681f3Smrg si_pm4_set_reg(pm4, R_028428_CB_COVERAGE_OUT_CONTROL, 0); 55817ec681f3Smrg si_pm4_set_reg(pm4, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0); 55827ec681f3Smrg 55837ec681f3Smrg /* Break up a pixel wave if it contains deallocs for more than 55847ec681f3Smrg * half the parameter cache. 55857ec681f3Smrg * 55867ec681f3Smrg * To avoid a deadlock where pixel waves aren't launched 55877ec681f3Smrg * because they're waiting for more pixels while the frontend 55887ec681f3Smrg * is stuck waiting for PC space, the maximum allowed value is 55897ec681f3Smrg * the size of the PC minus the largest possible allocation for 55907ec681f3Smrg * a single primitive shader subgroup. 55917ec681f3Smrg */ 55927ec681f3Smrg si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512)); 55937ec681f3Smrg /* Reuse for legacy (non-NGG) only. */ 55947ec681f3Smrg si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 55957ec681f3Smrg 55967ec681f3Smrg if (!has_clear_state) { 55977ec681f3Smrg si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 55987ec681f3Smrg sscreen->info.pa_sc_tile_steering_override); 55997ec681f3Smrg } 56007ec681f3Smrg 56017ec681f3Smrg 56027ec681f3Smrg si_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0); 56037ec681f3Smrg si_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0); 56047ec681f3Smrg si_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0); 56057ec681f3Smrg si_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0); 56067ec681f3Smrg si_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0); 56077ec681f3Smrg } 56087ec681f3Smrg 56097ec681f3Smrg if (sctx->chip_class >= GFX10_3) { 56107ec681f3Smrg si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); 56117ec681f3Smrg /* The rate combiners have no effect if they are disabled like this: 56127ec681f3Smrg * VERTEX_RATE: BYPASS_VTX_RATE_COMBINER = 1 56137ec681f3Smrg * PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 1 56147ec681f3Smrg * HTILE_RATE: VRS_HTILE_ENCODING = 0 56157ec681f3Smrg * SAMPLE_ITER: PS_ITER_SAMPLE = 0 56167ec681f3Smrg * 56177ec681f3Smrg * Use OVERRIDE, which will ignore results from previous combiners. 56187ec681f3Smrg * (e.g. enabled sample shading overrides the vertex rate) 56197ec681f3Smrg */ 56207ec681f3Smrg si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL, 56217ec681f3Smrg S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) | 56227ec681f3Smrg S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE)); 56237ec681f3Smrg } 56247ec681f3Smrg 56257ec681f3Smrg sctx->cs_preamble_state = pm4; 5626af69d88dSmrg} 5627