1af69d88dSmrg/*
2af69d88dSmrg * Copyright 2012 Advanced Micro Devices, Inc.
301e04c3fSmrg * All Rights Reserved.
4af69d88dSmrg *
5af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
6af69d88dSmrg * copy of this software and associated documentation files (the "Software"),
7af69d88dSmrg * to deal in the Software without restriction, including without limitation
8af69d88dSmrg * on the rights to use, copy, modify, merge, publish, distribute, sub
9af69d88dSmrg * license, and/or sell copies of the Software, and to permit persons to whom
10af69d88dSmrg * the Software is furnished to do so, subject to the following conditions:
11af69d88dSmrg *
12af69d88dSmrg * The above copyright notice and this permission notice (including the next
13af69d88dSmrg * paragraph) shall be included in all copies or substantial portions of the
14af69d88dSmrg * Software.
15af69d88dSmrg *
16af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17af69d88dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18af69d88dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19af69d88dSmrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20af69d88dSmrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21af69d88dSmrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22af69d88dSmrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
23af69d88dSmrg */
24af69d88dSmrg
2501e04c3fSmrg#include "si_build_pm4.h"
2601e04c3fSmrg#include "si_query.h"
277ec681f3Smrg#include "si_shader_internal.h"
287ec681f3Smrg#include "sid.h"
297ec681f3Smrg#include "util/fast_idiv_by_const.h"
307ec681f3Smrg#include "util/format/u_format.h"
317ec681f3Smrg#include "util/format/u_format_s3tc.h"
3201e04c3fSmrg#include "util/u_dual_blend.h"
337ec681f3Smrg#include "util/u_helpers.h"
34af69d88dSmrg#include "util/u_memory.h"
3501e04c3fSmrg#include "util/u_resource.h"
3601e04c3fSmrg#include "util/u_upload_mgr.h"
377ec681f3Smrg#include "util/u_blend.h"
387ec681f3Smrg
397ec681f3Smrg#include "gfx10_format_table.h"
40af69d88dSmrg
41af69d88dSmrgstatic unsigned si_map_swizzle(unsigned swizzle)
42af69d88dSmrg{
437ec681f3Smrg   switch (swizzle) {
447ec681f3Smrg   case PIPE_SWIZZLE_Y:
457ec681f3Smrg      return V_008F0C_SQ_SEL_Y;
467ec681f3Smrg   case PIPE_SWIZZLE_Z:
477ec681f3Smrg      return V_008F0C_SQ_SEL_Z;
487ec681f3Smrg   case PIPE_SWIZZLE_W:
497ec681f3Smrg      return V_008F0C_SQ_SEL_W;
507ec681f3Smrg   case PIPE_SWIZZLE_0:
517ec681f3Smrg      return V_008F0C_SQ_SEL_0;
527ec681f3Smrg   case PIPE_SWIZZLE_1:
537ec681f3Smrg      return V_008F0C_SQ_SEL_1;
547ec681f3Smrg   default: /* PIPE_SWIZZLE_X */
557ec681f3Smrg      return V_008F0C_SQ_SEL_X;
567ec681f3Smrg   }
57af69d88dSmrg}
58af69d88dSmrg
/**
 * Pack a float into unsigned 12.4 fixed point (4 fractional bits).
 *
 * \param x  value to convert
 * \return   0 for x <= 0 and for NaN, 0xffff for x >= 4096, otherwise
 *           x * 16 truncated toward zero.
 */
static unsigned si_pack_float_12p4(float x)
{
   /* Deliberately phrased as !(x > 0) instead of (x <= 0): every ordered
    * comparison with NaN is false, so this form also catches NaN. Letting
    * NaN reach the float -> unsigned conversion below would be undefined
    * behavior.
    */
   if (!(x > 0))
      return 0;

   /* Saturate to the largest representable 12.4 value. */
   if (x >= 4096)
      return 0xffff;

   return (unsigned)(x * 16);
}
64af69d88dSmrg
65af69d88dSmrg/*
6601e04c3fSmrg * Inferred framebuffer and blender state.
6701e04c3fSmrg *
6801e04c3fSmrg * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
6901e04c3fSmrg * if there is not enough PS outputs.
70af69d88dSmrg */
7101e04c3fSmrgstatic void si_emit_cb_render_state(struct si_context *sctx)
72af69d88dSmrg{
737ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
747ec681f3Smrg   struct si_state_blend *blend = sctx->queued.named.blend;
757ec681f3Smrg   /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,
767ec681f3Smrg    * but you never know. */
777ec681f3Smrg   uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_mask;
787ec681f3Smrg   unsigned i;
797ec681f3Smrg
807ec681f3Smrg   /* Avoid a hang that happens when dual source blending is enabled
817ec681f3Smrg    * but there is not enough color outputs. This is undefined behavior,
827ec681f3Smrg    * so disable color writes completely.
837ec681f3Smrg    *
847ec681f3Smrg    * Reproducible with Unigine Heaven 4.0 and drirc missing.
857ec681f3Smrg    */
867ec681f3Smrg   if (blend->dual_src_blend && sctx->shader.ps.cso &&
877ec681f3Smrg       (sctx->shader.ps.cso->info.colors_written & 0x3) != 0x3)
887ec681f3Smrg      cb_target_mask = 0;
897ec681f3Smrg
907ec681f3Smrg   /* GFX9: Flush DFSM when CB_TARGET_MASK changes.
917ec681f3Smrg    * I think we don't have to do anything between IBs.
927ec681f3Smrg    */
937ec681f3Smrg   if (sctx->screen->dpbb_allowed && sctx->last_cb_target_mask != cb_target_mask) {
947ec681f3Smrg      sctx->last_cb_target_mask = cb_target_mask;
957ec681f3Smrg
967ec681f3Smrg      radeon_begin(cs);
977ec681f3Smrg      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
987ec681f3Smrg      radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
997ec681f3Smrg      radeon_end();
1007ec681f3Smrg   }
1017ec681f3Smrg
1027ec681f3Smrg   radeon_begin(cs);
1037ec681f3Smrg   radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,
1047ec681f3Smrg                              cb_target_mask);
1057ec681f3Smrg
1067ec681f3Smrg   if (sctx->chip_class >= GFX8) {
1077ec681f3Smrg      /* DCC MSAA workaround.
1087ec681f3Smrg       * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_-
1097ec681f3Smrg       * COMBINER_DISABLE, but that would be more complicated.
1107ec681f3Smrg       */
1117ec681f3Smrg      bool oc_disable =
1127ec681f3Smrg         blend->dcc_msaa_corruption_4bit & cb_target_mask && sctx->framebuffer.nr_samples >= 2;
1137ec681f3Smrg      unsigned watermark = sctx->framebuffer.dcc_overwrite_combiner_watermark;
1147ec681f3Smrg
1157ec681f3Smrg      radeon_opt_set_context_reg(
1167ec681f3Smrg         sctx, R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL,
1177ec681f3Smrg         S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(sctx->chip_class <= GFX9) |
1187ec681f3Smrg            S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
1197ec681f3Smrg            S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) |
1207ec681f3Smrg            S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->screen->info.has_dcc_constant_encode));
1217ec681f3Smrg   }
1227ec681f3Smrg
1237ec681f3Smrg   /* RB+ register settings. */
1247ec681f3Smrg   if (sctx->screen->info.rbplus_allowed) {
1257ec681f3Smrg      unsigned spi_shader_col_format =
1267ec681f3Smrg         sctx->shader.ps.cso ? sctx->shader.ps.current->key.part.ps.epilog.spi_shader_col_format
1277ec681f3Smrg                             : 0;
1287ec681f3Smrg      unsigned sx_ps_downconvert = 0;
1297ec681f3Smrg      unsigned sx_blend_opt_epsilon = 0;
1307ec681f3Smrg      unsigned sx_blend_opt_control = 0;
1317ec681f3Smrg
1327ec681f3Smrg      for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
1337ec681f3Smrg         struct si_surface *surf = (struct si_surface *)sctx->framebuffer.state.cbufs[i];
1347ec681f3Smrg         unsigned format, swap, spi_format, colormask;
1357ec681f3Smrg         bool has_alpha, has_rgb;
1367ec681f3Smrg
1377ec681f3Smrg         if (!surf) {
1387ec681f3Smrg            /* If the color buffer is not set, the driver sets 32_R
1397ec681f3Smrg             * as the SPI color format, because the hw doesn't allow
1407ec681f3Smrg             * holes between color outputs, so also set this to
1417ec681f3Smrg             * enable RB+.
1427ec681f3Smrg             */
1437ec681f3Smrg            sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
1447ec681f3Smrg            continue;
1457ec681f3Smrg         }
1467ec681f3Smrg
1477ec681f3Smrg         format = G_028C70_FORMAT(surf->cb_color_info);
1487ec681f3Smrg         swap = G_028C70_COMP_SWAP(surf->cb_color_info);
1497ec681f3Smrg         spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
1507ec681f3Smrg         colormask = (cb_target_mask >> (i * 4)) & 0xf;
1517ec681f3Smrg
1527ec681f3Smrg         /* Set if RGB and A are present. */
1537ec681f3Smrg         has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);
1547ec681f3Smrg
1557ec681f3Smrg         if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 ||
1567ec681f3Smrg             format == V_028C70_COLOR_32)
1577ec681f3Smrg            has_rgb = !has_alpha;
1587ec681f3Smrg         else
1597ec681f3Smrg            has_rgb = true;
1607ec681f3Smrg
1617ec681f3Smrg         /* Check the colormask and export format. */
1627ec681f3Smrg         if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
1637ec681f3Smrg            has_rgb = false;
1647ec681f3Smrg         if (!(colormask & PIPE_MASK_A))
1657ec681f3Smrg            has_alpha = false;
1667ec681f3Smrg
1677ec681f3Smrg         if (spi_format == V_028714_SPI_SHADER_ZERO) {
1687ec681f3Smrg            has_rgb = false;
1697ec681f3Smrg            has_alpha = false;
1707ec681f3Smrg         }
1717ec681f3Smrg
1727ec681f3Smrg         /* Disable value checking for disabled channels. */
1737ec681f3Smrg         if (!has_rgb)
1747ec681f3Smrg            sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
1757ec681f3Smrg         if (!has_alpha)
1767ec681f3Smrg            sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
1777ec681f3Smrg
1787ec681f3Smrg         /* Enable down-conversion for 32bpp and smaller formats. */
1797ec681f3Smrg         switch (format) {
1807ec681f3Smrg         case V_028C70_COLOR_8:
1817ec681f3Smrg         case V_028C70_COLOR_8_8:
1827ec681f3Smrg         case V_028C70_COLOR_8_8_8_8:
1837ec681f3Smrg            /* For 1 and 2-channel formats, use the superset thereof. */
1847ec681f3Smrg            if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
1857ec681f3Smrg                spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
1867ec681f3Smrg                spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
1877ec681f3Smrg               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
1887ec681f3Smrg               sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
1897ec681f3Smrg            }
1907ec681f3Smrg            break;
1917ec681f3Smrg
1927ec681f3Smrg         case V_028C70_COLOR_5_6_5:
1937ec681f3Smrg            if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
1947ec681f3Smrg               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
1957ec681f3Smrg               sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
1967ec681f3Smrg            }
1977ec681f3Smrg            break;
1987ec681f3Smrg
1997ec681f3Smrg         case V_028C70_COLOR_1_5_5_5:
2007ec681f3Smrg            if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
2017ec681f3Smrg               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
2027ec681f3Smrg               sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
2037ec681f3Smrg            }
2047ec681f3Smrg            break;
2057ec681f3Smrg
2067ec681f3Smrg         case V_028C70_COLOR_4_4_4_4:
2077ec681f3Smrg            if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
2087ec681f3Smrg               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
2097ec681f3Smrg               sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
2107ec681f3Smrg            }
2117ec681f3Smrg            break;
2127ec681f3Smrg
2137ec681f3Smrg         case V_028C70_COLOR_32:
2147ec681f3Smrg            if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R)
2157ec681f3Smrg               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
2167ec681f3Smrg            else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR)
2177ec681f3Smrg               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
2187ec681f3Smrg            break;
2197ec681f3Smrg
2207ec681f3Smrg         case V_028C70_COLOR_16:
2217ec681f3Smrg         case V_028C70_COLOR_16_16:
2227ec681f3Smrg            /* For 1-channel formats, use the superset thereof. */
2237ec681f3Smrg            if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
2247ec681f3Smrg                spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
2257ec681f3Smrg                spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
2267ec681f3Smrg                spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
2277ec681f3Smrg               if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV)
2287ec681f3Smrg                  sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
2297ec681f3Smrg               else
2307ec681f3Smrg                  sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
2317ec681f3Smrg            }
2327ec681f3Smrg            break;
2337ec681f3Smrg
2347ec681f3Smrg         case V_028C70_COLOR_10_11_11:
2357ec681f3Smrg            if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
2367ec681f3Smrg               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
2377ec681f3Smrg            break;
2387ec681f3Smrg
2397ec681f3Smrg         case V_028C70_COLOR_2_10_10_10:
2407ec681f3Smrg            if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
2417ec681f3Smrg               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
2427ec681f3Smrg               sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
2437ec681f3Smrg            }
2447ec681f3Smrg            break;
2457ec681f3Smrg
2467ec681f3Smrg         case V_028C70_COLOR_5_9_9_9:
2477ec681f3Smrg            if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
2487ec681f3Smrg               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4);
2497ec681f3Smrg            break;
2507ec681f3Smrg         }
2517ec681f3Smrg      }
2527ec681f3Smrg
2537ec681f3Smrg      /* If there are no color outputs, the first color export is
2547ec681f3Smrg       * always enabled as 32_R, so also set this to enable RB+.
2557ec681f3Smrg       */
2567ec681f3Smrg      if (!sx_ps_downconvert)
2577ec681f3Smrg         sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
2587ec681f3Smrg
2597ec681f3Smrg      /* SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, SX_BLEND_OPT_CONTROL */
2607ec681f3Smrg      radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT,
2617ec681f3Smrg                                  sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control);
2627ec681f3Smrg   }
2637ec681f3Smrg   radeon_end_update_context_roll(sctx);
264af69d88dSmrg}
265af69d88dSmrg
266af69d88dSmrg/*
267af69d88dSmrg * Blender functions
268af69d88dSmrg */
269af69d88dSmrg
270af69d88dSmrgstatic uint32_t si_translate_blend_function(int blend_func)
271af69d88dSmrg{
2727ec681f3Smrg   switch (blend_func) {
2737ec681f3Smrg   case PIPE_BLEND_ADD:
2747ec681f3Smrg      return V_028780_COMB_DST_PLUS_SRC;
2757ec681f3Smrg   case PIPE_BLEND_SUBTRACT:
2767ec681f3Smrg      return V_028780_COMB_SRC_MINUS_DST;
2777ec681f3Smrg   case PIPE_BLEND_REVERSE_SUBTRACT:
2787ec681f3Smrg      return V_028780_COMB_DST_MINUS_SRC;
2797ec681f3Smrg   case PIPE_BLEND_MIN:
2807ec681f3Smrg      return V_028780_COMB_MIN_DST_SRC;
2817ec681f3Smrg   case PIPE_BLEND_MAX:
2827ec681f3Smrg      return V_028780_COMB_MAX_DST_SRC;
2837ec681f3Smrg   default:
2847ec681f3Smrg      PRINT_ERR("Unknown blend function %d\n", blend_func);
2857ec681f3Smrg      assert(0);
2867ec681f3Smrg      break;
2877ec681f3Smrg   }
2887ec681f3Smrg   return 0;
289af69d88dSmrg}
290af69d88dSmrg
291af69d88dSmrgstatic uint32_t si_translate_blend_factor(int blend_fact)
292af69d88dSmrg{
2937ec681f3Smrg   switch (blend_fact) {
2947ec681f3Smrg   case PIPE_BLENDFACTOR_ONE:
2957ec681f3Smrg      return V_028780_BLEND_ONE;
2967ec681f3Smrg   case PIPE_BLENDFACTOR_SRC_COLOR:
2977ec681f3Smrg      return V_028780_BLEND_SRC_COLOR;
2987ec681f3Smrg   case PIPE_BLENDFACTOR_SRC_ALPHA:
2997ec681f3Smrg      return V_028780_BLEND_SRC_ALPHA;
3007ec681f3Smrg   case PIPE_BLENDFACTOR_DST_ALPHA:
3017ec681f3Smrg      return V_028780_BLEND_DST_ALPHA;
3027ec681f3Smrg   case PIPE_BLENDFACTOR_DST_COLOR:
3037ec681f3Smrg      return V_028780_BLEND_DST_COLOR;
3047ec681f3Smrg   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
3057ec681f3Smrg      return V_028780_BLEND_SRC_ALPHA_SATURATE;
3067ec681f3Smrg   case PIPE_BLENDFACTOR_CONST_COLOR:
3077ec681f3Smrg      return V_028780_BLEND_CONSTANT_COLOR;
3087ec681f3Smrg   case PIPE_BLENDFACTOR_CONST_ALPHA:
3097ec681f3Smrg      return V_028780_BLEND_CONSTANT_ALPHA;
3107ec681f3Smrg   case PIPE_BLENDFACTOR_ZERO:
3117ec681f3Smrg      return V_028780_BLEND_ZERO;
3127ec681f3Smrg   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
3137ec681f3Smrg      return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
3147ec681f3Smrg   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
3157ec681f3Smrg      return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
3167ec681f3Smrg   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
3177ec681f3Smrg      return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
3187ec681f3Smrg   case PIPE_BLENDFACTOR_INV_DST_COLOR:
3197ec681f3Smrg      return V_028780_BLEND_ONE_MINUS_DST_COLOR;
3207ec681f3Smrg   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
3217ec681f3Smrg      return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
3227ec681f3Smrg   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
3237ec681f3Smrg      return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
3247ec681f3Smrg   case PIPE_BLENDFACTOR_SRC1_COLOR:
3257ec681f3Smrg      return V_028780_BLEND_SRC1_COLOR;
3267ec681f3Smrg   case PIPE_BLENDFACTOR_SRC1_ALPHA:
3277ec681f3Smrg      return V_028780_BLEND_SRC1_ALPHA;
3287ec681f3Smrg   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
3297ec681f3Smrg      return V_028780_BLEND_INV_SRC1_COLOR;
3307ec681f3Smrg   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
3317ec681f3Smrg      return V_028780_BLEND_INV_SRC1_ALPHA;
3327ec681f3Smrg   default:
3337ec681f3Smrg      PRINT_ERR("Bad blend factor %d not supported!\n", blend_fact);
3347ec681f3Smrg      assert(0);
3357ec681f3Smrg      break;
3367ec681f3Smrg   }
3377ec681f3Smrg   return 0;
338af69d88dSmrg}
339af69d88dSmrg
34001e04c3fSmrgstatic uint32_t si_translate_blend_opt_function(int blend_func)
34101e04c3fSmrg{
3427ec681f3Smrg   switch (blend_func) {
3437ec681f3Smrg   case PIPE_BLEND_ADD:
3447ec681f3Smrg      return V_028760_OPT_COMB_ADD;
3457ec681f3Smrg   case PIPE_BLEND_SUBTRACT:
3467ec681f3Smrg      return V_028760_OPT_COMB_SUBTRACT;
3477ec681f3Smrg   case PIPE_BLEND_REVERSE_SUBTRACT:
3487ec681f3Smrg      return V_028760_OPT_COMB_REVSUBTRACT;
3497ec681f3Smrg   case PIPE_BLEND_MIN:
3507ec681f3Smrg      return V_028760_OPT_COMB_MIN;
3517ec681f3Smrg   case PIPE_BLEND_MAX:
3527ec681f3Smrg      return V_028760_OPT_COMB_MAX;
3537ec681f3Smrg   default:
3547ec681f3Smrg      return V_028760_OPT_COMB_BLEND_DISABLED;
3557ec681f3Smrg   }
35601e04c3fSmrg}
35701e04c3fSmrg
35801e04c3fSmrgstatic uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
35901e04c3fSmrg{
3607ec681f3Smrg   switch (blend_fact) {
3617ec681f3Smrg   case PIPE_BLENDFACTOR_ZERO:
3627ec681f3Smrg      return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
3637ec681f3Smrg   case PIPE_BLENDFACTOR_ONE:
3647ec681f3Smrg      return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
3657ec681f3Smrg   case PIPE_BLENDFACTOR_SRC_COLOR:
3667ec681f3Smrg      return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
3677ec681f3Smrg                      : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
3687ec681f3Smrg   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
3697ec681f3Smrg      return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
3707ec681f3Smrg                      : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
3717ec681f3Smrg   case PIPE_BLENDFACTOR_SRC_ALPHA:
3727ec681f3Smrg      return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
3737ec681f3Smrg   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
3747ec681f3Smrg      return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
3757ec681f3Smrg   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
3767ec681f3Smrg      return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
3777ec681f3Smrg                      : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
3787ec681f3Smrg   default:
3797ec681f3Smrg      return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
3807ec681f3Smrg   }
38101e04c3fSmrg}
38201e04c3fSmrg
3837ec681f3Smrgstatic void si_blend_check_commutativity(struct si_screen *sscreen, struct si_state_blend *blend,
3847ec681f3Smrg                                         enum pipe_blend_func func, enum pipe_blendfactor src,
3857ec681f3Smrg                                         enum pipe_blendfactor dst, unsigned chanmask)
38601e04c3fSmrg{
3877ec681f3Smrg   /* Src factor is allowed when it does not depend on Dst */
3887ec681f3Smrg   static const uint32_t src_allowed =
3897ec681f3Smrg      (1u << PIPE_BLENDFACTOR_ONE) | (1u << PIPE_BLENDFACTOR_SRC_COLOR) |
3907ec681f3Smrg      (1u << PIPE_BLENDFACTOR_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) |
3917ec681f3Smrg      (1u << PIPE_BLENDFACTOR_CONST_COLOR) | (1u << PIPE_BLENDFACTOR_CONST_ALPHA) |
3927ec681f3Smrg      (1u << PIPE_BLENDFACTOR_SRC1_COLOR) | (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) |
3937ec681f3Smrg      (1u << PIPE_BLENDFACTOR_ZERO) | (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) |
3947ec681f3Smrg      (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) |
3957ec681f3Smrg      (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) |
3967ec681f3Smrg      (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA);
3977ec681f3Smrg
3987ec681f3Smrg   if (dst == PIPE_BLENDFACTOR_ONE && (src_allowed & (1u << src))) {
3997ec681f3Smrg      /* Addition is commutative, but floating point addition isn't
4007ec681f3Smrg       * associative: subtle changes can be introduced via different
4017ec681f3Smrg       * rounding.
4027ec681f3Smrg       *
4037ec681f3Smrg       * Out-of-order is also non-deterministic, which means that
4047ec681f3Smrg       * this breaks OpenGL invariance requirements. So only enable
4057ec681f3Smrg       * out-of-order additive blending if explicitly allowed by a
4067ec681f3Smrg       * setting.
4077ec681f3Smrg       */
4087ec681f3Smrg      if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN ||
4097ec681f3Smrg          (func == PIPE_BLEND_ADD && sscreen->commutative_blend_add))
4107ec681f3Smrg         blend->commutative_4bit |= chanmask;
4117ec681f3Smrg   }
41201e04c3fSmrg}
41301e04c3fSmrg
41401e04c3fSmrg/**
41501e04c3fSmrg * Get rid of DST in the blend factors by commuting the operands:
41601e04c3fSmrg *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
41701e04c3fSmrg */
4187ec681f3Smrgstatic void si_blend_remove_dst(unsigned *func, unsigned *src_factor, unsigned *dst_factor,
4197ec681f3Smrg                                unsigned expected_dst, unsigned replacement_src)
42001e04c3fSmrg{
4217ec681f3Smrg   if (*src_factor == expected_dst && *dst_factor == PIPE_BLENDFACTOR_ZERO) {
4227ec681f3Smrg      *src_factor = PIPE_BLENDFACTOR_ZERO;
4237ec681f3Smrg      *dst_factor = replacement_src;
4247ec681f3Smrg
4257ec681f3Smrg      /* Commuting the operands requires reversing subtractions. */
4267ec681f3Smrg      if (*func == PIPE_BLEND_SUBTRACT)
4277ec681f3Smrg         *func = PIPE_BLEND_REVERSE_SUBTRACT;
4287ec681f3Smrg      else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
4297ec681f3Smrg         *func = PIPE_BLEND_SUBTRACT;
4307ec681f3Smrg   }
43101e04c3fSmrg}
43201e04c3fSmrg
4337ec681f3Smrgstatic void *si_create_blend_state_mode(struct pipe_context *ctx,
4347ec681f3Smrg                                        const struct pipe_blend_state *state, unsigned mode)
43501e04c3fSmrg{
4367ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
4377ec681f3Smrg   struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
4387ec681f3Smrg   struct si_pm4_state *pm4 = &blend->pm4;
4397ec681f3Smrg   uint32_t sx_mrt_blend_opt[8] = {0};
4407ec681f3Smrg   uint32_t color_control = 0;
4417ec681f3Smrg   bool logicop_enable = state->logicop_enable && state->logicop_func != PIPE_LOGICOP_COPY;
4427ec681f3Smrg
4437ec681f3Smrg   if (!blend)
4447ec681f3Smrg      return NULL;
4457ec681f3Smrg
4467ec681f3Smrg   blend->alpha_to_coverage = state->alpha_to_coverage;
4477ec681f3Smrg   blend->alpha_to_one = state->alpha_to_one;
4487ec681f3Smrg   blend->dual_src_blend = util_blend_state_is_dual(state, 0);
4497ec681f3Smrg   blend->logicop_enable = logicop_enable;
4507ec681f3Smrg   blend->allows_noop_optimization =
4517ec681f3Smrg      state->rt[0].rgb_func == PIPE_BLEND_ADD &&
4527ec681f3Smrg      state->rt[0].alpha_func == PIPE_BLEND_ADD &&
4537ec681f3Smrg      state->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_DST_COLOR &&
4547ec681f3Smrg      state->rt[0].alpha_src_factor == PIPE_BLENDFACTOR_DST_COLOR &&
4557ec681f3Smrg      state->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ZERO &&
4567ec681f3Smrg      state->rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_ZERO &&
4577ec681f3Smrg      mode == V_028808_CB_NORMAL;
4587ec681f3Smrg
4597ec681f3Smrg   unsigned num_shader_outputs = state->max_rt + 1; /* estimate */
4607ec681f3Smrg   if (blend->dual_src_blend)
4617ec681f3Smrg      num_shader_outputs = MAX2(num_shader_outputs, 2);
4627ec681f3Smrg
4637ec681f3Smrg   if (logicop_enable) {
4647ec681f3Smrg      color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
4657ec681f3Smrg   } else {
4667ec681f3Smrg      color_control |= S_028808_ROP3(0xcc);
4677ec681f3Smrg   }
4687ec681f3Smrg
4697ec681f3Smrg   if (state->alpha_to_coverage && state->alpha_to_coverage_dither) {
4707ec681f3Smrg      si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
4717ec681f3Smrg                     S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
4727ec681f3Smrg                        S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) |
4737ec681f3Smrg                        S_028B70_ALPHA_TO_MASK_OFFSET2(0) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) |
4747ec681f3Smrg                        S_028B70_OFFSET_ROUND(1));
4757ec681f3Smrg   } else {
4767ec681f3Smrg      si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
4777ec681f3Smrg                     S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
4787ec681f3Smrg                        S_028B70_ALPHA_TO_MASK_OFFSET0(2) | S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
4797ec681f3Smrg                        S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) |
4807ec681f3Smrg                        S_028B70_OFFSET_ROUND(0));
4817ec681f3Smrg   }
4827ec681f3Smrg
4837ec681f3Smrg   if (state->alpha_to_coverage)
4847ec681f3Smrg      blend->need_src_alpha_4bit |= 0xf;
4857ec681f3Smrg
4867ec681f3Smrg   blend->cb_target_mask = 0;
4877ec681f3Smrg   blend->cb_target_enabled_4bit = 0;
4887ec681f3Smrg
4897ec681f3Smrg   for (int i = 0; i < num_shader_outputs; i++) {
4907ec681f3Smrg      /* state->rt entries > 0 only written if independent blending */
4917ec681f3Smrg      const int j = state->independent_blend_enable ? i : 0;
4927ec681f3Smrg
4937ec681f3Smrg      unsigned eqRGB = state->rt[j].rgb_func;
4947ec681f3Smrg      unsigned srcRGB = state->rt[j].rgb_src_factor;
4957ec681f3Smrg      unsigned dstRGB = state->rt[j].rgb_dst_factor;
4967ec681f3Smrg      unsigned eqA = state->rt[j].alpha_func;
4977ec681f3Smrg      unsigned srcA = state->rt[j].alpha_src_factor;
4987ec681f3Smrg      unsigned dstA = state->rt[j].alpha_dst_factor;
4997ec681f3Smrg
5007ec681f3Smrg      unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
5017ec681f3Smrg      unsigned blend_cntl = 0;
5027ec681f3Smrg
5037ec681f3Smrg      sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
5047ec681f3Smrg                            S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
5057ec681f3Smrg
5067ec681f3Smrg      /* Only set dual source blending for MRT0 to avoid a hang. */
5077ec681f3Smrg      if (i >= 1 && blend->dual_src_blend) {
5087ec681f3Smrg         /* Vulkan does this for dual source blending. */
5097ec681f3Smrg         if (i == 1)
5107ec681f3Smrg            blend_cntl |= S_028780_ENABLE(1);
5117ec681f3Smrg
5127ec681f3Smrg         si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
5137ec681f3Smrg         continue;
5147ec681f3Smrg      }
5157ec681f3Smrg
5167ec681f3Smrg      /* Only addition and subtraction equations are supported with
5177ec681f3Smrg       * dual source blending.
5187ec681f3Smrg       */
5197ec681f3Smrg      if (blend->dual_src_blend && (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
5207ec681f3Smrg                                    eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
5217ec681f3Smrg         assert(!"Unsupported equation for dual source blending");
5227ec681f3Smrg         si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
5237ec681f3Smrg         continue;
5247ec681f3Smrg      }
5257ec681f3Smrg
5267ec681f3Smrg      /* cb_render_state will disable unused ones */
5277ec681f3Smrg      blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
5287ec681f3Smrg      if (state->rt[j].colormask)
5297ec681f3Smrg         blend->cb_target_enabled_4bit |= 0xf << (4 * i);
5307ec681f3Smrg
5317ec681f3Smrg      if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
5327ec681f3Smrg         si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
5337ec681f3Smrg         continue;
5347ec681f3Smrg      }
5357ec681f3Smrg
5367ec681f3Smrg      si_blend_check_commutativity(sctx->screen, blend, eqRGB, srcRGB, dstRGB, 0x7 << (4 * i));
5377ec681f3Smrg      si_blend_check_commutativity(sctx->screen, blend, eqA, srcA, dstA, 0x8 << (4 * i));
5387ec681f3Smrg
5397ec681f3Smrg      /* Blending optimizations for RB+.
5407ec681f3Smrg       * These transformations don't change the behavior.
5417ec681f3Smrg       *
5427ec681f3Smrg       * First, get rid of DST in the blend factors:
5437ec681f3Smrg       *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
5447ec681f3Smrg       */
5457ec681f3Smrg      si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, PIPE_BLENDFACTOR_DST_COLOR,
5467ec681f3Smrg                          PIPE_BLENDFACTOR_SRC_COLOR);
5477ec681f3Smrg      si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_COLOR,
5487ec681f3Smrg                          PIPE_BLENDFACTOR_SRC_COLOR);
5497ec681f3Smrg      si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_ALPHA,
5507ec681f3Smrg                          PIPE_BLENDFACTOR_SRC_ALPHA);
5517ec681f3Smrg
5527ec681f3Smrg      /* Look up the ideal settings from tables. */
5537ec681f3Smrg      srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
5547ec681f3Smrg      dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
5557ec681f3Smrg      srcA_opt = si_translate_blend_opt_factor(srcA, true);
5567ec681f3Smrg      dstA_opt = si_translate_blend_opt_factor(dstA, true);
5577ec681f3Smrg
5587ec681f3Smrg      /* Handle interdependencies. */
5597ec681f3Smrg      if (util_blend_factor_uses_dest(srcRGB, false))
5607ec681f3Smrg         dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
5617ec681f3Smrg      if (util_blend_factor_uses_dest(srcA, false))
5627ec681f3Smrg         dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
5637ec681f3Smrg
5647ec681f3Smrg      if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
5657ec681f3Smrg          (dstRGB == PIPE_BLENDFACTOR_ZERO || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
5667ec681f3Smrg           dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
5677ec681f3Smrg         dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
5687ec681f3Smrg
5697ec681f3Smrg      /* Set the final value. */
5707ec681f3Smrg      sx_mrt_blend_opt[i] = S_028760_COLOR_SRC_OPT(srcRGB_opt) |
5717ec681f3Smrg                            S_028760_COLOR_DST_OPT(dstRGB_opt) |
5727ec681f3Smrg                            S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
5737ec681f3Smrg                            S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) |
5747ec681f3Smrg                            S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
5757ec681f3Smrg
5767ec681f3Smrg      /* Set blend state. */
5777ec681f3Smrg      blend_cntl |= S_028780_ENABLE(1);
5787ec681f3Smrg      blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
5797ec681f3Smrg      blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
5807ec681f3Smrg      blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
5817ec681f3Smrg
5827ec681f3Smrg      if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
5837ec681f3Smrg         blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
5847ec681f3Smrg         blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
5857ec681f3Smrg         blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
5867ec681f3Smrg         blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
5877ec681f3Smrg      }
5887ec681f3Smrg      si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
5897ec681f3Smrg
5907ec681f3Smrg      blend->blend_enable_4bit |= 0xfu << (i * 4);
5917ec681f3Smrg
5927ec681f3Smrg      if (sctx->chip_class >= GFX8 && sctx->chip_class <= GFX10)
5937ec681f3Smrg         blend->dcc_msaa_corruption_4bit |= 0xfu << (i * 4);
5947ec681f3Smrg
5957ec681f3Smrg      /* This is only important for formats without alpha. */
5967ec681f3Smrg      if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
5977ec681f3Smrg          srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
5987ec681f3Smrg          dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
5997ec681f3Smrg          srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
6007ec681f3Smrg         blend->need_src_alpha_4bit |= 0xfu << (i * 4);
6017ec681f3Smrg   }
6027ec681f3Smrg
6037ec681f3Smrg   if (sctx->chip_class >= GFX8 && sctx->chip_class <= GFX10 && logicop_enable)
6047ec681f3Smrg      blend->dcc_msaa_corruption_4bit |= blend->cb_target_enabled_4bit;
6057ec681f3Smrg
6067ec681f3Smrg   if (blend->cb_target_mask) {
6077ec681f3Smrg      color_control |= S_028808_MODE(mode);
6087ec681f3Smrg   } else {
6097ec681f3Smrg      color_control |= S_028808_MODE(V_028808_CB_DISABLE);
6107ec681f3Smrg   }
6117ec681f3Smrg
6127ec681f3Smrg   if (sctx->screen->info.rbplus_allowed) {
6137ec681f3Smrg      /* Disable RB+ blend optimizations for dual source blending.
6147ec681f3Smrg       * Vulkan does this.
6157ec681f3Smrg       */
6167ec681f3Smrg      if (blend->dual_src_blend) {
6177ec681f3Smrg         for (int i = 0; i < num_shader_outputs; i++) {
6187ec681f3Smrg            sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
6197ec681f3Smrg                                  S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
6207ec681f3Smrg         }
6217ec681f3Smrg      }
6227ec681f3Smrg
6237ec681f3Smrg      for (int i = 0; i < num_shader_outputs; i++)
6247ec681f3Smrg         si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]);
6257ec681f3Smrg
6267ec681f3Smrg      /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
6277ec681f3Smrg      if (blend->dual_src_blend || logicop_enable || mode == V_028808_CB_RESOLVE)
6287ec681f3Smrg         color_control |= S_028808_DISABLE_DUAL_QUAD(1);
6297ec681f3Smrg   }
6307ec681f3Smrg
6317ec681f3Smrg   si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
6327ec681f3Smrg   return blend;
63301e04c3fSmrg}
63401e04c3fSmrg
6357ec681f3Smrgstatic void *si_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state)
636af69d88dSmrg{
6377ec681f3Smrg   return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
638af69d88dSmrg}
639af69d88dSmrg
6407ec681f3Smrgstatic bool si_check_blend_dst_sampler_noop(struct si_context *sctx)
641af69d88dSmrg{
6427ec681f3Smrg   if (sctx->framebuffer.state.nr_cbufs == 1) {
6437ec681f3Smrg      struct si_shader_selector *sel = sctx->shader.ps.cso;
6447ec681f3Smrg      bool free_nir;
6457ec681f3Smrg      if (unlikely(sel->info.writes_1_if_tex_is_1 == 0xff)) {
6467ec681f3Smrg         struct nir_shader *nir = si_get_nir_shader(sel, NULL, &free_nir);
6477ec681f3Smrg
6487ec681f3Smrg         /* Determine if this fragment shader always writes vec4(1) if a specific texture
6497ec681f3Smrg          * is all 1s.
6507ec681f3Smrg          */
6517ec681f3Smrg         float in[4] = { 1.0, 1.0, 1.0, 1.0 };
6527ec681f3Smrg         float out[4];
6537ec681f3Smrg         int texunit;
6547ec681f3Smrg         if (si_nir_is_output_const_if_tex_is_const(nir, in, out, &texunit) &&
6557ec681f3Smrg             !memcmp(in, out, 4 * sizeof(float))) {
6567ec681f3Smrg            sel->info.writes_1_if_tex_is_1 = 1 + texunit;
6577ec681f3Smrg         } else {
6587ec681f3Smrg            sel->info.writes_1_if_tex_is_1 = 0;
6597ec681f3Smrg         }
6607ec681f3Smrg
6617ec681f3Smrg         if (free_nir)
6627ec681f3Smrg            ralloc_free(nir);
6637ec681f3Smrg      }
6647ec681f3Smrg
6657ec681f3Smrg      if (sel->info.writes_1_if_tex_is_1 &&
6667ec681f3Smrg          sel->info.writes_1_if_tex_is_1 != 0xff) {
6677ec681f3Smrg         /* Now check if the texture is cleared to 1 */
6687ec681f3Smrg         int unit = sctx->shader.ps.cso->info.writes_1_if_tex_is_1 - 1;
6697ec681f3Smrg         struct si_samplers *samp = &sctx->samplers[PIPE_SHADER_FRAGMENT];
6707ec681f3Smrg         if ((1u << unit) & samp->enabled_mask) {
6717ec681f3Smrg            struct si_texture* tex = (struct si_texture*) samp->views[unit]->texture;
6727ec681f3Smrg            if (tex->is_depth &&
6737ec681f3Smrg                tex->depth_cleared_level_mask & BITFIELD_BIT(samp->views[unit]->u.tex.first_level) &&
6747ec681f3Smrg                tex->depth_clear_value[0] == 1) {
6757ec681f3Smrg               return false;
6767ec681f3Smrg            }
6777ec681f3Smrg            /* TODO: handle color textures */
6787ec681f3Smrg         }
6797ec681f3Smrg      }
6807ec681f3Smrg   }
6817ec681f3Smrg
6827ec681f3Smrg   return true;
6837ec681f3Smrg}
6847ec681f3Smrg
6857ec681f3Smrgstatic void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx,
6867ec681f3Smrg                                           const struct pipe_draw_info *info,
6877ec681f3Smrg                                           unsigned drawid_offset,
6887ec681f3Smrg                                           const struct pipe_draw_indirect_info *indirect,
6897ec681f3Smrg                                           const struct pipe_draw_start_count_bias *draws,
6907ec681f3Smrg                                           unsigned num_draws) {
6917ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
6927ec681f3Smrg
6937ec681f3Smrg   if (!si_check_blend_dst_sampler_noop(sctx))
6947ec681f3Smrg      return;
6957ec681f3Smrg
6967ec681f3Smrg   sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws);
6977ec681f3Smrg}
6987ec681f3Smrg
6997ec681f3Smrgstatic void si_draw_vstate_blend_dst_sampler_noop(struct pipe_context *ctx,
7007ec681f3Smrg                                                  struct pipe_vertex_state *state,
7017ec681f3Smrg                                                  uint32_t partial_velem_mask,
7027ec681f3Smrg                                                  struct pipe_draw_vertex_state_info info,
7037ec681f3Smrg                                                  const struct pipe_draw_start_count_bias *draws,
7047ec681f3Smrg                                                  unsigned num_draws) {
7057ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
7067ec681f3Smrg
7077ec681f3Smrg   if (!si_check_blend_dst_sampler_noop(sctx))
7087ec681f3Smrg      return;
7097ec681f3Smrg
7107ec681f3Smrg   sctx->real_draw_vertex_state(ctx, state, partial_velem_mask, info, draws, num_draws);
711af69d88dSmrg}
712af69d88dSmrg
713af69d88dSmrgstatic void si_bind_blend_state(struct pipe_context *ctx, void *state)
714af69d88dSmrg{
7157ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
7167ec681f3Smrg   struct si_state_blend *old_blend = sctx->queued.named.blend;
7177ec681f3Smrg   struct si_state_blend *blend = (struct si_state_blend *)state;
7187ec681f3Smrg
7197ec681f3Smrg   if (!blend)
7207ec681f3Smrg      blend = (struct si_state_blend *)sctx->noop_blend;
7217ec681f3Smrg
7227ec681f3Smrg   si_pm4_bind_state(sctx, blend, blend);
7237ec681f3Smrg
7247ec681f3Smrg   if (old_blend->cb_target_mask != blend->cb_target_mask ||
7257ec681f3Smrg       old_blend->dual_src_blend != blend->dual_src_blend ||
7267ec681f3Smrg       (old_blend->dcc_msaa_corruption_4bit != blend->dcc_msaa_corruption_4bit &&
7277ec681f3Smrg        sctx->framebuffer.has_dcc_msaa))
7287ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
7297ec681f3Smrg
7307ec681f3Smrg   if (old_blend->cb_target_mask != blend->cb_target_mask ||
7317ec681f3Smrg       old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
7327ec681f3Smrg       old_blend->alpha_to_one != blend->alpha_to_one ||
7337ec681f3Smrg       old_blend->dual_src_blend != blend->dual_src_blend ||
7347ec681f3Smrg       old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
7357ec681f3Smrg       old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit) {
7367ec681f3Smrg      si_ps_key_update_framebuffer_blend(sctx);
7377ec681f3Smrg      si_ps_key_update_blend_rasterizer(sctx);
7387ec681f3Smrg      si_update_ps_inputs_read_or_disabled(sctx);
7397ec681f3Smrg      sctx->do_update_shaders = true;
7407ec681f3Smrg   }
7417ec681f3Smrg
7427ec681f3Smrg   if (sctx->screen->dpbb_allowed &&
7437ec681f3Smrg       (old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
7447ec681f3Smrg        old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
7457ec681f3Smrg        old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit))
7467ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
7477ec681f3Smrg
7487ec681f3Smrg   if (sctx->screen->has_out_of_order_rast &&
7497ec681f3Smrg       ((old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
7507ec681f3Smrg         old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
7517ec681f3Smrg         old_blend->commutative_4bit != blend->commutative_4bit ||
7527ec681f3Smrg         old_blend->logicop_enable != blend->logicop_enable)))
7537ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
7547ec681f3Smrg
7557ec681f3Smrg   if (likely(!radeon_uses_secure_bos(sctx->ws))) {
7567ec681f3Smrg      if (unlikely(blend->allows_noop_optimization)) {
7577ec681f3Smrg         si_install_draw_wrapper(sctx, si_draw_blend_dst_sampler_noop,
7587ec681f3Smrg                                 si_draw_vstate_blend_dst_sampler_noop);
7597ec681f3Smrg      } else {
7607ec681f3Smrg         si_install_draw_wrapper(sctx, NULL, NULL);
7617ec681f3Smrg      }
7627ec681f3Smrg   }
763af69d88dSmrg}
764af69d88dSmrg
765af69d88dSmrgstatic void si_delete_blend_state(struct pipe_context *ctx, void *state)
766af69d88dSmrg{
7677ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
7687ec681f3Smrg
7697ec681f3Smrg   if (sctx->queued.named.blend == state)
7707ec681f3Smrg      si_bind_blend_state(ctx, sctx->noop_blend);
7717ec681f3Smrg
7727ec681f3Smrg   si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(blend));
773af69d88dSmrg}
774af69d88dSmrg
7757ec681f3Smrgstatic void si_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state)
776af69d88dSmrg{
7777ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
7787ec681f3Smrg   static const struct pipe_blend_color zeros;
779af69d88dSmrg
7807ec681f3Smrg   sctx->blend_color = *state;
7817ec681f3Smrg   sctx->blend_color_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
7827ec681f3Smrg   si_mark_atom_dirty(sctx, &sctx->atoms.s.blend_color);
78301e04c3fSmrg}
784af69d88dSmrg
78501e04c3fSmrgstatic void si_emit_blend_color(struct si_context *sctx)
78601e04c3fSmrg{
7877ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
788af69d88dSmrg
7897ec681f3Smrg   radeon_begin(cs);
7907ec681f3Smrg   radeon_set_context_reg_seq(R_028414_CB_BLEND_RED, 4);
7917ec681f3Smrg   radeon_emit_array((uint32_t *)sctx->blend_color.color, 4);
7927ec681f3Smrg   radeon_end();
793af69d88dSmrg}
794af69d88dSmrg
795af69d88dSmrg/*
79601e04c3fSmrg * Clipping
797af69d88dSmrg */
798af69d88dSmrg
7997ec681f3Smrgstatic void si_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state)
800af69d88dSmrg{
8017ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
8027ec681f3Smrg   struct pipe_constant_buffer cb;
8037ec681f3Smrg   static const struct pipe_clip_state zeros;
8047ec681f3Smrg
8057ec681f3Smrg   if (memcmp(&sctx->clip_state, state, sizeof(*state)) == 0)
8067ec681f3Smrg      return;
8077ec681f3Smrg
8087ec681f3Smrg   sctx->clip_state = *state;
8097ec681f3Smrg   sctx->clip_state_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
8107ec681f3Smrg   si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_state);
8117ec681f3Smrg
8127ec681f3Smrg   cb.buffer = NULL;
8137ec681f3Smrg   cb.user_buffer = state->ucp;
8147ec681f3Smrg   cb.buffer_offset = 0;
8157ec681f3Smrg   cb.buffer_size = 4 * 4 * 8;
8167ec681f3Smrg   si_set_internal_const_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
817af69d88dSmrg}
818af69d88dSmrg
81901e04c3fSmrgstatic void si_emit_clip_state(struct si_context *sctx)
820af69d88dSmrg{
8217ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
822af69d88dSmrg
8237ec681f3Smrg   radeon_begin(cs);
8247ec681f3Smrg   radeon_set_context_reg_seq(R_0285BC_PA_CL_UCP_0_X, 6 * 4);
8257ec681f3Smrg   radeon_emit_array((uint32_t *)sctx->clip_state.ucp, 6 * 4);
8267ec681f3Smrg   radeon_end();
827af69d88dSmrg}
828af69d88dSmrg
82901e04c3fSmrgstatic void si_emit_clip_regs(struct si_context *sctx)
830af69d88dSmrg{
8317ec681f3Smrg   struct si_shader *vs = si_get_vs(sctx)->current;
8327ec681f3Smrg   struct si_shader_selector *vs_sel = vs->selector;
8337ec681f3Smrg   struct si_shader_info *info = &vs_sel->info;
8347ec681f3Smrg   struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
8357ec681f3Smrg   bool window_space = info->stage == MESA_SHADER_VERTEX ?
8367ec681f3Smrg                          info->base.vs.window_space_position : 0;
8377ec681f3Smrg   unsigned clipdist_mask = vs_sel->clipdist_mask;
8387ec681f3Smrg   unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
8397ec681f3Smrg   unsigned culldist_mask = vs_sel->culldist_mask;
8407ec681f3Smrg
8417ec681f3Smrg   /* Clip distances on points have no effect, so need to be implemented
8427ec681f3Smrg    * as cull distances. This applies for the clipvertex case as well.
8437ec681f3Smrg    *
8447ec681f3Smrg    * Setting this for primitives other than points should have no adverse
8457ec681f3Smrg    * effects.
8467ec681f3Smrg    */
8477ec681f3Smrg   clipdist_mask &= rs->clip_plane_enable;
8487ec681f3Smrg   culldist_mask |= clipdist_mask;
8497ec681f3Smrg
8507ec681f3Smrg   unsigned pa_cl_cntl = S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 &&
8517ec681f3Smrg                                                           !sctx->screen->options.vrs2x2) |
8527ec681f3Smrg                         S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
8537ec681f3Smrg                         clipdist_mask | (culldist_mask << 8);
8547ec681f3Smrg
8557ec681f3Smrg   radeon_begin(&sctx->gfx_cs);
8567ec681f3Smrg   radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL,
8577ec681f3Smrg			      pa_cl_cntl | vs->pa_cl_vs_out_cntl);
8587ec681f3Smrg   radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL,
8597ec681f3Smrg                              rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space));
8607ec681f3Smrg   radeon_end_update_context_roll(sctx);
861af69d88dSmrg}
862af69d88dSmrg
863af69d88dSmrg/*
864af69d88dSmrg * inferred state between framebuffer and rasterizer
865af69d88dSmrg */
86601e04c3fSmrgstatic void si_update_poly_offset_state(struct si_context *sctx)
867af69d88dSmrg{
8687ec681f3Smrg   struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
8697ec681f3Smrg
8707ec681f3Smrg   if (!rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
8717ec681f3Smrg      si_pm4_bind_state(sctx, poly_offset, NULL);
8727ec681f3Smrg      return;
8737ec681f3Smrg   }
8747ec681f3Smrg
8757ec681f3Smrg   /* Use the user format, not db_render_format, so that the polygon
8767ec681f3Smrg    * offset behaves as expected by applications.
8777ec681f3Smrg    */
8787ec681f3Smrg   switch (sctx->framebuffer.state.zsbuf->texture->format) {
8797ec681f3Smrg   case PIPE_FORMAT_Z16_UNORM:
8807ec681f3Smrg      si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
8817ec681f3Smrg      break;
8827ec681f3Smrg   default: /* 24-bit */
8837ec681f3Smrg      si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
8847ec681f3Smrg      break;
8857ec681f3Smrg   case PIPE_FORMAT_Z32_FLOAT:
8867ec681f3Smrg   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
8877ec681f3Smrg      si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
8887ec681f3Smrg      break;
8897ec681f3Smrg   }
890af69d88dSmrg}
891af69d88dSmrg
892af69d88dSmrg/*
893af69d88dSmrg * Rasterizer
894af69d88dSmrg */
895af69d88dSmrg
896af69d88dSmrgstatic uint32_t si_translate_fill(uint32_t func)
897af69d88dSmrg{
8987ec681f3Smrg   switch (func) {
8997ec681f3Smrg   case PIPE_POLYGON_MODE_FILL:
9007ec681f3Smrg      return V_028814_X_DRAW_TRIANGLES;
9017ec681f3Smrg   case PIPE_POLYGON_MODE_LINE:
9027ec681f3Smrg      return V_028814_X_DRAW_LINES;
9037ec681f3Smrg   case PIPE_POLYGON_MODE_POINT:
9047ec681f3Smrg      return V_028814_X_DRAW_POINTS;
9057ec681f3Smrg   default:
9067ec681f3Smrg      assert(0);
9077ec681f3Smrg      return V_028814_X_DRAW_POINTS;
9087ec681f3Smrg   }
909af69d88dSmrg}
910af69d88dSmrg
9117ec681f3Smrgstatic void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state)
912af69d88dSmrg{
9137ec681f3Smrg   struct si_screen *sscreen = ((struct si_context *)ctx)->screen;
9147ec681f3Smrg   struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
9157ec681f3Smrg   struct si_pm4_state *pm4 = &rs->pm4;
9167ec681f3Smrg   unsigned tmp, i;
9177ec681f3Smrg   float psize_min, psize_max;
9187ec681f3Smrg
9197ec681f3Smrg   if (!rs) {
9207ec681f3Smrg      return NULL;
9217ec681f3Smrg   }
9227ec681f3Smrg
9237ec681f3Smrg   rs->scissor_enable = state->scissor;
9247ec681f3Smrg   rs->clip_halfz = state->clip_halfz;
9257ec681f3Smrg   rs->two_side = state->light_twoside;
9267ec681f3Smrg   rs->multisample_enable = state->multisample;
9277ec681f3Smrg   rs->force_persample_interp = state->force_persample_interp;
9287ec681f3Smrg   rs->clip_plane_enable = state->clip_plane_enable;
9297ec681f3Smrg   rs->half_pixel_center = state->half_pixel_center;
9307ec681f3Smrg   rs->line_stipple_enable = state->line_stipple_enable;
9317ec681f3Smrg   rs->poly_stipple_enable = state->poly_stipple_enable;
9327ec681f3Smrg   rs->line_smooth = state->line_smooth;
9337ec681f3Smrg   rs->line_width = state->line_width;
9347ec681f3Smrg   rs->poly_smooth = state->poly_smooth;
9357ec681f3Smrg   rs->uses_poly_offset = state->offset_point || state->offset_line || state->offset_tri;
9367ec681f3Smrg   rs->clamp_fragment_color = state->clamp_fragment_color;
9377ec681f3Smrg   rs->clamp_vertex_color = state->clamp_vertex_color;
9387ec681f3Smrg   rs->flatshade = state->flatshade;
9397ec681f3Smrg   rs->flatshade_first = state->flatshade_first;
9407ec681f3Smrg   rs->sprite_coord_enable = state->sprite_coord_enable;
9417ec681f3Smrg   rs->rasterizer_discard = state->rasterizer_discard;
9427ec681f3Smrg   rs->polygon_mode_is_lines =
9437ec681f3Smrg      (state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) ||
9447ec681f3Smrg      (state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK));
9457ec681f3Smrg   rs->polygon_mode_is_points =
9467ec681f3Smrg      (state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) ||
9477ec681f3Smrg      (state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK));
9487ec681f3Smrg   rs->pa_sc_line_stipple = state->line_stipple_enable
9497ec681f3Smrg                               ? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
9507ec681f3Smrg                                    S_028A0C_REPEAT_COUNT(state->line_stipple_factor)
9517ec681f3Smrg                               : 0;
9527ec681f3Smrg   rs->pa_cl_clip_cntl = S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
9537ec681f3Smrg                         S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) |
9547ec681f3Smrg                         S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) |
9557ec681f3Smrg                         S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
9567ec681f3Smrg                         S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
9577ec681f3Smrg
9587ec681f3Smrg   if (rs->rasterizer_discard) {
9597ec681f3Smrg      rs->ngg_cull_flags = SI_NGG_CULL_ENABLED |
9607ec681f3Smrg                           SI_NGG_CULL_FRONT_FACE |
9617ec681f3Smrg                           SI_NGG_CULL_BACK_FACE;
9627ec681f3Smrg      rs->ngg_cull_flags_y_inverted = rs->ngg_cull_flags;
9637ec681f3Smrg   } else {
9647ec681f3Smrg      rs->ngg_cull_flags = SI_NGG_CULL_ENABLED;
9657ec681f3Smrg      rs->ngg_cull_flags_y_inverted = rs->ngg_cull_flags;
9667ec681f3Smrg
9677ec681f3Smrg      bool cull_front, cull_back;
9687ec681f3Smrg
9697ec681f3Smrg      if (!state->front_ccw) {
9707ec681f3Smrg         cull_front = !!(state->cull_face & PIPE_FACE_FRONT);
9717ec681f3Smrg         cull_back = !!(state->cull_face & PIPE_FACE_BACK);
9727ec681f3Smrg      } else {
9737ec681f3Smrg         cull_back = !!(state->cull_face & PIPE_FACE_FRONT);
9747ec681f3Smrg         cull_front = !!(state->cull_face & PIPE_FACE_BACK);
9757ec681f3Smrg      }
9767ec681f3Smrg
9777ec681f3Smrg      if (cull_front) {
9787ec681f3Smrg         rs->ngg_cull_flags |= SI_NGG_CULL_FRONT_FACE;
9797ec681f3Smrg         rs->ngg_cull_flags_y_inverted |= SI_NGG_CULL_BACK_FACE;
9807ec681f3Smrg      }
9817ec681f3Smrg
9827ec681f3Smrg      if (cull_back) {
9837ec681f3Smrg         rs->ngg_cull_flags |= SI_NGG_CULL_BACK_FACE;
9847ec681f3Smrg         rs->ngg_cull_flags_y_inverted |= SI_NGG_CULL_FRONT_FACE;
9857ec681f3Smrg      }
9867ec681f3Smrg   }
9877ec681f3Smrg
9887ec681f3Smrg   si_pm4_set_reg(
9897ec681f3Smrg      pm4, R_0286D4_SPI_INTERP_CONTROL_0,
9907ec681f3Smrg      S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) |
9917ec681f3Smrg         S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
9927ec681f3Smrg         S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
9937ec681f3Smrg         S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
9947ec681f3Smrg         S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
9957ec681f3Smrg         S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
9967ec681f3Smrg
9977ec681f3Smrg   /* point size 12.4 fixed point */
9987ec681f3Smrg   tmp = (unsigned)(state->point_size * 8.0);
9997ec681f3Smrg   si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
10007ec681f3Smrg
10017ec681f3Smrg   if (state->point_size_per_vertex) {
10027ec681f3Smrg      psize_min = util_get_min_point_size(state);
10037ec681f3Smrg      psize_max = SI_MAX_POINT_SIZE;
10047ec681f3Smrg   } else {
10057ec681f3Smrg      /* Force the point size to be as if the vertex output was disabled. */
10067ec681f3Smrg      psize_min = state->point_size;
10077ec681f3Smrg      psize_max = state->point_size;
10087ec681f3Smrg   }
10097ec681f3Smrg   rs->max_point_size = psize_max;
10107ec681f3Smrg
10117ec681f3Smrg   /* Divide by two, because 0.5 = 1 pixel. */
10127ec681f3Smrg   si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
10137ec681f3Smrg                  S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min / 2)) |
10147ec681f3Smrg                     S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max / 2)));
10157ec681f3Smrg
10167ec681f3Smrg   si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL,
10177ec681f3Smrg                  S_028A08_WIDTH(si_pack_float_12p4(state->line_width / 2)));
10187ec681f3Smrg   si_pm4_set_reg(
10197ec681f3Smrg      pm4, R_028A48_PA_SC_MODE_CNTL_0,
10207ec681f3Smrg      S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
10217ec681f3Smrg         S_028A48_MSAA_ENABLE(state->multisample || state->poly_smooth || state->line_smooth) |
10227ec681f3Smrg         S_028A48_VPORT_SCISSOR_ENABLE(1) |
10237ec681f3Smrg         S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.chip_class >= GFX9));
10247ec681f3Smrg
10257ec681f3Smrg   bool polygon_mode_enabled =
10267ec681f3Smrg      (state->fill_front != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_FRONT)) ||
10277ec681f3Smrg      (state->fill_back != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_BACK));
10287ec681f3Smrg
10297ec681f3Smrg   si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
10307ec681f3Smrg                  S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
10317ec681f3Smrg                     S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
10327ec681f3Smrg                     S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
10337ec681f3Smrg                     S_028814_FACE(!state->front_ccw) |
10347ec681f3Smrg                     S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
10357ec681f3Smrg                     S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
10367ec681f3Smrg                     S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
10377ec681f3Smrg                     S_028814_POLY_MODE(polygon_mode_enabled) |
10387ec681f3Smrg                     S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
10397ec681f3Smrg                     S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)) |
10407ec681f3Smrg                     /* this must be set if POLY_MODE or PERPENDICULAR_ENDCAP_ENA is set */
10417ec681f3Smrg                     S_028814_KEEP_TOGETHER_ENABLE(sscreen->info.chip_class >= GFX10 ? polygon_mode_enabled : 0));
10427ec681f3Smrg
10437ec681f3Smrg   if (!rs->uses_poly_offset)
10447ec681f3Smrg      return rs;
10457ec681f3Smrg
10467ec681f3Smrg   rs->pm4_poly_offset = CALLOC(3, sizeof(struct si_pm4_state));
10477ec681f3Smrg   if (!rs->pm4_poly_offset) {
10487ec681f3Smrg      FREE(rs);
10497ec681f3Smrg      return NULL;
10507ec681f3Smrg   }
10517ec681f3Smrg
10527ec681f3Smrg   /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
10537ec681f3Smrg   for (i = 0; i < 3; i++) {
10547ec681f3Smrg      struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
10557ec681f3Smrg      float offset_units = state->offset_units;
10567ec681f3Smrg      float offset_scale = state->offset_scale * 16.0f;
10577ec681f3Smrg      uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
10587ec681f3Smrg
10597ec681f3Smrg      if (!state->offset_units_unscaled) {
10607ec681f3Smrg         switch (i) {
10617ec681f3Smrg         case 0: /* 16-bit zbuffer */
10627ec681f3Smrg            offset_units *= 4.0f;
10637ec681f3Smrg            pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
10647ec681f3Smrg            break;
10657ec681f3Smrg         case 1: /* 24-bit zbuffer */
10667ec681f3Smrg            offset_units *= 2.0f;
10677ec681f3Smrg            pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
10687ec681f3Smrg            break;
10697ec681f3Smrg         case 2: /* 32-bit zbuffer */
10707ec681f3Smrg            offset_units *= 1.0f;
10717ec681f3Smrg            pa_su_poly_offset_db_fmt_cntl =
10727ec681f3Smrg               S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
10737ec681f3Smrg            break;
10747ec681f3Smrg         }
10757ec681f3Smrg      }
10767ec681f3Smrg
10777ec681f3Smrg      si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, pa_su_poly_offset_db_fmt_cntl);
10787ec681f3Smrg      si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
10797ec681f3Smrg      si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, fui(offset_scale));
10807ec681f3Smrg      si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
10817ec681f3Smrg      si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, fui(offset_scale));
10827ec681f3Smrg      si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
10837ec681f3Smrg   }
10847ec681f3Smrg
10857ec681f3Smrg   return rs;
1086af69d88dSmrg}
1087af69d88dSmrg
1088af69d88dSmrgstatic void si_bind_rs_state(struct pipe_context *ctx, void *state)
1089af69d88dSmrg{
10907ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
10917ec681f3Smrg   struct si_state_rasterizer *old_rs = (struct si_state_rasterizer *)sctx->queued.named.rasterizer;
10927ec681f3Smrg   struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
10937ec681f3Smrg
10947ec681f3Smrg   if (!rs)
10957ec681f3Smrg      rs = (struct si_state_rasterizer *)sctx->discard_rasterizer_state;
10967ec681f3Smrg
10977ec681f3Smrg   if (old_rs->multisample_enable != rs->multisample_enable) {
10987ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
10997ec681f3Smrg
11007ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
11017ec681f3Smrg
11027ec681f3Smrg      /* Update the small primitive filter workaround if necessary. */
11037ec681f3Smrg      if (sctx->screen->info.has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1)
11047ec681f3Smrg         si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
11057ec681f3Smrg
11067ec681f3Smrg      /* NGG cull state uses multisample_enable. */
11077ec681f3Smrg      if (sctx->screen->use_ngg_culling)
11087ec681f3Smrg         si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state);
11097ec681f3Smrg   }
11107ec681f3Smrg
11117ec681f3Smrg   sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
11127ec681f3Smrg   sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
11137ec681f3Smrg
11147ec681f3Smrg   si_pm4_bind_state(sctx, rasterizer, rs);
11157ec681f3Smrg   si_update_poly_offset_state(sctx);
11167ec681f3Smrg
11177ec681f3Smrg   if (old_rs->scissor_enable != rs->scissor_enable)
11187ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
11197ec681f3Smrg
11207ec681f3Smrg   if (old_rs->line_width != rs->line_width || old_rs->max_point_size != rs->max_point_size ||
11217ec681f3Smrg       old_rs->half_pixel_center != rs->half_pixel_center)
11227ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband);
11237ec681f3Smrg
11247ec681f3Smrg   if (old_rs->clip_halfz != rs->clip_halfz)
11257ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports);
11267ec681f3Smrg
11277ec681f3Smrg   if (old_rs->clip_plane_enable != rs->clip_plane_enable ||
11287ec681f3Smrg       old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
11297ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
11307ec681f3Smrg
11317ec681f3Smrg   if (old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
11327ec681f3Smrg       old_rs->flatshade != rs->flatshade)
11337ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.spi_map);
11347ec681f3Smrg
11357ec681f3Smrg   if (old_rs->clip_plane_enable != rs->clip_plane_enable ||
11367ec681f3Smrg       old_rs->rasterizer_discard != rs->rasterizer_discard ||
11377ec681f3Smrg       old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
11387ec681f3Smrg       old_rs->flatshade != rs->flatshade || old_rs->two_side != rs->two_side ||
11397ec681f3Smrg       old_rs->multisample_enable != rs->multisample_enable ||
11407ec681f3Smrg       old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
11417ec681f3Smrg       old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth ||
11427ec681f3Smrg       old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
11437ec681f3Smrg       old_rs->force_persample_interp != rs->force_persample_interp ||
11447ec681f3Smrg       old_rs->polygon_mode_is_points != rs->polygon_mode_is_points) {
11457ec681f3Smrg      si_ps_key_update_blend_rasterizer(sctx);
11467ec681f3Smrg      si_ps_key_update_rasterizer(sctx);
11477ec681f3Smrg      si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx);
11487ec681f3Smrg      si_update_ps_inputs_read_or_disabled(sctx);
11497ec681f3Smrg      sctx->do_update_shaders = true;
11507ec681f3Smrg   }
11517ec681f3Smrg
11527ec681f3Smrg   if (old_rs->line_smooth != rs->line_smooth ||
11537ec681f3Smrg       old_rs->poly_smooth != rs->poly_smooth ||
11547ec681f3Smrg       old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
11557ec681f3Smrg       old_rs->flatshade != rs->flatshade)
11567ec681f3Smrg      si_update_vrs_flat_shading(sctx);
1157af69d88dSmrg}
1158af69d88dSmrg
1159af69d88dSmrgstatic void si_delete_rs_state(struct pipe_context *ctx, void *state)
1160af69d88dSmrg{
11617ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
11627ec681f3Smrg   struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
116301e04c3fSmrg
11647ec681f3Smrg   if (sctx->queued.named.rasterizer == state)
11657ec681f3Smrg      si_bind_rs_state(ctx, sctx->discard_rasterizer_state);
116601e04c3fSmrg
11677ec681f3Smrg   FREE(rs->pm4_poly_offset);
11687ec681f3Smrg   si_pm4_free_state(sctx, &rs->pm4, SI_STATE_IDX(rasterizer));
1169af69d88dSmrg}
1170af69d88dSmrg
1171af69d88dSmrg/*
11727ec681f3Smrg * inferred state between dsa and stencil ref
1173af69d88dSmrg */
117401e04c3fSmrgstatic void si_emit_stencil_ref(struct si_context *sctx)
1175af69d88dSmrg{
11767ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
11777ec681f3Smrg   struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
11787ec681f3Smrg   struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
11797ec681f3Smrg
11807ec681f3Smrg   radeon_begin(cs);
11817ec681f3Smrg   radeon_set_context_reg_seq(R_028430_DB_STENCILREFMASK, 2);
11827ec681f3Smrg   radeon_emit(S_028430_STENCILTESTVAL(ref->ref_value[0]) |
11837ec681f3Smrg               S_028430_STENCILMASK(dsa->valuemask[0]) |
11847ec681f3Smrg               S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
11857ec681f3Smrg               S_028430_STENCILOPVAL(1));
11867ec681f3Smrg   radeon_emit(S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
11877ec681f3Smrg               S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
11887ec681f3Smrg               S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
11897ec681f3Smrg               S_028434_STENCILOPVAL_BF(1));
11907ec681f3Smrg   radeon_end();
1191af69d88dSmrg}
1192af69d88dSmrg
11937ec681f3Smrgstatic void si_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref state)
1194af69d88dSmrg{
11957ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
119601e04c3fSmrg
11977ec681f3Smrg   if (memcmp(&sctx->stencil_ref.state, &state, sizeof(state)) == 0)
11987ec681f3Smrg      return;
119901e04c3fSmrg
12007ec681f3Smrg   sctx->stencil_ref.state = state;
12017ec681f3Smrg   si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
1202af69d88dSmrg}
1203af69d88dSmrg
1204af69d88dSmrg/*
1205af69d88dSmrg * DSA
1206af69d88dSmrg */
1207af69d88dSmrg
1208af69d88dSmrgstatic uint32_t si_translate_stencil_op(int s_op)
1209af69d88dSmrg{
12107ec681f3Smrg   switch (s_op) {
12117ec681f3Smrg   case PIPE_STENCIL_OP_KEEP:
12127ec681f3Smrg      return V_02842C_STENCIL_KEEP;
12137ec681f3Smrg   case PIPE_STENCIL_OP_ZERO:
12147ec681f3Smrg      return V_02842C_STENCIL_ZERO;
12157ec681f3Smrg   case PIPE_STENCIL_OP_REPLACE:
12167ec681f3Smrg      return V_02842C_STENCIL_REPLACE_TEST;
12177ec681f3Smrg   case PIPE_STENCIL_OP_INCR:
12187ec681f3Smrg      return V_02842C_STENCIL_ADD_CLAMP;
12197ec681f3Smrg   case PIPE_STENCIL_OP_DECR:
12207ec681f3Smrg      return V_02842C_STENCIL_SUB_CLAMP;
12217ec681f3Smrg   case PIPE_STENCIL_OP_INCR_WRAP:
12227ec681f3Smrg      return V_02842C_STENCIL_ADD_WRAP;
12237ec681f3Smrg   case PIPE_STENCIL_OP_DECR_WRAP:
12247ec681f3Smrg      return V_02842C_STENCIL_SUB_WRAP;
12257ec681f3Smrg   case PIPE_STENCIL_OP_INVERT:
12267ec681f3Smrg      return V_02842C_STENCIL_INVERT;
12277ec681f3Smrg   default:
12287ec681f3Smrg      PRINT_ERR("Unknown stencil op %d", s_op);
12297ec681f3Smrg      assert(0);
12307ec681f3Smrg      break;
12317ec681f3Smrg   }
12327ec681f3Smrg   return 0;
123301e04c3fSmrg}
123401e04c3fSmrg
123501e04c3fSmrgstatic bool si_order_invariant_stencil_op(enum pipe_stencil_op op)
123601e04c3fSmrg{
12377ec681f3Smrg   /* REPLACE is normally order invariant, except when the stencil
12387ec681f3Smrg    * reference value is written by the fragment shader. Tracking this
12397ec681f3Smrg    * interaction does not seem worth the effort, so be conservative. */
12407ec681f3Smrg   return op != PIPE_STENCIL_OP_INCR && op != PIPE_STENCIL_OP_DECR && op != PIPE_STENCIL_OP_REPLACE;
124101e04c3fSmrg}
124201e04c3fSmrg
124301e04c3fSmrg/* Compute whether, assuming Z writes are disabled, this stencil state is order
124401e04c3fSmrg * invariant in the sense that the set of passing fragments as well as the
124501e04c3fSmrg * final stencil buffer result does not depend on the order of fragments. */
124601e04c3fSmrgstatic bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state)
124701e04c3fSmrg{
12487ec681f3Smrg   return !state->enabled || !state->writemask ||
12497ec681f3Smrg          /* The following assumes that Z writes are disabled. */
12507ec681f3Smrg          (state->func == PIPE_FUNC_ALWAYS && si_order_invariant_stencil_op(state->zpass_op) &&
12517ec681f3Smrg           si_order_invariant_stencil_op(state->zfail_op)) ||
12527ec681f3Smrg          (state->func == PIPE_FUNC_NEVER && si_order_invariant_stencil_op(state->fail_op));
125301e04c3fSmrg}
125401e04c3fSmrg
1255af69d88dSmrgstatic void *si_create_dsa_state(struct pipe_context *ctx,
12567ec681f3Smrg                                 const struct pipe_depth_stencil_alpha_state *state)
1257af69d88dSmrg{
12587ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
12597ec681f3Smrg   struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
12607ec681f3Smrg   struct si_pm4_state *pm4 = &dsa->pm4;
12617ec681f3Smrg   unsigned db_depth_control;
12627ec681f3Smrg   uint32_t db_stencil_control = 0;
12637ec681f3Smrg
12647ec681f3Smrg   if (!dsa) {
12657ec681f3Smrg      return NULL;
12667ec681f3Smrg   }
12677ec681f3Smrg
12687ec681f3Smrg   dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
12697ec681f3Smrg   dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
12707ec681f3Smrg   dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
12717ec681f3Smrg   dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
12727ec681f3Smrg
12737ec681f3Smrg   db_depth_control =
12747ec681f3Smrg      S_028800_Z_ENABLE(state->depth_enabled) | S_028800_Z_WRITE_ENABLE(state->depth_writemask) |
12757ec681f3Smrg      S_028800_ZFUNC(state->depth_func) | S_028800_DEPTH_BOUNDS_ENABLE(state->depth_bounds_test);
12767ec681f3Smrg
12777ec681f3Smrg   /* stencil */
12787ec681f3Smrg   if (state->stencil[0].enabled) {
12797ec681f3Smrg      db_depth_control |= S_028800_STENCIL_ENABLE(1);
12807ec681f3Smrg      db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
12817ec681f3Smrg      db_stencil_control |=
12827ec681f3Smrg         S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
12837ec681f3Smrg      db_stencil_control |=
12847ec681f3Smrg         S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
12857ec681f3Smrg      db_stencil_control |=
12867ec681f3Smrg         S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
12877ec681f3Smrg
12887ec681f3Smrg      if (state->stencil[1].enabled) {
12897ec681f3Smrg         db_depth_control |= S_028800_BACKFACE_ENABLE(1);
12907ec681f3Smrg         db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
12917ec681f3Smrg         db_stencil_control |=
12927ec681f3Smrg            S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
12937ec681f3Smrg         db_stencil_control |=
12947ec681f3Smrg            S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
12957ec681f3Smrg         db_stencil_control |=
12967ec681f3Smrg            S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
12977ec681f3Smrg      }
12987ec681f3Smrg   }
12997ec681f3Smrg
13007ec681f3Smrg   /* alpha */
13017ec681f3Smrg   if (state->alpha_enabled) {
13027ec681f3Smrg      dsa->alpha_func = state->alpha_func;
13037ec681f3Smrg
13047ec681f3Smrg      si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4,
13057ec681f3Smrg                     fui(state->alpha_ref_value));
13067ec681f3Smrg   } else {
13077ec681f3Smrg      dsa->alpha_func = PIPE_FUNC_ALWAYS;
13087ec681f3Smrg   }
13097ec681f3Smrg
13107ec681f3Smrg   si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
13117ec681f3Smrg   if (state->stencil[0].enabled)
13127ec681f3Smrg      si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
13137ec681f3Smrg   if (state->depth_bounds_test) {
13147ec681f3Smrg      si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth_bounds_min));
13157ec681f3Smrg      si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth_bounds_max));
13167ec681f3Smrg   }
13177ec681f3Smrg
13187ec681f3Smrg   dsa->depth_enabled = state->depth_enabled;
13197ec681f3Smrg   dsa->depth_write_enabled = state->depth_enabled && state->depth_writemask;
13207ec681f3Smrg   dsa->stencil_enabled = state->stencil[0].enabled;
13217ec681f3Smrg   dsa->stencil_write_enabled =
13227ec681f3Smrg      (util_writes_stencil(&state->stencil[0]) || util_writes_stencil(&state->stencil[1]));
13237ec681f3Smrg   dsa->db_can_write = dsa->depth_write_enabled || dsa->stencil_write_enabled;
13247ec681f3Smrg
13257ec681f3Smrg   bool zfunc_is_ordered =
13267ec681f3Smrg      state->depth_func == PIPE_FUNC_NEVER || state->depth_func == PIPE_FUNC_LESS ||
13277ec681f3Smrg      state->depth_func == PIPE_FUNC_LEQUAL || state->depth_func == PIPE_FUNC_GREATER ||
13287ec681f3Smrg      state->depth_func == PIPE_FUNC_GEQUAL;
13297ec681f3Smrg
13307ec681f3Smrg   bool nozwrite_and_order_invariant_stencil =
13317ec681f3Smrg      !dsa->db_can_write ||
13327ec681f3Smrg      (!dsa->depth_write_enabled && si_order_invariant_stencil_state(&state->stencil[0]) &&
13337ec681f3Smrg       si_order_invariant_stencil_state(&state->stencil[1]));
13347ec681f3Smrg
13357ec681f3Smrg   dsa->order_invariance[1].zs =
13367ec681f3Smrg      nozwrite_and_order_invariant_stencil || (!dsa->stencil_write_enabled && zfunc_is_ordered);
13377ec681f3Smrg   dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered;
13387ec681f3Smrg
13397ec681f3Smrg   dsa->order_invariance[1].pass_set =
13407ec681f3Smrg      nozwrite_and_order_invariant_stencil ||
13417ec681f3Smrg      (!dsa->stencil_write_enabled &&
13427ec681f3Smrg       (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER));
13437ec681f3Smrg   dsa->order_invariance[0].pass_set =
13447ec681f3Smrg      !dsa->depth_write_enabled ||
13457ec681f3Smrg      (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER);
13467ec681f3Smrg
13477ec681f3Smrg   dsa->order_invariance[1].pass_last = sctx->screen->assume_no_z_fights &&
13487ec681f3Smrg                                        !dsa->stencil_write_enabled && dsa->depth_write_enabled &&
13497ec681f3Smrg                                        zfunc_is_ordered;
13507ec681f3Smrg   dsa->order_invariance[0].pass_last =
13517ec681f3Smrg      sctx->screen->assume_no_z_fights && dsa->depth_write_enabled && zfunc_is_ordered;
13527ec681f3Smrg
13537ec681f3Smrg   return dsa;
1354af69d88dSmrg}
1355af69d88dSmrg
1356af69d88dSmrgstatic void si_bind_dsa_state(struct pipe_context *ctx, void *state)
1357af69d88dSmrg{
13587ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
13597ec681f3Smrg   struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
13607ec681f3Smrg   struct si_state_dsa *dsa = state;
13617ec681f3Smrg
13627ec681f3Smrg   if (!dsa)
13637ec681f3Smrg      dsa = (struct si_state_dsa *)sctx->noop_dsa;
13647ec681f3Smrg
13657ec681f3Smrg   si_pm4_bind_state(sctx, dsa, dsa);
13667ec681f3Smrg
13677ec681f3Smrg   if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
13687ec681f3Smrg              sizeof(struct si_dsa_stencil_ref_part)) != 0) {
13697ec681f3Smrg      sctx->stencil_ref.dsa_part = dsa->stencil_ref;
13707ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
13717ec681f3Smrg   }
13727ec681f3Smrg
13737ec681f3Smrg   if (old_dsa->alpha_func != dsa->alpha_func) {
13747ec681f3Smrg      si_ps_key_update_dsa(sctx);
13757ec681f3Smrg      si_update_ps_inputs_read_or_disabled(sctx);
13767ec681f3Smrg      si_update_ps_kill_enable(sctx);
13777ec681f3Smrg      sctx->do_update_shaders = true;
13787ec681f3Smrg   }
13797ec681f3Smrg
13807ec681f3Smrg   if (sctx->screen->dpbb_allowed && ((old_dsa->depth_enabled != dsa->depth_enabled ||
13817ec681f3Smrg                                       old_dsa->stencil_enabled != dsa->stencil_enabled ||
13827ec681f3Smrg                                       old_dsa->db_can_write != dsa->db_can_write)))
13837ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
13847ec681f3Smrg
13857ec681f3Smrg   if (sctx->screen->has_out_of_order_rast &&
13867ec681f3Smrg       (memcmp(old_dsa->order_invariance, dsa->order_invariance,
13877ec681f3Smrg               sizeof(old_dsa->order_invariance))))
13887ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
1389af69d88dSmrg}
1390af69d88dSmrg
1391af69d88dSmrgstatic void si_delete_dsa_state(struct pipe_context *ctx, void *state)
1392af69d88dSmrg{
13937ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
13947ec681f3Smrg
13957ec681f3Smrg   if (sctx->queued.named.dsa == state)
13967ec681f3Smrg      si_bind_dsa_state(ctx, sctx->noop_dsa);
13977ec681f3Smrg
13987ec681f3Smrg   si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(dsa));
1399af69d88dSmrg}
1400af69d88dSmrg
140101e04c3fSmrgstatic void *si_create_db_flush_dsa(struct si_context *sctx)
140201e04c3fSmrg{
14037ec681f3Smrg   struct pipe_depth_stencil_alpha_state dsa = {};
140401e04c3fSmrg
14057ec681f3Smrg   return sctx->b.create_depth_stencil_alpha_state(&sctx->b, &dsa);
140601e04c3fSmrg}
140701e04c3fSmrg
140801e04c3fSmrg/* DB RENDER STATE */
140901e04c3fSmrg
14107ec681f3Smrgstatic void si_set_active_query_state(struct pipe_context *ctx, bool enable)
141101e04c3fSmrg{
14127ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
14137ec681f3Smrg
14147ec681f3Smrg   /* Pipeline stat & streamout queries. */
14157ec681f3Smrg   if (enable) {
14167ec681f3Smrg      sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
14177ec681f3Smrg      sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS;
14187ec681f3Smrg   } else {
14197ec681f3Smrg      sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
14207ec681f3Smrg      sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
14217ec681f3Smrg   }
14227ec681f3Smrg
14237ec681f3Smrg   /* Occlusion queries. */
14247ec681f3Smrg   if (sctx->occlusion_queries_disabled != !enable) {
14257ec681f3Smrg      sctx->occlusion_queries_disabled = !enable;
14267ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
14277ec681f3Smrg   }
142801e04c3fSmrg}
142901e04c3fSmrg
14307ec681f3Smrgvoid si_set_occlusion_query_state(struct si_context *sctx, bool old_perfect_enable)
1431af69d88dSmrg{
14327ec681f3Smrg   si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1433af69d88dSmrg
14347ec681f3Smrg   bool perfect_enable = sctx->num_perfect_occlusion_queries != 0;
1435af69d88dSmrg
14367ec681f3Smrg   if (perfect_enable != old_perfect_enable)
14377ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
143801e04c3fSmrg}
143901e04c3fSmrg
144001e04c3fSmrgvoid si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
144101e04c3fSmrg{
14427ec681f3Smrg   si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
14437ec681f3Smrg}
14449f464c52Smaya
14457ec681f3Smrgvoid si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
14467ec681f3Smrg{
14477ec681f3Smrg   sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, true, &st->saved_const0);
144801e04c3fSmrg}
144901e04c3fSmrg
145001e04c3fSmrgstatic void si_emit_db_render_state(struct si_context *sctx)
145101e04c3fSmrg{
14527ec681f3Smrg   struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
14537ec681f3Smrg   unsigned db_shader_control, db_render_control, db_count_control;
14547ec681f3Smrg
14557ec681f3Smrg   /* DB_RENDER_CONTROL */
14567ec681f3Smrg   if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) {
14577ec681f3Smrg      db_render_control = S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
14587ec681f3Smrg                          S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
14597ec681f3Smrg                          S_028000_COPY_CENTROID(1) | S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample);
14607ec681f3Smrg   } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
14617ec681f3Smrg      db_render_control = S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
14627ec681f3Smrg                          S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace);
14637ec681f3Smrg   } else {
14647ec681f3Smrg      db_render_control = S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
14657ec681f3Smrg                          S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear);
14667ec681f3Smrg   }
14677ec681f3Smrg
14687ec681f3Smrg   /* DB_COUNT_CONTROL (occlusion queries) */
14697ec681f3Smrg   if (sctx->num_occlusion_queries > 0 && !sctx->occlusion_queries_disabled) {
14707ec681f3Smrg      bool perfect = sctx->num_perfect_occlusion_queries > 0;
14717ec681f3Smrg      bool gfx10_perfect = sctx->chip_class >= GFX10 && perfect;
14727ec681f3Smrg
14737ec681f3Smrg      if (sctx->chip_class >= GFX7) {
14747ec681f3Smrg         unsigned log_sample_rate = sctx->framebuffer.log_samples;
14757ec681f3Smrg
14767ec681f3Smrg         db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) |
14777ec681f3Smrg                            S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |
14787ec681f3Smrg                            S_028004_SAMPLE_RATE(log_sample_rate) | S_028004_ZPASS_ENABLE(1) |
14797ec681f3Smrg                            S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1);
14807ec681f3Smrg      } else {
14817ec681f3Smrg         db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) |
14827ec681f3Smrg                            S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples);
14837ec681f3Smrg      }
14847ec681f3Smrg   } else {
14857ec681f3Smrg      /* Disable occlusion queries. */
14867ec681f3Smrg      if (sctx->chip_class >= GFX7) {
14877ec681f3Smrg         db_count_control = 0;
14887ec681f3Smrg      } else {
14897ec681f3Smrg         db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
14907ec681f3Smrg      }
14917ec681f3Smrg   }
14927ec681f3Smrg
14937ec681f3Smrg   radeon_begin(&sctx->gfx_cs);
14947ec681f3Smrg   radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL,
14957ec681f3Smrg                               db_render_control, db_count_control);
14967ec681f3Smrg
14977ec681f3Smrg   /* DB_RENDER_OVERRIDE2 */
14987ec681f3Smrg   radeon_opt_set_context_reg(
14997ec681f3Smrg      sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,
15007ec681f3Smrg      S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
15017ec681f3Smrg      S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
15027ec681f3Smrg      S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
15037ec681f3Smrg      S_028010_CENTROID_COMPUTATION_MODE(sctx->chip_class >= GFX10_3 ? 1 : 0));
15047ec681f3Smrg
15057ec681f3Smrg   db_shader_control = sctx->ps_db_shader_control;
15067ec681f3Smrg
15077ec681f3Smrg   /* Bug workaround for smoothing (overrasterization) on GFX6. */
15087ec681f3Smrg   if (sctx->chip_class == GFX6 && sctx->smoothing_enabled) {
15097ec681f3Smrg      db_shader_control &= C_02880C_Z_ORDER;
15107ec681f3Smrg      db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
15117ec681f3Smrg   }
15127ec681f3Smrg
15137ec681f3Smrg   /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
15147ec681f3Smrg   if (!rs->multisample_enable)
15157ec681f3Smrg      db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
15167ec681f3Smrg
15177ec681f3Smrg   if (sctx->screen->info.has_rbplus && !sctx->screen->info.rbplus_allowed)
15187ec681f3Smrg      db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
15197ec681f3Smrg
15207ec681f3Smrg   radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL,
15217ec681f3Smrg                              db_shader_control);
15227ec681f3Smrg
15237ec681f3Smrg   if (sctx->chip_class >= GFX10_3) {
15247ec681f3Smrg      if (sctx->allow_flat_shading) {
15257ec681f3Smrg         radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL,
15267ec681f3Smrg                                    SI_TRACKED_DB_VRS_OVERRIDE_CNTL,
15277ec681f3Smrg                                    S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(
15287ec681f3Smrg                                    V_028064_VRS_COMB_MODE_OVERRIDE) |
15297ec681f3Smrg                                    S_028064_VRS_OVERRIDE_RATE_X(1) |
15307ec681f3Smrg                                    S_028064_VRS_OVERRIDE_RATE_Y(1));
15317ec681f3Smrg      } else {
15327ec681f3Smrg         /* If the shader is using discard, turn off coarse shading because
15337ec681f3Smrg          * discard at 2x2 pixel granularity degrades quality too much.
15347ec681f3Smrg          *
15357ec681f3Smrg          * MIN allows sample shading but not coarse shading.
15367ec681f3Smrg          */
15377ec681f3Smrg         unsigned mode = sctx->screen->options.vrs2x2 && G_02880C_KILL_ENABLE(db_shader_control) ?
15387ec681f3Smrg            V_028064_VRS_COMB_MODE_MIN : V_028064_VRS_COMB_MODE_PASSTHRU;
15397ec681f3Smrg
15407ec681f3Smrg         radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL,
15417ec681f3Smrg                                    SI_TRACKED_DB_VRS_OVERRIDE_CNTL,
15427ec681f3Smrg                                    S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
15437ec681f3Smrg                                    S_028064_VRS_OVERRIDE_RATE_X(0) |
15447ec681f3Smrg                                    S_028064_VRS_OVERRIDE_RATE_Y(0));
15457ec681f3Smrg      }
15467ec681f3Smrg   }
15477ec681f3Smrg   radeon_end_update_context_roll(sctx);
1548af69d88dSmrg}
1549af69d88dSmrg
1550af69d88dSmrg/*
1551af69d88dSmrg * format translation
1552af69d88dSmrg */
15537ec681f3Smrguint32_t si_translate_colorformat(enum chip_class chip_class,
15547ec681f3Smrg                                  enum pipe_format format)
1555af69d88dSmrg{
15567ec681f3Smrg   const struct util_format_description *desc = util_format_description(format);
15577ec681f3Smrg   if (!desc)
15587ec681f3Smrg      return V_028C70_COLOR_INVALID;
15597ec681f3Smrg
15607ec681f3Smrg#define HAS_SIZE(x, y, z, w)                                                                       \
15617ec681f3Smrg   (desc->channel[0].size == (x) && desc->channel[1].size == (y) &&                                \
15627ec681f3Smrg    desc->channel[2].size == (z) && desc->channel[3].size == (w))
15637ec681f3Smrg
15647ec681f3Smrg   if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
15657ec681f3Smrg      return V_028C70_COLOR_10_11_11;
15667ec681f3Smrg
15677ec681f3Smrg   if (chip_class >= GFX10_3 &&
15687ec681f3Smrg       format == PIPE_FORMAT_R9G9B9E5_FLOAT) /* isn't plain */
15697ec681f3Smrg      return V_028C70_COLOR_5_9_9_9;
15707ec681f3Smrg
15717ec681f3Smrg   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
15727ec681f3Smrg      return V_028C70_COLOR_INVALID;
15737ec681f3Smrg
15747ec681f3Smrg   /* hw cannot support mixed formats (except depth/stencil, since
15757ec681f3Smrg    * stencil is not written to). */
15767ec681f3Smrg   if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
15777ec681f3Smrg      return V_028C70_COLOR_INVALID;
15787ec681f3Smrg
15797ec681f3Smrg   switch (desc->nr_channels) {
15807ec681f3Smrg   case 1:
15817ec681f3Smrg      switch (desc->channel[0].size) {
15827ec681f3Smrg      case 8:
15837ec681f3Smrg         return V_028C70_COLOR_8;
15847ec681f3Smrg      case 16:
15857ec681f3Smrg         return V_028C70_COLOR_16;
15867ec681f3Smrg      case 32:
15877ec681f3Smrg         return V_028C70_COLOR_32;
15887ec681f3Smrg      }
15897ec681f3Smrg      break;
15907ec681f3Smrg   case 2:
15917ec681f3Smrg      if (desc->channel[0].size == desc->channel[1].size) {
15927ec681f3Smrg         switch (desc->channel[0].size) {
15937ec681f3Smrg         case 8:
15947ec681f3Smrg            return V_028C70_COLOR_8_8;
15957ec681f3Smrg         case 16:
15967ec681f3Smrg            return V_028C70_COLOR_16_16;
15977ec681f3Smrg         case 32:
15987ec681f3Smrg            return V_028C70_COLOR_32_32;
15997ec681f3Smrg         }
16007ec681f3Smrg      } else if (HAS_SIZE(8, 24, 0, 0)) {
16017ec681f3Smrg         return V_028C70_COLOR_24_8;
16027ec681f3Smrg      } else if (HAS_SIZE(24, 8, 0, 0)) {
16037ec681f3Smrg         return V_028C70_COLOR_8_24;
16047ec681f3Smrg      }
16057ec681f3Smrg      break;
16067ec681f3Smrg   case 3:
16077ec681f3Smrg      if (HAS_SIZE(5, 6, 5, 0)) {
16087ec681f3Smrg         return V_028C70_COLOR_5_6_5;
16097ec681f3Smrg      } else if (HAS_SIZE(32, 8, 24, 0)) {
16107ec681f3Smrg         return V_028C70_COLOR_X24_8_32_FLOAT;
16117ec681f3Smrg      }
16127ec681f3Smrg      break;
16137ec681f3Smrg   case 4:
16147ec681f3Smrg      if (desc->channel[0].size == desc->channel[1].size &&
16157ec681f3Smrg          desc->channel[0].size == desc->channel[2].size &&
16167ec681f3Smrg          desc->channel[0].size == desc->channel[3].size) {
16177ec681f3Smrg         switch (desc->channel[0].size) {
16187ec681f3Smrg         case 4:
16197ec681f3Smrg            return V_028C70_COLOR_4_4_4_4;
16207ec681f3Smrg         case 8:
16217ec681f3Smrg            return V_028C70_COLOR_8_8_8_8;
16227ec681f3Smrg         case 16:
16237ec681f3Smrg            return V_028C70_COLOR_16_16_16_16;
16247ec681f3Smrg         case 32:
16257ec681f3Smrg            return V_028C70_COLOR_32_32_32_32;
16267ec681f3Smrg         }
16277ec681f3Smrg      } else if (HAS_SIZE(5, 5, 5, 1)) {
16287ec681f3Smrg         return V_028C70_COLOR_1_5_5_5;
16297ec681f3Smrg      } else if (HAS_SIZE(1, 5, 5, 5)) {
16307ec681f3Smrg         return V_028C70_COLOR_5_5_5_1;
16317ec681f3Smrg      } else if (HAS_SIZE(10, 10, 10, 2)) {
16327ec681f3Smrg         return V_028C70_COLOR_2_10_10_10;
16337ec681f3Smrg      }
16347ec681f3Smrg      break;
16357ec681f3Smrg   }
16367ec681f3Smrg   return V_028C70_COLOR_INVALID;
1637af69d88dSmrg}
1638af69d88dSmrg
1639af69d88dSmrgstatic uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1640af69d88dSmrg{
16417ec681f3Smrg   if (SI_BIG_ENDIAN) {
16427ec681f3Smrg      switch (colorformat) {
16437ec681f3Smrg      /* 8-bit buffers. */
16447ec681f3Smrg      case V_028C70_COLOR_8:
16457ec681f3Smrg         return V_028C70_ENDIAN_NONE;
16467ec681f3Smrg
16477ec681f3Smrg      /* 16-bit buffers. */
16487ec681f3Smrg      case V_028C70_COLOR_5_6_5:
16497ec681f3Smrg      case V_028C70_COLOR_1_5_5_5:
16507ec681f3Smrg      case V_028C70_COLOR_4_4_4_4:
16517ec681f3Smrg      case V_028C70_COLOR_16:
16527ec681f3Smrg      case V_028C70_COLOR_8_8:
16537ec681f3Smrg         return V_028C70_ENDIAN_8IN16;
16547ec681f3Smrg
16557ec681f3Smrg      /* 32-bit buffers. */
16567ec681f3Smrg      case V_028C70_COLOR_8_8_8_8:
16577ec681f3Smrg      case V_028C70_COLOR_2_10_10_10:
16587ec681f3Smrg      case V_028C70_COLOR_8_24:
16597ec681f3Smrg      case V_028C70_COLOR_24_8:
16607ec681f3Smrg      case V_028C70_COLOR_16_16:
16617ec681f3Smrg         return V_028C70_ENDIAN_8IN32;
16627ec681f3Smrg
16637ec681f3Smrg      /* 64-bit buffers. */
16647ec681f3Smrg      case V_028C70_COLOR_16_16_16_16:
16657ec681f3Smrg         return V_028C70_ENDIAN_8IN16;
16667ec681f3Smrg
16677ec681f3Smrg      case V_028C70_COLOR_32_32:
16687ec681f3Smrg         return V_028C70_ENDIAN_8IN32;
16697ec681f3Smrg
16707ec681f3Smrg      /* 128-bit buffers. */
16717ec681f3Smrg      case V_028C70_COLOR_32_32_32_32:
16727ec681f3Smrg         return V_028C70_ENDIAN_8IN32;
16737ec681f3Smrg      default:
16747ec681f3Smrg         return V_028C70_ENDIAN_NONE; /* Unsupported. */
16757ec681f3Smrg      }
16767ec681f3Smrg   } else {
16777ec681f3Smrg      return V_028C70_ENDIAN_NONE;
16787ec681f3Smrg   }
1679af69d88dSmrg}
1680af69d88dSmrg
1681af69d88dSmrgstatic uint32_t si_translate_dbformat(enum pipe_format format)
1682af69d88dSmrg{
16837ec681f3Smrg   switch (format) {
16847ec681f3Smrg   case PIPE_FORMAT_Z16_UNORM:
16857ec681f3Smrg      return V_028040_Z_16;
16867ec681f3Smrg   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
16877ec681f3Smrg   case PIPE_FORMAT_X8Z24_UNORM:
16887ec681f3Smrg   case PIPE_FORMAT_Z24X8_UNORM:
16897ec681f3Smrg   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
16907ec681f3Smrg      return V_028040_Z_24; /* deprecated on AMD GCN */
16917ec681f3Smrg   case PIPE_FORMAT_Z32_FLOAT:
16927ec681f3Smrg   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
16937ec681f3Smrg      return V_028040_Z_32_FLOAT;
16947ec681f3Smrg   default:
16957ec681f3Smrg      return V_028040_Z_INVALID;
16967ec681f3Smrg   }
1697af69d88dSmrg}
1698af69d88dSmrg
1699af69d88dSmrg/*
1700af69d88dSmrg * Texture translation
1701af69d88dSmrg */
1702af69d88dSmrg
17037ec681f3Smrgstatic uint32_t si_translate_texformat(struct pipe_screen *screen, enum pipe_format format,
17047ec681f3Smrg                                       const struct util_format_description *desc,
17057ec681f3Smrg                                       int first_non_void)
1706af69d88dSmrg{
17077ec681f3Smrg   struct si_screen *sscreen = (struct si_screen *)screen;
17087ec681f3Smrg   bool uniform = true;
17097ec681f3Smrg   int i;
17107ec681f3Smrg
17117ec681f3Smrg   assert(sscreen->info.chip_class <= GFX9);
17127ec681f3Smrg
17137ec681f3Smrg   /* Colorspace (return non-RGB formats directly). */
17147ec681f3Smrg   switch (desc->colorspace) {
17157ec681f3Smrg   /* Depth stencil formats */
17167ec681f3Smrg   case UTIL_FORMAT_COLORSPACE_ZS:
17177ec681f3Smrg      switch (format) {
17187ec681f3Smrg      case PIPE_FORMAT_Z16_UNORM:
17197ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_16;
17207ec681f3Smrg      case PIPE_FORMAT_X24S8_UINT:
17217ec681f3Smrg      case PIPE_FORMAT_S8X24_UINT:
17227ec681f3Smrg         /*
17237ec681f3Smrg          * Implemented as an 8_8_8_8 data format to fix texture
17247ec681f3Smrg          * gathers in stencil sampling. This affects at least
17257ec681f3Smrg          * GL45-CTS.texture_cube_map_array.sampling on GFX8.
17267ec681f3Smrg          */
17277ec681f3Smrg         if (sscreen->info.chip_class <= GFX8)
17287ec681f3Smrg            return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
17297ec681f3Smrg
17307ec681f3Smrg         if (format == PIPE_FORMAT_X24S8_UINT)
17317ec681f3Smrg            return V_008F14_IMG_DATA_FORMAT_8_24;
17327ec681f3Smrg         else
17337ec681f3Smrg            return V_008F14_IMG_DATA_FORMAT_24_8;
17347ec681f3Smrg      case PIPE_FORMAT_Z24X8_UNORM:
17357ec681f3Smrg      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
17367ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_8_24;
17377ec681f3Smrg      case PIPE_FORMAT_X8Z24_UNORM:
17387ec681f3Smrg      case PIPE_FORMAT_S8_UINT_Z24_UNORM:
17397ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_24_8;
17407ec681f3Smrg      case PIPE_FORMAT_S8_UINT:
17417ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_8;
17427ec681f3Smrg      case PIPE_FORMAT_Z32_FLOAT:
17437ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_32;
17447ec681f3Smrg      case PIPE_FORMAT_X32_S8X24_UINT:
17457ec681f3Smrg      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
17467ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_X24_8_32;
17477ec681f3Smrg      default:
17487ec681f3Smrg         goto out_unknown;
17497ec681f3Smrg      }
17507ec681f3Smrg
17517ec681f3Smrg   case UTIL_FORMAT_COLORSPACE_YUV:
17527ec681f3Smrg      goto out_unknown; /* TODO */
17537ec681f3Smrg
17547ec681f3Smrg   case UTIL_FORMAT_COLORSPACE_SRGB:
17557ec681f3Smrg      if (desc->nr_channels != 4 && desc->nr_channels != 1)
17567ec681f3Smrg         goto out_unknown;
17577ec681f3Smrg      break;
17587ec681f3Smrg
17597ec681f3Smrg   default:
17607ec681f3Smrg      break;
17617ec681f3Smrg   }
17627ec681f3Smrg
17637ec681f3Smrg   if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
17647ec681f3Smrg      if (!sscreen->info.has_format_bc1_through_bc7)
17657ec681f3Smrg         goto out_unknown;
17667ec681f3Smrg
17677ec681f3Smrg      switch (format) {
17687ec681f3Smrg      case PIPE_FORMAT_RGTC1_SNORM:
17697ec681f3Smrg      case PIPE_FORMAT_LATC1_SNORM:
17707ec681f3Smrg      case PIPE_FORMAT_RGTC1_UNORM:
17717ec681f3Smrg      case PIPE_FORMAT_LATC1_UNORM:
17727ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_BC4;
17737ec681f3Smrg      case PIPE_FORMAT_RGTC2_SNORM:
17747ec681f3Smrg      case PIPE_FORMAT_LATC2_SNORM:
17757ec681f3Smrg      case PIPE_FORMAT_RGTC2_UNORM:
17767ec681f3Smrg      case PIPE_FORMAT_LATC2_UNORM:
17777ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_BC5;
17787ec681f3Smrg      default:
17797ec681f3Smrg         goto out_unknown;
17807ec681f3Smrg      }
17817ec681f3Smrg   }
17827ec681f3Smrg
17837ec681f3Smrg   if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
17847ec681f3Smrg       (sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_VEGA10 ||
17857ec681f3Smrg        sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2)) {
17867ec681f3Smrg      switch (format) {
17877ec681f3Smrg      case PIPE_FORMAT_ETC1_RGB8:
17887ec681f3Smrg      case PIPE_FORMAT_ETC2_RGB8:
17897ec681f3Smrg      case PIPE_FORMAT_ETC2_SRGB8:
17907ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
17917ec681f3Smrg      case PIPE_FORMAT_ETC2_RGB8A1:
17927ec681f3Smrg      case PIPE_FORMAT_ETC2_SRGB8A1:
17937ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
17947ec681f3Smrg      case PIPE_FORMAT_ETC2_RGBA8:
17957ec681f3Smrg      case PIPE_FORMAT_ETC2_SRGBA8:
17967ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
17977ec681f3Smrg      case PIPE_FORMAT_ETC2_R11_UNORM:
17987ec681f3Smrg      case PIPE_FORMAT_ETC2_R11_SNORM:
17997ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_ETC2_R;
18007ec681f3Smrg      case PIPE_FORMAT_ETC2_RG11_UNORM:
18017ec681f3Smrg      case PIPE_FORMAT_ETC2_RG11_SNORM:
18027ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
18037ec681f3Smrg      default:
18047ec681f3Smrg         goto out_unknown;
18057ec681f3Smrg      }
18067ec681f3Smrg   }
18077ec681f3Smrg
18087ec681f3Smrg   if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
18097ec681f3Smrg      if (!sscreen->info.has_format_bc1_through_bc7)
18107ec681f3Smrg         goto out_unknown;
18117ec681f3Smrg
18127ec681f3Smrg      switch (format) {
18137ec681f3Smrg      case PIPE_FORMAT_BPTC_RGBA_UNORM:
18147ec681f3Smrg      case PIPE_FORMAT_BPTC_SRGBA:
18157ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_BC7;
18167ec681f3Smrg      case PIPE_FORMAT_BPTC_RGB_FLOAT:
18177ec681f3Smrg      case PIPE_FORMAT_BPTC_RGB_UFLOAT:
18187ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_BC6;
18197ec681f3Smrg      default:
18207ec681f3Smrg         goto out_unknown;
18217ec681f3Smrg      }
18227ec681f3Smrg   }
18237ec681f3Smrg
18247ec681f3Smrg   if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
18257ec681f3Smrg      switch (format) {
18267ec681f3Smrg      case PIPE_FORMAT_R8G8_B8G8_UNORM:
18277ec681f3Smrg      case PIPE_FORMAT_G8R8_B8R8_UNORM:
18287ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_GB_GR;
18297ec681f3Smrg      case PIPE_FORMAT_G8R8_G8B8_UNORM:
18307ec681f3Smrg      case PIPE_FORMAT_R8G8_R8B8_UNORM:
18317ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_BG_RG;
18327ec681f3Smrg      default:
18337ec681f3Smrg         goto out_unknown;
18347ec681f3Smrg      }
18357ec681f3Smrg   }
18367ec681f3Smrg
18377ec681f3Smrg   if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
18387ec681f3Smrg      if (!sscreen->info.has_format_bc1_through_bc7)
18397ec681f3Smrg         goto out_unknown;
18407ec681f3Smrg
18417ec681f3Smrg      switch (format) {
18427ec681f3Smrg      case PIPE_FORMAT_DXT1_RGB:
18437ec681f3Smrg      case PIPE_FORMAT_DXT1_RGBA:
18447ec681f3Smrg      case PIPE_FORMAT_DXT1_SRGB:
18457ec681f3Smrg      case PIPE_FORMAT_DXT1_SRGBA:
18467ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_BC1;
18477ec681f3Smrg      case PIPE_FORMAT_DXT3_RGBA:
18487ec681f3Smrg      case PIPE_FORMAT_DXT3_SRGBA:
18497ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_BC2;
18507ec681f3Smrg      case PIPE_FORMAT_DXT5_RGBA:
18517ec681f3Smrg      case PIPE_FORMAT_DXT5_SRGBA:
18527ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_BC3;
18537ec681f3Smrg      default:
18547ec681f3Smrg         goto out_unknown;
18557ec681f3Smrg      }
18567ec681f3Smrg   }
18577ec681f3Smrg
18587ec681f3Smrg   if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
18597ec681f3Smrg      return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
18607ec681f3Smrg   } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
18617ec681f3Smrg      return V_008F14_IMG_DATA_FORMAT_10_11_11;
18627ec681f3Smrg   }
18637ec681f3Smrg
18647ec681f3Smrg   /* R8G8Bx_SNORM - TODO CxV8U8 */
18657ec681f3Smrg
18667ec681f3Smrg   /* hw cannot support mixed formats (except depth/stencil, since only
18677ec681f3Smrg    * depth is read).*/
18687ec681f3Smrg   if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
18697ec681f3Smrg      goto out_unknown;
18707ec681f3Smrg
18717ec681f3Smrg   /* See whether the components are of the same size. */
18727ec681f3Smrg   for (i = 1; i < desc->nr_channels; i++) {
18737ec681f3Smrg      uniform = uniform && desc->channel[0].size == desc->channel[i].size;
18747ec681f3Smrg   }
18757ec681f3Smrg
18767ec681f3Smrg   /* Non-uniform formats. */
18777ec681f3Smrg   if (!uniform) {
18787ec681f3Smrg      switch (desc->nr_channels) {
18797ec681f3Smrg      case 3:
18807ec681f3Smrg         if (desc->channel[0].size == 5 && desc->channel[1].size == 6 &&
18817ec681f3Smrg             desc->channel[2].size == 5) {
18827ec681f3Smrg            return V_008F14_IMG_DATA_FORMAT_5_6_5;
18837ec681f3Smrg         }
18847ec681f3Smrg         goto out_unknown;
18857ec681f3Smrg      case 4:
18867ec681f3Smrg         if (desc->channel[0].size == 5 && desc->channel[1].size == 5 &&
18877ec681f3Smrg             desc->channel[2].size == 5 && desc->channel[3].size == 1) {
18887ec681f3Smrg            return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
18897ec681f3Smrg         }
18907ec681f3Smrg         if (desc->channel[0].size == 1 && desc->channel[1].size == 5 &&
18917ec681f3Smrg             desc->channel[2].size == 5 && desc->channel[3].size == 5) {
18927ec681f3Smrg            return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
18937ec681f3Smrg         }
18947ec681f3Smrg         if (desc->channel[0].size == 10 && desc->channel[1].size == 10 &&
18957ec681f3Smrg             desc->channel[2].size == 10 && desc->channel[3].size == 2) {
18967ec681f3Smrg            return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
18977ec681f3Smrg         }
18987ec681f3Smrg         goto out_unknown;
18997ec681f3Smrg      }
19007ec681f3Smrg      goto out_unknown;
19017ec681f3Smrg   }
19027ec681f3Smrg
19037ec681f3Smrg   if (first_non_void < 0 || first_non_void > 3)
19047ec681f3Smrg      goto out_unknown;
19057ec681f3Smrg
19067ec681f3Smrg   /* uniform formats */
19077ec681f3Smrg   switch (desc->channel[first_non_void].size) {
19087ec681f3Smrg   case 4:
19097ec681f3Smrg      switch (desc->nr_channels) {
1910af69d88dSmrg#if 0 /* Not supported for render targets */
19117ec681f3Smrg      case 2:
19127ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_4_4;
1913af69d88dSmrg#endif
19147ec681f3Smrg      case 4:
19157ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
19167ec681f3Smrg      }
19177ec681f3Smrg      break;
19187ec681f3Smrg   case 8:
19197ec681f3Smrg      switch (desc->nr_channels) {
19207ec681f3Smrg      case 1:
19217ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_8;
19227ec681f3Smrg      case 2:
19237ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_8_8;
19247ec681f3Smrg      case 4:
19257ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
19267ec681f3Smrg      }
19277ec681f3Smrg      break;
19287ec681f3Smrg   case 16:
19297ec681f3Smrg      switch (desc->nr_channels) {
19307ec681f3Smrg      case 1:
19317ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_16;
19327ec681f3Smrg      case 2:
19337ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_16_16;
19347ec681f3Smrg      case 4:
19357ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
19367ec681f3Smrg      }
19377ec681f3Smrg      break;
19387ec681f3Smrg   case 32:
19397ec681f3Smrg      switch (desc->nr_channels) {
19407ec681f3Smrg      case 1:
19417ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_32;
19427ec681f3Smrg      case 2:
19437ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_32_32;
1944af69d88dSmrg#if 0 /* Not supported for render targets */
19457ec681f3Smrg      case 3:
19467ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_32_32_32;
1947af69d88dSmrg#endif
19487ec681f3Smrg      case 4:
19497ec681f3Smrg         return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
19507ec681f3Smrg      }
19517ec681f3Smrg   }
1952af69d88dSmrg
1953af69d88dSmrgout_unknown:
19547ec681f3Smrg   return ~0;
19557ec681f3Smrg}
19567ec681f3Smrg
19577ec681f3Smrgstatic unsigned is_wrap_mode_legal(struct si_screen *screen, unsigned wrap)
19587ec681f3Smrg{
19597ec681f3Smrg   if (!screen->info.has_3d_cube_border_color_mipmap) {
19607ec681f3Smrg      switch (wrap) {
19617ec681f3Smrg      case PIPE_TEX_WRAP_CLAMP:
19627ec681f3Smrg      case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
19637ec681f3Smrg      case PIPE_TEX_WRAP_MIRROR_CLAMP:
19647ec681f3Smrg      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
19657ec681f3Smrg         return false;
19667ec681f3Smrg      }
19677ec681f3Smrg   }
19687ec681f3Smrg   return true;
1969af69d88dSmrg}
1970af69d88dSmrg
1971af69d88dSmrgstatic unsigned si_tex_wrap(unsigned wrap)
1972af69d88dSmrg{
19737ec681f3Smrg   switch (wrap) {
19747ec681f3Smrg   default:
19757ec681f3Smrg   case PIPE_TEX_WRAP_REPEAT:
19767ec681f3Smrg      return V_008F30_SQ_TEX_WRAP;
19777ec681f3Smrg   case PIPE_TEX_WRAP_CLAMP:
19787ec681f3Smrg      return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
19797ec681f3Smrg   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
19807ec681f3Smrg      return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
19817ec681f3Smrg   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
19827ec681f3Smrg      return V_008F30_SQ_TEX_CLAMP_BORDER;
19837ec681f3Smrg   case PIPE_TEX_WRAP_MIRROR_REPEAT:
19847ec681f3Smrg      return V_008F30_SQ_TEX_MIRROR;
19857ec681f3Smrg   case PIPE_TEX_WRAP_MIRROR_CLAMP:
19867ec681f3Smrg      return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
19877ec681f3Smrg   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
19887ec681f3Smrg      return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
19897ec681f3Smrg   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
19907ec681f3Smrg      return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
19917ec681f3Smrg   }
1992af69d88dSmrg}
1993af69d88dSmrg
1994af69d88dSmrgstatic unsigned si_tex_mipfilter(unsigned filter)
1995af69d88dSmrg{
19967ec681f3Smrg   switch (filter) {
19977ec681f3Smrg   case PIPE_TEX_MIPFILTER_NEAREST:
19987ec681f3Smrg      return V_008F38_SQ_TEX_Z_FILTER_POINT;
19997ec681f3Smrg   case PIPE_TEX_MIPFILTER_LINEAR:
20007ec681f3Smrg      return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
20017ec681f3Smrg   default:
20027ec681f3Smrg   case PIPE_TEX_MIPFILTER_NONE:
20037ec681f3Smrg      return V_008F38_SQ_TEX_Z_FILTER_NONE;
20047ec681f3Smrg   }
2005af69d88dSmrg}
2006af69d88dSmrg
2007af69d88dSmrgstatic unsigned si_tex_compare(unsigned compare)
2008af69d88dSmrg{
20097ec681f3Smrg   switch (compare) {
20107ec681f3Smrg   default:
20117ec681f3Smrg   case PIPE_FUNC_NEVER:
20127ec681f3Smrg      return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
20137ec681f3Smrg   case PIPE_FUNC_LESS:
20147ec681f3Smrg      return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
20157ec681f3Smrg   case PIPE_FUNC_EQUAL:
20167ec681f3Smrg      return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
20177ec681f3Smrg   case PIPE_FUNC_LEQUAL:
20187ec681f3Smrg      return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
20197ec681f3Smrg   case PIPE_FUNC_GREATER:
20207ec681f3Smrg      return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
20217ec681f3Smrg   case PIPE_FUNC_NOTEQUAL:
20227ec681f3Smrg      return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
20237ec681f3Smrg   case PIPE_FUNC_GEQUAL:
20247ec681f3Smrg      return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
20257ec681f3Smrg   case PIPE_FUNC_ALWAYS:
20267ec681f3Smrg      return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
20277ec681f3Smrg   }
2028af69d88dSmrg}
2029af69d88dSmrg
20307ec681f3Smrgstatic unsigned si_tex_dim(struct si_screen *sscreen, struct si_texture *tex, unsigned view_target,
20317ec681f3Smrg                           unsigned nr_samples)
2032af69d88dSmrg{
20337ec681f3Smrg   unsigned res_target = tex->buffer.b.b.target;
20347ec681f3Smrg
20357ec681f3Smrg   if (view_target == PIPE_TEXTURE_CUBE || view_target == PIPE_TEXTURE_CUBE_ARRAY)
20367ec681f3Smrg      res_target = view_target;
20377ec681f3Smrg   /* If interpreting cubemaps as something else, set 2D_ARRAY. */
20387ec681f3Smrg   else if (res_target == PIPE_TEXTURE_CUBE || res_target == PIPE_TEXTURE_CUBE_ARRAY)
20397ec681f3Smrg      res_target = PIPE_TEXTURE_2D_ARRAY;
20407ec681f3Smrg
20417ec681f3Smrg   /* GFX9 allocates 1D textures as 2D. */
20427ec681f3Smrg   if ((res_target == PIPE_TEXTURE_1D || res_target == PIPE_TEXTURE_1D_ARRAY) &&
20437ec681f3Smrg       sscreen->info.chip_class == GFX9 &&
20447ec681f3Smrg       tex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) {
20457ec681f3Smrg      if (res_target == PIPE_TEXTURE_1D)
20467ec681f3Smrg         res_target = PIPE_TEXTURE_2D;
20477ec681f3Smrg      else
20487ec681f3Smrg         res_target = PIPE_TEXTURE_2D_ARRAY;
20497ec681f3Smrg   }
20507ec681f3Smrg
20517ec681f3Smrg   switch (res_target) {
20527ec681f3Smrg   default:
20537ec681f3Smrg   case PIPE_TEXTURE_1D:
20547ec681f3Smrg      return V_008F1C_SQ_RSRC_IMG_1D;
20557ec681f3Smrg   case PIPE_TEXTURE_1D_ARRAY:
20567ec681f3Smrg      return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
20577ec681f3Smrg   case PIPE_TEXTURE_2D:
20587ec681f3Smrg   case PIPE_TEXTURE_RECT:
20597ec681f3Smrg      return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : V_008F1C_SQ_RSRC_IMG_2D;
20607ec681f3Smrg   case PIPE_TEXTURE_2D_ARRAY:
20617ec681f3Smrg      return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
20627ec681f3Smrg   case PIPE_TEXTURE_3D:
20637ec681f3Smrg      return V_008F1C_SQ_RSRC_IMG_3D;
20647ec681f3Smrg   case PIPE_TEXTURE_CUBE:
20657ec681f3Smrg   case PIPE_TEXTURE_CUBE_ARRAY:
20667ec681f3Smrg      return V_008F1C_SQ_RSRC_IMG_CUBE;
20677ec681f3Smrg   }
2068af69d88dSmrg}
2069af69d88dSmrg
2070af69d88dSmrg/*
2071af69d88dSmrg * Format support testing
2072af69d88dSmrg */
2073af69d88dSmrg
2074af69d88dSmrgstatic bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
2075af69d88dSmrg{
20767ec681f3Smrg   struct si_screen *sscreen = (struct si_screen *)screen;
20777ec681f3Smrg
20787ec681f3Smrg   if (sscreen->info.chip_class >= GFX10) {
20797ec681f3Smrg      const struct gfx10_format *fmt = &gfx10_format_table[format];
20807ec681f3Smrg      if (!fmt->img_format || fmt->buffers_only)
20817ec681f3Smrg         return false;
20827ec681f3Smrg      return true;
20837ec681f3Smrg   }
208401e04c3fSmrg
20857ec681f3Smrg   const struct util_format_description *desc = util_format_description(format);
20867ec681f3Smrg   if (!desc)
20877ec681f3Smrg      return false;
20887ec681f3Smrg
20897ec681f3Smrg   return si_translate_texformat(screen, format, desc,
20907ec681f3Smrg                                 util_format_get_first_non_void_channel(format)) != ~0U;
2091af69d88dSmrg}
2092af69d88dSmrg
2093af69d88dSmrgstatic uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
20947ec681f3Smrg                                               const struct util_format_description *desc,
20957ec681f3Smrg                                               int first_non_void)
2096af69d88dSmrg{
20977ec681f3Smrg   int i;
20987ec681f3Smrg
20997ec681f3Smrg   assert(((struct si_screen *)screen)->info.chip_class <= GFX9);
21007ec681f3Smrg
21017ec681f3Smrg   if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
21027ec681f3Smrg      return V_008F0C_BUF_DATA_FORMAT_10_11_11;
21037ec681f3Smrg
21047ec681f3Smrg   assert(first_non_void >= 0);
21057ec681f3Smrg
21067ec681f3Smrg   if (desc->nr_channels == 4 && desc->channel[0].size == 10 && desc->channel[1].size == 10 &&
21077ec681f3Smrg       desc->channel[2].size == 10 && desc->channel[3].size == 2)
21087ec681f3Smrg      return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
21097ec681f3Smrg
21107ec681f3Smrg   /* See whether the components are of the same size. */
21117ec681f3Smrg   for (i = 0; i < desc->nr_channels; i++) {
21127ec681f3Smrg      if (desc->channel[first_non_void].size != desc->channel[i].size)
21137ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_INVALID;
21147ec681f3Smrg   }
21157ec681f3Smrg
21167ec681f3Smrg   switch (desc->channel[first_non_void].size) {
21177ec681f3Smrg   case 8:
21187ec681f3Smrg      switch (desc->nr_channels) {
21197ec681f3Smrg      case 1:
21207ec681f3Smrg      case 3: /* 3 loads */
21217ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_8;
21227ec681f3Smrg      case 2:
21237ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_8_8;
21247ec681f3Smrg      case 4:
21257ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
21267ec681f3Smrg      }
21277ec681f3Smrg      break;
21287ec681f3Smrg   case 16:
21297ec681f3Smrg      switch (desc->nr_channels) {
21307ec681f3Smrg      case 1:
21317ec681f3Smrg      case 3: /* 3 loads */
21327ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_16;
21337ec681f3Smrg      case 2:
21347ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_16_16;
21357ec681f3Smrg      case 4:
21367ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
21377ec681f3Smrg      }
21387ec681f3Smrg      break;
21397ec681f3Smrg   case 32:
21407ec681f3Smrg      switch (desc->nr_channels) {
21417ec681f3Smrg      case 1:
21427ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_32;
21437ec681f3Smrg      case 2:
21447ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_32_32;
21457ec681f3Smrg      case 3:
21467ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_32_32_32;
21477ec681f3Smrg      case 4:
21487ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
21497ec681f3Smrg      }
21507ec681f3Smrg      break;
21517ec681f3Smrg   case 64:
21527ec681f3Smrg      /* Legacy double formats. */
21537ec681f3Smrg      switch (desc->nr_channels) {
21547ec681f3Smrg      case 1: /* 1 load */
21557ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_32_32;
21567ec681f3Smrg      case 2: /* 1 load */
21577ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
21587ec681f3Smrg      case 3: /* 3 loads */
21597ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_32_32;
21607ec681f3Smrg      case 4: /* 2 loads */
21617ec681f3Smrg         return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
21627ec681f3Smrg      }
21637ec681f3Smrg      break;
21647ec681f3Smrg   }
21657ec681f3Smrg
21667ec681f3Smrg   return V_008F0C_BUF_DATA_FORMAT_INVALID;
2167af69d88dSmrg}
2168af69d88dSmrg
2169af69d88dSmrgstatic uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
21707ec681f3Smrg                                              const struct util_format_description *desc,
21717ec681f3Smrg                                              int first_non_void)
2172af69d88dSmrg{
21737ec681f3Smrg   assert(((struct si_screen *)screen)->info.chip_class <= GFX9);
21747ec681f3Smrg
21757ec681f3Smrg   if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
21767ec681f3Smrg      return V_008F0C_BUF_NUM_FORMAT_FLOAT;
21777ec681f3Smrg
21787ec681f3Smrg   assert(first_non_void >= 0);
21797ec681f3Smrg
21807ec681f3Smrg   switch (desc->channel[first_non_void].type) {
21817ec681f3Smrg   case UTIL_FORMAT_TYPE_SIGNED:
21827ec681f3Smrg   case UTIL_FORMAT_TYPE_FIXED:
21837ec681f3Smrg      if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer)
21847ec681f3Smrg         return V_008F0C_BUF_NUM_FORMAT_SINT;
21857ec681f3Smrg      else if (desc->channel[first_non_void].normalized)
21867ec681f3Smrg         return V_008F0C_BUF_NUM_FORMAT_SNORM;
21877ec681f3Smrg      else
21887ec681f3Smrg         return V_008F0C_BUF_NUM_FORMAT_SSCALED;
21897ec681f3Smrg      break;
21907ec681f3Smrg   case UTIL_FORMAT_TYPE_UNSIGNED:
21917ec681f3Smrg      if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer)
21927ec681f3Smrg         return V_008F0C_BUF_NUM_FORMAT_UINT;
21937ec681f3Smrg      else if (desc->channel[first_non_void].normalized)
21947ec681f3Smrg         return V_008F0C_BUF_NUM_FORMAT_UNORM;
21957ec681f3Smrg      else
21967ec681f3Smrg         return V_008F0C_BUF_NUM_FORMAT_USCALED;
21977ec681f3Smrg      break;
21987ec681f3Smrg   case UTIL_FORMAT_TYPE_FLOAT:
21997ec681f3Smrg   default:
22007ec681f3Smrg      return V_008F0C_BUF_NUM_FORMAT_FLOAT;
22017ec681f3Smrg   }
2202af69d88dSmrg}
2203af69d88dSmrg
22047ec681f3Smrgstatic unsigned si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format,
22057ec681f3Smrg                                              unsigned usage)
2206af69d88dSmrg{
22077ec681f3Smrg   struct si_screen *sscreen = (struct si_screen *)screen;
22087ec681f3Smrg   const struct util_format_description *desc;
22097ec681f3Smrg   int first_non_void;
22107ec681f3Smrg   unsigned data_format;
22117ec681f3Smrg
22127ec681f3Smrg   assert((usage & ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_VERTEX_BUFFER)) ==
22137ec681f3Smrg          0);
22147ec681f3Smrg
22157ec681f3Smrg   desc = util_format_description(format);
22167ec681f3Smrg   if (!desc)
22177ec681f3Smrg      return 0;
22187ec681f3Smrg
22197ec681f3Smrg   /* There are no native 8_8_8 or 16_16_16 data formats, and we currently
22207ec681f3Smrg    * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well
22217ec681f3Smrg    * for read-only access (with caveats surrounding bounds checks), but
22227ec681f3Smrg    * obviously fails for write access which we have to implement for
22237ec681f3Smrg    * shader images. Luckily, OpenGL doesn't expect this to be supported
22247ec681f3Smrg    * anyway, and so the only impact is on PBO uploads / downloads, which
22257ec681f3Smrg    * shouldn't be expected to be fast for GL_RGB anyway.
22267ec681f3Smrg    */
22277ec681f3Smrg   if (desc->block.bits == 3 * 8 || desc->block.bits == 3 * 16) {
22287ec681f3Smrg      if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) {
22297ec681f3Smrg         usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW);
22307ec681f3Smrg         if (!usage)
22317ec681f3Smrg            return 0;
22327ec681f3Smrg      }
22337ec681f3Smrg   }
22347ec681f3Smrg
22357ec681f3Smrg   if (sscreen->info.chip_class >= GFX10) {
22367ec681f3Smrg      const struct gfx10_format *fmt = &gfx10_format_table[format];
22377ec681f3Smrg      if (!fmt->img_format || fmt->img_format >= 128)
22387ec681f3Smrg         return 0;
22397ec681f3Smrg      return usage;
22407ec681f3Smrg   }
22417ec681f3Smrg
22427ec681f3Smrg   first_non_void = util_format_get_first_non_void_channel(format);
22437ec681f3Smrg   data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
22447ec681f3Smrg   if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID)
22457ec681f3Smrg      return 0;
22467ec681f3Smrg
22477ec681f3Smrg   return usage;
2248af69d88dSmrg}
2249af69d88dSmrg
22507ec681f3Smrgstatic bool si_is_colorbuffer_format_supported(enum chip_class chip_class,
22517ec681f3Smrg                                               enum pipe_format format)
2252af69d88dSmrg{
22537ec681f3Smrg   return si_translate_colorformat(chip_class, format) != V_028C70_COLOR_INVALID &&
22547ec681f3Smrg          si_translate_colorswap(format, false) != ~0U;
2255af69d88dSmrg}
2256af69d88dSmrg
2257af69d88dSmrgstatic bool si_is_zs_format_supported(enum pipe_format format)
2258af69d88dSmrg{
22597ec681f3Smrg   return si_translate_dbformat(format) != V_028040_Z_INVALID;
2260af69d88dSmrg}
2261af69d88dSmrg
22627ec681f3Smrgstatic bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format format,
22637ec681f3Smrg                                   enum pipe_texture_target target, unsigned sample_count,
22647ec681f3Smrg                                   unsigned storage_sample_count, unsigned usage)
2265af69d88dSmrg{
22667ec681f3Smrg   struct si_screen *sscreen = (struct si_screen *)screen;
22677ec681f3Smrg   unsigned retval = 0;
22687ec681f3Smrg
22697ec681f3Smrg   if (target >= PIPE_MAX_TEXTURE_TYPES) {
22707ec681f3Smrg      PRINT_ERR("radeonsi: unsupported texture type %d\n", target);
22717ec681f3Smrg      return false;
22727ec681f3Smrg   }
22737ec681f3Smrg
22747ec681f3Smrg   if ((target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE) &&
22757ec681f3Smrg        !sscreen->info.has_3d_cube_border_color_mipmap)
22767ec681f3Smrg      return false;
22777ec681f3Smrg
22787ec681f3Smrg   if (util_format_get_num_planes(format) >= 2)
22797ec681f3Smrg      return false;
22807ec681f3Smrg
22817ec681f3Smrg   if (MAX2(1, sample_count) < MAX2(1, storage_sample_count))
22827ec681f3Smrg      return false;
22837ec681f3Smrg
22847ec681f3Smrg   if (sample_count > 1) {
22857ec681f3Smrg      if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
22867ec681f3Smrg         return false;
22877ec681f3Smrg
22887ec681f3Smrg      /* Only power-of-two sample counts are supported. */
22897ec681f3Smrg      if (!util_is_power_of_two_or_zero(sample_count) ||
22907ec681f3Smrg          !util_is_power_of_two_or_zero(storage_sample_count))
22917ec681f3Smrg         return false;
22927ec681f3Smrg
22937ec681f3Smrg      /* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate,
22947ec681f3Smrg       * so don't expose 16 samples there.
22957ec681f3Smrg       */
22967ec681f3Smrg      const unsigned max_eqaa_samples = util_bitcount(sscreen->info.enabled_rb_mask) <= 1 ? 8 : 16;
22977ec681f3Smrg      const unsigned max_samples = 8;
22987ec681f3Smrg
22997ec681f3Smrg      /* MSAA support without framebuffer attachments. */
23007ec681f3Smrg      if (format == PIPE_FORMAT_NONE && sample_count <= max_eqaa_samples)
23017ec681f3Smrg         return true;
23027ec681f3Smrg
23037ec681f3Smrg      if (!sscreen->info.has_eqaa_surface_allocator || util_format_is_depth_or_stencil(format)) {
23047ec681f3Smrg         /* Color without EQAA or depth/stencil. */
23057ec681f3Smrg         if (sample_count > max_samples || sample_count != storage_sample_count)
23067ec681f3Smrg            return false;
23077ec681f3Smrg      } else {
23087ec681f3Smrg         /* Color with EQAA. */
23097ec681f3Smrg         if (sample_count > max_eqaa_samples || storage_sample_count > max_samples)
23107ec681f3Smrg            return false;
23117ec681f3Smrg      }
23127ec681f3Smrg   }
23137ec681f3Smrg
23147ec681f3Smrg   if (usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) {
23157ec681f3Smrg      if (target == PIPE_BUFFER) {
23167ec681f3Smrg         retval |= si_is_vertex_format_supported(
23177ec681f3Smrg            screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE));
23187ec681f3Smrg      } else {
23197ec681f3Smrg         if (si_is_sampler_format_supported(screen, format))
23207ec681f3Smrg            retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE);
23217ec681f3Smrg      }
23227ec681f3Smrg   }
23237ec681f3Smrg
23247ec681f3Smrg   if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
23257ec681f3Smrg                 PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) &&
23267ec681f3Smrg       si_is_colorbuffer_format_supported(sscreen->info.chip_class, format)) {
23277ec681f3Smrg      retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
23287ec681f3Smrg                         PIPE_BIND_SHARED);
23297ec681f3Smrg      if (!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format))
23307ec681f3Smrg         retval |= usage & PIPE_BIND_BLENDABLE;
23317ec681f3Smrg   }
23327ec681f3Smrg
23337ec681f3Smrg   if ((usage & PIPE_BIND_DEPTH_STENCIL) && si_is_zs_format_supported(format)) {
23347ec681f3Smrg      retval |= PIPE_BIND_DEPTH_STENCIL;
23357ec681f3Smrg   }
23367ec681f3Smrg
23377ec681f3Smrg   if (usage & PIPE_BIND_VERTEX_BUFFER) {
23387ec681f3Smrg      retval |= si_is_vertex_format_supported(screen, format, PIPE_BIND_VERTEX_BUFFER);
23397ec681f3Smrg   }
23407ec681f3Smrg
23417ec681f3Smrg   if (usage & PIPE_BIND_INDEX_BUFFER) {
23427ec681f3Smrg      if (format == PIPE_FORMAT_R8_UINT ||
23437ec681f3Smrg          format == PIPE_FORMAT_R16_UINT ||
23447ec681f3Smrg          format == PIPE_FORMAT_R32_UINT)
23457ec681f3Smrg         retval |= PIPE_BIND_INDEX_BUFFER;
23467ec681f3Smrg   }
23477ec681f3Smrg
23487ec681f3Smrg   if ((usage & PIPE_BIND_LINEAR) && !util_format_is_compressed(format) &&
23497ec681f3Smrg       !(usage & PIPE_BIND_DEPTH_STENCIL))
23507ec681f3Smrg      retval |= PIPE_BIND_LINEAR;
23517ec681f3Smrg
23527ec681f3Smrg   return retval == usage;
2353af69d88dSmrg}
2354af69d88dSmrg
235501e04c3fSmrg/*
235601e04c3fSmrg * framebuffer handling
235701e04c3fSmrg */
235801e04c3fSmrg
23597ec681f3Smrgstatic void si_choose_spi_color_formats(struct si_surface *surf, unsigned format, unsigned swap,
23607ec681f3Smrg                                        unsigned ntype, bool is_depth)
2361af69d88dSmrg{
23627ec681f3Smrg   struct ac_spi_color_formats formats = {};
23637ec681f3Smrg
23647ec681f3Smrg   ac_choose_spi_color_formats(format, swap, ntype, is_depth, true, &formats);
23657ec681f3Smrg
23667ec681f3Smrg   surf->spi_shader_col_format = formats.normal;
23677ec681f3Smrg   surf->spi_shader_col_format_alpha = formats.alpha;
23687ec681f3Smrg   surf->spi_shader_col_format_blend = formats.blend;
23697ec681f3Smrg   surf->spi_shader_col_format_blend_alpha = formats.blend_alpha;
237001e04c3fSmrg}
2371af69d88dSmrg
23727ec681f3Smrgstatic void si_initialize_color_surface(struct si_context *sctx, struct si_surface *surf)
2373af69d88dSmrg{
23747ec681f3Smrg   struct si_texture *tex = (struct si_texture *)surf->base.texture;
23757ec681f3Smrg   unsigned color_info, color_attrib;
23767ec681f3Smrg   unsigned format, swap, ntype, endian;
23777ec681f3Smrg   const struct util_format_description *desc;
23787ec681f3Smrg   int firstchan;
23797ec681f3Smrg   unsigned blend_clamp = 0, blend_bypass = 0;
23807ec681f3Smrg
23817ec681f3Smrg   desc = util_format_description(surf->base.format);
23827ec681f3Smrg   for (firstchan = 0; firstchan < 4; firstchan++) {
23837ec681f3Smrg      if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
23847ec681f3Smrg         break;
23857ec681f3Smrg      }
23867ec681f3Smrg   }
23877ec681f3Smrg   if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
23887ec681f3Smrg      ntype = V_028C70_NUMBER_FLOAT;
23897ec681f3Smrg   } else {
23907ec681f3Smrg      ntype = V_028C70_NUMBER_UNORM;
23917ec681f3Smrg      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
23927ec681f3Smrg         ntype = V_028C70_NUMBER_SRGB;
23937ec681f3Smrg      else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
23947ec681f3Smrg         if (desc->channel[firstchan].pure_integer) {
23957ec681f3Smrg            ntype = V_028C70_NUMBER_SINT;
23967ec681f3Smrg         } else {
23977ec681f3Smrg            assert(desc->channel[firstchan].normalized);
23987ec681f3Smrg            ntype = V_028C70_NUMBER_SNORM;
23997ec681f3Smrg         }
24007ec681f3Smrg      } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
24017ec681f3Smrg         if (desc->channel[firstchan].pure_integer) {
24027ec681f3Smrg            ntype = V_028C70_NUMBER_UINT;
24037ec681f3Smrg         } else {
24047ec681f3Smrg            assert(desc->channel[firstchan].normalized);
24057ec681f3Smrg            ntype = V_028C70_NUMBER_UNORM;
24067ec681f3Smrg         }
24077ec681f3Smrg      }
24087ec681f3Smrg   }
24097ec681f3Smrg
24107ec681f3Smrg   format = si_translate_colorformat(sctx->chip_class, surf->base.format);
24117ec681f3Smrg   if (format == V_028C70_COLOR_INVALID) {
24127ec681f3Smrg      PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
24137ec681f3Smrg   }
24147ec681f3Smrg   assert(format != V_028C70_COLOR_INVALID);
24157ec681f3Smrg   swap = si_translate_colorswap(surf->base.format, false);
24167ec681f3Smrg   endian = si_colorformat_endian_swap(format);
24177ec681f3Smrg
24187ec681f3Smrg   /* blend clamp should be set for all NORM/SRGB types */
24197ec681f3Smrg   if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
24207ec681f3Smrg       ntype == V_028C70_NUMBER_SRGB)
24217ec681f3Smrg      blend_clamp = 1;
24227ec681f3Smrg
24237ec681f3Smrg   /* set blend bypass according to docs if SINT/UINT or
24247ec681f3Smrg      8/24 COLOR variants */
24257ec681f3Smrg   if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
24267ec681f3Smrg       format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
24277ec681f3Smrg       format == V_028C70_COLOR_X24_8_32_FLOAT) {
24287ec681f3Smrg      blend_clamp = 0;
24297ec681f3Smrg      blend_bypass = 1;
24307ec681f3Smrg   }
24317ec681f3Smrg
24327ec681f3Smrg   if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) {
24337ec681f3Smrg      if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_8_8 ||
24347ec681f3Smrg          format == V_028C70_COLOR_8_8_8_8)
24357ec681f3Smrg         surf->color_is_int8 = true;
24367ec681f3Smrg      else if (format == V_028C70_COLOR_10_10_10_2 || format == V_028C70_COLOR_2_10_10_10)
24377ec681f3Smrg         surf->color_is_int10 = true;
24387ec681f3Smrg   }
24397ec681f3Smrg
24407ec681f3Smrg   color_info =
24417ec681f3Smrg      S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
24427ec681f3Smrg      S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
24437ec681f3Smrg      S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
24447ec681f3Smrg                          ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
24457ec681f3Smrg                          format != V_028C70_COLOR_24_8) |
24467ec681f3Smrg      S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian);
24477ec681f3Smrg
24487ec681f3Smrg   /* Intensity is implemented as Red, so treat it that way. */
24497ec681f3Smrg   color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
24507ec681f3Smrg                                             util_format_is_intensity(surf->base.format));
24517ec681f3Smrg
24527ec681f3Smrg   if (tex->buffer.b.b.nr_samples > 1) {
24537ec681f3Smrg      unsigned log_samples = util_logbase2(tex->buffer.b.b.nr_samples);
24547ec681f3Smrg      unsigned log_fragments = util_logbase2(tex->buffer.b.b.nr_storage_samples);
24557ec681f3Smrg
24567ec681f3Smrg      color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_fragments);
24577ec681f3Smrg
24587ec681f3Smrg      if (tex->surface.fmask_offset) {
24597ec681f3Smrg         color_info |= S_028C70_COMPRESSION(1);
24607ec681f3Smrg         unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.color.fmask.bankh);
24617ec681f3Smrg
24627ec681f3Smrg         if (sctx->chip_class == GFX6) {
24637ec681f3Smrg            /* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */
24647ec681f3Smrg            color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
24657ec681f3Smrg         }
24667ec681f3Smrg      }
24677ec681f3Smrg   }
24687ec681f3Smrg
24697ec681f3Smrg   /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
24707ec681f3Smrg    * 64 for APU because all of our APUs to date use DIMMs which have
24717ec681f3Smrg    * a request granularity size of 64B while all other chips have a
24727ec681f3Smrg    * 32B request size */
24737ec681f3Smrg   unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
24747ec681f3Smrg   if (!sctx->screen->info.has_dedicated_vram)
24757ec681f3Smrg      min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
24767ec681f3Smrg
24777ec681f3Smrg   if (sctx->chip_class >= GFX10) {
24787ec681f3Smrg      surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
24797ec681f3Smrg                             S_028C78_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) |
24807ec681f3Smrg                             S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
24817ec681f3Smrg                             S_028C78_INDEPENDENT_64B_BLOCKS(tex->surface.u.gfx9.color.dcc.independent_64B_blocks) |
24827ec681f3Smrg                             S_028C78_INDEPENDENT_128B_BLOCKS(tex->surface.u.gfx9.color.dcc.independent_128B_blocks);
24837ec681f3Smrg   } else if (sctx->chip_class >= GFX8) {
24847ec681f3Smrg      unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
24857ec681f3Smrg
24867ec681f3Smrg      if (tex->buffer.b.b.nr_storage_samples > 1) {
24877ec681f3Smrg         if (tex->surface.bpe == 1)
24887ec681f3Smrg            max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
24897ec681f3Smrg         else if (tex->surface.bpe == 2)
24907ec681f3Smrg            max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
24917ec681f3Smrg      }
24927ec681f3Smrg
24937ec681f3Smrg      surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
24947ec681f3Smrg                             S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
24957ec681f3Smrg                             S_028C78_INDEPENDENT_64B_BLOCKS(1);
24967ec681f3Smrg   }
24977ec681f3Smrg
24987ec681f3Smrg   /* This must be set for fast clear to work without FMASK. */
24997ec681f3Smrg   if (!tex->surface.fmask_size && sctx->chip_class == GFX6) {
25007ec681f3Smrg      unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh);
25017ec681f3Smrg      color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
25027ec681f3Smrg   }
25037ec681f3Smrg
25047ec681f3Smrg   /* GFX10 field has the same base shift as the GFX6 field */
25057ec681f3Smrg   unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
25067ec681f3Smrg                         S_028C6C_SLICE_MAX_GFX10(surf->base.u.tex.last_layer);
25077ec681f3Smrg   unsigned mip0_depth = util_max_layer(&tex->buffer.b.b, 0);
25087ec681f3Smrg
25097ec681f3Smrg   if (sctx->chip_class >= GFX10) {
25107ec681f3Smrg      color_view |= S_028C6C_MIP_LEVEL_GFX10(surf->base.u.tex.level);
25117ec681f3Smrg
25127ec681f3Smrg      surf->cb_color_attrib3 = S_028EE0_MIP0_DEPTH(mip0_depth) |
25137ec681f3Smrg                               S_028EE0_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type) |
25147ec681f3Smrg                               S_028EE0_RESOURCE_LEVEL(1);
25157ec681f3Smrg   } else if (sctx->chip_class == GFX9) {
25167ec681f3Smrg      color_view |= S_028C6C_MIP_LEVEL_GFX9(surf->base.u.tex.level);
25177ec681f3Smrg      color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
25187ec681f3Smrg                      S_028C74_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type);
25197ec681f3Smrg   }
25207ec681f3Smrg
25217ec681f3Smrg   if (sctx->chip_class >= GFX9) {
25227ec681f3Smrg      surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(surf->width0 - 1) |
25237ec681f3Smrg                               S_028C68_MIP0_HEIGHT(surf->height0 - 1) |
25247ec681f3Smrg                               S_028C68_MAX_MIP(tex->buffer.b.b.last_level);
25257ec681f3Smrg   }
25267ec681f3Smrg
25277ec681f3Smrg   surf->cb_color_view = color_view;
25287ec681f3Smrg   surf->cb_color_info = color_info;
25297ec681f3Smrg   surf->cb_color_attrib = color_attrib;
25307ec681f3Smrg
25317ec681f3Smrg   /* Determine pixel shader export format */
25327ec681f3Smrg   si_choose_spi_color_formats(surf, format, swap, ntype, tex->is_depth);
25337ec681f3Smrg
25347ec681f3Smrg   surf->color_initialized = true;
2535af69d88dSmrg}
2536af69d88dSmrg
25377ec681f3Smrgstatic void si_init_depth_surface(struct si_context *sctx, struct si_surface *surf)
2538af69d88dSmrg{
25397ec681f3Smrg   struct si_texture *tex = (struct si_texture *)surf->base.texture;
25407ec681f3Smrg   unsigned level = surf->base.u.tex.level;
25417ec681f3Smrg   unsigned format, stencil_format;
25427ec681f3Smrg   uint32_t z_info, s_info;
25437ec681f3Smrg
25447ec681f3Smrg   format = si_translate_dbformat(tex->db_render_format);
25457ec681f3Smrg   stencil_format = tex->surface.has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
25467ec681f3Smrg
25477ec681f3Smrg   assert(format != V_028040_Z_INVALID);
25487ec681f3Smrg   if (format == V_028040_Z_INVALID)
25497ec681f3Smrg      PRINT_ERR("Invalid DB format: %d, disabling DB.\n", tex->buffer.b.b.format);
25507ec681f3Smrg
25517ec681f3Smrg   surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
25527ec681f3Smrg                         S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
25537ec681f3Smrg   surf->db_htile_data_base = 0;
25547ec681f3Smrg   surf->db_htile_surface = 0;
25557ec681f3Smrg
25567ec681f3Smrg   if (sctx->chip_class >= GFX10) {
25577ec681f3Smrg      surf->db_depth_view |= S_028008_SLICE_START_HI(surf->base.u.tex.first_layer >> 11) |
25587ec681f3Smrg                             S_028008_SLICE_MAX_HI(surf->base.u.tex.last_layer >> 11);
25597ec681f3Smrg   }
25607ec681f3Smrg
25617ec681f3Smrg   if (sctx->chip_class >= GFX9) {
25627ec681f3Smrg      assert(tex->surface.u.gfx9.surf_offset == 0);
25637ec681f3Smrg      surf->db_depth_base = tex->buffer.gpu_address >> 8;
25647ec681f3Smrg      surf->db_stencil_base = (tex->buffer.gpu_address + tex->surface.u.gfx9.zs.stencil_offset) >> 8;
25657ec681f3Smrg      z_info = S_028038_FORMAT(format) |
25667ec681f3Smrg               S_028038_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)) |
25677ec681f3Smrg               S_028038_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |
25687ec681f3Smrg               S_028038_MAXMIP(tex->buffer.b.b.last_level);
25697ec681f3Smrg      s_info = S_02803C_FORMAT(stencil_format) |
25707ec681f3Smrg               S_02803C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode);
25717ec681f3Smrg
25727ec681f3Smrg      if (sctx->chip_class == GFX9) {
25737ec681f3Smrg         surf->db_z_info2 = S_028068_EPITCH(tex->surface.u.gfx9.epitch);
25747ec681f3Smrg         surf->db_stencil_info2 = S_02806C_EPITCH(tex->surface.u.gfx9.zs.stencil_epitch);
25757ec681f3Smrg      }
25767ec681f3Smrg      surf->db_depth_view |= S_028008_MIPID(level);
25777ec681f3Smrg      surf->db_depth_size =
25787ec681f3Smrg         S_02801C_X_MAX(tex->buffer.b.b.width0 - 1) | S_02801C_Y_MAX(tex->buffer.b.b.height0 - 1);
25797ec681f3Smrg
25807ec681f3Smrg      if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) {
25817ec681f3Smrg         z_info |= S_028038_TILE_SURFACE_ENABLE(1) | S_028038_ALLOW_EXPCLEAR(1);
25827ec681f3Smrg         s_info |= S_02803C_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled);
25837ec681f3Smrg
25847ec681f3Smrg         if (tex->surface.has_stencil && !tex->htile_stencil_disabled) {
25857ec681f3Smrg            /* Stencil buffer workaround ported from the GFX6-GFX8 code.
25867ec681f3Smrg             * See that for explanation.
25877ec681f3Smrg             */
25887ec681f3Smrg            s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1);
25897ec681f3Smrg         }
25907ec681f3Smrg
25917ec681f3Smrg         surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;
25927ec681f3Smrg         surf->db_htile_surface =
25937ec681f3Smrg            S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
25947ec681f3Smrg         if (sctx->chip_class == GFX9) {
25957ec681f3Smrg            surf->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
25967ec681f3Smrg         }
25977ec681f3Smrg      }
25987ec681f3Smrg   } else {
25997ec681f3Smrg      /* GFX6-GFX8 */
26007ec681f3Smrg      struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level];
26017ec681f3Smrg
26027ec681f3Smrg      assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
26037ec681f3Smrg
26047ec681f3Smrg      surf->db_depth_base =
26057ec681f3Smrg         (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.level[level].offset_256B;
26067ec681f3Smrg      surf->db_stencil_base =
26077ec681f3Smrg         (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.zs.stencil_level[level].offset_256B;
26087ec681f3Smrg
26097ec681f3Smrg      z_info =
26107ec681f3Smrg         S_028040_FORMAT(format) | S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples));
26117ec681f3Smrg      s_info = S_028044_FORMAT(stencil_format);
26127ec681f3Smrg      surf->db_depth_info = 0;
26137ec681f3Smrg
26147ec681f3Smrg      if (sctx->chip_class >= GFX7) {
26157ec681f3Smrg         struct radeon_info *info = &sctx->screen->info;
26167ec681f3Smrg         unsigned index = tex->surface.u.legacy.tiling_index[level];
26177ec681f3Smrg         unsigned stencil_index = tex->surface.u.legacy.zs.stencil_tiling_index[level];
26187ec681f3Smrg         unsigned macro_index = tex->surface.u.legacy.macro_tile_index;
26197ec681f3Smrg         unsigned tile_mode = info->si_tile_mode_array[index];
26207ec681f3Smrg         unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
26217ec681f3Smrg         unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
26227ec681f3Smrg
26237ec681f3Smrg         surf->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
26247ec681f3Smrg                                S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
26257ec681f3Smrg                                S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
26267ec681f3Smrg                                S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
26277ec681f3Smrg                                S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
26287ec681f3Smrg                                S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
26297ec681f3Smrg         z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
26307ec681f3Smrg         s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
26317ec681f3Smrg      } else {
26327ec681f3Smrg         unsigned tile_mode_index = si_tile_mode_index(tex, level, false);
26337ec681f3Smrg         z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
26347ec681f3Smrg         tile_mode_index = si_tile_mode_index(tex, level, true);
26357ec681f3Smrg         s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
26367ec681f3Smrg      }
26377ec681f3Smrg
26387ec681f3Smrg      surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
26397ec681f3Smrg                            S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
26407ec681f3Smrg      surf->db_depth_slice =
26417ec681f3Smrg         S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * levelinfo->nblk_y) / 64 - 1);
26427ec681f3Smrg
26437ec681f3Smrg      if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) {
26447ec681f3Smrg         z_info |= S_028040_TILE_SURFACE_ENABLE(1) | S_028040_ALLOW_EXPCLEAR(1);
26457ec681f3Smrg         s_info |= S_028044_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled);
26467ec681f3Smrg
26477ec681f3Smrg         if (tex->surface.has_stencil) {
26487ec681f3Smrg            /* Workaround: For a not yet understood reason, the
26497ec681f3Smrg             * combination of MSAA, fast stencil clear and stencil
26507ec681f3Smrg             * decompress messes with subsequent stencil buffer
26517ec681f3Smrg             * uses. Problem was reproduced on Verde, Bonaire,
26527ec681f3Smrg             * Tonga, and Carrizo.
26537ec681f3Smrg             *
26547ec681f3Smrg             * Disabling EXPCLEAR works around the problem.
26557ec681f3Smrg             *
26567ec681f3Smrg             * Check piglit's arb_texture_multisample-stencil-clear
26577ec681f3Smrg             * test if you want to try changing this.
26587ec681f3Smrg             */
26597ec681f3Smrg            if (tex->buffer.b.b.nr_samples <= 1)
26607ec681f3Smrg               s_info |= S_028044_ALLOW_EXPCLEAR(1);
26617ec681f3Smrg         }
26627ec681f3Smrg
26637ec681f3Smrg         surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;
26647ec681f3Smrg         surf->db_htile_surface = S_028ABC_FULL_CACHE(1);
26657ec681f3Smrg      }
26667ec681f3Smrg   }
26677ec681f3Smrg
26687ec681f3Smrg   surf->db_z_info = z_info;
26697ec681f3Smrg   surf->db_stencil_info = s_info;
26707ec681f3Smrg
26717ec681f3Smrg   surf->depth_initialized = true;
2672af69d88dSmrg}
2673af69d88dSmrg
267401e04c3fSmrgvoid si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
267501e04c3fSmrg{
26767ec681f3Smrg   if (sctx->decompression_enabled)
26777ec681f3Smrg      return;
26787ec681f3Smrg
26797ec681f3Smrg   if (sctx->framebuffer.state.zsbuf) {
26807ec681f3Smrg      struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
26817ec681f3Smrg      struct si_texture *tex = (struct si_texture *)surf->texture;
26827ec681f3Smrg
26837ec681f3Smrg      tex->dirty_level_mask |= 1 << surf->u.tex.level;
26847ec681f3Smrg
26857ec681f3Smrg      if (tex->surface.has_stencil)
26867ec681f3Smrg         tex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
26877ec681f3Smrg   }
26887ec681f3Smrg
26897ec681f3Smrg   unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask;
26907ec681f3Smrg   while (compressed_cb_mask) {
26917ec681f3Smrg      unsigned i = u_bit_scan(&compressed_cb_mask);
26927ec681f3Smrg      struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
26937ec681f3Smrg      struct si_texture *tex = (struct si_texture *)surf->texture;
26947ec681f3Smrg
26957ec681f3Smrg      if (tex->surface.fmask_offset) {
26967ec681f3Smrg         tex->dirty_level_mask |= 1 << surf->u.tex.level;
26977ec681f3Smrg         tex->fmask_is_identity = false;
26987ec681f3Smrg      }
26997ec681f3Smrg   }
270001e04c3fSmrg}
270101e04c3fSmrg
270201e04c3fSmrgstatic void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
270301e04c3fSmrg{
27047ec681f3Smrg   for (int i = 0; i < state->nr_cbufs; ++i) {
27057ec681f3Smrg      struct si_surface *surf = NULL;
27067ec681f3Smrg      struct si_texture *tex;
27077ec681f3Smrg
27087ec681f3Smrg      if (!state->cbufs[i])
27097ec681f3Smrg         continue;
27107ec681f3Smrg      surf = (struct si_surface *)state->cbufs[i];
27117ec681f3Smrg      tex = (struct si_texture *)surf->base.texture;
27127ec681f3Smrg
27137ec681f3Smrg      p_atomic_dec(&tex->framebuffers_bound);
27147ec681f3Smrg   }
27157ec681f3Smrg}
27167ec681f3Smrg
27177ec681f3Smrgvoid si_mark_display_dcc_dirty(struct si_context *sctx, struct si_texture *tex)
27187ec681f3Smrg{
27197ec681f3Smrg   if (!tex->surface.display_dcc_offset || tex->displayable_dcc_dirty)
27207ec681f3Smrg      return;
27217ec681f3Smrg
27227ec681f3Smrg   if (!(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) {
27237ec681f3Smrg      struct hash_entry *entry = _mesa_hash_table_search(sctx->dirty_implicit_resources, tex);
27247ec681f3Smrg      if (!entry) {
27257ec681f3Smrg         struct pipe_resource *dummy = NULL;
27267ec681f3Smrg         pipe_resource_reference(&dummy, &tex->buffer.b.b);
27277ec681f3Smrg         _mesa_hash_table_insert(sctx->dirty_implicit_resources, tex, tex);
27287ec681f3Smrg      }
27297ec681f3Smrg   }
27307ec681f3Smrg   tex->displayable_dcc_dirty = true;
27317ec681f3Smrg}
273201e04c3fSmrg
27337ec681f3Smrgstatic void si_update_display_dcc_dirty(struct si_context *sctx)
27347ec681f3Smrg{
27357ec681f3Smrg   const struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
273601e04c3fSmrg
27377ec681f3Smrg   for (unsigned i = 0; i < state->nr_cbufs; i++) {
27387ec681f3Smrg      if (state->cbufs[i])
27397ec681f3Smrg         si_mark_display_dcc_dirty(sctx, (struct si_texture *)state->cbufs[i]->texture);
27407ec681f3Smrg   }
274101e04c3fSmrg}
274201e04c3fSmrg
2743af69d88dSmrgstatic void si_set_framebuffer_state(struct pipe_context *ctx,
27447ec681f3Smrg                                     const struct pipe_framebuffer_state *state)
2745af69d88dSmrg{
27467ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
27477ec681f3Smrg   struct si_surface *surf = NULL;
27487ec681f3Smrg   struct si_texture *tex;
27497ec681f3Smrg   bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
27507ec681f3Smrg   unsigned old_nr_samples = sctx->framebuffer.nr_samples;
27517ec681f3Smrg   unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit;
27527ec681f3Smrg   bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf;
27537ec681f3Smrg   bool old_has_stencil =
27547ec681f3Smrg      old_has_zsbuf &&
27557ec681f3Smrg      ((struct si_texture *)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil;
27567ec681f3Smrg   bool unbound = false;
27577ec681f3Smrg   int i;
27587ec681f3Smrg
27597ec681f3Smrg   /* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs
27607ec681f3Smrg    * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.
27617ec681f3Smrg    * We could implement the full workaround here, but it's a useless case.
27627ec681f3Smrg    */
27637ec681f3Smrg   if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) {
27647ec681f3Smrg      unreachable("the framebuffer shouldn't have zero area");
27657ec681f3Smrg      return;
27667ec681f3Smrg   }
27677ec681f3Smrg
27687ec681f3Smrg   si_update_fb_dirtiness_after_rendering(sctx);
27697ec681f3Smrg
27707ec681f3Smrg   /* Disable DCC if the formats are incompatible. */
27717ec681f3Smrg   for (i = 0; i < state->nr_cbufs; i++) {
27727ec681f3Smrg      if (!state->cbufs[i])
27737ec681f3Smrg         continue;
27747ec681f3Smrg
27757ec681f3Smrg      surf = (struct si_surface *)state->cbufs[i];
27767ec681f3Smrg      tex = (struct si_texture *)surf->base.texture;
27777ec681f3Smrg
27787ec681f3Smrg      if (!surf->dcc_incompatible)
27797ec681f3Smrg         continue;
27807ec681f3Smrg
27817ec681f3Smrg      /* Since the DCC decompression calls back into set_framebuffer-
27827ec681f3Smrg       * _state, we need to unbind the framebuffer, so that
27837ec681f3Smrg       * vi_separate_dcc_stop_query isn't called twice with the same
27847ec681f3Smrg       * color buffer.
27857ec681f3Smrg       */
27867ec681f3Smrg      if (!unbound) {
27877ec681f3Smrg         util_copy_framebuffer_state(&sctx->framebuffer.state, NULL);
27887ec681f3Smrg         unbound = true;
27897ec681f3Smrg      }
27907ec681f3Smrg
27917ec681f3Smrg      if (vi_dcc_enabled(tex, surf->base.u.tex.level))
27927ec681f3Smrg         if (!si_texture_disable_dcc(sctx, tex))
27937ec681f3Smrg            si_decompress_dcc(sctx, tex);
27947ec681f3Smrg
27957ec681f3Smrg      surf->dcc_incompatible = false;
27967ec681f3Smrg   }
27977ec681f3Smrg
27987ec681f3Smrg   /* Only flush TC when changing the framebuffer state, because
27997ec681f3Smrg    * the only client not using TC that can change textures is
28007ec681f3Smrg    * the framebuffer.
28017ec681f3Smrg    *
28027ec681f3Smrg    * Wait for compute shaders because of possible transitions:
28037ec681f3Smrg    * - FB write -> shader read
28047ec681f3Smrg    * - shader write -> FB read
28057ec681f3Smrg    *
28067ec681f3Smrg    * DB caches are flushed on demand (using si_decompress_textures).
28077ec681f3Smrg    *
28087ec681f3Smrg    * When MSAA is enabled, CB and TC caches are flushed on demand
28097ec681f3Smrg    * (after FMASK decompression). Shader write -> FB read transitions
28107ec681f3Smrg    * cannot happen for MSAA textures, because MSAA shader images are
28117ec681f3Smrg    * not supported.
28127ec681f3Smrg    *
28137ec681f3Smrg    * Only flush and wait for CB if there is actually a bound color buffer.
28147ec681f3Smrg    */
28157ec681f3Smrg   if (sctx->framebuffer.uncompressed_cb_mask) {
28167ec681f3Smrg      si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
28177ec681f3Smrg                                 sctx->framebuffer.CB_has_shader_readable_metadata,
28187ec681f3Smrg                                 sctx->framebuffer.all_DCC_pipe_aligned);
28197ec681f3Smrg   }
28207ec681f3Smrg
28217ec681f3Smrg   sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
28227ec681f3Smrg
28237ec681f3Smrg   /* u_blitter doesn't invoke depth decompression when it does multiple
28247ec681f3Smrg    * blits in a row, but the only case when it matters for DB is when
28257ec681f3Smrg    * doing generate_mipmap. So here we flush DB manually between
28267ec681f3Smrg    * individual generate_mipmap blits.
28277ec681f3Smrg    * Note that lower mipmap levels aren't compressed.
28287ec681f3Smrg    */
28297ec681f3Smrg   if (sctx->generate_mipmap_for_depth) {
28307ec681f3Smrg      si_make_DB_shader_coherent(sctx, 1, false, sctx->framebuffer.DB_has_shader_readable_metadata);
28317ec681f3Smrg   } else if (sctx->chip_class == GFX9) {
28327ec681f3Smrg      /* It appears that DB metadata "leaks" in a sequence of:
28337ec681f3Smrg       *  - depth clear
28347ec681f3Smrg       *  - DCC decompress for shader image writes (with DB disabled)
28357ec681f3Smrg       *  - render with DEPTH_BEFORE_SHADER=1
28367ec681f3Smrg       * Flushing DB metadata works around the problem.
28377ec681f3Smrg       */
28387ec681f3Smrg      sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;
28397ec681f3Smrg   }
28407ec681f3Smrg
28417ec681f3Smrg   /* Take the maximum of the old and new count. If the new count is lower,
28427ec681f3Smrg    * dirtying is needed to disable the unbound colorbuffers.
28437ec681f3Smrg    */
28447ec681f3Smrg   sctx->framebuffer.dirty_cbufs |=
28457ec681f3Smrg      (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
28467ec681f3Smrg   sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
28477ec681f3Smrg
28487ec681f3Smrg   si_dec_framebuffer_counters(&sctx->framebuffer.state);
28497ec681f3Smrg   util_copy_framebuffer_state(&sctx->framebuffer.state, state);
28507ec681f3Smrg
28517ec681f3Smrg   sctx->framebuffer.colorbuf_enabled_4bit = 0;
28527ec681f3Smrg   sctx->framebuffer.spi_shader_col_format = 0;
28537ec681f3Smrg   sctx->framebuffer.spi_shader_col_format_alpha = 0;
28547ec681f3Smrg   sctx->framebuffer.spi_shader_col_format_blend = 0;
28557ec681f3Smrg   sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
28567ec681f3Smrg   sctx->framebuffer.color_is_int8 = 0;
28577ec681f3Smrg   sctx->framebuffer.color_is_int10 = 0;
28587ec681f3Smrg
28597ec681f3Smrg   sctx->framebuffer.compressed_cb_mask = 0;
28607ec681f3Smrg   sctx->framebuffer.uncompressed_cb_mask = 0;
28617ec681f3Smrg   sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
28627ec681f3Smrg   sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples;
28637ec681f3Smrg   sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
28647ec681f3Smrg   sctx->framebuffer.any_dst_linear = false;
28657ec681f3Smrg   sctx->framebuffer.CB_has_shader_readable_metadata = false;
28667ec681f3Smrg   sctx->framebuffer.DB_has_shader_readable_metadata = false;
28677ec681f3Smrg   sctx->framebuffer.all_DCC_pipe_aligned = true;
28687ec681f3Smrg   sctx->framebuffer.has_dcc_msaa = false;
28697ec681f3Smrg   sctx->framebuffer.min_bytes_per_pixel = 0;
28707ec681f3Smrg
28717ec681f3Smrg   for (i = 0; i < state->nr_cbufs; i++) {
28727ec681f3Smrg      if (!state->cbufs[i])
28737ec681f3Smrg         continue;
28747ec681f3Smrg
28757ec681f3Smrg      surf = (struct si_surface *)state->cbufs[i];
28767ec681f3Smrg      tex = (struct si_texture *)surf->base.texture;
28777ec681f3Smrg
28787ec681f3Smrg      if (!surf->color_initialized) {
28797ec681f3Smrg         si_initialize_color_surface(sctx, surf);
28807ec681f3Smrg      }
28817ec681f3Smrg
28827ec681f3Smrg      sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4);
28837ec681f3Smrg      sctx->framebuffer.spi_shader_col_format |= surf->spi_shader_col_format << (i * 4);
28847ec681f3Smrg      sctx->framebuffer.spi_shader_col_format_alpha |= surf->spi_shader_col_format_alpha << (i * 4);
28857ec681f3Smrg      sctx->framebuffer.spi_shader_col_format_blend |= surf->spi_shader_col_format_blend << (i * 4);
28867ec681f3Smrg      sctx->framebuffer.spi_shader_col_format_blend_alpha |= surf->spi_shader_col_format_blend_alpha
28877ec681f3Smrg                                                             << (i * 4);
28887ec681f3Smrg
28897ec681f3Smrg      if (surf->color_is_int8)
28907ec681f3Smrg         sctx->framebuffer.color_is_int8 |= 1 << i;
28917ec681f3Smrg      if (surf->color_is_int10)
28927ec681f3Smrg         sctx->framebuffer.color_is_int10 |= 1 << i;
28937ec681f3Smrg
28947ec681f3Smrg      if (tex->surface.fmask_offset)
28957ec681f3Smrg         sctx->framebuffer.compressed_cb_mask |= 1 << i;
28967ec681f3Smrg      else
28977ec681f3Smrg         sctx->framebuffer.uncompressed_cb_mask |= 1 << i;
28987ec681f3Smrg
28997ec681f3Smrg      /* Don't update nr_color_samples for non-AA buffers.
29007ec681f3Smrg       * (e.g. destination of MSAA resolve)
29017ec681f3Smrg       */
29027ec681f3Smrg      if (tex->buffer.b.b.nr_samples >= 2 &&
29037ec681f3Smrg          tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) {
29047ec681f3Smrg         sctx->framebuffer.nr_color_samples =
29057ec681f3Smrg            MIN2(sctx->framebuffer.nr_color_samples, tex->buffer.b.b.nr_storage_samples);
29067ec681f3Smrg         sctx->framebuffer.nr_color_samples = MAX2(1, sctx->framebuffer.nr_color_samples);
29077ec681f3Smrg      }
29087ec681f3Smrg
29097ec681f3Smrg      if (tex->surface.is_linear)
29107ec681f3Smrg         sctx->framebuffer.any_dst_linear = true;
29117ec681f3Smrg
29127ec681f3Smrg      if (vi_dcc_enabled(tex, surf->base.u.tex.level)) {
29137ec681f3Smrg         sctx->framebuffer.CB_has_shader_readable_metadata = true;
29147ec681f3Smrg
29157ec681f3Smrg         if (sctx->chip_class >= GFX9 && !tex->surface.u.gfx9.color.dcc.pipe_aligned)
29167ec681f3Smrg            sctx->framebuffer.all_DCC_pipe_aligned = false;
29177ec681f3Smrg
29187ec681f3Smrg         if (tex->buffer.b.b.nr_storage_samples >= 2)
29197ec681f3Smrg            sctx->framebuffer.has_dcc_msaa = true;
29207ec681f3Smrg      }
29217ec681f3Smrg
29227ec681f3Smrg      si_context_add_resource_size(sctx, surf->base.texture);
29237ec681f3Smrg
29247ec681f3Smrg      p_atomic_inc(&tex->framebuffers_bound);
29257ec681f3Smrg
29267ec681f3Smrg      /* Update the minimum but don't keep 0. */
29277ec681f3Smrg      if (!sctx->framebuffer.min_bytes_per_pixel ||
29287ec681f3Smrg          tex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel)
29297ec681f3Smrg         sctx->framebuffer.min_bytes_per_pixel = tex->surface.bpe;
29307ec681f3Smrg   }
29317ec681f3Smrg
29327ec681f3Smrg   /* For optimal DCC performance. */
29337ec681f3Smrg   if (sctx->chip_class >= GFX10)
29347ec681f3Smrg      sctx->framebuffer.dcc_overwrite_combiner_watermark = 6;
29357ec681f3Smrg   else
29367ec681f3Smrg      sctx->framebuffer.dcc_overwrite_combiner_watermark = 4;
29377ec681f3Smrg
29387ec681f3Smrg   struct si_texture *zstex = NULL;
29397ec681f3Smrg
29407ec681f3Smrg   if (state->zsbuf) {
29417ec681f3Smrg      surf = (struct si_surface *)state->zsbuf;
29427ec681f3Smrg      zstex = (struct si_texture *)surf->base.texture;
29437ec681f3Smrg
29447ec681f3Smrg      if (!surf->depth_initialized) {
29457ec681f3Smrg         si_init_depth_surface(sctx, surf);
29467ec681f3Smrg      }
29477ec681f3Smrg
29487ec681f3Smrg      if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level, PIPE_MASK_ZS))
29497ec681f3Smrg         sctx->framebuffer.DB_has_shader_readable_metadata = true;
29507ec681f3Smrg
29517ec681f3Smrg      si_context_add_resource_size(sctx, surf->base.texture);
29527ec681f3Smrg
29537ec681f3Smrg      /* Update the minimum but don't keep 0. */
29547ec681f3Smrg      if (!sctx->framebuffer.min_bytes_per_pixel ||
29557ec681f3Smrg          zstex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel)
29567ec681f3Smrg         sctx->framebuffer.min_bytes_per_pixel = zstex->surface.bpe;
29577ec681f3Smrg   }
29587ec681f3Smrg
29597ec681f3Smrg   si_update_ps_colorbuf0_slot(sctx);
29607ec681f3Smrg   si_update_poly_offset_state(sctx);
29617ec681f3Smrg   si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
29627ec681f3Smrg   si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
29637ec681f3Smrg
29647ec681f3Smrg   /* NGG cull state uses the sample count. */
29657ec681f3Smrg   if (sctx->screen->use_ngg_culling)
29667ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state);
29677ec681f3Smrg
29687ec681f3Smrg   if (sctx->screen->dpbb_allowed)
29697ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
29707ec681f3Smrg
29717ec681f3Smrg   if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
29727ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
29737ec681f3Smrg
29747ec681f3Smrg   if (sctx->screen->has_out_of_order_rast &&
29757ec681f3Smrg       (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit ||
29767ec681f3Smrg        !!sctx->framebuffer.state.zsbuf != old_has_zsbuf ||
29777ec681f3Smrg        (zstex && zstex->surface.has_stencil != old_has_stencil)))
29787ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
29797ec681f3Smrg
29807ec681f3Smrg   if (sctx->framebuffer.nr_samples != old_nr_samples) {
29817ec681f3Smrg      struct pipe_constant_buffer constbuf = {0};
29827ec681f3Smrg
29837ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
29847ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
29857ec681f3Smrg
29867ec681f3Smrg      if (!sctx->sample_pos_buffer) {
29877ec681f3Smrg         sctx->sample_pos_buffer = pipe_buffer_create_with_data(&sctx->b, 0, PIPE_USAGE_DEFAULT,
29887ec681f3Smrg                                                      sizeof(sctx->sample_positions),
29897ec681f3Smrg                                                      &sctx->sample_positions);
29907ec681f3Smrg      }
29917ec681f3Smrg      constbuf.buffer = sctx->sample_pos_buffer;
29927ec681f3Smrg
29937ec681f3Smrg      /* Set sample locations as fragment shader constants. */
29947ec681f3Smrg      switch (sctx->framebuffer.nr_samples) {
29957ec681f3Smrg      case 1:
29967ec681f3Smrg         constbuf.buffer_offset = 0;
29977ec681f3Smrg         break;
29987ec681f3Smrg      case 2:
29997ec681f3Smrg         constbuf.buffer_offset =
30007ec681f3Smrg            (ubyte *)sctx->sample_positions.x2 - (ubyte *)sctx->sample_positions.x1;
30017ec681f3Smrg         break;
30027ec681f3Smrg      case 4:
30037ec681f3Smrg         constbuf.buffer_offset =
30047ec681f3Smrg            (ubyte *)sctx->sample_positions.x4 - (ubyte *)sctx->sample_positions.x1;
30057ec681f3Smrg         break;
30067ec681f3Smrg      case 8:
30077ec681f3Smrg         constbuf.buffer_offset =
30087ec681f3Smrg            (ubyte *)sctx->sample_positions.x8 - (ubyte *)sctx->sample_positions.x1;
30097ec681f3Smrg         break;
30107ec681f3Smrg      case 16:
30117ec681f3Smrg         constbuf.buffer_offset =
30127ec681f3Smrg            (ubyte *)sctx->sample_positions.x16 - (ubyte *)sctx->sample_positions.x1;
30137ec681f3Smrg         break;
30147ec681f3Smrg      default:
30157ec681f3Smrg         PRINT_ERR("Requested an invalid number of samples %i.\n", sctx->framebuffer.nr_samples);
30167ec681f3Smrg         assert(0);
30177ec681f3Smrg      }
30187ec681f3Smrg      constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
30197ec681f3Smrg      si_set_internal_const_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
30207ec681f3Smrg
30217ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
30227ec681f3Smrg   }
30237ec681f3Smrg
30247ec681f3Smrg   si_ps_key_update_framebuffer(sctx);
30257ec681f3Smrg   si_ps_key_update_framebuffer_blend(sctx);
30267ec681f3Smrg   si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx);
30277ec681f3Smrg   si_update_ps_inputs_read_or_disabled(sctx);
30287ec681f3Smrg   sctx->do_update_shaders = true;
30297ec681f3Smrg
30307ec681f3Smrg   if (!sctx->decompression_enabled) {
30317ec681f3Smrg      /* Prevent textures decompression when the framebuffer state
30327ec681f3Smrg       * changes come from the decompression passes themselves.
30337ec681f3Smrg       */
30347ec681f3Smrg      sctx->need_check_render_feedback = true;
30357ec681f3Smrg   }
3036af69d88dSmrg}
3037af69d88dSmrg
303801e04c3fSmrgstatic void si_emit_framebuffer_state(struct si_context *sctx)
3039af69d88dSmrg{
                   /* Emit the context registers that describe the framebuffer stored in
                    * sctx->framebuffer.state into the gfx command stream (sctx->gfx_cs):
                    *  - one CB_COLORn register group per color buffer slot whose bit is set in
                    *    framebuffer.dirty_cbufs,
                    *  - the DB_* depth/stencil registers if framebuffer.dirty_zsbuf is set,
                    *  - the window scissor bottom-right corner (always).
                    * Both dirty masks are cleared before returning.
                    */
30407ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
30417ec681f3Smrg   struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
30427ec681f3Smrg   unsigned i, nr_cbufs = state->nr_cbufs;
30437ec681f3Smrg   struct si_texture *tex = NULL;
30447ec681f3Smrg   struct si_surface *cb = NULL;
30457ec681f3Smrg   unsigned cb_color_info = 0;
30467ec681f3Smrg
30477ec681f3Smrg   radeon_begin(cs);
30487ec681f3Smrg
30497ec681f3Smrg   /* Colorbuffers. */
                   /* Per-slot CB_COLORn registers are addressed as the slot-0 register plus
                    * i * 0x3C throughout this loop.
                    */
30507ec681f3Smrg   for (i = 0; i < nr_cbufs; i++) {
30517ec681f3Smrg      uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base;
30527ec681f3Smrg      unsigned cb_color_attrib;
30537ec681f3Smrg
                      /* Skip slots that have not changed since the last emit. */
30547ec681f3Smrg      if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
30557ec681f3Smrg         continue;
30567ec681f3Smrg
30577ec681f3Smrg      cb = (struct si_surface *)state->cbufs[i];
30587ec681f3Smrg      if (!cb) {
                         /* Unbound slot: only program an invalid color format. */
30597ec681f3Smrg         radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C,
30607ec681f3Smrg                                S_028C70_FORMAT(V_028C70_COLOR_INVALID));
30617ec681f3Smrg         continue;
30627ec681f3Smrg      }
30637ec681f3Smrg
                      /* Reference the texture's buffer from this command stream for read/write
                       * access; the priority depends on whether the texture is multisampled.
                       */
30647ec681f3Smrg      tex = (struct si_texture *)cb->base.texture;
30657ec681f3Smrg      radeon_add_to_buffer_list(
30667ec681f3Smrg         sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC,
30677ec681f3Smrg         tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER);
30687ec681f3Smrg
                      /* A CMASK buffer that is distinct from the texture buffer is referenced
                       * separately.
                       */
30697ec681f3Smrg      if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) {
30707ec681f3Smrg         radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer,
30717ec681f3Smrg                                   RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC,
30727ec681f3Smrg                                   RADEON_PRIO_SEPARATE_META);
30737ec681f3Smrg      }
30747ec681f3Smrg
30757ec681f3Smrg      /* Compute mutable surface parameters. */
                      /* Base addresses are kept shifted right by 8, i.e. in 256-byte units. */
30767ec681f3Smrg      cb_color_base = tex->buffer.gpu_address >> 8;
30777ec681f3Smrg      cb_color_fmask = 0;
30787ec681f3Smrg      cb_color_cmask = tex->cmask_base_address_reg;
30797ec681f3Smrg      cb_dcc_base = 0;
30807ec681f3Smrg      cb_color_info = cb->cb_color_info | tex->cb_color_info;
30817ec681f3Smrg      cb_color_attrib = cb->cb_color_attrib;
30827ec681f3Smrg
30837ec681f3Smrg      if (tex->swap_rgb_to_bgr) {
30847ec681f3Smrg         /* Swap R and B channels. */
                         /* Lookup table indexed by the current COMP_SWAP value; each STD entry
                          * maps to its ALT counterpart and vice versa.
                          */
30857ec681f3Smrg         static unsigned rgb_to_bgr[4] = {
30867ec681f3Smrg            [V_028C70_SWAP_STD] = V_028C70_SWAP_ALT,
30877ec681f3Smrg            [V_028C70_SWAP_ALT] = V_028C70_SWAP_STD,
30887ec681f3Smrg            [V_028C70_SWAP_STD_REV] = V_028C70_SWAP_ALT_REV,
30897ec681f3Smrg            [V_028C70_SWAP_ALT_REV] = V_028C70_SWAP_STD_REV,
30907ec681f3Smrg         };
30917ec681f3Smrg         unsigned swap = rgb_to_bgr[G_028C70_COMP_SWAP(cb_color_info)];
30927ec681f3Smrg
30937ec681f3Smrg         cb_color_info &= C_028C70_COMP_SWAP;
30947ec681f3Smrg         cb_color_info |= S_028C70_COMP_SWAP(swap);
30957ec681f3Smrg      }
30967ec681f3Smrg
                      /* The FAST_CLEAR bit is cleared when rendering to any level other than 0. */
30977ec681f3Smrg      if (cb->base.u.tex.level > 0)
30987ec681f3Smrg         cb_color_info &= C_028C70_FAST_CLEAR;
30997ec681f3Smrg
                      /* A non-zero fmask_offset means the surface has FMASK; otherwise
                       * cb_color_fmask is pointed at the color base further below.
                       */
31007ec681f3Smrg      if (tex->surface.fmask_offset) {
31017ec681f3Smrg         cb_color_fmask = (tex->buffer.gpu_address + tex->surface.fmask_offset) >> 8;
31027ec681f3Smrg         cb_color_fmask |= tex->surface.fmask_tile_swizzle;
31037ec681f3Smrg      }
31047ec681f3Smrg
31057ec681f3Smrg      /* Set up DCC. */
31067ec681f3Smrg      if (vi_dcc_enabled(tex, cb->base.u.tex.level)) {
                         /* True when slot 0 holds a multisampled texture and this surface is
                          * bound in slot 1 with a single-sampled texture. In that case
                          * DCC_ENABLE is left unset, but the DCC base is still programmed.
                          */
31077ec681f3Smrg         bool is_msaa_resolve_dst = state->cbufs[0] && state->cbufs[0]->texture->nr_samples > 1 &&
31087ec681f3Smrg                                    state->cbufs[1] == &cb->base &&
31097ec681f3Smrg                                    state->cbufs[1]->texture->nr_samples <= 1;
31107ec681f3Smrg
31117ec681f3Smrg         if (!is_msaa_resolve_dst)
31127ec681f3Smrg            cb_color_info |= S_028C70_DCC_ENABLE(1);
31137ec681f3Smrg
31147ec681f3Smrg         cb_dcc_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;
31157ec681f3Smrg
                         /* Keep only the tile swizzle bits that fit below the metadata
                          * alignment (the mask is shifted by 8 to match the 256-byte units).
                          */
31167ec681f3Smrg         unsigned dcc_tile_swizzle = tex->surface.tile_swizzle;
31177ec681f3Smrg         dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8;
31187ec681f3Smrg         cb_dcc_base |= dcc_tile_swizzle;
31197ec681f3Smrg      }
31207ec681f3Smrg
31217ec681f3Smrg      if (sctx->chip_class >= GFX10) {
31227ec681f3Smrg         unsigned cb_color_attrib3;
31237ec681f3Smrg
31247ec681f3Smrg         /* Set mutable surface parameters. */
31257ec681f3Smrg         cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
31267ec681f3Smrg         cb_color_base |= tex->surface.tile_swizzle;
31277ec681f3Smrg         if (!tex->surface.fmask_offset)
31287ec681f3Smrg            cb_color_fmask = cb_color_base;
31297ec681f3Smrg         if (cb->base.u.tex.level > 0)
31307ec681f3Smrg            cb_color_cmask = cb_color_base;
31317ec681f3Smrg
31327ec681f3Smrg         cb_color_attrib3 = cb->cb_color_attrib3 |
31337ec681f3Smrg                            S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |
31347ec681f3Smrg                            S_028EE0_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) |
31357ec681f3Smrg                            S_028EE0_CMASK_PIPE_ALIGNED(1) |
31367ec681f3Smrg                            S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned);
31377ec681f3Smrg
                         /* 14 consecutive registers; the "hole" entries are written as 0. */
31387ec681f3Smrg         radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 14);
31397ec681f3Smrg         radeon_emit(cb_color_base);             /* CB_COLOR0_BASE */
31407ec681f3Smrg         radeon_emit(0);                         /* hole */
31417ec681f3Smrg         radeon_emit(0);                         /* hole */
31427ec681f3Smrg         radeon_emit(cb->cb_color_view);         /* CB_COLOR0_VIEW */
31437ec681f3Smrg         radeon_emit(cb_color_info);             /* CB_COLOR0_INFO */
31447ec681f3Smrg         radeon_emit(cb_color_attrib);           /* CB_COLOR0_ATTRIB */
31457ec681f3Smrg         radeon_emit(cb->cb_dcc_control);        /* CB_COLOR0_DCC_CONTROL */
31467ec681f3Smrg         radeon_emit(cb_color_cmask);            /* CB_COLOR0_CMASK */
31477ec681f3Smrg         radeon_emit(0);                         /* hole */
31487ec681f3Smrg         radeon_emit(cb_color_fmask);            /* CB_COLOR0_FMASK */
31497ec681f3Smrg         radeon_emit(0);                         /* hole */
31507ec681f3Smrg         radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */
31517ec681f3Smrg         radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */
31527ec681f3Smrg         radeon_emit(cb_dcc_base);               /* CB_COLOR0_DCC_BASE */
31537ec681f3Smrg
                         /* The bits above bit 31 of each address go to separate *_EXT
                          * registers, which are indexed with a stride of 4 per slot.
                          */
31547ec681f3Smrg         radeon_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32);
31557ec681f3Smrg         radeon_set_context_reg(R_028E60_CB_COLOR0_CMASK_BASE_EXT + i * 4,
31567ec681f3Smrg                                cb_color_cmask >> 32);
31577ec681f3Smrg         radeon_set_context_reg(R_028E80_CB_COLOR0_FMASK_BASE_EXT + i * 4,
31587ec681f3Smrg                                cb_color_fmask >> 32);
31597ec681f3Smrg         radeon_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32);
31607ec681f3Smrg         radeon_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2);
31617ec681f3Smrg         radeon_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3);
31627ec681f3Smrg      } else if (sctx->chip_class == GFX9) {
                         /* Default alignment flags; replaced by the surface's DCC flags when a
                          * non-depth texture has a non-zero meta_offset.
                          */
31637ec681f3Smrg         struct gfx9_surf_meta_flags meta = {
31647ec681f3Smrg            .rb_aligned = 1,
31657ec681f3Smrg            .pipe_aligned = 1,
31667ec681f3Smrg         };
31677ec681f3Smrg
31687ec681f3Smrg         if (!tex->is_depth && tex->surface.meta_offset)
31697ec681f3Smrg            meta = tex->surface.u.gfx9.color.dcc;
31707ec681f3Smrg
31717ec681f3Smrg         /* Set mutable surface parameters. */
31727ec681f3Smrg         cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
31737ec681f3Smrg         cb_color_base |= tex->surface.tile_swizzle;
31747ec681f3Smrg         if (!tex->surface.fmask_offset)
31757ec681f3Smrg            cb_color_fmask = cb_color_base;
31767ec681f3Smrg         if (cb->base.u.tex.level > 0)
31777ec681f3Smrg            cb_color_cmask = cb_color_base;
31787ec681f3Smrg         cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |
31797ec681f3Smrg                            S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) |
31807ec681f3Smrg                            S_028C74_RB_ALIGNED(meta.rb_aligned) |
31817ec681f3Smrg                            S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
31827ec681f3Smrg
                         /* 15 consecutive registers; here the *_EXT high-address registers are
                          * part of the same sequence.
                          */
31837ec681f3Smrg         radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 15);
31847ec681f3Smrg         radeon_emit(cb_color_base);                            /* CB_COLOR0_BASE */
31857ec681f3Smrg         radeon_emit(S_028C64_BASE_256B(cb_color_base >> 32));  /* CB_COLOR0_BASE_EXT */
31867ec681f3Smrg         radeon_emit(cb->cb_color_attrib2);                     /* CB_COLOR0_ATTRIB2 */
31877ec681f3Smrg         radeon_emit(cb->cb_color_view);                        /* CB_COLOR0_VIEW */
31887ec681f3Smrg         radeon_emit(cb_color_info);                            /* CB_COLOR0_INFO */
31897ec681f3Smrg         radeon_emit(cb_color_attrib);                          /* CB_COLOR0_ATTRIB */
31907ec681f3Smrg         radeon_emit(cb->cb_dcc_control);                       /* CB_COLOR0_DCC_CONTROL */
31917ec681f3Smrg         radeon_emit(cb_color_cmask);                           /* CB_COLOR0_CMASK */
31927ec681f3Smrg         radeon_emit(S_028C80_BASE_256B(cb_color_cmask >> 32)); /* CB_COLOR0_CMASK_BASE_EXT */
31937ec681f3Smrg         radeon_emit(cb_color_fmask);                           /* CB_COLOR0_FMASK */
31947ec681f3Smrg         radeon_emit(S_028C88_BASE_256B(cb_color_fmask >> 32)); /* CB_COLOR0_FMASK_BASE_EXT */
31957ec681f3Smrg         radeon_emit(tex->color_clear_value[0]);                /* CB_COLOR0_CLEAR_WORD0 */
31967ec681f3Smrg         radeon_emit(tex->color_clear_value[1]);                /* CB_COLOR0_CLEAR_WORD1 */
31977ec681f3Smrg         radeon_emit(cb_dcc_base);                              /* CB_COLOR0_DCC_BASE */
31987ec681f3Smrg         radeon_emit(S_028C98_BASE_256B(cb_dcc_base >> 32));    /* CB_COLOR0_DCC_BASE_EXT */
31997ec681f3Smrg
32007ec681f3Smrg         radeon_set_context_reg(R_0287A0_CB_MRT0_EPITCH + i * 4,
32017ec681f3Smrg                                S_0287A0_EPITCH(tex->surface.u.gfx9.epitch));
32027ec681f3Smrg      } else {
32037ec681f3Smrg         /* Compute mutable surface parameters (GFX6-GFX8). */
32047ec681f3Smrg         const struct legacy_surf_level *level_info =
32057ec681f3Smrg            &tex->surface.u.legacy.level[cb->base.u.tex.level];
32067ec681f3Smrg         unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
32077ec681f3Smrg         unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;
32087ec681f3Smrg
32097ec681f3Smrg         cb_color_base += level_info->offset_256B;
32107ec681f3Smrg         /* Only macrotiled modes can set tile swizzle. */
32117ec681f3Smrg         if (level_info->mode == RADEON_SURF_MODE_2D)
32127ec681f3Smrg            cb_color_base |= tex->surface.tile_swizzle;
32137ec681f3Smrg
32147ec681f3Smrg         if (!tex->surface.fmask_offset)
32157ec681f3Smrg            cb_color_fmask = cb_color_base;
32167ec681f3Smrg         if (cb->base.u.tex.level > 0)
32177ec681f3Smrg            cb_color_cmask = cb_color_base;
                         /* cb_dcc_base is non-zero only if DCC was set up above; add the
                          * per-level DCC offset in that case.
                          */
32187ec681f3Smrg         if (cb_dcc_base)
32197ec681f3Smrg            cb_dcc_base += tex->surface.u.legacy.color.dcc_level[cb->base.u.tex.level].dcc_offset >> 8;
32207ec681f3Smrg
                         /* Pitch is encoded as (blocks per row / 8) - 1 and slice size as
                          * (blocks per slice / 64) - 1.
                          */
32217ec681f3Smrg         pitch_tile_max = level_info->nblk_x / 8 - 1;
32227ec681f3Smrg         slice_tile_max = level_info->nblk_x * level_info->nblk_y / 64 - 1;
32237ec681f3Smrg         tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);
32247ec681f3Smrg
32257ec681f3Smrg         cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
32267ec681f3Smrg         cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
32277ec681f3Smrg         cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
32287ec681f3Smrg
                         /* FMASK_TILE_MAX is only written into the pitch register on GFX7 and
                          * newer in both branches below.
                          */
32297ec681f3Smrg         if (tex->surface.fmask_offset) {
32307ec681f3Smrg            if (sctx->chip_class >= GFX7)
32317ec681f3Smrg               cb_color_pitch |=
32327ec681f3Smrg                  S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
32337ec681f3Smrg            cb_color_attrib |=
32347ec681f3Smrg               S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.color.fmask.tiling_index);
32357ec681f3Smrg            cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.color.fmask.slice_tile_max);
32367ec681f3Smrg         } else {
32377ec681f3Smrg            /* This must be set for fast clear to work without FMASK. */
32387ec681f3Smrg            if (sctx->chip_class >= GFX7)
32397ec681f3Smrg               cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
32407ec681f3Smrg            cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
32417ec681f3Smrg            cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
32427ec681f3Smrg         }
32437ec681f3Smrg
                         /* 13 registers, plus CB_COLOR0_DCC_BASE as a 14th on GFX8 (emitted
                          * conditionally at the end of the sequence).
                          */
32447ec681f3Smrg         radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C,
32457ec681f3Smrg                                    sctx->chip_class >= GFX8 ? 14 : 13);
32467ec681f3Smrg         radeon_emit(cb_color_base);                              /* CB_COLOR0_BASE */
32477ec681f3Smrg         radeon_emit(cb_color_pitch);                             /* CB_COLOR0_PITCH */
32487ec681f3Smrg         radeon_emit(cb_color_slice);                             /* CB_COLOR0_SLICE */
32497ec681f3Smrg         radeon_emit(cb->cb_color_view);                          /* CB_COLOR0_VIEW */
32507ec681f3Smrg         radeon_emit(cb_color_info);                              /* CB_COLOR0_INFO */
32517ec681f3Smrg         radeon_emit(cb_color_attrib);                            /* CB_COLOR0_ATTRIB */
32527ec681f3Smrg         radeon_emit(cb->cb_dcc_control);                         /* CB_COLOR0_DCC_CONTROL */
32537ec681f3Smrg         radeon_emit(cb_color_cmask);                             /* CB_COLOR0_CMASK */
32547ec681f3Smrg         radeon_emit(tex->surface.u.legacy.color.cmask_slice_tile_max); /* CB_COLOR0_CMASK_SLICE */
32557ec681f3Smrg         radeon_emit(cb_color_fmask);                             /* CB_COLOR0_FMASK */
32567ec681f3Smrg         radeon_emit(cb_color_fmask_slice);                       /* CB_COLOR0_FMASK_SLICE */
32577ec681f3Smrg         radeon_emit(tex->color_clear_value[0]);                  /* CB_COLOR0_CLEAR_WORD0 */
32587ec681f3Smrg         radeon_emit(tex->color_clear_value[1]);                  /* CB_COLOR0_CLEAR_WORD1 */
32597ec681f3Smrg
32607ec681f3Smrg         if (sctx->chip_class >= GFX8) /* R_028C94_CB_COLOR0_DCC_BASE */
32617ec681f3Smrg            radeon_emit(cb_dcc_base);
32627ec681f3Smrg      }
32637ec681f3Smrg   }
                   /* Slots from nr_cbufs up to 8 that are flagged dirty get CB_COLORn_INFO = 0. */
32647ec681f3Smrg   for (; i < 8; i++)
32657ec681f3Smrg      if (sctx->framebuffer.dirty_cbufs & (1 << i))
32667ec681f3Smrg         radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
32677ec681f3Smrg
32687ec681f3Smrg   /* ZS buffer. */
32697ec681f3Smrg   if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
                      /* Note: this 'tex' shadows the function-scope 'tex' used by the color
                       * buffer loop above.
                       */
32707ec681f3Smrg      struct si_surface *zb = (struct si_surface *)state->zsbuf;
32717ec681f3Smrg      struct si_texture *tex = (struct si_texture *)zb->base.texture;
32727ec681f3Smrg      unsigned db_z_info = zb->db_z_info;
32737ec681f3Smrg      unsigned db_stencil_info = zb->db_stencil_info;
32747ec681f3Smrg      unsigned db_htile_surface = zb->db_htile_surface;
32757ec681f3Smrg
32767ec681f3Smrg      radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE,
32777ec681f3Smrg                                zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA
32787ec681f3Smrg                                                                 : RADEON_PRIO_DEPTH_BUFFER);
32797ec681f3Smrg
32807ec681f3Smrg      /* Set fields dependent on tc_compatible_htile. */
32817ec681f3Smrg      if (sctx->chip_class >= GFX9 &&
32827ec681f3Smrg          vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) {
                         /* 4 Z planes by default, 2 for multisampled Z16_UNORM; the register
                          * field is programmed with max_zplanes + 1.
                          */
32837ec681f3Smrg         unsigned max_zplanes = 4;
32847ec681f3Smrg
32857ec681f3Smrg         if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1)
32867ec681f3Smrg            max_zplanes = 2;
32877ec681f3Smrg
32887ec681f3Smrg         db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1);
32897ec681f3Smrg
32907ec681f3Smrg         if (sctx->chip_class >= GFX10) {
32917ec681f3Smrg            db_z_info |= S_028040_ITERATE_FLUSH(1);
32927ec681f3Smrg            db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled);
32937ec681f3Smrg         } else {
32947ec681f3Smrg            db_z_info |= S_028038_ITERATE_FLUSH(1);
32957ec681f3Smrg            db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
32967ec681f3Smrg         }
32977ec681f3Smrg      }
32987ec681f3Smrg
                      /* Mip level of the bound ZS surface; indexes the per-level clear values. */
32997ec681f3Smrg      unsigned level = zb->base.u.tex.level;
33007ec681f3Smrg
33017ec681f3Smrg      if (sctx->chip_class >= GFX10) {
33027ec681f3Smrg         radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
33037ec681f3Smrg         radeon_set_context_reg(R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size);
33047ec681f3Smrg
                         /* ZRANGE_PRECISION is set whenever the depth clear value for this
                          * level is non-zero (same on all three chip paths).
                          */
33057ec681f3Smrg         radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 7);
33067ec681f3Smrg         radeon_emit(S_02803C_RESOURCE_LEVEL(1)); /* DB_DEPTH_INFO */
33077ec681f3Smrg         radeon_emit(db_z_info |                  /* DB_Z_INFO */
33087ec681f3Smrg                     S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));
33097ec681f3Smrg         radeon_emit(db_stencil_info);     /* DB_STENCIL_INFO */
33107ec681f3Smrg         radeon_emit(zb->db_depth_base);   /* DB_Z_READ_BASE */
33117ec681f3Smrg         radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */
33127ec681f3Smrg         radeon_emit(zb->db_depth_base);   /* DB_Z_WRITE_BASE */
33137ec681f3Smrg         radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */
33147ec681f3Smrg
                         /* High address bits (>> 32) are emitted as a separate sequence. */
33157ec681f3Smrg         radeon_set_context_reg_seq(R_028068_DB_Z_READ_BASE_HI, 5);
33167ec681f3Smrg         radeon_emit(zb->db_depth_base >> 32);      /* DB_Z_READ_BASE_HI */
33177ec681f3Smrg         radeon_emit(zb->db_stencil_base >> 32);    /* DB_STENCIL_READ_BASE_HI */
33187ec681f3Smrg         radeon_emit(zb->db_depth_base >> 32);      /* DB_Z_WRITE_BASE_HI */
33197ec681f3Smrg         radeon_emit(zb->db_stencil_base >> 32);    /* DB_STENCIL_WRITE_BASE_HI */
33207ec681f3Smrg         radeon_emit(zb->db_htile_data_base >> 32); /* DB_HTILE_DATA_BASE_HI */
33217ec681f3Smrg      } else if (sctx->chip_class == GFX9) {
33227ec681f3Smrg         radeon_set_context_reg_seq(R_028014_DB_HTILE_DATA_BASE, 3);
33237ec681f3Smrg         radeon_emit(zb->db_htile_data_base); /* DB_HTILE_DATA_BASE */
33247ec681f3Smrg         radeon_emit(S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */
33257ec681f3Smrg         radeon_emit(zb->db_depth_size);                          /* DB_DEPTH_SIZE */
33267ec681f3Smrg
                         /* On GFX9 the low and high halves of each base address are
                          * interleaved in one 10-register sequence.
                          */
33277ec681f3Smrg         radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 10);
33287ec681f3Smrg         radeon_emit(db_z_info |                                   /* DB_Z_INFO */
33297ec681f3Smrg                     S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));
33307ec681f3Smrg         radeon_emit(db_stencil_info);                             /* DB_STENCIL_INFO */
33317ec681f3Smrg         radeon_emit(zb->db_depth_base);                           /* DB_Z_READ_BASE */
33327ec681f3Smrg         radeon_emit(S_028044_BASE_HI(zb->db_depth_base >> 32));   /* DB_Z_READ_BASE_HI */
33337ec681f3Smrg         radeon_emit(zb->db_stencil_base);                         /* DB_STENCIL_READ_BASE */
33347ec681f3Smrg         radeon_emit(S_02804C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
33357ec681f3Smrg         radeon_emit(zb->db_depth_base);                           /* DB_Z_WRITE_BASE */
33367ec681f3Smrg         radeon_emit(S_028054_BASE_HI(zb->db_depth_base >> 32));   /* DB_Z_WRITE_BASE_HI */
33377ec681f3Smrg         radeon_emit(zb->db_stencil_base);                         /* DB_STENCIL_WRITE_BASE */
33387ec681f3Smrg         radeon_emit(S_02805C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */
33397ec681f3Smrg
33407ec681f3Smrg         radeon_set_context_reg_seq(R_028068_DB_Z_INFO2, 2);
33417ec681f3Smrg         radeon_emit(zb->db_z_info2);       /* DB_Z_INFO2 */
33427ec681f3Smrg         radeon_emit(zb->db_stencil_info2); /* DB_STENCIL_INFO2 */
33437ec681f3Smrg      } else {
33447ec681f3Smrg         /* GFX6-GFX8 */
33457ec681f3Smrg         /* Set fields dependent on tc_compatible_htile. */
33467ec681f3Smrg         if (si_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) {
33477ec681f3Smrg            if (tex->tc_compatible_htile) {
33487ec681f3Smrg               db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
33497ec681f3Smrg
33507ec681f3Smrg               /* 0 = full compression. N = only compress up to N-1 Z planes. */
33517ec681f3Smrg               if (tex->buffer.b.b.nr_samples <= 1)
33527ec681f3Smrg                  db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
33537ec681f3Smrg               else if (tex->buffer.b.b.nr_samples <= 4)
33547ec681f3Smrg                  db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
33557ec681f3Smrg               else
33567ec681f3Smrg                  db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
33577ec681f3Smrg            }
33587ec681f3Smrg         }
33597ec681f3Smrg
33607ec681f3Smrg         radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
33617ec681f3Smrg
                         /* ADDR5_SWIZZLE_MASK is set only when the texture does not use
                          * TC-compatible HTILE.
                          */
33627ec681f3Smrg         radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 9);
33637ec681f3Smrg         radeon_emit(zb->db_depth_info |   /* DB_DEPTH_INFO */
33647ec681f3Smrg                     S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile));
33657ec681f3Smrg         radeon_emit(db_z_info |           /* DB_Z_INFO */
33667ec681f3Smrg                     S_028040_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));
33677ec681f3Smrg         radeon_emit(db_stencil_info);     /* DB_STENCIL_INFO */
33687ec681f3Smrg         radeon_emit(zb->db_depth_base);   /* DB_Z_READ_BASE */
33697ec681f3Smrg         radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */
33707ec681f3Smrg         radeon_emit(zb->db_depth_base);   /* DB_Z_WRITE_BASE */
33717ec681f3Smrg         radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */
33727ec681f3Smrg         radeon_emit(zb->db_depth_size);   /* DB_DEPTH_SIZE */
33737ec681f3Smrg         radeon_emit(zb->db_depth_slice);  /* DB_DEPTH_SLICE */
33747ec681f3Smrg      }
33757ec681f3Smrg
                      /* Registers common to all chip classes: the clear values for the bound
                       * level (depth is emitted as its raw float bits via fui()), the depth
                       * view and the HTILE surface setup.
                       */
33767ec681f3Smrg      radeon_set_context_reg_seq(R_028028_DB_STENCIL_CLEAR, 2);
33777ec681f3Smrg      radeon_emit(tex->stencil_clear_value[level]);    /* R_028028_DB_STENCIL_CLEAR */
33787ec681f3Smrg      radeon_emit(fui(tex->depth_clear_value[level])); /* R_02802C_DB_DEPTH_CLEAR */
33797ec681f3Smrg
33807ec681f3Smrg      radeon_set_context_reg(R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
33817ec681f3Smrg      radeon_set_context_reg(R_028ABC_DB_HTILE_SURFACE, db_htile_surface);
33827ec681f3Smrg   } else if (sctx->framebuffer.dirty_zsbuf) {
                      /* No ZS buffer bound but the state is dirty: program invalid Z and
                       * stencil formats. DB_Z_INFO lives at a different register offset on
                       * GFX9 than on the other chip classes.
                       */
33837ec681f3Smrg      if (sctx->chip_class == GFX9)
33847ec681f3Smrg         radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 2);
33857ec681f3Smrg      else
33867ec681f3Smrg         radeon_set_context_reg_seq(R_028040_DB_Z_INFO, 2);
33877ec681f3Smrg
33887ec681f3Smrg      radeon_emit(S_028040_FORMAT(V_028040_Z_INVALID));       /* DB_Z_INFO */
33897ec681f3Smrg      radeon_emit(S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
33907ec681f3Smrg   }
33917ec681f3Smrg
33927ec681f3Smrg   /* Framebuffer dimensions. */
33937ec681f3Smrg   /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_cs_preamble_state */
33947ec681f3Smrg   radeon_set_context_reg(R_028208_PA_SC_WINDOW_SCISSOR_BR,
33957ec681f3Smrg                          S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
33967ec681f3Smrg
                   /* When DPBB is allowed on this screen, emit a BREAK_BATCH event after the
                    * framebuffer registers.
                    */
33977ec681f3Smrg   if (sctx->screen->dpbb_allowed) {
33987ec681f3Smrg      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
33997ec681f3Smrg      radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
34007ec681f3Smrg   }
34017ec681f3Smrg   radeon_end();
34027ec681f3Smrg
34037ec681f3Smrg   si_update_display_dcc_dirty(sctx);
34047ec681f3Smrg
                   /* Everything has been emitted; reset the dirty tracking. */
34057ec681f3Smrg   sctx->framebuffer.dirty_cbufs = 0;
34067ec681f3Smrg   sctx->framebuffer.dirty_zsbuf = false;
3407af69d88dSmrg}
3408af69d88dSmrg
340901e04c3fSmrgstatic void si_emit_msaa_sample_locs(struct si_context *sctx)
3410af69d88dSmrg{
                   /* Emit the MSAA sample locations (only when the effective sample count
                    * differs from the last one emitted) and then program
                    * PA_SU_SMALL_PRIM_FILTER_CNTL (family >= CHIP_POLARIS10 only) and
                    * PA_SU_PRIM_FILTER_CNTL.
                    */
34117ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
34127ec681f3Smrg   struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
34137ec681f3Smrg   unsigned nr_samples = sctx->framebuffer.nr_samples;
34147ec681f3Smrg   bool has_msaa_sample_loc_bug = sctx->screen->info.has_msaa_sample_loc_bug;
34157ec681f3Smrg
34167ec681f3Smrg   /* Smoothing (only possible with nr_samples == 1) uses the same
34177ec681f3Smrg    * sample locations as the MSAA it simulates.
34187ec681f3Smrg    */
34197ec681f3Smrg   if (nr_samples <= 1 && sctx->smoothing_enabled)
34207ec681f3Smrg      nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
34217ec681f3Smrg
34227ec681f3Smrg   /* On Polaris, the small primitive filter uses the sample locations
34237ec681f3Smrg    * even when MSAA is off, so we need to make sure they're set to 0.
34247ec681f3Smrg    *
34257ec681f3Smrg    * GFX10 uses sample locations unconditionally, so they always need
34267ec681f3Smrg    * to be set up.
34277ec681f3Smrg    *
                    * sctx->sample_locs_num_samples remembers the sample count that was last
                    * emitted, so the locations are not re-emitted when it is unchanged.
34287ec681f3Smrg    */
34287ec681f3Smrg   if ((nr_samples >= 2 || has_msaa_sample_loc_bug || sctx->chip_class >= GFX10) &&
34297ec681f3Smrg       nr_samples != sctx->sample_locs_num_samples) {
34307ec681f3Smrg      sctx->sample_locs_num_samples = nr_samples;
34317ec681f3Smrg      si_emit_sample_locations(cs, nr_samples);
34327ec681f3Smrg   }
34337ec681f3Smrg
34347ec681f3Smrg   radeon_begin(cs);
34357ec681f3Smrg
34367ec681f3Smrg   if (sctx->family >= CHIP_POLARIS10) {
                      /* The small primitive filter is enabled by default; the line filter is
                       * disabled on families up to and including CHIP_POLARIS12.
                       */
34377ec681f3Smrg      unsigned small_prim_filter_cntl =
34387ec681f3Smrg         S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
34397ec681f3Smrg         /* line bug */
34407ec681f3Smrg         S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12);
34417ec681f3Smrg
34427ec681f3Smrg      /* For hardware with the sample location bug, the problem is that in order to use the small
34437ec681f3Smrg       * primitive filter, we need to explicitly set the sample locations to 0. But the DB doesn't
34447ec681f3Smrg       * properly process the change of sample locations without a flush, and so we can end up
34457ec681f3Smrg       * with incorrect Z values.
34467ec681f3Smrg       *
34477ec681f3Smrg       * Instead of doing a flush, just disable the small primitive filter when MSAA is
34487ec681f3Smrg       * force-disabled.
34497ec681f3Smrg       *
34507ec681f3Smrg       * The alternative of setting sample locations to 0 would require a DB flush to avoid
34517ec681f3Smrg       * Z errors, see https://bugs.freedesktop.org/show_bug.cgi?id=96908
34527ec681f3Smrg       *
                       * Note that this test uses the framebuffer's real sample count, not the
                       * local nr_samples that may have been overridden for smoothing above.
34527ec681f3Smrg       */
34537ec681f3Smrg      if (has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1 && !rs->multisample_enable)
34547ec681f3Smrg         small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
34557ec681f3Smrg
                      /* NOTE(review): radeon_opt_set_context_reg presumably skips the write when
                       * the tracked register value is unchanged - confirm against its definition.
                       */
34567ec681f3Smrg      radeon_opt_set_context_reg(sctx, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
34577ec681f3Smrg                                 SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl);
34587ec681f3Smrg   }
34597ec681f3Smrg
34607ec681f3Smrg   /* The exclusion bits can be set to improve rasterization efficiency
34617ec681f3Smrg    * if no sample lies on the pixel boundary (-8 sample offset).
34627ec681f3Smrg    *
                    * They are only set on GFX7 and newer, and not when multisampling is
                    * enabled with an effective sample count of 16.
34627ec681f3Smrg    */
34637ec681f3Smrg   bool exclusion = sctx->chip_class >= GFX7 && (!rs->multisample_enable || nr_samples != 16);
34647ec681f3Smrg   radeon_opt_set_context_reg(
34657ec681f3Smrg      sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
34667ec681f3Smrg      S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
34677ec681f3Smrg   radeon_end();
3468af69d88dSmrg}
3469af69d88dSmrg
347001e04c3fSmrgstatic bool si_out_of_order_rasterization(struct si_context *sctx)
3471af69d88dSmrg{
34727ec681f3Smrg   struct si_state_blend *blend = sctx->queued.named.blend;
34737ec681f3Smrg   struct si_state_dsa *dsa = sctx->queued.named.dsa;
34747ec681f3Smrg
34757ec681f3Smrg   if (!sctx->screen->has_out_of_order_rast)
34767ec681f3Smrg      return false;
34777ec681f3Smrg
34787ec681f3Smrg   unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit;
34797ec681f3Smrg
34807ec681f3Smrg   colormask &= blend->cb_target_enabled_4bit;
34817ec681f3Smrg
34827ec681f3Smrg   /* Conservative: No logic op. */
34837ec681f3Smrg   if (colormask && blend->logicop_enable)
34847ec681f3Smrg      return false;
34857ec681f3Smrg
34867ec681f3Smrg   struct si_dsa_order_invariance dsa_order_invariant = {.zs = true,
34877ec681f3Smrg                                                         .pass_set = true,
34887ec681f3Smrg                                                         .pass_last = false};
34897ec681f3Smrg
34907ec681f3Smrg   if (sctx->framebuffer.state.zsbuf) {
34917ec681f3Smrg      struct si_texture *zstex = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture;
34927ec681f3Smrg      bool has_stencil = zstex->surface.has_stencil;
34937ec681f3Smrg      dsa_order_invariant = dsa->order_invariance[has_stencil];
34947ec681f3Smrg      if (!dsa_order_invariant.zs)
34957ec681f3Smrg         return false;
34967ec681f3Smrg
34977ec681f3Smrg      /* The set of PS invocations is always order invariant,
34987ec681f3Smrg       * except when early Z/S tests are requested. */
34997ec681f3Smrg      if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.writes_memory &&
35007ec681f3Smrg          sctx->shader.ps.cso->info.base.fs.early_fragment_tests &&
35017ec681f3Smrg          !dsa_order_invariant.pass_set)
35027ec681f3Smrg         return false;
35037ec681f3Smrg
35047ec681f3Smrg      if (sctx->num_perfect_occlusion_queries != 0 && !dsa_order_invariant.pass_set)
35057ec681f3Smrg         return false;
35067ec681f3Smrg   }
35077ec681f3Smrg
35087ec681f3Smrg   if (!colormask)
35097ec681f3Smrg      return true;
35107ec681f3Smrg
35117ec681f3Smrg   unsigned blendmask = colormask & blend->blend_enable_4bit;
35127ec681f3Smrg
35137ec681f3Smrg   if (blendmask) {
35147ec681f3Smrg      /* Only commutative blending. */
35157ec681f3Smrg      if (blendmask & ~blend->commutative_4bit)
35167ec681f3Smrg         return false;
35177ec681f3Smrg
35187ec681f3Smrg      if (!dsa_order_invariant.pass_set)
35197ec681f3Smrg         return false;
35207ec681f3Smrg   }
35217ec681f3Smrg
35227ec681f3Smrg   if (colormask & ~blendmask) {
35237ec681f3Smrg      if (!dsa_order_invariant.pass_last)
35247ec681f3Smrg         return false;
35257ec681f3Smrg   }
35267ec681f3Smrg
35277ec681f3Smrg   return true;
3528af69d88dSmrg}
3529af69d88dSmrg
353001e04c3fSmrgstatic void si_emit_msaa_config(struct si_context *sctx)
3531af69d88dSmrg{
35327ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
35337ec681f3Smrg   unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes;
35347ec681f3Smrg   /* 33% faster rendering to linear color buffers */
35357ec681f3Smrg   bool dst_is_linear = sctx->framebuffer.any_dst_linear;
35367ec681f3Smrg   bool out_of_order_rast = si_out_of_order_rasterization(sctx);
35377ec681f3Smrg   unsigned sc_mode_cntl_1 =
35387ec681f3Smrg      S_028A4C_WALK_SIZE(dst_is_linear) | S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
35397ec681f3Smrg      S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
35407ec681f3Smrg      S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
35417ec681f3Smrg      S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
35427ec681f3Smrg      /* always 1: */
35437ec681f3Smrg      S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
35447ec681f3Smrg      S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
35457ec681f3Smrg      S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1);
35467ec681f3Smrg   unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) |
35477ec681f3Smrg                      S_028804_INTERPOLATE_COMP_Z(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
35487ec681f3Smrg   unsigned coverage_samples, color_samples, z_samples;
35497ec681f3Smrg   struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
35507ec681f3Smrg
35517ec681f3Smrg   /* S: Coverage samples (up to 16x):
35527ec681f3Smrg    * - Scan conversion samples (PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES)
35537ec681f3Smrg    * - CB FMASK samples (CB_COLORi_ATTRIB.NUM_SAMPLES)
35547ec681f3Smrg    *
35557ec681f3Smrg    * Z: Z/S samples (up to 8x, must be <= coverage samples and >= color samples):
35567ec681f3Smrg    * - Value seen by DB (DB_Z_INFO.NUM_SAMPLES)
35577ec681f3Smrg    * - Value seen by CB, must be correct even if Z/S is unbound (DB_EQAA.MAX_ANCHOR_SAMPLES)
35587ec681f3Smrg    * # Missing samples are derived from Z planes if Z is compressed (up to 16x quality), or
35597ec681f3Smrg    * # from the closest defined sample if Z is uncompressed (same quality as the number of
35607ec681f3Smrg    * # Z samples).
35617ec681f3Smrg    *
35627ec681f3Smrg    * F: Color samples (up to 8x, must be <= coverage samples):
35637ec681f3Smrg    * - CB color samples (CB_COLORi_ATTRIB.NUM_FRAGMENTS)
35647ec681f3Smrg    * - PS iter samples (DB_EQAA.PS_ITER_SAMPLES)
35657ec681f3Smrg    *
35667ec681f3Smrg    * Can be anything between coverage and color samples:
35677ec681f3Smrg    * - SampleMaskIn samples (PA_SC_AA_CONFIG.MSAA_EXPOSED_SAMPLES)
35687ec681f3Smrg    * - SampleMaskOut samples (DB_EQAA.MASK_EXPORT_NUM_SAMPLES)
35697ec681f3Smrg    * - Alpha-to-coverage samples (DB_EQAA.ALPHA_TO_MASK_NUM_SAMPLES)
35707ec681f3Smrg    * - Occlusion query samples (DB_COUNT_CONTROL.SAMPLE_RATE)
35717ec681f3Smrg    * # All are currently set the same as coverage samples.
35727ec681f3Smrg    *
35737ec681f3Smrg    * If color samples < coverage samples, FMASK has a higher bpp to store an "unknown"
35747ec681f3Smrg    * flag for undefined color samples. A shader-based resolve must handle unknowns
35757ec681f3Smrg    * or mask them out with AND. Unknowns can also be guessed from neighbors via
35767ec681f3Smrg    * an edge-detect shader-based resolve, which is required to make "color samples = 1"
35777ec681f3Smrg    * useful. The CB resolve always drops unknowns.
35787ec681f3Smrg    *
35797ec681f3Smrg    * Sensible AA configurations:
35807ec681f3Smrg    *   EQAA 16s 8z 8f - might look the same as 16x MSAA if Z is compressed
35817ec681f3Smrg    *   EQAA 16s 8z 4f - might look the same as 16x MSAA if Z is compressed
35827ec681f3Smrg    *   EQAA 16s 4z 4f - might look the same as 16x MSAA if Z is compressed
35837ec681f3Smrg    *   EQAA  8s 8z 8f = 8x MSAA
35847ec681f3Smrg    *   EQAA  8s 8z 4f - might look the same as 8x MSAA
35857ec681f3Smrg    *   EQAA  8s 8z 2f - might look the same as 8x MSAA with low-density geometry
35867ec681f3Smrg    *   EQAA  8s 4z 4f - might look the same as 8x MSAA if Z is compressed
35877ec681f3Smrg    *   EQAA  8s 4z 2f - might look the same as 8x MSAA with low-density geometry if Z is compressed
35887ec681f3Smrg    *   EQAA  4s 4z 4f = 4x MSAA
35897ec681f3Smrg    *   EQAA  4s 4z 2f - might look the same as 4x MSAA with low-density geometry
35907ec681f3Smrg    *   EQAA  2s 2z 2f = 2x MSAA
35917ec681f3Smrg    */
35927ec681f3Smrg   coverage_samples = color_samples = z_samples = si_get_num_coverage_samples(sctx);
35937ec681f3Smrg
35947ec681f3Smrg   if (sctx->framebuffer.nr_samples > 1 && rs->multisample_enable) {
35957ec681f3Smrg      color_samples = sctx->framebuffer.nr_color_samples;
35967ec681f3Smrg
35977ec681f3Smrg      if (sctx->framebuffer.state.zsbuf) {
35987ec681f3Smrg         z_samples = sctx->framebuffer.state.zsbuf->texture->nr_samples;
35997ec681f3Smrg         z_samples = MAX2(1, z_samples);
36007ec681f3Smrg      } else {
36017ec681f3Smrg         z_samples = coverage_samples;
36027ec681f3Smrg      }
36037ec681f3Smrg   }
36047ec681f3Smrg
36057ec681f3Smrg   /* The DX10 diamond test is optional in GL and decreases line rasterization
36067ec681f3Smrg    * performance, so don't use it.
36077ec681f3Smrg    *
36087ec681f3Smrg    * TODO: We should also enable perpendicular endcaps for AA lines,
36097ec681f3Smrg    *       but that requires implementing line stippling in the pixel
36107ec681f3Smrg    *       shader. SC can only do line stippling with axis-aligned
36117ec681f3Smrg    *       endcaps.
36127ec681f3Smrg    */
36137ec681f3Smrg   unsigned sc_line_cntl = 0;
36147ec681f3Smrg   unsigned sc_aa_config = 0;
36157ec681f3Smrg
36167ec681f3Smrg   if (coverage_samples > 1) {
36177ec681f3Smrg      /* distance from the pixel center, indexed by log2(nr_samples) */
36187ec681f3Smrg      static unsigned max_dist[] = {
36197ec681f3Smrg         0, /* unused */
36207ec681f3Smrg         4, /* 2x MSAA */
36217ec681f3Smrg         6, /* 4x MSAA */
36227ec681f3Smrg         7, /* 8x MSAA */
36237ec681f3Smrg         8, /* 16x MSAA */
36247ec681f3Smrg      };
36257ec681f3Smrg      unsigned log_samples = util_logbase2(coverage_samples);
36267ec681f3Smrg      unsigned log_z_samples = util_logbase2(z_samples);
36277ec681f3Smrg      unsigned ps_iter_samples = si_get_ps_iter_samples(sctx);
36287ec681f3Smrg      unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);
36297ec681f3Smrg
36307ec681f3Smrg      sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);
36317ec681f3Smrg      sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
36327ec681f3Smrg                     S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
36337ec681f3Smrg                     S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
36347ec681f3Smrg                     S_028BE0_COVERED_CENTROID_IS_CENTER(sctx->chip_class >= GFX10_3);
36357ec681f3Smrg
36367ec681f3Smrg      if (sctx->framebuffer.nr_samples > 1) {
36377ec681f3Smrg         db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
36387ec681f3Smrg                    S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
36397ec681f3Smrg                    S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
36407ec681f3Smrg                    S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
36417ec681f3Smrg         sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
36427ec681f3Smrg      } else if (sctx->smoothing_enabled) {
36437ec681f3Smrg         db_eqaa |= S_028804_OVERRASTERIZATION_AMOUNT(log_samples);
36447ec681f3Smrg      }
36457ec681f3Smrg   }
36467ec681f3Smrg
36477ec681f3Smrg   radeon_begin(cs);
36487ec681f3Smrg
36497ec681f3Smrg   /* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */
36507ec681f3Smrg   radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL,
36517ec681f3Smrg                               sc_line_cntl, sc_aa_config);
36527ec681f3Smrg   /* R_028804_DB_EQAA */
36537ec681f3Smrg   radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa);
36547ec681f3Smrg   /* R_028A4C_PA_SC_MODE_CNTL_1 */
36557ec681f3Smrg   radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1,
36567ec681f3Smrg                              sc_mode_cntl_1);
36577ec681f3Smrg   radeon_end_update_context_roll(sctx);
3658af69d88dSmrg}
3659af69d88dSmrg
366001e04c3fSmrgvoid si_update_ps_iter_samples(struct si_context *sctx)
3661af69d88dSmrg{
36627ec681f3Smrg   if (sctx->framebuffer.nr_samples > 1)
36637ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
36647ec681f3Smrg   if (sctx->screen->dpbb_allowed)
36657ec681f3Smrg      si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
3666af69d88dSmrg}
3667af69d88dSmrg
366801e04c3fSmrgstatic void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
3669af69d88dSmrg{
36707ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
36717ec681f3Smrg
36727ec681f3Smrg   /* The hardware can only do sample shading with 2^n samples. */
36737ec681f3Smrg   min_samples = util_next_power_of_two(min_samples);
3674af69d88dSmrg
36757ec681f3Smrg   if (sctx->ps_iter_samples == min_samples)
36767ec681f3Smrg      return;
3677af69d88dSmrg
36787ec681f3Smrg   sctx->ps_iter_samples = min_samples;
3679af69d88dSmrg
36807ec681f3Smrg   si_ps_key_update_sample_shading(sctx);
36817ec681f3Smrg   si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx);
36827ec681f3Smrg   sctx->do_update_shaders = true;
3683af69d88dSmrg
36847ec681f3Smrg   si_update_ps_iter_samples(sctx);
3685af69d88dSmrg}
3686af69d88dSmrg
368701e04c3fSmrg/*
368801e04c3fSmrg * Samplers
368901e04c3fSmrg */
3690af69d88dSmrg
369101e04c3fSmrg/**
369201e04c3fSmrg * Build the sampler view descriptor for a buffer texture.
369301e04c3fSmrg * @param state 256-bit descriptor; only the high 128 bits are filled in
369401e04c3fSmrg */
36957ec681f3Smrgvoid si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf,
36967ec681f3Smrg                               enum pipe_format format, unsigned offset, unsigned size,
36977ec681f3Smrg                               uint32_t *state)
3698af69d88dSmrg{
36997ec681f3Smrg   const struct util_format_description *desc;
37007ec681f3Smrg   unsigned stride;
37017ec681f3Smrg   unsigned num_records;
37027ec681f3Smrg
37037ec681f3Smrg   desc = util_format_description(format);
37047ec681f3Smrg   stride = desc->block.bits / 8;
37057ec681f3Smrg
37067ec681f3Smrg   num_records = size / stride;
37077ec681f3Smrg   num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);
37087ec681f3Smrg
37097ec681f3Smrg   /* The NUM_RECORDS field has a different meaning depending on the chip,
37107ec681f3Smrg    * instruction type, STRIDE, and SWIZZLE_ENABLE.
37117ec681f3Smrg    *
37127ec681f3Smrg    * GFX6-7,10:
37137ec681f3Smrg    * - If STRIDE == 0, it's in byte units.
37147ec681f3Smrg    * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN.
37157ec681f3Smrg    *
37167ec681f3Smrg    * GFX8:
37177ec681f3Smrg    * - For SMEM and STRIDE == 0, it's in byte units.
37187ec681f3Smrg    * - For SMEM and STRIDE != 0, it's in units of STRIDE.
37197ec681f3Smrg    * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units.
37207ec681f3Smrg    * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE.
37217ec681f3Smrg    * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_-
37227ec681f3Smrg    *       ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when
37237ec681f3Smrg    *       using SMEM. This can be done in the shader by clearing STRIDE with s_and.
37247ec681f3Smrg    *       That way the same descriptor can be used by both SMEM and VMEM.
37257ec681f3Smrg    *
37267ec681f3Smrg    * GFX9:
37277ec681f3Smrg    * - For SMEM and STRIDE == 0, it's in byte units.
37287ec681f3Smrg    * - For SMEM and STRIDE != 0, it's in units of STRIDE.
37297ec681f3Smrg    * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units.
37307ec681f3Smrg    * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE.
37317ec681f3Smrg    */
37327ec681f3Smrg   if (screen->info.chip_class == GFX8)
37337ec681f3Smrg      num_records *= stride;
37347ec681f3Smrg
37357ec681f3Smrg   state[4] = 0;
37367ec681f3Smrg   state[5] = S_008F04_STRIDE(stride);
37377ec681f3Smrg   state[6] = num_records;
37387ec681f3Smrg   state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
37397ec681f3Smrg              S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
37407ec681f3Smrg              S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
37417ec681f3Smrg              S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3]));
37427ec681f3Smrg
37437ec681f3Smrg   if (screen->info.chip_class >= GFX10) {
37447ec681f3Smrg      const struct gfx10_format *fmt = &gfx10_format_table[format];
37457ec681f3Smrg
37467ec681f3Smrg      /* OOB_SELECT chooses the out-of-bounds check:
37477ec681f3Smrg       *  - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
37487ec681f3Smrg       *  - 1: index >= NUM_RECORDS
37497ec681f3Smrg       *  - 2: NUM_RECORDS == 0
37507ec681f3Smrg       *  - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
37517ec681f3Smrg       *       else: swizzle_address >= NUM_RECORDS
37527ec681f3Smrg       */
37537ec681f3Smrg      state[7] |= S_008F0C_FORMAT(fmt->img_format) |
37547ec681f3Smrg                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
37557ec681f3Smrg                  S_008F0C_RESOURCE_LEVEL(1);
37567ec681f3Smrg   } else {
37577ec681f3Smrg      int first_non_void;
37587ec681f3Smrg      unsigned num_format, data_format;
37597ec681f3Smrg
37607ec681f3Smrg      first_non_void = util_format_get_first_non_void_channel(format);
37617ec681f3Smrg      num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void);
37627ec681f3Smrg      data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void);
37637ec681f3Smrg
37647ec681f3Smrg      state[7] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
37657ec681f3Smrg   }
3766af69d88dSmrg}
3767af69d88dSmrg
376801e04c3fSmrgstatic unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
3769af69d88dSmrg{
37707ec681f3Smrg   unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
37717ec681f3Smrg
37727ec681f3Smrg   if (swizzle[3] == PIPE_SWIZZLE_X) {
37737ec681f3Smrg      /* For the pre-defined border color values (white, opaque
37747ec681f3Smrg       * black, transparent black), the only thing that matters is
37757ec681f3Smrg       * that the alpha channel winds up in the correct place
37767ec681f3Smrg       * (because the RGB channels are all the same) so either of
37777ec681f3Smrg       * these enumerations will work.
37787ec681f3Smrg       */
37797ec681f3Smrg      if (swizzle[2] == PIPE_SWIZZLE_Y)
37807ec681f3Smrg         bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
37817ec681f3Smrg      else
37827ec681f3Smrg         bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
37837ec681f3Smrg   } else if (swizzle[0] == PIPE_SWIZZLE_X) {
37847ec681f3Smrg      if (swizzle[1] == PIPE_SWIZZLE_Y)
37857ec681f3Smrg         bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
37867ec681f3Smrg      else
37877ec681f3Smrg         bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
37887ec681f3Smrg   } else if (swizzle[1] == PIPE_SWIZZLE_X) {
37897ec681f3Smrg      bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
37907ec681f3Smrg   } else if (swizzle[2] == PIPE_SWIZZLE_X) {
37917ec681f3Smrg      bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
37927ec681f3Smrg   }
37937ec681f3Smrg
37947ec681f3Smrg   return bc_swizzle;
3795af69d88dSmrg}
3796af69d88dSmrg
379701e04c3fSmrg/**
379801e04c3fSmrg * Build the sampler view descriptor for a texture.
3799af69d88dSmrg */
38007ec681f3Smrgstatic void gfx10_make_texture_descriptor(
38017ec681f3Smrg   struct si_screen *screen, struct si_texture *tex, bool sampler, enum pipe_texture_target target,
38027ec681f3Smrg   enum pipe_format pipe_format, const unsigned char state_swizzle[4], unsigned first_level,
38037ec681f3Smrg   unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned width, unsigned height,
38047ec681f3Smrg   unsigned depth, uint32_t *state, uint32_t *fmask_state)
38057ec681f3Smrg{
38067ec681f3Smrg   struct pipe_resource *res = &tex->buffer.b.b;
38077ec681f3Smrg   const struct util_format_description *desc;
38087ec681f3Smrg   unsigned img_format;
38097ec681f3Smrg   unsigned char swizzle[4];
38107ec681f3Smrg   unsigned type;
38117ec681f3Smrg   uint64_t va;
38127ec681f3Smrg
38137ec681f3Smrg   desc = util_format_description(pipe_format);
38147ec681f3Smrg   img_format = gfx10_format_table[pipe_format].img_format;
38157ec681f3Smrg
38167ec681f3Smrg   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
38177ec681f3Smrg      const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
38187ec681f3Smrg      const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
38197ec681f3Smrg      const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};
38207ec681f3Smrg      bool is_stencil = false;
38217ec681f3Smrg
38227ec681f3Smrg      switch (pipe_format) {
38237ec681f3Smrg      case PIPE_FORMAT_S8_UINT_Z24_UNORM:
38247ec681f3Smrg      case PIPE_FORMAT_X32_S8X24_UINT:
38257ec681f3Smrg      case PIPE_FORMAT_X8Z24_UNORM:
38267ec681f3Smrg         util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
38277ec681f3Smrg         is_stencil = true;
38287ec681f3Smrg         break;
38297ec681f3Smrg      case PIPE_FORMAT_X24S8_UINT:
38307ec681f3Smrg         /*
38317ec681f3Smrg          * X24S8 is implemented as an 8_8_8_8 data format, to
38327ec681f3Smrg          * fix texture gathers. This affects at least
38337ec681f3Smrg          * GL45-CTS.texture_cube_map_array.sampling on GFX8.
38347ec681f3Smrg          */
38357ec681f3Smrg         util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
38367ec681f3Smrg         is_stencil = true;
38377ec681f3Smrg         break;
38387ec681f3Smrg      default:
38397ec681f3Smrg         util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
38407ec681f3Smrg         is_stencil = pipe_format == PIPE_FORMAT_S8_UINT;
38417ec681f3Smrg      }
38427ec681f3Smrg
38437ec681f3Smrg      if (tex->upgraded_depth && !is_stencil) {
38447ec681f3Smrg         assert(img_format == V_008F0C_GFX10_FORMAT_32_FLOAT);
38457ec681f3Smrg         img_format = V_008F0C_GFX10_FORMAT_32_FLOAT_CLAMP;
38467ec681f3Smrg      }
38477ec681f3Smrg   } else {
38487ec681f3Smrg      util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
38497ec681f3Smrg   }
38507ec681f3Smrg
38517ec681f3Smrg   if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY)) {
38527ec681f3Smrg      /* For the purpose of shader images, treat cube maps as 2D
38537ec681f3Smrg       * arrays.
38547ec681f3Smrg       */
38557ec681f3Smrg      type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
38567ec681f3Smrg   } else {
38577ec681f3Smrg      type = si_tex_dim(screen, tex, target, res->nr_samples);
38587ec681f3Smrg   }
38597ec681f3Smrg
38607ec681f3Smrg   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
38617ec681f3Smrg      height = 1;
38627ec681f3Smrg      depth = res->array_size;
38637ec681f3Smrg   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
38647ec681f3Smrg      if (sampler || res->target != PIPE_TEXTURE_3D)
38657ec681f3Smrg         depth = res->array_size;
38667ec681f3Smrg   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
38677ec681f3Smrg      depth = res->array_size / 6;
38687ec681f3Smrg
38697ec681f3Smrg   state[0] = 0;
38707ec681f3Smrg   state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
38717ec681f3Smrg   state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
38727ec681f3Smrg              S_00A008_RESOURCE_LEVEL(1);
38737ec681f3Smrg   state[3] =
38747ec681f3Smrg      S_00A00C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
38757ec681f3Smrg      S_00A00C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
38767ec681f3Smrg      S_00A00C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
38777ec681f3Smrg      S_00A00C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
38787ec681f3Smrg      S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) |
38797ec681f3Smrg      S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) |
38807ec681f3Smrg      S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type);
38817ec681f3Smrg   /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
38827ec681f3Smrg    * to know the total number of layers.
38837ec681f3Smrg    */
38847ec681f3Smrg   state[4] =
38857ec681f3Smrg      S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) |
38867ec681f3Smrg      S_00A010_BASE_ARRAY(first_layer);
38877ec681f3Smrg   state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) |
38887ec681f3Smrg              S_00A014_MAX_MIP(res->nr_samples > 1 ? util_logbase2(res->nr_samples)
38897ec681f3Smrg                                                   : tex->buffer.b.b.last_level) |
38907ec681f3Smrg              S_00A014_PERF_MOD(4);
38917ec681f3Smrg   state[6] = 0;
38927ec681f3Smrg   state[7] = 0;
38937ec681f3Smrg
38947ec681f3Smrg   if (vi_dcc_enabled(tex, first_level)) {
38957ec681f3Smrg      state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
38967ec681f3Smrg                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) |
38977ec681f3Smrg                  S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
38987ec681f3Smrg   }
38997ec681f3Smrg
39007ec681f3Smrg   /* Initialize the sampler view for FMASK. */
39017ec681f3Smrg   if (tex->surface.fmask_offset) {
39027ec681f3Smrg      uint32_t format;
39037ec681f3Smrg
39047ec681f3Smrg      va = tex->buffer.gpu_address + tex->surface.fmask_offset;
39057ec681f3Smrg
39067ec681f3Smrg#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))
39077ec681f3Smrg      switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
39087ec681f3Smrg      case FMASK(2, 1):
39097ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F1;
39107ec681f3Smrg         break;
39117ec681f3Smrg      case FMASK(2, 2):
39127ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
39137ec681f3Smrg         break;
39147ec681f3Smrg      case FMASK(4, 1):
39157ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F1;
39167ec681f3Smrg         break;
39177ec681f3Smrg      case FMASK(4, 2):
39187ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F2;
39197ec681f3Smrg         break;
39207ec681f3Smrg      case FMASK(4, 4):
39217ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
39227ec681f3Smrg         break;
39237ec681f3Smrg      case FMASK(8, 1):
39247ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK8_S8_F1;
39257ec681f3Smrg         break;
39267ec681f3Smrg      case FMASK(8, 2):
39277ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK16_S8_F2;
39287ec681f3Smrg         break;
39297ec681f3Smrg      case FMASK(8, 4):
39307ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F4;
39317ec681f3Smrg         break;
39327ec681f3Smrg      case FMASK(8, 8):
39337ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
39347ec681f3Smrg         break;
39357ec681f3Smrg      case FMASK(16, 1):
39367ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK16_S16_F1;
39377ec681f3Smrg         break;
39387ec681f3Smrg      case FMASK(16, 2):
39397ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK32_S16_F2;
39407ec681f3Smrg         break;
39417ec681f3Smrg      case FMASK(16, 4):
39427ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F4;
39437ec681f3Smrg         break;
39447ec681f3Smrg      case FMASK(16, 8):
39457ec681f3Smrg         format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F8;
39467ec681f3Smrg         break;
39477ec681f3Smrg      default:
39487ec681f3Smrg         unreachable("invalid nr_samples");
39497ec681f3Smrg      }
39507ec681f3Smrg#undef FMASK
39517ec681f3Smrg      fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle;
39527ec681f3Smrg      fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
39537ec681f3Smrg                       S_00A004_WIDTH_LO(width - 1);
39547ec681f3Smrg      fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
39557ec681f3Smrg                       S_00A008_RESOURCE_LEVEL(1);
39567ec681f3Smrg      fmask_state[3] =
39577ec681f3Smrg         S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
39587ec681f3Smrg         S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
39597ec681f3Smrg         S_00A00C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) |
39607ec681f3Smrg         S_00A00C_TYPE(si_tex_dim(screen, tex, target, 0));
39617ec681f3Smrg      fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
39627ec681f3Smrg      fmask_state[5] = 0;
39637ec681f3Smrg      fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
39647ec681f3Smrg      fmask_state[7] = 0;
39657ec681f3Smrg   }
39667ec681f3Smrg}
39677ec681f3Smrg
39687ec681f3Smrg/**
39697ec681f3Smrg * Build the sampler view descriptor for a texture (SI-GFX9).
 *
 * Writes all 8 dwords of the image descriptor and, only when the texture
 * has an FMASK (tex->surface.fmask_offset != 0), all 8 dwords of the FMASK
 * descriptor. state[0] is written as 0 and no address bits are put into
 * state[1] here (presumably the caller fills in the address later - TODO
 * confirm against the callers).
 *
 * @param screen         screen; its chip_class selects the GFX9 or the older field layout
 * @param tex            texture to describe
 * @param sampler        false for shader images: cube maps (and 3D textures on GFX8
 *                       and older) are then described as 2D arrays
 * @param target         texture target handed to si_tex_dim() to pick the resource type
 * @param pipe_format    format the texture is viewed with
 * @param state_swizzle  view swizzle, composed on top of the format swizzle
 * @param first_level    programmed as BASE_LEVEL unless num_samples > 1 (then 0)
 * @param last_level     programmed as LAST_LEVEL unless num_samples > 1
 *                       (then log2(num_samples))
 * @param first_layer    programmed as BASE_ARRAY
 * @param last_layer     programmed as LAST_ARRAY, or as DEPTH for non-3D types on GFX9
 * @param width          programmed as width - 1
 * @param height         programmed as height - 1; forced to 1 for 1D arrays
 * @param depth          programmed as depth - 1 where used; overridden from
 *                       res->array_size for array and cube resource types
 * @param state          output: 8-dword image descriptor
 * @param fmask_state    output: 8-dword FMASK descriptor; not written when the
 *                       texture has no FMASK
39707ec681f3Smrg */
39717ec681f3Smrgstatic void si_make_texture_descriptor(struct si_screen *screen, struct si_texture *tex,
39727ec681f3Smrg                                       bool sampler, enum pipe_texture_target target,
39737ec681f3Smrg                                       enum pipe_format pipe_format,
39747ec681f3Smrg                                       const unsigned char state_swizzle[4], unsigned first_level,
39757ec681f3Smrg                                       unsigned last_level, unsigned first_layer,
39767ec681f3Smrg                                       unsigned last_layer, unsigned width, unsigned height,
39777ec681f3Smrg                                       unsigned depth, uint32_t *state, uint32_t *fmask_state)
3978af69d88dSmrg{
39797ec681f3Smrg   struct pipe_resource *res = &tex->buffer.b.b;
39807ec681f3Smrg   const struct util_format_description *desc;
39817ec681f3Smrg   unsigned char swizzle[4];
39827ec681f3Smrg   int first_non_void;
39837ec681f3Smrg   unsigned num_format, data_format, type, num_samples;
39847ec681f3Smrg   uint64_t va;
39857ec681f3Smrg
39867ec681f3Smrg   desc = util_format_description(pipe_format);
39877ec681f3Smrg
   /* Depth/stencil formats take the sample count from nr_samples, all other
    * formats from nr_storage_samples; a count of 0 is treated as 1.
    */
39887ec681f3Smrg   num_samples = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ? MAX2(1, res->nr_samples)
39897ec681f3Smrg                                                               : MAX2(1, res->nr_storage_samples);
39907ec681f3Smrg
   /* Compute the final swizzle. For depth/stencil formats a single source
    * channel (X, Y or W, depending on the format) is broadcast before the
    * view swizzle is applied; other formats start from the format swizzle.
    */
39917ec681f3Smrg   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
39927ec681f3Smrg      const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
39937ec681f3Smrg      const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
39947ec681f3Smrg      const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};
39957ec681f3Smrg
39967ec681f3Smrg      switch (pipe_format) {
39977ec681f3Smrg      case PIPE_FORMAT_S8_UINT_Z24_UNORM:
39987ec681f3Smrg      case PIPE_FORMAT_X32_S8X24_UINT:
39997ec681f3Smrg      case PIPE_FORMAT_X8Z24_UNORM:
40007ec681f3Smrg         util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
40017ec681f3Smrg         break;
40027ec681f3Smrg      case PIPE_FORMAT_X24S8_UINT:
40037ec681f3Smrg         /*
40047ec681f3Smrg          * X24S8 is implemented as an 8_8_8_8 data format, to
40057ec681f3Smrg          * fix texture gathers. This affects at least
40067ec681f3Smrg          * GL45-CTS.texture_cube_map_array.sampling on GFX8.
40077ec681f3Smrg          */
40087ec681f3Smrg         if (screen->info.chip_class <= GFX8)
40097ec681f3Smrg            util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
40107ec681f3Smrg         else
40117ec681f3Smrg            util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
40127ec681f3Smrg         break;
40137ec681f3Smrg      default:
40147ec681f3Smrg         util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
40157ec681f3Smrg      }
40167ec681f3Smrg   } else {
40177ec681f3Smrg      util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
40187ec681f3Smrg   }
40197ec681f3Smrg
40207ec681f3Smrg   first_non_void = util_format_get_first_non_void_channel(pipe_format);
40217ec681f3Smrg
   /* Select the numeric format (NUM_FORMAT field of dword 1). */
40227ec681f3Smrg   switch (pipe_format) {
40237ec681f3Smrg   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
40247ec681f3Smrg      num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
40257ec681f3Smrg      break;
40267ec681f3Smrg   default:
      /* No non-void channel: decide from the format class (compressed,
       * subsampled, or anything else) instead of from channel properties.
       */
40277ec681f3Smrg      if (first_non_void < 0) {
40287ec681f3Smrg         if (util_format_is_compressed(pipe_format)) {
40297ec681f3Smrg            switch (pipe_format) {
40307ec681f3Smrg            case PIPE_FORMAT_DXT1_SRGB:
40317ec681f3Smrg            case PIPE_FORMAT_DXT1_SRGBA:
40327ec681f3Smrg            case PIPE_FORMAT_DXT3_SRGBA:
40337ec681f3Smrg            case PIPE_FORMAT_DXT5_SRGBA:
40347ec681f3Smrg            case PIPE_FORMAT_BPTC_SRGBA:
40357ec681f3Smrg            case PIPE_FORMAT_ETC2_SRGB8:
40367ec681f3Smrg            case PIPE_FORMAT_ETC2_SRGB8A1:
40377ec681f3Smrg            case PIPE_FORMAT_ETC2_SRGBA8:
40387ec681f3Smrg               num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
40397ec681f3Smrg               break;
40407ec681f3Smrg            case PIPE_FORMAT_RGTC1_SNORM:
40417ec681f3Smrg            case PIPE_FORMAT_LATC1_SNORM:
40427ec681f3Smrg            case PIPE_FORMAT_RGTC2_SNORM:
40437ec681f3Smrg            case PIPE_FORMAT_LATC2_SNORM:
40447ec681f3Smrg            case PIPE_FORMAT_ETC2_R11_SNORM:
40457ec681f3Smrg            case PIPE_FORMAT_ETC2_RG11_SNORM:
40467ec681f3Smrg            /* implies float, so use SNORM/UNORM to determine
40477ec681f3Smrg               whether data is signed or not */
40487ec681f3Smrg            case PIPE_FORMAT_BPTC_RGB_FLOAT:
40497ec681f3Smrg               num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
40507ec681f3Smrg               break;
40517ec681f3Smrg            default:
40527ec681f3Smrg               num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
40537ec681f3Smrg               break;
40547ec681f3Smrg            }
40557ec681f3Smrg         } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
40567ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
40577ec681f3Smrg         } else {
40587ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
40597ec681f3Smrg         }
40607ec681f3Smrg      } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
40617ec681f3Smrg         num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
40627ec681f3Smrg      } else {
         /* UNORM is the fallback for channel types not handled below. */
40637ec681f3Smrg         num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
40647ec681f3Smrg
40657ec681f3Smrg         switch (desc->channel[first_non_void].type) {
40667ec681f3Smrg         case UTIL_FORMAT_TYPE_FLOAT:
40677ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
40687ec681f3Smrg            break;
40697ec681f3Smrg         case UTIL_FORMAT_TYPE_SIGNED:
40707ec681f3Smrg            if (desc->channel[first_non_void].normalized)
40717ec681f3Smrg               num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
40727ec681f3Smrg            else if (desc->channel[first_non_void].pure_integer)
40737ec681f3Smrg               num_format = V_008F14_IMG_NUM_FORMAT_SINT;
40747ec681f3Smrg            else
40757ec681f3Smrg               num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
40767ec681f3Smrg            break;
40777ec681f3Smrg         case UTIL_FORMAT_TYPE_UNSIGNED:
40787ec681f3Smrg            if (desc->channel[first_non_void].normalized)
40797ec681f3Smrg               num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
40807ec681f3Smrg            else if (desc->channel[first_non_void].pure_integer)
40817ec681f3Smrg               num_format = V_008F14_IMG_NUM_FORMAT_UINT;
40827ec681f3Smrg            else
40837ec681f3Smrg               num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
40847ec681f3Smrg         }
40857ec681f3Smrg      }
40867ec681f3Smrg   }
40877ec681f3Smrg
   /* A result of ~0 from si_translate_texformat() is replaced by data format 0
    * (presumably ~0 signals an untranslatable format - TODO confirm).
    */
40887ec681f3Smrg   data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void);
40897ec681f3Smrg   if (data_format == ~0) {
40907ec681f3Smrg      data_format = 0;
40917ec681f3Smrg   }
40927ec681f3Smrg
40937ec681f3Smrg   /* S8 with Z32 HTILE needs a special format. */
40947ec681f3Smrg   if (screen->info.chip_class == GFX9 && pipe_format == PIPE_FORMAT_S8_UINT)
40957ec681f3Smrg      data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
40967ec681f3Smrg
40977ec681f3Smrg   if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY ||
40987ec681f3Smrg                    (screen->info.chip_class <= GFX8 && res->target == PIPE_TEXTURE_3D))) {
40997ec681f3Smrg      /* For the purpose of shader images, treat cube maps and 3D
41007ec681f3Smrg       * textures as 2D arrays. For 3D textures, the address
41017ec681f3Smrg       * calculations for mipmaps are different, so we rely on the
41027ec681f3Smrg       * caller to effectively disable mipmaps.
41037ec681f3Smrg       */
41047ec681f3Smrg      type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
41057ec681f3Smrg
41067ec681f3Smrg      assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
41077ec681f3Smrg   } else {
41087ec681f3Smrg      type = si_tex_dim(screen, tex, target, num_samples);
41097ec681f3Smrg   }
41107ec681f3Smrg
   /* Array and cube resource types take their layer count from the resource
    * rather than from the depth argument. A 3D texture bound as a shader image
    * (described as a 2D array above) keeps the depth passed by the caller.
    */
41117ec681f3Smrg   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
41127ec681f3Smrg      height = 1;
41137ec681f3Smrg      depth = res->array_size;
41147ec681f3Smrg   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
41157ec681f3Smrg      if (sampler || res->target != PIPE_TEXTURE_3D)
41167ec681f3Smrg         depth = res->array_size;
41177ec681f3Smrg   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
41187ec681f3Smrg      depth = res->array_size / 6;
41197ec681f3Smrg
   /* Fields common to all chip classes. With more than one sample,
    * BASE_LEVEL is 0 and LAST_LEVEL carries log2(num_samples) instead of
    * the mip range.
    */
41207ec681f3Smrg   state[0] = 0;
41217ec681f3Smrg   state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format));
41227ec681f3Smrg   state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
41237ec681f3Smrg   state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
41247ec681f3Smrg               S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
41257ec681f3Smrg               S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
41267ec681f3Smrg               S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
41277ec681f3Smrg               S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : first_level) |
41287ec681f3Smrg               S_008F1C_LAST_LEVEL(num_samples > 1 ? util_logbase2(num_samples) : last_level) |
41297ec681f3Smrg               S_008F1C_TYPE(type));
41307ec681f3Smrg   state[4] = 0;
41317ec681f3Smrg   state[5] = S_008F24_BASE_ARRAY(first_layer);
41327ec681f3Smrg   state[6] = 0;
41337ec681f3Smrg   state[7] = 0;
41347ec681f3Smrg
41357ec681f3Smrg   if (screen->info.chip_class == GFX9) {
      /* The border color swizzle is derived from the format swizzle only,
       * not from the composed view swizzle.
       */
41367ec681f3Smrg      unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
41377ec681f3Smrg
41387ec681f3Smrg      /* Depth is the last accessible layer on Gfx9.
41397ec681f3Smrg       * The hw doesn't need to know the total number of layers.
41407ec681f3Smrg       */
41417ec681f3Smrg      if (type == V_008F1C_SQ_RSRC_IMG_3D)
41427ec681f3Smrg         state[4] |= S_008F20_DEPTH(depth - 1);
41437ec681f3Smrg      else
41447ec681f3Smrg         state[4] |= S_008F20_DEPTH(last_layer);
41457ec681f3Smrg
41467ec681f3Smrg      state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
41477ec681f3Smrg      state[5] |= S_008F24_MAX_MIP(num_samples > 1 ? util_logbase2(num_samples)
41487ec681f3Smrg                                                   : tex->buffer.b.b.last_level);
41497ec681f3Smrg   } else {
41507ec681f3Smrg      state[3] |= S_008F1C_POW2_PAD(res->last_level > 0);
41517ec681f3Smrg      state[4] |= S_008F20_DEPTH(depth - 1);
41527ec681f3Smrg      state[5] |= S_008F24_LAST_ARRAY(last_layer);
41537ec681f3Smrg   }
41547ec681f3Smrg
41557ec681f3Smrg   if (vi_dcc_enabled(tex, first_level)) {
41567ec681f3Smrg      state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
41577ec681f3Smrg   } else {
41587ec681f3Smrg      /* The last dword is unused by hw. The shader uses it to clear
41597ec681f3Smrg       * bits in the first dword of sampler state.
41607ec681f3Smrg       */
41617ec681f3Smrg      if (screen->info.chip_class <= GFX7 && res->nr_samples <= 1) {
41627ec681f3Smrg         if (first_level == last_level)
41637ec681f3Smrg            state[7] = C_008F30_MAX_ANISO_RATIO;
41647ec681f3Smrg         else
41657ec681f3Smrg            state[7] = 0xffffffff;
41667ec681f3Smrg      }
41677ec681f3Smrg   }
41687ec681f3Smrg
41697ec681f3Smrg   /* Initialize the sampler view for FMASK. */
41707ec681f3Smrg   if (tex->surface.fmask_offset) {
      /* These intentionally shadow the outer data_format/num_format: the
       * FMASK descriptor uses its own formats.
       */
41717ec681f3Smrg      uint32_t data_format, num_format;
41727ec681f3Smrg
41737ec681f3Smrg      va = tex->buffer.gpu_address + tex->surface.fmask_offset;
41747ec681f3Smrg
      /* FMASK(s, f) packs the (samples, storage samples) pair into a single
       * integer so that the pair can be used as a switch case label. GFX9
       * encodes the pair in num_format, older chips in data_format.
       */
41757ec681f3Smrg#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))
41767ec681f3Smrg      if (screen->info.chip_class == GFX9) {
41777ec681f3Smrg         data_format = V_008F14_IMG_DATA_FORMAT_FMASK;
41787ec681f3Smrg         switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
41797ec681f3Smrg         case FMASK(2, 1):
41807ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_1;
41817ec681f3Smrg            break;
41827ec681f3Smrg         case FMASK(2, 2):
41837ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
41847ec681f3Smrg            break;
41857ec681f3Smrg         case FMASK(4, 1):
41867ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_1;
41877ec681f3Smrg            break;
41887ec681f3Smrg         case FMASK(4, 2):
41897ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_2;
41907ec681f3Smrg            break;
41917ec681f3Smrg         case FMASK(4, 4):
41927ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
41937ec681f3Smrg            break;
41947ec681f3Smrg         case FMASK(8, 1):
41957ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_8_1;
41967ec681f3Smrg            break;
41977ec681f3Smrg         case FMASK(8, 2):
41987ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_8_2;
41997ec681f3Smrg            break;
42007ec681f3Smrg         case FMASK(8, 4):
42017ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_4;
42027ec681f3Smrg            break;
42037ec681f3Smrg         case FMASK(8, 8):
42047ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
42057ec681f3Smrg            break;
42067ec681f3Smrg         case FMASK(16, 1):
42077ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_16_1;
42087ec681f3Smrg            break;
42097ec681f3Smrg         case FMASK(16, 2):
42107ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_16_2;
42117ec681f3Smrg            break;
42127ec681f3Smrg         case FMASK(16, 4):
42137ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_4;
42147ec681f3Smrg            break;
42157ec681f3Smrg         case FMASK(16, 8):
42167ec681f3Smrg            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_8;
42177ec681f3Smrg            break;
42187ec681f3Smrg         default:
42197ec681f3Smrg            unreachable("invalid nr_samples");
42207ec681f3Smrg         }
42217ec681f3Smrg      } else {
42227ec681f3Smrg         switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
42237ec681f3Smrg         case FMASK(2, 1):
42247ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1;
42257ec681f3Smrg            break;
42267ec681f3Smrg         case FMASK(2, 2):
42277ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
42287ec681f3Smrg            break;
42297ec681f3Smrg         case FMASK(4, 1):
42307ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1;
42317ec681f3Smrg            break;
42327ec681f3Smrg         case FMASK(4, 2):
42337ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2;
42347ec681f3Smrg            break;
42357ec681f3Smrg         case FMASK(4, 4):
42367ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
42377ec681f3Smrg            break;
42387ec681f3Smrg         case FMASK(8, 1):
42397ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1;
42407ec681f3Smrg            break;
42417ec681f3Smrg         case FMASK(8, 2):
42427ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2;
42437ec681f3Smrg            break;
42447ec681f3Smrg         case FMASK(8, 4):
42457ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4;
42467ec681f3Smrg            break;
42477ec681f3Smrg         case FMASK(8, 8):
42487ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
42497ec681f3Smrg            break;
42507ec681f3Smrg         case FMASK(16, 1):
42517ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1;
42527ec681f3Smrg            break;
42537ec681f3Smrg         case FMASK(16, 2):
42547ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2;
42557ec681f3Smrg            break;
42567ec681f3Smrg         case FMASK(16, 4):
42577ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4;
42587ec681f3Smrg            break;
42597ec681f3Smrg         case FMASK(16, 8):
42607ec681f3Smrg            data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8;
42617ec681f3Smrg            break;
42627ec681f3Smrg         default:
42637ec681f3Smrg            unreachable("invalid nr_samples");
42647ec681f3Smrg         }
42657ec681f3Smrg         num_format = V_008F14_IMG_NUM_FORMAT_UINT;
42667ec681f3Smrg      }
426701e04c3fSmrg#undef FMASK
426801e04c3fSmrg
      /* Unlike the image descriptor above, the FMASK descriptor gets its
       * address here: bits 8 and up of va in dword 0 (OR'ed with the FMASK
       * tile swizzle), bits 40 and up in dword 1. All four destination
       * channels select X.
       */
42697ec681f3Smrg      fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle;
42707ec681f3Smrg      fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(data_format) |
42717ec681f3Smrg                       S_008F14_NUM_FORMAT(num_format);
42727ec681f3Smrg      fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
42737ec681f3Smrg      fmask_state[3] =
42747ec681f3Smrg         S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
42757ec681f3Smrg         S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
42767ec681f3Smrg         S_008F1C_TYPE(si_tex_dim(screen, tex, target, 0));
42777ec681f3Smrg      fmask_state[4] = 0;
42787ec681f3Smrg      fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
42797ec681f3Smrg      fmask_state[6] = 0;
42807ec681f3Smrg      fmask_state[7] = 0;
42817ec681f3Smrg
      /* Chip-specific layout fields: swizzle mode/epitch on GFX9, tiling
       * index/pitch on older chips.
       */
42827ec681f3Smrg      if (screen->info.chip_class == GFX9) {
42837ec681f3Smrg         fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode);
42847ec681f3Smrg         fmask_state[4] |=
42857ec681f3Smrg            S_008F20_DEPTH(last_layer) | S_008F20_PITCH(tex->surface.u.gfx9.color.fmask_epitch);
42867ec681f3Smrg         fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
42877ec681f3Smrg                           S_008F24_META_RB_ALIGNED(1);
42887ec681f3Smrg      } else {
42897ec681f3Smrg         fmask_state[3] |= S_008F1C_TILING_INDEX(tex->surface.u.legacy.color.fmask.tiling_index);
42907ec681f3Smrg         fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
42917ec681f3Smrg                           S_008F20_PITCH(tex->surface.u.legacy.color.fmask.pitch_in_pixels - 1);
42927ec681f3Smrg         fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
42937ec681f3Smrg      }
42947ec681f3Smrg   }
429501e04c3fSmrg}
429601e04c3fSmrg
429701e04c3fSmrg/**
429801e04c3fSmrg * Create a sampler view.
429901e04c3fSmrg *
430001e04c3fSmrg * @param ctx		context
430101e04c3fSmrg * @param texture	texture
430201e04c3fSmrg * @param state		sampler view template
430301e04c3fSmrg * @param width0	width0 override (for compressed textures as int)
430401e04c3fSmrg * @param height0	height0 override (for compressed textures as int)
430501e04c3fSmrg * @param force_level   set the base address to the level (for compressed textures)
430601e04c3fSmrg */
43077ec681f3Smrgstruct pipe_sampler_view *si_create_sampler_view_custom(struct pipe_context *ctx,
43087ec681f3Smrg                                                        struct pipe_resource *texture,
43097ec681f3Smrg                                                        const struct pipe_sampler_view *state,
43107ec681f3Smrg                                                        unsigned width0, unsigned height0,
43117ec681f3Smrg                                                        unsigned force_level)
431201e04c3fSmrg{
43137ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
43147ec681f3Smrg   struct si_sampler_view *view = CALLOC_STRUCT_CL(si_sampler_view);
43157ec681f3Smrg   struct si_texture *tex = (struct si_texture *)texture;
43167ec681f3Smrg   unsigned base_level, first_level, last_level;
43177ec681f3Smrg   unsigned char state_swizzle[4];
43187ec681f3Smrg   unsigned height, depth, width;
43197ec681f3Smrg   unsigned last_layer = state->u.tex.last_layer;
43207ec681f3Smrg   enum pipe_format pipe_format;
43217ec681f3Smrg   const struct legacy_surf_level *surflevel;
43227ec681f3Smrg
43237ec681f3Smrg   if (!view)
43247ec681f3Smrg      return NULL;
43257ec681f3Smrg
43267ec681f3Smrg   /* initialize base object */
43277ec681f3Smrg   view->base = *state;
43287ec681f3Smrg   view->base.texture = NULL;
43297ec681f3Smrg   view->base.reference.count = 1;
43307ec681f3Smrg   view->base.context = ctx;
43317ec681f3Smrg
43327ec681f3Smrg   assert(texture);
43337ec681f3Smrg   pipe_resource_reference(&view->base.texture, texture);
43347ec681f3Smrg
43357ec681f3Smrg   if (state->format == PIPE_FORMAT_X24S8_UINT || state->format == PIPE_FORMAT_S8X24_UINT ||
43367ec681f3Smrg       state->format == PIPE_FORMAT_X32_S8X24_UINT || state->format == PIPE_FORMAT_S8_UINT)
43377ec681f3Smrg      view->is_stencil_sampler = true;
43387ec681f3Smrg
43397ec681f3Smrg   /* Buffer resource. */
43407ec681f3Smrg   if (texture->target == PIPE_BUFFER) {
43417ec681f3Smrg      si_make_buffer_descriptor(sctx->screen, si_resource(texture), state->format,
43427ec681f3Smrg                                state->u.buf.offset, state->u.buf.size, view->state);
43437ec681f3Smrg      return &view->base;
43447ec681f3Smrg   }
43457ec681f3Smrg
43467ec681f3Smrg   state_swizzle[0] = state->swizzle_r;
43477ec681f3Smrg   state_swizzle[1] = state->swizzle_g;
43487ec681f3Smrg   state_swizzle[2] = state->swizzle_b;
43497ec681f3Smrg   state_swizzle[3] = state->swizzle_a;
43507ec681f3Smrg
43517ec681f3Smrg   base_level = 0;
43527ec681f3Smrg   first_level = state->u.tex.first_level;
43537ec681f3Smrg   last_level = state->u.tex.last_level;
43547ec681f3Smrg   width = width0;
43557ec681f3Smrg   height = height0;
43567ec681f3Smrg   depth = texture->depth0;
43577ec681f3Smrg
43587ec681f3Smrg   if (sctx->chip_class <= GFX8 && force_level) {
43597ec681f3Smrg      assert(force_level == first_level && force_level == last_level);
43607ec681f3Smrg      base_level = force_level;
43617ec681f3Smrg      first_level = 0;
43627ec681f3Smrg      last_level = 0;
43637ec681f3Smrg      width = u_minify(width, force_level);
43647ec681f3Smrg      height = u_minify(height, force_level);
43657ec681f3Smrg      depth = u_minify(depth, force_level);
43667ec681f3Smrg   }
43677ec681f3Smrg
43687ec681f3Smrg   /* This is not needed if gallium frontends set last_layer correctly. */
43697ec681f3Smrg   if (state->target == PIPE_TEXTURE_1D || state->target == PIPE_TEXTURE_2D ||
43707ec681f3Smrg       state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE)
43717ec681f3Smrg      last_layer = state->u.tex.first_layer;
43727ec681f3Smrg
43737ec681f3Smrg   /* Texturing with separate depth and stencil. */
43747ec681f3Smrg   pipe_format = state->format;
43757ec681f3Smrg
43767ec681f3Smrg   /* Depth/stencil texturing sometimes needs separate texture. */
43777ec681f3Smrg   if (tex->is_depth && !si_can_sample_zs(tex, view->is_stencil_sampler)) {
43787ec681f3Smrg      if (!tex->flushed_depth_texture && !si_init_flushed_depth_texture(ctx, texture)) {
43797ec681f3Smrg         pipe_resource_reference(&view->base.texture, NULL);
43807ec681f3Smrg         FREE(view);
43817ec681f3Smrg         return NULL;
43827ec681f3Smrg      }
43837ec681f3Smrg
43847ec681f3Smrg      assert(tex->flushed_depth_texture);
43857ec681f3Smrg
43867ec681f3Smrg      /* Override format for the case where the flushed texture
43877ec681f3Smrg       * contains only Z or only S.
43887ec681f3Smrg       */
43897ec681f3Smrg      if (tex->flushed_depth_texture->buffer.b.b.format != tex->buffer.b.b.format)
43907ec681f3Smrg         pipe_format = tex->flushed_depth_texture->buffer.b.b.format;
43917ec681f3Smrg
43927ec681f3Smrg      tex = tex->flushed_depth_texture;
43937ec681f3Smrg   }
43947ec681f3Smrg
43957ec681f3Smrg   surflevel = tex->surface.u.legacy.level;
43967ec681f3Smrg
43977ec681f3Smrg   if (tex->db_compatible) {
43987ec681f3Smrg      if (!view->is_stencil_sampler)
43997ec681f3Smrg         pipe_format = tex->db_render_format;
44007ec681f3Smrg
44017ec681f3Smrg      switch (pipe_format) {
44027ec681f3Smrg      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
44037ec681f3Smrg         pipe_format = PIPE_FORMAT_Z32_FLOAT;
44047ec681f3Smrg         break;
44057ec681f3Smrg      case PIPE_FORMAT_X8Z24_UNORM:
44067ec681f3Smrg      case PIPE_FORMAT_S8_UINT_Z24_UNORM:
44077ec681f3Smrg         /* Z24 is always stored like this for DB
44087ec681f3Smrg          * compatibility.
44097ec681f3Smrg          */
44107ec681f3Smrg         pipe_format = PIPE_FORMAT_Z24X8_UNORM;
44117ec681f3Smrg         break;
44127ec681f3Smrg      case PIPE_FORMAT_X24S8_UINT:
44137ec681f3Smrg      case PIPE_FORMAT_S8X24_UINT:
44147ec681f3Smrg      case PIPE_FORMAT_X32_S8X24_UINT:
44157ec681f3Smrg         pipe_format = PIPE_FORMAT_S8_UINT;
44167ec681f3Smrg         surflevel = tex->surface.u.legacy.zs.stencil_level;
44177ec681f3Smrg         break;
44187ec681f3Smrg      default:;
44197ec681f3Smrg      }
44207ec681f3Smrg   }
44217ec681f3Smrg
44227ec681f3Smrg   view->dcc_incompatible =
44237ec681f3Smrg      vi_dcc_formats_are_incompatible(texture, state->u.tex.first_level, state->format);
44247ec681f3Smrg
44257ec681f3Smrg   sctx->screen->make_texture_descriptor(
44267ec681f3Smrg      sctx->screen, tex, true, state->target, pipe_format, state_swizzle, first_level, last_level,
44277ec681f3Smrg      state->u.tex.first_layer, last_layer, width, height, depth, view->state, view->fmask_state);
44287ec681f3Smrg
44297ec681f3Smrg   view->base_level_info = &surflevel[base_level];
44307ec681f3Smrg   view->base_level = base_level;
44317ec681f3Smrg   view->block_width = util_format_get_blockwidth(pipe_format);
44327ec681f3Smrg   return &view->base;
4433af69d88dSmrg}
4434af69d88dSmrg
44357ec681f3Smrgstatic struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx,
44367ec681f3Smrg                                                        struct pipe_resource *texture,
44377ec681f3Smrg                                                        const struct pipe_sampler_view *state)
443801e04c3fSmrg{
44397ec681f3Smrg   return si_create_sampler_view_custom(ctx, texture, state, texture ? texture->width0 : 0,
44407ec681f3Smrg                                        texture ? texture->height0 : 0, 0);
444101e04c3fSmrg}
444201e04c3fSmrg
44437ec681f3Smrgstatic void si_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state)
4444af69d88dSmrg{
44457ec681f3Smrg   struct si_sampler_view *view = (struct si_sampler_view *)state;
4446af69d88dSmrg
44477ec681f3Smrg   pipe_resource_reference(&state->texture, NULL);
44487ec681f3Smrg   FREE_CL(view);
4449af69d88dSmrg}
4450af69d88dSmrg
4451af69d88dSmrgstatic bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
4452af69d88dSmrg{
44537ec681f3Smrg   return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
44547ec681f3Smrg          (linear_filter && (wrap == PIPE_TEX_WRAP_CLAMP || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
4455af69d88dSmrg}
4456af69d88dSmrg
445701e04c3fSmrgstatic uint32_t si_translate_border_color(struct si_context *sctx,
44587ec681f3Smrg                                          const struct pipe_sampler_state *state,
44597ec681f3Smrg                                          const union pipe_color_union *color, bool is_integer)
4460af69d88dSmrg{
44617ec681f3Smrg   bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
44627ec681f3Smrg                        state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
44637ec681f3Smrg
44647ec681f3Smrg   if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) &&
44657ec681f3Smrg       !wrap_mode_uses_border_color(state->wrap_t, linear_filter) &&
44667ec681f3Smrg       !wrap_mode_uses_border_color(state->wrap_r, linear_filter))
44677ec681f3Smrg      return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);
44687ec681f3Smrg
44697ec681f3Smrg#define simple_border_types(elt)                                                                   \
44707ec681f3Smrg   do {                                                                                            \
44717ec681f3Smrg      if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 0)    \
44727ec681f3Smrg         return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);              \
44737ec681f3Smrg      if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 1)    \
44747ec681f3Smrg         return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK);             \
44757ec681f3Smrg      if (color->elt[0] == 1 && color->elt[1] == 1 && color->elt[2] == 1 && color->elt[3] == 1)    \
44767ec681f3Smrg         return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE);             \
44777ec681f3Smrg   } while (false)
44787ec681f3Smrg
44797ec681f3Smrg   if (is_integer)
44807ec681f3Smrg      simple_border_types(ui);
44817ec681f3Smrg   else
44827ec681f3Smrg      simple_border_types(f);
448301e04c3fSmrg
448401e04c3fSmrg#undef simple_border_types
448501e04c3fSmrg
44867ec681f3Smrg   int i;
44877ec681f3Smrg
44887ec681f3Smrg   /* Check if the border has been uploaded already. */
44897ec681f3Smrg   for (i = 0; i < sctx->border_color_count; i++)
44907ec681f3Smrg      if (memcmp(&sctx->border_color_table[i], color, sizeof(*color)) == 0)
44917ec681f3Smrg         break;
44927ec681f3Smrg
44937ec681f3Smrg   if (i >= SI_MAX_BORDER_COLORS) {
44947ec681f3Smrg      /* Getting 4096 unique border colors is very unlikely. */
44957ec681f3Smrg      static bool printed;
44967ec681f3Smrg      if (!printed) {
44977ec681f3Smrg         fprintf(stderr, "radeonsi: The border color table is full. "
44987ec681f3Smrg                         "Any new border colors will be just black. "
44997ec681f3Smrg                         "This is a hardware limitation.\n");
45007ec681f3Smrg         printed = true;
45017ec681f3Smrg      }
45027ec681f3Smrg      return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);
45037ec681f3Smrg   }
45047ec681f3Smrg
45057ec681f3Smrg   if (i == sctx->border_color_count) {
45067ec681f3Smrg      /* Upload a new border color. */
45077ec681f3Smrg      memcpy(&sctx->border_color_table[i], color, sizeof(*color));
45087ec681f3Smrg      util_memcpy_cpu_to_le32(&sctx->border_color_map[i], color, sizeof(*color));
45097ec681f3Smrg      sctx->border_color_count++;
45107ec681f3Smrg   }
45117ec681f3Smrg
45127ec681f3Smrg   return S_008F3C_BORDER_COLOR_PTR(i) |
45137ec681f3Smrg          S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER);
451401e04c3fSmrg}
451501e04c3fSmrg
/**
 * Convert a float to signed fixed point with \p frac_bits fractional bits
 * (the scaled value is truncated toward zero by the conversion to int).
 */
static inline int S_FIXED(float value, unsigned frac_bits)
{
   const int scale = 1 << frac_bits;

   return (int)(value * (float)scale);
}
452001e04c3fSmrg
452101e04c3fSmrgstatic inline unsigned si_tex_filter(unsigned filter, unsigned max_aniso)
452201e04c3fSmrg{
45237ec681f3Smrg   if (filter == PIPE_TEX_FILTER_LINEAR)
45247ec681f3Smrg      return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
45257ec681f3Smrg                           : V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
45267ec681f3Smrg   else
45277ec681f3Smrg      return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
45287ec681f3Smrg                           : V_008F38_SQ_TEX_XY_FILTER_POINT;
452901e04c3fSmrg}
453001e04c3fSmrg
/**
 * Map a value to an encoded step in the range 0..4:
 * below 2 -> 0, 2..3 -> 1, 4..7 -> 2, 8..15 -> 3, 16 and above -> 4.
 */
static inline unsigned si_tex_aniso_filter(unsigned filter)
{
   unsigned step = 0;
   unsigned threshold = 2;

   /* Each threshold reached (2, 4, 8, 16) raises the result by one. */
   while (step < 4 && filter >= threshold) {
      step++;
      threshold *= 2;
   }

   return step;
}
4543af69d88dSmrg
4544af69d88dSmrgstatic void *si_create_sampler_state(struct pipe_context *ctx,
45457ec681f3Smrg                                     const struct pipe_sampler_state *state)
4546af69d88dSmrg{
45477ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
45487ec681f3Smrg   struct si_screen *sscreen = sctx->screen;
45497ec681f3Smrg   struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
45507ec681f3Smrg   unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy;
45517ec681f3Smrg   unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso);
45527ec681f3Smrg   bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
45537ec681f3Smrg                      state->mag_img_filter == PIPE_TEX_FILTER_NEAREST &&
45547ec681f3Smrg                      state->compare_mode == PIPE_TEX_COMPARE_NONE;
45557ec681f3Smrg   union pipe_color_union clamped_border_color;
45567ec681f3Smrg
45577ec681f3Smrg   if (!rstate) {
45587ec681f3Smrg      return NULL;
45597ec681f3Smrg   }
45607ec681f3Smrg
45617ec681f3Smrg   /* Validate inputs. */
45627ec681f3Smrg   if (!is_wrap_mode_legal(sscreen, state->wrap_s) ||
45637ec681f3Smrg       !is_wrap_mode_legal(sscreen, state->wrap_t) ||
45647ec681f3Smrg       !is_wrap_mode_legal(sscreen, state->wrap_r) ||
45657ec681f3Smrg       (!sscreen->info.has_3d_cube_border_color_mipmap &&
45667ec681f3Smrg        (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
45677ec681f3Smrg         state->max_anisotropy > 0))) {
45687ec681f3Smrg      assert(0);
45697ec681f3Smrg      return NULL;
45707ec681f3Smrg   }
45717ec681f3Smrg
45727ec681f3Smrg#ifndef NDEBUG
45737ec681f3Smrg   rstate->magic = SI_SAMPLER_STATE_MAGIC;
457401e04c3fSmrg#endif
45757ec681f3Smrg   rstate->val[0] =
45767ec681f3Smrg      (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
45777ec681f3Smrg       S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
45787ec681f3Smrg       S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
45797ec681f3Smrg       S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
45807ec681f3Smrg       S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
45817ec681f3Smrg       S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
45827ec681f3Smrg       S_008F30_TRUNC_COORD(trunc_coord) |
45837ec681f3Smrg       S_008F30_COMPAT_MODE(sctx->chip_class == GFX8 || sctx->chip_class == GFX9));
45847ec681f3Smrg   rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
45857ec681f3Smrg                     S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
45867ec681f3Smrg                     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
45877ec681f3Smrg   rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
45887ec681f3Smrg                     S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) |
45897ec681f3Smrg                     S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) |
45907ec681f3Smrg                     S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
45917ec681f3Smrg                     S_008F38_MIP_POINT_PRECLAMP(0));
45927ec681f3Smrg   rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color,
45937ec681f3Smrg                                              state->border_color_is_integer);
45947ec681f3Smrg
45957ec681f3Smrg   if (sscreen->info.chip_class >= GFX10) {
45967ec681f3Smrg      rstate->val[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
45977ec681f3Smrg   } else {
45987ec681f3Smrg      rstate->val[2] |= S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= GFX8) |
45997ec681f3Smrg                        S_008F38_FILTER_PREC_FIX(1) |
46007ec681f3Smrg                        S_008F38_ANISO_OVERRIDE_GFX8(sctx->chip_class >= GFX8);
46017ec681f3Smrg   }
46027ec681f3Smrg
46037ec681f3Smrg   /* Create sampler resource for upgraded depth textures. */
46047ec681f3Smrg   memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val));
46057ec681f3Smrg
46067ec681f3Smrg   for (unsigned i = 0; i < 4; ++i) {
46077ec681f3Smrg      /* Use channel 0 on purpose, so that we can use OPAQUE_WHITE
46087ec681f3Smrg       * when the border color is 1.0. */
46097ec681f3Smrg      clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1);
46107ec681f3Smrg   }
46117ec681f3Smrg
46127ec681f3Smrg   if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0) {
46137ec681f3Smrg      if (sscreen->info.chip_class <= GFX9)
46147ec681f3Smrg         rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1);
46157ec681f3Smrg   } else {
46167ec681f3Smrg      rstate->upgraded_depth_val[3] =
46177ec681f3Smrg         si_translate_border_color(sctx, state, &clamped_border_color, false);
46187ec681f3Smrg   }
46197ec681f3Smrg
46207ec681f3Smrg   return rstate;
4621af69d88dSmrg}
4622af69d88dSmrg
462301e04c3fSmrgstatic void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
4624af69d88dSmrg{
46257ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
4626af69d88dSmrg
46277ec681f3Smrg   if (sctx->sample_mask == (uint16_t)sample_mask)
46287ec681f3Smrg      return;
4629af69d88dSmrg
46307ec681f3Smrg   sctx->sample_mask = sample_mask;
46317ec681f3Smrg   si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_mask);
4632af69d88dSmrg}
4633af69d88dSmrg
463401e04c3fSmrgstatic void si_emit_sample_mask(struct si_context *sctx)
4635af69d88dSmrg{
46367ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
46377ec681f3Smrg   unsigned mask = sctx->sample_mask;
46387ec681f3Smrg
46397ec681f3Smrg   /* Needed for line and polygon smoothing as well as for the Polaris
46407ec681f3Smrg    * small primitive filter. We expect the gallium frontend to take care of
46417ec681f3Smrg    * this for us.
46427ec681f3Smrg    */
46437ec681f3Smrg   assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
46447ec681f3Smrg          (mask & 1 && sctx->blitter_running));
46457ec681f3Smrg
46467ec681f3Smrg   radeon_begin(cs);
46477ec681f3Smrg   radeon_set_context_reg_seq(R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
46487ec681f3Smrg   radeon_emit(mask | (mask << 16));
46497ec681f3Smrg   radeon_emit(mask | (mask << 16));
46507ec681f3Smrg   radeon_end();
4651af69d88dSmrg}
4652af69d88dSmrg
4653af69d88dSmrgstatic void si_delete_sampler_state(struct pipe_context *ctx, void *state)
4654af69d88dSmrg{
46557ec681f3Smrg#ifndef NDEBUG
46567ec681f3Smrg   struct si_sampler_state *s = state;
465701e04c3fSmrg
46587ec681f3Smrg   assert(s->magic == SI_SAMPLER_STATE_MAGIC);
46597ec681f3Smrg   s->magic = 0;
466001e04c3fSmrg#endif
46617ec681f3Smrg   free(state);
4662af69d88dSmrg}
4663af69d88dSmrg
4664af69d88dSmrg/*
4665af69d88dSmrg * Vertex elements & buffers
4666af69d88dSmrg */
4667af69d88dSmrg
46687ec681f3Smrgstruct si_fast_udiv_info32 si_compute_fast_udiv_info32(uint32_t D, unsigned num_bits)
466901e04c3fSmrg{
46707ec681f3Smrg   struct util_fast_udiv_info info = util_compute_fast_udiv_info(D, num_bits, 32);
46717ec681f3Smrg
46727ec681f3Smrg   struct si_fast_udiv_info32 result = {
46737ec681f3Smrg      info.multiplier,
46747ec681f3Smrg      info.pre_shift,
46757ec681f3Smrg      info.post_shift,
46767ec681f3Smrg      info.increment,
46777ec681f3Smrg   };
46787ec681f3Smrg   return result;
467901e04c3fSmrg}
468001e04c3fSmrg
46817ec681f3Smrgstatic void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count,
46827ec681f3Smrg                                       const struct pipe_vertex_element *elements)
4683af69d88dSmrg{
46847ec681f3Smrg   struct si_screen *sscreen = (struct si_screen *)ctx->screen;
46857ec681f3Smrg   struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements);
46867ec681f3Smrg   bool used[SI_NUM_VERTEX_BUFFERS] = {};
46877ec681f3Smrg   struct si_fast_udiv_info32 divisor_factors[SI_MAX_ATTRIBS] = {};
46887ec681f3Smrg   STATIC_ASSERT(sizeof(struct si_fast_udiv_info32) == 16);
46897ec681f3Smrg   STATIC_ASSERT(sizeof(divisor_factors[0].multiplier) == 4);
46907ec681f3Smrg   STATIC_ASSERT(sizeof(divisor_factors[0].pre_shift) == 4);
46917ec681f3Smrg   STATIC_ASSERT(sizeof(divisor_factors[0].post_shift) == 4);
46927ec681f3Smrg   STATIC_ASSERT(sizeof(divisor_factors[0].increment) == 4);
46937ec681f3Smrg   int i;
46947ec681f3Smrg
46957ec681f3Smrg   assert(count <= SI_MAX_ATTRIBS);
46967ec681f3Smrg   if (!v)
46977ec681f3Smrg      return NULL;
46987ec681f3Smrg
46997ec681f3Smrg   v->count = count;
47007ec681f3Smrg
47017ec681f3Smrg   unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sscreen);
47027ec681f3Smrg   unsigned alloc_count =
47037ec681f3Smrg      count > num_vbos_in_user_sgprs ? count - num_vbos_in_user_sgprs : 0;
47047ec681f3Smrg   v->vb_desc_list_alloc_size = align(alloc_count * 16, SI_CPDMA_ALIGNMENT);
47057ec681f3Smrg
47067ec681f3Smrg   for (i = 0; i < count; ++i) {
47077ec681f3Smrg      const struct util_format_description *desc;
47087ec681f3Smrg      const struct util_format_channel_description *channel;
47097ec681f3Smrg      int first_non_void;
47107ec681f3Smrg      unsigned vbo_index = elements[i].vertex_buffer_index;
47117ec681f3Smrg
47127ec681f3Smrg      if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
47137ec681f3Smrg         FREE(v);
47147ec681f3Smrg         return NULL;
47157ec681f3Smrg      }
47167ec681f3Smrg
47177ec681f3Smrg      unsigned instance_divisor = elements[i].instance_divisor;
47187ec681f3Smrg      if (instance_divisor) {
47197ec681f3Smrg         if (instance_divisor == 1) {
47207ec681f3Smrg            v->instance_divisor_is_one |= 1u << i;
47217ec681f3Smrg         } else {
47227ec681f3Smrg            v->instance_divisor_is_fetched |= 1u << i;
47237ec681f3Smrg            divisor_factors[i] = si_compute_fast_udiv_info32(instance_divisor, 32);
47247ec681f3Smrg         }
47257ec681f3Smrg      }
47267ec681f3Smrg
47277ec681f3Smrg      if (!used[vbo_index]) {
47287ec681f3Smrg         v->first_vb_use_mask |= 1 << i;
47297ec681f3Smrg         used[vbo_index] = true;
47307ec681f3Smrg      }
47317ec681f3Smrg
47327ec681f3Smrg      desc = util_format_description(elements[i].src_format);
47337ec681f3Smrg      first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
47347ec681f3Smrg      channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
47357ec681f3Smrg
47367ec681f3Smrg      v->format_size[i] = desc->block.bits / 8;
47377ec681f3Smrg      v->src_offset[i] = elements[i].src_offset;
47387ec681f3Smrg      v->vertex_buffer_index[i] = vbo_index;
47397ec681f3Smrg
47407ec681f3Smrg      bool always_fix = false;
47417ec681f3Smrg      union si_vs_fix_fetch fix_fetch;
47427ec681f3Smrg      unsigned log_hw_load_size; /* the load element size as seen by the hardware */
47437ec681f3Smrg
47447ec681f3Smrg      fix_fetch.bits = 0;
47457ec681f3Smrg      log_hw_load_size = MIN2(2, util_logbase2(desc->block.bits) - 3);
47467ec681f3Smrg
47477ec681f3Smrg      if (channel) {
47487ec681f3Smrg         switch (channel->type) {
47497ec681f3Smrg         case UTIL_FORMAT_TYPE_FLOAT:
47507ec681f3Smrg            fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT;
47517ec681f3Smrg            break;
47527ec681f3Smrg         case UTIL_FORMAT_TYPE_FIXED:
47537ec681f3Smrg            fix_fetch.u.format = AC_FETCH_FORMAT_FIXED;
47547ec681f3Smrg            break;
47557ec681f3Smrg         case UTIL_FORMAT_TYPE_SIGNED: {
47567ec681f3Smrg            if (channel->pure_integer)
47577ec681f3Smrg               fix_fetch.u.format = AC_FETCH_FORMAT_SINT;
47587ec681f3Smrg            else if (channel->normalized)
47597ec681f3Smrg               fix_fetch.u.format = AC_FETCH_FORMAT_SNORM;
47607ec681f3Smrg            else
47617ec681f3Smrg               fix_fetch.u.format = AC_FETCH_FORMAT_SSCALED;
47627ec681f3Smrg            break;
47637ec681f3Smrg         }
47647ec681f3Smrg         case UTIL_FORMAT_TYPE_UNSIGNED: {
47657ec681f3Smrg            if (channel->pure_integer)
47667ec681f3Smrg               fix_fetch.u.format = AC_FETCH_FORMAT_UINT;
47677ec681f3Smrg            else if (channel->normalized)
47687ec681f3Smrg               fix_fetch.u.format = AC_FETCH_FORMAT_UNORM;
47697ec681f3Smrg            else
47707ec681f3Smrg               fix_fetch.u.format = AC_FETCH_FORMAT_USCALED;
47717ec681f3Smrg            break;
47727ec681f3Smrg         }
47737ec681f3Smrg         default:
47747ec681f3Smrg            unreachable("bad format type");
47757ec681f3Smrg         }
47767ec681f3Smrg      } else {
47777ec681f3Smrg         switch (elements[i].src_format) {
47787ec681f3Smrg         case PIPE_FORMAT_R11G11B10_FLOAT:
47797ec681f3Smrg            fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT;
47807ec681f3Smrg            break;
47817ec681f3Smrg         default:
47827ec681f3Smrg            unreachable("bad other format");
47837ec681f3Smrg         }
47847ec681f3Smrg      }
47857ec681f3Smrg
47867ec681f3Smrg      if (desc->channel[0].size == 10) {
47877ec681f3Smrg         fix_fetch.u.log_size = 3; /* special encoding for 2_10_10_10 */
47887ec681f3Smrg         log_hw_load_size = 2;
47897ec681f3Smrg
47907ec681f3Smrg         /* The hardware always treats the 2-bit alpha channel as
47917ec681f3Smrg          * unsigned, so a shader workaround is needed. The affected
47927ec681f3Smrg          * chips are GFX8 and older except Stoney (GFX8.1).
47937ec681f3Smrg          */
47947ec681f3Smrg         always_fix = sscreen->info.chip_class <= GFX8 && sscreen->info.family != CHIP_STONEY &&
47957ec681f3Smrg                      channel->type == UTIL_FORMAT_TYPE_SIGNED;
47967ec681f3Smrg      } else if (elements[i].src_format == PIPE_FORMAT_R11G11B10_FLOAT) {
47977ec681f3Smrg         fix_fetch.u.log_size = 3; /* special encoding */
47987ec681f3Smrg         fix_fetch.u.format = AC_FETCH_FORMAT_FIXED;
47997ec681f3Smrg         log_hw_load_size = 2;
48007ec681f3Smrg      } else {
48017ec681f3Smrg         fix_fetch.u.log_size = util_logbase2(channel->size) - 3;
48027ec681f3Smrg         fix_fetch.u.num_channels_m1 = desc->nr_channels - 1;
48037ec681f3Smrg
48047ec681f3Smrg         /* Always fix up:
48057ec681f3Smrg          * - doubles (multiple loads + truncate to float)
48067ec681f3Smrg          * - 32-bit requiring a conversion
48077ec681f3Smrg          */
48087ec681f3Smrg         always_fix = (fix_fetch.u.log_size == 3) ||
48097ec681f3Smrg                      (fix_fetch.u.log_size == 2 && fix_fetch.u.format != AC_FETCH_FORMAT_FLOAT &&
48107ec681f3Smrg                       fix_fetch.u.format != AC_FETCH_FORMAT_UINT &&
48117ec681f3Smrg                       fix_fetch.u.format != AC_FETCH_FORMAT_SINT);
48127ec681f3Smrg
48137ec681f3Smrg         /* Also fixup 8_8_8 and 16_16_16. */
48147ec681f3Smrg         if (desc->nr_channels == 3 && fix_fetch.u.log_size <= 1) {
48157ec681f3Smrg            always_fix = true;
48167ec681f3Smrg            log_hw_load_size = fix_fetch.u.log_size;
48177ec681f3Smrg         }
48187ec681f3Smrg      }
48197ec681f3Smrg
48207ec681f3Smrg      if (desc->swizzle[0] != PIPE_SWIZZLE_X) {
48217ec681f3Smrg         assert(desc->swizzle[0] == PIPE_SWIZZLE_Z &&
48227ec681f3Smrg                (desc->swizzle[2] == PIPE_SWIZZLE_X || desc->swizzle[2] == PIPE_SWIZZLE_0));
48237ec681f3Smrg         fix_fetch.u.reverse = 1;
48247ec681f3Smrg      }
48257ec681f3Smrg
48267ec681f3Smrg      /* Force the workaround for unaligned access here already if the
48277ec681f3Smrg       * offset relative to the vertex buffer base is unaligned.
48287ec681f3Smrg       *
48297ec681f3Smrg       * There is a theoretical case in which this is too conservative:
48307ec681f3Smrg       * if the vertex buffer's offset is also unaligned in just the
48317ec681f3Smrg       * right way, we end up with an aligned address after all.
48327ec681f3Smrg       * However, this case should be extremely rare in practice (it
48337ec681f3Smrg       * won't happen in well-behaved applications), and taking it
48347ec681f3Smrg       * into account would complicate the fast path (where everything
48357ec681f3Smrg       * is nicely aligned).
48367ec681f3Smrg       */
48377ec681f3Smrg      bool check_alignment =
48387ec681f3Smrg            log_hw_load_size >= 1 &&
48397ec681f3Smrg            (sscreen->info.chip_class == GFX6 || sscreen->info.chip_class >= GFX10);
48407ec681f3Smrg      bool opencode = sscreen->options.vs_fetch_always_opencode;
48417ec681f3Smrg
48427ec681f3Smrg      if (check_alignment && (elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0)
48437ec681f3Smrg         opencode = true;
48447ec681f3Smrg
48457ec681f3Smrg      if (always_fix || check_alignment || opencode)
48467ec681f3Smrg         v->fix_fetch[i] = fix_fetch.bits;
48477ec681f3Smrg
48487ec681f3Smrg      if (opencode)
48497ec681f3Smrg         v->fix_fetch_opencode |= 1 << i;
48507ec681f3Smrg      if (opencode || always_fix)
48517ec681f3Smrg         v->fix_fetch_always |= 1 << i;
48527ec681f3Smrg
48537ec681f3Smrg      if (check_alignment && !opencode) {
48547ec681f3Smrg         assert(log_hw_load_size == 1 || log_hw_load_size == 2);
48557ec681f3Smrg
48567ec681f3Smrg         v->fix_fetch_unaligned |= 1 << i;
48577ec681f3Smrg         v->hw_load_is_dword |= (log_hw_load_size - 1) << i;
48587ec681f3Smrg         v->vb_alignment_check_mask |= 1 << vbo_index;
48597ec681f3Smrg      }
48607ec681f3Smrg
48617ec681f3Smrg      v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
48627ec681f3Smrg                         S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
48637ec681f3Smrg                         S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
48647ec681f3Smrg                         S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3]));
48657ec681f3Smrg
48667ec681f3Smrg      if (sscreen->info.chip_class >= GFX10) {
48677ec681f3Smrg         const struct gfx10_format *fmt = &gfx10_format_table[elements[i].src_format];
48687ec681f3Smrg         assert(fmt->img_format != 0 && fmt->img_format < 128);
48697ec681f3Smrg         v->rsrc_word3[i] |= S_008F0C_FORMAT(fmt->img_format) | S_008F0C_RESOURCE_LEVEL(1);
48707ec681f3Smrg      } else {
48717ec681f3Smrg         unsigned data_format, num_format;
48727ec681f3Smrg         data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
48737ec681f3Smrg         num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
48747ec681f3Smrg         v->rsrc_word3[i] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
48757ec681f3Smrg      }
48767ec681f3Smrg   }
48777ec681f3Smrg
48787ec681f3Smrg   if (v->instance_divisor_is_fetched) {
48797ec681f3Smrg      unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched);
48807ec681f3Smrg
48817ec681f3Smrg      v->instance_divisor_factor_buffer = (struct si_resource *)pipe_buffer_create(
48827ec681f3Smrg         &sscreen->b, 0, PIPE_USAGE_DEFAULT, num_divisors * sizeof(divisor_factors[0]));
48837ec681f3Smrg      if (!v->instance_divisor_factor_buffer) {
48847ec681f3Smrg         FREE(v);
48857ec681f3Smrg         return NULL;
48867ec681f3Smrg      }
48877ec681f3Smrg      void *map =
48887ec681f3Smrg         sscreen->ws->buffer_map(sscreen->ws, v->instance_divisor_factor_buffer->buf, NULL, PIPE_MAP_WRITE);
48897ec681f3Smrg      memcpy(map, divisor_factors, num_divisors * sizeof(divisor_factors[0]));
48907ec681f3Smrg   }
48917ec681f3Smrg   return v;
4892af69d88dSmrg}
4893af69d88dSmrg
4894af69d88dSmrgstatic void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
4895af69d88dSmrg{
48967ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
48977ec681f3Smrg   struct si_vertex_elements *old = sctx->vertex_elements;
48987ec681f3Smrg   struct si_vertex_elements *v = (struct si_vertex_elements *)state;
48997ec681f3Smrg
49007ec681f3Smrg   if (!v)
49017ec681f3Smrg      v = sctx->no_velems_state;
49027ec681f3Smrg
49037ec681f3Smrg   sctx->vertex_elements = v;
49047ec681f3Smrg   sctx->num_vertex_elements = v->count;
49057ec681f3Smrg
49067ec681f3Smrg   if (sctx->num_vertex_elements) {
49077ec681f3Smrg      sctx->vertex_buffers_dirty = true;
49087ec681f3Smrg   } else {
49097ec681f3Smrg      sctx->vertex_buffers_dirty = false;
49107ec681f3Smrg      sctx->vertex_buffer_pointer_dirty = false;
49117ec681f3Smrg      sctx->vertex_buffer_user_sgprs_dirty = false;
49127ec681f3Smrg   }
49137ec681f3Smrg
49147ec681f3Smrg   if (old->instance_divisor_is_one != v->instance_divisor_is_one ||
49157ec681f3Smrg       old->instance_divisor_is_fetched != v->instance_divisor_is_fetched ||
49167ec681f3Smrg       (old->vb_alignment_check_mask ^ v->vb_alignment_check_mask) &
49177ec681f3Smrg       sctx->vertex_buffer_unaligned ||
49187ec681f3Smrg       ((v->vb_alignment_check_mask & sctx->vertex_buffer_unaligned) &&
49197ec681f3Smrg        memcmp(old->vertex_buffer_index, v->vertex_buffer_index,
49207ec681f3Smrg               sizeof(v->vertex_buffer_index[0]) * MAX2(old->count, v->count))) ||
49217ec681f3Smrg       /* fix_fetch_{always,opencode,unaligned} and hw_load_is_dword are
49227ec681f3Smrg        * functions of fix_fetch and the src_offset alignment.
49237ec681f3Smrg        * If they change and fix_fetch doesn't, it must be due to different
49247ec681f3Smrg        * src_offset alignment, which is reflected in fix_fetch_opencode. */
49257ec681f3Smrg       old->fix_fetch_opencode != v->fix_fetch_opencode ||
49267ec681f3Smrg       memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) *
49277ec681f3Smrg              MAX2(old->count, v->count))) {
49287ec681f3Smrg      si_vs_key_update_inputs(sctx);
49297ec681f3Smrg      sctx->do_update_shaders = true;
49307ec681f3Smrg   }
49317ec681f3Smrg
49327ec681f3Smrg   if (v->instance_divisor_is_fetched) {
49337ec681f3Smrg      struct pipe_constant_buffer cb;
49347ec681f3Smrg
49357ec681f3Smrg      cb.buffer = &v->instance_divisor_factor_buffer->b.b;
49367ec681f3Smrg      cb.user_buffer = NULL;
49377ec681f3Smrg      cb.buffer_offset = 0;
49387ec681f3Smrg      cb.buffer_size = 0xffffffff;
49397ec681f3Smrg      si_set_internal_const_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb);
49407ec681f3Smrg   }
4941af69d88dSmrg}
4942af69d88dSmrg
4943af69d88dSmrgstatic void si_delete_vertex_element(struct pipe_context *ctx, void *state)
4944af69d88dSmrg{
49457ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
49467ec681f3Smrg   struct si_vertex_elements *v = (struct si_vertex_elements *)state;
49477ec681f3Smrg
49487ec681f3Smrg   if (sctx->vertex_elements == state)
49497ec681f3Smrg      si_bind_vertex_elements(ctx, sctx->no_velems_state);
49507ec681f3Smrg
49517ec681f3Smrg   si_resource_reference(&v->instance_divisor_factor_buffer, NULL);
49527ec681f3Smrg   FREE(state);
49537ec681f3Smrg}
49547ec681f3Smrg
49557ec681f3Smrgstatic void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count,
49567ec681f3Smrg                                  unsigned unbind_num_trailing_slots, bool take_ownership,
49577ec681f3Smrg                                  const struct pipe_vertex_buffer *buffers)
49587ec681f3Smrg{
49597ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
49607ec681f3Smrg   struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
49617ec681f3Smrg   unsigned updated_mask = u_bit_consecutive(start_slot, count + unbind_num_trailing_slots);
49627ec681f3Smrg   uint32_t orig_unaligned = sctx->vertex_buffer_unaligned;
49637ec681f3Smrg   uint32_t unaligned = 0;
49647ec681f3Smrg   int i;
49657ec681f3Smrg
49667ec681f3Smrg   assert(start_slot + count + unbind_num_trailing_slots <= ARRAY_SIZE(sctx->vertex_buffer));
49677ec681f3Smrg
49687ec681f3Smrg   if (buffers) {
49697ec681f3Smrg      if (take_ownership) {
49707ec681f3Smrg         for (i = 0; i < count; i++) {
49717ec681f3Smrg            const struct pipe_vertex_buffer *src = buffers + i;
49727ec681f3Smrg            struct pipe_vertex_buffer *dsti = dst + i;
49737ec681f3Smrg            struct pipe_resource *buf = src->buffer.resource;
49747ec681f3Smrg            unsigned slot_bit = 1 << (start_slot + i);
49757ec681f3Smrg
49767ec681f3Smrg            /* Only unreference bound vertex buffers. (take_ownership) */
49777ec681f3Smrg            pipe_resource_reference(&dsti->buffer.resource, NULL);
49787ec681f3Smrg
49797ec681f3Smrg            if (src->buffer_offset & 3 || src->stride & 3)
49807ec681f3Smrg               unaligned |= slot_bit;
49817ec681f3Smrg
49827ec681f3Smrg            si_context_add_resource_size(sctx, buf);
49837ec681f3Smrg            if (buf)
49847ec681f3Smrg               si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;
49857ec681f3Smrg         }
49867ec681f3Smrg         /* take_ownership allows us to copy pipe_resource pointers without refcounting. */
49877ec681f3Smrg         memcpy(dst, buffers, count * sizeof(struct pipe_vertex_buffer));
49887ec681f3Smrg      } else {
49897ec681f3Smrg         for (i = 0; i < count; i++) {
49907ec681f3Smrg            const struct pipe_vertex_buffer *src = buffers + i;
49917ec681f3Smrg            struct pipe_vertex_buffer *dsti = dst + i;
49927ec681f3Smrg            struct pipe_resource *buf = src->buffer.resource;
49937ec681f3Smrg            unsigned slot_bit = 1 << (start_slot + i);
49947ec681f3Smrg
49957ec681f3Smrg            pipe_resource_reference(&dsti->buffer.resource, buf);
49967ec681f3Smrg            dsti->buffer_offset = src->buffer_offset;
49977ec681f3Smrg            dsti->stride = src->stride;
49987ec681f3Smrg
49997ec681f3Smrg            if (dsti->buffer_offset & 3 || dsti->stride & 3)
50007ec681f3Smrg               unaligned |= slot_bit;
50017ec681f3Smrg
50027ec681f3Smrg            si_context_add_resource_size(sctx, buf);
50037ec681f3Smrg            if (buf)
50047ec681f3Smrg               si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;
50057ec681f3Smrg         }
50067ec681f3Smrg      }
50077ec681f3Smrg   } else {
50087ec681f3Smrg      for (i = 0; i < count; i++)
50097ec681f3Smrg         pipe_resource_reference(&dst[i].buffer.resource, NULL);
50107ec681f3Smrg   }
50117ec681f3Smrg
50127ec681f3Smrg   for (i = 0; i < unbind_num_trailing_slots; i++)
50137ec681f3Smrg      pipe_resource_reference(&dst[count + i].buffer.resource, NULL);
50147ec681f3Smrg
50157ec681f3Smrg   sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0;
50167ec681f3Smrg   sctx->vertex_buffer_unaligned = (orig_unaligned & ~updated_mask) | unaligned;
50177ec681f3Smrg
50187ec681f3Smrg   /* Check whether alignment may have changed in a way that requires
50197ec681f3Smrg    * shader changes. This check is conservative: a vertex buffer can only
50207ec681f3Smrg    * trigger a shader change if the misalignment amount changes (e.g.
50217ec681f3Smrg    * from byte-aligned to short-aligned), but we only keep track of
50227ec681f3Smrg    * whether buffers are at least dword-aligned, since that should always
50237ec681f3Smrg    * be the case in well-behaved applications anyway.
50247ec681f3Smrg    */
50257ec681f3Smrg   if ((sctx->vertex_elements->vb_alignment_check_mask &
50267ec681f3Smrg        (unaligned | orig_unaligned) & updated_mask)) {
50277ec681f3Smrg      si_vs_key_update_inputs(sctx);
50287ec681f3Smrg      sctx->do_update_shaders = true;
50297ec681f3Smrg   }
50307ec681f3Smrg}
5031af69d88dSmrg
50327ec681f3Smrgstatic struct pipe_vertex_state *
50337ec681f3Smrgsi_create_vertex_state(struct pipe_screen *screen,
50347ec681f3Smrg                       struct pipe_vertex_buffer *buffer,
50357ec681f3Smrg                       const struct pipe_vertex_element *elements,
50367ec681f3Smrg                       unsigned num_elements,
50377ec681f3Smrg                       struct pipe_resource *indexbuf,
50387ec681f3Smrg                       uint32_t full_velem_mask)
50397ec681f3Smrg{
50407ec681f3Smrg   struct si_screen *sscreen = (struct si_screen *)screen;
50417ec681f3Smrg   struct si_vertex_state *state = CALLOC_STRUCT(si_vertex_state);
50427ec681f3Smrg
50437ec681f3Smrg   util_init_pipe_vertex_state(screen, buffer, elements, num_elements, indexbuf, full_velem_mask,
50447ec681f3Smrg                               &state->b);
50457ec681f3Smrg
50467ec681f3Smrg   /* Initialize the vertex element state in state->element.
50477ec681f3Smrg    * Do it by creating a vertex element state object and copying it there.
50487ec681f3Smrg    */
50497ec681f3Smrg   struct si_context ctx = {};
50507ec681f3Smrg   ctx.b.screen = screen;
50517ec681f3Smrg   struct si_vertex_elements *velems = si_create_vertex_elements(&ctx.b, num_elements, elements);
50527ec681f3Smrg   state->velems = *velems;
50537ec681f3Smrg   si_delete_vertex_element(&ctx.b, velems);
50547ec681f3Smrg
50557ec681f3Smrg   assert(!state->velems.instance_divisor_is_one);
50567ec681f3Smrg   assert(!state->velems.instance_divisor_is_fetched);
50577ec681f3Smrg   assert(!state->velems.fix_fetch_always);
50587ec681f3Smrg   assert(buffer->stride % 4 == 0);
50597ec681f3Smrg   assert(buffer->buffer_offset % 4 == 0);
50607ec681f3Smrg   assert(!buffer->is_user_buffer);
50617ec681f3Smrg   for (unsigned i = 0; i < num_elements; i++) {
50627ec681f3Smrg      assert(elements[i].src_offset % 4 == 0);
50637ec681f3Smrg      assert(!elements[i].dual_slot);
50647ec681f3Smrg   }
50657ec681f3Smrg
50667ec681f3Smrg   for (unsigned i = 0; i < num_elements; i++) {
50677ec681f3Smrg      si_set_vertex_buffer_descriptor(sscreen, &state->velems, &state->b.input.vbuffer, i,
50687ec681f3Smrg                                      &state->descriptors[i * 4]);
50697ec681f3Smrg   }
50707ec681f3Smrg
50717ec681f3Smrg   return &state->b;
5072af69d88dSmrg}
5073af69d88dSmrg
50747ec681f3Smrgstatic void si_vertex_state_destroy(struct pipe_screen *screen,
50757ec681f3Smrg                                    struct pipe_vertex_state *state)
5076af69d88dSmrg{
50777ec681f3Smrg   pipe_vertex_buffer_unreference(&state->input.vbuffer);
50787ec681f3Smrg   pipe_resource_reference(&state->input.indexbuf, NULL);
50797ec681f3Smrg   FREE(state);
50807ec681f3Smrg}
50817ec681f3Smrg
50827ec681f3Smrgstatic struct pipe_vertex_state *
50837ec681f3Smrgsi_pipe_create_vertex_state(struct pipe_screen *screen,
50847ec681f3Smrg                            struct pipe_vertex_buffer *buffer,
50857ec681f3Smrg                            const struct pipe_vertex_element *elements,
50867ec681f3Smrg                            unsigned num_elements,
50877ec681f3Smrg                            struct pipe_resource *indexbuf,
50887ec681f3Smrg                            uint32_t full_velem_mask)
50897ec681f3Smrg{
50907ec681f3Smrg   struct si_screen *sscreen = (struct si_screen *)screen;
50917ec681f3Smrg
50927ec681f3Smrg   return util_vertex_state_cache_get(screen, buffer, elements, num_elements, indexbuf,
50937ec681f3Smrg                                      full_velem_mask, &sscreen->vertex_state_cache);
50947ec681f3Smrg}
50957ec681f3Smrg
50967ec681f3Smrgstatic void si_pipe_vertex_state_destroy(struct pipe_screen *screen,
50977ec681f3Smrg                                         struct pipe_vertex_state *state)
50987ec681f3Smrg{
50997ec681f3Smrg   struct si_screen *sscreen = (struct si_screen *)screen;
51007ec681f3Smrg
51017ec681f3Smrg   util_vertex_state_destroy(screen, &sscreen->vertex_state_cache, state);
5102af69d88dSmrg}
5103af69d88dSmrg
510401e04c3fSmrg/*
510501e04c3fSmrg * Misc
510601e04c3fSmrg */
510701e04c3fSmrg
51087ec681f3Smrgstatic void si_set_tess_state(struct pipe_context *ctx, const float default_outer_level[4],
51097ec681f3Smrg                              const float default_inner_level[2])
5110af69d88dSmrg{
51117ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
51127ec681f3Smrg   struct pipe_constant_buffer cb;
51137ec681f3Smrg   float array[8];
5114af69d88dSmrg
51157ec681f3Smrg   memcpy(array, default_outer_level, sizeof(float) * 4);
51167ec681f3Smrg   memcpy(array + 4, default_inner_level, sizeof(float) * 2);
511701e04c3fSmrg
51187ec681f3Smrg   cb.buffer = NULL;
51197ec681f3Smrg   cb.user_buffer = array;
51207ec681f3Smrg   cb.buffer_offset = 0;
51217ec681f3Smrg   cb.buffer_size = sizeof(array);
51227ec681f3Smrg
51237ec681f3Smrg   si_set_internal_const_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
51247ec681f3Smrg}
512501e04c3fSmrg
51267ec681f3Smrgstatic void si_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertices)
51277ec681f3Smrg{
51287ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
512901e04c3fSmrg
51307ec681f3Smrg   sctx->patch_vertices = patch_vertices;
5131af69d88dSmrg}
5132af69d88dSmrg
513301e04c3fSmrgstatic void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
5134af69d88dSmrg{
51357ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
513601e04c3fSmrg
51377ec681f3Smrg   si_update_fb_dirtiness_after_rendering(sctx);
513801e04c3fSmrg
51397ec681f3Smrg   /* Multisample surfaces are flushed in si_decompress_textures. */
51407ec681f3Smrg   if (sctx->framebuffer.uncompressed_cb_mask) {
51417ec681f3Smrg      si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
51427ec681f3Smrg                                 sctx->framebuffer.CB_has_shader_readable_metadata,
51437ec681f3Smrg                                 sctx->framebuffer.all_DCC_pipe_aligned);
51447ec681f3Smrg   }
5145af69d88dSmrg}
5146af69d88dSmrg
514701e04c3fSmrg/* This only ensures coherency for shader image/buffer stores. */
514801e04c3fSmrgstatic void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
5149af69d88dSmrg{
51507ec681f3Smrg   struct si_context *sctx = (struct si_context *)ctx;
51517ec681f3Smrg
51527ec681f3Smrg   if (!(flags & ~PIPE_BARRIER_UPDATE))
51537ec681f3Smrg      return;
51547ec681f3Smrg
51557ec681f3Smrg   /* Subsequent commands must wait for all shader invocations to
51567ec681f3Smrg    * complete. */
51577ec681f3Smrg   sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
51587ec681f3Smrg                  SI_CONTEXT_PFP_SYNC_ME;
51597ec681f3Smrg
51607ec681f3Smrg   if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
51617ec681f3Smrg      sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
51627ec681f3Smrg
51637ec681f3Smrg   if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_TEXTURE |
51647ec681f3Smrg                PIPE_BARRIER_IMAGE | PIPE_BARRIER_STREAMOUT_BUFFER | PIPE_BARRIER_GLOBAL_BUFFER)) {
51657ec681f3Smrg      /* As far as I can tell, L1 contents are written back to L2
51667ec681f3Smrg       * automatically at end of shader, but the contents of other
51677ec681f3Smrg       * L1 caches might still be stale. */
51687ec681f3Smrg      sctx->flags |= SI_CONTEXT_INV_VCACHE;
51697ec681f3Smrg   }
51707ec681f3Smrg
51717ec681f3Smrg   if (flags & PIPE_BARRIER_INDEX_BUFFER) {
51727ec681f3Smrg      /* Indices are read through TC L2 since GFX8.
51737ec681f3Smrg       * L1 isn't used.
51747ec681f3Smrg       */
51757ec681f3Smrg      if (sctx->screen->info.chip_class <= GFX7)
51767ec681f3Smrg         sctx->flags |= SI_CONTEXT_WB_L2;
51777ec681f3Smrg   }
51787ec681f3Smrg
51797ec681f3Smrg   /* MSAA color, any depth and any stencil are flushed in
51807ec681f3Smrg    * si_decompress_textures when needed.
51817ec681f3Smrg    */
51827ec681f3Smrg   if (flags & PIPE_BARRIER_FRAMEBUFFER && sctx->framebuffer.uncompressed_cb_mask) {
51837ec681f3Smrg      sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
51847ec681f3Smrg
51857ec681f3Smrg      if (sctx->chip_class <= GFX8)
51867ec681f3Smrg         sctx->flags |= SI_CONTEXT_WB_L2;
51877ec681f3Smrg   }
51887ec681f3Smrg
51897ec681f3Smrg   /* Indirect buffers use TC L2 on GFX9, but not older hw. */
51907ec681f3Smrg   if (sctx->screen->info.chip_class <= GFX8 && flags & PIPE_BARRIER_INDIRECT_BUFFER)
51917ec681f3Smrg      sctx->flags |= SI_CONTEXT_WB_L2;
5192af69d88dSmrg}
5193af69d88dSmrg
5194af69d88dSmrgstatic void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
5195af69d88dSmrg{
51967ec681f3Smrg   struct pipe_blend_state blend;
5197af69d88dSmrg
51987ec681f3Smrg   memset(&blend, 0, sizeof(blend));
51997ec681f3Smrg   blend.independent_blend_enable = true;
52007ec681f3Smrg   blend.rt[0].colormask = 0xf;
52017ec681f3Smrg   return si_create_blend_state_mode(&sctx->b, &blend, mode);
5202af69d88dSmrg}
5203af69d88dSmrg
52049f464c52Smayavoid si_init_state_compute_functions(struct si_context *sctx)
52059f464c52Smaya{
52067ec681f3Smrg   sctx->b.create_sampler_state = si_create_sampler_state;
52077ec681f3Smrg   sctx->b.delete_sampler_state = si_delete_sampler_state;
52087ec681f3Smrg   sctx->b.create_sampler_view = si_create_sampler_view;
52097ec681f3Smrg   sctx->b.sampler_view_destroy = si_sampler_view_destroy;
52107ec681f3Smrg   sctx->b.memory_barrier = si_memory_barrier;
52119f464c52Smaya}
52129f464c52Smaya
5213af69d88dSmrgvoid si_init_state_functions(struct si_context *sctx)
5214af69d88dSmrg{
52157ec681f3Smrg   sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;
52167ec681f3Smrg   sctx->atoms.s.msaa_sample_locs.emit = si_emit_msaa_sample_locs;
52177ec681f3Smrg   sctx->atoms.s.db_render_state.emit = si_emit_db_render_state;
52187ec681f3Smrg   sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state;
52197ec681f3Smrg   sctx->atoms.s.msaa_config.emit = si_emit_msaa_config;
52207ec681f3Smrg   sctx->atoms.s.sample_mask.emit = si_emit_sample_mask;
52217ec681f3Smrg   sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state;
52227ec681f3Smrg   sctx->atoms.s.blend_color.emit = si_emit_blend_color;
52237ec681f3Smrg   sctx->atoms.s.clip_regs.emit = si_emit_clip_regs;
52247ec681f3Smrg   sctx->atoms.s.clip_state.emit = si_emit_clip_state;
52257ec681f3Smrg   sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref;
52267ec681f3Smrg
52277ec681f3Smrg   sctx->b.create_blend_state = si_create_blend_state;
52287ec681f3Smrg   sctx->b.bind_blend_state = si_bind_blend_state;
52297ec681f3Smrg   sctx->b.delete_blend_state = si_delete_blend_state;
52307ec681f3Smrg   sctx->b.set_blend_color = si_set_blend_color;
52317ec681f3Smrg
52327ec681f3Smrg   sctx->b.create_rasterizer_state = si_create_rs_state;
52337ec681f3Smrg   sctx->b.bind_rasterizer_state = si_bind_rs_state;
52347ec681f3Smrg   sctx->b.delete_rasterizer_state = si_delete_rs_state;
52357ec681f3Smrg
52367ec681f3Smrg   sctx->b.create_depth_stencil_alpha_state = si_create_dsa_state;
52377ec681f3Smrg   sctx->b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
52387ec681f3Smrg   sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
52397ec681f3Smrg
52407ec681f3Smrg   sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
52417ec681f3Smrg   sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
52427ec681f3Smrg   sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
52437ec681f3Smrg   sctx->custom_blend_eliminate_fastclear =
52447ec681f3Smrg      si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
52457ec681f3Smrg   sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);
52467ec681f3Smrg
52477ec681f3Smrg   sctx->b.set_clip_state = si_set_clip_state;
52487ec681f3Smrg   sctx->b.set_stencil_ref = si_set_stencil_ref;
52497ec681f3Smrg
52507ec681f3Smrg   sctx->b.set_framebuffer_state = si_set_framebuffer_state;
52517ec681f3Smrg
52527ec681f3Smrg   sctx->b.set_sample_mask = si_set_sample_mask;
52537ec681f3Smrg
52547ec681f3Smrg   sctx->b.create_vertex_elements_state = si_create_vertex_elements;
52557ec681f3Smrg   sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
52567ec681f3Smrg   sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
52577ec681f3Smrg   sctx->b.set_vertex_buffers = si_set_vertex_buffers;
52587ec681f3Smrg
52597ec681f3Smrg   sctx->b.texture_barrier = si_texture_barrier;
52607ec681f3Smrg   sctx->b.set_min_samples = si_set_min_samples;
52617ec681f3Smrg   sctx->b.set_tess_state = si_set_tess_state;
52627ec681f3Smrg   sctx->b.set_patch_vertices = si_set_patch_vertices;
52637ec681f3Smrg
52647ec681f3Smrg   sctx->b.set_active_query_state = si_set_active_query_state;
526501e04c3fSmrg}
5266af69d88dSmrg
526701e04c3fSmrgvoid si_init_screen_state_functions(struct si_screen *sscreen)
526801e04c3fSmrg{
52697ec681f3Smrg   sscreen->b.is_format_supported = si_is_format_supported;
52707ec681f3Smrg   sscreen->b.create_vertex_state = si_pipe_create_vertex_state;
52717ec681f3Smrg   sscreen->b.vertex_state_destroy = si_pipe_vertex_state_destroy;
52727ec681f3Smrg
52737ec681f3Smrg   if (sscreen->info.chip_class >= GFX10) {
52747ec681f3Smrg      sscreen->make_texture_descriptor = gfx10_make_texture_descriptor;
52757ec681f3Smrg   } else {
52767ec681f3Smrg      sscreen->make_texture_descriptor = si_make_texture_descriptor;
52777ec681f3Smrg   }
52787ec681f3Smrg
52797ec681f3Smrg   util_vertex_state_cache_init(&sscreen->vertex_state_cache,
52807ec681f3Smrg                                si_create_vertex_state, si_vertex_state_destroy);
528101e04c3fSmrg}
5282af69d88dSmrg
52837ec681f3Smrgstatic void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value)
528401e04c3fSmrg{
52857ec681f3Smrg   unsigned reg = sctx->chip_class >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX;
52867ec681f3Smrg   si_pm4_set_reg(pm4, reg, value);
528701e04c3fSmrg}
5288af69d88dSmrg
52897ec681f3Smrgstatic void si_set_grbm_gfx_index_se(struct si_context *sctx, struct si_pm4_state *pm4, unsigned se)
529001e04c3fSmrg{
52917ec681f3Smrg   assert(se == ~0 || se < sctx->screen->info.max_se);
52927ec681f3Smrg   si_set_grbm_gfx_index(sctx, pm4,
52937ec681f3Smrg                         (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) : S_030800_SE_INDEX(se)) |
52947ec681f3Smrg                            S_030800_SH_BROADCAST_WRITES(1) |
52957ec681f3Smrg                            S_030800_INSTANCE_BROADCAST_WRITES(1));
529601e04c3fSmrg}
5297af69d88dSmrg
52987ec681f3Smrgstatic void si_write_harvested_raster_configs(struct si_context *sctx, struct si_pm4_state *pm4,
52997ec681f3Smrg                                              unsigned raster_config, unsigned raster_config_1)
530001e04c3fSmrg{
53017ec681f3Smrg   unsigned num_se = MAX2(sctx->screen->info.max_se, 1);
53027ec681f3Smrg   unsigned raster_config_se[4];
53037ec681f3Smrg   unsigned se;
53047ec681f3Smrg
53057ec681f3Smrg   ac_get_harvested_configs(&sctx->screen->info, raster_config, &raster_config_1, raster_config_se);
53067ec681f3Smrg
53077ec681f3Smrg   for (se = 0; se < num_se; se++) {
53087ec681f3Smrg      si_set_grbm_gfx_index_se(sctx, pm4, se);
53097ec681f3Smrg      si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
53107ec681f3Smrg   }
53117ec681f3Smrg   si_set_grbm_gfx_index(sctx, pm4, ~0);
53127ec681f3Smrg
53137ec681f3Smrg   if (sctx->chip_class >= GFX7) {
53147ec681f3Smrg      si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
53157ec681f3Smrg   }
531601e04c3fSmrg}
5317af69d88dSmrg
531801e04c3fSmrgstatic void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4)
531901e04c3fSmrg{
53207ec681f3Smrg   struct si_screen *sscreen = sctx->screen;
53217ec681f3Smrg   unsigned num_rb = MIN2(sscreen->info.max_render_backends, 16);
53227ec681f3Smrg   unsigned rb_mask = sscreen->info.enabled_rb_mask;
53237ec681f3Smrg   unsigned raster_config = sscreen->pa_sc_raster_config;
53247ec681f3Smrg   unsigned raster_config_1 = sscreen->pa_sc_raster_config_1;
53257ec681f3Smrg
53267ec681f3Smrg   if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
53277ec681f3Smrg      /* Always use the default config when all backends are enabled
53287ec681f3Smrg       * (or when we failed to determine the enabled backends).
53297ec681f3Smrg       */
53307ec681f3Smrg      si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config);
53317ec681f3Smrg      if (sctx->chip_class >= GFX7)
53327ec681f3Smrg         si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
53337ec681f3Smrg   } else {
53347ec681f3Smrg      si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
53357ec681f3Smrg   }
5336af69d88dSmrg}
5337af69d88dSmrg
53387ec681f3Smrgvoid si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
5339af69d88dSmrg{
53407ec681f3Smrg   struct si_screen *sscreen = sctx->screen;
53417ec681f3Smrg   uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
53427ec681f3Smrg   bool has_clear_state = sscreen->info.has_clear_state;
53437ec681f3Smrg   struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
53447ec681f3Smrg
53457ec681f3Smrg   if (!pm4)
53467ec681f3Smrg      return;
53477ec681f3Smrg
53487ec681f3Smrg   if (!uses_reg_shadowing) {
53497ec681f3Smrg      si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
53507ec681f3Smrg      si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1));
53517ec681f3Smrg      si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1));
53527ec681f3Smrg
53537ec681f3Smrg      if (has_clear_state) {
53547ec681f3Smrg         si_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0));
53557ec681f3Smrg         si_pm4_cmd_add(pm4, 0);
53567ec681f3Smrg      }
53577ec681f3Smrg   }
53587ec681f3Smrg
53597ec681f3Smrg   /* CLEAR_STATE doesn't restore these correctly. */
53607ec681f3Smrg   si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
53617ec681f3Smrg   si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
53627ec681f3Smrg                  S_028244_BR_X(16384) | S_028244_BR_Y(16384));
53637ec681f3Smrg
53647ec681f3Smrg   si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
53657ec681f3Smrg   if (!has_clear_state)
53667ec681f3Smrg      si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
53677ec681f3Smrg
53687ec681f3Smrg   if (!has_clear_state) {
53697ec681f3Smrg      si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
53707ec681f3Smrg                     S_028230_ER_TRI(0xA) | S_028230_ER_POINT(0xA) | S_028230_ER_RECT(0xA) |
53717ec681f3Smrg                        /* Required by DX10_DIAMOND_TEST_ENA: */
53727ec681f3Smrg                        S_028230_ER_LINE_LR(0x1A) | S_028230_ER_LINE_RL(0x26) |
53737ec681f3Smrg                        S_028230_ER_LINE_TB(0xA) | S_028230_ER_LINE_BT(0xA));
53747ec681f3Smrg      si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
53757ec681f3Smrg      si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
53767ec681f3Smrg      si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
53777ec681f3Smrg      si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
53787ec681f3Smrg      si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
53797ec681f3Smrg      si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
53807ec681f3Smrg      si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
53817ec681f3Smrg      si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
53827ec681f3Smrg      si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
53837ec681f3Smrg   }
53847ec681f3Smrg
53857ec681f3Smrg   si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
53867ec681f3Smrg   if (sctx->chip_class >= GFX7)
53877ec681f3Smrg      si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40));
53887ec681f3Smrg
53897ec681f3Smrg   if (sctx->chip_class == GFX6) {
53907ec681f3Smrg      si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE,
53917ec681f3Smrg                     S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));
53927ec681f3Smrg   }
53937ec681f3Smrg
53947ec681f3Smrg   if (sctx->chip_class <= GFX7 || !has_clear_state) {
53957ec681f3Smrg      si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
53967ec681f3Smrg      si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
53977ec681f3Smrg
53987ec681f3Smrg      /* CLEAR_STATE doesn't clear these correctly on certain generations.
53997ec681f3Smrg       * I don't know why. Deduced by trial and error.
54007ec681f3Smrg       */
54017ec681f3Smrg      si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
54027ec681f3Smrg      si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
54037ec681f3Smrg      si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
54047ec681f3Smrg      si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
54057ec681f3Smrg                     S_028034_BR_X(16384) | S_028034_BR_Y(16384));
54067ec681f3Smrg   }
54077ec681f3Smrg
54087ec681f3Smrg   if (sctx->chip_class >= GFX10) {
54097ec681f3Smrg      si_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL,
54107ec681f3Smrg                     S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) |
54117ec681f3Smrg                     S_028038_POPS_DRAIN_PS_ON_OVERLAP(1));
54127ec681f3Smrg   }
54137ec681f3Smrg
54147ec681f3Smrg   unsigned cu_mask_ps = 0xffffffff;
54157ec681f3Smrg
54167ec681f3Smrg   /* It's wasteful to enable all CUs for PS if shader arrays have a different
54177ec681f3Smrg    * number of CUs. The reason is that the hardware sends the same number of PS
54187ec681f3Smrg    * waves to each shader array, so the slowest shader array limits the performance.
54197ec681f3Smrg    * Disable the extra CUs for PS in other shader arrays to save power and thus
54207ec681f3Smrg    * increase clocks for busy CUs. In the future, we might disable or enable this
54217ec681f3Smrg    * tweak only for certain apps.
54227ec681f3Smrg    */
54237ec681f3Smrg   if (sctx->chip_class >= GFX10_3)
54247ec681f3Smrg      cu_mask_ps = u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa);
54257ec681f3Smrg
54267ec681f3Smrg   if (sctx->chip_class >= GFX7) {
54277ec681f3Smrg      si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
54287ec681f3Smrg                     S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
54297ec681f3Smrg   }
54307ec681f3Smrg
54317ec681f3Smrg   if (sctx->chip_class <= GFX8) {
54327ec681f3Smrg      si_set_raster_config(sctx, pm4);
54337ec681f3Smrg
54347ec681f3Smrg      /* FIXME calculate these values somehow ??? */
54357ec681f3Smrg      si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
54367ec681f3Smrg      si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
54377ec681f3Smrg
54387ec681f3Smrg      /* These registers, when written, also overwrite the CLEAR_STATE
54397ec681f3Smrg       * context, so we can't rely on CLEAR_STATE setting them.
54407ec681f3Smrg       * It would be an issue if there was another UMD changing them.
54417ec681f3Smrg       */
54427ec681f3Smrg      si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
54437ec681f3Smrg      si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
54447ec681f3Smrg      si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
54457ec681f3Smrg   }
54467ec681f3Smrg
54477ec681f3Smrg   if (sscreen->info.chip_class >= GFX10) {
54487ec681f3Smrg      si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS,
54497ec681f3Smrg                     S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8));
54507ec681f3Smrg      si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
54517ec681f3Smrg                     S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8));
54527ec681f3Smrg   } else if (sscreen->info.chip_class == GFX9) {
54537ec681f3Smrg      si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS,
54547ec681f3Smrg                     S_00B414_MEM_BASE(sscreen->info.address32_hi >> 8));
54557ec681f3Smrg      si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES,
54567ec681f3Smrg                     S_00B214_MEM_BASE(sscreen->info.address32_hi >> 8));
54577ec681f3Smrg   } else {
54587ec681f3Smrg      si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS,
54597ec681f3Smrg                     S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8));
54607ec681f3Smrg   }
54617ec681f3Smrg
54627ec681f3Smrg   if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) {
54637ec681f3Smrg      si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
54647ec681f3Smrg                     S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
54657ec681f3Smrg      si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
54667ec681f3Smrg      si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
54677ec681f3Smrg                     S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
54687ec681f3Smrg
54697ec681f3Smrg      /* If this is 0, Bonaire can hang even if GS isn't being used.
54707ec681f3Smrg       * Other chips are unaffected. These are suboptimal values,
54717ec681f3Smrg       * but we don't use on-chip GS.
54727ec681f3Smrg       */
54737ec681f3Smrg      si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
54747ec681f3Smrg                     S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));
54757ec681f3Smrg   }
54767ec681f3Smrg
54777ec681f3Smrg   if (sctx->chip_class == GFX8) {
54787ec681f3Smrg      unsigned vgt_tess_distribution;
54797ec681f3Smrg
54807ec681f3Smrg      vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) |
54817ec681f3Smrg                              S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16);
54827ec681f3Smrg
54837ec681f3Smrg      /* Testing with Unigine Heaven extreme tesselation yielded best results
54847ec681f3Smrg       * with TRAP_SPLIT = 3.
54857ec681f3Smrg       */
54867ec681f3Smrg      if (sctx->family == CHIP_FIJI || sctx->family >= CHIP_POLARIS10)
54877ec681f3Smrg         vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
54887ec681f3Smrg
54897ec681f3Smrg      si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
54907ec681f3Smrg   }
54917ec681f3Smrg
54927ec681f3Smrg   if (sscreen->info.chip_class <= GFX9) {
54937ec681f3Smrg      si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
54947ec681f3Smrg   }
54957ec681f3Smrg
54967ec681f3Smrg   if (sctx->chip_class == GFX9) {
54977ec681f3Smrg      si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
54987ec681f3Smrg      si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
54997ec681f3Smrg      si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
55007ec681f3Smrg
55017ec681f3Smrg      si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL,
55027ec681f3Smrg                     S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
55037ec681f3Smrg                     S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
55047ec681f3Smrg   }
55057ec681f3Smrg
55067ec681f3Smrg   if (sctx->chip_class >= GFX9) {
55077ec681f3Smrg      si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
55087ec681f3Smrg                     S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
55097ec681f3Smrg
55107ec681f3Smrg      si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
55117ec681f3Smrg                     S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) |
55127ec681f3Smrg                     S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6));
55137ec681f3Smrg      si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
55147ec681f3Smrg                     S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) |
55157ec681f3Smrg                     S_028C48_MAX_PRIM_PER_BATCH(1023));
55167ec681f3Smrg      si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
55177ec681f3Smrg                     S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
55187ec681f3Smrg
55197ec681f3Smrg      si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
55207ec681f3Smrg      si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY,
55217ec681f3Smrg                     sctx->chip_class >= GFX10 ? 0x20 : 0);
55227ec681f3Smrg   }
55237ec681f3Smrg
55247ec681f3Smrg   if (sctx->chip_class >= GFX10) {
55257ec681f3Smrg      /* Logical CUs 16 - 31 */
55267ec681f3Smrg      si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16));
55277ec681f3Smrg      si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff));
55287ec681f3Smrg      si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff));
55297ec681f3Smrg
55307ec681f3Smrg      si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
55317ec681f3Smrg      si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
55327ec681f3Smrg      si_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
55337ec681f3Smrg      si_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);
55347ec681f3Smrg      si_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
55357ec681f3Smrg      si_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);
55367ec681f3Smrg      si_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);
55377ec681f3Smrg      si_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);
55387ec681f3Smrg      si_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
55397ec681f3Smrg      si_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
55407ec681f3Smrg      si_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
55417ec681f3Smrg      si_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
55427ec681f3Smrg      si_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
55437ec681f3Smrg      si_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
55447ec681f3Smrg      si_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
55457ec681f3Smrg      si_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
55467ec681f3Smrg
55477ec681f3Smrg      si_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
55487ec681f3Smrg                     S_00B0C0_SOFT_GROUPING_EN(1) |
55497ec681f3Smrg                     S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
55507ec681f3Smrg      si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
55517ec681f3Smrg
55527ec681f3Smrg      /* Enable CMASK/HTILE/DCC caching in L2 for small chips. */
55537ec681f3Smrg      unsigned meta_write_policy, meta_read_policy;
55547ec681f3Smrg      if (sscreen->info.max_render_backends <= 4) {
55557ec681f3Smrg         meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
55567ec681f3Smrg         meta_read_policy = V_02807C_CACHE_LRU_RD;  /* cache reads */
55577ec681f3Smrg      } else {
55587ec681f3Smrg         meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */
55597ec681f3Smrg         meta_read_policy = V_02807C_CACHE_NOA;     /* don't cache reads */
55607ec681f3Smrg      }
55617ec681f3Smrg
55627ec681f3Smrg      si_pm4_set_reg(pm4, R_02807C_DB_RMI_L2_CACHE_CONTROL,
55637ec681f3Smrg                     S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) |
55647ec681f3Smrg                     S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) |
55657ec681f3Smrg                     S_02807C_HTILE_WR_POLICY(meta_write_policy) |
55667ec681f3Smrg                     S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) |
55677ec681f3Smrg                     S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA) |
55687ec681f3Smrg                     S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA) |
55697ec681f3Smrg                     S_02807C_HTILE_RD_POLICY(meta_read_policy));
55707ec681f3Smrg      si_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL,
55717ec681f3Smrg                     S_028410_CMASK_WR_POLICY(meta_write_policy) |
55727ec681f3Smrg                     S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) |
55737ec681f3Smrg                     S_028410_DCC_WR_POLICY(meta_write_policy) |
55747ec681f3Smrg                     S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM) |
55757ec681f3Smrg                     S_028410_CMASK_RD_POLICY(meta_read_policy) |
55767ec681f3Smrg                     S_028410_FMASK_RD_POLICY(V_028410_CACHE_NOA) |
55777ec681f3Smrg                     S_028410_DCC_RD_POLICY(meta_read_policy) |
55787ec681f3Smrg                     S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA));
55797ec681f3Smrg
55807ec681f3Smrg      si_pm4_set_reg(pm4, R_028428_CB_COVERAGE_OUT_CONTROL, 0);
55817ec681f3Smrg      si_pm4_set_reg(pm4, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0);
55827ec681f3Smrg
55837ec681f3Smrg      /* Break up a pixel wave if it contains deallocs for more than
55847ec681f3Smrg       * half the parameter cache.
55857ec681f3Smrg       *
55867ec681f3Smrg       * To avoid a deadlock where pixel waves aren't launched
55877ec681f3Smrg       * because they're waiting for more pixels while the frontend
55887ec681f3Smrg       * is stuck waiting for PC space, the maximum allowed value is
55897ec681f3Smrg       * the size of the PC minus the largest possible allocation for
55907ec681f3Smrg       * a single primitive shader subgroup.
55917ec681f3Smrg       */
55927ec681f3Smrg      si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
55937ec681f3Smrg      /* Reuse for legacy (non-NGG) only. */
55947ec681f3Smrg      si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
55957ec681f3Smrg
55967ec681f3Smrg      if (!has_clear_state) {
55977ec681f3Smrg         si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
55987ec681f3Smrg                        sscreen->info.pa_sc_tile_steering_override);
55997ec681f3Smrg      }
56007ec681f3Smrg
56017ec681f3Smrg
56027ec681f3Smrg      si_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0);
56037ec681f3Smrg      si_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0);
56047ec681f3Smrg      si_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0);
56057ec681f3Smrg      si_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0);
56067ec681f3Smrg      si_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0);
56077ec681f3Smrg   }
56087ec681f3Smrg
56097ec681f3Smrg   if (sctx->chip_class >= GFX10_3) {
56107ec681f3Smrg      si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
56117ec681f3Smrg      /* The rate combiners have no effect if they are disabled like this:
56127ec681f3Smrg       *   VERTEX_RATE:    BYPASS_VTX_RATE_COMBINER = 1
56137ec681f3Smrg       *   PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 1
56147ec681f3Smrg       *   HTILE_RATE:     VRS_HTILE_ENCODING = 0
56157ec681f3Smrg       *   SAMPLE_ITER:    PS_ITER_SAMPLE = 0
56167ec681f3Smrg       *
56177ec681f3Smrg       * Use OVERRIDE, which will ignore results from previous combiners.
56187ec681f3Smrg       * (e.g. enabled sample shading overrides the vertex rate)
56197ec681f3Smrg       */
56207ec681f3Smrg      si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL,
56217ec681f3Smrg                     S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |
56227ec681f3Smrg                     S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
56237ec681f3Smrg   }
56247ec681f3Smrg
56257ec681f3Smrg   sctx->cs_preamble_state = pm4;
5626af69d88dSmrg}
5627