1848b8605Smrg/*
2848b8605Smrg * Copyright 2012 Advanced Micro Devices, Inc.
3b8e80941Smrg * All Rights Reserved.
4848b8605Smrg *
5848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
6848b8605Smrg * copy of this software and associated documentation files (the "Software"),
7848b8605Smrg * to deal in the Software without restriction, including without limitation
8848b8605Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub
9848b8605Smrg * license, and/or sell copies of the Software, and to permit persons to whom
10848b8605Smrg * the Software is furnished to do so, subject to the following conditions:
11848b8605Smrg *
12848b8605Smrg * The above copyright notice and this permission notice (including the next
13848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the
14848b8605Smrg * Software.
15848b8605Smrg *
16848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19848b8605Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20848b8605Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21848b8605Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22848b8605Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
23848b8605Smrg */
24848b8605Smrg
25b8e80941Smrg#include "si_build_pm4.h"
26b8e80941Smrg#include "gfx9d.h"
27b8e80941Smrg#include "si_query.h"
28848b8605Smrg
29b8e80941Smrg#include "util/u_dual_blend.h"
30848b8605Smrg#include "util/u_format.h"
31848b8605Smrg#include "util/u_format_s3tc.h"
32848b8605Smrg#include "util/u_memory.h"
33b8e80941Smrg#include "util/u_resource.h"
34b8e80941Smrg#include "util/u_upload_mgr.h"
35b8e80941Smrg#include "util/fast_idiv_by_const.h"
36848b8605Smrg
37848b8605Smrgstatic unsigned si_map_swizzle(unsigned swizzle)
38848b8605Smrg{
39848b8605Smrg	switch (swizzle) {
40b8e80941Smrg	case PIPE_SWIZZLE_Y:
41848b8605Smrg		return V_008F0C_SQ_SEL_Y;
42b8e80941Smrg	case PIPE_SWIZZLE_Z:
43848b8605Smrg		return V_008F0C_SQ_SEL_Z;
44b8e80941Smrg	case PIPE_SWIZZLE_W:
45848b8605Smrg		return V_008F0C_SQ_SEL_W;
46b8e80941Smrg	case PIPE_SWIZZLE_0:
47848b8605Smrg		return V_008F0C_SQ_SEL_0;
48b8e80941Smrg	case PIPE_SWIZZLE_1:
49848b8605Smrg		return V_008F0C_SQ_SEL_1;
50b8e80941Smrg	default: /* PIPE_SWIZZLE_X */
51848b8605Smrg		return V_008F0C_SQ_SEL_X;
52848b8605Smrg	}
53848b8605Smrg}
54848b8605Smrg
/**
 * Convert a float to unsigned 12.4 fixed-point (value scaled by 16),
 * clamped to the range [0, 0xffff].
 *
 * \param x  value to convert
 * \return 0 for x <= 0 and for NaN, 0xffff for x >= 4096, otherwise
 *         x * 16 truncated toward zero.
 */
static unsigned si_pack_float_12p4(float x)
{
	/* Negated comparison on purpose: every ordered comparison with NaN
	 * is false, so NaN is caught here instead of reaching the
	 * float-to-unsigned conversion below, where it would be undefined. */
	if (!(x > 0))
		return 0;

	if (x >= 4096)
		return 0xffff;

	return (unsigned)(x * 16);
}
61848b8605Smrg
62848b8605Smrg/*
63b8e80941Smrg * Inferred framebuffer and blender state.
64b8e80941Smrg *
65b8e80941Smrg * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
66b8e80941Smrg * if there is not enough PS outputs.
67848b8605Smrg */
68b8e80941Smrgstatic void si_emit_cb_render_state(struct si_context *sctx)
69848b8605Smrg{
70b8e80941Smrg	struct radeon_cmdbuf *cs = sctx->gfx_cs;
71848b8605Smrg	struct si_state_blend *blend = sctx->queued.named.blend;
72b8e80941Smrg	/* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,
73b8e80941Smrg	 * but you never know. */
74b8e80941Smrg	uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit;
75b8e80941Smrg	unsigned i;
76b8e80941Smrg
77b8e80941Smrg	if (blend)
78b8e80941Smrg		cb_target_mask &= blend->cb_target_mask;
79b8e80941Smrg
80b8e80941Smrg	/* Avoid a hang that happens when dual source blending is enabled
81b8e80941Smrg	 * but there is not enough color outputs. This is undefined behavior,
82b8e80941Smrg	 * so disable color writes completely.
83b8e80941Smrg	 *
84b8e80941Smrg	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
85b8e80941Smrg	 */
86b8e80941Smrg	if (blend && blend->dual_src_blend &&
87b8e80941Smrg	    sctx->ps_shader.cso &&
88b8e80941Smrg	    (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
89b8e80941Smrg		cb_target_mask = 0;
90b8e80941Smrg
91b8e80941Smrg	/* GFX9: Flush DFSM when CB_TARGET_MASK changes.
92b8e80941Smrg	 * I think we don't have to do anything between IBs.
93b8e80941Smrg	 */
94b8e80941Smrg	if (sctx->screen->dfsm_allowed &&
95b8e80941Smrg	    sctx->last_cb_target_mask != cb_target_mask) {
96b8e80941Smrg		sctx->last_cb_target_mask = cb_target_mask;
97b8e80941Smrg
98b8e80941Smrg		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
99b8e80941Smrg		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
100b8e80941Smrg	}
101848b8605Smrg
102b8e80941Smrg	unsigned initial_cdw = cs->current.cdw;
103b8e80941Smrg	radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK,
104b8e80941Smrg				   SI_TRACKED_CB_TARGET_MASK, cb_target_mask);
105848b8605Smrg
106b8e80941Smrg	if (sctx->chip_class >= VI) {
107b8e80941Smrg		/* DCC MSAA workaround for blending.
108b8e80941Smrg		 * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_-
109b8e80941Smrg		 * COMBINER_DISABLE, but that would be more complicated.
110b8e80941Smrg		 */
111b8e80941Smrg		bool oc_disable = (sctx->chip_class == VI ||
112b8e80941Smrg				   sctx->chip_class == GFX9) &&
113b8e80941Smrg				  blend &&
114b8e80941Smrg				  blend->blend_enable_4bit & cb_target_mask &&
115b8e80941Smrg				  sctx->framebuffer.nr_samples >= 2;
116b8e80941Smrg		unsigned watermark = sctx->framebuffer.dcc_overwrite_combiner_watermark;
117b8e80941Smrg
118b8e80941Smrg		radeon_opt_set_context_reg(
119b8e80941Smrg				sctx, R_028424_CB_DCC_CONTROL,
120b8e80941Smrg				SI_TRACKED_CB_DCC_CONTROL,
121b8e80941Smrg				S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
122b8e80941Smrg				S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
123b8e80941Smrg				S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) |
124b8e80941Smrg				S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->screen->has_dcc_constant_encode));
125b8e80941Smrg	}
126b8e80941Smrg
127b8e80941Smrg	/* RB+ register settings. */
128b8e80941Smrg	if (sctx->screen->rbplus_allowed) {
129b8e80941Smrg		unsigned spi_shader_col_format =
130b8e80941Smrg			sctx->ps_shader.cso ?
131b8e80941Smrg			sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
132b8e80941Smrg		unsigned sx_ps_downconvert = 0;
133b8e80941Smrg		unsigned sx_blend_opt_epsilon = 0;
134b8e80941Smrg		unsigned sx_blend_opt_control = 0;
135b8e80941Smrg
136b8e80941Smrg		for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
137b8e80941Smrg			struct si_surface *surf =
138b8e80941Smrg				(struct si_surface*)sctx->framebuffer.state.cbufs[i];
139b8e80941Smrg			unsigned format, swap, spi_format, colormask;
140b8e80941Smrg			bool has_alpha, has_rgb;
141b8e80941Smrg
142b8e80941Smrg			if (!surf)
143b8e80941Smrg				continue;
144b8e80941Smrg
145b8e80941Smrg			format = G_028C70_FORMAT(surf->cb_color_info);
146b8e80941Smrg			swap = G_028C70_COMP_SWAP(surf->cb_color_info);
147b8e80941Smrg			spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
148b8e80941Smrg			colormask = (cb_target_mask >> (i * 4)) & 0xf;
149b8e80941Smrg
150b8e80941Smrg			/* Set if RGB and A are present. */
151b8e80941Smrg			has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);
152b8e80941Smrg
153b8e80941Smrg			if (format == V_028C70_COLOR_8 ||
154b8e80941Smrg			    format == V_028C70_COLOR_16 ||
155b8e80941Smrg			    format == V_028C70_COLOR_32)
156b8e80941Smrg				has_rgb = !has_alpha;
157b8e80941Smrg			else
158b8e80941Smrg				has_rgb = true;
159b8e80941Smrg
160b8e80941Smrg			/* Check the colormask and export format. */
161b8e80941Smrg			if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
162b8e80941Smrg				has_rgb = false;
163b8e80941Smrg			if (!(colormask & PIPE_MASK_A))
164b8e80941Smrg				has_alpha = false;
165b8e80941Smrg
166b8e80941Smrg			if (spi_format == V_028714_SPI_SHADER_ZERO) {
167b8e80941Smrg				has_rgb = false;
168b8e80941Smrg				has_alpha = false;
169b8e80941Smrg			}
170b8e80941Smrg
171b8e80941Smrg			/* Disable value checking for disabled channels. */
172b8e80941Smrg			if (!has_rgb)
173b8e80941Smrg				sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
174b8e80941Smrg			if (!has_alpha)
175b8e80941Smrg				sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
176b8e80941Smrg
177b8e80941Smrg			/* Enable down-conversion for 32bpp and smaller formats. */
178b8e80941Smrg			switch (format) {
179b8e80941Smrg			case V_028C70_COLOR_8:
180b8e80941Smrg			case V_028C70_COLOR_8_8:
181b8e80941Smrg			case V_028C70_COLOR_8_8_8_8:
182b8e80941Smrg				/* For 1 and 2-channel formats, use the superset thereof. */
183b8e80941Smrg				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
184b8e80941Smrg				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
185b8e80941Smrg				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
186b8e80941Smrg					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
187b8e80941Smrg					sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
188b8e80941Smrg				}
189b8e80941Smrg				break;
190b8e80941Smrg
191b8e80941Smrg			case V_028C70_COLOR_5_6_5:
192b8e80941Smrg				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
193b8e80941Smrg					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
194b8e80941Smrg					sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
195b8e80941Smrg				}
196b8e80941Smrg				break;
197b8e80941Smrg
198b8e80941Smrg			case V_028C70_COLOR_1_5_5_5:
199b8e80941Smrg				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
200b8e80941Smrg					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
201b8e80941Smrg					sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
202b8e80941Smrg				}
203b8e80941Smrg				break;
204b8e80941Smrg
205b8e80941Smrg			case V_028C70_COLOR_4_4_4_4:
206b8e80941Smrg				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
207b8e80941Smrg					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
208b8e80941Smrg					sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
209b8e80941Smrg				}
210b8e80941Smrg				break;
211b8e80941Smrg
212b8e80941Smrg			case V_028C70_COLOR_32:
213b8e80941Smrg				if (swap == V_028C70_SWAP_STD &&
214b8e80941Smrg				    spi_format == V_028714_SPI_SHADER_32_R)
215b8e80941Smrg					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
216b8e80941Smrg				else if (swap == V_028C70_SWAP_ALT_REV &&
217b8e80941Smrg					 spi_format == V_028714_SPI_SHADER_32_AR)
218b8e80941Smrg					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
219b8e80941Smrg				break;
220b8e80941Smrg
221b8e80941Smrg			case V_028C70_COLOR_16:
222b8e80941Smrg			case V_028C70_COLOR_16_16:
223b8e80941Smrg				/* For 1-channel formats, use the superset thereof. */
224b8e80941Smrg				if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
225b8e80941Smrg				    spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
226b8e80941Smrg				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
227b8e80941Smrg				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
228b8e80941Smrg					if (swap == V_028C70_SWAP_STD ||
229b8e80941Smrg					    swap == V_028C70_SWAP_STD_REV)
230b8e80941Smrg						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
231b8e80941Smrg					else
232b8e80941Smrg						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
233b8e80941Smrg				}
234b8e80941Smrg				break;
235b8e80941Smrg
236b8e80941Smrg			case V_028C70_COLOR_10_11_11:
237b8e80941Smrg				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
238b8e80941Smrg					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
239b8e80941Smrg					sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
240b8e80941Smrg				}
241b8e80941Smrg				break;
242848b8605Smrg
243b8e80941Smrg			case V_028C70_COLOR_2_10_10_10:
244b8e80941Smrg				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
245b8e80941Smrg					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
246b8e80941Smrg					sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
247b8e80941Smrg				}
248b8e80941Smrg				break;
249b8e80941Smrg			}
250b8e80941Smrg		}
251848b8605Smrg
252b8e80941Smrg		/* SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, SX_BLEND_OPT_CONTROL */
253b8e80941Smrg		radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT,
254b8e80941Smrg					    SI_TRACKED_SX_PS_DOWNCONVERT,
255b8e80941Smrg					    sx_ps_downconvert, sx_blend_opt_epsilon,
256b8e80941Smrg					    sx_blend_opt_control);
257b8e80941Smrg	}
258b8e80941Smrg	if (initial_cdw != cs->current.cdw)
259b8e80941Smrg		sctx->context_roll = true;
260848b8605Smrg}
261848b8605Smrg
262848b8605Smrg/*
263848b8605Smrg * Blender functions
264848b8605Smrg */
265848b8605Smrg
266848b8605Smrgstatic uint32_t si_translate_blend_function(int blend_func)
267848b8605Smrg{
268848b8605Smrg	switch (blend_func) {
269848b8605Smrg	case PIPE_BLEND_ADD:
270848b8605Smrg		return V_028780_COMB_DST_PLUS_SRC;
271848b8605Smrg	case PIPE_BLEND_SUBTRACT:
272848b8605Smrg		return V_028780_COMB_SRC_MINUS_DST;
273848b8605Smrg	case PIPE_BLEND_REVERSE_SUBTRACT:
274848b8605Smrg		return V_028780_COMB_DST_MINUS_SRC;
275848b8605Smrg	case PIPE_BLEND_MIN:
276848b8605Smrg		return V_028780_COMB_MIN_DST_SRC;
277848b8605Smrg	case PIPE_BLEND_MAX:
278848b8605Smrg		return V_028780_COMB_MAX_DST_SRC;
279848b8605Smrg	default:
280b8e80941Smrg		PRINT_ERR("Unknown blend function %d\n", blend_func);
281848b8605Smrg		assert(0);
282848b8605Smrg		break;
283848b8605Smrg	}
284848b8605Smrg	return 0;
285848b8605Smrg}
286848b8605Smrg
287848b8605Smrgstatic uint32_t si_translate_blend_factor(int blend_fact)
288848b8605Smrg{
289848b8605Smrg	switch (blend_fact) {
290848b8605Smrg	case PIPE_BLENDFACTOR_ONE:
291848b8605Smrg		return V_028780_BLEND_ONE;
292848b8605Smrg	case PIPE_BLENDFACTOR_SRC_COLOR:
293848b8605Smrg		return V_028780_BLEND_SRC_COLOR;
294848b8605Smrg	case PIPE_BLENDFACTOR_SRC_ALPHA:
295848b8605Smrg		return V_028780_BLEND_SRC_ALPHA;
296848b8605Smrg	case PIPE_BLENDFACTOR_DST_ALPHA:
297848b8605Smrg		return V_028780_BLEND_DST_ALPHA;
298848b8605Smrg	case PIPE_BLENDFACTOR_DST_COLOR:
299848b8605Smrg		return V_028780_BLEND_DST_COLOR;
300848b8605Smrg	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
301848b8605Smrg		return V_028780_BLEND_SRC_ALPHA_SATURATE;
302848b8605Smrg	case PIPE_BLENDFACTOR_CONST_COLOR:
303848b8605Smrg		return V_028780_BLEND_CONSTANT_COLOR;
304848b8605Smrg	case PIPE_BLENDFACTOR_CONST_ALPHA:
305848b8605Smrg		return V_028780_BLEND_CONSTANT_ALPHA;
306848b8605Smrg	case PIPE_BLENDFACTOR_ZERO:
307848b8605Smrg		return V_028780_BLEND_ZERO;
308848b8605Smrg	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
309848b8605Smrg		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
310848b8605Smrg	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
311848b8605Smrg		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
312848b8605Smrg	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
313848b8605Smrg		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
314848b8605Smrg	case PIPE_BLENDFACTOR_INV_DST_COLOR:
315848b8605Smrg		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
316848b8605Smrg	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
317848b8605Smrg		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
318848b8605Smrg	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
319848b8605Smrg		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
320848b8605Smrg	case PIPE_BLENDFACTOR_SRC1_COLOR:
321848b8605Smrg		return V_028780_BLEND_SRC1_COLOR;
322848b8605Smrg	case PIPE_BLENDFACTOR_SRC1_ALPHA:
323848b8605Smrg		return V_028780_BLEND_SRC1_ALPHA;
324848b8605Smrg	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
325848b8605Smrg		return V_028780_BLEND_INV_SRC1_COLOR;
326848b8605Smrg	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
327848b8605Smrg		return V_028780_BLEND_INV_SRC1_ALPHA;
328848b8605Smrg	default:
329b8e80941Smrg		PRINT_ERR("Bad blend factor %d not supported!\n", blend_fact);
330848b8605Smrg		assert(0);
331848b8605Smrg		break;
332848b8605Smrg	}
333848b8605Smrg	return 0;
334848b8605Smrg}
335848b8605Smrg
336b8e80941Smrgstatic uint32_t si_translate_blend_opt_function(int blend_func)
337b8e80941Smrg{
338b8e80941Smrg	switch (blend_func) {
339b8e80941Smrg	case PIPE_BLEND_ADD:
340b8e80941Smrg		return V_028760_OPT_COMB_ADD;
341b8e80941Smrg	case PIPE_BLEND_SUBTRACT:
342b8e80941Smrg		return V_028760_OPT_COMB_SUBTRACT;
343b8e80941Smrg	case PIPE_BLEND_REVERSE_SUBTRACT:
344b8e80941Smrg		return V_028760_OPT_COMB_REVSUBTRACT;
345b8e80941Smrg	case PIPE_BLEND_MIN:
346b8e80941Smrg		return V_028760_OPT_COMB_MIN;
347b8e80941Smrg	case PIPE_BLEND_MAX:
348b8e80941Smrg		return V_028760_OPT_COMB_MAX;
349b8e80941Smrg	default:
350b8e80941Smrg		return V_028760_OPT_COMB_BLEND_DISABLED;
351b8e80941Smrg	}
352b8e80941Smrg}
353b8e80941Smrg
354b8e80941Smrgstatic uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
355b8e80941Smrg{
356b8e80941Smrg	switch (blend_fact) {
357b8e80941Smrg	case PIPE_BLENDFACTOR_ZERO:
358b8e80941Smrg		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
359b8e80941Smrg	case PIPE_BLENDFACTOR_ONE:
360b8e80941Smrg		return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
361b8e80941Smrg	case PIPE_BLENDFACTOR_SRC_COLOR:
362b8e80941Smrg		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
363b8e80941Smrg				: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
364b8e80941Smrg	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
365b8e80941Smrg		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
366b8e80941Smrg				: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
367b8e80941Smrg	case PIPE_BLENDFACTOR_SRC_ALPHA:
368b8e80941Smrg		return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
369b8e80941Smrg	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
370b8e80941Smrg		return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
371b8e80941Smrg	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
372b8e80941Smrg		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
373b8e80941Smrg				: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
374b8e80941Smrg	default:
375b8e80941Smrg		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
376b8e80941Smrg	}
377b8e80941Smrg}
378b8e80941Smrg
379b8e80941Smrgstatic void si_blend_check_commutativity(struct si_screen *sscreen,
380b8e80941Smrg					 struct si_state_blend *blend,
381b8e80941Smrg					 enum pipe_blend_func func,
382b8e80941Smrg					 enum pipe_blendfactor src,
383b8e80941Smrg					 enum pipe_blendfactor dst,
384b8e80941Smrg					 unsigned chanmask)
385b8e80941Smrg{
386b8e80941Smrg	/* Src factor is allowed when it does not depend on Dst */
387b8e80941Smrg	static const uint32_t src_allowed =
388b8e80941Smrg		(1u << PIPE_BLENDFACTOR_ONE) |
389b8e80941Smrg		(1u << PIPE_BLENDFACTOR_SRC_COLOR) |
390b8e80941Smrg		(1u << PIPE_BLENDFACTOR_SRC_ALPHA) |
391b8e80941Smrg		(1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) |
392b8e80941Smrg		(1u << PIPE_BLENDFACTOR_CONST_COLOR) |
393b8e80941Smrg		(1u << PIPE_BLENDFACTOR_CONST_ALPHA) |
394b8e80941Smrg		(1u << PIPE_BLENDFACTOR_SRC1_COLOR) |
395b8e80941Smrg		(1u << PIPE_BLENDFACTOR_SRC1_ALPHA) |
396b8e80941Smrg		(1u << PIPE_BLENDFACTOR_ZERO) |
397b8e80941Smrg		(1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) |
398b8e80941Smrg		(1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) |
399b8e80941Smrg		(1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) |
400b8e80941Smrg		(1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) |
401b8e80941Smrg		(1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) |
402b8e80941Smrg		(1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA);
403b8e80941Smrg
404b8e80941Smrg	if (dst == PIPE_BLENDFACTOR_ONE &&
405b8e80941Smrg	    (src_allowed & (1u << src))) {
406b8e80941Smrg		/* Addition is commutative, but floating point addition isn't
407b8e80941Smrg		 * associative: subtle changes can be introduced via different
408b8e80941Smrg		 * rounding.
409b8e80941Smrg		 *
410b8e80941Smrg		 * Out-of-order is also non-deterministic, which means that
411b8e80941Smrg		 * this breaks OpenGL invariance requirements. So only enable
412b8e80941Smrg		 * out-of-order additive blending if explicitly allowed by a
413b8e80941Smrg		 * setting.
414b8e80941Smrg		 */
415b8e80941Smrg		if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN ||
416b8e80941Smrg		    (func == PIPE_BLEND_ADD && sscreen->commutative_blend_add))
417b8e80941Smrg			blend->commutative_4bit |= chanmask;
418b8e80941Smrg	}
419b8e80941Smrg}
420b8e80941Smrg
421b8e80941Smrg/**
422b8e80941Smrg * Get rid of DST in the blend factors by commuting the operands:
423b8e80941Smrg *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
424b8e80941Smrg */
425b8e80941Smrgstatic void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
426b8e80941Smrg				unsigned *dst_factor, unsigned expected_dst,
427b8e80941Smrg				unsigned replacement_src)
428b8e80941Smrg{
429b8e80941Smrg	if (*src_factor == expected_dst &&
430b8e80941Smrg	    *dst_factor == PIPE_BLENDFACTOR_ZERO) {
431b8e80941Smrg		*src_factor = PIPE_BLENDFACTOR_ZERO;
432b8e80941Smrg		*dst_factor = replacement_src;
433b8e80941Smrg
434b8e80941Smrg		/* Commuting the operands requires reversing subtractions. */
435b8e80941Smrg		if (*func == PIPE_BLEND_SUBTRACT)
436b8e80941Smrg			*func = PIPE_BLEND_REVERSE_SUBTRACT;
437b8e80941Smrg		else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
438b8e80941Smrg			*func = PIPE_BLEND_SUBTRACT;
439b8e80941Smrg	}
440b8e80941Smrg}
441b8e80941Smrg
442b8e80941Smrgstatic bool si_blend_factor_uses_dst(unsigned factor)
443b8e80941Smrg{
444b8e80941Smrg	return factor == PIPE_BLENDFACTOR_DST_COLOR ||
445b8e80941Smrg		factor == PIPE_BLENDFACTOR_DST_ALPHA ||
446b8e80941Smrg		factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
447b8e80941Smrg		factor == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
448b8e80941Smrg		factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
449b8e80941Smrg}
450b8e80941Smrg
451848b8605Smrgstatic void *si_create_blend_state_mode(struct pipe_context *ctx,
452848b8605Smrg					const struct pipe_blend_state *state,
453848b8605Smrg					unsigned mode)
454848b8605Smrg{
455b8e80941Smrg	struct si_context *sctx = (struct si_context*)ctx;
456848b8605Smrg	struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
457848b8605Smrg	struct si_pm4_state *pm4 = &blend->pm4;
458b8e80941Smrg	uint32_t sx_mrt_blend_opt[8] = {0};
459848b8605Smrg	uint32_t color_control = 0;
460848b8605Smrg
461b8e80941Smrg	if (!blend)
462848b8605Smrg		return NULL;
463848b8605Smrg
464b8e80941Smrg	blend->alpha_to_coverage = state->alpha_to_coverage;
465848b8605Smrg	blend->alpha_to_one = state->alpha_to_one;
466b8e80941Smrg	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
467b8e80941Smrg	blend->logicop_enable = state->logicop_enable;
468848b8605Smrg
469848b8605Smrg	if (state->logicop_enable) {
470848b8605Smrg		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
471848b8605Smrg	} else {
472848b8605Smrg		color_control |= S_028808_ROP3(0xcc);
473848b8605Smrg	}
474848b8605Smrg
475848b8605Smrg	si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
476848b8605Smrg		       S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
477b8e80941Smrg		       S_028B70_ALPHA_TO_MASK_OFFSET0(3) |
478b8e80941Smrg		       S_028B70_ALPHA_TO_MASK_OFFSET1(1) |
479b8e80941Smrg		       S_028B70_ALPHA_TO_MASK_OFFSET2(0) |
480b8e80941Smrg		       S_028B70_ALPHA_TO_MASK_OFFSET3(2) |
481b8e80941Smrg		       S_028B70_OFFSET_ROUND(1));
482b8e80941Smrg
483b8e80941Smrg	if (state->alpha_to_coverage)
484b8e80941Smrg		blend->need_src_alpha_4bit |= 0xf;
485848b8605Smrg
486848b8605Smrg	blend->cb_target_mask = 0;
487b8e80941Smrg	blend->cb_target_enabled_4bit = 0;
488b8e80941Smrg
489848b8605Smrg	for (int i = 0; i < 8; i++) {
490848b8605Smrg		/* state->rt entries > 0 only written if independent blending */
491848b8605Smrg		const int j = state->independent_blend_enable ? i : 0;
492848b8605Smrg
493848b8605Smrg		unsigned eqRGB = state->rt[j].rgb_func;
494848b8605Smrg		unsigned srcRGB = state->rt[j].rgb_src_factor;
495848b8605Smrg		unsigned dstRGB = state->rt[j].rgb_dst_factor;
496848b8605Smrg		unsigned eqA = state->rt[j].alpha_func;
497848b8605Smrg		unsigned srcA = state->rt[j].alpha_src_factor;
498848b8605Smrg		unsigned dstA = state->rt[j].alpha_dst_factor;
499848b8605Smrg
500b8e80941Smrg		unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
501848b8605Smrg		unsigned blend_cntl = 0;
502848b8605Smrg
503b8e80941Smrg		sx_mrt_blend_opt[i] =
504b8e80941Smrg			S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
505b8e80941Smrg			S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
506b8e80941Smrg
507b8e80941Smrg		/* Only set dual source blending for MRT0 to avoid a hang. */
508b8e80941Smrg		if (i >= 1 && blend->dual_src_blend) {
509b8e80941Smrg			/* Vulkan does this for dual source blending. */
510b8e80941Smrg			if (i == 1)
511b8e80941Smrg				blend_cntl |= S_028780_ENABLE(1);
512b8e80941Smrg
513b8e80941Smrg			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
514b8e80941Smrg			continue;
515b8e80941Smrg		}
516b8e80941Smrg
517b8e80941Smrg		/* Only addition and subtraction equations are supported with
518b8e80941Smrg		 * dual source blending.
519b8e80941Smrg		 */
520b8e80941Smrg		if (blend->dual_src_blend &&
521b8e80941Smrg		    (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
522b8e80941Smrg		     eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
523b8e80941Smrg			assert(!"Unsupported equation for dual source blending");
524b8e80941Smrg			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
525b8e80941Smrg			continue;
526b8e80941Smrg		}
527b8e80941Smrg
528b8e80941Smrg		/* cb_render_state will disable unused ones */
529b8e80941Smrg		blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
530b8e80941Smrg		if (state->rt[j].colormask)
531b8e80941Smrg			blend->cb_target_enabled_4bit |= 0xf << (4 * i);
532848b8605Smrg
533b8e80941Smrg		if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
534848b8605Smrg			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
535848b8605Smrg			continue;
536848b8605Smrg		}
537848b8605Smrg
538b8e80941Smrg		si_blend_check_commutativity(sctx->screen, blend,
539b8e80941Smrg					     eqRGB, srcRGB, dstRGB, 0x7 << (4 * i));
540b8e80941Smrg		si_blend_check_commutativity(sctx->screen, blend,
541b8e80941Smrg					     eqA, srcA, dstA, 0x8 << (4 * i));
542b8e80941Smrg
543b8e80941Smrg		/* Blending optimizations for RB+.
544b8e80941Smrg		 * These transformations don't change the behavior.
545b8e80941Smrg		 *
546b8e80941Smrg		 * First, get rid of DST in the blend factors:
547b8e80941Smrg		 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
548b8e80941Smrg		 */
549b8e80941Smrg		si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
550b8e80941Smrg				    PIPE_BLENDFACTOR_DST_COLOR,
551b8e80941Smrg				    PIPE_BLENDFACTOR_SRC_COLOR);
552b8e80941Smrg		si_blend_remove_dst(&eqA, &srcA, &dstA,
553b8e80941Smrg				    PIPE_BLENDFACTOR_DST_COLOR,
554b8e80941Smrg				    PIPE_BLENDFACTOR_SRC_COLOR);
555b8e80941Smrg		si_blend_remove_dst(&eqA, &srcA, &dstA,
556b8e80941Smrg				    PIPE_BLENDFACTOR_DST_ALPHA,
557b8e80941Smrg				    PIPE_BLENDFACTOR_SRC_ALPHA);
558b8e80941Smrg
559b8e80941Smrg		/* Look up the ideal settings from tables. */
560b8e80941Smrg		srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
561b8e80941Smrg		dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
562b8e80941Smrg		srcA_opt = si_translate_blend_opt_factor(srcA, true);
563b8e80941Smrg		dstA_opt = si_translate_blend_opt_factor(dstA, true);
564b8e80941Smrg
565b8e80941Smrg		/* Handle interdependencies. */
566b8e80941Smrg		if (si_blend_factor_uses_dst(srcRGB))
567b8e80941Smrg			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
568b8e80941Smrg		if (si_blend_factor_uses_dst(srcA))
569b8e80941Smrg			dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
570b8e80941Smrg
571b8e80941Smrg		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
572b8e80941Smrg		    (dstRGB == PIPE_BLENDFACTOR_ZERO ||
573b8e80941Smrg		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
574b8e80941Smrg		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
575b8e80941Smrg			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
576b8e80941Smrg
577b8e80941Smrg		/* Set the final value. */
578b8e80941Smrg		sx_mrt_blend_opt[i] =
579b8e80941Smrg			S_028760_COLOR_SRC_OPT(srcRGB_opt) |
580b8e80941Smrg			S_028760_COLOR_DST_OPT(dstRGB_opt) |
581b8e80941Smrg			S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
582b8e80941Smrg			S_028760_ALPHA_SRC_OPT(srcA_opt) |
583b8e80941Smrg			S_028760_ALPHA_DST_OPT(dstA_opt) |
584b8e80941Smrg			S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
585b8e80941Smrg
586b8e80941Smrg		/* Set blend state. */
587848b8605Smrg		blend_cntl |= S_028780_ENABLE(1);
588848b8605Smrg		blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
589848b8605Smrg		blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
590848b8605Smrg		blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
591848b8605Smrg
592848b8605Smrg		if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
593848b8605Smrg			blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
594848b8605Smrg			blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
595848b8605Smrg			blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
596848b8605Smrg			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
597848b8605Smrg		}
598848b8605Smrg		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
599b8e80941Smrg
600b8e80941Smrg		blend->blend_enable_4bit |= 0xfu << (i * 4);
601b8e80941Smrg
602b8e80941Smrg		/* This is only important for formats without alpha. */
603b8e80941Smrg		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
604b8e80941Smrg		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
605b8e80941Smrg		    srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
606b8e80941Smrg		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
607b8e80941Smrg		    srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
608b8e80941Smrg		    dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
609b8e80941Smrg			blend->need_src_alpha_4bit |= 0xfu << (i * 4);
610848b8605Smrg	}
611848b8605Smrg
612848b8605Smrg	if (blend->cb_target_mask) {
613848b8605Smrg		color_control |= S_028808_MODE(mode);
614848b8605Smrg	} else {
615848b8605Smrg		color_control |= S_028808_MODE(V_028808_CB_DISABLE);
616848b8605Smrg	}
617848b8605Smrg
618b8e80941Smrg	if (sctx->screen->rbplus_allowed) {
619b8e80941Smrg		/* Disable RB+ blend optimizations for dual source blending.
620b8e80941Smrg		 * Vulkan does this.
621b8e80941Smrg		 */
622b8e80941Smrg		if (blend->dual_src_blend) {
623b8e80941Smrg			for (int i = 0; i < 8; i++) {
624b8e80941Smrg				sx_mrt_blend_opt[i] =
625b8e80941Smrg					S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
626b8e80941Smrg					S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
627b8e80941Smrg			}
628b8e80941Smrg		}
629b8e80941Smrg
630b8e80941Smrg		for (int i = 0; i < 8; i++)
631b8e80941Smrg			si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
632b8e80941Smrg				       sx_mrt_blend_opt[i]);
633b8e80941Smrg
634b8e80941Smrg		/* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
635b8e80941Smrg		if (blend->dual_src_blend || state->logicop_enable ||
636b8e80941Smrg		    mode == V_028808_CB_RESOLVE)
637b8e80941Smrg			color_control |= S_028808_DISABLE_DUAL_QUAD(1);
638b8e80941Smrg	}
639b8e80941Smrg
640b8e80941Smrg	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
641848b8605Smrg	return blend;
642848b8605Smrg}
643848b8605Smrg
644848b8605Smrgstatic void *si_create_blend_state(struct pipe_context *ctx,
645848b8605Smrg				   const struct pipe_blend_state *state)
646848b8605Smrg{
647848b8605Smrg	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
648848b8605Smrg}
649848b8605Smrg
650848b8605Smrgstatic void si_bind_blend_state(struct pipe_context *ctx, void *state)
651848b8605Smrg{
652848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
653b8e80941Smrg	struct si_state_blend *old_blend = sctx->queued.named.blend;
654b8e80941Smrg	struct si_state_blend *blend = (struct si_state_blend *)state;
655b8e80941Smrg
656b8e80941Smrg	if (!state)
657b8e80941Smrg		return;
658b8e80941Smrg
659b8e80941Smrg	si_pm4_bind_state(sctx, blend, state);
660b8e80941Smrg
661b8e80941Smrg	if (!old_blend ||
662b8e80941Smrg	    old_blend->cb_target_mask != blend->cb_target_mask ||
663b8e80941Smrg	    old_blend->dual_src_blend != blend->dual_src_blend ||
664b8e80941Smrg	    (old_blend->blend_enable_4bit != blend->blend_enable_4bit &&
665b8e80941Smrg	     sctx->framebuffer.nr_samples >= 2 &&
666b8e80941Smrg	     sctx->screen->dcc_msaa_allowed))
667b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
668b8e80941Smrg
669b8e80941Smrg	if (!old_blend ||
670b8e80941Smrg	    old_blend->cb_target_mask != blend->cb_target_mask ||
671b8e80941Smrg	    old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
672b8e80941Smrg	    old_blend->alpha_to_one != blend->alpha_to_one ||
673b8e80941Smrg	    old_blend->dual_src_blend != blend->dual_src_blend ||
674b8e80941Smrg	    old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
675b8e80941Smrg	    old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
676b8e80941Smrg		sctx->do_update_shaders = true;
677b8e80941Smrg
678b8e80941Smrg	if (sctx->screen->dpbb_allowed &&
679b8e80941Smrg	    (!old_blend ||
680b8e80941Smrg	     old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
681b8e80941Smrg	     old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
682b8e80941Smrg	     old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit))
683b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
684b8e80941Smrg
685b8e80941Smrg	if (sctx->screen->has_out_of_order_rast &&
686b8e80941Smrg	    (!old_blend ||
687b8e80941Smrg	     (old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
688b8e80941Smrg	      old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
689b8e80941Smrg	      old_blend->commutative_4bit != blend->commutative_4bit ||
690b8e80941Smrg	      old_blend->logicop_enable != blend->logicop_enable)))
691b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
692848b8605Smrg}
693848b8605Smrg
694848b8605Smrgstatic void si_delete_blend_state(struct pipe_context *ctx, void *state)
695848b8605Smrg{
696848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
697848b8605Smrg	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
698848b8605Smrg}
699848b8605Smrg
700848b8605Smrgstatic void si_set_blend_color(struct pipe_context *ctx,
701848b8605Smrg			       const struct pipe_blend_color *state)
702848b8605Smrg{
703848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
704b8e80941Smrg	static const struct pipe_blend_color zeros;
705848b8605Smrg
706b8e80941Smrg	sctx->blend_color.state = *state;
707b8e80941Smrg	sctx->blend_color.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
708b8e80941Smrg	si_mark_atom_dirty(sctx, &sctx->atoms.s.blend_color);
709b8e80941Smrg}
710848b8605Smrg
711b8e80941Smrgstatic void si_emit_blend_color(struct si_context *sctx)
712b8e80941Smrg{
713b8e80941Smrg	struct radeon_cmdbuf *cs = sctx->gfx_cs;
714848b8605Smrg
715b8e80941Smrg	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
716b8e80941Smrg	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
717848b8605Smrg}
718848b8605Smrg
719848b8605Smrg/*
720b8e80941Smrg * Clipping
721848b8605Smrg */
722848b8605Smrg
723848b8605Smrgstatic void si_set_clip_state(struct pipe_context *ctx,
724848b8605Smrg			      const struct pipe_clip_state *state)
725848b8605Smrg{
726848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
727848b8605Smrg	struct pipe_constant_buffer cb;
728b8e80941Smrg	static const struct pipe_clip_state zeros;
729848b8605Smrg
730b8e80941Smrg	if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
731848b8605Smrg		return;
732848b8605Smrg
733b8e80941Smrg	sctx->clip_state.state = *state;
734b8e80941Smrg	sctx->clip_state.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
735b8e80941Smrg	si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_state);
736848b8605Smrg
737848b8605Smrg	cb.buffer = NULL;
738848b8605Smrg	cb.user_buffer = state->ucp;
739848b8605Smrg	cb.buffer_offset = 0;
740848b8605Smrg	cb.buffer_size = 4*4*8;
741b8e80941Smrg	si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
742848b8605Smrg	pipe_resource_reference(&cb.buffer, NULL);
743848b8605Smrg}
744848b8605Smrg
745b8e80941Smrgstatic void si_emit_clip_state(struct si_context *sctx)
746848b8605Smrg{
747b8e80941Smrg	struct radeon_cmdbuf *cs = sctx->gfx_cs;
748848b8605Smrg
749b8e80941Smrg	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
750b8e80941Smrg	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
751848b8605Smrg}
752848b8605Smrg
753b8e80941Smrgstatic void si_emit_clip_regs(struct si_context *sctx)
754848b8605Smrg{
755b8e80941Smrg	struct si_shader *vs = si_get_vs_state(sctx);
756b8e80941Smrg	struct si_shader_selector *vs_sel = vs->selector;
757b8e80941Smrg	struct tgsi_shader_info *info = &vs_sel->info;
758b8e80941Smrg	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
759b8e80941Smrg	unsigned window_space =
760b8e80941Smrg	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
761b8e80941Smrg	unsigned clipdist_mask = vs_sel->clipdist_mask;
762b8e80941Smrg	unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
763b8e80941Smrg	unsigned culldist_mask = vs_sel->culldist_mask;
764b8e80941Smrg	unsigned total_mask;
765b8e80941Smrg
766b8e80941Smrg	if (vs->key.opt.clip_disable) {
767b8e80941Smrg		assert(!info->culldist_writemask);
768b8e80941Smrg		clipdist_mask = 0;
769b8e80941Smrg		culldist_mask = 0;
770b8e80941Smrg	}
771b8e80941Smrg	total_mask = clipdist_mask | culldist_mask;
772b8e80941Smrg
773b8e80941Smrg	/* Clip distances on points have no effect, so need to be implemented
774b8e80941Smrg	 * as cull distances. This applies for the clipvertex case as well.
775b8e80941Smrg	 *
776b8e80941Smrg	 * Setting this for primitives other than points should have no adverse
777b8e80941Smrg	 * effects.
778b8e80941Smrg	 */
779b8e80941Smrg	clipdist_mask &= rs->clip_plane_enable;
780b8e80941Smrg	culldist_mask |= clipdist_mask;
781b8e80941Smrg
782b8e80941Smrg	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
783b8e80941Smrg	radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
784b8e80941Smrg		SI_TRACKED_PA_CL_VS_OUT_CNTL,
785b8e80941Smrg		vs_sel->pa_cl_vs_out_cntl |
786b8e80941Smrg		S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
787b8e80941Smrg		S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
788b8e80941Smrg		clipdist_mask | (culldist_mask << 8));
789b8e80941Smrg	radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL,
790b8e80941Smrg		SI_TRACKED_PA_CL_CLIP_CNTL,
791b8e80941Smrg		rs->pa_cl_clip_cntl |
792b8e80941Smrg		ucp_mask |
793b8e80941Smrg		S_028810_CLIP_DISABLE(window_space));
794b8e80941Smrg
795b8e80941Smrg	if (initial_cdw != sctx->gfx_cs->current.cdw)
796b8e80941Smrg		sctx->context_roll = true;
797848b8605Smrg}
798848b8605Smrg
799848b8605Smrg/*
800848b8605Smrg * inferred state between framebuffer and rasterizer
801848b8605Smrg */
802b8e80941Smrgstatic void si_update_poly_offset_state(struct si_context *sctx)
803848b8605Smrg{
804848b8605Smrg	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
805848b8605Smrg
806b8e80941Smrg	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
807b8e80941Smrg		si_pm4_bind_state(sctx, poly_offset, NULL);
808848b8605Smrg		return;
809b8e80941Smrg	}
810848b8605Smrg
811b8e80941Smrg	/* Use the user format, not db_render_format, so that the polygon
812b8e80941Smrg	 * offset behaves as expected by applications.
813b8e80941Smrg	 */
814848b8605Smrg	switch (sctx->framebuffer.state.zsbuf->texture->format) {
815b8e80941Smrg	case PIPE_FORMAT_Z16_UNORM:
816b8e80941Smrg		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
817b8e80941Smrg		break;
818b8e80941Smrg	default: /* 24-bit */
819b8e80941Smrg		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
820848b8605Smrg		break;
821848b8605Smrg	case PIPE_FORMAT_Z32_FLOAT:
822848b8605Smrg	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
823b8e80941Smrg		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
824848b8605Smrg		break;
825848b8605Smrg	}
826848b8605Smrg}
827848b8605Smrg
828848b8605Smrg/*
829848b8605Smrg * Rasterizer
830848b8605Smrg */
831848b8605Smrg
832848b8605Smrgstatic uint32_t si_translate_fill(uint32_t func)
833848b8605Smrg{
834848b8605Smrg	switch(func) {
835848b8605Smrg	case PIPE_POLYGON_MODE_FILL:
836848b8605Smrg		return V_028814_X_DRAW_TRIANGLES;
837848b8605Smrg	case PIPE_POLYGON_MODE_LINE:
838848b8605Smrg		return V_028814_X_DRAW_LINES;
839848b8605Smrg	case PIPE_POLYGON_MODE_POINT:
840848b8605Smrg		return V_028814_X_DRAW_POINTS;
841848b8605Smrg	default:
842848b8605Smrg		assert(0);
843848b8605Smrg		return V_028814_X_DRAW_POINTS;
844848b8605Smrg	}
845848b8605Smrg}
846848b8605Smrg
847848b8605Smrgstatic void *si_create_rs_state(struct pipe_context *ctx,
848848b8605Smrg				const struct pipe_rasterizer_state *state)
849848b8605Smrg{
850b8e80941Smrg	struct si_screen *sscreen = ((struct si_context *)ctx)->screen;
851848b8605Smrg	struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
852848b8605Smrg	struct si_pm4_state *pm4 = &rs->pm4;
853b8e80941Smrg	unsigned tmp, i;
854848b8605Smrg	float psize_min, psize_max;
855848b8605Smrg
856b8e80941Smrg	if (!rs) {
857848b8605Smrg		return NULL;
858848b8605Smrg	}
859848b8605Smrg
860b8e80941Smrg	rs->scissor_enable = state->scissor;
861b8e80941Smrg	rs->clip_halfz = state->clip_halfz;
862848b8605Smrg	rs->two_side = state->light_twoside;
863848b8605Smrg	rs->multisample_enable = state->multisample;
864b8e80941Smrg	rs->force_persample_interp = state->force_persample_interp;
865848b8605Smrg	rs->clip_plane_enable = state->clip_plane_enable;
866b8e80941Smrg	rs->half_pixel_center = state->half_pixel_center;
867848b8605Smrg	rs->line_stipple_enable = state->line_stipple_enable;
868b8e80941Smrg	rs->poly_stipple_enable = state->poly_stipple_enable;
869b8e80941Smrg	rs->line_smooth = state->line_smooth;
870b8e80941Smrg	rs->line_width = state->line_width;
871b8e80941Smrg	rs->poly_smooth = state->poly_smooth;
872b8e80941Smrg	rs->uses_poly_offset = state->offset_point || state->offset_line ||
873b8e80941Smrg			       state->offset_tri;
874b8e80941Smrg	rs->clamp_fragment_color = state->clamp_fragment_color;
875b8e80941Smrg	rs->clamp_vertex_color = state->clamp_vertex_color;
876848b8605Smrg	rs->flatshade = state->flatshade;
877848b8605Smrg	rs->sprite_coord_enable = state->sprite_coord_enable;
878b8e80941Smrg	rs->rasterizer_discard = state->rasterizer_discard;
879848b8605Smrg	rs->pa_sc_line_stipple = state->line_stipple_enable ?
880848b8605Smrg				S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
881848b8605Smrg				S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
882848b8605Smrg	rs->pa_cl_clip_cntl =
883b8e80941Smrg		S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
884b8e80941Smrg		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) |
885b8e80941Smrg		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) |
886848b8605Smrg		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
887848b8605Smrg		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
888848b8605Smrg
889b8e80941Smrg	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
890b8e80941Smrg		S_0286D4_FLAT_SHADE_ENA(1) |
891b8e80941Smrg		S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) |
892b8e80941Smrg		S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
893b8e80941Smrg		S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
894b8e80941Smrg		S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
895b8e80941Smrg		S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
896b8e80941Smrg		S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
897848b8605Smrg
898848b8605Smrg	/* point size 12.4 fixed point */
899848b8605Smrg	tmp = (unsigned)(state->point_size * 8.0);
900848b8605Smrg	si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
901848b8605Smrg
902848b8605Smrg	if (state->point_size_per_vertex) {
903848b8605Smrg		psize_min = util_get_min_point_size(state);
904b8e80941Smrg		psize_max = SI_MAX_POINT_SIZE;
905848b8605Smrg	} else {
906848b8605Smrg		/* Force the point size to be as if the vertex output was disabled. */
907848b8605Smrg		psize_min = state->point_size;
908848b8605Smrg		psize_max = state->point_size;
909848b8605Smrg	}
910b8e80941Smrg	rs->max_point_size = psize_max;
911b8e80941Smrg
912848b8605Smrg	/* Divide by two, because 0.5 = 1 pixel. */
913848b8605Smrg	si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
914848b8605Smrg			S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
915848b8605Smrg			S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
916848b8605Smrg
917b8e80941Smrg	si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL,
918b8e80941Smrg		       S_028A08_WIDTH(si_pack_float_12p4(state->line_width/2)));
919848b8605Smrg	si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
920848b8605Smrg		       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
921b8e80941Smrg		       S_028A48_MSAA_ENABLE(state->multisample ||
922b8e80941Smrg					    state->poly_smooth ||
923b8e80941Smrg					    state->line_smooth) |
924b8e80941Smrg		       S_028A48_VPORT_SCISSOR_ENABLE(1) |
925b8e80941Smrg		       S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.chip_class >= GFX9));
926848b8605Smrg
927848b8605Smrg	si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
928b8e80941Smrg	si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
929b8e80941Smrg		S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
930b8e80941Smrg		S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
931b8e80941Smrg		S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
932b8e80941Smrg		S_028814_FACE(!state->front_ccw) |
933b8e80941Smrg		S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
934b8e80941Smrg		S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
935b8e80941Smrg		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
936b8e80941Smrg		S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
937b8e80941Smrg				   state->fill_back != PIPE_POLYGON_MODE_FILL) |
938b8e80941Smrg		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
939b8e80941Smrg		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
940b8e80941Smrg
941b8e80941Smrg	if (!rs->uses_poly_offset)
942b8e80941Smrg		return rs;
943b8e80941Smrg
944b8e80941Smrg	rs->pm4_poly_offset = CALLOC(3, sizeof(struct si_pm4_state));
945b8e80941Smrg	if (!rs->pm4_poly_offset) {
946b8e80941Smrg		FREE(rs);
947b8e80941Smrg		return NULL;
948b8e80941Smrg	}
949b8e80941Smrg
950b8e80941Smrg	/* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
951b8e80941Smrg	for (i = 0; i < 3; i++) {
952b8e80941Smrg		struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
953b8e80941Smrg		float offset_units = state->offset_units;
954b8e80941Smrg		float offset_scale = state->offset_scale * 16.0f;
955b8e80941Smrg		uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
956b8e80941Smrg
957b8e80941Smrg		if (!state->offset_units_unscaled) {
958b8e80941Smrg			switch (i) {
959b8e80941Smrg			case 0: /* 16-bit zbuffer */
960b8e80941Smrg				offset_units *= 4.0f;
961b8e80941Smrg				pa_su_poly_offset_db_fmt_cntl =
962b8e80941Smrg					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
963b8e80941Smrg				break;
964b8e80941Smrg			case 1: /* 24-bit zbuffer */
965b8e80941Smrg				offset_units *= 2.0f;
966b8e80941Smrg				pa_su_poly_offset_db_fmt_cntl =
967b8e80941Smrg					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
968b8e80941Smrg				break;
969b8e80941Smrg			case 2: /* 32-bit zbuffer */
970b8e80941Smrg				offset_units *= 1.0f;
971b8e80941Smrg				pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
972b8e80941Smrg								S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
973b8e80941Smrg				break;
974b8e80941Smrg			}
975b8e80941Smrg		}
976b8e80941Smrg
977b8e80941Smrg		si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
978b8e80941Smrg			       fui(offset_scale));
979b8e80941Smrg		si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
980b8e80941Smrg			       fui(offset_units));
981b8e80941Smrg		si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
982b8e80941Smrg			       fui(offset_scale));
983b8e80941Smrg		si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
984b8e80941Smrg			       fui(offset_units));
985b8e80941Smrg		si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
986b8e80941Smrg			       pa_su_poly_offset_db_fmt_cntl);
987b8e80941Smrg	}
988848b8605Smrg
989848b8605Smrg	return rs;
990848b8605Smrg}
991848b8605Smrg
992848b8605Smrgstatic void si_bind_rs_state(struct pipe_context *ctx, void *state)
993848b8605Smrg{
994848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
995b8e80941Smrg	struct si_state_rasterizer *old_rs =
996b8e80941Smrg		(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
997848b8605Smrg	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
998848b8605Smrg
999b8e80941Smrg	if (!state)
1000848b8605Smrg		return;
1001848b8605Smrg
1002b8e80941Smrg	if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) {
1003b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1004b8e80941Smrg
1005b8e80941Smrg		/* Update the small primitive filter workaround if necessary. */
1006b8e80941Smrg		if (sctx->screen->has_msaa_sample_loc_bug &&
1007b8e80941Smrg		    sctx->framebuffer.nr_samples > 1)
1008b8e80941Smrg			si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
1009b8e80941Smrg	}
1010b8e80941Smrg
1011b8e80941Smrg	sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
1012b8e80941Smrg	sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
1013848b8605Smrg
1014848b8605Smrg	si_pm4_bind_state(sctx, rasterizer, rs);
1015b8e80941Smrg	si_update_poly_offset_state(sctx);
1016b8e80941Smrg
1017b8e80941Smrg	if (!old_rs ||
1018b8e80941Smrg	    old_rs->scissor_enable != rs->scissor_enable)
1019b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
1020b8e80941Smrg
1021b8e80941Smrg	if (!old_rs ||
1022b8e80941Smrg	    old_rs->line_width != rs->line_width ||
1023b8e80941Smrg	    old_rs->max_point_size != rs->max_point_size ||
1024b8e80941Smrg	    old_rs->half_pixel_center != rs->half_pixel_center)
1025b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband);
1026b8e80941Smrg
1027b8e80941Smrg	if (!old_rs ||
1028b8e80941Smrg	    old_rs->clip_halfz != rs->clip_halfz)
1029b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports);
1030b8e80941Smrg
1031b8e80941Smrg	if (!old_rs ||
1032b8e80941Smrg	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
1033b8e80941Smrg	    old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
1034b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
1035b8e80941Smrg
1036b8e80941Smrg	sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
1037b8e80941Smrg		rs->line_stipple_enable;
1038b8e80941Smrg
1039b8e80941Smrg	if (!old_rs ||
1040b8e80941Smrg	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
1041b8e80941Smrg	    old_rs->rasterizer_discard != rs->rasterizer_discard ||
1042b8e80941Smrg	    old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
1043b8e80941Smrg	    old_rs->flatshade != rs->flatshade ||
1044b8e80941Smrg	    old_rs->two_side != rs->two_side ||
1045b8e80941Smrg	    old_rs->multisample_enable != rs->multisample_enable ||
1046b8e80941Smrg	    old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
1047b8e80941Smrg	    old_rs->poly_smooth != rs->poly_smooth ||
1048b8e80941Smrg	    old_rs->line_smooth != rs->line_smooth ||
1049b8e80941Smrg	    old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
1050b8e80941Smrg	    old_rs->force_persample_interp != rs->force_persample_interp)
1051b8e80941Smrg		sctx->do_update_shaders = true;
1052848b8605Smrg}
1053848b8605Smrg
1054848b8605Smrgstatic void si_delete_rs_state(struct pipe_context *ctx, void *state)
1055848b8605Smrg{
1056848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
1057b8e80941Smrg	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
1058b8e80941Smrg
1059b8e80941Smrg	if (sctx->queued.named.rasterizer == state)
1060b8e80941Smrg		si_pm4_bind_state(sctx, poly_offset, NULL);
1061b8e80941Smrg
1062b8e80941Smrg	FREE(rs->pm4_poly_offset);
1063b8e80941Smrg	si_pm4_delete_state(sctx, rasterizer, rs);
1064848b8605Smrg}
1065848b8605Smrg
1066848b8605Smrg/*
 * inferred state between dsa and stencil ref
1068848b8605Smrg */
1069b8e80941Smrgstatic void si_emit_stencil_ref(struct si_context *sctx)
1070848b8605Smrg{
1071b8e80941Smrg	struct radeon_cmdbuf *cs = sctx->gfx_cs;
1072b8e80941Smrg	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
1073b8e80941Smrg	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
1074b8e80941Smrg
1075b8e80941Smrg	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
1076b8e80941Smrg	radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
1077b8e80941Smrg			S_028430_STENCILMASK(dsa->valuemask[0]) |
1078b8e80941Smrg			S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
1079b8e80941Smrg			S_028430_STENCILOPVAL(1));
1080b8e80941Smrg	radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
1081b8e80941Smrg			S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
1082b8e80941Smrg			S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
1083b8e80941Smrg			S_028434_STENCILOPVAL_BF(1));
1084848b8605Smrg}
1085848b8605Smrg
1086b8e80941Smrgstatic void si_set_stencil_ref(struct pipe_context *ctx,
1087b8e80941Smrg			       const struct pipe_stencil_ref *state)
1088848b8605Smrg{
1089848b8605Smrg        struct si_context *sctx = (struct si_context *)ctx;
1090b8e80941Smrg
1091b8e80941Smrg	if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
1092b8e80941Smrg		return;
1093b8e80941Smrg
1094b8e80941Smrg	sctx->stencil_ref.state = *state;
1095b8e80941Smrg	si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
1096848b8605Smrg}
1097848b8605Smrg
1098848b8605Smrg
1099848b8605Smrg/*
1100848b8605Smrg * DSA
1101848b8605Smrg */
1102848b8605Smrg
1103848b8605Smrgstatic uint32_t si_translate_stencil_op(int s_op)
1104848b8605Smrg{
1105848b8605Smrg	switch (s_op) {
1106848b8605Smrg	case PIPE_STENCIL_OP_KEEP:
1107848b8605Smrg		return V_02842C_STENCIL_KEEP;
1108848b8605Smrg	case PIPE_STENCIL_OP_ZERO:
1109848b8605Smrg		return V_02842C_STENCIL_ZERO;
1110848b8605Smrg	case PIPE_STENCIL_OP_REPLACE:
1111848b8605Smrg		return V_02842C_STENCIL_REPLACE_TEST;
1112848b8605Smrg	case PIPE_STENCIL_OP_INCR:
1113848b8605Smrg		return V_02842C_STENCIL_ADD_CLAMP;
1114848b8605Smrg	case PIPE_STENCIL_OP_DECR:
1115848b8605Smrg		return V_02842C_STENCIL_SUB_CLAMP;
1116848b8605Smrg	case PIPE_STENCIL_OP_INCR_WRAP:
1117848b8605Smrg		return V_02842C_STENCIL_ADD_WRAP;
1118848b8605Smrg	case PIPE_STENCIL_OP_DECR_WRAP:
1119848b8605Smrg		return V_02842C_STENCIL_SUB_WRAP;
1120848b8605Smrg	case PIPE_STENCIL_OP_INVERT:
1121848b8605Smrg		return V_02842C_STENCIL_INVERT;
1122848b8605Smrg	default:
1123b8e80941Smrg		PRINT_ERR("Unknown stencil op %d", s_op);
1124848b8605Smrg		assert(0);
1125848b8605Smrg		break;
1126848b8605Smrg	}
1127848b8605Smrg	return 0;
1128848b8605Smrg}
1129848b8605Smrg
1130b8e80941Smrgstatic bool si_dsa_writes_stencil(const struct pipe_stencil_state *s)
1131848b8605Smrg{
1132b8e80941Smrg	return s->enabled && s->writemask &&
1133b8e80941Smrg	       (s->fail_op  != PIPE_STENCIL_OP_KEEP ||
1134b8e80941Smrg		s->zfail_op != PIPE_STENCIL_OP_KEEP ||
1135b8e80941Smrg		s->zpass_op != PIPE_STENCIL_OP_KEEP);
1136b8e80941Smrg}
1137b8e80941Smrg
1138b8e80941Smrgstatic bool si_order_invariant_stencil_op(enum pipe_stencil_op op)
1139b8e80941Smrg{
1140b8e80941Smrg	/* REPLACE is normally order invariant, except when the stencil
1141b8e80941Smrg	 * reference value is written by the fragment shader. Tracking this
1142b8e80941Smrg	 * interaction does not seem worth the effort, so be conservative. */
1143b8e80941Smrg	return op != PIPE_STENCIL_OP_INCR &&
1144b8e80941Smrg	       op != PIPE_STENCIL_OP_DECR &&
1145b8e80941Smrg	       op != PIPE_STENCIL_OP_REPLACE;
1146b8e80941Smrg}
1147b8e80941Smrg
1148b8e80941Smrg/* Compute whether, assuming Z writes are disabled, this stencil state is order
1149b8e80941Smrg * invariant in the sense that the set of passing fragments as well as the
1150b8e80941Smrg * final stencil buffer result does not depend on the order of fragments. */
1151b8e80941Smrgstatic bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state)
1152b8e80941Smrg{
1153b8e80941Smrg	return !state->enabled || !state->writemask ||
1154b8e80941Smrg	       /* The following assumes that Z writes are disabled. */
1155b8e80941Smrg	       (state->func == PIPE_FUNC_ALWAYS &&
1156b8e80941Smrg	        si_order_invariant_stencil_op(state->zpass_op) &&
1157b8e80941Smrg	        si_order_invariant_stencil_op(state->zfail_op)) ||
1158b8e80941Smrg	       (state->func == PIPE_FUNC_NEVER &&
1159b8e80941Smrg	        si_order_invariant_stencil_op(state->fail_op));
1160b8e80941Smrg}
1161b8e80941Smrg
1162b8e80941Smrgstatic void *si_create_dsa_state(struct pipe_context *ctx,
1163b8e80941Smrg				 const struct pipe_depth_stencil_alpha_state *state)
1164b8e80941Smrg{
1165b8e80941Smrg	struct si_context *sctx = (struct si_context *)ctx;
1166b8e80941Smrg	struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
1167848b8605Smrg	struct si_pm4_state *pm4 = &dsa->pm4;
1168848b8605Smrg	unsigned db_depth_control;
1169848b8605Smrg	uint32_t db_stencil_control = 0;
1170848b8605Smrg
1171b8e80941Smrg	if (!dsa) {
1172848b8605Smrg		return NULL;
1173848b8605Smrg	}
1174848b8605Smrg
1175b8e80941Smrg	dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
1176b8e80941Smrg	dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
1177b8e80941Smrg	dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
1178b8e80941Smrg	dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
1179848b8605Smrg
1180848b8605Smrg	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
1181848b8605Smrg		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
1182b8e80941Smrg		S_028800_ZFUNC(state->depth.func) |
1183b8e80941Smrg		S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
1184848b8605Smrg
1185848b8605Smrg	/* stencil */
1186848b8605Smrg	if (state->stencil[0].enabled) {
1187848b8605Smrg		db_depth_control |= S_028800_STENCIL_ENABLE(1);
1188848b8605Smrg		db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
1189848b8605Smrg		db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
1190848b8605Smrg		db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
1191848b8605Smrg		db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
1192848b8605Smrg
1193848b8605Smrg		if (state->stencil[1].enabled) {
1194848b8605Smrg			db_depth_control |= S_028800_BACKFACE_ENABLE(1);
1195848b8605Smrg			db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
1196848b8605Smrg			db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
1197848b8605Smrg			db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
1198848b8605Smrg			db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
1199848b8605Smrg		}
1200848b8605Smrg	}
1201848b8605Smrg
1202848b8605Smrg	/* alpha */
1203848b8605Smrg	if (state->alpha.enabled) {
1204848b8605Smrg		dsa->alpha_func = state->alpha.func;
1205848b8605Smrg
1206848b8605Smrg		si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
1207b8e80941Smrg		               SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value));
1208848b8605Smrg	} else {
1209848b8605Smrg		dsa->alpha_func = PIPE_FUNC_ALWAYS;
1210848b8605Smrg	}
1211848b8605Smrg
1212848b8605Smrg	si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
1213b8e80941Smrg	if (state->stencil[0].enabled)
1214b8e80941Smrg		si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
1215b8e80941Smrg	if (state->depth.bounds_test) {
1216b8e80941Smrg		si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
1217b8e80941Smrg		si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
1218b8e80941Smrg	}
1219b8e80941Smrg
1220b8e80941Smrg	dsa->depth_enabled = state->depth.enabled;
1221b8e80941Smrg	dsa->depth_write_enabled = state->depth.enabled &&
1222b8e80941Smrg				   state->depth.writemask;
1223b8e80941Smrg	dsa->stencil_enabled = state->stencil[0].enabled;
1224b8e80941Smrg	dsa->stencil_write_enabled = state->stencil[0].enabled &&
1225b8e80941Smrg				     (si_dsa_writes_stencil(&state->stencil[0]) ||
1226b8e80941Smrg				      si_dsa_writes_stencil(&state->stencil[1]));
1227b8e80941Smrg	dsa->db_can_write = dsa->depth_write_enabled ||
1228b8e80941Smrg			    dsa->stencil_write_enabled;
1229b8e80941Smrg
1230b8e80941Smrg	bool zfunc_is_ordered =
1231b8e80941Smrg		state->depth.func == PIPE_FUNC_NEVER ||
1232b8e80941Smrg		state->depth.func == PIPE_FUNC_LESS ||
1233b8e80941Smrg		state->depth.func == PIPE_FUNC_LEQUAL ||
1234b8e80941Smrg		state->depth.func == PIPE_FUNC_GREATER ||
1235b8e80941Smrg		state->depth.func == PIPE_FUNC_GEQUAL;
1236b8e80941Smrg
1237b8e80941Smrg	bool nozwrite_and_order_invariant_stencil =
1238b8e80941Smrg		!dsa->db_can_write ||
1239b8e80941Smrg		(!dsa->depth_write_enabled &&
1240b8e80941Smrg		 si_order_invariant_stencil_state(&state->stencil[0]) &&
1241b8e80941Smrg		 si_order_invariant_stencil_state(&state->stencil[1]));
1242b8e80941Smrg
1243b8e80941Smrg	dsa->order_invariance[1].zs =
1244b8e80941Smrg		nozwrite_and_order_invariant_stencil ||
1245b8e80941Smrg		(!dsa->stencil_write_enabled && zfunc_is_ordered);
1246b8e80941Smrg	dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered;
1247b8e80941Smrg
1248b8e80941Smrg	dsa->order_invariance[1].pass_set =
1249b8e80941Smrg		nozwrite_and_order_invariant_stencil ||
1250b8e80941Smrg		(!dsa->stencil_write_enabled &&
1251b8e80941Smrg		 (state->depth.func == PIPE_FUNC_ALWAYS ||
1252b8e80941Smrg		  state->depth.func == PIPE_FUNC_NEVER));
1253b8e80941Smrg	dsa->order_invariance[0].pass_set =
1254b8e80941Smrg		!dsa->depth_write_enabled ||
1255b8e80941Smrg		(state->depth.func == PIPE_FUNC_ALWAYS ||
1256b8e80941Smrg		 state->depth.func == PIPE_FUNC_NEVER);
1257b8e80941Smrg
1258b8e80941Smrg	dsa->order_invariance[1].pass_last =
1259b8e80941Smrg		sctx->screen->assume_no_z_fights &&
1260b8e80941Smrg		!dsa->stencil_write_enabled &&
1261b8e80941Smrg		dsa->depth_write_enabled && zfunc_is_ordered;
1262b8e80941Smrg	dsa->order_invariance[0].pass_last =
1263b8e80941Smrg		sctx->screen->assume_no_z_fights &&
1264b8e80941Smrg		dsa->depth_write_enabled && zfunc_is_ordered;
1265848b8605Smrg
1266848b8605Smrg	return dsa;
1267848b8605Smrg}
1268848b8605Smrg
1269848b8605Smrgstatic void si_bind_dsa_state(struct pipe_context *ctx, void *state)
1270848b8605Smrg{
1271848b8605Smrg        struct si_context *sctx = (struct si_context *)ctx;
1272b8e80941Smrg	struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
1273848b8605Smrg        struct si_state_dsa *dsa = state;
1274848b8605Smrg
1275b8e80941Smrg        if (!state)
1276848b8605Smrg                return;
1277848b8605Smrg
1278848b8605Smrg	si_pm4_bind_state(sctx, dsa, dsa);
1279b8e80941Smrg
1280b8e80941Smrg	if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
1281b8e80941Smrg		   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
1282b8e80941Smrg		sctx->stencil_ref.dsa_part = dsa->stencil_ref;
1283b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
1284b8e80941Smrg	}
1285b8e80941Smrg
1286b8e80941Smrg	if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
1287b8e80941Smrg		sctx->do_update_shaders = true;
1288b8e80941Smrg
1289b8e80941Smrg	if (sctx->screen->dpbb_allowed &&
1290b8e80941Smrg	    (!old_dsa ||
1291b8e80941Smrg	     (old_dsa->depth_enabled != dsa->depth_enabled ||
1292b8e80941Smrg	      old_dsa->stencil_enabled != dsa->stencil_enabled ||
1293b8e80941Smrg	      old_dsa->db_can_write != dsa->db_can_write)))
1294b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
1295b8e80941Smrg
1296b8e80941Smrg	if (sctx->screen->has_out_of_order_rast &&
1297b8e80941Smrg	    (!old_dsa ||
1298b8e80941Smrg	     memcmp(old_dsa->order_invariance, dsa->order_invariance,
1299b8e80941Smrg		    sizeof(old_dsa->order_invariance))))
1300b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
1301848b8605Smrg}
1302848b8605Smrg
1303848b8605Smrgstatic void si_delete_dsa_state(struct pipe_context *ctx, void *state)
1304848b8605Smrg{
1305848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
1306848b8605Smrg	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
1307848b8605Smrg}
1308848b8605Smrg
1309b8e80941Smrgstatic void *si_create_db_flush_dsa(struct si_context *sctx)
1310b8e80941Smrg{
1311b8e80941Smrg	struct pipe_depth_stencil_alpha_state dsa = {};
1312b8e80941Smrg
1313b8e80941Smrg	return sctx->b.create_depth_stencil_alpha_state(&sctx->b, &dsa);
1314b8e80941Smrg}
1315b8e80941Smrg
1316b8e80941Smrg/* DB RENDER STATE */
1317b8e80941Smrg
1318b8e80941Smrgstatic void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
1319b8e80941Smrg{
1320b8e80941Smrg	struct si_context *sctx = (struct si_context*)ctx;
1321b8e80941Smrg
1322b8e80941Smrg	/* Pipeline stat & streamout queries. */
1323b8e80941Smrg	if (enable) {
1324b8e80941Smrg		sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
1325b8e80941Smrg		sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS;
1326b8e80941Smrg	} else {
1327b8e80941Smrg		sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
1328b8e80941Smrg		sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
1329b8e80941Smrg	}
1330b8e80941Smrg
1331b8e80941Smrg	/* Occlusion queries. */
1332b8e80941Smrg	if (sctx->occlusion_queries_disabled != !enable) {
1333b8e80941Smrg		sctx->occlusion_queries_disabled = !enable;
1334b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1335b8e80941Smrg	}
1336b8e80941Smrg}
1337b8e80941Smrg
1338b8e80941Smrgvoid si_set_occlusion_query_state(struct si_context *sctx,
1339b8e80941Smrg				  bool old_perfect_enable)
1340848b8605Smrg{
1341b8e80941Smrg	si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1342848b8605Smrg
1343b8e80941Smrg	bool perfect_enable = sctx->num_perfect_occlusion_queries != 0;
1344848b8605Smrg
1345b8e80941Smrg	if (perfect_enable != old_perfect_enable)
1346b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
1347b8e80941Smrg}
1348b8e80941Smrg
1349b8e80941Smrgvoid si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
1350b8e80941Smrg{
1351b8e80941Smrg	st->saved_compute = sctx->cs_shader_state.program;
1352b8e80941Smrg
1353b8e80941Smrg	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
1354b8e80941Smrg	si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
1355b8e80941Smrg
1356b8e80941Smrg	st->saved_ssbo_writable_mask = 0;
1357b8e80941Smrg
1358b8e80941Smrg	for (unsigned i = 0; i < 3; i++) {
1359b8e80941Smrg		if (sctx->const_and_shader_buffers[PIPE_SHADER_COMPUTE].writable_mask &
1360b8e80941Smrg		    (1u << si_get_shaderbuf_slot(i)))
1361b8e80941Smrg			st->saved_ssbo_writable_mask |= 1 << i;
1362b8e80941Smrg	}
1363b8e80941Smrg}
1364b8e80941Smrg
1365b8e80941Smrgstatic void si_emit_db_render_state(struct si_context *sctx)
1366b8e80941Smrg{
1367b8e80941Smrg	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
1368b8e80941Smrg	unsigned db_shader_control, db_render_control, db_count_control;
1369b8e80941Smrg	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
1370b8e80941Smrg
1371b8e80941Smrg	/* DB_RENDER_CONTROL */
1372b8e80941Smrg	if (sctx->dbcb_depth_copy_enabled ||
1373b8e80941Smrg	    sctx->dbcb_stencil_copy_enabled) {
1374b8e80941Smrg		db_render_control =
1375b8e80941Smrg			S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
1376b8e80941Smrg			S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
1377b8e80941Smrg			S_028000_COPY_CENTROID(1) |
1378b8e80941Smrg			S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample);
1379b8e80941Smrg	} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
1380b8e80941Smrg		db_render_control =
1381b8e80941Smrg			S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
1382b8e80941Smrg			S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace);
1383b8e80941Smrg	} else {
1384b8e80941Smrg		db_render_control =
1385b8e80941Smrg			S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
1386b8e80941Smrg			S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear);
1387b8e80941Smrg	}
1388b8e80941Smrg
1389b8e80941Smrg	/* DB_COUNT_CONTROL (occlusion queries) */
1390b8e80941Smrg	if (sctx->num_occlusion_queries > 0 &&
1391b8e80941Smrg	    !sctx->occlusion_queries_disabled) {
1392b8e80941Smrg		bool perfect = sctx->num_perfect_occlusion_queries > 0;
1393b8e80941Smrg
1394b8e80941Smrg		if (sctx->chip_class >= CIK) {
1395b8e80941Smrg			unsigned log_sample_rate = sctx->framebuffer.log_samples;
1396b8e80941Smrg
1397b8e80941Smrg			/* Stoney doesn't increment occlusion query counters
1398b8e80941Smrg			 * if the sample rate is 16x. Use 8x sample rate instead.
1399b8e80941Smrg			 */
1400b8e80941Smrg			if (sctx->family == CHIP_STONEY)
1401b8e80941Smrg				log_sample_rate = MIN2(log_sample_rate, 3);
1402b8e80941Smrg
1403b8e80941Smrg			db_count_control =
1404b8e80941Smrg				S_028004_PERFECT_ZPASS_COUNTS(perfect) |
1405b8e80941Smrg				S_028004_SAMPLE_RATE(log_sample_rate) |
1406b8e80941Smrg				S_028004_ZPASS_ENABLE(1) |
1407b8e80941Smrg				S_028004_SLICE_EVEN_ENABLE(1) |
1408b8e80941Smrg				S_028004_SLICE_ODD_ENABLE(1);
1409b8e80941Smrg		} else {
1410b8e80941Smrg			db_count_control =
1411b8e80941Smrg				S_028004_PERFECT_ZPASS_COUNTS(perfect) |
1412b8e80941Smrg				S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples);
1413b8e80941Smrg		}
1414848b8605Smrg	} else {
1415b8e80941Smrg		/* Disable occlusion queries. */
1416b8e80941Smrg		if (sctx->chip_class >= CIK) {
1417b8e80941Smrg			db_count_control = 0;
1418b8e80941Smrg		} else {
1419b8e80941Smrg			db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
1420b8e80941Smrg		}
1421b8e80941Smrg	}
1422b8e80941Smrg
1423b8e80941Smrg	radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL,
1424b8e80941Smrg				    SI_TRACKED_DB_RENDER_CONTROL, db_render_control,
1425b8e80941Smrg				    db_count_control);
1426b8e80941Smrg
1427b8e80941Smrg	/* DB_RENDER_OVERRIDE2 */
1428b8e80941Smrg	radeon_opt_set_context_reg(sctx,  R_028010_DB_RENDER_OVERRIDE2,
1429b8e80941Smrg		SI_TRACKED_DB_RENDER_OVERRIDE2,
1430b8e80941Smrg		S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
1431b8e80941Smrg		S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
1432b8e80941Smrg		S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
1433b8e80941Smrg
1434b8e80941Smrg	db_shader_control = sctx->ps_db_shader_control;
1435b8e80941Smrg
1436b8e80941Smrg	/* Bug workaround for smoothing (overrasterization) on SI. */
1437b8e80941Smrg	if (sctx->chip_class == SI && sctx->smoothing_enabled) {
1438b8e80941Smrg		db_shader_control &= C_02880C_Z_ORDER;
1439b8e80941Smrg		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
1440848b8605Smrg	}
1441848b8605Smrg
1442b8e80941Smrg	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
1443b8e80941Smrg	if (!rs->multisample_enable)
1444b8e80941Smrg		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
1445b8e80941Smrg
1446b8e80941Smrg	if (sctx->screen->has_rbplus &&
1447b8e80941Smrg	    !sctx->screen->rbplus_allowed)
1448b8e80941Smrg		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
1449b8e80941Smrg
1450b8e80941Smrg	radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL,
1451b8e80941Smrg				   SI_TRACKED_DB_SHADER_CONTROL, db_shader_control);
1452b8e80941Smrg
1453b8e80941Smrg	if (initial_cdw != sctx->gfx_cs->current.cdw)
1454b8e80941Smrg		sctx->context_roll = true;
1455848b8605Smrg}
1456848b8605Smrg
1457848b8605Smrg/*
1458848b8605Smrg * format translation
1459848b8605Smrg */
1460848b8605Smrgstatic uint32_t si_translate_colorformat(enum pipe_format format)
1461848b8605Smrg{
1462848b8605Smrg	const struct util_format_description *desc = util_format_description(format);
1463b8e80941Smrg	if (!desc)
1464b8e80941Smrg		return V_028C70_COLOR_INVALID;
1465848b8605Smrg
1466848b8605Smrg#define HAS_SIZE(x,y,z,w) \
1467848b8605Smrg	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
1468848b8605Smrg         desc->channel[2].size == (z) && desc->channel[3].size == (w))
1469848b8605Smrg
1470848b8605Smrg	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
1471848b8605Smrg		return V_028C70_COLOR_10_11_11;
1472848b8605Smrg
1473848b8605Smrg	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
1474848b8605Smrg		return V_028C70_COLOR_INVALID;
1475848b8605Smrg
1476b8e80941Smrg	/* hw cannot support mixed formats (except depth/stencil, since
1477b8e80941Smrg	 * stencil is not written to). */
1478b8e80941Smrg	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
1479b8e80941Smrg		return V_028C70_COLOR_INVALID;
1480b8e80941Smrg
1481848b8605Smrg	switch (desc->nr_channels) {
1482848b8605Smrg	case 1:
1483848b8605Smrg		switch (desc->channel[0].size) {
1484848b8605Smrg		case 8:
1485848b8605Smrg			return V_028C70_COLOR_8;
1486848b8605Smrg		case 16:
1487848b8605Smrg			return V_028C70_COLOR_16;
1488848b8605Smrg		case 32:
1489848b8605Smrg			return V_028C70_COLOR_32;
1490848b8605Smrg		}
1491848b8605Smrg		break;
1492848b8605Smrg	case 2:
1493848b8605Smrg		if (desc->channel[0].size == desc->channel[1].size) {
1494848b8605Smrg			switch (desc->channel[0].size) {
1495848b8605Smrg			case 8:
1496848b8605Smrg				return V_028C70_COLOR_8_8;
1497848b8605Smrg			case 16:
1498848b8605Smrg				return V_028C70_COLOR_16_16;
1499848b8605Smrg			case 32:
1500848b8605Smrg				return V_028C70_COLOR_32_32;
1501848b8605Smrg			}
1502848b8605Smrg		} else if (HAS_SIZE(8,24,0,0)) {
1503848b8605Smrg			return V_028C70_COLOR_24_8;
1504848b8605Smrg		} else if (HAS_SIZE(24,8,0,0)) {
1505848b8605Smrg			return V_028C70_COLOR_8_24;
1506848b8605Smrg		}
1507848b8605Smrg		break;
1508848b8605Smrg	case 3:
1509848b8605Smrg		if (HAS_SIZE(5,6,5,0)) {
1510848b8605Smrg			return V_028C70_COLOR_5_6_5;
1511848b8605Smrg		} else if (HAS_SIZE(32,8,24,0)) {
1512848b8605Smrg			return V_028C70_COLOR_X24_8_32_FLOAT;
1513848b8605Smrg		}
1514848b8605Smrg		break;
1515848b8605Smrg	case 4:
1516848b8605Smrg		if (desc->channel[0].size == desc->channel[1].size &&
1517848b8605Smrg		    desc->channel[0].size == desc->channel[2].size &&
1518848b8605Smrg		    desc->channel[0].size == desc->channel[3].size) {
1519848b8605Smrg			switch (desc->channel[0].size) {
1520848b8605Smrg			case 4:
1521848b8605Smrg				return V_028C70_COLOR_4_4_4_4;
1522848b8605Smrg			case 8:
1523848b8605Smrg				return V_028C70_COLOR_8_8_8_8;
1524848b8605Smrg			case 16:
1525848b8605Smrg				return V_028C70_COLOR_16_16_16_16;
1526848b8605Smrg			case 32:
1527848b8605Smrg				return V_028C70_COLOR_32_32_32_32;
1528848b8605Smrg			}
1529848b8605Smrg		} else if (HAS_SIZE(5,5,5,1)) {
1530848b8605Smrg			return V_028C70_COLOR_1_5_5_5;
1531b8e80941Smrg		} else if (HAS_SIZE(1,5,5,5)) {
1532b8e80941Smrg			return V_028C70_COLOR_5_5_5_1;
1533848b8605Smrg		} else if (HAS_SIZE(10,10,10,2)) {
1534848b8605Smrg			return V_028C70_COLOR_2_10_10_10;
1535848b8605Smrg		}
1536848b8605Smrg		break;
1537848b8605Smrg	}
1538848b8605Smrg	return V_028C70_COLOR_INVALID;
1539848b8605Smrg}
1540848b8605Smrg
1541848b8605Smrgstatic uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1542848b8605Smrg{
1543848b8605Smrg	if (SI_BIG_ENDIAN) {
1544848b8605Smrg		switch(colorformat) {
1545848b8605Smrg		/* 8-bit buffers. */
1546848b8605Smrg		case V_028C70_COLOR_8:
1547848b8605Smrg			return V_028C70_ENDIAN_NONE;
1548848b8605Smrg
1549848b8605Smrg		/* 16-bit buffers. */
1550848b8605Smrg		case V_028C70_COLOR_5_6_5:
1551848b8605Smrg		case V_028C70_COLOR_1_5_5_5:
1552848b8605Smrg		case V_028C70_COLOR_4_4_4_4:
1553848b8605Smrg		case V_028C70_COLOR_16:
1554848b8605Smrg		case V_028C70_COLOR_8_8:
1555848b8605Smrg			return V_028C70_ENDIAN_8IN16;
1556848b8605Smrg
1557848b8605Smrg		/* 32-bit buffers. */
1558848b8605Smrg		case V_028C70_COLOR_8_8_8_8:
1559848b8605Smrg		case V_028C70_COLOR_2_10_10_10:
1560848b8605Smrg		case V_028C70_COLOR_8_24:
1561848b8605Smrg		case V_028C70_COLOR_24_8:
1562848b8605Smrg		case V_028C70_COLOR_16_16:
1563848b8605Smrg			return V_028C70_ENDIAN_8IN32;
1564848b8605Smrg
1565848b8605Smrg		/* 64-bit buffers. */
1566848b8605Smrg		case V_028C70_COLOR_16_16_16_16:
1567848b8605Smrg			return V_028C70_ENDIAN_8IN16;
1568848b8605Smrg
1569848b8605Smrg		case V_028C70_COLOR_32_32:
1570848b8605Smrg			return V_028C70_ENDIAN_8IN32;
1571848b8605Smrg
1572848b8605Smrg		/* 128-bit buffers. */
1573848b8605Smrg		case V_028C70_COLOR_32_32_32_32:
1574848b8605Smrg			return V_028C70_ENDIAN_8IN32;
1575848b8605Smrg		default:
1576848b8605Smrg			return V_028C70_ENDIAN_NONE; /* Unsupported. */
1577848b8605Smrg		}
1578848b8605Smrg	} else {
1579848b8605Smrg		return V_028C70_ENDIAN_NONE;
1580848b8605Smrg	}
1581848b8605Smrg}
1582848b8605Smrg
1583848b8605Smrgstatic uint32_t si_translate_dbformat(enum pipe_format format)
1584848b8605Smrg{
1585848b8605Smrg	switch (format) {
1586848b8605Smrg	case PIPE_FORMAT_Z16_UNORM:
1587848b8605Smrg		return V_028040_Z_16;
1588848b8605Smrg	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1589848b8605Smrg	case PIPE_FORMAT_X8Z24_UNORM:
1590848b8605Smrg	case PIPE_FORMAT_Z24X8_UNORM:
1591848b8605Smrg	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1592848b8605Smrg		return V_028040_Z_24; /* deprecated on SI */
1593848b8605Smrg	case PIPE_FORMAT_Z32_FLOAT:
1594848b8605Smrg	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1595848b8605Smrg		return V_028040_Z_32_FLOAT;
1596848b8605Smrg	default:
1597848b8605Smrg		return V_028040_Z_INVALID;
1598848b8605Smrg	}
1599848b8605Smrg}
1600848b8605Smrg
1601848b8605Smrg/*
1602848b8605Smrg * Texture translation
1603848b8605Smrg */
1604848b8605Smrg
1605848b8605Smrgstatic uint32_t si_translate_texformat(struct pipe_screen *screen,
1606848b8605Smrg				       enum pipe_format format,
1607848b8605Smrg				       const struct util_format_description *desc,
1608848b8605Smrg				       int first_non_void)
1609848b8605Smrg{
1610848b8605Smrg	struct si_screen *sscreen = (struct si_screen*)screen;
1611b8e80941Smrg	bool uniform = true;
1612848b8605Smrg	int i;
1613848b8605Smrg
1614848b8605Smrg	/* Colorspace (return non-RGB formats directly). */
1615848b8605Smrg	switch (desc->colorspace) {
1616848b8605Smrg	/* Depth stencil formats */
1617848b8605Smrg	case UTIL_FORMAT_COLORSPACE_ZS:
1618848b8605Smrg		switch (format) {
1619848b8605Smrg		case PIPE_FORMAT_Z16_UNORM:
1620848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_16;
1621848b8605Smrg		case PIPE_FORMAT_X24S8_UINT:
1622b8e80941Smrg		case PIPE_FORMAT_S8X24_UINT:
1623b8e80941Smrg			/*
1624b8e80941Smrg			 * Implemented as an 8_8_8_8 data format to fix texture
1625b8e80941Smrg			 * gathers in stencil sampling. This affects at least
1626b8e80941Smrg			 * GL45-CTS.texture_cube_map_array.sampling on VI.
1627b8e80941Smrg			 */
1628b8e80941Smrg			if (sscreen->info.chip_class <= VI)
1629b8e80941Smrg				return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1630b8e80941Smrg
1631b8e80941Smrg			if (format == PIPE_FORMAT_X24S8_UINT)
1632b8e80941Smrg				return V_008F14_IMG_DATA_FORMAT_8_24;
1633b8e80941Smrg			else
1634b8e80941Smrg				return V_008F14_IMG_DATA_FORMAT_24_8;
1635848b8605Smrg		case PIPE_FORMAT_Z24X8_UNORM:
1636848b8605Smrg		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1637848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_8_24;
1638848b8605Smrg		case PIPE_FORMAT_X8Z24_UNORM:
1639848b8605Smrg		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1640848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_24_8;
1641848b8605Smrg		case PIPE_FORMAT_S8_UINT:
1642848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_8;
1643848b8605Smrg		case PIPE_FORMAT_Z32_FLOAT:
1644848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_32;
1645848b8605Smrg		case PIPE_FORMAT_X32_S8X24_UINT:
1646848b8605Smrg		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1647848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
1648848b8605Smrg		default:
1649848b8605Smrg			goto out_unknown;
1650848b8605Smrg		}
1651848b8605Smrg
1652848b8605Smrg	case UTIL_FORMAT_COLORSPACE_YUV:
1653848b8605Smrg		goto out_unknown; /* TODO */
1654848b8605Smrg
1655848b8605Smrg	case UTIL_FORMAT_COLORSPACE_SRGB:
1656848b8605Smrg		if (desc->nr_channels != 4 && desc->nr_channels != 1)
1657848b8605Smrg			goto out_unknown;
1658848b8605Smrg		break;
1659848b8605Smrg
1660848b8605Smrg	default:
1661848b8605Smrg		break;
1662848b8605Smrg	}
1663848b8605Smrg
1664848b8605Smrg	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
1665b8e80941Smrg		if (!sscreen->info.has_format_bc1_through_bc7)
1666848b8605Smrg			goto out_unknown;
1667848b8605Smrg
1668848b8605Smrg		switch (format) {
1669848b8605Smrg		case PIPE_FORMAT_RGTC1_SNORM:
1670848b8605Smrg		case PIPE_FORMAT_LATC1_SNORM:
1671848b8605Smrg		case PIPE_FORMAT_RGTC1_UNORM:
1672848b8605Smrg		case PIPE_FORMAT_LATC1_UNORM:
1673848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_BC4;
1674848b8605Smrg		case PIPE_FORMAT_RGTC2_SNORM:
1675848b8605Smrg		case PIPE_FORMAT_LATC2_SNORM:
1676848b8605Smrg		case PIPE_FORMAT_RGTC2_UNORM:
1677848b8605Smrg		case PIPE_FORMAT_LATC2_UNORM:
1678848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_BC5;
1679848b8605Smrg		default:
1680848b8605Smrg			goto out_unknown;
1681848b8605Smrg		}
1682848b8605Smrg	}
1683848b8605Smrg
1684b8e80941Smrg	if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
1685b8e80941Smrg	    (sscreen->info.family == CHIP_STONEY ||
1686b8e80941Smrg	     sscreen->info.family == CHIP_VEGA10 ||
1687b8e80941Smrg	     sscreen->info.family == CHIP_RAVEN)) {
1688b8e80941Smrg		switch (format) {
1689b8e80941Smrg		case PIPE_FORMAT_ETC1_RGB8:
1690b8e80941Smrg		case PIPE_FORMAT_ETC2_RGB8:
1691b8e80941Smrg		case PIPE_FORMAT_ETC2_SRGB8:
1692b8e80941Smrg			return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
1693b8e80941Smrg		case PIPE_FORMAT_ETC2_RGB8A1:
1694b8e80941Smrg		case PIPE_FORMAT_ETC2_SRGB8A1:
1695b8e80941Smrg			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
1696b8e80941Smrg		case PIPE_FORMAT_ETC2_RGBA8:
1697b8e80941Smrg		case PIPE_FORMAT_ETC2_SRGBA8:
1698b8e80941Smrg			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
1699b8e80941Smrg		case PIPE_FORMAT_ETC2_R11_UNORM:
1700b8e80941Smrg		case PIPE_FORMAT_ETC2_R11_SNORM:
1701b8e80941Smrg			return V_008F14_IMG_DATA_FORMAT_ETC2_R;
1702b8e80941Smrg		case PIPE_FORMAT_ETC2_RG11_UNORM:
1703b8e80941Smrg		case PIPE_FORMAT_ETC2_RG11_SNORM:
1704b8e80941Smrg			return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
1705b8e80941Smrg		default:
1706b8e80941Smrg			goto out_unknown;
1707b8e80941Smrg		}
1708b8e80941Smrg	}
1709b8e80941Smrg
1710848b8605Smrg	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
1711b8e80941Smrg		if (!sscreen->info.has_format_bc1_through_bc7)
1712848b8605Smrg			goto out_unknown;
1713848b8605Smrg
1714848b8605Smrg		switch (format) {
1715848b8605Smrg		case PIPE_FORMAT_BPTC_RGBA_UNORM:
1716848b8605Smrg		case PIPE_FORMAT_BPTC_SRGBA:
1717848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_BC7;
1718848b8605Smrg		case PIPE_FORMAT_BPTC_RGB_FLOAT:
1719848b8605Smrg		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
1720848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_BC6;
1721848b8605Smrg		default:
1722848b8605Smrg			goto out_unknown;
1723848b8605Smrg		}
1724848b8605Smrg	}
1725848b8605Smrg
1726848b8605Smrg	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
1727848b8605Smrg		switch (format) {
1728848b8605Smrg		case PIPE_FORMAT_R8G8_B8G8_UNORM:
1729848b8605Smrg		case PIPE_FORMAT_G8R8_B8R8_UNORM:
1730848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_GB_GR;
1731848b8605Smrg		case PIPE_FORMAT_G8R8_G8B8_UNORM:
1732848b8605Smrg		case PIPE_FORMAT_R8G8_R8B8_UNORM:
1733848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_BG_RG;
1734848b8605Smrg		default:
1735848b8605Smrg			goto out_unknown;
1736848b8605Smrg		}
1737848b8605Smrg	}
1738848b8605Smrg
1739848b8605Smrg	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1740b8e80941Smrg		if (!sscreen->info.has_format_bc1_through_bc7)
1741848b8605Smrg			goto out_unknown;
1742848b8605Smrg
1743848b8605Smrg		switch (format) {
1744848b8605Smrg		case PIPE_FORMAT_DXT1_RGB:
1745848b8605Smrg		case PIPE_FORMAT_DXT1_RGBA:
1746848b8605Smrg		case PIPE_FORMAT_DXT1_SRGB:
1747848b8605Smrg		case PIPE_FORMAT_DXT1_SRGBA:
1748848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_BC1;
1749848b8605Smrg		case PIPE_FORMAT_DXT3_RGBA:
1750848b8605Smrg		case PIPE_FORMAT_DXT3_SRGBA:
1751848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_BC2;
1752848b8605Smrg		case PIPE_FORMAT_DXT5_RGBA:
1753848b8605Smrg		case PIPE_FORMAT_DXT5_SRGBA:
1754848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_BC3;
1755848b8605Smrg		default:
1756848b8605Smrg			goto out_unknown;
1757848b8605Smrg		}
1758848b8605Smrg	}
1759848b8605Smrg
1760848b8605Smrg	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
1761848b8605Smrg		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
1762848b8605Smrg	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
1763848b8605Smrg		return V_008F14_IMG_DATA_FORMAT_10_11_11;
1764848b8605Smrg	}
1765848b8605Smrg
1766848b8605Smrg	/* R8G8Bx_SNORM - TODO CxV8U8 */
1767848b8605Smrg
1768b8e80941Smrg	/* hw cannot support mixed formats (except depth/stencil, since only
1769b8e80941Smrg	 * depth is read).*/
1770b8e80941Smrg	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
1771b8e80941Smrg		goto out_unknown;
1772b8e80941Smrg
1773848b8605Smrg	/* See whether the components are of the same size. */
1774848b8605Smrg	for (i = 1; i < desc->nr_channels; i++) {
1775848b8605Smrg		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
1776848b8605Smrg	}
1777848b8605Smrg
1778848b8605Smrg	/* Non-uniform formats. */
1779848b8605Smrg	if (!uniform) {
1780848b8605Smrg		switch(desc->nr_channels) {
1781848b8605Smrg		case 3:
1782848b8605Smrg			if (desc->channel[0].size == 5 &&
1783848b8605Smrg			    desc->channel[1].size == 6 &&
1784848b8605Smrg			    desc->channel[2].size == 5) {
1785848b8605Smrg				return V_008F14_IMG_DATA_FORMAT_5_6_5;
1786848b8605Smrg			}
1787848b8605Smrg			goto out_unknown;
1788848b8605Smrg		case 4:
1789848b8605Smrg			if (desc->channel[0].size == 5 &&
1790848b8605Smrg			    desc->channel[1].size == 5 &&
1791848b8605Smrg			    desc->channel[2].size == 5 &&
1792848b8605Smrg			    desc->channel[3].size == 1) {
1793848b8605Smrg				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
1794848b8605Smrg			}
1795b8e80941Smrg			if (desc->channel[0].size == 1 &&
1796b8e80941Smrg			    desc->channel[1].size == 5 &&
1797b8e80941Smrg			    desc->channel[2].size == 5 &&
1798b8e80941Smrg			    desc->channel[3].size == 5) {
1799b8e80941Smrg				return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
1800b8e80941Smrg			}
1801848b8605Smrg			if (desc->channel[0].size == 10 &&
1802848b8605Smrg			    desc->channel[1].size == 10 &&
1803848b8605Smrg			    desc->channel[2].size == 10 &&
1804848b8605Smrg			    desc->channel[3].size == 2) {
1805848b8605Smrg				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
1806848b8605Smrg			}
1807848b8605Smrg			goto out_unknown;
1808848b8605Smrg		}
1809848b8605Smrg		goto out_unknown;
1810848b8605Smrg	}
1811848b8605Smrg
1812848b8605Smrg	if (first_non_void < 0 || first_non_void > 3)
1813848b8605Smrg		goto out_unknown;
1814848b8605Smrg
1815848b8605Smrg	/* uniform formats */
1816848b8605Smrg	switch (desc->channel[first_non_void].size) {
1817848b8605Smrg	case 4:
1818848b8605Smrg		switch (desc->nr_channels) {
1819848b8605Smrg#if 0 /* Not supported for render targets */
1820848b8605Smrg		case 2:
1821848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_4_4;
1822848b8605Smrg#endif
1823848b8605Smrg		case 4:
1824848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
1825848b8605Smrg		}
1826848b8605Smrg		break;
1827848b8605Smrg	case 8:
1828848b8605Smrg		switch (desc->nr_channels) {
1829848b8605Smrg		case 1:
1830848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_8;
1831848b8605Smrg		case 2:
1832848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_8_8;
1833848b8605Smrg		case 4:
1834848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1835848b8605Smrg		}
1836848b8605Smrg		break;
1837848b8605Smrg	case 16:
1838848b8605Smrg		switch (desc->nr_channels) {
1839848b8605Smrg		case 1:
1840848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_16;
1841848b8605Smrg		case 2:
1842848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_16_16;
1843848b8605Smrg		case 4:
1844848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
1845848b8605Smrg		}
1846848b8605Smrg		break;
1847848b8605Smrg	case 32:
1848848b8605Smrg		switch (desc->nr_channels) {
1849848b8605Smrg		case 1:
1850848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_32;
1851848b8605Smrg		case 2:
1852848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_32_32;
1853848b8605Smrg#if 0 /* Not supported for render targets */
1854848b8605Smrg		case 3:
1855848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_32_32_32;
1856848b8605Smrg#endif
1857848b8605Smrg		case 4:
1858848b8605Smrg			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
1859848b8605Smrg		}
1860848b8605Smrg	}
1861848b8605Smrg
1862848b8605Smrgout_unknown:
1863848b8605Smrg	return ~0;
1864848b8605Smrg}
1865848b8605Smrg
1866848b8605Smrgstatic unsigned si_tex_wrap(unsigned wrap)
1867848b8605Smrg{
1868848b8605Smrg	switch (wrap) {
1869848b8605Smrg	default:
1870848b8605Smrg	case PIPE_TEX_WRAP_REPEAT:
1871848b8605Smrg		return V_008F30_SQ_TEX_WRAP;
1872848b8605Smrg	case PIPE_TEX_WRAP_CLAMP:
1873848b8605Smrg		return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1874848b8605Smrg	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1875848b8605Smrg		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1876848b8605Smrg	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1877848b8605Smrg		return V_008F30_SQ_TEX_CLAMP_BORDER;
1878848b8605Smrg	case PIPE_TEX_WRAP_MIRROR_REPEAT:
1879848b8605Smrg		return V_008F30_SQ_TEX_MIRROR;
1880848b8605Smrg	case PIPE_TEX_WRAP_MIRROR_CLAMP:
1881848b8605Smrg		return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1882848b8605Smrg	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1883848b8605Smrg		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1884848b8605Smrg	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1885848b8605Smrg		return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1886848b8605Smrg	}
1887848b8605Smrg}
1888848b8605Smrg
1889848b8605Smrgstatic unsigned si_tex_mipfilter(unsigned filter)
1890848b8605Smrg{
1891848b8605Smrg	switch (filter) {
1892848b8605Smrg	case PIPE_TEX_MIPFILTER_NEAREST:
1893848b8605Smrg		return V_008F38_SQ_TEX_Z_FILTER_POINT;
1894848b8605Smrg	case PIPE_TEX_MIPFILTER_LINEAR:
1895848b8605Smrg		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1896848b8605Smrg	default:
1897848b8605Smrg	case PIPE_TEX_MIPFILTER_NONE:
1898848b8605Smrg		return V_008F38_SQ_TEX_Z_FILTER_NONE;
1899848b8605Smrg	}
1900848b8605Smrg}
1901848b8605Smrg
1902848b8605Smrgstatic unsigned si_tex_compare(unsigned compare)
1903848b8605Smrg{
1904848b8605Smrg	switch (compare) {
1905848b8605Smrg	default:
1906848b8605Smrg	case PIPE_FUNC_NEVER:
1907848b8605Smrg		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1908848b8605Smrg	case PIPE_FUNC_LESS:
1909848b8605Smrg		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1910848b8605Smrg	case PIPE_FUNC_EQUAL:
1911848b8605Smrg		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1912848b8605Smrg	case PIPE_FUNC_LEQUAL:
1913848b8605Smrg		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1914848b8605Smrg	case PIPE_FUNC_GREATER:
1915848b8605Smrg		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1916848b8605Smrg	case PIPE_FUNC_NOTEQUAL:
1917848b8605Smrg		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1918848b8605Smrg	case PIPE_FUNC_GEQUAL:
1919848b8605Smrg		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1920848b8605Smrg	case PIPE_FUNC_ALWAYS:
1921848b8605Smrg		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1922848b8605Smrg	}
1923848b8605Smrg}
1924848b8605Smrg
1925b8e80941Smrgstatic unsigned si_tex_dim(struct si_screen *sscreen, struct si_texture *tex,
1926b8e80941Smrg			   unsigned view_target, unsigned nr_samples)
1927848b8605Smrg{
1928b8e80941Smrg	unsigned res_target = tex->buffer.b.b.target;
1929b8e80941Smrg
1930b8e80941Smrg	if (view_target == PIPE_TEXTURE_CUBE ||
1931b8e80941Smrg	    view_target == PIPE_TEXTURE_CUBE_ARRAY)
1932b8e80941Smrg		res_target = view_target;
1933b8e80941Smrg	/* If interpreting cubemaps as something else, set 2D_ARRAY. */
1934b8e80941Smrg	else if (res_target == PIPE_TEXTURE_CUBE ||
1935b8e80941Smrg		 res_target == PIPE_TEXTURE_CUBE_ARRAY)
1936b8e80941Smrg		res_target = PIPE_TEXTURE_2D_ARRAY;
1937b8e80941Smrg
1938b8e80941Smrg	/* GFX9 allocates 1D textures as 2D. */
1939b8e80941Smrg	if ((res_target == PIPE_TEXTURE_1D ||
1940b8e80941Smrg	     res_target == PIPE_TEXTURE_1D_ARRAY) &&
1941b8e80941Smrg	    sscreen->info.chip_class >= GFX9 &&
1942b8e80941Smrg	    tex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) {
1943b8e80941Smrg		if (res_target == PIPE_TEXTURE_1D)
1944b8e80941Smrg			res_target = PIPE_TEXTURE_2D;
1945b8e80941Smrg		else
1946b8e80941Smrg			res_target = PIPE_TEXTURE_2D_ARRAY;
1947b8e80941Smrg	}
1948b8e80941Smrg
1949b8e80941Smrg	switch (res_target) {
1950848b8605Smrg	default:
1951848b8605Smrg	case PIPE_TEXTURE_1D:
1952848b8605Smrg		return V_008F1C_SQ_RSRC_IMG_1D;
1953848b8605Smrg	case PIPE_TEXTURE_1D_ARRAY:
1954848b8605Smrg		return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1955848b8605Smrg	case PIPE_TEXTURE_2D:
1956848b8605Smrg	case PIPE_TEXTURE_RECT:
1957848b8605Smrg		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1958848b8605Smrg					V_008F1C_SQ_RSRC_IMG_2D;
1959848b8605Smrg	case PIPE_TEXTURE_2D_ARRAY:
1960848b8605Smrg		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1961848b8605Smrg					V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1962848b8605Smrg	case PIPE_TEXTURE_3D:
1963848b8605Smrg		return V_008F1C_SQ_RSRC_IMG_3D;
1964848b8605Smrg	case PIPE_TEXTURE_CUBE:
1965848b8605Smrg	case PIPE_TEXTURE_CUBE_ARRAY:
1966848b8605Smrg		return V_008F1C_SQ_RSRC_IMG_CUBE;
1967848b8605Smrg	}
1968848b8605Smrg}
1969848b8605Smrg
1970848b8605Smrg/*
1971848b8605Smrg * Format support testing
1972848b8605Smrg */
1973848b8605Smrg
1974848b8605Smrgstatic bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
1975848b8605Smrg{
1976b8e80941Smrg	const struct util_format_description *desc = util_format_description(format);
1977b8e80941Smrg	if (!desc)
1978b8e80941Smrg		return false;
1979b8e80941Smrg
1980b8e80941Smrg	return si_translate_texformat(screen, format, desc,
1981848b8605Smrg				      util_format_get_first_non_void_channel(format)) != ~0U;
1982848b8605Smrg}
1983848b8605Smrg
1984848b8605Smrgstatic uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
1985848b8605Smrg					       const struct util_format_description *desc,
1986848b8605Smrg					       int first_non_void)
1987848b8605Smrg{
1988848b8605Smrg	int i;
1989848b8605Smrg
1990848b8605Smrg	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1991848b8605Smrg		return V_008F0C_BUF_DATA_FORMAT_10_11_11;
1992848b8605Smrg
1993b8e80941Smrg	assert(first_non_void >= 0);
1994b8e80941Smrg
1995848b8605Smrg	if (desc->nr_channels == 4 &&
1996848b8605Smrg	    desc->channel[0].size == 10 &&
1997848b8605Smrg	    desc->channel[1].size == 10 &&
1998848b8605Smrg	    desc->channel[2].size == 10 &&
1999848b8605Smrg	    desc->channel[3].size == 2)
2000848b8605Smrg		return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
2001848b8605Smrg
2002848b8605Smrg	/* See whether the components are of the same size. */
2003848b8605Smrg	for (i = 0; i < desc->nr_channels; i++) {
2004848b8605Smrg		if (desc->channel[first_non_void].size != desc->channel[i].size)
2005848b8605Smrg			return V_008F0C_BUF_DATA_FORMAT_INVALID;
2006848b8605Smrg	}
2007848b8605Smrg
2008848b8605Smrg	switch (desc->channel[first_non_void].size) {
2009848b8605Smrg	case 8:
2010848b8605Smrg		switch (desc->nr_channels) {
2011848b8605Smrg		case 1:
2012b8e80941Smrg		case 3: /* 3 loads */
2013848b8605Smrg			return V_008F0C_BUF_DATA_FORMAT_8;
2014848b8605Smrg		case 2:
2015848b8605Smrg			return V_008F0C_BUF_DATA_FORMAT_8_8;
2016848b8605Smrg		case 4:
2017848b8605Smrg			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
2018848b8605Smrg		}
2019848b8605Smrg		break;
2020848b8605Smrg	case 16:
2021848b8605Smrg		switch (desc->nr_channels) {
2022848b8605Smrg		case 1:
2023b8e80941Smrg		case 3: /* 3 loads */
2024848b8605Smrg			return V_008F0C_BUF_DATA_FORMAT_16;
2025848b8605Smrg		case 2:
2026848b8605Smrg			return V_008F0C_BUF_DATA_FORMAT_16_16;
2027848b8605Smrg		case 4:
2028848b8605Smrg			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
2029848b8605Smrg		}
2030848b8605Smrg		break;
2031848b8605Smrg	case 32:
2032848b8605Smrg		switch (desc->nr_channels) {
2033848b8605Smrg		case 1:
2034848b8605Smrg			return V_008F0C_BUF_DATA_FORMAT_32;
2035848b8605Smrg		case 2:
2036848b8605Smrg			return V_008F0C_BUF_DATA_FORMAT_32_32;
2037848b8605Smrg		case 3:
2038848b8605Smrg			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
2039848b8605Smrg		case 4:
2040848b8605Smrg			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
2041848b8605Smrg		}
2042848b8605Smrg		break;
2043b8e80941Smrg	case 64:
2044b8e80941Smrg		/* Legacy double formats. */
2045b8e80941Smrg		switch (desc->nr_channels) {
2046b8e80941Smrg		case 1: /* 1 load */
2047b8e80941Smrg			return V_008F0C_BUF_DATA_FORMAT_32_32;
2048b8e80941Smrg		case 2: /* 1 load */
2049b8e80941Smrg			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
2050b8e80941Smrg		case 3: /* 3 loads */
2051b8e80941Smrg			return V_008F0C_BUF_DATA_FORMAT_32_32;
2052b8e80941Smrg		case 4: /* 2 loads */
2053b8e80941Smrg			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
2054b8e80941Smrg		}
2055b8e80941Smrg		break;
2056848b8605Smrg	}
2057848b8605Smrg
2058848b8605Smrg	return V_008F0C_BUF_DATA_FORMAT_INVALID;
2059848b8605Smrg}
2060848b8605Smrg
2061848b8605Smrgstatic uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
2062848b8605Smrg					      const struct util_format_description *desc,
2063848b8605Smrg					      int first_non_void)
2064848b8605Smrg{
2065848b8605Smrg	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
2066848b8605Smrg		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
2067848b8605Smrg
2068b8e80941Smrg	assert(first_non_void >= 0);
2069b8e80941Smrg
2070848b8605Smrg	switch (desc->channel[first_non_void].type) {
2071848b8605Smrg	case UTIL_FORMAT_TYPE_SIGNED:
2072b8e80941Smrg	case UTIL_FORMAT_TYPE_FIXED:
2073b8e80941Smrg		if (desc->channel[first_non_void].size >= 32 ||
2074b8e80941Smrg		    desc->channel[first_non_void].pure_integer)
2075848b8605Smrg			return V_008F0C_BUF_NUM_FORMAT_SINT;
2076b8e80941Smrg		else if (desc->channel[first_non_void].normalized)
2077b8e80941Smrg			return V_008F0C_BUF_NUM_FORMAT_SNORM;
2078848b8605Smrg		else
2079848b8605Smrg			return V_008F0C_BUF_NUM_FORMAT_SSCALED;
2080848b8605Smrg		break;
2081848b8605Smrg	case UTIL_FORMAT_TYPE_UNSIGNED:
2082b8e80941Smrg		if (desc->channel[first_non_void].size >= 32 ||
2083b8e80941Smrg		    desc->channel[first_non_void].pure_integer)
2084848b8605Smrg			return V_008F0C_BUF_NUM_FORMAT_UINT;
2085b8e80941Smrg		else if (desc->channel[first_non_void].normalized)
2086b8e80941Smrg			return V_008F0C_BUF_NUM_FORMAT_UNORM;
2087848b8605Smrg		else
2088848b8605Smrg			return V_008F0C_BUF_NUM_FORMAT_USCALED;
2089848b8605Smrg		break;
2090848b8605Smrg	case UTIL_FORMAT_TYPE_FLOAT:
2091848b8605Smrg	default:
2092848b8605Smrg		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
2093848b8605Smrg	}
2094848b8605Smrg}
2095848b8605Smrg
2096b8e80941Smrgstatic unsigned si_is_vertex_format_supported(struct pipe_screen *screen,
2097b8e80941Smrg					      enum pipe_format format,
2098b8e80941Smrg					      unsigned usage)
2099848b8605Smrg{
2100848b8605Smrg	const struct util_format_description *desc;
2101848b8605Smrg	int first_non_void;
2102848b8605Smrg	unsigned data_format;
2103848b8605Smrg
2104b8e80941Smrg	assert((usage & ~(PIPE_BIND_SHADER_IMAGE |
2105b8e80941Smrg			  PIPE_BIND_SAMPLER_VIEW |
2106b8e80941Smrg			  PIPE_BIND_VERTEX_BUFFER)) == 0);
2107b8e80941Smrg
2108848b8605Smrg	desc = util_format_description(format);
2109b8e80941Smrg	if (!desc)
2110b8e80941Smrg		return 0;
2111b8e80941Smrg
2112b8e80941Smrg	/* There are no native 8_8_8 or 16_16_16 data formats, and we currently
2113b8e80941Smrg	 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well
2114b8e80941Smrg	 * for read-only access (with caveats surrounding bounds checks), but
2115b8e80941Smrg	 * obviously fails for write access which we have to implement for
2116b8e80941Smrg	 * shader images. Luckily, OpenGL doesn't expect this to be supported
2117b8e80941Smrg	 * anyway, and so the only impact is on PBO uploads / downloads, which
2118b8e80941Smrg	 * shouldn't be expected to be fast for GL_RGB anyway.
2119b8e80941Smrg	 */
2120b8e80941Smrg	if (desc->block.bits == 3 * 8 ||
2121b8e80941Smrg	    desc->block.bits == 3 * 16) {
2122b8e80941Smrg		if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) {
2123b8e80941Smrg		    usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW);
2124b8e80941Smrg			if (!usage)
2125b8e80941Smrg				return 0;
2126b8e80941Smrg		}
2127b8e80941Smrg	}
2128b8e80941Smrg
2129848b8605Smrg	first_non_void = util_format_get_first_non_void_channel(format);
2130848b8605Smrg	data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
2131b8e80941Smrg	if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID)
2132b8e80941Smrg		return 0;
2133b8e80941Smrg
2134b8e80941Smrg	return usage;
2135848b8605Smrg}
2136848b8605Smrg
2137848b8605Smrgstatic bool si_is_colorbuffer_format_supported(enum pipe_format format)
2138848b8605Smrg{
2139848b8605Smrg	return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
2140b8e80941Smrg		si_translate_colorswap(format, false) != ~0U;
2141848b8605Smrg}
2142848b8605Smrg
2143848b8605Smrgstatic bool si_is_zs_format_supported(enum pipe_format format)
2144848b8605Smrg{
2145848b8605Smrg	return si_translate_dbformat(format) != V_028040_Z_INVALID;
2146848b8605Smrg}
2147848b8605Smrg
2148b8e80941Smrgstatic boolean si_is_format_supported(struct pipe_screen *screen,
2149b8e80941Smrg				      enum pipe_format format,
2150b8e80941Smrg				      enum pipe_texture_target target,
2151b8e80941Smrg				      unsigned sample_count,
2152b8e80941Smrg				      unsigned storage_sample_count,
2153b8e80941Smrg				      unsigned usage)
2154848b8605Smrg{
2155848b8605Smrg	struct si_screen *sscreen = (struct si_screen *)screen;
2156848b8605Smrg	unsigned retval = 0;
2157848b8605Smrg
2158848b8605Smrg	if (target >= PIPE_MAX_TEXTURE_TYPES) {
2159b8e80941Smrg		PRINT_ERR("radeonsi: unsupported texture type %d\n", target);
2160b8e80941Smrg		return false;
2161848b8605Smrg	}
2162848b8605Smrg
2163b8e80941Smrg	if (MAX2(1, sample_count) < MAX2(1, storage_sample_count))
2164b8e80941Smrg		return false;
2165848b8605Smrg
2166848b8605Smrg	if (sample_count > 1) {
2167b8e80941Smrg		if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
2168b8e80941Smrg			return false;
2169b8e80941Smrg
2170b8e80941Smrg		if (usage & PIPE_BIND_SHADER_IMAGE)
2171b8e80941Smrg			return false;
2172b8e80941Smrg
2173b8e80941Smrg		/* Only power-of-two sample counts are supported. */
2174b8e80941Smrg		if (!util_is_power_of_two_or_zero(sample_count) ||
2175b8e80941Smrg		    !util_is_power_of_two_or_zero(storage_sample_count))
2176b8e80941Smrg			return false;
2177b8e80941Smrg
2178b8e80941Smrg		/* MSAA support without framebuffer attachments. */
2179b8e80941Smrg		if (format == PIPE_FORMAT_NONE && sample_count <= 16)
2180b8e80941Smrg			return true;
2181b8e80941Smrg
2182b8e80941Smrg		if (!sscreen->info.has_eqaa_surface_allocator ||
2183b8e80941Smrg		    util_format_is_depth_or_stencil(format)) {
2184b8e80941Smrg			/* Color without EQAA or depth/stencil. */
2185b8e80941Smrg			if (sample_count > 8 ||
2186b8e80941Smrg			    sample_count != storage_sample_count)
2187b8e80941Smrg				return false;
2188b8e80941Smrg		} else {
2189b8e80941Smrg			/* Color with EQAA. */
2190b8e80941Smrg			if (sample_count > 16 ||
2191b8e80941Smrg			    storage_sample_count > 8)
2192b8e80941Smrg				return false;
2193848b8605Smrg		}
2194848b8605Smrg	}
2195848b8605Smrg
2196b8e80941Smrg	if (usage & (PIPE_BIND_SAMPLER_VIEW |
2197b8e80941Smrg		     PIPE_BIND_SHADER_IMAGE)) {
2198848b8605Smrg		if (target == PIPE_BUFFER) {
2199b8e80941Smrg			retval |= si_is_vertex_format_supported(
2200b8e80941Smrg				screen, format, usage & (PIPE_BIND_SAMPLER_VIEW |
2201b8e80941Smrg						         PIPE_BIND_SHADER_IMAGE));
2202848b8605Smrg		} else {
2203848b8605Smrg			if (si_is_sampler_format_supported(screen, format))
2204b8e80941Smrg				retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
2205b8e80941Smrg						   PIPE_BIND_SHADER_IMAGE);
2206848b8605Smrg		}
2207848b8605Smrg	}
2208848b8605Smrg
2209848b8605Smrg	if ((usage & (PIPE_BIND_RENDER_TARGET |
2210848b8605Smrg		      PIPE_BIND_DISPLAY_TARGET |
2211848b8605Smrg		      PIPE_BIND_SCANOUT |
2212b8e80941Smrg		      PIPE_BIND_SHARED |
2213b8e80941Smrg		      PIPE_BIND_BLENDABLE)) &&
2214848b8605Smrg	    si_is_colorbuffer_format_supported(format)) {
2215848b8605Smrg		retval |= usage &
2216848b8605Smrg			  (PIPE_BIND_RENDER_TARGET |
2217848b8605Smrg			   PIPE_BIND_DISPLAY_TARGET |
2218848b8605Smrg			   PIPE_BIND_SCANOUT |
2219848b8605Smrg			   PIPE_BIND_SHARED);
2220b8e80941Smrg		if (!util_format_is_pure_integer(format) &&
2221b8e80941Smrg		    !util_format_is_depth_or_stencil(format))
2222b8e80941Smrg			retval |= usage & PIPE_BIND_BLENDABLE;
2223848b8605Smrg	}
2224848b8605Smrg
2225848b8605Smrg	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
2226848b8605Smrg	    si_is_zs_format_supported(format)) {
2227848b8605Smrg		retval |= PIPE_BIND_DEPTH_STENCIL;
2228848b8605Smrg	}
2229848b8605Smrg
2230b8e80941Smrg	if (usage & PIPE_BIND_VERTEX_BUFFER) {
2231b8e80941Smrg		retval |= si_is_vertex_format_supported(screen, format,
2232b8e80941Smrg							PIPE_BIND_VERTEX_BUFFER);
2233848b8605Smrg	}
2234848b8605Smrg
2235b8e80941Smrg	if ((usage & PIPE_BIND_LINEAR) &&
2236b8e80941Smrg	    !util_format_is_compressed(format) &&
2237b8e80941Smrg	    !(usage & PIPE_BIND_DEPTH_STENCIL))
2238b8e80941Smrg		retval |= PIPE_BIND_LINEAR;
2239848b8605Smrg
2240848b8605Smrg	return retval == usage;
2241848b8605Smrg}
2242848b8605Smrg
2243b8e80941Smrg/*
2244b8e80941Smrg * framebuffer handling
2245b8e80941Smrg */
2246b8e80941Smrg
2247b8e80941Smrgstatic void si_choose_spi_color_formats(struct si_surface *surf,
2248b8e80941Smrg					unsigned format, unsigned swap,
2249b8e80941Smrg					unsigned ntype, bool is_depth)
2250848b8605Smrg{
2251b8e80941Smrg	/* Alpha is needed for alpha-to-coverage.
2252b8e80941Smrg	 * Blending may be with or without alpha.
2253b8e80941Smrg	 */
2254b8e80941Smrg	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
2255b8e80941Smrg	unsigned alpha = 0; /* exports alpha, but may not support blending */
2256b8e80941Smrg	unsigned blend = 0; /* supports blending, but may not export alpha */
2257b8e80941Smrg	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
2258b8e80941Smrg
2259b8e80941Smrg	/* Choose the SPI color formats. These are required values for RB+.
2260b8e80941Smrg	 * Other chips have multiple choices, though they are not necessarily better.
2261b8e80941Smrg	 */
2262b8e80941Smrg	switch (format) {
2263b8e80941Smrg	case V_028C70_COLOR_5_6_5:
2264b8e80941Smrg	case V_028C70_COLOR_1_5_5_5:
2265b8e80941Smrg	case V_028C70_COLOR_5_5_5_1:
2266b8e80941Smrg	case V_028C70_COLOR_4_4_4_4:
2267b8e80941Smrg	case V_028C70_COLOR_10_11_11:
2268b8e80941Smrg	case V_028C70_COLOR_11_11_10:
2269b8e80941Smrg	case V_028C70_COLOR_8:
2270b8e80941Smrg	case V_028C70_COLOR_8_8:
2271b8e80941Smrg	case V_028C70_COLOR_8_8_8_8:
2272b8e80941Smrg	case V_028C70_COLOR_10_10_10_2:
2273b8e80941Smrg	case V_028C70_COLOR_2_10_10_10:
2274b8e80941Smrg		if (ntype == V_028C70_NUMBER_UINT)
2275b8e80941Smrg			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
2276b8e80941Smrg		else if (ntype == V_028C70_NUMBER_SINT)
2277b8e80941Smrg			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
2278b8e80941Smrg		else
2279b8e80941Smrg			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
2280b8e80941Smrg		break;
2281848b8605Smrg
2282b8e80941Smrg	case V_028C70_COLOR_16:
2283b8e80941Smrg	case V_028C70_COLOR_16_16:
2284b8e80941Smrg	case V_028C70_COLOR_16_16_16_16:
2285b8e80941Smrg		if (ntype == V_028C70_NUMBER_UNORM ||
2286b8e80941Smrg		    ntype == V_028C70_NUMBER_SNORM) {
2287b8e80941Smrg			/* UNORM16 and SNORM16 don't support blending */
2288b8e80941Smrg			if (ntype == V_028C70_NUMBER_UNORM)
2289b8e80941Smrg				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
2290b8e80941Smrg			else
2291b8e80941Smrg				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
2292b8e80941Smrg
2293b8e80941Smrg			/* Use 32 bits per channel for blending. */
2294b8e80941Smrg			if (format == V_028C70_COLOR_16) {
2295b8e80941Smrg				if (swap == V_028C70_SWAP_STD) { /* R */
2296b8e80941Smrg					blend = V_028714_SPI_SHADER_32_R;
2297b8e80941Smrg					blend_alpha = V_028714_SPI_SHADER_32_AR;
2298b8e80941Smrg				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
2299b8e80941Smrg					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
2300b8e80941Smrg				else
2301b8e80941Smrg					assert(0);
2302b8e80941Smrg			} else if (format == V_028C70_COLOR_16_16) {
2303b8e80941Smrg				if (swap == V_028C70_SWAP_STD) { /* RG */
2304b8e80941Smrg					blend = V_028714_SPI_SHADER_32_GR;
2305b8e80941Smrg					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
2306b8e80941Smrg				} else if (swap == V_028C70_SWAP_ALT) /* RA */
2307b8e80941Smrg					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
2308b8e80941Smrg				else
2309b8e80941Smrg					assert(0);
2310b8e80941Smrg			} else /* 16_16_16_16 */
2311b8e80941Smrg				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
2312b8e80941Smrg		} else if (ntype == V_028C70_NUMBER_UINT)
2313b8e80941Smrg			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
2314b8e80941Smrg		else if (ntype == V_028C70_NUMBER_SINT)
2315b8e80941Smrg			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
2316b8e80941Smrg		else if (ntype == V_028C70_NUMBER_FLOAT)
2317b8e80941Smrg			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
2318b8e80941Smrg		else
2319b8e80941Smrg			assert(0);
2320b8e80941Smrg		break;
2321b8e80941Smrg
2322b8e80941Smrg	case V_028C70_COLOR_32:
2323b8e80941Smrg		if (swap == V_028C70_SWAP_STD) { /* R */
2324b8e80941Smrg			blend = normal = V_028714_SPI_SHADER_32_R;
2325b8e80941Smrg			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
2326b8e80941Smrg		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
2327b8e80941Smrg			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
2328b8e80941Smrg		else
2329b8e80941Smrg			assert(0);
2330b8e80941Smrg		break;
2331b8e80941Smrg
2332b8e80941Smrg	case V_028C70_COLOR_32_32:
2333b8e80941Smrg		if (swap == V_028C70_SWAP_STD) { /* RG */
2334b8e80941Smrg			blend = normal = V_028714_SPI_SHADER_32_GR;
2335b8e80941Smrg			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
2336b8e80941Smrg		} else if (swap == V_028C70_SWAP_ALT) /* RA */
2337b8e80941Smrg			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
2338b8e80941Smrg		else
2339b8e80941Smrg			assert(0);
2340b8e80941Smrg		break;
2341b8e80941Smrg
2342b8e80941Smrg	case V_028C70_COLOR_32_32_32_32:
2343b8e80941Smrg	case V_028C70_COLOR_8_24:
2344b8e80941Smrg	case V_028C70_COLOR_24_8:
2345b8e80941Smrg	case V_028C70_COLOR_X24_8_32_FLOAT:
2346b8e80941Smrg		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
2347b8e80941Smrg		break;
2348b8e80941Smrg
2349b8e80941Smrg	default:
2350b8e80941Smrg		assert(0);
2351b8e80941Smrg		return;
2352848b8605Smrg	}
2353848b8605Smrg
2354b8e80941Smrg	/* The DB->CB copy needs 32_ABGR. */
2355b8e80941Smrg	if (is_depth)
2356b8e80941Smrg		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
2357b8e80941Smrg
2358b8e80941Smrg	surf->spi_shader_col_format = normal;
2359b8e80941Smrg	surf->spi_shader_col_format_alpha = alpha;
2360b8e80941Smrg	surf->spi_shader_col_format_blend = blend;
2361b8e80941Smrg	surf->spi_shader_col_format_blend_alpha = blend_alpha;
2362b8e80941Smrg}
2363848b8605Smrg
/**
 * Compute the colorbuffer state of \p surf from its format and its texture.
 *
 * Fills in surf->cb_color_view, surf->cb_color_info and surf->cb_color_attrib,
 * plus surf->cb_dcc_control on VI and newer and surf->cb_color_attrib2 on
 * GFX9 and newer. It also sets color_is_int8 / color_is_int10 for the matching
 * integer formats, selects the SPI export formats through
 * si_choose_spi_color_formats(), and finally marks the surface as
 * color_initialized.
 */
2364848b8605Smrgstatic void si_initialize_color_surface(struct si_context *sctx,
2365b8e80941Smrg					struct si_surface *surf)
2366848b8605Smrg{
2367b8e80941Smrg	struct si_texture *tex = (struct si_texture*)surf->base.texture;
2368b8e80941Smrg	unsigned color_info, color_attrib;
2369848b8605Smrg	unsigned format, swap, ntype, endian;
2370848b8605Smrg	const struct util_format_description *desc;
2371b8e80941Smrg	int firstchan;
2372848b8605Smrg	unsigned blend_clamp = 0, blend_bypass = 0;
2373848b8605Smrg
2374848b8605Smrg	desc = util_format_description(surf->base.format);
	/* Find the first non-void channel; firstchan ends up as 4 when all
	 * four channels are void. */
2375b8e80941Smrg	for (firstchan = 0; firstchan < 4; firstchan++) {
2376b8e80941Smrg		if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
2377848b8605Smrg			break;
2378848b8605Smrg		}
2379848b8605Smrg	}
	/* Number type: FLOAT when there is no non-void channel or the first
	 * one is float. Otherwise UNORM is the default, replaced by SRGB for
	 * sRGB colorspace formats, or chosen from the signedness and
	 * pure_integer flag of the first channel. */
2380b8e80941Smrg	if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
2381848b8605Smrg		ntype = V_028C70_NUMBER_FLOAT;
2382848b8605Smrg	} else {
2383848b8605Smrg		ntype = V_028C70_NUMBER_UNORM;
2384848b8605Smrg		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
2385848b8605Smrg			ntype = V_028C70_NUMBER_SRGB;
2386b8e80941Smrg		else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
2387b8e80941Smrg			if (desc->channel[firstchan].pure_integer) {
2388848b8605Smrg				ntype = V_028C70_NUMBER_SINT;
2389848b8605Smrg			} else {
2390b8e80941Smrg				assert(desc->channel[firstchan].normalized);
2391848b8605Smrg				ntype = V_028C70_NUMBER_SNORM;
2392848b8605Smrg			}
2393b8e80941Smrg		} else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2394b8e80941Smrg			if (desc->channel[firstchan].pure_integer) {
2395848b8605Smrg				ntype = V_028C70_NUMBER_UINT;
2396848b8605Smrg			} else {
2397b8e80941Smrg				assert(desc->channel[firstchan].normalized);
2398848b8605Smrg				ntype = V_028C70_NUMBER_UNORM;
2399848b8605Smrg			}
2400848b8605Smrg		}
2401848b8605Smrg	}
2402848b8605Smrg
2403848b8605Smrg	format = si_translate_colorformat(surf->base.format);
	/* NOTE(review): despite the message, nothing is disabled here. With
	 * assertions compiled out, execution continues and the invalid
	 * format value is packed into color_info below. */
2404848b8605Smrg	if (format == V_028C70_COLOR_INVALID) {
2405b8e80941Smrg		PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
2406848b8605Smrg	}
2407848b8605Smrg	assert(format != V_028C70_COLOR_INVALID);
2408b8e80941Smrg	swap = si_translate_colorswap(surf->base.format, false);
2409b8e80941Smrg	endian = si_colorformat_endian_swap(format);
2410848b8605Smrg
2411848b8605Smrg	/* blend clamp should be set for all NORM/SRGB types */
2412848b8605Smrg	if (ntype == V_028C70_NUMBER_UNORM ||
2413848b8605Smrg	    ntype == V_028C70_NUMBER_SNORM ||
2414848b8605Smrg	    ntype == V_028C70_NUMBER_SRGB)
2415848b8605Smrg		blend_clamp = 1;
2416848b8605Smrg
2417848b8605Smrg	/* set blend bypass according to docs if SINT/UINT or
2418848b8605Smrg	   8/24 COLOR variants */
2419848b8605Smrg	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2420848b8605Smrg	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2421848b8605Smrg	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
2422848b8605Smrg		blend_clamp = 0;
2423848b8605Smrg		blend_bypass = 1;
2424848b8605Smrg	}
2425848b8605Smrg
	/* Flag 8-bit and 10-bit integer colorbuffers on the surface. The
	 * flags are only ever set to true here, never cleared. */
2426b8e80941Smrg	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) {
2427b8e80941Smrg		if (format == V_028C70_COLOR_8 ||
2428b8e80941Smrg		    format == V_028C70_COLOR_8_8 ||
2429b8e80941Smrg		    format == V_028C70_COLOR_8_8_8_8)
2430b8e80941Smrg			surf->color_is_int8 = true;
2431b8e80941Smrg		else if (format == V_028C70_COLOR_10_10_10_2 ||
2432b8e80941Smrg			 format == V_028C70_COLOR_2_10_10_10)
2433b8e80941Smrg			surf->color_is_int10 = true;
2434b8e80941Smrg	}
2435b8e80941Smrg
	/* ROUND_MODE is 1 for everything except UNORM/SNORM/SRGB number types
	 * and the 8_24 / 24_8 formats. */
2436848b8605Smrg	color_info = S_028C70_FORMAT(format) |
2437848b8605Smrg		S_028C70_COMP_SWAP(swap) |
2438848b8605Smrg		S_028C70_BLEND_CLAMP(blend_clamp) |
2439848b8605Smrg		S_028C70_BLEND_BYPASS(blend_bypass) |
2440b8e80941Smrg		S_028C70_SIMPLE_FLOAT(1) |
2441b8e80941Smrg		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2442b8e80941Smrg				    ntype != V_028C70_NUMBER_SNORM &&
2443b8e80941Smrg				    ntype != V_028C70_NUMBER_SRGB &&
2444b8e80941Smrg				    format != V_028C70_COLOR_8_24 &&
2445b8e80941Smrg				    format != V_028C70_COLOR_24_8) |
2446848b8605Smrg		S_028C70_NUMBER_TYPE(ntype) |
2447848b8605Smrg		S_028C70_ENDIAN(endian);
2448848b8605Smrg
2449b8e80941Smrg	/* Intensity is implemented as Red, so treat it that way. */
2450b8e80941Smrg	color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
2451b8e80941Smrg						  util_format_is_intensity(surf->base.format));
2452848b8605Smrg
	/* MSAA: NUM_SAMPLES and NUM_FRAGMENTS are programmed as log2 of
	 * nr_samples and nr_storage_samples. */
2453b8e80941Smrg	if (tex->buffer.b.b.nr_samples > 1) {
2454b8e80941Smrg		unsigned log_samples = util_logbase2(tex->buffer.b.b.nr_samples);
2455b8e80941Smrg		unsigned log_fragments = util_logbase2(tex->buffer.b.b.nr_storage_samples);
2456848b8605Smrg
2457848b8605Smrg		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2458b8e80941Smrg				S_028C74_NUM_FRAGMENTS(log_fragments);
2459848b8605Smrg
		/* COMPRESSION is enabled only when the texture has an FMASK. */
2460b8e80941Smrg		if (tex->surface.fmask_size) {
2461848b8605Smrg			color_info |= S_028C70_COMPRESSION(1);
2462b8e80941Smrg			unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.fmask.bankh);
2463848b8605Smrg
2464b8e80941Smrg			if (sctx->chip_class == SI) {
2465848b8605Smrg				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
2466848b8605Smrg				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
2467848b8605Smrg			}
2468848b8605Smrg		}
2469848b8605Smrg	}
2470848b8605Smrg
	/* DCC control (VI and newer). The maximum uncompressed block size is
	 * lowered for 1- and 2-byte-per-element surfaces with more than one
	 * storage sample. */
2471b8e80941Smrg	if (sctx->chip_class >= VI) {
2472b8e80941Smrg		unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
2473b8e80941Smrg		unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
2474b8e80941Smrg
2475b8e80941Smrg		/* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
2476b8e80941Smrg		   64 for APU because all of our APUs to date use DIMMs which have
2477b8e80941Smrg		   a request granularity size of 64B while all other chips have a
2478b8e80941Smrg		   32B request size */
2479b8e80941Smrg		if (!sctx->screen->info.has_dedicated_vram)
2480b8e80941Smrg			min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
2481b8e80941Smrg
2482b8e80941Smrg		if (tex->buffer.b.b.nr_storage_samples > 1) {
2483b8e80941Smrg			if (tex->surface.bpe == 1)
2484b8e80941Smrg				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
2485b8e80941Smrg			else if (tex->surface.bpe == 2)
2486b8e80941Smrg				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
2487b8e80941Smrg		}
2488848b8605Smrg
2489b8e80941Smrg		surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2490b8e80941Smrg				       S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
2491b8e80941Smrg		                       S_028C78_INDEPENDENT_64B_BLOCKS(1);
2492b8e80941Smrg	}
2493848b8605Smrg
2494b8e80941Smrg	/* This must be set for fast clear to work without FMASK. */
2495b8e80941Smrg	if (!tex->surface.fmask_size && sctx->chip_class == SI) {
2496b8e80941Smrg		unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh);
2497b8e80941Smrg		color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2498b8e80941Smrg	}
2499848b8605Smrg
	/* The view covers the surface's first_layer..last_layer slice range. */
2500b8e80941Smrg	unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
2501b8e80941Smrg			      S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
2502848b8605Smrg
	/* GFX9 and newer additionally program the mip level, the mip 0
	 * dimensions and the resource type. */
2503b8e80941Smrg	if (sctx->chip_class >= GFX9) {
2504b8e80941Smrg		unsigned mip0_depth = util_max_layer(&tex->buffer.b.b, 0);
2505b8e80941Smrg
2506b8e80941Smrg		color_view |= S_028C6C_MIP_LEVEL(surf->base.u.tex.level);
2507b8e80941Smrg		color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
2508b8e80941Smrg				S_028C74_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type);
2509b8e80941Smrg		surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(surf->width0 - 1) |
2510b8e80941Smrg					 S_028C68_MIP0_HEIGHT(surf->height0 - 1) |
2511b8e80941Smrg					 S_028C68_MAX_MIP(tex->buffer.b.b.last_level);
2512848b8605Smrg	}
2513848b8605Smrg
2514b8e80941Smrg	surf->cb_color_view = color_view;
2515b8e80941Smrg	surf->cb_color_info = color_info;
2516b8e80941Smrg	surf->cb_color_attrib = color_attrib;
2517b8e80941Smrg
2518848b8605Smrg	/* Determine pixel shader export format */
2519b8e80941Smrg	si_choose_spi_color_formats(surf, format, swap, ntype, tex->is_depth);
2520848b8605Smrg
2521848b8605Smrg	surf->color_initialized = true;
2522848b8605Smrg}
2523848b8605Smrg
/**
 * Compute the depth/stencil buffer state of \p surf from its texture.
 *
 * Fills in the db_* fields of \p surf (depth view, depth and stencil base
 * addresses, Z and stencil info, depth size, HTILE base and surface state,
 * plus the fields specific to each hardware generation) and marks the surface
 * as depth_initialized. GFX9 and newer take the first branch; older chips
 * (SI, CI, VI) take the second one.
 */
2524848b8605Smrgstatic void si_init_depth_surface(struct si_context *sctx,
2525b8e80941Smrg				  struct si_surface *surf)
2526848b8605Smrg{
2527b8e80941Smrg	struct si_texture *tex = (struct si_texture*)surf->base.texture;
2528848b8605Smrg	unsigned level = surf->base.u.tex.level;
2529b8e80941Smrg	unsigned format, stencil_format;
2530b8e80941Smrg	uint32_t z_info, s_info;
2531848b8605Smrg
	/* The Z format comes from the texture's db_render_format; the stencil
	 * format is STENCIL_8 whenever the surface has stencil. */
2532b8e80941Smrg	format = si_translate_dbformat(tex->db_render_format);
2533b8e80941Smrg	stencil_format = tex->surface.has_stencil ?
2534b8e80941Smrg				 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
2535848b8605Smrg
	/* NOTE(review): despite the message, nothing is disabled here. With
	 * assertions compiled out, execution continues with the invalid
	 * format. */
2536b8e80941Smrg	assert(format != V_028040_Z_INVALID);
2537b8e80941Smrg	if (format == V_028040_Z_INVALID)
2538b8e80941Smrg		PRINT_ERR("Invalid DB format: %d, disabling DB.\n", tex->buffer.b.b.format);
2539848b8605Smrg
	/* HTILE state defaults to 0 and is only filled in below when
	 * si_htile_enabled() reports HTILE for this level. */
2540b8e80941Smrg	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
2541b8e80941Smrg			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
2542b8e80941Smrg	surf->db_htile_data_base = 0;
2543b8e80941Smrg	surf->db_htile_surface = 0;
2544b8e80941Smrg
2545b8e80941Smrg	if (sctx->chip_class >= GFX9) {
		/* Base addresses are stored shifted right by 8 bits. The mip
		 * level is selected through MIPID in the depth view, and the
		 * depth size uses the texture's level 0 dimensions. */
2546b8e80941Smrg		assert(tex->surface.u.gfx9.surf_offset == 0);
2547b8e80941Smrg		surf->db_depth_base = tex->buffer.gpu_address >> 8;
2548b8e80941Smrg		surf->db_stencil_base = (tex->buffer.gpu_address +
2549b8e80941Smrg					 tex->surface.u.gfx9.stencil_offset) >> 8;
2550b8e80941Smrg		z_info = S_028038_FORMAT(format) |
2551b8e80941Smrg			 S_028038_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)) |
2552b8e80941Smrg			 S_028038_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) |
2553b8e80941Smrg			 S_028038_MAXMIP(tex->buffer.b.b.last_level);
2554b8e80941Smrg		s_info = S_02803C_FORMAT(stencil_format) |
2555b8e80941Smrg			 S_02803C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode);
2556b8e80941Smrg		surf->db_z_info2 = S_028068_EPITCH(tex->surface.u.gfx9.surf.epitch);
2557b8e80941Smrg		surf->db_stencil_info2 = S_02806C_EPITCH(tex->surface.u.gfx9.stencil.epitch);
2558b8e80941Smrg		surf->db_depth_view |= S_028008_MIPID(level);
2559b8e80941Smrg		surf->db_depth_size = S_02801C_X_MAX(tex->buffer.b.b.width0 - 1) |
2560b8e80941Smrg				      S_02801C_Y_MAX(tex->buffer.b.b.height0 - 1);
2561b8e80941Smrg
2562b8e80941Smrg		if (si_htile_enabled(tex, level)) {
2563b8e80941Smrg			z_info |= S_028038_TILE_SURFACE_ENABLE(1) |
2564b8e80941Smrg				  S_028038_ALLOW_EXPCLEAR(1);
2565b8e80941Smrg
2566b8e80941Smrg			if (tex->tc_compatible_htile) {
				/* 4 Z planes by default, 2 for multisampled
				 * Z16; the register takes the value plus one. */
2567b8e80941Smrg				unsigned max_zplanes = 4;
2568b8e80941Smrg
2569b8e80941Smrg				if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM &&
2570b8e80941Smrg				    tex->buffer.b.b.nr_samples > 1)
2571b8e80941Smrg					max_zplanes = 2;
2572b8e80941Smrg
2573b8e80941Smrg				z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
2574b8e80941Smrg					  S_028038_ITERATE_FLUSH(1);
2575b8e80941Smrg				s_info |= S_02803C_ITERATE_FLUSH(1);
2576b8e80941Smrg			}
2577848b8605Smrg
2578b8e80941Smrg			if (tex->surface.has_stencil) {
2579b8e80941Smrg				/* Stencil buffer workaround ported from the SI-CI-VI code.
2580b8e80941Smrg				 * See that for explanation.
2581b8e80941Smrg				 */
2582b8e80941Smrg				s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1);
2583b8e80941Smrg			} else {
2584b8e80941Smrg				/* Use all HTILE for depth if there's no stencil. */
2585b8e80941Smrg				s_info |= S_02803C_TILE_STENCIL_DISABLE(1);
2586b8e80941Smrg			}
2587848b8605Smrg
2588b8e80941Smrg			surf->db_htile_data_base = (tex->buffer.gpu_address +
2589b8e80941Smrg						    tex->htile_offset) >> 8;
2590b8e80941Smrg			surf->db_htile_surface = S_028ABC_FULL_CACHE(1) |
2591b8e80941Smrg						 S_028ABC_PIPE_ALIGNED(tex->surface.u.gfx9.htile.pipe_aligned) |
2592b8e80941Smrg						 S_028ABC_RB_ALIGNED(tex->surface.u.gfx9.htile.rb_aligned);
2593b8e80941Smrg		}
2594b8e80941Smrg	} else {
2595b8e80941Smrg		/* SI-CI-VI */
2596b8e80941Smrg		struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level];
2597b8e80941Smrg
		/* The depth size and slice values below divide the block
		 * counts by 8 (and their product by 64). */
2598b8e80941Smrg		assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
2599b8e80941Smrg
		/* Here the base addresses include the per-level offsets. */
2600b8e80941Smrg		surf->db_depth_base = (tex->buffer.gpu_address +
2601b8e80941Smrg				       tex->surface.u.legacy.level[level].offset) >> 8;
2602b8e80941Smrg		surf->db_stencil_base = (tex->buffer.gpu_address +
2603b8e80941Smrg					 tex->surface.u.legacy.stencil_level[level].offset) >> 8;
2604b8e80941Smrg
2605b8e80941Smrg		z_info = S_028040_FORMAT(format) |
2606b8e80941Smrg			 S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples));
2607b8e80941Smrg		s_info = S_028044_FORMAT(stencil_format);
2608b8e80941Smrg		surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile);
2609b8e80941Smrg
		/* CIK and newer: copy the tiling parameters out of the tile
		 * mode and macrotile mode arrays in radeon_info. SI instead
		 * programs tile mode indices obtained from
		 * si_tile_mode_index(). */
2610b8e80941Smrg		if (sctx->chip_class >= CIK) {
2611b8e80941Smrg			struct radeon_info *info = &sctx->screen->info;
2612b8e80941Smrg			unsigned index = tex->surface.u.legacy.tiling_index[level];
2613b8e80941Smrg			unsigned stencil_index = tex->surface.u.legacy.stencil_tiling_index[level];
2614b8e80941Smrg			unsigned macro_index = tex->surface.u.legacy.macro_tile_index;
2615b8e80941Smrg			unsigned tile_mode = info->si_tile_mode_array[index];
2616b8e80941Smrg			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2617b8e80941Smrg			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2618b8e80941Smrg
2619b8e80941Smrg			surf->db_depth_info |=
2620b8e80941Smrg				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2621b8e80941Smrg				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2622b8e80941Smrg				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2623b8e80941Smrg				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2624b8e80941Smrg				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2625b8e80941Smrg				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2626b8e80941Smrg			z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2627b8e80941Smrg			s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2628b8e80941Smrg		} else {
2629b8e80941Smrg			unsigned tile_mode_index = si_tile_mode_index(tex, level, false);
2630b8e80941Smrg			z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2631b8e80941Smrg			tile_mode_index = si_tile_mode_index(tex, level, true);
2632b8e80941Smrg			s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2633b8e80941Smrg		}
2634848b8605Smrg
2635b8e80941Smrg		surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
2636b8e80941Smrg				      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
2637b8e80941Smrg		surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
2638b8e80941Smrg								levelinfo->nblk_y) / 64 - 1);
2639b8e80941Smrg
2640b8e80941Smrg		if (si_htile_enabled(tex, level)) {
2641b8e80941Smrg			z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2642b8e80941Smrg				  S_028040_ALLOW_EXPCLEAR(1);
2643b8e80941Smrg
2644b8e80941Smrg			if (tex->surface.has_stencil) {
2645b8e80941Smrg				/* Workaround: For a not yet understood reason, the
2646b8e80941Smrg				 * combination of MSAA, fast stencil clear and stencil
2647b8e80941Smrg				 * decompress messes with subsequent stencil buffer
2648b8e80941Smrg				 * uses. Problem was reproduced on Verde, Bonaire,
2649b8e80941Smrg				 * Tonga, and Carrizo.
2650b8e80941Smrg				 *
2651b8e80941Smrg				 * Disabling EXPCLEAR works around the problem.
2652b8e80941Smrg				 *
2653b8e80941Smrg				 * Check piglit's arb_texture_multisample-stencil-clear
2654b8e80941Smrg				 * test if you want to try changing this.
2655b8e80941Smrg				 */
2656b8e80941Smrg				if (tex->buffer.b.b.nr_samples <= 1)
2657b8e80941Smrg					s_info |= S_028044_ALLOW_EXPCLEAR(1);
2658b8e80941Smrg			} else if (!tex->tc_compatible_htile) {
2659b8e80941Smrg				/* Use all of the htile_buffer for depth if there's no stencil.
2660b8e80941Smrg				 * This must not be set when TC-compatible HTILE is enabled
2661b8e80941Smrg				 * due to a hw bug.
2662b8e80941Smrg				 */
2663b8e80941Smrg				s_info |= S_028044_TILE_STENCIL_DISABLE(1);
2664b8e80941Smrg			}
2665848b8605Smrg
2666b8e80941Smrg			surf->db_htile_data_base = (tex->buffer.gpu_address +
2667b8e80941Smrg						    tex->htile_offset) >> 8;
2668b8e80941Smrg			surf->db_htile_surface = S_028ABC_FULL_CACHE(1);
2669848b8605Smrg
2670b8e80941Smrg			if (tex->tc_compatible_htile) {
2671b8e80941Smrg				surf->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
2672848b8605Smrg
2673b8e80941Smrg				/* 0 = full compression. N = only compress up to N-1 Z planes. */
2674b8e80941Smrg				if (tex->buffer.b.b.nr_samples <= 1)
2675b8e80941Smrg					z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
2676b8e80941Smrg				else if (tex->buffer.b.b.nr_samples <= 4)
2677b8e80941Smrg					z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
2678b8e80941Smrg				else
2679b8e80941Smrg					z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
2680b8e80941Smrg			}
2681848b8605Smrg		}
2682848b8605Smrg	}
2683848b8605Smrg
2684b8e80941Smrg	surf->db_z_info = z_info;
2685b8e80941Smrg	surf->db_stencil_info = s_info;
2686848b8605Smrg
2687b8e80941Smrg	surf->depth_initialized = true;
2688b8e80941Smrg}
2689848b8605Smrg
2690b8e80941Smrgvoid si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
2691b8e80941Smrg{
2692b8e80941Smrg	if (sctx->decompression_enabled)
2693b8e80941Smrg		return;
2694848b8605Smrg
2695b8e80941Smrg	if (sctx->framebuffer.state.zsbuf) {
2696b8e80941Smrg		struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
2697b8e80941Smrg		struct si_texture *tex = (struct si_texture *)surf->texture;
2698848b8605Smrg
2699b8e80941Smrg		tex->dirty_level_mask |= 1 << surf->u.tex.level;
2700b8e80941Smrg
2701b8e80941Smrg		if (tex->surface.has_stencil)
2702b8e80941Smrg			tex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
2703848b8605Smrg	}
2704848b8605Smrg
2705b8e80941Smrg	unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask;
2706b8e80941Smrg	while (compressed_cb_mask) {
2707b8e80941Smrg		unsigned i = u_bit_scan(&compressed_cb_mask);
2708b8e80941Smrg		struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
2709b8e80941Smrg		struct si_texture *tex = (struct si_texture*)surf->texture;
2710848b8605Smrg
2711b8e80941Smrg		if (tex->surface.fmask_size)
2712b8e80941Smrg			tex->dirty_level_mask |= 1 << surf->u.tex.level;
2713b8e80941Smrg		if (tex->dcc_gather_statistics)
2714b8e80941Smrg			tex->separate_dcc_dirty = true;
2715b8e80941Smrg	}
2716b8e80941Smrg}
2717b8e80941Smrg
2718b8e80941Smrgstatic void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
2719b8e80941Smrg{
2720b8e80941Smrg	for (int i = 0; i < state->nr_cbufs; ++i) {
2721b8e80941Smrg		struct si_surface *surf = NULL;
2722b8e80941Smrg		struct si_texture *tex;
2723b8e80941Smrg
2724b8e80941Smrg		if (!state->cbufs[i])
2725b8e80941Smrg			continue;
2726b8e80941Smrg		surf = (struct si_surface*)state->cbufs[i];
2727b8e80941Smrg		tex = (struct si_texture*)surf->base.texture;
2728b8e80941Smrg
2729b8e80941Smrg		p_atomic_dec(&tex->framebuffers_bound);
2730b8e80941Smrg	}
2731848b8605Smrg}
2732848b8605Smrg
2733848b8605Smrgstatic void si_set_framebuffer_state(struct pipe_context *ctx,
2734848b8605Smrg				     const struct pipe_framebuffer_state *state)
2735848b8605Smrg{
2736848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
2737b8e80941Smrg	struct si_surface *surf = NULL;
2738b8e80941Smrg	struct si_texture *tex;
2739b8e80941Smrg	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
2740b8e80941Smrg	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
2741b8e80941Smrg	unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit;
2742b8e80941Smrg	bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf;
2743b8e80941Smrg	bool old_has_stencil =
2744b8e80941Smrg		old_has_zsbuf &&
2745b8e80941Smrg		((struct si_texture*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil;
2746b8e80941Smrg	bool unbound = false;
2747848b8605Smrg	int i;
2748848b8605Smrg
2749b8e80941Smrg	/* Reject zero-sized framebuffers due to a hw bug on SI that occurs
2750b8e80941Smrg	 * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.
2751b8e80941Smrg	 * We could implement the full workaround here, but it's a useless case.
2752b8e80941Smrg	 */
2753b8e80941Smrg	if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) {
2754b8e80941Smrg		unreachable("the framebuffer shouldn't have zero area");
2755b8e80941Smrg		return;
2756848b8605Smrg	}
2757b8e80941Smrg
2758b8e80941Smrg	si_update_fb_dirtiness_after_rendering(sctx);
2759b8e80941Smrg
2760b8e80941Smrg	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
2761b8e80941Smrg		if (!sctx->framebuffer.state.cbufs[i])
2762b8e80941Smrg			continue;
2763b8e80941Smrg
2764b8e80941Smrg		tex = (struct si_texture*)sctx->framebuffer.state.cbufs[i]->texture;
2765b8e80941Smrg		if (tex->dcc_gather_statistics)
2766b8e80941Smrg			vi_separate_dcc_stop_query(sctx, tex);
2767b8e80941Smrg	}
2768b8e80941Smrg
2769b8e80941Smrg	/* Disable DCC if the formats are incompatible. */
2770b8e80941Smrg	for (i = 0; i < state->nr_cbufs; i++) {
2771b8e80941Smrg		if (!state->cbufs[i])
2772b8e80941Smrg			continue;
2773b8e80941Smrg
2774b8e80941Smrg		surf = (struct si_surface*)state->cbufs[i];
2775b8e80941Smrg		tex = (struct si_texture*)surf->base.texture;
2776b8e80941Smrg
2777b8e80941Smrg		if (!surf->dcc_incompatible)
2778b8e80941Smrg			continue;
2779b8e80941Smrg
2780b8e80941Smrg		/* Since the DCC decompression calls back into set_framebuffer-
2781b8e80941Smrg		 * _state, we need to unbind the framebuffer, so that
2782b8e80941Smrg		 * vi_separate_dcc_stop_query isn't called twice with the same
2783b8e80941Smrg		 * color buffer.
2784b8e80941Smrg		 */
2785b8e80941Smrg		if (!unbound) {
2786b8e80941Smrg			util_copy_framebuffer_state(&sctx->framebuffer.state, NULL);
2787b8e80941Smrg			unbound = true;
2788b8e80941Smrg		}
2789b8e80941Smrg
2790b8e80941Smrg		if (vi_dcc_enabled(tex, surf->base.u.tex.level))
2791b8e80941Smrg			if (!si_texture_disable_dcc(sctx, tex))
2792b8e80941Smrg				si_decompress_dcc(sctx, tex);
2793b8e80941Smrg
2794b8e80941Smrg		surf->dcc_incompatible = false;
2795b8e80941Smrg	}
2796b8e80941Smrg
2797b8e80941Smrg	/* Only flush TC when changing the framebuffer state, because
2798b8e80941Smrg	 * the only client not using TC that can change textures is
2799b8e80941Smrg	 * the framebuffer.
2800b8e80941Smrg	 *
2801b8e80941Smrg	 * Wait for compute shaders because of possible transitions:
2802b8e80941Smrg	 * - FB write -> shader read
2803b8e80941Smrg	 * - shader write -> FB read
2804b8e80941Smrg	 *
2805b8e80941Smrg	 * DB caches are flushed on demand (using si_decompress_textures).
2806b8e80941Smrg	 *
2807b8e80941Smrg	 * When MSAA is enabled, CB and TC caches are flushed on demand
2808b8e80941Smrg	 * (after FMASK decompression). Shader write -> FB read transitions
2809b8e80941Smrg	 * cannot happen for MSAA textures, because MSAA shader images are
2810b8e80941Smrg	 * not supported.
2811b8e80941Smrg	 *
2812b8e80941Smrg	 * Only flush and wait for CB if there is actually a bound color buffer.
2813b8e80941Smrg	 */
2814b8e80941Smrg	if (sctx->framebuffer.uncompressed_cb_mask) {
2815b8e80941Smrg		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
2816b8e80941Smrg					   sctx->framebuffer.CB_has_shader_readable_metadata,
2817b8e80941Smrg					   sctx->framebuffer.all_DCC_pipe_aligned);
2818b8e80941Smrg	}
2819b8e80941Smrg
2820b8e80941Smrg	sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
2821b8e80941Smrg
2822b8e80941Smrg	/* u_blitter doesn't invoke depth decompression when it does multiple
2823b8e80941Smrg	 * blits in a row, but the only case when it matters for DB is when
2824b8e80941Smrg	 * doing generate_mipmap. So here we flush DB manually between
2825b8e80941Smrg	 * individual generate_mipmap blits.
2826b8e80941Smrg	 * Note that lower mipmap levels aren't compressed.
2827b8e80941Smrg	 */
2828b8e80941Smrg	if (sctx->generate_mipmap_for_depth) {
2829b8e80941Smrg		si_make_DB_shader_coherent(sctx, 1, false,
2830b8e80941Smrg					   sctx->framebuffer.DB_has_shader_readable_metadata);
2831b8e80941Smrg	} else if (sctx->chip_class == GFX9) {
2832b8e80941Smrg		/* It appears that DB metadata "leaks" in a sequence of:
2833b8e80941Smrg		 *  - depth clear
2834b8e80941Smrg		 *  - DCC decompress for shader image writes (with DB disabled)
2835b8e80941Smrg		 *  - render with DEPTH_BEFORE_SHADER=1
2836b8e80941Smrg		 * Flushing DB metadata works around the problem.
2837b8e80941Smrg		 */
2838b8e80941Smrg		sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;
2839848b8605Smrg	}
2840848b8605Smrg
2841b8e80941Smrg	/* Take the maximum of the old and new count. If the new count is lower,
2842b8e80941Smrg	 * dirtying is needed to disable the unbound colorbuffers.
2843b8e80941Smrg	 */
2844b8e80941Smrg	sctx->framebuffer.dirty_cbufs |=
2845b8e80941Smrg		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
2846b8e80941Smrg	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
2847b8e80941Smrg
2848b8e80941Smrg	si_dec_framebuffer_counters(&sctx->framebuffer.state);
2849848b8605Smrg	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
2850848b8605Smrg
2851b8e80941Smrg	sctx->framebuffer.colorbuf_enabled_4bit = 0;
2852b8e80941Smrg	sctx->framebuffer.spi_shader_col_format = 0;
2853b8e80941Smrg	sctx->framebuffer.spi_shader_col_format_alpha = 0;
2854b8e80941Smrg	sctx->framebuffer.spi_shader_col_format_blend = 0;
2855b8e80941Smrg	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
2856b8e80941Smrg	sctx->framebuffer.color_is_int8 = 0;
2857b8e80941Smrg	sctx->framebuffer.color_is_int10 = 0;
2858b8e80941Smrg
2859848b8605Smrg	sctx->framebuffer.compressed_cb_mask = 0;
2860b8e80941Smrg	sctx->framebuffer.uncompressed_cb_mask = 0;
2861848b8605Smrg	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
2862b8e80941Smrg	sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples;
2863848b8605Smrg	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
2864b8e80941Smrg	sctx->framebuffer.any_dst_linear = false;
2865b8e80941Smrg	sctx->framebuffer.CB_has_shader_readable_metadata = false;
2866b8e80941Smrg	sctx->framebuffer.DB_has_shader_readable_metadata = false;
2867b8e80941Smrg	sctx->framebuffer.all_DCC_pipe_aligned = true;
2868b8e80941Smrg	unsigned num_bpp64_colorbufs = 0;
2869848b8605Smrg
2870848b8605Smrg	for (i = 0; i < state->nr_cbufs; i++) {
2871848b8605Smrg		if (!state->cbufs[i])
2872848b8605Smrg			continue;
2873848b8605Smrg
2874b8e80941Smrg		surf = (struct si_surface*)state->cbufs[i];
2875b8e80941Smrg		tex = (struct si_texture*)surf->base.texture;
2876848b8605Smrg
2877848b8605Smrg		if (!surf->color_initialized) {
2878848b8605Smrg			si_initialize_color_surface(sctx, surf);
2879848b8605Smrg		}
2880848b8605Smrg
2881b8e80941Smrg		sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4);
2882b8e80941Smrg		sctx->framebuffer.spi_shader_col_format |=
2883b8e80941Smrg			surf->spi_shader_col_format << (i * 4);
2884b8e80941Smrg		sctx->framebuffer.spi_shader_col_format_alpha |=
2885b8e80941Smrg			surf->spi_shader_col_format_alpha << (i * 4);
2886b8e80941Smrg		sctx->framebuffer.spi_shader_col_format_blend |=
2887b8e80941Smrg			surf->spi_shader_col_format_blend << (i * 4);
2888b8e80941Smrg		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
2889b8e80941Smrg			surf->spi_shader_col_format_blend_alpha << (i * 4);
2890b8e80941Smrg
2891b8e80941Smrg		if (surf->color_is_int8)
2892b8e80941Smrg			sctx->framebuffer.color_is_int8 |= 1 << i;
2893b8e80941Smrg		if (surf->color_is_int10)
2894b8e80941Smrg			sctx->framebuffer.color_is_int10 |= 1 << i;
2895b8e80941Smrg
2896b8e80941Smrg		if (tex->surface.fmask_size)
2897b8e80941Smrg			sctx->framebuffer.compressed_cb_mask |= 1 << i;
2898b8e80941Smrg		else
2899b8e80941Smrg			sctx->framebuffer.uncompressed_cb_mask |= 1 << i;
2900b8e80941Smrg
2901b8e80941Smrg		/* Don't update nr_color_samples for non-AA buffers.
2902b8e80941Smrg		 * (e.g. destination of MSAA resolve)
2903b8e80941Smrg		 */
2904b8e80941Smrg		if (tex->buffer.b.b.nr_samples >= 2 &&
2905b8e80941Smrg		    tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) {
2906b8e80941Smrg			sctx->framebuffer.nr_color_samples =
2907b8e80941Smrg				MIN2(sctx->framebuffer.nr_color_samples,
2908b8e80941Smrg				     tex->buffer.b.b.nr_storage_samples);
2909b8e80941Smrg			sctx->framebuffer.nr_color_samples =
2910b8e80941Smrg				MAX2(1, sctx->framebuffer.nr_color_samples);
2911b8e80941Smrg		}
2912b8e80941Smrg
2913b8e80941Smrg		if (tex->surface.is_linear)
2914b8e80941Smrg			sctx->framebuffer.any_dst_linear = true;
2915b8e80941Smrg		if (tex->surface.bpe >= 8)
2916b8e80941Smrg			num_bpp64_colorbufs++;
2917b8e80941Smrg
2918b8e80941Smrg		if (vi_dcc_enabled(tex, surf->base.u.tex.level)) {
2919b8e80941Smrg			sctx->framebuffer.CB_has_shader_readable_metadata = true;
2920b8e80941Smrg
2921b8e80941Smrg			if (sctx->chip_class >= GFX9 &&
2922b8e80941Smrg			    !tex->surface.u.gfx9.dcc.pipe_aligned)
2923b8e80941Smrg				sctx->framebuffer.all_DCC_pipe_aligned = false;
2924848b8605Smrg		}
2925848b8605Smrg
2926b8e80941Smrg		si_context_add_resource_size(sctx, surf->base.texture);
2927b8e80941Smrg
2928b8e80941Smrg		p_atomic_inc(&tex->framebuffers_bound);
2929b8e80941Smrg
2930b8e80941Smrg		if (tex->dcc_gather_statistics) {
2931b8e80941Smrg			/* Dirty tracking must be enabled for DCC usage analysis. */
2932848b8605Smrg			sctx->framebuffer.compressed_cb_mask |= 1 << i;
2933b8e80941Smrg			vi_separate_dcc_start_query(sctx, tex);
2934848b8605Smrg		}
2935848b8605Smrg	}
2936848b8605Smrg
2937b8e80941Smrg	/* For optimal DCC performance. */
2938b8e80941Smrg	if (sctx->chip_class == VI)
2939b8e80941Smrg		sctx->framebuffer.dcc_overwrite_combiner_watermark = 4;
2940b8e80941Smrg	else if (num_bpp64_colorbufs >= 5)
2941b8e80941Smrg		sctx->framebuffer.dcc_overwrite_combiner_watermark = 8;
2942b8e80941Smrg	else
2943b8e80941Smrg		sctx->framebuffer.dcc_overwrite_combiner_watermark = 6;
2944b8e80941Smrg
2945b8e80941Smrg	struct si_texture *zstex = NULL;
2946848b8605Smrg
2947848b8605Smrg	if (state->zsbuf) {
2948b8e80941Smrg		surf = (struct si_surface*)state->zsbuf;
2949b8e80941Smrg		zstex = (struct si_texture*)surf->base.texture;
2950848b8605Smrg
2951848b8605Smrg		if (!surf->depth_initialized) {
2952848b8605Smrg			si_init_depth_surface(sctx, surf);
2953848b8605Smrg		}
2954b8e80941Smrg
2955b8e80941Smrg		if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level))
2956b8e80941Smrg			sctx->framebuffer.DB_has_shader_readable_metadata = true;
2957b8e80941Smrg
2958b8e80941Smrg		si_context_add_resource_size(sctx, surf->base.texture);
2959848b8605Smrg	}
2960848b8605Smrg
2961b8e80941Smrg	si_update_ps_colorbuf0_slot(sctx);
2962b8e80941Smrg	si_update_poly_offset_state(sctx);
2963b8e80941Smrg	si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
2964b8e80941Smrg	si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
2965848b8605Smrg
2966b8e80941Smrg	if (sctx->screen->dpbb_allowed)
2967b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
2968848b8605Smrg
2969b8e80941Smrg	if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
2970b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
2971b8e80941Smrg
2972b8e80941Smrg	if (sctx->screen->has_out_of_order_rast &&
2973b8e80941Smrg	    (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit ||
2974b8e80941Smrg	     !!sctx->framebuffer.state.zsbuf != old_has_zsbuf ||
2975b8e80941Smrg	     (zstex && zstex->surface.has_stencil != old_has_stencil)))
2976b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
2977b8e80941Smrg
2978b8e80941Smrg	if (sctx->framebuffer.nr_samples != old_nr_samples) {
2979b8e80941Smrg		struct pipe_constant_buffer constbuf = {0};
2980b8e80941Smrg
2981b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
2982b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
2983b8e80941Smrg
2984b8e80941Smrg		constbuf.buffer = sctx->sample_pos_buffer;
2985b8e80941Smrg
2986b8e80941Smrg		/* Set sample locations as fragment shader constants. */
2987b8e80941Smrg		switch (sctx->framebuffer.nr_samples) {
2988b8e80941Smrg		case 1:
2989b8e80941Smrg			constbuf.buffer_offset = 0;
2990b8e80941Smrg			break;
2991b8e80941Smrg		case 2:
2992b8e80941Smrg			constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x2 -
2993b8e80941Smrg						 (ubyte*)sctx->sample_positions.x1;
2994b8e80941Smrg			break;
2995b8e80941Smrg		case 4:
2996b8e80941Smrg			constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x4 -
2997b8e80941Smrg						 (ubyte*)sctx->sample_positions.x1;
2998b8e80941Smrg			break;
2999b8e80941Smrg		case 8:
3000b8e80941Smrg			constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x8 -
3001b8e80941Smrg						 (ubyte*)sctx->sample_positions.x1;
3002b8e80941Smrg			break;
3003b8e80941Smrg		case 16:
3004b8e80941Smrg			constbuf.buffer_offset = (ubyte*)sctx->sample_positions.x16 -
3005b8e80941Smrg						 (ubyte*)sctx->sample_positions.x1;
3006b8e80941Smrg			break;
3007b8e80941Smrg		default:
3008b8e80941Smrg			PRINT_ERR("Requested an invalid number of samples %i.\n",
3009b8e80941Smrg				 sctx->framebuffer.nr_samples);
3010b8e80941Smrg			assert(0);
3011b8e80941Smrg		}
3012b8e80941Smrg		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
3013b8e80941Smrg		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
3014b8e80941Smrg
3015b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
3016b8e80941Smrg	}
3017b8e80941Smrg
3018b8e80941Smrg	sctx->do_update_shaders = true;
3019b8e80941Smrg
3020b8e80941Smrg	if (!sctx->decompression_enabled) {
3021b8e80941Smrg		/* Prevent textures decompression when the framebuffer state
3022b8e80941Smrg		 * changes come from the decompression passes themselves.
3023b8e80941Smrg		 */
3024b8e80941Smrg		sctx->need_check_render_feedback = true;
3025848b8605Smrg	}
3026848b8605Smrg}
3027848b8605Smrg
3028b8e80941Smrgstatic void si_emit_framebuffer_state(struct si_context *sctx)
3029848b8605Smrg{
3030b8e80941Smrg	struct radeon_cmdbuf *cs = sctx->gfx_cs;
3031848b8605Smrg	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
3032848b8605Smrg	unsigned i, nr_cbufs = state->nr_cbufs;
3033b8e80941Smrg	struct si_texture *tex = NULL;
3034b8e80941Smrg	struct si_surface *cb = NULL;
3035b8e80941Smrg	unsigned cb_color_info = 0;
3036848b8605Smrg
3037848b8605Smrg	/* Colorbuffers. */
3038848b8605Smrg	for (i = 0; i < nr_cbufs; i++) {
3039b8e80941Smrg		uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base;
3040b8e80941Smrg		unsigned cb_color_attrib;
3041b8e80941Smrg
3042b8e80941Smrg		if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
3043b8e80941Smrg			continue;
3044b8e80941Smrg
3045b8e80941Smrg		cb = (struct si_surface*)state->cbufs[i];
3046848b8605Smrg		if (!cb) {
3047b8e80941Smrg			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
3048848b8605Smrg					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
3049848b8605Smrg			continue;
3050848b8605Smrg		}
3051848b8605Smrg
3052b8e80941Smrg		tex = (struct si_texture *)cb->base.texture;
3053b8e80941Smrg		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
3054b8e80941Smrg				      &tex->buffer, RADEON_USAGE_READWRITE,
3055b8e80941Smrg				      tex->buffer.b.b.nr_samples > 1 ?
3056848b8605Smrg					      RADEON_PRIO_COLOR_BUFFER_MSAA :
3057848b8605Smrg					      RADEON_PRIO_COLOR_BUFFER);
3058848b8605Smrg
3059b8e80941Smrg		if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) {
3060b8e80941Smrg			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
3061848b8605Smrg				tex->cmask_buffer, RADEON_USAGE_READWRITE,
3062b8e80941Smrg				RADEON_PRIO_SEPARATE_META);
3063b8e80941Smrg		}
3064b8e80941Smrg
3065b8e80941Smrg		if (tex->dcc_separate_buffer)
3066b8e80941Smrg			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
3067b8e80941Smrg						  tex->dcc_separate_buffer,
3068b8e80941Smrg						  RADEON_USAGE_READWRITE,
3069b8e80941Smrg						  RADEON_PRIO_SEPARATE_META);
3070b8e80941Smrg
3071b8e80941Smrg		/* Compute mutable surface parameters. */
3072b8e80941Smrg		cb_color_base = tex->buffer.gpu_address >> 8;
3073b8e80941Smrg		cb_color_fmask = 0;
3074b8e80941Smrg		cb_color_cmask = tex->cmask_base_address_reg;
3075b8e80941Smrg		cb_dcc_base = 0;
3076b8e80941Smrg		cb_color_info = cb->cb_color_info | tex->cb_color_info;
3077b8e80941Smrg		cb_color_attrib = cb->cb_color_attrib;
3078b8e80941Smrg
3079b8e80941Smrg		if (cb->base.u.tex.level > 0)
3080b8e80941Smrg			cb_color_info &= C_028C70_FAST_CLEAR;
3081b8e80941Smrg
3082b8e80941Smrg		if (tex->surface.fmask_size) {
3083b8e80941Smrg			cb_color_fmask = (tex->buffer.gpu_address + tex->fmask_offset) >> 8;
3084b8e80941Smrg			cb_color_fmask |= tex->surface.fmask_tile_swizzle;
3085b8e80941Smrg		}
3086b8e80941Smrg
3087b8e80941Smrg		/* Set up DCC. */
3088b8e80941Smrg		if (vi_dcc_enabled(tex, cb->base.u.tex.level)) {
3089b8e80941Smrg			bool is_msaa_resolve_dst = state->cbufs[0] &&
3090b8e80941Smrg						   state->cbufs[0]->texture->nr_samples > 1 &&
3091b8e80941Smrg						   state->cbufs[1] == &cb->base &&
3092b8e80941Smrg						   state->cbufs[1]->texture->nr_samples <= 1;
3093b8e80941Smrg
3094b8e80941Smrg			if (!is_msaa_resolve_dst)
3095b8e80941Smrg				cb_color_info |= S_028C70_DCC_ENABLE(1);
3096b8e80941Smrg
3097b8e80941Smrg			cb_dcc_base = ((!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) +
3098b8e80941Smrg				       tex->dcc_offset) >> 8;
3099b8e80941Smrg			cb_dcc_base |= tex->surface.tile_swizzle;
3100b8e80941Smrg		}
3101b8e80941Smrg
3102b8e80941Smrg		if (sctx->chip_class >= GFX9) {
3103b8e80941Smrg			struct gfx9_surf_meta_flags meta;
3104b8e80941Smrg
3105b8e80941Smrg			if (tex->dcc_offset)
3106b8e80941Smrg				meta = tex->surface.u.gfx9.dcc;
3107b8e80941Smrg			else
3108b8e80941Smrg				meta = tex->surface.u.gfx9.cmask;
3109b8e80941Smrg
3110b8e80941Smrg			/* Set mutable surface parameters. */
3111b8e80941Smrg			cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
3112b8e80941Smrg			cb_color_base |= tex->surface.tile_swizzle;
3113b8e80941Smrg			if (!tex->surface.fmask_size)
3114b8e80941Smrg				cb_color_fmask = cb_color_base;
3115b8e80941Smrg			if (cb->base.u.tex.level > 0)
3116b8e80941Smrg				cb_color_cmask = cb_color_base;
3117b8e80941Smrg			cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) |
3118b8e80941Smrg					   S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode) |
3119b8e80941Smrg					   S_028C74_RB_ALIGNED(meta.rb_aligned) |
3120b8e80941Smrg					   S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3121b8e80941Smrg
3122b8e80941Smrg			radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 15);
3123b8e80941Smrg			radeon_emit(cs, cb_color_base);		/* CB_COLOR0_BASE */
3124b8e80941Smrg			radeon_emit(cs, S_028C64_BASE_256B(cb_color_base >> 32)); /* CB_COLOR0_BASE_EXT */
3125b8e80941Smrg			radeon_emit(cs, cb->cb_color_attrib2);	/* CB_COLOR0_ATTRIB2 */
3126b8e80941Smrg			radeon_emit(cs, cb->cb_color_view);	/* CB_COLOR0_VIEW */
3127b8e80941Smrg			radeon_emit(cs, cb_color_info);		/* CB_COLOR0_INFO */
3128b8e80941Smrg			radeon_emit(cs, cb_color_attrib);	/* CB_COLOR0_ATTRIB */
3129b8e80941Smrg			radeon_emit(cs, cb->cb_dcc_control);	/* CB_COLOR0_DCC_CONTROL */
3130b8e80941Smrg			radeon_emit(cs, cb_color_cmask);	/* CB_COLOR0_CMASK */
3131b8e80941Smrg			radeon_emit(cs, S_028C80_BASE_256B(cb_color_cmask >> 32)); /* CB_COLOR0_CMASK_BASE_EXT */
3132b8e80941Smrg			radeon_emit(cs, cb_color_fmask);	/* CB_COLOR0_FMASK */
3133b8e80941Smrg			radeon_emit(cs, S_028C88_BASE_256B(cb_color_fmask >> 32)); /* CB_COLOR0_FMASK_BASE_EXT */
3134b8e80941Smrg			radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */
3135b8e80941Smrg			radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */
3136b8e80941Smrg			radeon_emit(cs, cb_dcc_base);		/* CB_COLOR0_DCC_BASE */
3137b8e80941Smrg			radeon_emit(cs, S_028C98_BASE_256B(cb_dcc_base >> 32)); /* CB_COLOR0_DCC_BASE_EXT */
3138b8e80941Smrg
3139b8e80941Smrg			radeon_set_context_reg(cs, R_0287A0_CB_MRT0_EPITCH + i * 4,
3140b8e80941Smrg					       S_0287A0_EPITCH(tex->surface.u.gfx9.surf.epitch));
3141b8e80941Smrg		} else {
3142b8e80941Smrg			/* Compute mutable surface parameters (SI-CI-VI). */
3143b8e80941Smrg			const struct legacy_surf_level *level_info =
3144b8e80941Smrg				&tex->surface.u.legacy.level[cb->base.u.tex.level];
3145b8e80941Smrg			unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3146b8e80941Smrg			unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;
3147b8e80941Smrg
3148b8e80941Smrg			cb_color_base += level_info->offset >> 8;
3149b8e80941Smrg			/* Only macrotiled modes can set tile swizzle. */
3150b8e80941Smrg			if (level_info->mode == RADEON_SURF_MODE_2D)
3151b8e80941Smrg				cb_color_base |= tex->surface.tile_swizzle;
3152b8e80941Smrg
3153b8e80941Smrg			if (!tex->surface.fmask_size)
3154b8e80941Smrg				cb_color_fmask = cb_color_base;
3155b8e80941Smrg			if (cb->base.u.tex.level > 0)
3156b8e80941Smrg				cb_color_cmask = cb_color_base;
3157b8e80941Smrg			if (cb_dcc_base)
3158b8e80941Smrg				cb_dcc_base += level_info->dcc_offset >> 8;
3159b8e80941Smrg
3160b8e80941Smrg			pitch_tile_max = level_info->nblk_x / 8 - 1;
3161b8e80941Smrg			slice_tile_max = level_info->nblk_x *
3162b8e80941Smrg					 level_info->nblk_y / 64 - 1;
3163b8e80941Smrg			tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);
3164b8e80941Smrg
3165b8e80941Smrg			cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3166b8e80941Smrg			cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3167b8e80941Smrg			cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3168b8e80941Smrg
3169b8e80941Smrg			if (tex->surface.fmask_size) {
3170b8e80941Smrg				if (sctx->chip_class >= CIK)
3171b8e80941Smrg					cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.fmask.pitch_in_pixels / 8 - 1);
3172b8e80941Smrg				cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.fmask.tiling_index);
3173b8e80941Smrg				cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.fmask.slice_tile_max);
3174b8e80941Smrg			} else {
3175b8e80941Smrg				/* This must be set for fast clear to work without FMASK. */
3176b8e80941Smrg				if (sctx->chip_class >= CIK)
3177b8e80941Smrg					cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
3178b8e80941Smrg				cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
3179b8e80941Smrg				cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
3180b8e80941Smrg			}
3181b8e80941Smrg
3182b8e80941Smrg			radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
3183b8e80941Smrg						   sctx->chip_class >= VI ? 14 : 13);
3184b8e80941Smrg			radeon_emit(cs, cb_color_base);		/* CB_COLOR0_BASE */
3185b8e80941Smrg			radeon_emit(cs, cb_color_pitch);	/* CB_COLOR0_PITCH */
3186b8e80941Smrg			radeon_emit(cs, cb_color_slice);	/* CB_COLOR0_SLICE */
3187b8e80941Smrg			radeon_emit(cs, cb->cb_color_view);	/* CB_COLOR0_VIEW */
3188b8e80941Smrg			radeon_emit(cs, cb_color_info);		/* CB_COLOR0_INFO */
3189b8e80941Smrg			radeon_emit(cs, cb_color_attrib);	/* CB_COLOR0_ATTRIB */
3190b8e80941Smrg			radeon_emit(cs, cb->cb_dcc_control);	/* CB_COLOR0_DCC_CONTROL */
3191b8e80941Smrg			radeon_emit(cs, cb_color_cmask);	/* CB_COLOR0_CMASK */
3192b8e80941Smrg			radeon_emit(cs, tex->surface.u.legacy.cmask_slice_tile_max); /* CB_COLOR0_CMASK_SLICE */
3193b8e80941Smrg			radeon_emit(cs, cb_color_fmask);		/* CB_COLOR0_FMASK */
3194b8e80941Smrg			radeon_emit(cs, cb_color_fmask_slice);		/* CB_COLOR0_FMASK_SLICE */
3195b8e80941Smrg			radeon_emit(cs, tex->color_clear_value[0]);	/* CB_COLOR0_CLEAR_WORD0 */
3196b8e80941Smrg			radeon_emit(cs, tex->color_clear_value[1]);	/* CB_COLOR0_CLEAR_WORD1 */
3197b8e80941Smrg
3198b8e80941Smrg			if (sctx->chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
3199b8e80941Smrg				radeon_emit(cs, cb_dcc_base);
3200b8e80941Smrg		}
3201848b8605Smrg	}
3202b8e80941Smrg	for (; i < 8 ; i++)
3203b8e80941Smrg		if (sctx->framebuffer.dirty_cbufs & (1 << i))
3204b8e80941Smrg			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
3205848b8605Smrg
3206848b8605Smrg	/* ZS buffer. */
3207b8e80941Smrg	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
3208b8e80941Smrg		struct si_surface *zb = (struct si_surface*)state->zsbuf;
3209b8e80941Smrg		struct si_texture *tex = (struct si_texture*)zb->base.texture;
3210848b8605Smrg
3211b8e80941Smrg		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
3212b8e80941Smrg				      &tex->buffer, RADEON_USAGE_READWRITE,
3213848b8605Smrg				      zb->base.texture->nr_samples > 1 ?
3214848b8605Smrg					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
3215848b8605Smrg					      RADEON_PRIO_DEPTH_BUFFER);
3216848b8605Smrg
3217b8e80941Smrg		if (sctx->chip_class >= GFX9) {
3218b8e80941Smrg			radeon_set_context_reg_seq(cs, R_028014_DB_HTILE_DATA_BASE, 3);
3219b8e80941Smrg			radeon_emit(cs, zb->db_htile_data_base);	/* DB_HTILE_DATA_BASE */
3220b8e80941Smrg			radeon_emit(cs, S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */
3221b8e80941Smrg			radeon_emit(cs, zb->db_depth_size);		/* DB_DEPTH_SIZE */
3222b8e80941Smrg
3223b8e80941Smrg			radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 10);
3224b8e80941Smrg			radeon_emit(cs, zb->db_z_info |			/* DB_Z_INFO */
3225b8e80941Smrg				    S_028038_ZRANGE_PRECISION(tex->depth_clear_value != 0));
3226b8e80941Smrg			radeon_emit(cs, zb->db_stencil_info);		/* DB_STENCIL_INFO */
3227b8e80941Smrg			radeon_emit(cs, zb->db_depth_base);		/* DB_Z_READ_BASE */
3228b8e80941Smrg			radeon_emit(cs, S_028044_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_READ_BASE_HI */
3229b8e80941Smrg			radeon_emit(cs, zb->db_stencil_base);		/* DB_STENCIL_READ_BASE */
3230b8e80941Smrg			radeon_emit(cs, S_02804C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
3231b8e80941Smrg			radeon_emit(cs, zb->db_depth_base);		/* DB_Z_WRITE_BASE */
3232b8e80941Smrg			radeon_emit(cs, S_028054_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_WRITE_BASE_HI */
3233b8e80941Smrg			radeon_emit(cs, zb->db_stencil_base);		/* DB_STENCIL_WRITE_BASE */
3234b8e80941Smrg			radeon_emit(cs, S_02805C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */
3235b8e80941Smrg
3236b8e80941Smrg			radeon_set_context_reg_seq(cs, R_028068_DB_Z_INFO2, 2);
3237b8e80941Smrg			radeon_emit(cs, zb->db_z_info2);	/* DB_Z_INFO2 */
3238b8e80941Smrg			radeon_emit(cs, zb->db_stencil_info2);	/* DB_STENCIL_INFO2 */
3239b8e80941Smrg		} else {
3240b8e80941Smrg			radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
3241b8e80941Smrg
3242b8e80941Smrg			radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
3243b8e80941Smrg			radeon_emit(cs, zb->db_depth_info);	/* DB_DEPTH_INFO */
3244b8e80941Smrg			radeon_emit(cs, zb->db_z_info |		/* DB_Z_INFO */
3245b8e80941Smrg				    S_028040_ZRANGE_PRECISION(tex->depth_clear_value != 0));
3246b8e80941Smrg			radeon_emit(cs, zb->db_stencil_info);	/* DB_STENCIL_INFO */
3247b8e80941Smrg			radeon_emit(cs, zb->db_depth_base);	/* DB_Z_READ_BASE */
3248b8e80941Smrg			radeon_emit(cs, zb->db_stencil_base);	/* DB_STENCIL_READ_BASE */
3249b8e80941Smrg			radeon_emit(cs, zb->db_depth_base);	/* DB_Z_WRITE_BASE */
3250b8e80941Smrg			radeon_emit(cs, zb->db_stencil_base);	/* DB_STENCIL_WRITE_BASE */
3251b8e80941Smrg			radeon_emit(cs, zb->db_depth_size);	/* DB_DEPTH_SIZE */
3252b8e80941Smrg			radeon_emit(cs, zb->db_depth_slice);	/* DB_DEPTH_SLICE */
3253b8e80941Smrg		}
3254b8e80941Smrg
3255b8e80941Smrg		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
3256b8e80941Smrg		radeon_emit(cs, tex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
3257b8e80941Smrg		radeon_emit(cs, fui(tex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
3258b8e80941Smrg
3259b8e80941Smrg		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
3260b8e80941Smrg		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
3261b8e80941Smrg	} else if (sctx->framebuffer.dirty_zsbuf) {
3262b8e80941Smrg		if (sctx->chip_class >= GFX9)
3263b8e80941Smrg			radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 2);
3264b8e80941Smrg		else
3265b8e80941Smrg			radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
3266b8e80941Smrg
3267b8e80941Smrg		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
3268b8e80941Smrg		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
3269848b8605Smrg	}
3270848b8605Smrg
3271848b8605Smrg	/* Framebuffer dimensions. */
3272848b8605Smrg        /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
3273b8e80941Smrg	radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
3274848b8605Smrg			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
3275848b8605Smrg
3276b8e80941Smrg	if (sctx->screen->dfsm_allowed) {
3277b8e80941Smrg		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
3278b8e80941Smrg		radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
3279b8e80941Smrg	}
3280848b8605Smrg
3281b8e80941Smrg	sctx->framebuffer.dirty_cbufs = 0;
3282b8e80941Smrg	sctx->framebuffer.dirty_zsbuf = false;
3283848b8605Smrg}
3284848b8605Smrg
3285b8e80941Smrgstatic void si_emit_msaa_sample_locs(struct si_context *sctx)
3286848b8605Smrg{
3287b8e80941Smrg	struct radeon_cmdbuf *cs = sctx->gfx_cs;
3288b8e80941Smrg	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
3289b8e80941Smrg	unsigned nr_samples = sctx->framebuffer.nr_samples;
3290b8e80941Smrg	bool has_msaa_sample_loc_bug = sctx->screen->has_msaa_sample_loc_bug;
3291b8e80941Smrg
3292b8e80941Smrg	/* Smoothing (only possible with nr_samples == 1) uses the same
3293b8e80941Smrg	 * sample locations as the MSAA it simulates.
3294b8e80941Smrg	 */
3295b8e80941Smrg	if (nr_samples <= 1 && sctx->smoothing_enabled)
3296b8e80941Smrg		nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
3297b8e80941Smrg
3298b8e80941Smrg	/* On Polaris, the small primitive filter uses the sample locations
3299b8e80941Smrg	 * even when MSAA is off, so we need to make sure they're set to 0.
3300b8e80941Smrg	 */
3301b8e80941Smrg	if ((nr_samples >= 2 || has_msaa_sample_loc_bug) &&
3302b8e80941Smrg	    nr_samples != sctx->sample_locs_num_samples) {
3303b8e80941Smrg		sctx->sample_locs_num_samples = nr_samples;
3304b8e80941Smrg		si_emit_sample_locations(cs, nr_samples);
3305b8e80941Smrg	}
3306848b8605Smrg
3307b8e80941Smrg	if (sctx->family >= CHIP_POLARIS10) {
3308b8e80941Smrg		unsigned small_prim_filter_cntl =
3309b8e80941Smrg			S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
3310b8e80941Smrg			/* line bug */
3311b8e80941Smrg			S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12);
3312848b8605Smrg
3313b8e80941Smrg		/* The alternative of setting sample locations to 0 would
3314b8e80941Smrg		 * require a DB flush to avoid Z errors, see
3315b8e80941Smrg		 * https://bugs.freedesktop.org/show_bug.cgi?id=96908
3316b8e80941Smrg		 */
3317b8e80941Smrg		if (has_msaa_sample_loc_bug &&
3318b8e80941Smrg		    sctx->framebuffer.nr_samples > 1 &&
3319b8e80941Smrg		    !rs->multisample_enable)
3320b8e80941Smrg			small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
3321b8e80941Smrg
3322b8e80941Smrg		radeon_opt_set_context_reg(sctx,
3323b8e80941Smrg					   R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
3324b8e80941Smrg					   SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
3325b8e80941Smrg					   small_prim_filter_cntl);
3326b8e80941Smrg	}
3327848b8605Smrg
3328b8e80941Smrg	/* The exclusion bits can be set to improve rasterization efficiency
3329b8e80941Smrg	 * if no sample lies on the pixel boundary (-8 sample offset).
3330b8e80941Smrg	 */
3331b8e80941Smrg	bool exclusion = sctx->chip_class >= CIK &&
3332b8e80941Smrg			 (!rs->multisample_enable || nr_samples != 16);
3333b8e80941Smrg	radeon_opt_set_context_reg(sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL,
3334b8e80941Smrg				   SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
3335b8e80941Smrg				   S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) |
3336b8e80941Smrg				   S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
3337848b8605Smrg}
3338848b8605Smrg
3339b8e80941Smrgstatic bool si_out_of_order_rasterization(struct si_context *sctx)
3340848b8605Smrg{
3341b8e80941Smrg	struct si_state_blend *blend = sctx->queued.named.blend;
3342b8e80941Smrg	struct si_state_dsa *dsa = sctx->queued.named.dsa;
3343848b8605Smrg
3344b8e80941Smrg	if (!sctx->screen->has_out_of_order_rast)
3345b8e80941Smrg		return false;
3346848b8605Smrg
3347b8e80941Smrg	unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit;
3348848b8605Smrg
3349b8e80941Smrg	if (blend) {
3350b8e80941Smrg		colormask &= blend->cb_target_enabled_4bit;
3351b8e80941Smrg	} else {
3352b8e80941Smrg		colormask = 0;
3353848b8605Smrg	}
3354848b8605Smrg
3355b8e80941Smrg	/* Conservative: No logic op. */
3356b8e80941Smrg	if (colormask && blend->logicop_enable)
3357b8e80941Smrg		return false;
3358848b8605Smrg
3359b8e80941Smrg	struct si_dsa_order_invariance dsa_order_invariant = {
3360b8e80941Smrg		.zs = true, .pass_set = true, .pass_last = false
3361b8e80941Smrg	};
3362848b8605Smrg
3363b8e80941Smrg	if (sctx->framebuffer.state.zsbuf) {
3364b8e80941Smrg		struct si_texture *zstex =
3365b8e80941Smrg			(struct si_texture*)sctx->framebuffer.state.zsbuf->texture;
3366b8e80941Smrg		bool has_stencil = zstex->surface.has_stencil;
3367b8e80941Smrg		dsa_order_invariant = dsa->order_invariance[has_stencil];
3368b8e80941Smrg		if (!dsa_order_invariant.zs)
3369b8e80941Smrg			return false;
3370b8e80941Smrg
3371b8e80941Smrg		/* The set of PS invocations is always order invariant,
3372b8e80941Smrg		 * except when early Z/S tests are requested. */
3373b8e80941Smrg		if (sctx->ps_shader.cso &&
3374b8e80941Smrg		    sctx->ps_shader.cso->info.writes_memory &&
3375b8e80941Smrg		    sctx->ps_shader.cso->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] &&
3376b8e80941Smrg		    !dsa_order_invariant.pass_set)
3377b8e80941Smrg			return false;
3378b8e80941Smrg
3379b8e80941Smrg		if (sctx->num_perfect_occlusion_queries != 0 &&
3380b8e80941Smrg		    !dsa_order_invariant.pass_set)
3381b8e80941Smrg			return false;
3382848b8605Smrg	}
3383848b8605Smrg
3384b8e80941Smrg	if (!colormask)
3385b8e80941Smrg		return true;
3386848b8605Smrg
3387b8e80941Smrg	unsigned blendmask = colormask & blend->blend_enable_4bit;
3388848b8605Smrg
3389b8e80941Smrg	if (blendmask) {
3390b8e80941Smrg		/* Only commutative blending. */
3391b8e80941Smrg		if (blendmask & ~blend->commutative_4bit)
3392b8e80941Smrg			return false;
3393848b8605Smrg
3394b8e80941Smrg		if (!dsa_order_invariant.pass_set)
3395b8e80941Smrg			return false;
3396848b8605Smrg	}
3397848b8605Smrg
3398b8e80941Smrg	if (colormask & ~blendmask) {
3399b8e80941Smrg		if (!dsa_order_invariant.pass_last)
3400b8e80941Smrg			return false;
3401848b8605Smrg	}
3402848b8605Smrg
3403b8e80941Smrg	return true;
3404848b8605Smrg}
3405848b8605Smrg
3406b8e80941Smrgstatic void si_emit_msaa_config(struct si_context *sctx)
3407848b8605Smrg{
3408b8e80941Smrg	struct radeon_cmdbuf *cs = sctx->gfx_cs;
3409b8e80941Smrg	unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes;
3410b8e80941Smrg	/* 33% faster rendering to linear color buffers */
3411b8e80941Smrg	bool dst_is_linear = sctx->framebuffer.any_dst_linear;
3412b8e80941Smrg	bool out_of_order_rast = si_out_of_order_rasterization(sctx);
3413b8e80941Smrg	unsigned sc_mode_cntl_1 =
3414b8e80941Smrg		S_028A4C_WALK_SIZE(dst_is_linear) |
3415b8e80941Smrg		S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
3416b8e80941Smrg		S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
3417b8e80941Smrg		S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
3418b8e80941Smrg		S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
3419b8e80941Smrg		/* always 1: */
3420b8e80941Smrg		S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
3421b8e80941Smrg		S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
3422b8e80941Smrg		S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
3423b8e80941Smrg		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
3424b8e80941Smrg		S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
3425b8e80941Smrg		S_028A4C_FORCE_EOV_REZ_ENABLE(1);
3426b8e80941Smrg	unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
3427b8e80941Smrg			   S_028804_INCOHERENT_EQAA_READS(1) |
3428b8e80941Smrg			   S_028804_INTERPOLATE_COMP_Z(1) |
3429b8e80941Smrg			   S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
3430b8e80941Smrg	unsigned coverage_samples, color_samples, z_samples;
3431b8e80941Smrg
3432b8e80941Smrg	/* S: Coverage samples (up to 16x):
3433b8e80941Smrg	 * - Scan conversion samples (PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES)
3434b8e80941Smrg	 * - CB FMASK samples (CB_COLORi_ATTRIB.NUM_SAMPLES)
3435b8e80941Smrg	 *
3436b8e80941Smrg	 * Z: Z/S samples (up to 8x, must be <= coverage samples and >= color samples):
3437b8e80941Smrg	 * - Value seen by DB (DB_Z_INFO.NUM_SAMPLES)
3438b8e80941Smrg	 * - Value seen by CB, must be correct even if Z/S is unbound (DB_EQAA.MAX_ANCHOR_SAMPLES)
3439b8e80941Smrg	 * # Missing samples are derived from Z planes if Z is compressed (up to 16x quality), or
3440b8e80941Smrg	 * # from the closest defined sample if Z is uncompressed (same quality as the number of
3441b8e80941Smrg	 * # Z samples).
3442b8e80941Smrg	 *
3443b8e80941Smrg	 * F: Color samples (up to 8x, must be <= coverage samples):
3444b8e80941Smrg	 * - CB color samples (CB_COLORi_ATTRIB.NUM_FRAGMENTS)
3445b8e80941Smrg	 * - PS iter samples (DB_EQAA.PS_ITER_SAMPLES)
3446b8e80941Smrg	 *
3447b8e80941Smrg	 * Can be anything between coverage and color samples:
3448b8e80941Smrg	 * - SampleMaskIn samples (PA_SC_AA_CONFIG.MSAA_EXPOSED_SAMPLES)
3449b8e80941Smrg	 * - SampleMaskOut samples (DB_EQAA.MASK_EXPORT_NUM_SAMPLES)
3450b8e80941Smrg	 * - Alpha-to-coverage samples (DB_EQAA.ALPHA_TO_MASK_NUM_SAMPLES)
3451b8e80941Smrg	 * - Occlusion query samples (DB_COUNT_CONTROL.SAMPLE_RATE)
3452b8e80941Smrg	 * # All are currently set the same as coverage samples.
3453b8e80941Smrg	 *
3454b8e80941Smrg	 * If color samples < coverage samples, FMASK has a higher bpp to store an "unknown"
3455b8e80941Smrg	 * flag for undefined color samples. A shader-based resolve must handle unknowns
3456b8e80941Smrg	 * or mask them out with AND. Unknowns can also be guessed from neighbors via
3457b8e80941Smrg	 * an edge-detect shader-based resolve, which is required to make "color samples = 1"
3458b8e80941Smrg	 * useful. The CB resolve always drops unknowns.
3459b8e80941Smrg	 *
3460b8e80941Smrg	 * Sensible AA configurations:
3461b8e80941Smrg	 *   EQAA 16s 8z 8f - might look the same as 16x MSAA if Z is compressed
3462b8e80941Smrg	 *   EQAA 16s 8z 4f - might look the same as 16x MSAA if Z is compressed
3463b8e80941Smrg	 *   EQAA 16s 4z 4f - might look the same as 16x MSAA if Z is compressed
3464b8e80941Smrg	 *   EQAA  8s 8z 8f = 8x MSAA
3465b8e80941Smrg	 *   EQAA  8s 8z 4f - might look the same as 8x MSAA
3466b8e80941Smrg	 *   EQAA  8s 8z 2f - might look the same as 8x MSAA with low-density geometry
3467b8e80941Smrg	 *   EQAA  8s 4z 4f - might look the same as 8x MSAA if Z is compressed
3468b8e80941Smrg	 *   EQAA  8s 4z 2f - might look the same as 8x MSAA with low-density geometry if Z is compressed
3469b8e80941Smrg	 *   EQAA  4s 4z 4f = 4x MSAA
3470b8e80941Smrg	 *   EQAA  4s 4z 2f - might look the same as 4x MSAA with low-density geometry
3471b8e80941Smrg	 *   EQAA  2s 2z 2f = 2x MSAA
3472b8e80941Smrg	 */
3473b8e80941Smrg	if (sctx->framebuffer.nr_samples > 1) {
3474b8e80941Smrg		coverage_samples = sctx->framebuffer.nr_samples;
3475b8e80941Smrg		color_samples = sctx->framebuffer.nr_color_samples;
3476b8e80941Smrg
3477b8e80941Smrg		if (sctx->framebuffer.state.zsbuf) {
3478b8e80941Smrg			z_samples = sctx->framebuffer.state.zsbuf->texture->nr_samples;
3479b8e80941Smrg			z_samples = MAX2(1, z_samples);
3480b8e80941Smrg		} else {
3481b8e80941Smrg			z_samples = coverage_samples;
3482b8e80941Smrg		}
3483b8e80941Smrg	} else if (sctx->smoothing_enabled) {
3484b8e80941Smrg		coverage_samples = color_samples = z_samples = SI_NUM_SMOOTH_AA_SAMPLES;
3485b8e80941Smrg	} else {
3486b8e80941Smrg		coverage_samples = color_samples = z_samples = 1;
3487b8e80941Smrg	}
3488848b8605Smrg
3489b8e80941Smrg	/* Required by OpenGL line rasterization.
3490b8e80941Smrg	 *
3491b8e80941Smrg	 * TODO: We should also enable perpendicular endcaps for AA lines,
3492b8e80941Smrg	 *       but that requires implementing line stippling in the pixel
3493b8e80941Smrg	 *       shader. SC can only do line stippling with axis-aligned
3494b8e80941Smrg	 *       endcaps.
3495b8e80941Smrg	 */
3496b8e80941Smrg	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
3497b8e80941Smrg	unsigned sc_aa_config = 0;
3498b8e80941Smrg
3499b8e80941Smrg	if (coverage_samples > 1) {
3500b8e80941Smrg		/* distance from the pixel center, indexed by log2(nr_samples) */
3501b8e80941Smrg		static unsigned max_dist[] = {
3502b8e80941Smrg			0, /* unused */
3503b8e80941Smrg			4, /* 2x MSAA */
3504b8e80941Smrg			6, /* 4x MSAA */
3505b8e80941Smrg			7, /* 8x MSAA */
3506b8e80941Smrg			8, /* 16x MSAA */
3507b8e80941Smrg		};
3508b8e80941Smrg		unsigned log_samples = util_logbase2(coverage_samples);
3509b8e80941Smrg		unsigned log_z_samples = util_logbase2(z_samples);
3510b8e80941Smrg		unsigned ps_iter_samples = si_get_ps_iter_samples(sctx);
3511b8e80941Smrg		unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);
3512b8e80941Smrg
3513b8e80941Smrg		sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);
3514b8e80941Smrg		sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
3515b8e80941Smrg			       S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
3516b8e80941Smrg			       S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples);
3517b8e80941Smrg
3518b8e80941Smrg		if (sctx->framebuffer.nr_samples > 1) {
3519b8e80941Smrg			db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
3520b8e80941Smrg				   S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
3521b8e80941Smrg				   S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
3522b8e80941Smrg				   S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
3523b8e80941Smrg			sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
3524b8e80941Smrg		} else if (sctx->smoothing_enabled) {
3525b8e80941Smrg			db_eqaa |= S_028804_OVERRASTERIZATION_AMOUNT(log_samples);
3526b8e80941Smrg		}
3527b8e80941Smrg	}
3528848b8605Smrg
3529b8e80941Smrg	unsigned initial_cdw = cs->current.cdw;
3530b8e80941Smrg
3531b8e80941Smrg	/* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */
3532b8e80941Smrg	radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL,
3533b8e80941Smrg				    SI_TRACKED_PA_SC_LINE_CNTL, sc_line_cntl,
3534b8e80941Smrg				    sc_aa_config);
3535b8e80941Smrg	/* R_028804_DB_EQAA */
3536b8e80941Smrg	radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA,
3537b8e80941Smrg				   db_eqaa);
3538b8e80941Smrg	/* R_028A4C_PA_SC_MODE_CNTL_1 */
3539b8e80941Smrg	radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1,
3540b8e80941Smrg				   SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1);
3541b8e80941Smrg
3542b8e80941Smrg	if (initial_cdw != cs->current.cdw) {
3543b8e80941Smrg		sctx->context_roll = true;
3544b8e80941Smrg
3545b8e80941Smrg		/* GFX9: Flush DFSM when the AA mode changes. */
3546b8e80941Smrg		if (sctx->screen->dfsm_allowed) {
3547b8e80941Smrg			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
3548b8e80941Smrg			radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
3549b8e80941Smrg		}
3550b8e80941Smrg	}
3551848b8605Smrg}
3552848b8605Smrg
3553b8e80941Smrgvoid si_update_ps_iter_samples(struct si_context *sctx)
3554848b8605Smrg{
3555b8e80941Smrg	if (sctx->framebuffer.nr_samples > 1)
3556b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
3557b8e80941Smrg	if (sctx->screen->dpbb_allowed)
3558b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
3559848b8605Smrg}
3560848b8605Smrg
3561b8e80941Smrgstatic void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
3562848b8605Smrg{
3563848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
3564848b8605Smrg
3565b8e80941Smrg	/* The hardware can only do sample shading with 2^n samples. */
3566b8e80941Smrg	min_samples = util_next_power_of_two(min_samples);
3567848b8605Smrg
3568b8e80941Smrg	if (sctx->ps_iter_samples == min_samples)
3569848b8605Smrg		return;
3570848b8605Smrg
3571b8e80941Smrg	sctx->ps_iter_samples = min_samples;
3572b8e80941Smrg	sctx->do_update_shaders = true;
3573848b8605Smrg
3574b8e80941Smrg	si_update_ps_iter_samples(sctx);
3575848b8605Smrg}
3576848b8605Smrg
3577b8e80941Smrg/*
3578b8e80941Smrg * Samplers
3579b8e80941Smrg */
3580848b8605Smrg
3581b8e80941Smrg/**
3582b8e80941Smrg * Build the sampler view descriptor for a buffer texture.
3583b8e80941Smrg * @param state 256-bit descriptor; only the high 128 bits are filled in
3584b8e80941Smrg */
3585b8e80941Smrgvoid
3586b8e80941Smrgsi_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf,
3587b8e80941Smrg			  enum pipe_format format,
3588b8e80941Smrg			  unsigned offset, unsigned size,
3589b8e80941Smrg			  uint32_t *state)
3590848b8605Smrg{
3591b8e80941Smrg	const struct util_format_description *desc;
3592b8e80941Smrg	int first_non_void;
3593b8e80941Smrg	unsigned stride;
3594b8e80941Smrg	unsigned num_records;
3595b8e80941Smrg	unsigned num_format, data_format;
3596848b8605Smrg
3597b8e80941Smrg	desc = util_format_description(format);
3598b8e80941Smrg	first_non_void = util_format_get_first_non_void_channel(format);
3599b8e80941Smrg	stride = desc->block.bits / 8;
3600b8e80941Smrg	num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void);
3601b8e80941Smrg	data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void);
3602b8e80941Smrg
3603b8e80941Smrg	num_records = size / stride;
3604b8e80941Smrg	num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);
3605b8e80941Smrg
3606b8e80941Smrg	/* The NUM_RECORDS field has a different meaning depending on the chip,
3607b8e80941Smrg	 * instruction type, STRIDE, and SWIZZLE_ENABLE.
3608b8e80941Smrg	 *
3609b8e80941Smrg	 * SI-CIK:
3610b8e80941Smrg	 * - If STRIDE == 0, it's in byte units.
3611b8e80941Smrg	 * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN.
3612b8e80941Smrg	 *
3613b8e80941Smrg	 * VI:
3614b8e80941Smrg	 * - For SMEM and STRIDE == 0, it's in byte units.
3615b8e80941Smrg	 * - For SMEM and STRIDE != 0, it's in units of STRIDE.
3616b8e80941Smrg	 * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units.
3617b8e80941Smrg	 * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE.
3618b8e80941Smrg	 * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_-
3619b8e80941Smrg	 *       ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when
3620b8e80941Smrg	 *       using SMEM. This can be done in the shader by clearing STRIDE with s_and.
3621b8e80941Smrg	 *       That way the same descriptor can be used by both SMEM and VMEM.
3622b8e80941Smrg	 *
3623b8e80941Smrg	 * GFX9:
3624b8e80941Smrg	 * - For SMEM and STRIDE == 0, it's in byte units.
3625b8e80941Smrg	 * - For SMEM and STRIDE != 0, it's in units of STRIDE.
3626b8e80941Smrg	 * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units.
3627b8e80941Smrg	 * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE.
3628b8e80941Smrg	 */
3629b8e80941Smrg	if (screen->info.chip_class >= GFX9 && HAVE_LLVM < 0x0800)
3630b8e80941Smrg		/* When vindex == 0, LLVM < 8.0 sets IDXEN = 0, thus changing units
3631b8e80941Smrg		 * from STRIDE to bytes. This works around it by setting
3632b8e80941Smrg		 * NUM_RECORDS to at least the size of one element, so that
3633b8e80941Smrg		 * the first element is readable when IDXEN == 0.
3634b8e80941Smrg		 */
3635b8e80941Smrg		num_records = num_records ? MAX2(num_records, stride) : 0;
3636b8e80941Smrg	else if (screen->info.chip_class == VI)
3637b8e80941Smrg		num_records *= stride;
3638b8e80941Smrg
3639b8e80941Smrg	state[4] = 0;
3640b8e80941Smrg	state[5] = S_008F04_STRIDE(stride);
3641b8e80941Smrg	state[6] = num_records;
3642b8e80941Smrg	state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
3643b8e80941Smrg		   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
3644b8e80941Smrg		   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
3645b8e80941Smrg		   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
3646b8e80941Smrg		   S_008F0C_NUM_FORMAT(num_format) |
3647b8e80941Smrg		   S_008F0C_DATA_FORMAT(data_format);
3648848b8605Smrg}
3649848b8605Smrg
3650b8e80941Smrgstatic unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
3651848b8605Smrg{
3652b8e80941Smrg	unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
3653b8e80941Smrg
3654b8e80941Smrg	if (swizzle[3] == PIPE_SWIZZLE_X) {
3655b8e80941Smrg		/* For the pre-defined border color values (white, opaque
3656b8e80941Smrg		 * black, transparent black), the only thing that matters is
3657b8e80941Smrg		 * that the alpha channel winds up in the correct place
3658b8e80941Smrg		 * (because the RGB channels are all the same) so either of
3659b8e80941Smrg		 * these enumerations will work.
3660b8e80941Smrg		 */
3661b8e80941Smrg		if (swizzle[2] == PIPE_SWIZZLE_Y)
3662b8e80941Smrg			bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
3663b8e80941Smrg		else
3664b8e80941Smrg			bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
3665b8e80941Smrg	} else if (swizzle[0] == PIPE_SWIZZLE_X) {
3666b8e80941Smrg		if (swizzle[1] == PIPE_SWIZZLE_Y)
3667b8e80941Smrg			bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
3668b8e80941Smrg		else
3669b8e80941Smrg			bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
3670b8e80941Smrg	} else if (swizzle[1] == PIPE_SWIZZLE_X) {
3671b8e80941Smrg		bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
3672b8e80941Smrg	} else if (swizzle[2] == PIPE_SWIZZLE_X) {
3673b8e80941Smrg		bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
3674848b8605Smrg	}
3675848b8605Smrg
3676b8e80941Smrg	return bc_swizzle;
3677848b8605Smrg}
3678848b8605Smrg
3679b8e80941Smrg/**
3680b8e80941Smrg * Build the sampler view descriptor for a texture.
3681848b8605Smrg */
3682b8e80941Smrgvoid
3683b8e80941Smrgsi_make_texture_descriptor(struct si_screen *screen,
3684b8e80941Smrg			   struct si_texture *tex,
3685b8e80941Smrg			   bool sampler,
3686b8e80941Smrg			   enum pipe_texture_target target,
3687b8e80941Smrg			   enum pipe_format pipe_format,
3688b8e80941Smrg			   const unsigned char state_swizzle[4],
3689b8e80941Smrg			   unsigned first_level, unsigned last_level,
3690b8e80941Smrg			   unsigned first_layer, unsigned last_layer,
3691b8e80941Smrg			   unsigned width, unsigned height, unsigned depth,
3692b8e80941Smrg			   uint32_t *state,
3693b8e80941Smrg			   uint32_t *fmask_state)
3694848b8605Smrg{
3695b8e80941Smrg	struct pipe_resource *res = &tex->buffer.b.b;
3696848b8605Smrg	const struct util_format_description *desc;
3697b8e80941Smrg	unsigned char swizzle[4];
3698848b8605Smrg	int first_non_void;
3699b8e80941Smrg	unsigned num_format, data_format, type, num_samples;
3700848b8605Smrg	uint64_t va;
3701848b8605Smrg
3702848b8605Smrg	desc = util_format_description(pipe_format);
3703848b8605Smrg
3704b8e80941Smrg	num_samples = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ?
3705b8e80941Smrg			MAX2(1, res->nr_samples) :
3706b8e80941Smrg			MAX2(1, res->nr_storage_samples);
3707b8e80941Smrg
3708848b8605Smrg	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
3709848b8605Smrg		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
3710848b8605Smrg		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
3711b8e80941Smrg		const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};
3712848b8605Smrg
3713848b8605Smrg		switch (pipe_format) {
3714848b8605Smrg		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
3715848b8605Smrg		case PIPE_FORMAT_X32_S8X24_UINT:
3716848b8605Smrg		case PIPE_FORMAT_X8Z24_UNORM:
3717848b8605Smrg			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
3718848b8605Smrg			break;
3719b8e80941Smrg		case PIPE_FORMAT_X24S8_UINT:
3720b8e80941Smrg			/*
3721b8e80941Smrg			 * X24S8 is implemented as an 8_8_8_8 data format, to
3722b8e80941Smrg			 * fix texture gathers. This affects at least
3723b8e80941Smrg			 * GL45-CTS.texture_cube_map_array.sampling on VI.
3724b8e80941Smrg			 */
3725b8e80941Smrg			if (screen->info.chip_class <= VI)
3726b8e80941Smrg				util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
3727b8e80941Smrg			else
3728b8e80941Smrg				util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
3729b8e80941Smrg			break;
3730848b8605Smrg		default:
3731848b8605Smrg			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
3732848b8605Smrg		}
3733848b8605Smrg	} else {
3734848b8605Smrg		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
3735848b8605Smrg	}
3736848b8605Smrg
3737848b8605Smrg	first_non_void = util_format_get_first_non_void_channel(pipe_format);
3738848b8605Smrg
3739848b8605Smrg	switch (pipe_format) {
3740848b8605Smrg	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
3741848b8605Smrg		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
3742848b8605Smrg		break;
3743848b8605Smrg	default:
3744848b8605Smrg		if (first_non_void < 0) {
3745848b8605Smrg			if (util_format_is_compressed(pipe_format)) {
3746848b8605Smrg				switch (pipe_format) {
3747848b8605Smrg				case PIPE_FORMAT_DXT1_SRGB:
3748848b8605Smrg				case PIPE_FORMAT_DXT1_SRGBA:
3749848b8605Smrg				case PIPE_FORMAT_DXT3_SRGBA:
3750848b8605Smrg				case PIPE_FORMAT_DXT5_SRGBA:
3751848b8605Smrg				case PIPE_FORMAT_BPTC_SRGBA:
3752b8e80941Smrg				case PIPE_FORMAT_ETC2_SRGB8:
3753b8e80941Smrg				case PIPE_FORMAT_ETC2_SRGB8A1:
3754b8e80941Smrg				case PIPE_FORMAT_ETC2_SRGBA8:
3755848b8605Smrg					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
3756848b8605Smrg					break;
3757848b8605Smrg				case PIPE_FORMAT_RGTC1_SNORM:
3758848b8605Smrg				case PIPE_FORMAT_LATC1_SNORM:
3759848b8605Smrg				case PIPE_FORMAT_RGTC2_SNORM:
3760848b8605Smrg				case PIPE_FORMAT_LATC2_SNORM:
3761b8e80941Smrg				case PIPE_FORMAT_ETC2_R11_SNORM:
3762b8e80941Smrg				case PIPE_FORMAT_ETC2_RG11_SNORM:
3763848b8605Smrg				/* implies float, so use SNORM/UNORM to determine
3764848b8605Smrg				   whether data is signed or not */
3765848b8605Smrg				case PIPE_FORMAT_BPTC_RGB_FLOAT:
3766848b8605Smrg					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
3767848b8605Smrg					break;
3768848b8605Smrg				default:
3769848b8605Smrg					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
3770848b8605Smrg					break;
3771848b8605Smrg				}
3772848b8605Smrg			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
3773848b8605Smrg				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
3774848b8605Smrg			} else {
3775848b8605Smrg				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
3776848b8605Smrg			}
3777848b8605Smrg		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
3778848b8605Smrg			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
3779848b8605Smrg		} else {
3780848b8605Smrg			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
3781848b8605Smrg
3782848b8605Smrg			switch (desc->channel[first_non_void].type) {
3783848b8605Smrg			case UTIL_FORMAT_TYPE_FLOAT:
3784848b8605Smrg				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
3785848b8605Smrg				break;
3786848b8605Smrg			case UTIL_FORMAT_TYPE_SIGNED:
3787848b8605Smrg				if (desc->channel[first_non_void].normalized)
3788848b8605Smrg					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
3789848b8605Smrg				else if (desc->channel[first_non_void].pure_integer)
3790848b8605Smrg					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
3791848b8605Smrg				else
3792848b8605Smrg					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
3793848b8605Smrg				break;
3794b8e80941Smrg			case UTIL_FORMAT_TYPE_UNSIGNED:
3795b8e80941Smrg				if (desc->channel[first_non_void].normalized)
3796b8e80941Smrg					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
3797b8e80941Smrg				else if (desc->channel[first_non_void].pure_integer)
3798b8e80941Smrg					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
3799b8e80941Smrg				else
3800b8e80941Smrg					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
3801b8e80941Smrg			}
3802b8e80941Smrg		}
3803b8e80941Smrg	}
3804b8e80941Smrg
3805b8e80941Smrg	data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void);
3806b8e80941Smrg	if (data_format == ~0) {
3807b8e80941Smrg		data_format = 0;
3808b8e80941Smrg	}
3809b8e80941Smrg
3810b8e80941Smrg	/* S8 with Z32 HTILE needs a special format. */
3811b8e80941Smrg	if (screen->info.chip_class >= GFX9 &&
3812b8e80941Smrg	    pipe_format == PIPE_FORMAT_S8_UINT &&
3813b8e80941Smrg	    tex->tc_compatible_htile)
3814b8e80941Smrg		data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
3815b8e80941Smrg
3816b8e80941Smrg	if (!sampler &&
3817b8e80941Smrg	    (res->target == PIPE_TEXTURE_CUBE ||
3818b8e80941Smrg	     res->target == PIPE_TEXTURE_CUBE_ARRAY ||
3819b8e80941Smrg	     (screen->info.chip_class <= VI &&
3820b8e80941Smrg	      res->target == PIPE_TEXTURE_3D))) {
3821b8e80941Smrg		/* For the purpose of shader images, treat cube maps and 3D
3822b8e80941Smrg		 * textures as 2D arrays. For 3D textures, the address
3823b8e80941Smrg		 * calculations for mipmaps are different, so we rely on the
3824b8e80941Smrg		 * caller to effectively disable mipmaps.
3825b8e80941Smrg		 */
3826b8e80941Smrg		type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
3827b8e80941Smrg
3828b8e80941Smrg		assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
3829b8e80941Smrg	} else {
3830b8e80941Smrg		type = si_tex_dim(screen, tex, target, num_samples);
3831b8e80941Smrg	}
3832b8e80941Smrg
3833b8e80941Smrg	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
3834b8e80941Smrg	        height = 1;
3835b8e80941Smrg		depth = res->array_size;
3836b8e80941Smrg	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
3837b8e80941Smrg		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
3838b8e80941Smrg		if (sampler || res->target != PIPE_TEXTURE_3D)
3839b8e80941Smrg			depth = res->array_size;
3840b8e80941Smrg	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
3841b8e80941Smrg		depth = res->array_size / 6;
3842b8e80941Smrg
3843b8e80941Smrg	state[0] = 0;
3844b8e80941Smrg	state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
3845b8e80941Smrg		    S_008F14_NUM_FORMAT_GFX6(num_format));
3846b8e80941Smrg	state[2] = (S_008F18_WIDTH(width - 1) |
3847b8e80941Smrg		    S_008F18_HEIGHT(height - 1) |
3848b8e80941Smrg		    S_008F18_PERF_MOD(4));
3849b8e80941Smrg	state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
3850b8e80941Smrg		    S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
3851b8e80941Smrg		    S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
3852b8e80941Smrg		    S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
3853b8e80941Smrg		    S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : first_level) |
3854b8e80941Smrg		    S_008F1C_LAST_LEVEL(num_samples > 1 ?
3855b8e80941Smrg					util_logbase2(num_samples) :
3856b8e80941Smrg					last_level) |
3857b8e80941Smrg		    S_008F1C_TYPE(type));
3858b8e80941Smrg	state[4] = 0;
3859b8e80941Smrg	state[5] = S_008F24_BASE_ARRAY(first_layer);
3860b8e80941Smrg	state[6] = 0;
3861b8e80941Smrg	state[7] = 0;
3862b8e80941Smrg
3863b8e80941Smrg	if (screen->info.chip_class >= GFX9) {
3864b8e80941Smrg		unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
3865b8e80941Smrg
3866b8e80941Smrg		/* Depth is the last accessible layer on Gfx9.
3867b8e80941Smrg		 * The hw doesn't need to know the total number of layers.
3868b8e80941Smrg		 */
3869b8e80941Smrg		if (type == V_008F1C_SQ_RSRC_IMG_3D)
3870b8e80941Smrg			state[4] |= S_008F20_DEPTH(depth - 1);
3871b8e80941Smrg		else
3872b8e80941Smrg			state[4] |= S_008F20_DEPTH(last_layer);
3873b8e80941Smrg
3874b8e80941Smrg		state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
3875b8e80941Smrg		state[5] |= S_008F24_MAX_MIP(num_samples > 1 ?
3876b8e80941Smrg					     util_logbase2(num_samples) :
3877b8e80941Smrg					     tex->buffer.b.b.last_level);
3878b8e80941Smrg	} else {
3879b8e80941Smrg		state[3] |= S_008F1C_POW2_PAD(res->last_level > 0);
3880b8e80941Smrg		state[4] |= S_008F20_DEPTH(depth - 1);
3881b8e80941Smrg		state[5] |= S_008F24_LAST_ARRAY(last_layer);
3882b8e80941Smrg	}
3883b8e80941Smrg
3884b8e80941Smrg	if (tex->dcc_offset) {
3885b8e80941Smrg		state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(pipe_format));
3886b8e80941Smrg	} else {
3887b8e80941Smrg		/* The last dword is unused by hw. The shader uses it to clear
3888b8e80941Smrg		 * bits in the first dword of sampler state.
3889b8e80941Smrg		 */
3890b8e80941Smrg		if (screen->info.chip_class <= CIK && res->nr_samples <= 1) {
3891b8e80941Smrg			if (first_level == last_level)
3892b8e80941Smrg				state[7] = C_008F30_MAX_ANISO_RATIO;
3893b8e80941Smrg			else
3894b8e80941Smrg				state[7] = 0xffffffff;
3895b8e80941Smrg		}
3896b8e80941Smrg	}
3897b8e80941Smrg
3898b8e80941Smrg	/* Initialize the sampler view for FMASK. */
3899b8e80941Smrg	if (tex->surface.fmask_size) {
3900b8e80941Smrg		uint32_t data_format, num_format;
3901b8e80941Smrg
3902b8e80941Smrg		va = tex->buffer.gpu_address + tex->fmask_offset;
3903b8e80941Smrg
3904b8e80941Smrg#define FMASK(s,f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))
3905b8e80941Smrg		if (screen->info.chip_class >= GFX9) {
3906b8e80941Smrg			data_format = V_008F14_IMG_DATA_FORMAT_FMASK;
3907b8e80941Smrg			switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
3908b8e80941Smrg			case FMASK(2,1):
3909b8e80941Smrg				num_format = V_008F14_IMG_FMASK_8_2_1;
3910b8e80941Smrg				break;
3911b8e80941Smrg			case FMASK(2,2):
3912b8e80941Smrg				num_format = V_008F14_IMG_FMASK_8_2_2;
3913b8e80941Smrg				break;
3914b8e80941Smrg			case FMASK(4,1):
3915b8e80941Smrg				num_format = V_008F14_IMG_FMASK_8_4_1;
3916b8e80941Smrg				break;
3917b8e80941Smrg			case FMASK(4,2):
3918b8e80941Smrg				num_format = V_008F14_IMG_FMASK_8_4_2;
3919b8e80941Smrg				break;
3920b8e80941Smrg			case FMASK(4,4):
3921b8e80941Smrg				num_format = V_008F14_IMG_FMASK_8_4_4;
3922b8e80941Smrg				break;
3923b8e80941Smrg			case FMASK(8,1):
3924b8e80941Smrg				num_format = V_008F14_IMG_FMASK_8_8_1;
3925b8e80941Smrg				break;
3926b8e80941Smrg			case FMASK(8,2):
3927b8e80941Smrg				num_format = V_008F14_IMG_FMASK_16_8_2;
3928b8e80941Smrg				break;
3929b8e80941Smrg			case FMASK(8,4):
3930b8e80941Smrg				num_format = V_008F14_IMG_FMASK_32_8_4;
3931b8e80941Smrg				break;
3932b8e80941Smrg			case FMASK(8,8):
3933b8e80941Smrg				num_format = V_008F14_IMG_FMASK_32_8_8;
3934b8e80941Smrg				break;
3935b8e80941Smrg			case FMASK(16,1):
3936b8e80941Smrg				num_format = V_008F14_IMG_FMASK_16_16_1;
3937b8e80941Smrg				break;
3938b8e80941Smrg			case FMASK(16,2):
3939b8e80941Smrg				num_format = V_008F14_IMG_FMASK_32_16_2;
3940b8e80941Smrg				break;
3941b8e80941Smrg			case FMASK(16,4):
3942b8e80941Smrg				num_format = V_008F14_IMG_FMASK_64_16_4;
3943b8e80941Smrg				break;
3944b8e80941Smrg			case FMASK(16,8):
3945b8e80941Smrg				num_format = V_008F14_IMG_FMASK_64_16_8;
3946b8e80941Smrg				break;
3947b8e80941Smrg			default:
3948b8e80941Smrg				unreachable("invalid nr_samples");
3949b8e80941Smrg			}
3950b8e80941Smrg		} else {
3951b8e80941Smrg			switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
3952b8e80941Smrg			case FMASK(2,1):
3953b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1;
3954b8e80941Smrg				break;
3955b8e80941Smrg			case FMASK(2,2):
3956b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
3957b8e80941Smrg				break;
3958b8e80941Smrg			case FMASK(4,1):
3959b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1;
3960b8e80941Smrg				break;
3961b8e80941Smrg			case FMASK(4,2):
3962b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2;
3963b8e80941Smrg				break;
3964b8e80941Smrg			case FMASK(4,4):
3965b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
3966b8e80941Smrg				break;
3967b8e80941Smrg			case FMASK(8,1):
3968b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1;
3969b8e80941Smrg				break;
3970b8e80941Smrg			case FMASK(8,2):
3971b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2;
3972b8e80941Smrg				break;
3973b8e80941Smrg			case FMASK(8,4):
3974b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4;
3975b8e80941Smrg				break;
3976b8e80941Smrg			case FMASK(8,8):
3977b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
3978b8e80941Smrg				break;
3979b8e80941Smrg			case FMASK(16,1):
3980b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1;
3981b8e80941Smrg				break;
3982b8e80941Smrg			case FMASK(16,2):
3983b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2;
3984b8e80941Smrg				break;
3985b8e80941Smrg			case FMASK(16,4):
3986b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4;
3987b8e80941Smrg				break;
3988b8e80941Smrg			case FMASK(16,8):
3989b8e80941Smrg				data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8;
3990b8e80941Smrg				break;
3991b8e80941Smrg			default:
3992b8e80941Smrg				unreachable("invalid nr_samples");
3993848b8605Smrg			}
3994b8e80941Smrg			num_format = V_008F14_IMG_NUM_FORMAT_UINT;
3995b8e80941Smrg		}
3996b8e80941Smrg#undef FMASK
3997b8e80941Smrg
3998b8e80941Smrg		fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle;
3999b8e80941Smrg		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
4000b8e80941Smrg				 S_008F14_DATA_FORMAT_GFX6(data_format) |
4001b8e80941Smrg				 S_008F14_NUM_FORMAT_GFX6(num_format);
4002b8e80941Smrg		fmask_state[2] = S_008F18_WIDTH(width - 1) |
4003b8e80941Smrg				 S_008F18_HEIGHT(height - 1);
4004b8e80941Smrg		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
4005b8e80941Smrg				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
4006b8e80941Smrg				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
4007b8e80941Smrg				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
4008b8e80941Smrg				 S_008F1C_TYPE(si_tex_dim(screen, tex, target, 0));
4009b8e80941Smrg		fmask_state[4] = 0;
4010b8e80941Smrg		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
4011b8e80941Smrg		fmask_state[6] = 0;
4012b8e80941Smrg		fmask_state[7] = 0;
4013b8e80941Smrg
4014b8e80941Smrg		if (screen->info.chip_class >= GFX9) {
4015b8e80941Smrg			fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode);
4016b8e80941Smrg			fmask_state[4] |= S_008F20_DEPTH(last_layer) |
4017b8e80941Smrg					  S_008F20_PITCH_GFX9(tex->surface.u.gfx9.fmask.epitch);
4018b8e80941Smrg			fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned) |
4019b8e80941Smrg					  S_008F24_META_RB_ALIGNED(tex->surface.u.gfx9.cmask.rb_aligned);
4020b8e80941Smrg		} else {
4021b8e80941Smrg			fmask_state[3] |= S_008F1C_TILING_INDEX(tex->surface.u.legacy.fmask.tiling_index);
4022b8e80941Smrg			fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
4023b8e80941Smrg					  S_008F20_PITCH_GFX6(tex->surface.u.legacy.fmask.pitch_in_pixels - 1);
4024b8e80941Smrg			fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
4025848b8605Smrg		}
4026848b8605Smrg	}
4027b8e80941Smrg}
4028b8e80941Smrg
4029b8e80941Smrg/**
4030b8e80941Smrg * Create a sampler view.
4031b8e80941Smrg *
4032b8e80941Smrg * @param ctx		context
4033b8e80941Smrg * @param texture	texture
4034b8e80941Smrg * @param state		sampler view template
4035b8e80941Smrg * @param width0	width0 override (for compressed textures as int)
4036b8e80941Smrg * @param height0	height0 override (for compressed textures as int)
4037b8e80941Smrg * @param force_level   set the base address to the level (for compressed textures)
4038b8e80941Smrg */
4039b8e80941Smrgstruct pipe_sampler_view *
4040b8e80941Smrgsi_create_sampler_view_custom(struct pipe_context *ctx,
4041b8e80941Smrg			      struct pipe_resource *texture,
4042b8e80941Smrg			      const struct pipe_sampler_view *state,
4043b8e80941Smrg			      unsigned width0, unsigned height0,
4044b8e80941Smrg			      unsigned force_level)
4045b8e80941Smrg{
4046b8e80941Smrg	struct si_context *sctx = (struct si_context*)ctx;
4047b8e80941Smrg	struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
4048b8e80941Smrg	struct si_texture *tex = (struct si_texture*)texture;
4049b8e80941Smrg	unsigned base_level, first_level, last_level;
4050b8e80941Smrg	unsigned char state_swizzle[4];
4051b8e80941Smrg	unsigned height, depth, width;
4052b8e80941Smrg	unsigned last_layer = state->u.tex.last_layer;
4053b8e80941Smrg	enum pipe_format pipe_format;
4054b8e80941Smrg	const struct legacy_surf_level *surflevel;
4055b8e80941Smrg
4056b8e80941Smrg	if (!view)
4057b8e80941Smrg		return NULL;
4058b8e80941Smrg
4059b8e80941Smrg	/* initialize base object */
4060b8e80941Smrg	view->base = *state;
4061b8e80941Smrg	view->base.texture = NULL;
4062b8e80941Smrg	view->base.reference.count = 1;
4063b8e80941Smrg	view->base.context = ctx;
4064b8e80941Smrg
4065b8e80941Smrg	assert(texture);
4066b8e80941Smrg	pipe_resource_reference(&view->base.texture, texture);
4067b8e80941Smrg
4068b8e80941Smrg	if (state->format == PIPE_FORMAT_X24S8_UINT ||
4069b8e80941Smrg	    state->format == PIPE_FORMAT_S8X24_UINT ||
4070b8e80941Smrg	    state->format == PIPE_FORMAT_X32_S8X24_UINT ||
4071b8e80941Smrg	    state->format == PIPE_FORMAT_S8_UINT)
4072b8e80941Smrg		view->is_stencil_sampler = true;
4073b8e80941Smrg
4074b8e80941Smrg	/* Buffer resource. */
4075b8e80941Smrg	if (texture->target == PIPE_BUFFER) {
4076b8e80941Smrg		si_make_buffer_descriptor(sctx->screen,
4077b8e80941Smrg					  si_resource(texture),
4078b8e80941Smrg					  state->format,
4079b8e80941Smrg					  state->u.buf.offset,
4080b8e80941Smrg					  state->u.buf.size,
4081b8e80941Smrg					  view->state);
4082b8e80941Smrg		return &view->base;
4083b8e80941Smrg	}
4084b8e80941Smrg
4085b8e80941Smrg	state_swizzle[0] = state->swizzle_r;
4086b8e80941Smrg	state_swizzle[1] = state->swizzle_g;
4087b8e80941Smrg	state_swizzle[2] = state->swizzle_b;
4088b8e80941Smrg	state_swizzle[3] = state->swizzle_a;
4089848b8605Smrg
4090b8e80941Smrg	base_level = 0;
4091b8e80941Smrg	first_level = state->u.tex.first_level;
4092b8e80941Smrg	last_level = state->u.tex.last_level;
4093b8e80941Smrg	width = width0;
4094b8e80941Smrg	height = height0;
4095b8e80941Smrg	depth = texture->depth0;
4096b8e80941Smrg
4097b8e80941Smrg	if (sctx->chip_class <= VI && force_level) {
4098b8e80941Smrg		assert(force_level == first_level &&
4099b8e80941Smrg		       force_level == last_level);
4100b8e80941Smrg		base_level = force_level;
4101b8e80941Smrg		first_level = 0;
4102b8e80941Smrg		last_level = 0;
4103b8e80941Smrg		width = u_minify(width, force_level);
4104b8e80941Smrg		height = u_minify(height, force_level);
4105b8e80941Smrg		depth = u_minify(depth, force_level);
4106848b8605Smrg	}
4107848b8605Smrg
4108b8e80941Smrg	/* This is not needed if state trackers set last_layer correctly. */
4109b8e80941Smrg	if (state->target == PIPE_TEXTURE_1D ||
4110b8e80941Smrg	    state->target == PIPE_TEXTURE_2D ||
4111b8e80941Smrg	    state->target == PIPE_TEXTURE_RECT ||
4112b8e80941Smrg	    state->target == PIPE_TEXTURE_CUBE)
4113b8e80941Smrg		last_layer = state->u.tex.first_layer;
4114848b8605Smrg
4115b8e80941Smrg	/* Texturing with separate depth and stencil. */
4116b8e80941Smrg	pipe_format = state->format;
4117b8e80941Smrg
4118b8e80941Smrg	/* Depth/stencil texturing sometimes needs separate texture. */
4119b8e80941Smrg	if (tex->is_depth && !si_can_sample_zs(tex, view->is_stencil_sampler)) {
4120b8e80941Smrg		if (!tex->flushed_depth_texture &&
4121b8e80941Smrg		    !si_init_flushed_depth_texture(ctx, texture, NULL)) {
4122b8e80941Smrg			pipe_resource_reference(&view->base.texture, NULL);
4123b8e80941Smrg			FREE(view);
4124b8e80941Smrg			return NULL;
4125b8e80941Smrg		}
4126848b8605Smrg
4127b8e80941Smrg		assert(tex->flushed_depth_texture);
4128848b8605Smrg
4129b8e80941Smrg		/* Override format for the case where the flushed texture
4130b8e80941Smrg		 * contains only Z or only S.
4131b8e80941Smrg		 */
4132b8e80941Smrg		if (tex->flushed_depth_texture->buffer.b.b.format != tex->buffer.b.b.format)
4133b8e80941Smrg			pipe_format = tex->flushed_depth_texture->buffer.b.b.format;
4134848b8605Smrg
4135b8e80941Smrg		tex = tex->flushed_depth_texture;
4136b8e80941Smrg	}
4137b8e80941Smrg
4138b8e80941Smrg	surflevel = tex->surface.u.legacy.level;
4139b8e80941Smrg
4140b8e80941Smrg	if (tex->db_compatible) {
4141b8e80941Smrg		if (!view->is_stencil_sampler)
4142b8e80941Smrg			pipe_format = tex->db_render_format;
4143b8e80941Smrg
4144b8e80941Smrg		switch (pipe_format) {
4145b8e80941Smrg		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
4146b8e80941Smrg			pipe_format = PIPE_FORMAT_Z32_FLOAT;
4147848b8605Smrg			break;
4148b8e80941Smrg		case PIPE_FORMAT_X8Z24_UNORM:
4149b8e80941Smrg		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
4150b8e80941Smrg			/* Z24 is always stored like this for DB
4151b8e80941Smrg			 * compatibility.
4152b8e80941Smrg			 */
4153b8e80941Smrg			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
4154848b8605Smrg			break;
4155b8e80941Smrg		case PIPE_FORMAT_X24S8_UINT:
4156b8e80941Smrg		case PIPE_FORMAT_S8X24_UINT:
4157b8e80941Smrg		case PIPE_FORMAT_X32_S8X24_UINT:
4158b8e80941Smrg			pipe_format = PIPE_FORMAT_S8_UINT;
4159b8e80941Smrg			surflevel = tex->surface.u.legacy.stencil_level;
4160848b8605Smrg			break;
4161b8e80941Smrg		default:;
4162b8e80941Smrg		}
4163848b8605Smrg	}
4164848b8605Smrg
4165b8e80941Smrg	view->dcc_incompatible =
4166b8e80941Smrg		vi_dcc_formats_are_incompatible(texture,
4167b8e80941Smrg						state->u.tex.first_level,
4168b8e80941Smrg						state->format);
4169b8e80941Smrg
4170b8e80941Smrg	si_make_texture_descriptor(sctx->screen, tex, true,
4171b8e80941Smrg				   state->target, pipe_format, state_swizzle,
4172b8e80941Smrg				   first_level, last_level,
4173b8e80941Smrg				   state->u.tex.first_layer, last_layer,
4174b8e80941Smrg				   width, height, depth,
4175b8e80941Smrg				   view->state, view->fmask_state);
4176b8e80941Smrg
4177b8e80941Smrg	unsigned num_format = G_008F14_NUM_FORMAT_GFX6(view->state[1]);
4178b8e80941Smrg	view->is_integer =
4179b8e80941Smrg		num_format == V_008F14_IMG_NUM_FORMAT_USCALED ||
4180b8e80941Smrg		num_format == V_008F14_IMG_NUM_FORMAT_SSCALED ||
4181b8e80941Smrg		num_format == V_008F14_IMG_NUM_FORMAT_UINT ||
4182b8e80941Smrg		num_format == V_008F14_IMG_NUM_FORMAT_SINT;
4183b8e80941Smrg	view->base_level_info = &surflevel[base_level];
4184b8e80941Smrg	view->base_level = base_level;
4185b8e80941Smrg	view->block_width = util_format_get_blockwidth(pipe_format);
4186848b8605Smrg	return &view->base;
4187848b8605Smrg}
4188848b8605Smrg
4189b8e80941Smrgstatic struct pipe_sampler_view *
4190b8e80941Smrgsi_create_sampler_view(struct pipe_context *ctx,
4191b8e80941Smrg		       struct pipe_resource *texture,
4192b8e80941Smrg		       const struct pipe_sampler_view *state)
4193b8e80941Smrg{
4194b8e80941Smrg	return si_create_sampler_view_custom(ctx, texture, state,
4195b8e80941Smrg					     texture ? texture->width0 : 0,
4196b8e80941Smrg					     texture ? texture->height0 : 0, 0);
4197b8e80941Smrg}
4198b8e80941Smrg
4199848b8605Smrgstatic void si_sampler_view_destroy(struct pipe_context *ctx,
4200848b8605Smrg				    struct pipe_sampler_view *state)
4201848b8605Smrg{
4202b8e80941Smrg	struct si_sampler_view *view = (struct si_sampler_view *)state;
4203848b8605Smrg
4204848b8605Smrg	pipe_resource_reference(&state->texture, NULL);
4205848b8605Smrg	FREE(view);
4206848b8605Smrg}
4207848b8605Smrg
4208848b8605Smrgstatic bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
4209848b8605Smrg{
4210848b8605Smrg	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
4211848b8605Smrg	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
4212848b8605Smrg	       (linear_filter &&
4213848b8605Smrg	        (wrap == PIPE_TEX_WRAP_CLAMP ||
4214848b8605Smrg		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
4215848b8605Smrg}
4216848b8605Smrg
4217b8e80941Smrgstatic uint32_t si_translate_border_color(struct si_context *sctx,
4218b8e80941Smrg					  const struct pipe_sampler_state *state,
4219b8e80941Smrg					  const union pipe_color_union *color,
4220b8e80941Smrg					  bool is_integer)
4221848b8605Smrg{
4222848b8605Smrg	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
4223848b8605Smrg			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
4224848b8605Smrg
4225b8e80941Smrg	if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) &&
4226b8e80941Smrg	    !wrap_mode_uses_border_color(state->wrap_t, linear_filter) &&
4227b8e80941Smrg	    !wrap_mode_uses_border_color(state->wrap_r, linear_filter))
4228b8e80941Smrg		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);
4229b8e80941Smrg
4230b8e80941Smrg#define simple_border_types(elt) \
4231b8e80941Smrgdo { \
4232b8e80941Smrg	if (color->elt[0] == 0 && color->elt[1] == 0 &&                         \
4233b8e80941Smrg	    color->elt[2] == 0 && color->elt[3] == 0)                           \
4234b8e80941Smrg		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); \
4235b8e80941Smrg	if (color->elt[0] == 0 && color->elt[1] == 0 &&                         \
4236b8e80941Smrg	    color->elt[2] == 0 && color->elt[3] == 1)                           \
4237b8e80941Smrg		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK); \
4238b8e80941Smrg	if (color->elt[0] == 1 && color->elt[1] == 1 &&                         \
4239b8e80941Smrg	    color->elt[2] == 1 && color->elt[3] == 1)                           \
4240b8e80941Smrg		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE); \
4241b8e80941Smrg} while (false)
4242b8e80941Smrg
4243b8e80941Smrg	if (is_integer)
4244b8e80941Smrg		simple_border_types(ui);
4245b8e80941Smrg	else
4246b8e80941Smrg		simple_border_types(f);
4247b8e80941Smrg
4248b8e80941Smrg#undef simple_border_types
4249b8e80941Smrg
4250b8e80941Smrg	int i;
4251b8e80941Smrg
4252b8e80941Smrg	/* Check if the border has been uploaded already. */
4253b8e80941Smrg	for (i = 0; i < sctx->border_color_count; i++)
4254b8e80941Smrg		if (memcmp(&sctx->border_color_table[i], color,
4255b8e80941Smrg			   sizeof(*color)) == 0)
4256b8e80941Smrg			break;
4257b8e80941Smrg
4258b8e80941Smrg	if (i >= SI_MAX_BORDER_COLORS) {
4259b8e80941Smrg		/* Getting 4096 unique border colors is very unlikely. */
4260b8e80941Smrg		fprintf(stderr, "radeonsi: The border color table is full. "
4261b8e80941Smrg			"Any new border colors will be just black. "
4262b8e80941Smrg			"Please file a bug.\n");
4263b8e80941Smrg		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);
4264b8e80941Smrg	}
4265b8e80941Smrg
4266b8e80941Smrg	if (i == sctx->border_color_count) {
4267b8e80941Smrg		/* Upload a new border color. */
4268b8e80941Smrg		memcpy(&sctx->border_color_table[i], color,
4269b8e80941Smrg		       sizeof(*color));
4270b8e80941Smrg		util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
4271b8e80941Smrg					color, sizeof(*color));
4272b8e80941Smrg		sctx->border_color_count++;
4273b8e80941Smrg	}
4274b8e80941Smrg
4275b8e80941Smrg	return S_008F3C_BORDER_COLOR_PTR(i) |
4276b8e80941Smrg	       S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER);
4277b8e80941Smrg}
4278b8e80941Smrg
/**
 * Convert a float to a fixed-point integer with frac_bits fractional bits.
 *
 * The value is scaled by 2^frac_bits and then converted to int, which
 * discards any remaining fraction (truncation toward zero).
 */
static inline int S_FIXED(float value, unsigned frac_bits)
{
	const int one = 1 << frac_bits;

	return (int)(value * one);
}
4283b8e80941Smrg
4284b8e80941Smrgstatic inline unsigned si_tex_filter(unsigned filter, unsigned max_aniso)
4285b8e80941Smrg{
4286b8e80941Smrg	if (filter == PIPE_TEX_FILTER_LINEAR)
4287b8e80941Smrg		return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
4288b8e80941Smrg				     : V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
4289b8e80941Smrg	else
4290b8e80941Smrg		return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
4291b8e80941Smrg				     : V_008F38_SQ_TEX_XY_FILTER_POINT;
4292b8e80941Smrg}
4293b8e80941Smrg
/**
 * Map a maximum anisotropy value to a ratio code in the range 0..4.
 *
 * The result is floor(log2(filter)) clamped to 4, with 0 and 1 both
 * mapping to 0: <2 -> 0, <4 -> 1, <8 -> 2, <16 -> 3, otherwise 4.
 */
static inline unsigned si_tex_aniso_filter(unsigned filter)
{
	unsigned ratio = 0;

	/* Count how many of the thresholds 2, 4, 8, 16 the value reaches. */
	for (unsigned threshold = 2; ratio < 4 && filter >= threshold;
	     threshold <<= 1)
		ratio++;

	return ratio;
}
4306848b8605Smrg
4307848b8605Smrgstatic void *si_create_sampler_state(struct pipe_context *ctx,
4308848b8605Smrg				     const struct pipe_sampler_state *state)
4309848b8605Smrg{
4310b8e80941Smrg	struct si_context *sctx = (struct si_context *)ctx;
4311b8e80941Smrg	struct si_screen *sscreen = sctx->screen;
4312b8e80941Smrg	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
4313b8e80941Smrg	unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso
4314b8e80941Smrg						       : state->max_anisotropy;
4315b8e80941Smrg	unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso);
4316b8e80941Smrg	union pipe_color_union clamped_border_color;
4317848b8605Smrg
4318b8e80941Smrg	if (!rstate) {
4319848b8605Smrg		return NULL;
4320848b8605Smrg	}
4321848b8605Smrg
4322b8e80941Smrg#ifdef DEBUG
4323b8e80941Smrg	rstate->magic = SI_SAMPLER_STATE_MAGIC;
4324b8e80941Smrg#endif
4325848b8605Smrg	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
4326848b8605Smrg			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
4327848b8605Smrg			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
4328b8e80941Smrg			  S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
4329848b8605Smrg			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
4330848b8605Smrg			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
4331b8e80941Smrg			  S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
4332b8e80941Smrg			  S_008F30_ANISO_BIAS(max_aniso_ratio) |
4333b8e80941Smrg			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
4334b8e80941Smrg			  S_008F30_COMPAT_MODE(sctx->chip_class >= VI));
4335848b8605Smrg	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
4336b8e80941Smrg			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
4337b8e80941Smrg			  S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
4338848b8605Smrg	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
4339b8e80941Smrg			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) |
4340b8e80941Smrg			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) |
4341b8e80941Smrg			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
4342b8e80941Smrg			  S_008F38_MIP_POINT_PRECLAMP(0) |
4343b8e80941Smrg			  S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= VI) |
4344b8e80941Smrg			  S_008F38_FILTER_PREC_FIX(1) |
4345b8e80941Smrg			  S_008F38_ANISO_OVERRIDE(sctx->chip_class >= VI));
4346b8e80941Smrg	rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, false);
4347b8e80941Smrg
4348b8e80941Smrg	/* Create sampler resource for integer textures. */
4349b8e80941Smrg	memcpy(rstate->integer_val, rstate->val, sizeof(rstate->val));
4350b8e80941Smrg	rstate->integer_val[3] = si_translate_border_color(sctx, state, &state->border_color, true);
4351b8e80941Smrg
4352b8e80941Smrg	/* Create sampler resource for upgraded depth textures. */
4353b8e80941Smrg	memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val));
4354b8e80941Smrg
4355b8e80941Smrg	for (unsigned i = 0; i < 4; ++i) {
4356b8e80941Smrg		/* Use channel 0 on purpose, so that we can use OPAQUE_WHITE
4357b8e80941Smrg		 * when the border color is 1.0. */
4358b8e80941Smrg		clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1);
4359848b8605Smrg	}
4360848b8605Smrg
4361b8e80941Smrg	if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0)
4362b8e80941Smrg		rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1);
4363b8e80941Smrg	else
4364b8e80941Smrg		rstate->upgraded_depth_val[3] =
4365b8e80941Smrg			si_translate_border_color(sctx, state, &clamped_border_color, false) |
4366b8e80941Smrg			S_008F3C_UPGRADED_DEPTH(1);
4367848b8605Smrg
4368b8e80941Smrg	return rstate;
4369848b8605Smrg}
4370848b8605Smrg
4371b8e80941Smrgstatic void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
4372848b8605Smrg{
4373848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
4374848b8605Smrg
4375b8e80941Smrg	if (sctx->sample_mask == (uint16_t)sample_mask)
4376848b8605Smrg		return;
4377848b8605Smrg
4378b8e80941Smrg	sctx->sample_mask = sample_mask;
4379b8e80941Smrg	si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_mask);
4380848b8605Smrg}
4381848b8605Smrg
4382b8e80941Smrgstatic void si_emit_sample_mask(struct si_context *sctx)
4383848b8605Smrg{
4384b8e80941Smrg	struct radeon_cmdbuf *cs = sctx->gfx_cs;
4385b8e80941Smrg	unsigned mask = sctx->sample_mask;
4386b8e80941Smrg
4387b8e80941Smrg	/* Needed for line and polygon smoothing as well as for the Polaris
4388b8e80941Smrg	 * small primitive filter. We expect the state tracker to take care of
4389b8e80941Smrg	 * this for us.
4390b8e80941Smrg	 */
4391b8e80941Smrg	assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
4392b8e80941Smrg	       (mask & 1 && sctx->blitter->running));
4393b8e80941Smrg
4394b8e80941Smrg	radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
4395b8e80941Smrg	radeon_emit(cs, mask | (mask << 16));
4396b8e80941Smrg	radeon_emit(cs, mask | (mask << 16));
4397848b8605Smrg}
4398848b8605Smrg
/**
 * Destroy a sampler state object created by si_create_sampler_state.
 *
 * In DEBUG builds the magic value is checked to catch objects that are not
 * live sampler states, and then cleared before the memory is released.
 *
 * @param ctx	context (unused)
 * @param state	sampler state to destroy
 */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
#ifdef DEBUG
	struct si_sampler_state *s = state;

	assert(s->magic == SI_SAMPLER_STATE_MAGIC);
	s->magic = 0;
#endif
	/* The object is allocated with CALLOC_STRUCT, so release it through
	 * the matching FREE wrapper rather than calling libc free() directly.
	 */
	FREE(state);
}
4409848b8605Smrg
4410848b8605Smrg/*
4411848b8605Smrg * Vertex elements & buffers
4412848b8605Smrg */
4413848b8605Smrg
/**
 * Parameters for a fast unsigned division by a constant, with every field
 * 32 bits wide (si_create_vertex_elements statically asserts a total size
 * of 16 bytes).
 */
struct util_fast_udiv_info32 {
	/* the "magic number" multiplier */
	unsigned multiplier;
	/* shift for the dividend before multiplying */
	unsigned pre_shift;
	/* shift for the dividend after multiplying */
	unsigned post_shift;
	/* 0 or 1; if set then increment the numerator, using one of
	 * the two strategies */
	int increment;
};
4421b8e80941Smrg
4422b8e80941Smrgstatic struct util_fast_udiv_info32
4423b8e80941Smrgutil_compute_fast_udiv_info32(uint32_t D, unsigned num_bits)
4424b8e80941Smrg{
4425b8e80941Smrg	struct util_fast_udiv_info info =
4426b8e80941Smrg		util_compute_fast_udiv_info(D, num_bits, 32);
4427b8e80941Smrg
4428b8e80941Smrg	struct util_fast_udiv_info32 result = {
4429b8e80941Smrg		info.multiplier,
4430b8e80941Smrg		info.pre_shift,
4431b8e80941Smrg		info.post_shift,
4432b8e80941Smrg		info.increment,
4433b8e80941Smrg	};
4434b8e80941Smrg	return result;
4435b8e80941Smrg}
4436b8e80941Smrg
4437848b8605Smrgstatic void *si_create_vertex_elements(struct pipe_context *ctx,
4438848b8605Smrg				       unsigned count,
4439848b8605Smrg				       const struct pipe_vertex_element *elements)
4440848b8605Smrg{
4441b8e80941Smrg	struct si_screen *sscreen = (struct si_screen*)ctx->screen;
4442b8e80941Smrg	struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements);
4443b8e80941Smrg	bool used[SI_NUM_VERTEX_BUFFERS] = {};
4444b8e80941Smrg	struct util_fast_udiv_info32 divisor_factors[SI_MAX_ATTRIBS] = {};
4445b8e80941Smrg	STATIC_ASSERT(sizeof(struct util_fast_udiv_info32) == 16);
4446b8e80941Smrg	STATIC_ASSERT(sizeof(divisor_factors[0].multiplier) == 4);
4447b8e80941Smrg	STATIC_ASSERT(sizeof(divisor_factors[0].pre_shift) == 4);
4448b8e80941Smrg	STATIC_ASSERT(sizeof(divisor_factors[0].post_shift) == 4);
4449b8e80941Smrg	STATIC_ASSERT(sizeof(divisor_factors[0].increment) == 4);
4450848b8605Smrg	int i;
4451848b8605Smrg
4452b8e80941Smrg	assert(count <= SI_MAX_ATTRIBS);
4453848b8605Smrg	if (!v)
4454848b8605Smrg		return NULL;
4455848b8605Smrg
4456848b8605Smrg	v->count = count;
4457b8e80941Smrg	v->desc_list_byte_size = align(count * 16, SI_CPDMA_ALIGNMENT);
4458b8e80941Smrg
4459848b8605Smrg	for (i = 0; i < count; ++i) {
4460848b8605Smrg		const struct util_format_description *desc;
4461b8e80941Smrg		const struct util_format_channel_description *channel;
4462848b8605Smrg		unsigned data_format, num_format;
4463848b8605Smrg		int first_non_void;
4464b8e80941Smrg		unsigned vbo_index = elements[i].vertex_buffer_index;
4465b8e80941Smrg		unsigned char swizzle[4];
4466b8e80941Smrg
4467b8e80941Smrg		if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
4468b8e80941Smrg			FREE(v);
4469b8e80941Smrg			return NULL;
4470b8e80941Smrg		}
4471b8e80941Smrg
4472b8e80941Smrg		unsigned instance_divisor = elements[i].instance_divisor;
4473b8e80941Smrg		if (instance_divisor) {
4474b8e80941Smrg			v->uses_instance_divisors = true;
4475b8e80941Smrg
4476b8e80941Smrg			if (instance_divisor == 1) {
4477b8e80941Smrg				v->instance_divisor_is_one |= 1u << i;
4478b8e80941Smrg			} else {
4479b8e80941Smrg				v->instance_divisor_is_fetched |= 1u << i;
4480b8e80941Smrg				divisor_factors[i] =
4481b8e80941Smrg					util_compute_fast_udiv_info32(instance_divisor, 32);
4482b8e80941Smrg			}
4483b8e80941Smrg		}
4484b8e80941Smrg
4485b8e80941Smrg		if (!used[vbo_index]) {
4486b8e80941Smrg			v->first_vb_use_mask |= 1 << i;
4487b8e80941Smrg			used[vbo_index] = true;
4488b8e80941Smrg		}
4489848b8605Smrg
4490848b8605Smrg		desc = util_format_description(elements[i].src_format);
4491848b8605Smrg		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
4492848b8605Smrg		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
4493848b8605Smrg		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
4494b8e80941Smrg		channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
4495b8e80941Smrg		memcpy(swizzle, desc->swizzle, sizeof(swizzle));
4496b8e80941Smrg
4497b8e80941Smrg		v->format_size[i] = desc->block.bits / 8;
4498b8e80941Smrg		v->src_offset[i] = elements[i].src_offset;
4499b8e80941Smrg		v->vertex_buffer_index[i] = vbo_index;
4500b8e80941Smrg
4501b8e80941Smrg		/* The hardware always treats the 2-bit alpha channel as
4502b8e80941Smrg		 * unsigned, so a shader workaround is needed. The affected
4503b8e80941Smrg		 * chips are VI and older except Stoney (GFX8.1).
4504b8e80941Smrg		 */
4505b8e80941Smrg		if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10 &&
4506b8e80941Smrg		    sscreen->info.chip_class <= VI &&
4507b8e80941Smrg		    sscreen->info.family != CHIP_STONEY) {
4508b8e80941Smrg			if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
4509b8e80941Smrg				v->fix_fetch[i] = SI_FIX_FETCH_A2_SNORM;
4510b8e80941Smrg			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
4511b8e80941Smrg				v->fix_fetch[i] = SI_FIX_FETCH_A2_SSCALED;
4512b8e80941Smrg			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
4513b8e80941Smrg				/* This isn't actually used in OpenGL. */
4514b8e80941Smrg				v->fix_fetch[i] = SI_FIX_FETCH_A2_SINT;
4515b8e80941Smrg			}
4516b8e80941Smrg		} else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) {
4517b8e80941Smrg			if (desc->swizzle[3] == PIPE_SWIZZLE_1)
4518b8e80941Smrg				v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_FIXED;
4519b8e80941Smrg			else
4520b8e80941Smrg				v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_FIXED;
4521b8e80941Smrg		} else if (channel && channel->size == 32 && !channel->pure_integer) {
4522b8e80941Smrg			if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
4523b8e80941Smrg				if (channel->normalized) {
4524b8e80941Smrg					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
4525b8e80941Smrg						v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_SNORM;
4526b8e80941Smrg					else
4527b8e80941Smrg						v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SNORM;
4528b8e80941Smrg				} else {
4529b8e80941Smrg					v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SSCALED;
4530b8e80941Smrg				}
4531b8e80941Smrg			} else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
4532b8e80941Smrg				if (channel->normalized) {
4533b8e80941Smrg					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
4534b8e80941Smrg						v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_UNORM;
4535b8e80941Smrg					else
4536b8e80941Smrg						v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_UNORM;
4537b8e80941Smrg				} else {
4538b8e80941Smrg					v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_USCALED;
4539b8e80941Smrg				}
4540b8e80941Smrg			}
4541b8e80941Smrg		} else if (channel && channel->size == 64 &&
4542b8e80941Smrg			   channel->type == UTIL_FORMAT_TYPE_FLOAT) {
4543b8e80941Smrg			switch (desc->nr_channels) {
4544b8e80941Smrg			case 1:
4545b8e80941Smrg			case 2:
4546b8e80941Smrg				v->fix_fetch[i] = SI_FIX_FETCH_RG_64_FLOAT;
4547b8e80941Smrg				swizzle[0] = PIPE_SWIZZLE_X;
4548b8e80941Smrg				swizzle[1] = PIPE_SWIZZLE_Y;
4549b8e80941Smrg				swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0;
4550b8e80941Smrg				swizzle[3] = desc->nr_channels == 2 ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_0;
4551b8e80941Smrg				break;
4552b8e80941Smrg			case 3:
4553b8e80941Smrg				v->fix_fetch[i] = SI_FIX_FETCH_RGB_64_FLOAT;
4554b8e80941Smrg				swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */
4555b8e80941Smrg				swizzle[1] = PIPE_SWIZZLE_Y;
4556b8e80941Smrg				swizzle[2] = PIPE_SWIZZLE_0;
4557b8e80941Smrg				swizzle[3] = PIPE_SWIZZLE_0;
4558b8e80941Smrg				break;
4559b8e80941Smrg			case 4:
4560b8e80941Smrg				v->fix_fetch[i] = SI_FIX_FETCH_RGBA_64_FLOAT;
4561b8e80941Smrg				swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */
4562b8e80941Smrg				swizzle[1] = PIPE_SWIZZLE_Y;
4563b8e80941Smrg				swizzle[2] = PIPE_SWIZZLE_Z;
4564b8e80941Smrg				swizzle[3] = PIPE_SWIZZLE_W;
4565b8e80941Smrg				break;
4566b8e80941Smrg			default:
4567b8e80941Smrg				assert(0);
4568b8e80941Smrg			}
4569b8e80941Smrg		} else if (channel && desc->nr_channels == 3) {
4570b8e80941Smrg			assert(desc->swizzle[0] == PIPE_SWIZZLE_X);
4571b8e80941Smrg
4572b8e80941Smrg			if (channel->size == 8) {
4573b8e80941Smrg				if (channel->pure_integer)
4574b8e80941Smrg					v->fix_fetch[i] = SI_FIX_FETCH_RGB_8_INT;
4575b8e80941Smrg				else
4576b8e80941Smrg					v->fix_fetch[i] = SI_FIX_FETCH_RGB_8;
4577b8e80941Smrg			} else if (channel->size == 16) {
4578b8e80941Smrg				if (channel->pure_integer)
4579b8e80941Smrg					v->fix_fetch[i] = SI_FIX_FETCH_RGB_16_INT;
4580b8e80941Smrg				else
4581b8e80941Smrg					v->fix_fetch[i] = SI_FIX_FETCH_RGB_16;
4582b8e80941Smrg			}
4583b8e80941Smrg		}
4584848b8605Smrg
4585b8e80941Smrg		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
4586b8e80941Smrg				   S_008F0C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
4587b8e80941Smrg				   S_008F0C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
4588b8e80941Smrg				   S_008F0C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
4589848b8605Smrg				   S_008F0C_NUM_FORMAT(num_format) |
4590848b8605Smrg				   S_008F0C_DATA_FORMAT(data_format);
4591848b8605Smrg	}
4592848b8605Smrg
4593b8e80941Smrg	if (v->instance_divisor_is_fetched) {
4594b8e80941Smrg		unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched);
4595b8e80941Smrg
4596b8e80941Smrg		v->instance_divisor_factor_buffer =
4597b8e80941Smrg			(struct si_resource*)
4598b8e80941Smrg			pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
4599b8e80941Smrg					   num_divisors * sizeof(divisor_factors[0]));
4600b8e80941Smrg		if (!v->instance_divisor_factor_buffer) {
4601b8e80941Smrg			FREE(v);
4602b8e80941Smrg			return NULL;
4603b8e80941Smrg		}
4604b8e80941Smrg		void *map = sscreen->ws->buffer_map(v->instance_divisor_factor_buffer->buf,
4605b8e80941Smrg						    NULL, PIPE_TRANSFER_WRITE);
4606b8e80941Smrg		memcpy(map , divisor_factors, num_divisors * sizeof(divisor_factors[0]));
4607b8e80941Smrg	}
4608848b8605Smrg	return v;
4609848b8605Smrg}
4610848b8605Smrg
4611848b8605Smrgstatic void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
4612848b8605Smrg{
4613848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
4614b8e80941Smrg	struct si_vertex_elements *old = sctx->vertex_elements;
4615b8e80941Smrg	struct si_vertex_elements *v = (struct si_vertex_elements*)state;
4616848b8605Smrg
4617848b8605Smrg	sctx->vertex_elements = v;
4618848b8605Smrg	sctx->vertex_buffers_dirty = true;
4619b8e80941Smrg
4620b8e80941Smrg	if (v &&
4621b8e80941Smrg	    (!old ||
4622b8e80941Smrg	     old->count != v->count ||
4623b8e80941Smrg	     old->uses_instance_divisors != v->uses_instance_divisors ||
4624b8e80941Smrg	     v->uses_instance_divisors || /* we don't check which divisors changed */
4625b8e80941Smrg	     memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count)))
4626b8e80941Smrg		sctx->do_update_shaders = true;
4627b8e80941Smrg
4628b8e80941Smrg	if (v && v->instance_divisor_is_fetched) {
4629b8e80941Smrg		struct pipe_constant_buffer cb;
4630b8e80941Smrg
4631b8e80941Smrg		cb.buffer = &v->instance_divisor_factor_buffer->b.b;
4632b8e80941Smrg		cb.user_buffer = NULL;
4633b8e80941Smrg		cb.buffer_offset = 0;
4634b8e80941Smrg		cb.buffer_size = 0xffffffff;
4635b8e80941Smrg		si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb);
4636b8e80941Smrg	}
4637848b8605Smrg}
4638848b8605Smrg
4639848b8605Smrgstatic void si_delete_vertex_element(struct pipe_context *ctx, void *state)
4640848b8605Smrg{
4641848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
4642b8e80941Smrg	struct si_vertex_elements *v = (struct si_vertex_elements*)state;
4643848b8605Smrg
4644848b8605Smrg	if (sctx->vertex_elements == state)
4645848b8605Smrg		sctx->vertex_elements = NULL;
4646b8e80941Smrg	si_resource_reference(&v->instance_divisor_factor_buffer, NULL);
4647848b8605Smrg	FREE(state);
4648848b8605Smrg}
4649848b8605Smrg
4650848b8605Smrgstatic void si_set_vertex_buffers(struct pipe_context *ctx,
4651848b8605Smrg				  unsigned start_slot, unsigned count,
4652848b8605Smrg				  const struct pipe_vertex_buffer *buffers)
4653848b8605Smrg{
4654848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
4655848b8605Smrg	struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
4656848b8605Smrg	int i;
4657848b8605Smrg
4658b8e80941Smrg	assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer));
4659848b8605Smrg
4660848b8605Smrg	if (buffers) {
4661848b8605Smrg		for (i = 0; i < count; i++) {
4662848b8605Smrg			const struct pipe_vertex_buffer *src = buffers + i;
4663848b8605Smrg			struct pipe_vertex_buffer *dsti = dst + i;
4664b8e80941Smrg			struct pipe_resource *buf = src->buffer.resource;
4665848b8605Smrg
4666b8e80941Smrg			pipe_resource_reference(&dsti->buffer.resource, buf);
4667848b8605Smrg			dsti->buffer_offset = src->buffer_offset;
4668848b8605Smrg			dsti->stride = src->stride;
4669b8e80941Smrg			si_context_add_resource_size(sctx, buf);
4670b8e80941Smrg			if (buf)
4671b8e80941Smrg				si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;
4672848b8605Smrg		}
4673848b8605Smrg	} else {
4674848b8605Smrg		for (i = 0; i < count; i++) {
4675b8e80941Smrg			pipe_resource_reference(&dst[i].buffer.resource, NULL);
4676848b8605Smrg		}
4677848b8605Smrg	}
4678848b8605Smrg	sctx->vertex_buffers_dirty = true;
4679848b8605Smrg}
4680848b8605Smrg
4681b8e80941Smrg/*
4682b8e80941Smrg * Misc
4683b8e80941Smrg */
4684b8e80941Smrg
4685b8e80941Smrgstatic void si_set_tess_state(struct pipe_context *ctx,
4686b8e80941Smrg			      const float default_outer_level[4],
4687b8e80941Smrg			      const float default_inner_level[2])
4688848b8605Smrg{
4689848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
4690b8e80941Smrg	struct pipe_constant_buffer cb;
4691b8e80941Smrg	float array[8];
4692848b8605Smrg
4693b8e80941Smrg	memcpy(array, default_outer_level, sizeof(float) * 4);
4694b8e80941Smrg	memcpy(array+4, default_inner_level, sizeof(float) * 2);
4695b8e80941Smrg
4696b8e80941Smrg	cb.buffer = NULL;
4697b8e80941Smrg	cb.user_buffer = NULL;
4698b8e80941Smrg	cb.buffer_size = sizeof(array);
4699b8e80941Smrg
4700b8e80941Smrg	si_upload_const_buffer(sctx, (struct si_resource**)&cb.buffer,
4701b8e80941Smrg			       (void*)array, sizeof(array),
4702b8e80941Smrg			       &cb.buffer_offset);
4703b8e80941Smrg
4704b8e80941Smrg	si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
4705b8e80941Smrg	pipe_resource_reference(&cb.buffer, NULL);
4706848b8605Smrg}
4707848b8605Smrg
4708b8e80941Smrgstatic void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
4709848b8605Smrg{
4710b8e80941Smrg	struct si_context *sctx = (struct si_context *)ctx;
4711b8e80941Smrg
4712b8e80941Smrg	si_update_fb_dirtiness_after_rendering(sctx);
4713b8e80941Smrg
4714b8e80941Smrg	/* Multisample surfaces are flushed in si_decompress_textures. */
4715b8e80941Smrg	if (sctx->framebuffer.uncompressed_cb_mask) {
4716b8e80941Smrg		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
4717b8e80941Smrg					   sctx->framebuffer.CB_has_shader_readable_metadata,
4718b8e80941Smrg					   sctx->framebuffer.all_DCC_pipe_aligned);
4719b8e80941Smrg	}
4720848b8605Smrg}
4721848b8605Smrg
4722b8e80941Smrg/* This only ensures coherency for shader image/buffer stores. */
4723b8e80941Smrgstatic void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
4724848b8605Smrg{
4725848b8605Smrg	struct si_context *sctx = (struct si_context *)ctx;
4726848b8605Smrg
4727b8e80941Smrg	if (!(flags & ~PIPE_BARRIER_UPDATE))
4728b8e80941Smrg		return;
4729b8e80941Smrg
4730b8e80941Smrg	/* Subsequent commands must wait for all shader invocations to
4731b8e80941Smrg	 * complete. */
4732b8e80941Smrg	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
4733b8e80941Smrg	                 SI_CONTEXT_CS_PARTIAL_FLUSH;
4734b8e80941Smrg
4735b8e80941Smrg	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
4736b8e80941Smrg		sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
4737b8e80941Smrg				 SI_CONTEXT_INV_VMEM_L1;
4738b8e80941Smrg
4739b8e80941Smrg	if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
4740b8e80941Smrg		     PIPE_BARRIER_SHADER_BUFFER |
4741b8e80941Smrg		     PIPE_BARRIER_TEXTURE |
4742b8e80941Smrg		     PIPE_BARRIER_IMAGE |
4743b8e80941Smrg		     PIPE_BARRIER_STREAMOUT_BUFFER |
4744b8e80941Smrg		     PIPE_BARRIER_GLOBAL_BUFFER)) {
4745b8e80941Smrg		/* As far as I can tell, L1 contents are written back to L2
4746b8e80941Smrg		 * automatically at end of shader, but the contents of other
4747b8e80941Smrg		 * L1 caches might still be stale. */
4748b8e80941Smrg		sctx->flags |= SI_CONTEXT_INV_VMEM_L1;
4749b8e80941Smrg	}
4750b8e80941Smrg
4751b8e80941Smrg	if (flags & PIPE_BARRIER_INDEX_BUFFER) {
4752b8e80941Smrg		/* Indices are read through TC L2 since VI.
4753b8e80941Smrg		 * L1 isn't used.
4754b8e80941Smrg		 */
4755b8e80941Smrg		if (sctx->screen->info.chip_class <= CIK)
4756b8e80941Smrg			sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
4757b8e80941Smrg	}
4758b8e80941Smrg
4759b8e80941Smrg	/* MSAA color, any depth and any stencil are flushed in
4760b8e80941Smrg	 * si_decompress_textures when needed.
4761b8e80941Smrg	 */
4762b8e80941Smrg	if (flags & PIPE_BARRIER_FRAMEBUFFER &&
4763b8e80941Smrg	    sctx->framebuffer.uncompressed_cb_mask) {
4764b8e80941Smrg		sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
4765b8e80941Smrg
4766b8e80941Smrg		if (sctx->chip_class <= VI)
4767b8e80941Smrg			sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
4768b8e80941Smrg	}
4769b8e80941Smrg
4770b8e80941Smrg	/* Indirect buffers use TC L2 on GFX9, but not older hw. */
4771b8e80941Smrg	if (sctx->screen->info.chip_class <= VI &&
4772b8e80941Smrg	    flags & PIPE_BARRIER_INDIRECT_BUFFER)
4773b8e80941Smrg		sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
4774848b8605Smrg}
4775848b8605Smrg
4776848b8605Smrgstatic void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
4777848b8605Smrg{
4778848b8605Smrg	struct pipe_blend_state blend;
4779848b8605Smrg
4780848b8605Smrg	memset(&blend, 0, sizeof(blend));
4781848b8605Smrg	blend.independent_blend_enable = true;
4782848b8605Smrg	blend.rt[0].colormask = 0xf;
4783b8e80941Smrg	return si_create_blend_state_mode(&sctx->b, &blend, mode);
4784848b8605Smrg}
4785848b8605Smrg
/* Forward declaration: si_init_config() is defined further down in this file
 * but is called from si_init_state_functions(). */
static void si_init_config(struct si_context *sctx);
4787848b8605Smrg
4788b8e80941Smrgvoid si_init_state_compute_functions(struct si_context *sctx)
4789848b8605Smrg{
4790b8e80941Smrg	sctx->b.create_sampler_state = si_create_sampler_state;
4791b8e80941Smrg	sctx->b.delete_sampler_state = si_delete_sampler_state;
4792b8e80941Smrg	sctx->b.create_sampler_view = si_create_sampler_view;
4793b8e80941Smrg	sctx->b.sampler_view_destroy = si_sampler_view_destroy;
4794b8e80941Smrg	sctx->b.memory_barrier = si_memory_barrier;
4795848b8605Smrg}
4796848b8605Smrg
4797848b8605Smrgvoid si_init_state_functions(struct si_context *sctx)
4798848b8605Smrg{
4799b8e80941Smrg	sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;
4800b8e80941Smrg	sctx->atoms.s.msaa_sample_locs.emit = si_emit_msaa_sample_locs;
4801b8e80941Smrg	sctx->atoms.s.db_render_state.emit = si_emit_db_render_state;
4802b8e80941Smrg	sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state;
4803b8e80941Smrg	sctx->atoms.s.msaa_config.emit = si_emit_msaa_config;
4804b8e80941Smrg	sctx->atoms.s.sample_mask.emit = si_emit_sample_mask;
4805b8e80941Smrg	sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state;
4806b8e80941Smrg	sctx->atoms.s.blend_color.emit = si_emit_blend_color;
4807b8e80941Smrg	sctx->atoms.s.clip_regs.emit = si_emit_clip_regs;
4808b8e80941Smrg	sctx->atoms.s.clip_state.emit = si_emit_clip_state;
4809b8e80941Smrg	sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref;
4810b8e80941Smrg
4811b8e80941Smrg	sctx->b.create_blend_state = si_create_blend_state;
4812b8e80941Smrg	sctx->b.bind_blend_state = si_bind_blend_state;
4813b8e80941Smrg	sctx->b.delete_blend_state = si_delete_blend_state;
4814b8e80941Smrg	sctx->b.set_blend_color = si_set_blend_color;
4815b8e80941Smrg
4816b8e80941Smrg	sctx->b.create_rasterizer_state = si_create_rs_state;
4817b8e80941Smrg	sctx->b.bind_rasterizer_state = si_bind_rs_state;
4818b8e80941Smrg	sctx->b.delete_rasterizer_state = si_delete_rs_state;
4819b8e80941Smrg
4820b8e80941Smrg	sctx->b.create_depth_stencil_alpha_state = si_create_dsa_state;
4821b8e80941Smrg	sctx->b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
4822b8e80941Smrg	sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
4823b8e80941Smrg
4824b8e80941Smrg	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
4825b8e80941Smrg	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
4826b8e80941Smrg	sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
4827b8e80941Smrg	sctx->custom_blend_eliminate_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
4828b8e80941Smrg	sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);
4829848b8605Smrg
4830b8e80941Smrg	sctx->b.set_clip_state = si_set_clip_state;
4831b8e80941Smrg	sctx->b.set_stencil_ref = si_set_stencil_ref;
4832848b8605Smrg
4833b8e80941Smrg	sctx->b.set_framebuffer_state = si_set_framebuffer_state;
4834848b8605Smrg
4835b8e80941Smrg	sctx->b.set_sample_mask = si_set_sample_mask;
4836848b8605Smrg
4837b8e80941Smrg	sctx->b.create_vertex_elements_state = si_create_vertex_elements;
4838b8e80941Smrg	sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
4839b8e80941Smrg	sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
4840b8e80941Smrg	sctx->b.set_vertex_buffers = si_set_vertex_buffers;
4841848b8605Smrg
4842b8e80941Smrg	sctx->b.texture_barrier = si_texture_barrier;
4843b8e80941Smrg	sctx->b.set_min_samples = si_set_min_samples;
4844b8e80941Smrg	sctx->b.set_tess_state = si_set_tess_state;
4845848b8605Smrg
4846b8e80941Smrg	sctx->b.set_active_query_state = si_set_active_query_state;
4847848b8605Smrg
4848b8e80941Smrg	si_init_config(sctx);
4849b8e80941Smrg}
4850848b8605Smrg
/**
 * Install the screen-level hook implemented in this file: the
 * is_format_supported query.
 */
void si_init_screen_state_functions(struct si_screen *sscreen)
{
	sscreen->b.is_format_supported = si_is_format_supported;
}
4855848b8605Smrg
4856b8e80941Smrgstatic void si_set_grbm_gfx_index(struct si_context *sctx,
4857b8e80941Smrg				  struct si_pm4_state *pm4,  unsigned value)
4858b8e80941Smrg{
4859b8e80941Smrg	unsigned reg = sctx->chip_class >= CIK ? R_030800_GRBM_GFX_INDEX :
4860b8e80941Smrg						   R_00802C_GRBM_GFX_INDEX;
4861b8e80941Smrg	si_pm4_set_reg(pm4, reg, value);
4862b8e80941Smrg}
4863848b8605Smrg
4864b8e80941Smrgstatic void si_set_grbm_gfx_index_se(struct si_context *sctx,
4865b8e80941Smrg				     struct si_pm4_state *pm4, unsigned se)
4866b8e80941Smrg{
4867b8e80941Smrg	assert(se == ~0 || se < sctx->screen->info.max_se);
4868b8e80941Smrg	si_set_grbm_gfx_index(sctx, pm4,
4869b8e80941Smrg			      (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) :
4870b8e80941Smrg					  S_030800_SE_INDEX(se)) |
4871b8e80941Smrg			      S_030800_SH_BROADCAST_WRITES(1) |
4872b8e80941Smrg			      S_030800_INSTANCE_BROADCAST_WRITES(1));
4873b8e80941Smrg}
4874848b8605Smrg
4875b8e80941Smrgstatic void
4876b8e80941Smrgsi_write_harvested_raster_configs(struct si_context *sctx,
4877b8e80941Smrg				  struct si_pm4_state *pm4,
4878b8e80941Smrg				  unsigned raster_config,
4879b8e80941Smrg				  unsigned raster_config_1)
4880b8e80941Smrg{
4881b8e80941Smrg	unsigned num_se = MAX2(sctx->screen->info.max_se, 1);
4882b8e80941Smrg	unsigned raster_config_se[4];
4883b8e80941Smrg	unsigned se;
4884b8e80941Smrg
4885b8e80941Smrg	ac_get_harvested_configs(&sctx->screen->info,
4886b8e80941Smrg				 raster_config,
4887b8e80941Smrg				 &raster_config_1,
4888b8e80941Smrg				 raster_config_se);
4889b8e80941Smrg
4890b8e80941Smrg	for (se = 0; se < num_se; se++) {
4891b8e80941Smrg		si_set_grbm_gfx_index_se(sctx, pm4, se);
4892b8e80941Smrg		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
4893b8e80941Smrg	}
4894b8e80941Smrg	si_set_grbm_gfx_index(sctx, pm4, ~0);
4895848b8605Smrg
4896b8e80941Smrg	if (sctx->chip_class >= CIK) {
4897b8e80941Smrg		si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
4898b8e80941Smrg	}
4899b8e80941Smrg}
4900848b8605Smrg
4901b8e80941Smrgstatic void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4)
4902b8e80941Smrg{
4903b8e80941Smrg	struct si_screen *sscreen = sctx->screen;
4904b8e80941Smrg	unsigned num_rb = MIN2(sscreen->info.num_render_backends, 16);
4905b8e80941Smrg	unsigned rb_mask = sscreen->info.enabled_rb_mask;
4906b8e80941Smrg	unsigned raster_config = sscreen->pa_sc_raster_config;
4907b8e80941Smrg	unsigned raster_config_1 = sscreen->pa_sc_raster_config_1;
4908b8e80941Smrg
4909b8e80941Smrg	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
4910b8e80941Smrg		/* Always use the default config when all backends are enabled
4911b8e80941Smrg		 * (or when we failed to determine the enabled backends).
4912b8e80941Smrg		 */
4913b8e80941Smrg		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
4914b8e80941Smrg			       raster_config);
4915b8e80941Smrg		if (sctx->chip_class >= CIK)
4916b8e80941Smrg			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
4917b8e80941Smrg				       raster_config_1);
4918b8e80941Smrg	} else {
4919b8e80941Smrg		si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
4920b8e80941Smrg	}
4921848b8605Smrg}
4922848b8605Smrg
/**
 * Build the initial configuration PM4 state of a context.
 *
 * The state starts with a CONTEXT_CONTROL packet, followed by CLEAR_STATE
 * when the screen reports has_clear_state, followed by register writes that
 * are selected by chip class and family. Several registers are written only
 * when CLEAR_STATE is not used. The finished state is passed to
 * si_pm4_upload_indirect_buffer() and stored in sctx->init_config.
 *
 * If the PM4 state can't be allocated, the function returns without
 * touching sctx->init_config.
 */
static void si_init_config(struct si_context *sctx)
{
	struct si_screen *sscreen = sctx->screen;
	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
	bool has_clear_state = sscreen->has_clear_state;
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);

       /* SI, radeon kernel disabled CLEAR_STATE. */
       assert(has_clear_state || sscreen->info.chip_class == SI ||
              sscreen->info.drm_major != 3);

	/* Allocation failure: no initial state is built. */
	if (!pm4)
		return;

	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
	si_pm4_cmd_end(pm4, false);

	if (has_clear_state) {
		si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE);
		si_pm4_cmd_add(pm4, 0);
		si_pm4_cmd_end(pm4, false);
	}

	/* The raster config registers are only programmed on VI and older. */
	if (sctx->chip_class <= VI)
		si_set_raster_config(sctx, pm4);

	/* fui() passes the float value's bit pattern as the register value. */
	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
	if (!has_clear_state)
		si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));

	/* FIXME calculate these values somehow ??? */
	if (sctx->chip_class <= VI) {
		si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
		si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
	}

	if (!has_clear_state) {
		si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
		si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
		si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
	}

	si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
	if (!has_clear_state)
		si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
	if (sctx->chip_class < CIK)
		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
			       S_008A14_CLIP_VTX_REORDER_ENA(1));

	/* CLEAR_STATE doesn't clear these correctly on certain generations.
	 * I don't know why. Deduced by trial and error.
	 */
	if (sctx->chip_class <= CIK) {
		si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
		si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
		si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
		si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
			       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
		si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
		si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
			       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
	}

	/* Registers that are written explicitly only when CLEAR_STATE isn't
	 * used.
	 */
	if (!has_clear_state) {
		si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
			       S_028230_ER_TRI(0xA) |
			       S_028230_ER_POINT(0xA) |
			       S_028230_ER_RECT(0xA) |
			       /* Required by DX10_DIAMOND_TEST_ENA: */
			       S_028230_ER_LINE_LR(0x1A) |
			       S_028230_ER_LINE_RL(0x26) |
			       S_028230_ER_LINE_TB(0xA) |
			       S_028230_ER_LINE_BT(0xA));
		si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
		si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
		si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
		si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
		si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
	}

	/* The vertex index range/offset registers use different register
	 * definitions on GFX9 than on older chips.
	 */
	if (sctx->chip_class >= GFX9) {
		si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
		si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
		si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
	} else {
		/* These registers, when written, also overwrite the CLEAR_STATE
		 * context, so we can't rely on CLEAR_STATE setting them.
		 * It would be an issue if there was another UMD changing them.
		 */
		si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
		si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
		si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
	}

	/* Per-stage SPI_SHADER_PGM_RSRC3_* setup and late VS allocation
	 * (CIK and newer only).
	 */
	if (sctx->chip_class >= CIK) {
		if (sctx->chip_class >= GFX9) {
			si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
				       S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
		} else {
			si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
				       S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
			si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
				       S_00B41C_WAVE_LIMIT(0x3F));
			si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
				       S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));

			/* If this is 0, Bonaire can hang even if GS isn't being used.
			 * Other chips are unaffected. These are suboptimal values,
			 * but we don't use on-chip GS.
			 */
			si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
				       S_028A44_ES_VERTS_PER_SUBGRP(64) |
				       S_028A44_GS_PRIMS_PER_SUBGRP(4));
		}
		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
			       S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));

		/* Compute LATE_ALLOC_VS.LIMIT. */
		unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh;
		unsigned late_alloc_limit; /* The limit is per SH. */

		if (sctx->family == CHIP_KABINI) {
			late_alloc_limit = 0; /* Potential hang on Kabini. */
		} else if (num_cu_per_sh <= 4) {
			/* Too few available compute units per SH. Disallowing
			 * VS to run on one CU could hurt us more than late VS
			 * allocation would help.
			 *
			 * 2 is the highest safe number that allows us to keep
			 * all CUs enabled.
			 */
			late_alloc_limit = 2;
		} else {
			/* This is a good initial value, allowing 1 late_alloc
			 * wave per SIMD on num_cu - 2.
			 */
			late_alloc_limit = (num_cu_per_sh - 2) * 4;

			/* The limit is 0-based, so 0 means 1. */
			assert(late_alloc_limit > 0 && late_alloc_limit <= 64);
			late_alloc_limit -= 1;
		}

		/* VS can't execute on one CU if the limit is > 2. */
		si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
			       S_00B118_CU_EN(late_alloc_limit > 2 ? 0xfffe : 0xffff) |
			       S_00B118_WAVE_LIMIT(0x3F));
		si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS,
			       S_00B11C_LIMIT(late_alloc_limit));
		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
			       S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
	}

	if (sctx->chip_class >= VI) {
		unsigned vgt_tess_distribution;

		vgt_tess_distribution =
			S_028B50_ACCUM_ISOLINE(32) |
			S_028B50_ACCUM_TRI(11) |
			S_028B50_ACCUM_QUAD(11) |
			S_028B50_DONUT_SPLIT(16);

		/* Testing with Unigine Heaven extreme tessellation yielded best results
		 * with TRAP_SPLIT = 3.
		 */
		if (sctx->family == CHIP_FIJI ||
		    sctx->family >= CHIP_POLARIS10)
			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);

		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
	} else if (!has_clear_state) {
		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
	}

	/* Border color buffer address: the register takes the address
	 * shifted right by 8, plus the bits above bit 40 on CIK and newer.
	 * The buffer is also added to the PM4 state's buffer list.
	 */
	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
	if (sctx->chip_class >= CIK) {
		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI,
			       S_028084_ADDRESS(border_color_va >> 40));
	}
	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
		      RADEON_PRIO_BORDER_COLORS);

	if (sctx->chip_class >= GFX9) {
		unsigned num_se = sscreen->info.max_se;
		unsigned pc_lines = 0;

		switch (sctx->family) {
		case CHIP_VEGA10:
		case CHIP_VEGA12:
		case CHIP_VEGA20:
			pc_lines = 4096;
			break;
		case CHIP_RAVEN:
		case CHIP_RAVEN2:
			pc_lines = 1024;
			break;
		default:
			/* Unhandled GFX9 family: pc_lines stays 0 when
			 * asserts are compiled out. */
			assert(0);
		}

		/* NOTE(review): pc_lines is divided by 4 * num_se below; this
		 * assumes info.max_se is nonzero on GFX9 — confirm.
		 */
		si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
			       S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
			       S_028C48_MAX_PRIM_PER_BATCH(1023));
		si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
			       S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
		si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
	}

	/* Hand the finished state to the upload helper and keep it in the
	 * context.
	 */
	si_pm4_upload_indirect_buffer(sctx, pm4);
	sctx->init_config = pm4;
}
5137