1b8e80941Smrg/*
2b8e80941Smrg * Copyright 2017 Advanced Micro Devices, Inc.
3b8e80941Smrg * All Rights Reserved.
4b8e80941Smrg *
5b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
6b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
7b8e80941Smrg * to deal in the Software without restriction, including without limitation
8b8e80941Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub
9b8e80941Smrg * license, and/or sell copies of the Software, and to permit persons to whom
10b8e80941Smrg * the Software is furnished to do so, subject to the following conditions:
11b8e80941Smrg *
12b8e80941Smrg * The above copyright notice and this permission notice (including the next
13b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
14b8e80941Smrg * Software.
15b8e80941Smrg *
16b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19b8e80941Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20b8e80941Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21b8e80941Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22b8e80941Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
23b8e80941Smrg */
24b8e80941Smrg
25b8e80941Smrg#include "si_pipe.h"
26b8e80941Smrg#include "sid.h"
27b8e80941Smrg
28b8e80941Smrg#include "util/u_format.h"
29b8e80941Smrg#include "util/u_pack_color.h"
30b8e80941Smrg#include "util/u_surface.h"
31b8e80941Smrg
32b8e80941Smrgenum {
33b8e80941Smrg	SI_CLEAR         = SI_SAVE_FRAGMENT_STATE,
34b8e80941Smrg	SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
35b8e80941Smrg};
36b8e80941Smrg
37b8e80941Smrgstatic void si_alloc_separate_cmask(struct si_screen *sscreen,
38b8e80941Smrg				    struct si_texture *tex)
39b8e80941Smrg{
40b8e80941Smrg	if (tex->cmask_buffer || !tex->surface.cmask_size)
41b8e80941Smrg                return;
42b8e80941Smrg
43b8e80941Smrg	tex->cmask_buffer =
44b8e80941Smrg		si_aligned_buffer_create(&sscreen->b,
45b8e80941Smrg					 SI_RESOURCE_FLAG_UNMAPPABLE,
46b8e80941Smrg					 PIPE_USAGE_DEFAULT,
47b8e80941Smrg					 tex->surface.cmask_size,
48b8e80941Smrg					 tex->surface.cmask_alignment);
49b8e80941Smrg	if (tex->cmask_buffer == NULL)
50b8e80941Smrg		return;
51b8e80941Smrg
52b8e80941Smrg	tex->cmask_base_address_reg = tex->cmask_buffer->gpu_address >> 8;
53b8e80941Smrg	tex->cb_color_info |= S_028C70_FAST_CLEAR(1);
54b8e80941Smrg
55b8e80941Smrg	p_atomic_inc(&sscreen->compressed_colortex_counter);
56b8e80941Smrg}
57b8e80941Smrg
58b8e80941Smrgstatic bool si_set_clear_color(struct si_texture *tex,
59b8e80941Smrg			       enum pipe_format surface_format,
60b8e80941Smrg			       const union pipe_color_union *color)
61b8e80941Smrg{
62b8e80941Smrg	union util_color uc;
63b8e80941Smrg
64b8e80941Smrg	memset(&uc, 0, sizeof(uc));
65b8e80941Smrg
66b8e80941Smrg	if (tex->surface.bpe == 16) {
67b8e80941Smrg		/* DCC fast clear only:
68b8e80941Smrg		 *   CLEAR_WORD0 = R = G = B
69b8e80941Smrg		 *   CLEAR_WORD1 = A
70b8e80941Smrg		 */
71b8e80941Smrg		assert(color->ui[0] == color->ui[1] &&
72b8e80941Smrg		       color->ui[0] == color->ui[2]);
73b8e80941Smrg		uc.ui[0] = color->ui[0];
74b8e80941Smrg		uc.ui[1] = color->ui[3];
75b8e80941Smrg	} else if (util_format_is_pure_uint(surface_format)) {
76b8e80941Smrg		util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
77b8e80941Smrg	} else if (util_format_is_pure_sint(surface_format)) {
78b8e80941Smrg		util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
79b8e80941Smrg	} else {
80b8e80941Smrg		util_pack_color(color->f, surface_format, &uc);
81b8e80941Smrg	}
82b8e80941Smrg
83b8e80941Smrg	if (memcmp(tex->color_clear_value, &uc, 2 * sizeof(uint32_t)) == 0)
84b8e80941Smrg		return false;
85b8e80941Smrg
86b8e80941Smrg	memcpy(tex->color_clear_value, &uc, 2 * sizeof(uint32_t));
87b8e80941Smrg	return true;
88b8e80941Smrg}
89b8e80941Smrg
90b8e80941Smrg/** Linearize and convert luminace/intensity to red. */
91b8e80941Smrgenum pipe_format si_simplify_cb_format(enum pipe_format format)
92b8e80941Smrg{
93b8e80941Smrg	format = util_format_linear(format);
94b8e80941Smrg	format = util_format_luminance_to_red(format);
95b8e80941Smrg	return util_format_intensity_to_red(format);
96b8e80941Smrg}
97b8e80941Smrg
98b8e80941Smrgbool vi_alpha_is_on_msb(enum pipe_format format)
99b8e80941Smrg{
100b8e80941Smrg	format = si_simplify_cb_format(format);
101b8e80941Smrg
102b8e80941Smrg	/* Formats with 3 channels can't have alpha. */
103b8e80941Smrg	if (util_format_description(format)->nr_channels == 3)
104b8e80941Smrg		return true; /* same as xxxA; is any value OK here? */
105b8e80941Smrg
106b8e80941Smrg	return si_translate_colorswap(format, false) <= 1;
107b8e80941Smrg}
108b8e80941Smrg
109b8e80941Smrgstatic bool vi_get_fast_clear_parameters(enum pipe_format base_format,
110b8e80941Smrg					 enum pipe_format surface_format,
111b8e80941Smrg					 const union pipe_color_union *color,
112b8e80941Smrg					 uint32_t* clear_value,
113b8e80941Smrg					 bool *eliminate_needed)
114b8e80941Smrg{
115b8e80941Smrg	/* If we want to clear without needing a fast clear eliminate step, we
116b8e80941Smrg	 * can set color and alpha independently to 0 or 1 (or 0/max for integer
117b8e80941Smrg	 * formats).
118b8e80941Smrg	 */
119b8e80941Smrg	bool values[4] = {}; /* whether to clear to 0 or 1 */
120b8e80941Smrg	bool color_value = false; /* clear color to 0 or 1 */
121b8e80941Smrg	bool alpha_value = false; /* clear alpha to 0 or 1 */
122b8e80941Smrg	int alpha_channel; /* index of the alpha component */
123b8e80941Smrg	bool has_color = false;
124b8e80941Smrg	bool has_alpha = false;
125b8e80941Smrg
126b8e80941Smrg	const struct util_format_description *desc =
127b8e80941Smrg		util_format_description(si_simplify_cb_format(surface_format));
128b8e80941Smrg
129b8e80941Smrg	/* 128-bit fast clear with different R,G,B values is unsupported. */
130b8e80941Smrg	if (desc->block.bits == 128 &&
131b8e80941Smrg	    (color->ui[0] != color->ui[1] ||
132b8e80941Smrg	     color->ui[0] != color->ui[2]))
133b8e80941Smrg		return false;
134b8e80941Smrg
135b8e80941Smrg	*eliminate_needed = true;
136b8e80941Smrg	*clear_value = DCC_CLEAR_COLOR_REG;
137b8e80941Smrg
138b8e80941Smrg	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
139b8e80941Smrg		return true; /* need ELIMINATE_FAST_CLEAR */
140b8e80941Smrg
141b8e80941Smrg	bool base_alpha_is_on_msb = vi_alpha_is_on_msb(base_format);
142b8e80941Smrg	bool surf_alpha_is_on_msb = vi_alpha_is_on_msb(surface_format);
143b8e80941Smrg
144b8e80941Smrg	/* Formats with 3 channels can't have alpha. */
145b8e80941Smrg	if (desc->nr_channels == 3)
146b8e80941Smrg		alpha_channel = -1;
147b8e80941Smrg	else if (surf_alpha_is_on_msb)
148b8e80941Smrg		alpha_channel = desc->nr_channels - 1;
149b8e80941Smrg	else
150b8e80941Smrg		alpha_channel = 0;
151b8e80941Smrg
152b8e80941Smrg	for (int i = 0; i < 4; ++i) {
153b8e80941Smrg		if (desc->swizzle[i] >= PIPE_SWIZZLE_0)
154b8e80941Smrg			continue;
155b8e80941Smrg
156b8e80941Smrg		if (desc->channel[i].pure_integer &&
157b8e80941Smrg		    desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
158b8e80941Smrg			/* Use the maximum value for clamping the clear color. */
159b8e80941Smrg			int max = u_bit_consecutive(0, desc->channel[i].size - 1);
160b8e80941Smrg
161b8e80941Smrg			values[i] = color->i[i] != 0;
162b8e80941Smrg			if (color->i[i] != 0 && MIN2(color->i[i], max) != max)
163b8e80941Smrg				return true; /* need ELIMINATE_FAST_CLEAR */
164b8e80941Smrg		} else if (desc->channel[i].pure_integer &&
165b8e80941Smrg			   desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
166b8e80941Smrg			/* Use the maximum value for clamping the clear color. */
167b8e80941Smrg			unsigned max = u_bit_consecutive(0, desc->channel[i].size);
168b8e80941Smrg
169b8e80941Smrg			values[i] = color->ui[i] != 0U;
170b8e80941Smrg			if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max)
171b8e80941Smrg				return true; /* need ELIMINATE_FAST_CLEAR */
172b8e80941Smrg		} else {
173b8e80941Smrg			values[i] = color->f[i] != 0.0F;
174b8e80941Smrg			if (color->f[i] != 0.0F && color->f[i] != 1.0F)
175b8e80941Smrg				return true; /* need ELIMINATE_FAST_CLEAR */
176b8e80941Smrg		}
177b8e80941Smrg
178b8e80941Smrg		if (desc->swizzle[i] == alpha_channel) {
179b8e80941Smrg			alpha_value = values[i];
180b8e80941Smrg			has_alpha = true;
181b8e80941Smrg		} else {
182b8e80941Smrg			color_value = values[i];
183b8e80941Smrg			has_color = true;
184b8e80941Smrg		}
185b8e80941Smrg	}
186b8e80941Smrg
187b8e80941Smrg	/* If alpha isn't present, make it the same as color, and vice versa. */
188b8e80941Smrg	if (!has_alpha)
189b8e80941Smrg		alpha_value = color_value;
190b8e80941Smrg	else if (!has_color)
191b8e80941Smrg		color_value = alpha_value;
192b8e80941Smrg
193b8e80941Smrg	if (color_value != alpha_value &&
194b8e80941Smrg	    base_alpha_is_on_msb != surf_alpha_is_on_msb)
195b8e80941Smrg		return true; /* require ELIMINATE_FAST_CLEAR */
196b8e80941Smrg
197b8e80941Smrg	/* Check if all color values are equal if they are present. */
198b8e80941Smrg	for (int i = 0; i < 4; ++i) {
199b8e80941Smrg		if (desc->swizzle[i] <= PIPE_SWIZZLE_W &&
200b8e80941Smrg		    desc->swizzle[i] != alpha_channel &&
201b8e80941Smrg		    values[i] != color_value)
202b8e80941Smrg			return true; /* require ELIMINATE_FAST_CLEAR */
203b8e80941Smrg	}
204b8e80941Smrg
205b8e80941Smrg	/* This doesn't need ELIMINATE_FAST_CLEAR.
206b8e80941Smrg	 * On chips predating Raven2, the DCC clear codes and the CB clear
207b8e80941Smrg	 * color registers must match.
208b8e80941Smrg	 */
209b8e80941Smrg	*eliminate_needed = false;
210b8e80941Smrg
211b8e80941Smrg	if (color_value) {
212b8e80941Smrg		if (alpha_value)
213b8e80941Smrg			*clear_value = DCC_CLEAR_COLOR_1111;
214b8e80941Smrg		else
215b8e80941Smrg			*clear_value = DCC_CLEAR_COLOR_1110;
216b8e80941Smrg	} else {
217b8e80941Smrg		if (alpha_value)
218b8e80941Smrg			*clear_value = DCC_CLEAR_COLOR_0001;
219b8e80941Smrg		else
220b8e80941Smrg			*clear_value = DCC_CLEAR_COLOR_0000;
221b8e80941Smrg	}
222b8e80941Smrg	return true;
223b8e80941Smrg}
224b8e80941Smrg
225b8e80941Smrgvoid vi_dcc_clear_level(struct si_context *sctx,
226b8e80941Smrg			struct si_texture *tex,
227b8e80941Smrg			unsigned level, unsigned clear_value)
228b8e80941Smrg{
229b8e80941Smrg	struct pipe_resource *dcc_buffer;
230b8e80941Smrg	uint64_t dcc_offset, clear_size;
231b8e80941Smrg
232b8e80941Smrg	assert(vi_dcc_enabled(tex, level));
233b8e80941Smrg
234b8e80941Smrg	if (tex->dcc_separate_buffer) {
235b8e80941Smrg		dcc_buffer = &tex->dcc_separate_buffer->b.b;
236b8e80941Smrg		dcc_offset = 0;
237b8e80941Smrg	} else {
238b8e80941Smrg		dcc_buffer = &tex->buffer.b.b;
239b8e80941Smrg		dcc_offset = tex->dcc_offset;
240b8e80941Smrg	}
241b8e80941Smrg
242b8e80941Smrg	if (sctx->chip_class >= GFX9) {
243b8e80941Smrg		/* Mipmap level clears aren't implemented. */
244b8e80941Smrg		assert(tex->buffer.b.b.last_level == 0);
245b8e80941Smrg		/* 4x and 8x MSAA needs a sophisticated compute shader for
246b8e80941Smrg		 * the clear. See AMDVLK. */
247b8e80941Smrg		assert(tex->buffer.b.b.nr_storage_samples <= 2);
248b8e80941Smrg		clear_size = tex->surface.dcc_size;
249b8e80941Smrg	} else {
250b8e80941Smrg		unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);
251b8e80941Smrg
252b8e80941Smrg		/* If this is 0, fast clear isn't possible. (can occur with MSAA) */
253b8e80941Smrg		assert(tex->surface.u.legacy.level[level].dcc_fast_clear_size);
254b8e80941Smrg		/* Layered 4x and 8x MSAA DCC fast clears need to clear
255b8e80941Smrg		 * dcc_fast_clear_size bytes for each layer. A compute shader
256b8e80941Smrg		 * would be more efficient than separate per-layer clear operations.
257b8e80941Smrg		 */
258b8e80941Smrg		assert(tex->buffer.b.b.nr_storage_samples <= 2 || num_layers == 1);
259b8e80941Smrg
260b8e80941Smrg		dcc_offset += tex->surface.u.legacy.level[level].dcc_offset;
261b8e80941Smrg		clear_size = tex->surface.u.legacy.level[level].dcc_fast_clear_size *
262b8e80941Smrg			     num_layers;
263b8e80941Smrg	}
264b8e80941Smrg
265b8e80941Smrg	si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
266b8e80941Smrg			&clear_value, 4, SI_COHERENCY_CB_META, false);
267b8e80941Smrg}
268b8e80941Smrg
269b8e80941Smrg/* Set the same micro tile mode as the destination of the last MSAA resolve.
270b8e80941Smrg * This allows hitting the MSAA resolve fast path, which requires that both
271b8e80941Smrg * src and dst micro tile modes match.
272b8e80941Smrg */
273b8e80941Smrgstatic void si_set_optimal_micro_tile_mode(struct si_screen *sscreen,
274b8e80941Smrg					   struct si_texture *tex)
275b8e80941Smrg{
276b8e80941Smrg	if (tex->buffer.b.is_shared ||
277b8e80941Smrg	    tex->buffer.b.b.nr_samples <= 1 ||
278b8e80941Smrg	    tex->surface.micro_tile_mode == tex->last_msaa_resolve_target_micro_mode)
279b8e80941Smrg		return;
280b8e80941Smrg
281b8e80941Smrg	assert(sscreen->info.chip_class >= GFX9 ||
282b8e80941Smrg	       tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
283b8e80941Smrg	assert(tex->buffer.b.b.last_level == 0);
284b8e80941Smrg
285b8e80941Smrg	if (sscreen->info.chip_class >= GFX9) {
286b8e80941Smrg		/* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
287b8e80941Smrg		assert(tex->surface.u.gfx9.surf.swizzle_mode >= 4);
288b8e80941Smrg
289b8e80941Smrg		/* If you do swizzle_mode % 4, you'll get:
290b8e80941Smrg		 *   0 = Depth
291b8e80941Smrg		 *   1 = Standard,
292b8e80941Smrg		 *   2 = Displayable
293b8e80941Smrg		 *   3 = Rotated
294b8e80941Smrg		 *
295b8e80941Smrg		 * Depth-sample order isn't allowed:
296b8e80941Smrg		 */
297b8e80941Smrg		assert(tex->surface.u.gfx9.surf.swizzle_mode % 4 != 0);
298b8e80941Smrg
299b8e80941Smrg		switch (tex->last_msaa_resolve_target_micro_mode) {
300b8e80941Smrg		case RADEON_MICRO_MODE_DISPLAY:
301b8e80941Smrg			tex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
302b8e80941Smrg			tex->surface.u.gfx9.surf.swizzle_mode += 2; /* D */
303b8e80941Smrg			break;
304b8e80941Smrg		case RADEON_MICRO_MODE_THIN:
305b8e80941Smrg			tex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
306b8e80941Smrg			tex->surface.u.gfx9.surf.swizzle_mode += 1; /* S */
307b8e80941Smrg			break;
308b8e80941Smrg		case RADEON_MICRO_MODE_ROTATED:
309b8e80941Smrg			tex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
310b8e80941Smrg			tex->surface.u.gfx9.surf.swizzle_mode += 3; /* R */
311b8e80941Smrg			break;
312b8e80941Smrg		default: /* depth */
313b8e80941Smrg			assert(!"unexpected micro mode");
314b8e80941Smrg			return;
315b8e80941Smrg		}
316b8e80941Smrg	} else if (sscreen->info.chip_class >= CIK) {
317b8e80941Smrg		/* These magic numbers were copied from addrlib. It doesn't use
318b8e80941Smrg		 * any definitions for them either. They are all 2D_TILED_THIN1
319b8e80941Smrg		 * modes with different bpp and micro tile mode.
320b8e80941Smrg		 */
321b8e80941Smrg		switch (tex->last_msaa_resolve_target_micro_mode) {
322b8e80941Smrg		case RADEON_MICRO_MODE_DISPLAY:
323b8e80941Smrg			tex->surface.u.legacy.tiling_index[0] = 10;
324b8e80941Smrg			break;
325b8e80941Smrg		case RADEON_MICRO_MODE_THIN:
326b8e80941Smrg			tex->surface.u.legacy.tiling_index[0] = 14;
327b8e80941Smrg			break;
328b8e80941Smrg		case RADEON_MICRO_MODE_ROTATED:
329b8e80941Smrg			tex->surface.u.legacy.tiling_index[0] = 28;
330b8e80941Smrg			break;
331b8e80941Smrg		default: /* depth, thick */
332b8e80941Smrg			assert(!"unexpected micro mode");
333b8e80941Smrg			return;
334b8e80941Smrg		}
335b8e80941Smrg	} else { /* SI */
336b8e80941Smrg		switch (tex->last_msaa_resolve_target_micro_mode) {
337b8e80941Smrg		case RADEON_MICRO_MODE_DISPLAY:
338b8e80941Smrg			switch (tex->surface.bpe) {
339b8e80941Smrg			case 1:
340b8e80941Smrg                            tex->surface.u.legacy.tiling_index[0] = 10;
341b8e80941Smrg                            break;
342b8e80941Smrg			case 2:
343b8e80941Smrg                            tex->surface.u.legacy.tiling_index[0] = 11;
344b8e80941Smrg                            break;
345b8e80941Smrg			default: /* 4, 8 */
346b8e80941Smrg                            tex->surface.u.legacy.tiling_index[0] = 12;
347b8e80941Smrg                            break;
348b8e80941Smrg			}
349b8e80941Smrg			break;
350b8e80941Smrg		case RADEON_MICRO_MODE_THIN:
351b8e80941Smrg			switch (tex->surface.bpe) {
352b8e80941Smrg			case 1:
353b8e80941Smrg                                tex->surface.u.legacy.tiling_index[0] = 14;
354b8e80941Smrg                                break;
355b8e80941Smrg			case 2:
356b8e80941Smrg                                tex->surface.u.legacy.tiling_index[0] = 15;
357b8e80941Smrg                                break;
358b8e80941Smrg			case 4:
359b8e80941Smrg                                tex->surface.u.legacy.tiling_index[0] = 16;
360b8e80941Smrg                                break;
361b8e80941Smrg			default: /* 8, 16 */
362b8e80941Smrg                                tex->surface.u.legacy.tiling_index[0] = 17;
363b8e80941Smrg                                break;
364b8e80941Smrg			}
365b8e80941Smrg			break;
366b8e80941Smrg		default: /* depth, thick */
367b8e80941Smrg			assert(!"unexpected micro mode");
368b8e80941Smrg			return;
369b8e80941Smrg		}
370b8e80941Smrg	}
371b8e80941Smrg
372b8e80941Smrg	tex->surface.micro_tile_mode = tex->last_msaa_resolve_target_micro_mode;
373b8e80941Smrg
374b8e80941Smrg	p_atomic_inc(&sscreen->dirty_tex_counter);
375b8e80941Smrg}
376b8e80941Smrg
377b8e80941Smrgstatic void si_do_fast_color_clear(struct si_context *sctx,
378b8e80941Smrg				   unsigned *buffers,
379b8e80941Smrg				   const union pipe_color_union *color)
380b8e80941Smrg{
381b8e80941Smrg	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
382b8e80941Smrg	int i;
383b8e80941Smrg
384b8e80941Smrg	/* This function is broken in BE, so just disable this path for now */
385b8e80941Smrg#ifdef PIPE_ARCH_BIG_ENDIAN
386b8e80941Smrg	return;
387b8e80941Smrg#endif
388b8e80941Smrg
389b8e80941Smrg	if (sctx->render_cond)
390b8e80941Smrg		return;
391b8e80941Smrg
392b8e80941Smrg	for (i = 0; i < fb->nr_cbufs; i++) {
393b8e80941Smrg		struct si_texture *tex;
394b8e80941Smrg		unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
395b8e80941Smrg
396b8e80941Smrg		if (!fb->cbufs[i])
397b8e80941Smrg			continue;
398b8e80941Smrg
399b8e80941Smrg		/* if this colorbuffer is not being cleared */
400b8e80941Smrg		if (!(*buffers & clear_bit))
401b8e80941Smrg			continue;
402b8e80941Smrg
403b8e80941Smrg		unsigned level = fb->cbufs[i]->u.tex.level;
404b8e80941Smrg		if (level > 0)
405b8e80941Smrg			continue;
406b8e80941Smrg
407b8e80941Smrg		tex = (struct si_texture *)fb->cbufs[i]->texture;
408b8e80941Smrg
409b8e80941Smrg		/* TODO: GFX9: Implement DCC fast clear for level 0 of
410b8e80941Smrg		 * mipmapped textures. Mipmapped DCC has to clear a rectangular
411b8e80941Smrg		 * area of DCC for level 0 (because the whole miptree is
412b8e80941Smrg		 * organized in a 2D plane).
413b8e80941Smrg		 */
414b8e80941Smrg		if (sctx->chip_class >= GFX9 &&
415b8e80941Smrg		    tex->buffer.b.b.last_level > 0)
416b8e80941Smrg			continue;
417b8e80941Smrg
418b8e80941Smrg		/* the clear is allowed if all layers are bound */
419b8e80941Smrg		if (fb->cbufs[i]->u.tex.first_layer != 0 ||
420b8e80941Smrg		    fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->buffer.b.b, 0)) {
421b8e80941Smrg			continue;
422b8e80941Smrg		}
423b8e80941Smrg
424b8e80941Smrg		/* only supported on tiled surfaces */
425b8e80941Smrg		if (tex->surface.is_linear) {
426b8e80941Smrg			continue;
427b8e80941Smrg		}
428b8e80941Smrg
429b8e80941Smrg		/* shared textures can't use fast clear without an explicit flush,
430b8e80941Smrg		 * because there is no way to communicate the clear color among
431b8e80941Smrg		 * all clients
432b8e80941Smrg		 */
433b8e80941Smrg		if (tex->buffer.b.is_shared &&
434b8e80941Smrg		    !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
435b8e80941Smrg			continue;
436b8e80941Smrg
437b8e80941Smrg		if (sctx->chip_class <= VI &&
438b8e80941Smrg		    tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
439b8e80941Smrg		    !sctx->screen->info.htile_cmask_support_1d_tiling)
440b8e80941Smrg			continue;
441b8e80941Smrg
442b8e80941Smrg		/* Use a slow clear for small surfaces where the cost of
443b8e80941Smrg		 * the eliminate pass can be higher than the benefit of fast
444b8e80941Smrg		 * clear. The closed driver does this, but the numbers may differ.
445b8e80941Smrg		 *
446b8e80941Smrg		 * This helps on both dGPUs and APUs, even small APUs like Mullins.
447b8e80941Smrg		 */
448b8e80941Smrg		bool too_small = tex->buffer.b.b.nr_samples <= 1 &&
449b8e80941Smrg				 tex->buffer.b.b.width0 *
450b8e80941Smrg				 tex->buffer.b.b.height0 <= 512 * 512;
451b8e80941Smrg		bool eliminate_needed = false;
452b8e80941Smrg		bool fmask_decompress_needed = false;
453b8e80941Smrg
454b8e80941Smrg		/* Fast clear is the most appropriate place to enable DCC for
455b8e80941Smrg		 * displayable surfaces.
456b8e80941Smrg		 */
457b8e80941Smrg		if (sctx->family == CHIP_STONEY && !too_small) {
458b8e80941Smrg			vi_separate_dcc_try_enable(sctx, tex);
459b8e80941Smrg
460b8e80941Smrg			/* RB+ isn't supported with a CMASK clear only on Stoney,
461b8e80941Smrg			 * so all clears are considered to be hypothetically slow
462b8e80941Smrg			 * clears, which is weighed when determining whether to
463b8e80941Smrg			 * enable separate DCC.
464b8e80941Smrg			 */
465b8e80941Smrg			if (tex->dcc_gather_statistics) /* only for Stoney */
466b8e80941Smrg				tex->num_slow_clears++;
467b8e80941Smrg		}
468b8e80941Smrg
469b8e80941Smrg		/* Try to clear DCC first, otherwise try CMASK. */
470b8e80941Smrg		if (vi_dcc_enabled(tex, 0)) {
471b8e80941Smrg			uint32_t reset_value;
472b8e80941Smrg
473b8e80941Smrg			if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR))
474b8e80941Smrg				continue;
475b8e80941Smrg
476b8e80941Smrg			/* This can happen with mipmapping or MSAA. */
477b8e80941Smrg			if (sctx->chip_class == VI &&
478b8e80941Smrg			    !tex->surface.u.legacy.level[level].dcc_fast_clear_size)
479b8e80941Smrg				continue;
480b8e80941Smrg
481b8e80941Smrg			if (!vi_get_fast_clear_parameters(tex->buffer.b.b.format,
482b8e80941Smrg							  fb->cbufs[i]->format,
483b8e80941Smrg							  color, &reset_value,
484b8e80941Smrg							  &eliminate_needed))
485b8e80941Smrg				continue;
486b8e80941Smrg
487b8e80941Smrg			if (eliminate_needed && too_small)
488b8e80941Smrg				continue;
489b8e80941Smrg
490b8e80941Smrg			/* DCC fast clear with MSAA should clear CMASK to 0xC. */
491b8e80941Smrg			if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer) {
492b8e80941Smrg				/* TODO: This doesn't work with MSAA. */
493b8e80941Smrg				if (eliminate_needed)
494b8e80941Smrg					continue;
495b8e80941Smrg
496b8e80941Smrg				uint32_t clear_value = 0xCCCCCCCC;
497b8e80941Smrg				si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
498b8e80941Smrg						tex->cmask_offset, tex->surface.cmask_size,
499b8e80941Smrg						&clear_value, 4, SI_COHERENCY_CB_META, false);
500b8e80941Smrg				fmask_decompress_needed = true;
501b8e80941Smrg			}
502b8e80941Smrg
503b8e80941Smrg			vi_dcc_clear_level(sctx, tex, 0, reset_value);
504b8e80941Smrg			tex->separate_dcc_dirty = true;
505b8e80941Smrg		} else {
506b8e80941Smrg			if (too_small)
507b8e80941Smrg				continue;
508b8e80941Smrg
509b8e80941Smrg			/* 128-bit formats are unusupported */
510b8e80941Smrg			if (tex->surface.bpe > 8) {
511b8e80941Smrg				continue;
512b8e80941Smrg			}
513b8e80941Smrg
514b8e80941Smrg			/* RB+ doesn't work with CMASK fast clear on Stoney. */
515b8e80941Smrg			if (sctx->family == CHIP_STONEY)
516b8e80941Smrg				continue;
517b8e80941Smrg
518b8e80941Smrg			/* ensure CMASK is enabled */
519b8e80941Smrg			si_alloc_separate_cmask(sctx->screen, tex);
520b8e80941Smrg			if (!tex->cmask_buffer)
521b8e80941Smrg				continue;
522b8e80941Smrg
523b8e80941Smrg			/* Do the fast clear. */
524b8e80941Smrg			uint32_t clear_value = 0;
525b8e80941Smrg			si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
526b8e80941Smrg					tex->cmask_offset, tex->surface.cmask_size,
527b8e80941Smrg					&clear_value, 4, SI_COHERENCY_CB_META, false);
528b8e80941Smrg			eliminate_needed = true;
529b8e80941Smrg		}
530b8e80941Smrg
531b8e80941Smrg		if ((eliminate_needed || fmask_decompress_needed) &&
532b8e80941Smrg		    !(tex->dirty_level_mask & (1 << level))) {
533b8e80941Smrg			tex->dirty_level_mask |= 1 << level;
534b8e80941Smrg			p_atomic_inc(&sctx->screen->compressed_colortex_counter);
535b8e80941Smrg		}
536b8e80941Smrg
537b8e80941Smrg		/* We can change the micro tile mode before a full clear. */
538b8e80941Smrg		si_set_optimal_micro_tile_mode(sctx->screen, tex);
539b8e80941Smrg
540b8e80941Smrg		*buffers &= ~clear_bit;
541b8e80941Smrg
542b8e80941Smrg		/* Chips with DCC constant encoding don't need to set the clear
543b8e80941Smrg		 * color registers for DCC clear values 0 and 1.
544b8e80941Smrg		 */
545b8e80941Smrg		if (sctx->screen->has_dcc_constant_encode && !eliminate_needed)
546b8e80941Smrg			continue;
547b8e80941Smrg
548b8e80941Smrg		if (si_set_clear_color(tex, fb->cbufs[i]->format, color)) {
549b8e80941Smrg			sctx->framebuffer.dirty_cbufs |= 1 << i;
550b8e80941Smrg			si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
551b8e80941Smrg		}
552b8e80941Smrg	}
553b8e80941Smrg}
554b8e80941Smrg
555b8e80941Smrgstatic void si_clear(struct pipe_context *ctx, unsigned buffers,
556b8e80941Smrg		     const union pipe_color_union *color,
557b8e80941Smrg		     double depth, unsigned stencil)
558b8e80941Smrg{
559b8e80941Smrg	struct si_context *sctx = (struct si_context *)ctx;
560b8e80941Smrg	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
561b8e80941Smrg	struct pipe_surface *zsbuf = fb->zsbuf;
562b8e80941Smrg	struct si_texture *zstex =
563b8e80941Smrg		zsbuf ? (struct si_texture*)zsbuf->texture : NULL;
564b8e80941Smrg
565b8e80941Smrg	if (buffers & PIPE_CLEAR_COLOR) {
566b8e80941Smrg		si_do_fast_color_clear(sctx, &buffers, color);
567b8e80941Smrg		if (!buffers)
568b8e80941Smrg			return; /* all buffers have been fast cleared */
569b8e80941Smrg
570b8e80941Smrg		/* These buffers cannot use fast clear, make sure to disable expansion. */
571b8e80941Smrg		for (unsigned i = 0; i < fb->nr_cbufs; i++) {
572b8e80941Smrg			struct si_texture *tex;
573b8e80941Smrg
574b8e80941Smrg			/* If not clearing this buffer, skip. */
575b8e80941Smrg			if (!(buffers & (PIPE_CLEAR_COLOR0 << i)) || !fb->cbufs[i])
576b8e80941Smrg				continue;
577b8e80941Smrg
578b8e80941Smrg			tex = (struct si_texture *)fb->cbufs[i]->texture;
579b8e80941Smrg			if (tex->surface.fmask_size == 0)
580b8e80941Smrg				tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
581b8e80941Smrg		}
582b8e80941Smrg	}
583b8e80941Smrg
584b8e80941Smrg	if (zstex &&
585b8e80941Smrg	    si_htile_enabled(zstex, zsbuf->u.tex.level) &&
586b8e80941Smrg	    zsbuf->u.tex.first_layer == 0 &&
587b8e80941Smrg	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) {
588b8e80941Smrg		/* TC-compatible HTILE only supports depth clears to 0 or 1. */
589b8e80941Smrg		if (buffers & PIPE_CLEAR_DEPTH &&
590b8e80941Smrg		    (!zstex->tc_compatible_htile ||
591b8e80941Smrg		     depth == 0 || depth == 1)) {
592b8e80941Smrg			/* Need to disable EXPCLEAR temporarily if clearing
593b8e80941Smrg			 * to a new value. */
594b8e80941Smrg			if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
595b8e80941Smrg				sctx->db_depth_disable_expclear = true;
596b8e80941Smrg			}
597b8e80941Smrg
598b8e80941Smrg			if (zstex->depth_clear_value != (float)depth) {
599b8e80941Smrg				/* Update DB_DEPTH_CLEAR. */
600b8e80941Smrg				zstex->depth_clear_value = depth;
601b8e80941Smrg				sctx->framebuffer.dirty_zsbuf = true;
602b8e80941Smrg				si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
603b8e80941Smrg			}
604b8e80941Smrg			sctx->db_depth_clear = true;
605b8e80941Smrg			si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
606b8e80941Smrg		}
607b8e80941Smrg
608b8e80941Smrg		/* TC-compatible HTILE only supports stencil clears to 0. */
609b8e80941Smrg		if (buffers & PIPE_CLEAR_STENCIL &&
610b8e80941Smrg		    (!zstex->tc_compatible_htile || stencil == 0)) {
611b8e80941Smrg			stencil &= 0xff;
612b8e80941Smrg
613b8e80941Smrg			/* Need to disable EXPCLEAR temporarily if clearing
614b8e80941Smrg			 * to a new value. */
615b8e80941Smrg			if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
616b8e80941Smrg				sctx->db_stencil_disable_expclear = true;
617b8e80941Smrg			}
618b8e80941Smrg
619b8e80941Smrg			if (zstex->stencil_clear_value != (uint8_t)stencil) {
620b8e80941Smrg				/* Update DB_STENCIL_CLEAR. */
621b8e80941Smrg				zstex->stencil_clear_value = stencil;
622b8e80941Smrg				sctx->framebuffer.dirty_zsbuf = true;
623b8e80941Smrg				si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
624b8e80941Smrg			}
625b8e80941Smrg			sctx->db_stencil_clear = true;
626b8e80941Smrg			si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
627b8e80941Smrg		}
628b8e80941Smrg
629b8e80941Smrg		/* TODO: Find out what's wrong here. Fast depth clear leads to
630b8e80941Smrg		 * corruption in ARK: Survival Evolved, but that may just be
631b8e80941Smrg		 * a coincidence and the root cause is elsewhere.
632b8e80941Smrg		 *
633b8e80941Smrg		 * The corruption can be fixed by putting the DB flush before
634b8e80941Smrg		 * or after the depth clear. (surprisingly)
635b8e80941Smrg		 *
636b8e80941Smrg		 * https://bugs.freedesktop.org/show_bug.cgi?id=102955 (apitrace)
637b8e80941Smrg		 *
638b8e80941Smrg		 * This hack decreases back-to-back ClearDepth performance.
639b8e80941Smrg		 */
640b8e80941Smrg		if ((sctx->db_depth_clear || sctx->db_stencil_clear) &&
641b8e80941Smrg		    sctx->screen->options.clear_db_cache_before_clear)
642b8e80941Smrg			sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
643b8e80941Smrg	}
644b8e80941Smrg
645b8e80941Smrg	si_blitter_begin(sctx, SI_CLEAR);
646b8e80941Smrg	util_blitter_clear(sctx->blitter, fb->width, fb->height,
647b8e80941Smrg			   util_framebuffer_get_num_layers(fb),
648b8e80941Smrg			   buffers, color, depth, stencil);
649b8e80941Smrg	si_blitter_end(sctx);
650b8e80941Smrg
651b8e80941Smrg	if (sctx->db_depth_clear) {
652b8e80941Smrg		sctx->db_depth_clear = false;
653b8e80941Smrg		sctx->db_depth_disable_expclear = false;
654b8e80941Smrg		zstex->depth_cleared = true;
655b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
656b8e80941Smrg	}
657b8e80941Smrg
658b8e80941Smrg	if (sctx->db_stencil_clear) {
659b8e80941Smrg		sctx->db_stencil_clear = false;
660b8e80941Smrg		sctx->db_stencil_disable_expclear = false;
661b8e80941Smrg		zstex->stencil_cleared = true;
662b8e80941Smrg		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
663b8e80941Smrg	}
664b8e80941Smrg}
665b8e80941Smrg
666b8e80941Smrgstatic void si_clear_render_target(struct pipe_context *ctx,
667b8e80941Smrg				   struct pipe_surface *dst,
668b8e80941Smrg				   const union pipe_color_union *color,
669b8e80941Smrg				   unsigned dstx, unsigned dsty,
670b8e80941Smrg				   unsigned width, unsigned height,
671b8e80941Smrg				   bool render_condition_enabled)
672b8e80941Smrg{
673b8e80941Smrg	struct si_context *sctx = (struct si_context *)ctx;
674b8e80941Smrg	struct si_texture *sdst = (struct si_texture*)dst->texture;
675b8e80941Smrg
676b8e80941Smrg	if (dst->texture->nr_samples <= 1 && !sdst->dcc_offset) {
677b8e80941Smrg		si_compute_clear_render_target(ctx, dst, color, dstx, dsty, width,
678b8e80941Smrg					       height, render_condition_enabled);
679b8e80941Smrg		return;
680b8e80941Smrg	}
681b8e80941Smrg
682b8e80941Smrg	si_blitter_begin(sctx, SI_CLEAR_SURFACE |
683b8e80941Smrg			 (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
684b8e80941Smrg	util_blitter_clear_render_target(sctx->blitter, dst, color,
685b8e80941Smrg					 dstx, dsty, width, height);
686b8e80941Smrg	si_blitter_end(sctx);
687b8e80941Smrg}
688b8e80941Smrg
689b8e80941Smrgstatic void si_clear_depth_stencil(struct pipe_context *ctx,
690b8e80941Smrg				   struct pipe_surface *dst,
691b8e80941Smrg				   unsigned clear_flags,
692b8e80941Smrg				   double depth,
693b8e80941Smrg				   unsigned stencil,
694b8e80941Smrg				   unsigned dstx, unsigned dsty,
695b8e80941Smrg				   unsigned width, unsigned height,
696b8e80941Smrg				   bool render_condition_enabled)
697b8e80941Smrg{
698b8e80941Smrg	struct si_context *sctx = (struct si_context *)ctx;
699b8e80941Smrg
700b8e80941Smrg	si_blitter_begin(sctx, SI_CLEAR_SURFACE |
701b8e80941Smrg			 (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
702b8e80941Smrg	util_blitter_clear_depth_stencil(sctx->blitter, dst, clear_flags, depth, stencil,
703b8e80941Smrg					 dstx, dsty, width, height);
704b8e80941Smrg	si_blitter_end(sctx);
705b8e80941Smrg}
706b8e80941Smrg
707b8e80941Smrgstatic void si_clear_texture(struct pipe_context *pipe,
708b8e80941Smrg			     struct pipe_resource *tex,
709b8e80941Smrg			     unsigned level,
710b8e80941Smrg			     const struct pipe_box *box,
711b8e80941Smrg			     const void *data)
712b8e80941Smrg{
713b8e80941Smrg	struct pipe_screen *screen = pipe->screen;
714b8e80941Smrg	struct si_texture *stex = (struct si_texture*)tex;
715b8e80941Smrg	struct pipe_surface tmpl = {{0}};
716b8e80941Smrg	struct pipe_surface *sf;
717b8e80941Smrg	const struct util_format_description *desc =
718b8e80941Smrg		util_format_description(tex->format);
719b8e80941Smrg
720b8e80941Smrg	tmpl.format = tex->format;
721b8e80941Smrg	tmpl.u.tex.first_layer = box->z;
722b8e80941Smrg	tmpl.u.tex.last_layer = box->z + box->depth - 1;
723b8e80941Smrg	tmpl.u.tex.level = level;
724b8e80941Smrg	sf = pipe->create_surface(pipe, tex, &tmpl);
725b8e80941Smrg	if (!sf)
726b8e80941Smrg		return;
727b8e80941Smrg
728b8e80941Smrg	if (stex->is_depth) {
729b8e80941Smrg		unsigned clear;
730b8e80941Smrg		float depth;
731b8e80941Smrg		uint8_t stencil = 0;
732b8e80941Smrg
733b8e80941Smrg		/* Depth is always present. */
734b8e80941Smrg		clear = PIPE_CLEAR_DEPTH;
735b8e80941Smrg		desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
736b8e80941Smrg
737b8e80941Smrg		if (stex->surface.has_stencil) {
738b8e80941Smrg			clear |= PIPE_CLEAR_STENCIL;
739b8e80941Smrg			desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
740b8e80941Smrg		}
741b8e80941Smrg
742b8e80941Smrg		si_clear_depth_stencil(pipe, sf, clear, depth, stencil,
743b8e80941Smrg				       box->x, box->y,
744b8e80941Smrg				       box->width, box->height, false);
745b8e80941Smrg	} else {
746b8e80941Smrg		union pipe_color_union color;
747b8e80941Smrg
748b8e80941Smrg		/* pipe_color_union requires the full vec4 representation. */
749b8e80941Smrg		if (util_format_is_pure_uint(tex->format))
750b8e80941Smrg			desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
751b8e80941Smrg		else if (util_format_is_pure_sint(tex->format))
752b8e80941Smrg			desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
753b8e80941Smrg		else
754b8e80941Smrg			desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
755b8e80941Smrg
756b8e80941Smrg		if (screen->is_format_supported(screen, tex->format,
757b8e80941Smrg						tex->target, 0, 0,
758b8e80941Smrg						PIPE_BIND_RENDER_TARGET)) {
759b8e80941Smrg			si_clear_render_target(pipe, sf, &color,
760b8e80941Smrg					       box->x, box->y,
761b8e80941Smrg					       box->width, box->height, false);
762b8e80941Smrg		} else {
763b8e80941Smrg			/* Software fallback - just for R9G9B9E5_FLOAT */
764b8e80941Smrg			util_clear_render_target(pipe, sf, &color,
765b8e80941Smrg						 box->x, box->y,
766b8e80941Smrg						 box->width, box->height);
767b8e80941Smrg		}
768b8e80941Smrg	}
769b8e80941Smrg	pipe_surface_reference(&sf, NULL);
770b8e80941Smrg}
771b8e80941Smrg
772b8e80941Smrgvoid si_init_clear_functions(struct si_context *sctx)
773b8e80941Smrg{
774b8e80941Smrg	sctx->b.clear_render_target = si_clear_render_target;
775b8e80941Smrg	sctx->b.clear_texture = si_clear_texture;
776b8e80941Smrg
777b8e80941Smrg	if (sctx->has_graphics) {
778b8e80941Smrg		sctx->b.clear = si_clear;
779b8e80941Smrg		sctx->b.clear_depth_stencil = si_clear_depth_stencil;
780b8e80941Smrg	}
781b8e80941Smrg}
782