1b8e80941Smrg/*
2b8e80941Smrg * Copyright 2014 Advanced Micro Devices, Inc.
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b8e80941Smrg * SOFTWARE.
22b8e80941Smrg *
23b8e80941Smrg * Authors: Marek Olšák <maraeo@gmail.com>
24b8e80941Smrg *
25b8e80941Smrg */
26b8e80941Smrg
27b8e80941Smrg#include "r600_cs.h"
28b8e80941Smrg#include "evergreend.h"
29b8e80941Smrg
30b8e80941Smrg/* 2xMSAA
31b8e80941Smrg * There are two locations (4, 4), (-4, -4). */
32b8e80941Smrgconst uint32_t eg_sample_locs_2x[4] = {
33b8e80941Smrg	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
34b8e80941Smrg	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
35b8e80941Smrg	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
36b8e80941Smrg	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
37b8e80941Smrg};
38b8e80941Smrgconst unsigned eg_max_dist_2x = 4;
39b8e80941Smrg/* 4xMSAA
40b8e80941Smrg * There are 4 locations: (-2, 6), (6, -2), (-6, 2), (2, 6). */
41b8e80941Smrgconst uint32_t eg_sample_locs_4x[4] = {
42b8e80941Smrg	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
43b8e80941Smrg	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
44b8e80941Smrg	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
45b8e80941Smrg	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
46b8e80941Smrg};
47b8e80941Smrgconst unsigned eg_max_dist_4x = 6;
48b8e80941Smrg
49b8e80941Smrg/* Cayman 8xMSAA */
50b8e80941Smrgstatic const uint32_t cm_sample_locs_8x[] = {
51b8e80941Smrg	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
52b8e80941Smrg	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
53b8e80941Smrg	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
54b8e80941Smrg	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
55b8e80941Smrg	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
56b8e80941Smrg	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
57b8e80941Smrg	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
58b8e80941Smrg	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
59b8e80941Smrg};
60b8e80941Smrgstatic const unsigned cm_max_dist_8x = 8;
61b8e80941Smrg/* Cayman 16xMSAA */
62b8e80941Smrgstatic const uint32_t cm_sample_locs_16x[] = {
63b8e80941Smrg	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
64b8e80941Smrg	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
65b8e80941Smrg	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
66b8e80941Smrg	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
67b8e80941Smrg	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
68b8e80941Smrg	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
69b8e80941Smrg	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
70b8e80941Smrg	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
71b8e80941Smrg	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
72b8e80941Smrg	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
73b8e80941Smrg	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
74b8e80941Smrg	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
75b8e80941Smrg	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
76b8e80941Smrg	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
77b8e80941Smrg	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
78b8e80941Smrg	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
79b8e80941Smrg};
80b8e80941Smrgstatic const unsigned cm_max_dist_16x = 8;
81b8e80941Smrg
82b8e80941Smrgvoid cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
83b8e80941Smrg				unsigned sample_index, float *out_value)
84b8e80941Smrg{
85b8e80941Smrg	int offset, index;
86b8e80941Smrg	struct {
87b8e80941Smrg		int idx:4;
88b8e80941Smrg	} val;
89b8e80941Smrg	switch (sample_count) {
90b8e80941Smrg	case 1:
91b8e80941Smrg	default:
92b8e80941Smrg		out_value[0] = out_value[1] = 0.5;
93b8e80941Smrg		break;
94b8e80941Smrg	case 2:
95b8e80941Smrg		offset = 4 * (sample_index * 2);
96b8e80941Smrg		val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
97b8e80941Smrg		out_value[0] = (float)(val.idx + 8) / 16.0f;
98b8e80941Smrg		val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
99b8e80941Smrg		out_value[1] = (float)(val.idx + 8) / 16.0f;
100b8e80941Smrg		break;
101b8e80941Smrg	case 4:
102b8e80941Smrg		offset = 4 * (sample_index * 2);
103b8e80941Smrg		val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
104b8e80941Smrg		out_value[0] = (float)(val.idx + 8) / 16.0f;
105b8e80941Smrg		val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
106b8e80941Smrg		out_value[1] = (float)(val.idx + 8) / 16.0f;
107b8e80941Smrg		break;
108b8e80941Smrg	case 8:
109b8e80941Smrg		offset = 4 * (sample_index % 4 * 2);
110b8e80941Smrg		index = (sample_index / 4) * 4;
111b8e80941Smrg		val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
112b8e80941Smrg		out_value[0] = (float)(val.idx + 8) / 16.0f;
113b8e80941Smrg		val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
114b8e80941Smrg		out_value[1] = (float)(val.idx + 8) / 16.0f;
115b8e80941Smrg		break;
116b8e80941Smrg	case 16:
117b8e80941Smrg		offset = 4 * (sample_index % 4 * 2);
118b8e80941Smrg		index = (sample_index / 4) * 4;
119b8e80941Smrg		val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
120b8e80941Smrg		out_value[0] = (float)(val.idx + 8) / 16.0f;
121b8e80941Smrg		val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
122b8e80941Smrg		out_value[1] = (float)(val.idx + 8) / 16.0f;
123b8e80941Smrg		break;
124b8e80941Smrg	}
125b8e80941Smrg}
126b8e80941Smrg
127b8e80941Smrgvoid cayman_init_msaa(struct pipe_context *ctx)
128b8e80941Smrg{
129b8e80941Smrg	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
130b8e80941Smrg	int i;
131b8e80941Smrg
132b8e80941Smrg	cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
133b8e80941Smrg
134b8e80941Smrg	for (i = 0; i < 2; i++)
135b8e80941Smrg		cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
136b8e80941Smrg	for (i = 0; i < 4; i++)
137b8e80941Smrg		cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
138b8e80941Smrg	for (i = 0; i < 8; i++)
139b8e80941Smrg		cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
140b8e80941Smrg	for (i = 0; i < 16; i++)
141b8e80941Smrg		cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
142b8e80941Smrg}
143b8e80941Smrg
144b8e80941Smrgstatic void cayman_emit_msaa_sample_locs(struct radeon_cmdbuf *cs, int nr_samples)
145b8e80941Smrg{
146b8e80941Smrg	switch (nr_samples) {
147b8e80941Smrg	default:
148b8e80941Smrg	case 1:
149b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
150b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
151b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
152b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
153b8e80941Smrg		break;
154b8e80941Smrg	case 2:
155b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
156b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
157b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
158b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
159b8e80941Smrg		break;
160b8e80941Smrg	case 4:
161b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
162b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
163b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
164b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
165b8e80941Smrg		break;
166b8e80941Smrg	case 8:
167b8e80941Smrg		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
168b8e80941Smrg		radeon_emit(cs, cm_sample_locs_8x[0]);
169b8e80941Smrg		radeon_emit(cs, cm_sample_locs_8x[4]);
170b8e80941Smrg		radeon_emit(cs, 0);
171b8e80941Smrg		radeon_emit(cs, 0);
172b8e80941Smrg		radeon_emit(cs, cm_sample_locs_8x[1]);
173b8e80941Smrg		radeon_emit(cs, cm_sample_locs_8x[5]);
174b8e80941Smrg		radeon_emit(cs, 0);
175b8e80941Smrg		radeon_emit(cs, 0);
176b8e80941Smrg		radeon_emit(cs, cm_sample_locs_8x[2]);
177b8e80941Smrg		radeon_emit(cs, cm_sample_locs_8x[6]);
178b8e80941Smrg		radeon_emit(cs, 0);
179b8e80941Smrg		radeon_emit(cs, 0);
180b8e80941Smrg		radeon_emit(cs, cm_sample_locs_8x[3]);
181b8e80941Smrg		radeon_emit(cs, cm_sample_locs_8x[7]);
182b8e80941Smrg		break;
183b8e80941Smrg	case 16:
184b8e80941Smrg		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
185b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[0]);
186b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[4]);
187b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[8]);
188b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[12]);
189b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[1]);
190b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[5]);
191b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[9]);
192b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[13]);
193b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[2]);
194b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[6]);
195b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[10]);
196b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[14]);
197b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[3]);
198b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[7]);
199b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[11]);
200b8e80941Smrg		radeon_emit(cs, cm_sample_locs_16x[15]);
201b8e80941Smrg		break;
202b8e80941Smrg	}
203b8e80941Smrg}
204b8e80941Smrg
205b8e80941Smrgvoid cayman_emit_msaa_state(struct radeon_cmdbuf *cs, int nr_samples,
206b8e80941Smrg			    int ps_iter_samples, int overrast_samples)
207b8e80941Smrg{
208b8e80941Smrg	int setup_samples = nr_samples > 1 ? nr_samples :
209b8e80941Smrg			    overrast_samples > 1 ? overrast_samples : 0;
210b8e80941Smrg	/* Required by OpenGL line rasterization.
211b8e80941Smrg	 *
212b8e80941Smrg	 * TODO: We should also enable perpendicular endcaps for AA lines,
213b8e80941Smrg	 *       but that requires implementing line stippling in the pixel
214b8e80941Smrg	 *       shader. SC can only do line stippling with axis-aligned
215b8e80941Smrg	 *       endcaps.
216b8e80941Smrg	 */
217b8e80941Smrg	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
218b8e80941Smrg	unsigned sc_mode_cntl_1 =
219b8e80941Smrg		EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
220b8e80941Smrg		EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
221b8e80941Smrg
222b8e80941Smrg	if (nr_samples > 1) {
223b8e80941Smrg		cayman_emit_msaa_sample_locs(cs, nr_samples);
224b8e80941Smrg	}
225b8e80941Smrg
226b8e80941Smrg	if (setup_samples > 1) {
227b8e80941Smrg		/* indexed by log2(nr_samples) */
228b8e80941Smrg		const unsigned max_dist[] = {
229b8e80941Smrg			0,
230b8e80941Smrg			eg_max_dist_2x,
231b8e80941Smrg			eg_max_dist_4x,
232b8e80941Smrg			cm_max_dist_8x,
233b8e80941Smrg			cm_max_dist_16x
234b8e80941Smrg		};
235b8e80941Smrg		unsigned log_samples = util_logbase2(setup_samples);
236b8e80941Smrg		unsigned log_ps_iter_samples =
237b8e80941Smrg			util_logbase2(util_next_power_of_two(ps_iter_samples));
238b8e80941Smrg
239b8e80941Smrg		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
240b8e80941Smrg		radeon_emit(cs, sc_line_cntl |
241b8e80941Smrg			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
242b8e80941Smrg		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
243b8e80941Smrg			    S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
244b8e80941Smrg			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
245b8e80941Smrg
246b8e80941Smrg		if (nr_samples > 1) {
247b8e80941Smrg			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
248b8e80941Smrg					       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
249b8e80941Smrg					       S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
250b8e80941Smrg					       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
251b8e80941Smrg					       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
252b8e80941Smrg					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
253b8e80941Smrg					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
254b8e80941Smrg			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
255b8e80941Smrg					       EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
256b8e80941Smrg					       sc_mode_cntl_1);
257b8e80941Smrg		} else if (overrast_samples > 1) {
258b8e80941Smrg			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
259b8e80941Smrg					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
260b8e80941Smrg					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
261b8e80941Smrg					       S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
262b8e80941Smrg			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
263b8e80941Smrg					       sc_mode_cntl_1);
264b8e80941Smrg		}
265b8e80941Smrg	} else {
266b8e80941Smrg		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
267b8e80941Smrg		radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
268b8e80941Smrg		radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
269b8e80941Smrg
270b8e80941Smrg		radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
271b8e80941Smrg				       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
272b8e80941Smrg				       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
273b8e80941Smrg		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
274b8e80941Smrg				       sc_mode_cntl_1);
275b8e80941Smrg	}
276b8e80941Smrg}
277