1b8e80941Smrg/* 2b8e80941Smrg * Copyright 2014 Advanced Micro Devices, Inc. 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21b8e80941Smrg * SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg * Authors: Marek Olšák <maraeo@gmail.com> 24b8e80941Smrg * 25b8e80941Smrg */ 26b8e80941Smrg 27b8e80941Smrg#include "r600_cs.h" 28b8e80941Smrg#include "evergreend.h" 29b8e80941Smrg 30b8e80941Smrg/* 2xMSAA 31b8e80941Smrg * There are two locations (4, 4), (-4, -4). */ 32b8e80941Smrgconst uint32_t eg_sample_locs_2x[4] = { 33b8e80941Smrg FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), 34b8e80941Smrg FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), 35b8e80941Smrg FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), 36b8e80941Smrg FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), 37b8e80941Smrg}; 38b8e80941Smrgconst unsigned eg_max_dist_2x = 4; 39b8e80941Smrg/* 4xMSAA 40b8e80941Smrg * There are 4 locations: (-2, 6), (6, -2), (-6, 2), (2, 6). */ 41b8e80941Smrgconst uint32_t eg_sample_locs_4x[4] = { 42b8e80941Smrg FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), 43b8e80941Smrg FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), 44b8e80941Smrg FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), 45b8e80941Smrg FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), 46b8e80941Smrg}; 47b8e80941Smrgconst unsigned eg_max_dist_4x = 6; 48b8e80941Smrg 49b8e80941Smrg/* Cayman 8xMSAA */ 50b8e80941Smrgstatic const uint32_t cm_sample_locs_8x[] = { 51b8e80941Smrg FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), 52b8e80941Smrg FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), 53b8e80941Smrg FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), 54b8e80941Smrg FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), 55b8e80941Smrg FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), 56b8e80941Smrg FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), 57b8e80941Smrg FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), 58b8e80941Smrg FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), 59b8e80941Smrg}; 60b8e80941Smrgstatic const unsigned cm_max_dist_8x = 8; 61b8e80941Smrg/* Cayman 16xMSAA */ 62b8e80941Smrgstatic const uint32_t cm_sample_locs_16x[] = { 63b8e80941Smrg FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), 64b8e80941Smrg FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), 65b8e80941Smrg FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), 66b8e80941Smrg FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), 67b8e80941Smrg FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), 68b8e80941Smrg FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), 69b8e80941Smrg FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), 70b8e80941Smrg FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), 71b8e80941Smrg FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), 72b8e80941Smrg FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), 73b8e80941Smrg FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), 74b8e80941Smrg FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), 75b8e80941Smrg FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), 76b8e80941Smrg FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), 77b8e80941Smrg FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), 78b8e80941Smrg FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), 79b8e80941Smrg}; 80b8e80941Smrgstatic const unsigned cm_max_dist_16x = 8; 81b8e80941Smrg 82b8e80941Smrgvoid cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count, 83b8e80941Smrg unsigned sample_index, float *out_value) 84b8e80941Smrg{ 85b8e80941Smrg int offset, index; 86b8e80941Smrg struct { 87b8e80941Smrg int idx:4; 88b8e80941Smrg } val; 89b8e80941Smrg switch (sample_count) { 90b8e80941Smrg case 1: 91b8e80941Smrg default: 92b8e80941Smrg out_value[0] = out_value[1] = 0.5; 93b8e80941Smrg break; 94b8e80941Smrg case 2: 95b8e80941Smrg offset = 4 * (sample_index * 2); 96b8e80941Smrg val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf; 97b8e80941Smrg out_value[0] = (float)(val.idx + 8) / 16.0f; 98b8e80941Smrg val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf; 99b8e80941Smrg out_value[1] = (float)(val.idx + 8) / 16.0f; 100b8e80941Smrg break; 101b8e80941Smrg case 4: 102b8e80941Smrg offset = 4 * (sample_index * 2); 103b8e80941Smrg val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf; 104b8e80941Smrg out_value[0] = (float)(val.idx + 8) / 16.0f; 105b8e80941Smrg val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf; 106b8e80941Smrg out_value[1] = (float)(val.idx + 8) / 16.0f; 107b8e80941Smrg break; 108b8e80941Smrg case 8: 109b8e80941Smrg offset = 4 * (sample_index % 4 * 2); 110b8e80941Smrg index = (sample_index / 4) * 4; 111b8e80941Smrg val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf; 112b8e80941Smrg out_value[0] = (float)(val.idx + 8) / 16.0f; 113b8e80941Smrg val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf; 114b8e80941Smrg out_value[1] = (float)(val.idx + 8) / 16.0f; 115b8e80941Smrg break; 116b8e80941Smrg case 16: 117b8e80941Smrg offset = 4 * (sample_index % 4 * 2); 118b8e80941Smrg index = (sample_index / 4) * 4; 119b8e80941Smrg val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf; 120b8e80941Smrg out_value[0] = (float)(val.idx + 8) / 16.0f; 121b8e80941Smrg val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf; 122b8e80941Smrg out_value[1] = (float)(val.idx + 8) / 16.0f; 123b8e80941Smrg break; 124b8e80941Smrg } 125b8e80941Smrg} 126b8e80941Smrg 127b8e80941Smrgvoid cayman_init_msaa(struct pipe_context *ctx) 128b8e80941Smrg{ 129b8e80941Smrg struct r600_common_context *rctx = (struct r600_common_context*)ctx; 130b8e80941Smrg int i; 131b8e80941Smrg 132b8e80941Smrg cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]); 133b8e80941Smrg 134b8e80941Smrg for (i = 0; i < 2; i++) 135b8e80941Smrg cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]); 136b8e80941Smrg for (i = 0; i < 4; i++) 137b8e80941Smrg cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]); 138b8e80941Smrg for (i = 0; i < 8; i++) 139b8e80941Smrg cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]); 140b8e80941Smrg for (i = 0; i < 16; i++) 141b8e80941Smrg cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]); 142b8e80941Smrg} 143b8e80941Smrg 144b8e80941Smrgstatic void cayman_emit_msaa_sample_locs(struct radeon_cmdbuf *cs, int nr_samples) 145b8e80941Smrg{ 146b8e80941Smrg switch (nr_samples) { 147b8e80941Smrg default: 148b8e80941Smrg case 1: 149b8e80941Smrg radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0); 150b8e80941Smrg radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0); 151b8e80941Smrg radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0); 152b8e80941Smrg radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0); 153b8e80941Smrg break; 154b8e80941Smrg case 2: 155b8e80941Smrg radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]); 156b8e80941Smrg radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]); 157b8e80941Smrg radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]); 158b8e80941Smrg radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]); 159b8e80941Smrg break; 160b8e80941Smrg case 4: 161b8e80941Smrg radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]); 162b8e80941Smrg radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]); 163b8e80941Smrg radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]); 164b8e80941Smrg radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]); 165b8e80941Smrg break; 166b8e80941Smrg case 8: 167b8e80941Smrg radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14); 168b8e80941Smrg radeon_emit(cs, cm_sample_locs_8x[0]); 169b8e80941Smrg radeon_emit(cs, cm_sample_locs_8x[4]); 170b8e80941Smrg radeon_emit(cs, 0); 171b8e80941Smrg radeon_emit(cs, 0); 172b8e80941Smrg radeon_emit(cs, cm_sample_locs_8x[1]); 173b8e80941Smrg radeon_emit(cs, cm_sample_locs_8x[5]); 174b8e80941Smrg radeon_emit(cs, 0); 175b8e80941Smrg radeon_emit(cs, 0); 176b8e80941Smrg radeon_emit(cs, cm_sample_locs_8x[2]); 177b8e80941Smrg radeon_emit(cs, cm_sample_locs_8x[6]); 178b8e80941Smrg radeon_emit(cs, 0); 179b8e80941Smrg radeon_emit(cs, 0); 180b8e80941Smrg radeon_emit(cs, cm_sample_locs_8x[3]); 181b8e80941Smrg radeon_emit(cs, cm_sample_locs_8x[7]); 182b8e80941Smrg break; 183b8e80941Smrg case 16: 184b8e80941Smrg radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16); 185b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[0]); 186b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[4]); 187b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[8]); 188b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[12]); 189b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[1]); 190b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[5]); 191b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[9]); 192b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[13]); 193b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[2]); 194b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[6]); 195b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[10]); 196b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[14]); 197b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[3]); 198b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[7]); 199b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[11]); 200b8e80941Smrg radeon_emit(cs, cm_sample_locs_16x[15]); 201b8e80941Smrg break; 202b8e80941Smrg } 203b8e80941Smrg} 204b8e80941Smrg 205b8e80941Smrgvoid cayman_emit_msaa_state(struct radeon_cmdbuf *cs, int nr_samples, 206b8e80941Smrg int ps_iter_samples, int overrast_samples) 207b8e80941Smrg{ 208b8e80941Smrg int setup_samples = nr_samples > 1 ? nr_samples : 209b8e80941Smrg overrast_samples > 1 ? overrast_samples : 0; 210b8e80941Smrg /* Required by OpenGL line rasterization. 211b8e80941Smrg * 212b8e80941Smrg * TODO: We should also enable perpendicular endcaps for AA lines, 213b8e80941Smrg * but that requires implementing line stippling in the pixel 214b8e80941Smrg * shader. SC can only do line stippling with axis-aligned 215b8e80941Smrg * endcaps. 216b8e80941Smrg */ 217b8e80941Smrg unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1); 218b8e80941Smrg unsigned sc_mode_cntl_1 = 219b8e80941Smrg EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 220b8e80941Smrg EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1); 221b8e80941Smrg 222b8e80941Smrg if (nr_samples > 1) { 223b8e80941Smrg cayman_emit_msaa_sample_locs(cs, nr_samples); 224b8e80941Smrg } 225b8e80941Smrg 226b8e80941Smrg if (setup_samples > 1) { 227b8e80941Smrg /* indexed by log2(nr_samples) */ 228b8e80941Smrg const unsigned max_dist[] = { 229b8e80941Smrg 0, 230b8e80941Smrg eg_max_dist_2x, 231b8e80941Smrg eg_max_dist_4x, 232b8e80941Smrg cm_max_dist_8x, 233b8e80941Smrg cm_max_dist_16x 234b8e80941Smrg }; 235b8e80941Smrg unsigned log_samples = util_logbase2(setup_samples); 236b8e80941Smrg unsigned log_ps_iter_samples = 237b8e80941Smrg util_logbase2(util_next_power_of_two(ps_iter_samples)); 238b8e80941Smrg 239b8e80941Smrg radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2); 240b8e80941Smrg radeon_emit(cs, sc_line_cntl | 241b8e80941Smrg S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */ 242b8e80941Smrg radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) | 243b8e80941Smrg S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | 244b8e80941Smrg S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */ 245b8e80941Smrg 246b8e80941Smrg if (nr_samples > 1) { 247b8e80941Smrg radeon_set_context_reg(cs, CM_R_028804_DB_EQAA, 248b8e80941Smrg S_028804_MAX_ANCHOR_SAMPLES(log_samples) | 249b8e80941Smrg S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | 250b8e80941Smrg S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | 251b8e80941Smrg S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) | 252b8e80941Smrg S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 253b8e80941Smrg S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); 254b8e80941Smrg radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 255b8e80941Smrg EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) | 256b8e80941Smrg sc_mode_cntl_1); 257b8e80941Smrg } else if (overrast_samples > 1) { 258b8e80941Smrg radeon_set_context_reg(cs, CM_R_028804_DB_EQAA, 259b8e80941Smrg S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 260b8e80941Smrg S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) | 261b8e80941Smrg S_028804_OVERRASTERIZATION_AMOUNT(log_samples)); 262b8e80941Smrg radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 263b8e80941Smrg sc_mode_cntl_1); 264b8e80941Smrg } 265b8e80941Smrg } else { 266b8e80941Smrg radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2); 267b8e80941Smrg radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */ 268b8e80941Smrg radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */ 269b8e80941Smrg 270b8e80941Smrg radeon_set_context_reg(cs, CM_R_028804_DB_EQAA, 271b8e80941Smrg S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 272b8e80941Smrg S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); 273b8e80941Smrg radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 274b8e80941Smrg sc_mode_cntl_1); 275b8e80941Smrg } 276b8e80941Smrg} 277