101e04c3fSmrg/* 201e04c3fSmrg * Copyright 2014 Advanced Micro Devices, Inc. 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 2001e04c3fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 2101e04c3fSmrg * SOFTWARE. 2201e04c3fSmrg * 2301e04c3fSmrg * Authors: Marek Olšák <maraeo@gmail.com> 2401e04c3fSmrg * 2501e04c3fSmrg */ 2601e04c3fSmrg 2701e04c3fSmrg#include "r600_cs.h" 2801e04c3fSmrg#include "evergreend.h" 2901e04c3fSmrg 3001e04c3fSmrg/* 2xMSAA 3101e04c3fSmrg * There are two locations (4, 4), (-4, -4). */ 3201e04c3fSmrgconst uint32_t eg_sample_locs_2x[4] = { 3301e04c3fSmrg FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), 3401e04c3fSmrg FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), 3501e04c3fSmrg FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), 3601e04c3fSmrg FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), 3701e04c3fSmrg}; 3801e04c3fSmrgconst unsigned eg_max_dist_2x = 4; 3901e04c3fSmrg/* 4xMSAA 4001e04c3fSmrg * There are 4 locations: (-2, 6), (6, -2), (-6, 2), (2, 6). */ 4101e04c3fSmrgconst uint32_t eg_sample_locs_4x[4] = { 4201e04c3fSmrg FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), 4301e04c3fSmrg FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), 4401e04c3fSmrg FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), 4501e04c3fSmrg FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), 4601e04c3fSmrg}; 4701e04c3fSmrgconst unsigned eg_max_dist_4x = 6; 4801e04c3fSmrg 4901e04c3fSmrg/* Cayman 8xMSAA */ 5001e04c3fSmrgstatic const uint32_t cm_sample_locs_8x[] = { 5101e04c3fSmrg FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), 5201e04c3fSmrg FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), 5301e04c3fSmrg FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), 5401e04c3fSmrg FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), 5501e04c3fSmrg FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), 5601e04c3fSmrg FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), 5701e04c3fSmrg FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), 5801e04c3fSmrg FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), 5901e04c3fSmrg}; 6001e04c3fSmrgstatic const unsigned cm_max_dist_8x = 8; 6101e04c3fSmrg/* Cayman 16xMSAA */ 6201e04c3fSmrgstatic const uint32_t cm_sample_locs_16x[] = { 6301e04c3fSmrg FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), 6401e04c3fSmrg FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), 6501e04c3fSmrg FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), 6601e04c3fSmrg FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), 6701e04c3fSmrg FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), 6801e04c3fSmrg FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), 6901e04c3fSmrg FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), 7001e04c3fSmrg FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), 7101e04c3fSmrg FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), 7201e04c3fSmrg FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), 7301e04c3fSmrg FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), 7401e04c3fSmrg FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), 7501e04c3fSmrg FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), 7601e04c3fSmrg FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), 7701e04c3fSmrg FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), 7801e04c3fSmrg FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), 7901e04c3fSmrg}; 8001e04c3fSmrgstatic const unsigned cm_max_dist_16x = 8; 8101e04c3fSmrg 8201e04c3fSmrgvoid cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count, 8301e04c3fSmrg unsigned sample_index, float *out_value) 8401e04c3fSmrg{ 8501e04c3fSmrg int offset, index; 8601e04c3fSmrg struct { 8701e04c3fSmrg int idx:4; 8801e04c3fSmrg } val; 8901e04c3fSmrg switch (sample_count) { 9001e04c3fSmrg case 1: 9101e04c3fSmrg default: 9201e04c3fSmrg out_value[0] = out_value[1] = 0.5; 9301e04c3fSmrg break; 9401e04c3fSmrg case 2: 9501e04c3fSmrg offset = 4 * (sample_index * 2); 9601e04c3fSmrg val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf; 9701e04c3fSmrg out_value[0] = (float)(val.idx + 8) / 16.0f; 9801e04c3fSmrg val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf; 9901e04c3fSmrg out_value[1] = (float)(val.idx + 8) / 16.0f; 10001e04c3fSmrg break; 10101e04c3fSmrg case 4: 10201e04c3fSmrg offset = 4 * (sample_index * 2); 10301e04c3fSmrg val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf; 10401e04c3fSmrg out_value[0] = (float)(val.idx + 8) / 16.0f; 10501e04c3fSmrg val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf; 10601e04c3fSmrg out_value[1] = (float)(val.idx + 8) / 16.0f; 10701e04c3fSmrg break; 10801e04c3fSmrg case 8: 10901e04c3fSmrg offset = 4 * (sample_index % 4 * 2); 11001e04c3fSmrg index = (sample_index / 4) * 4; 11101e04c3fSmrg val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf; 11201e04c3fSmrg out_value[0] = (float)(val.idx + 8) / 16.0f; 11301e04c3fSmrg val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf; 11401e04c3fSmrg out_value[1] = (float)(val.idx + 8) / 16.0f; 11501e04c3fSmrg break; 11601e04c3fSmrg case 16: 11701e04c3fSmrg offset = 4 * (sample_index % 4 * 2); 11801e04c3fSmrg index = (sample_index / 4) * 4; 11901e04c3fSmrg val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf; 12001e04c3fSmrg out_value[0] = (float)(val.idx + 8) / 16.0f; 12101e04c3fSmrg val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf; 12201e04c3fSmrg out_value[1] = (float)(val.idx + 8) / 16.0f; 12301e04c3fSmrg break; 12401e04c3fSmrg } 12501e04c3fSmrg} 12601e04c3fSmrg 12701e04c3fSmrgvoid cayman_init_msaa(struct pipe_context *ctx) 12801e04c3fSmrg{ 12901e04c3fSmrg struct r600_common_context *rctx = (struct r600_common_context*)ctx; 13001e04c3fSmrg int i; 13101e04c3fSmrg 13201e04c3fSmrg cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]); 13301e04c3fSmrg 13401e04c3fSmrg for (i = 0; i < 2; i++) 13501e04c3fSmrg cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]); 13601e04c3fSmrg for (i = 0; i < 4; i++) 13701e04c3fSmrg cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]); 13801e04c3fSmrg for (i = 0; i < 8; i++) 13901e04c3fSmrg cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]); 14001e04c3fSmrg for (i = 0; i < 16; i++) 14101e04c3fSmrg cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]); 14201e04c3fSmrg} 14301e04c3fSmrg 14401e04c3fSmrgstatic void cayman_emit_msaa_sample_locs(struct radeon_cmdbuf *cs, int nr_samples) 14501e04c3fSmrg{ 14601e04c3fSmrg switch (nr_samples) { 14701e04c3fSmrg default: 14801e04c3fSmrg case 1: 14901e04c3fSmrg radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0); 15001e04c3fSmrg radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0); 15101e04c3fSmrg radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0); 15201e04c3fSmrg radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0); 15301e04c3fSmrg break; 15401e04c3fSmrg case 2: 15501e04c3fSmrg radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]); 15601e04c3fSmrg radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]); 15701e04c3fSmrg radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]); 15801e04c3fSmrg radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]); 15901e04c3fSmrg break; 16001e04c3fSmrg case 4: 16101e04c3fSmrg radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]); 16201e04c3fSmrg radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]); 16301e04c3fSmrg radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]); 16401e04c3fSmrg radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]); 16501e04c3fSmrg break; 16601e04c3fSmrg case 8: 16701e04c3fSmrg radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14); 16801e04c3fSmrg radeon_emit(cs, cm_sample_locs_8x[0]); 16901e04c3fSmrg radeon_emit(cs, cm_sample_locs_8x[4]); 17001e04c3fSmrg radeon_emit(cs, 0); 17101e04c3fSmrg radeon_emit(cs, 0); 17201e04c3fSmrg radeon_emit(cs, cm_sample_locs_8x[1]); 17301e04c3fSmrg radeon_emit(cs, cm_sample_locs_8x[5]); 17401e04c3fSmrg radeon_emit(cs, 0); 17501e04c3fSmrg radeon_emit(cs, 0); 17601e04c3fSmrg radeon_emit(cs, cm_sample_locs_8x[2]); 17701e04c3fSmrg radeon_emit(cs, cm_sample_locs_8x[6]); 17801e04c3fSmrg radeon_emit(cs, 0); 17901e04c3fSmrg radeon_emit(cs, 0); 18001e04c3fSmrg radeon_emit(cs, cm_sample_locs_8x[3]); 18101e04c3fSmrg radeon_emit(cs, cm_sample_locs_8x[7]); 18201e04c3fSmrg break; 18301e04c3fSmrg case 16: 18401e04c3fSmrg radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16); 18501e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[0]); 18601e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[4]); 18701e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[8]); 18801e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[12]); 18901e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[1]); 19001e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[5]); 19101e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[9]); 19201e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[13]); 19301e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[2]); 19401e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[6]); 19501e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[10]); 19601e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[14]); 19701e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[3]); 19801e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[7]); 19901e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[11]); 20001e04c3fSmrg radeon_emit(cs, cm_sample_locs_16x[15]); 20101e04c3fSmrg break; 20201e04c3fSmrg } 20301e04c3fSmrg} 20401e04c3fSmrg 20501e04c3fSmrgvoid cayman_emit_msaa_state(struct radeon_cmdbuf *cs, int nr_samples, 20601e04c3fSmrg int ps_iter_samples, int overrast_samples) 20701e04c3fSmrg{ 20801e04c3fSmrg int setup_samples = nr_samples > 1 ? nr_samples : 20901e04c3fSmrg overrast_samples > 1 ? overrast_samples : 0; 21001e04c3fSmrg /* Required by OpenGL line rasterization. 21101e04c3fSmrg * 21201e04c3fSmrg * TODO: We should also enable perpendicular endcaps for AA lines, 21301e04c3fSmrg * but that requires implementing line stippling in the pixel 21401e04c3fSmrg * shader. SC can only do line stippling with axis-aligned 21501e04c3fSmrg * endcaps. 21601e04c3fSmrg */ 21701e04c3fSmrg unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1); 21801e04c3fSmrg unsigned sc_mode_cntl_1 = 21901e04c3fSmrg EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 22001e04c3fSmrg EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1); 22101e04c3fSmrg 22201e04c3fSmrg if (nr_samples > 1) { 22301e04c3fSmrg cayman_emit_msaa_sample_locs(cs, nr_samples); 22401e04c3fSmrg } 22501e04c3fSmrg 22601e04c3fSmrg if (setup_samples > 1) { 22701e04c3fSmrg /* indexed by log2(nr_samples) */ 22801e04c3fSmrg const unsigned max_dist[] = { 22901e04c3fSmrg 0, 23001e04c3fSmrg eg_max_dist_2x, 23101e04c3fSmrg eg_max_dist_4x, 23201e04c3fSmrg cm_max_dist_8x, 23301e04c3fSmrg cm_max_dist_16x 23401e04c3fSmrg }; 23501e04c3fSmrg unsigned log_samples = util_logbase2(setup_samples); 23601e04c3fSmrg unsigned log_ps_iter_samples = 23701e04c3fSmrg util_logbase2(util_next_power_of_two(ps_iter_samples)); 23801e04c3fSmrg 23901e04c3fSmrg radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2); 24001e04c3fSmrg radeon_emit(cs, sc_line_cntl | 24101e04c3fSmrg S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */ 24201e04c3fSmrg radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) | 24301e04c3fSmrg S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | 24401e04c3fSmrg S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */ 24501e04c3fSmrg 24601e04c3fSmrg if (nr_samples > 1) { 24701e04c3fSmrg radeon_set_context_reg(cs, CM_R_028804_DB_EQAA, 24801e04c3fSmrg S_028804_MAX_ANCHOR_SAMPLES(log_samples) | 24901e04c3fSmrg S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | 25001e04c3fSmrg S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | 25101e04c3fSmrg S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) | 25201e04c3fSmrg S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 25301e04c3fSmrg S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); 25401e04c3fSmrg radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 25501e04c3fSmrg EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) | 25601e04c3fSmrg sc_mode_cntl_1); 25701e04c3fSmrg } else if (overrast_samples > 1) { 25801e04c3fSmrg radeon_set_context_reg(cs, CM_R_028804_DB_EQAA, 25901e04c3fSmrg S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 26001e04c3fSmrg S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) | 26101e04c3fSmrg S_028804_OVERRASTERIZATION_AMOUNT(log_samples)); 26201e04c3fSmrg radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 26301e04c3fSmrg sc_mode_cntl_1); 26401e04c3fSmrg } 26501e04c3fSmrg } else { 26601e04c3fSmrg radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2); 26701e04c3fSmrg radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */ 26801e04c3fSmrg radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */ 26901e04c3fSmrg 27001e04c3fSmrg radeon_set_context_reg(cs, CM_R_028804_DB_EQAA, 27101e04c3fSmrg S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 27201e04c3fSmrg S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); 27301e04c3fSmrg radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 27401e04c3fSmrg sc_mode_cntl_1); 27501e04c3fSmrg } 27601e04c3fSmrg} 277