1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25#include "si_build_pm4.h" 26 27/* For MSAA sample positions. */ 28#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ 29 ((((unsigned)(s0x) & 0xf) << 0) | (((unsigned)(s0y) & 0xf) << 4) | \ 30 (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) | \ 31 (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \ 32 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28)) 33 34/* For obtaining location coordinates from registers */ 35#define SEXT4(x) ((int)((x) | ((x) & 0x8 ? 0xfffffff0 : 0))) 36#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index) * 4)) & 0xf) 37#define GET_SX(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2) 38#define GET_SY(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2 + 1) 39 40/* The following sample ordering is required by EQAA. 41 * 42 * Sample 0 is approx. in the top-left quadrant. 43 * Sample 1 is approx. in the bottom-right quadrant. 44 * 45 * Sample 2 is approx. in the bottom-left quadrant. 46 * Sample 3 is approx. in the top-right quadrant. 47 * (sample I={2,3} adds more detail to the vicinity of sample I-2) 48 * 49 * Sample 4 is approx. in the same quadrant as sample 0. (top-left) 50 * Sample 5 is approx. in the same quadrant as sample 1. (bottom-right) 51 * Sample 6 is approx. in the same quadrant as sample 2. (bottom-left) 52 * Sample 7 is approx. in the same quadrant as sample 3. (top-right) 53 * (sample I={4,5,6,7} adds more detail to the vicinity of sample I-4) 54 * 55 * The next 8 samples add more detail to the vicinity of the previous samples. 56 * (sample I (I >= 8) adds more detail to the vicinity of sample I-8) 57 * 58 * The ordering is specified such that: 59 * If we take the first 2 samples, we should get good 2x MSAA. 60 * If we add 2 more samples, we should get good 4x MSAA with the same sample locations. 61 * If we add 4 more samples, we should get good 8x MSAA with the same sample locations. 62 * If we add 8 more samples, we should get perfect 16x MSAA with the same sample locations. 63 * 64 * The ordering also allows finding samples in the same vicinity. 65 * 66 * Group N of 2 samples in the same vicinity in 16x MSAA: {N,N+8} 67 * Group N of 2 samples in the same vicinity in 8x MSAA: {N,N+4} 68 * Group N of 2 samples in the same vicinity in 4x MSAA: {N,N+2} 69 * 70 * Groups of 4 samples in the same vicinity in 16x MSAA: 71 * Top left: {0,4,8,12} 72 * Bottom right: {1,5,9,13} 73 * Bottom left: {2,6,10,14} 74 * Top right: {3,7,11,15} 75 * 76 * Groups of 4 samples in the same vicinity in 8x MSAA: 77 * Left half: {0,2,4,6} 78 * Right half: {1,3,5,7} 79 * 80 * Groups of 8 samples in the same vicinity in 16x MSAA: 81 * Left half: {0,2,4,6,8,10,12,14} 82 * Right half: {1,3,5,7,9,11,13,15} 83 */ 84 85/* 1x MSAA */ 86static const uint32_t sample_locs_1x = 87 FILL_SREG( 0, 0, 0, 0, 0, 0, 0, 0); /* S1, S2, S3 fields are not used by 1x */ 88static const uint64_t centroid_priority_1x = 0x0000000000000000ull; 89 90/* 2x MSAA (the positions are sorted for EQAA) */ 91static const uint32_t sample_locs_2x = 92 FILL_SREG(-4,-4, 4, 4, 0, 0, 0, 0); /* S2 & S3 fields are not used by 2x MSAA */ 93static const uint64_t centroid_priority_2x = 0x1010101010101010ull; 94 95/* 4x MSAA (the positions are sorted for EQAA) */ 96static const uint32_t sample_locs_4x = 97 FILL_SREG(-2,-6, 2, 6, -6, 2, 6,-2); 98static const uint64_t centroid_priority_4x = 0x3210321032103210ull; 99 100/* 8x MSAA (the positions are sorted for EQAA) */ 101static const uint32_t sample_locs_8x[] = { 102 FILL_SREG(-3,-5, 5, 1, -1, 3, 7,-7), 103 FILL_SREG(-7,-1, 3, 7, -5, 5, 1,-3), 104 /* The following are unused by hardware, but we emit them to IBs 105 * instead of multiple SET_CONTEXT_REG packets. */ 106 0, 107 0, 108}; 109static const uint64_t centroid_priority_8x = 0x3546012735460127ull; 110 111/* 16x MSAA (the positions are sorted for EQAA) */ 112static const uint32_t sample_locs_16x[] = { 113 FILL_SREG(-5,-2, 5, 3, -2, 6, 3,-5), 114 FILL_SREG(-4,-6, 1, 1, -6, 4, 7,-4), 115 FILL_SREG(-1,-3, 6, 7, -3, 2, 0,-7), 116 FILL_SREG(-7,-8, 2, 5, -8, 0, 4,-1), 117}; 118static const uint64_t centroid_priority_16x = 0xc97e64b231d0fa85ull; 119 120static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count, 121 unsigned sample_index, float *out_value) 122{ 123 const uint32_t *sample_locs; 124 125 switch (sample_count) { 126 case 1: 127 default: 128 sample_locs = &sample_locs_1x; 129 break; 130 case 2: 131 sample_locs = &sample_locs_2x; 132 break; 133 case 4: 134 sample_locs = &sample_locs_4x; 135 break; 136 case 8: 137 sample_locs = sample_locs_8x; 138 break; 139 case 16: 140 sample_locs = sample_locs_16x; 141 break; 142 } 143 144 out_value[0] = (GET_SX(sample_locs, sample_index) + 8) / 16.0f; 145 out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f; 146} 147 148static void si_emit_max_4_sample_locs(struct radeon_cmdbuf *cs, 149 uint64_t centroid_priority, 150 uint32_t sample_locs) 151{ 152 radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); 153 radeon_emit(cs, centroid_priority); 154 radeon_emit(cs, centroid_priority >> 32); 155 radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs); 156 radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs); 157 radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs); 158 radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs); 159} 160 161static void si_emit_max_16_sample_locs(struct radeon_cmdbuf *cs, 162 uint64_t centroid_priority, 163 const uint32_t *sample_locs, 164 unsigned num_samples) 165{ 166 radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); 167 radeon_emit(cs, centroid_priority); 168 radeon_emit(cs, centroid_priority >> 32); 169 radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 170 num_samples == 8 ? 14 : 16); 171 radeon_emit_array(cs, sample_locs, 4); 172 radeon_emit_array(cs, sample_locs, 4); 173 radeon_emit_array(cs, sample_locs, 4); 174 radeon_emit_array(cs, sample_locs, num_samples == 8 ? 2 : 4); 175} 176 177void si_emit_sample_locations(struct radeon_cmdbuf *cs, int nr_samples) 178{ 179 switch (nr_samples) { 180 default: 181 case 1: 182 si_emit_max_4_sample_locs(cs, centroid_priority_1x, sample_locs_1x); 183 break; 184 case 2: 185 si_emit_max_4_sample_locs(cs, centroid_priority_2x, sample_locs_2x); 186 break; 187 case 4: 188 si_emit_max_4_sample_locs(cs, centroid_priority_4x, sample_locs_4x); 189 break; 190 case 8: 191 si_emit_max_16_sample_locs(cs, centroid_priority_8x, sample_locs_8x, 8); 192 break; 193 case 16: 194 si_emit_max_16_sample_locs(cs, centroid_priority_16x, sample_locs_16x, 16); 195 break; 196 } 197} 198 199void si_init_msaa_functions(struct si_context *sctx) 200{ 201 int i; 202 203 sctx->b.get_sample_position = si_get_sample_position; 204 205 si_get_sample_position(&sctx->b, 1, 0, sctx->sample_positions.x1[0]); 206 207 for (i = 0; i < 2; i++) 208 si_get_sample_position(&sctx->b, 2, i, sctx->sample_positions.x2[i]); 209 for (i = 0; i < 4; i++) 210 si_get_sample_position(&sctx->b, 4, i, sctx->sample_positions.x4[i]); 211 for (i = 0; i < 8; i++) 212 si_get_sample_position(&sctx->b, 8, i, sctx->sample_positions.x8[i]); 213 for (i = 0; i < 16; i++) 214 si_get_sample_position(&sctx->b, 16, i, sctx->sample_positions.x16[i]); 215} 216