pan_shader.c revision 7ec681f3
1/* 2 * Copyright (C) 2018 Alyssa Rosenzweig 3 * Copyright (C) 2019-2021 Collabora, Ltd. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25#include "pan_device.h" 26#include "pan_shader.h" 27#include "pan_format.h" 28 29#if PAN_ARCH <= 5 30#include "panfrost/midgard/midgard_compile.h" 31#else 32#include "panfrost/bifrost/bifrost_compile.h" 33#endif 34 35const nir_shader_compiler_options * 36GENX(pan_shader_get_compiler_options)(void) 37{ 38#if PAN_ARCH >= 6 39 return &bifrost_nir_options; 40#else 41 return &midgard_nir_options; 42#endif 43} 44 45static enum pipe_format 46varying_format(nir_alu_type t, unsigned ncomps) 47{ 48#define VARYING_FORMAT(ntype, nsz, ptype, psz) \ 49 { \ 50 .type = nir_type_ ## ntype ## nsz, \ 51 .formats = { \ 52 PIPE_FORMAT_R ## psz ## _ ## ptype, \ 53 PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \ 54 PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \ 55 PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## A ## psz ## _ ## ptype, \ 56 } \ 57 } 58 59 static const struct { 60 nir_alu_type type; 61 enum pipe_format formats[4]; 62 } conv[] = { 63 VARYING_FORMAT(float, 32, FLOAT, 32), 64 VARYING_FORMAT(int, 32, SINT, 32), 65 VARYING_FORMAT(uint, 32, UINT, 32), 66 VARYING_FORMAT(float, 16, FLOAT, 16), 67 VARYING_FORMAT(int, 16, SINT, 16), 68 VARYING_FORMAT(uint, 16, UINT, 16), 69 VARYING_FORMAT(int, 8, SINT, 8), 70 VARYING_FORMAT(uint, 8, UINT, 8), 71 VARYING_FORMAT(bool, 32, UINT, 32), 72 VARYING_FORMAT(bool, 16, UINT, 16), 73 VARYING_FORMAT(bool, 8, UINT, 8), 74 VARYING_FORMAT(bool, 1, UINT, 8), 75 }; 76#undef VARYING_FORMAT 77 78 assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats)); 79 80 for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) { 81 if (conv[i].type == t) 82 return conv[i].formats[ncomps - 1]; 83 } 84 85 return PIPE_FORMAT_NONE; 86} 87 88static void 89collect_varyings(nir_shader *s, nir_variable_mode varying_mode, 90 struct pan_shader_varying *varyings, 91 unsigned *varying_count) 92{ 93 *varying_count = 0; 94 95 unsigned comps[PAN_MAX_VARYINGS] = { 0 }; 96 97 nir_foreach_variable_with_modes(var, s, varying_mode) { 98 unsigned loc = var->data.driver_location; 99 const struct glsl_type *column = 100 glsl_without_array_or_matrix(var->type); 101 unsigned chan = glsl_get_components(column); 102 103 /* If we have a fractional location added, we need to increase the size 104 * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4. 105 * We could do better but this is an edge case as it is, normally 106 * packed varyings will be aligned. 107 */ 108 chan += var->data.location_frac; 109 comps[loc] = MAX2(comps[loc], chan); 110 } 111 112 nir_foreach_variable_with_modes(var, s, varying_mode) { 113 unsigned loc = var->data.driver_location; 114 unsigned sz = glsl_count_attribute_slots(var->type, FALSE); 115 const struct glsl_type *column = 116 glsl_without_array_or_matrix(var->type); 117 enum glsl_base_type base_type = glsl_get_base_type(column); 118 unsigned chan = comps[loc]; 119 120 nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type); 121 type = nir_alu_type_get_base_type(type); 122 123 /* Can't do type conversion since GLSL IR packs in funny ways */ 124 if (PAN_ARCH >= 6 && var->data.interpolation == INTERP_MODE_FLAT) 125 type = nir_type_uint; 126 127 /* Demote to fp16 where possible. int16 varyings are TODO as the hw 128 * will saturate instead of wrap which is not conformant, so we need to 129 * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get 130 * the intended behaviour. 131 */ 132 if (type == nir_type_float && 133 (var->data.precision == GLSL_PRECISION_MEDIUM || 134 var->data.precision == GLSL_PRECISION_LOW) && 135 !s->info.has_transform_feedback_varyings) { 136 type |= 16; 137 } else { 138 type |= 32; 139 } 140 141 enum pipe_format format = varying_format(type, chan); 142 assert(format != PIPE_FORMAT_NONE); 143 144 for (int c = 0; c < sz; ++c) { 145 assert(loc + c < PAN_MAX_VARYINGS); 146 varyings[loc + c].location = var->data.location + c; 147 varyings[loc + c].format = format; 148 } 149 150 *varying_count = MAX2(*varying_count, loc + sz); 151 } 152} 153 154#if PAN_ARCH >= 6 155static enum mali_register_file_format 156bifrost_blend_type_from_nir(nir_alu_type nir_type) 157{ 158 switch(nir_type) { 159 case 0: /* Render target not in use */ 160 return 0; 161 case nir_type_float16: 162 return MALI_REGISTER_FILE_FORMAT_F16; 163 case nir_type_float32: 164 return MALI_REGISTER_FILE_FORMAT_F32; 165 case nir_type_int32: 166 return MALI_REGISTER_FILE_FORMAT_I32; 167 case nir_type_uint32: 168 return MALI_REGISTER_FILE_FORMAT_U32; 169 case nir_type_int16: 170 return MALI_REGISTER_FILE_FORMAT_I16; 171 case nir_type_uint16: 172 return MALI_REGISTER_FILE_FORMAT_U16; 173 default: 174 unreachable("Unsupported blend shader type for NIR alu type"); 175 return 0; 176 } 177} 178#endif 179 180void 181GENX(pan_shader_compile)(nir_shader *s, 182 struct panfrost_compile_inputs *inputs, 183 struct util_dynarray *binary, 184 struct pan_shader_info *info) 185{ 186 memset(info, 0, sizeof(*info)); 187 188#if PAN_ARCH >= 6 189 bifrost_compile_shader_nir(s, inputs, binary, info); 190#else 191 for (unsigned i = 0; i < ARRAY_SIZE(inputs->rt_formats); i++) { 192 enum pipe_format fmt = inputs->rt_formats[i]; 193 unsigned wb_fmt = panfrost_blendable_formats_v6[fmt].writeback; 194 195 if (wb_fmt < MALI_COLOR_FORMAT_R8) 196 inputs->raw_fmt_mask |= BITFIELD_BIT(i); 197 } 198 199 midgard_compile_shader_nir(s, inputs, binary, info); 200#endif 201 202 info->stage = s->info.stage; 203 info->contains_barrier = s->info.uses_memory_barrier || 204 s->info.uses_control_barrier; 205 info->separable = s->info.separate_shader; 206 207 switch (info->stage) { 208 case MESA_SHADER_VERTEX: 209 info->attribute_count = util_bitcount64(s->info.inputs_read); 210 211#if PAN_ARCH <= 5 212 bool vertex_id = BITSET_TEST(s->info.system_values_read, 213 SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); 214 if (vertex_id) 215 info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1); 216 217 bool instance_id = BITSET_TEST(s->info.system_values_read, 218 SYSTEM_VALUE_INSTANCE_ID); 219 if (instance_id) 220 info->attribute_count = MAX2(info->attribute_count, PAN_INSTANCE_ID + 1); 221#endif 222 223 info->vs.writes_point_size = 224 s->info.outputs_written & (1 << VARYING_SLOT_PSIZ); 225 collect_varyings(s, nir_var_shader_out, info->varyings.output, 226 &info->varyings.output_count); 227 break; 228 case MESA_SHADER_FRAGMENT: 229 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) 230 info->fs.writes_depth = true; 231 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) 232 info->fs.writes_stencil = true; 233 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) 234 info->fs.writes_coverage = true; 235 236 info->fs.outputs_read = s->info.outputs_read >> FRAG_RESULT_DATA0; 237 info->fs.outputs_written = s->info.outputs_written >> FRAG_RESULT_DATA0; 238 239 /* EXT_shader_framebuffer_fetch requires per-sample */ 240 info->fs.sample_shading = s->info.fs.uses_sample_shading || 241 info->fs.outputs_read; 242 243 info->fs.can_discard = s->info.fs.uses_discard; 244 info->fs.helper_invocations = s->info.fs.needs_quad_helper_invocations; 245 info->fs.early_fragment_tests = s->info.fs.early_fragment_tests; 246 247 /* List of reasons we need to execute frag shaders when things 248 * are masked off */ 249 250 info->fs.sidefx = s->info.writes_memory || 251 s->info.fs.uses_discard || 252 s->info.fs.uses_demote; 253 254 /* With suitable ZSA/blend, is early-z possible? */ 255 info->fs.can_early_z = 256 !info->fs.sidefx && 257 !info->fs.writes_depth && 258 !info->fs.writes_stencil && 259 !info->fs.writes_coverage; 260 261 /* Similiarly with suitable state, is FPK possible? */ 262 info->fs.can_fpk = 263 !info->fs.writes_depth && 264 !info->fs.writes_stencil && 265 !info->fs.writes_coverage && 266 !info->fs.can_discard && 267 !info->fs.outputs_read; 268 269 info->fs.reads_frag_coord = 270 (s->info.inputs_read & (1 << VARYING_SLOT_POS)) || 271 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); 272 info->fs.reads_point_coord = 273 s->info.inputs_read & (1 << VARYING_SLOT_PNTC); 274 info->fs.reads_face = 275 (s->info.inputs_read & (1 << VARYING_SLOT_FACE)) || 276 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); 277 info->fs.reads_sample_id = 278 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID); 279 info->fs.reads_sample_pos = 280 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS); 281 info->fs.reads_sample_mask_in = 282 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); 283 info->fs.reads_helper_invocation = 284 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION); 285 collect_varyings(s, nir_var_shader_in, info->varyings.input, 286 &info->varyings.input_count); 287 break; 288 case MESA_SHADER_COMPUTE: 289 info->wls_size = s->info.shared_size; 290 break; 291 default: 292 unreachable("Unknown shader state"); 293 } 294 295 info->outputs_written = s->info.outputs_written; 296 297 /* Sysvals have dedicated UBO */ 298 if (info->sysvals.sysval_count) 299 info->ubo_count = MAX2(s->info.num_ubos + 1, inputs->sysval_ubo + 1); 300 else 301 info->ubo_count = s->info.num_ubos; 302 303 info->attribute_count += util_last_bit(s->info.images_used); 304 info->writes_global = s->info.writes_memory; 305 306 info->sampler_count = info->texture_count = BITSET_LAST_BIT(s->info.textures_used); 307 308#if PAN_ARCH >= 6 309 /* This is "redundant" information, but is needed in a draw-time hot path */ 310 for (unsigned i = 0; i < ARRAY_SIZE(info->bifrost.blend); ++i) { 311 info->bifrost.blend[i].format = 312 bifrost_blend_type_from_nir(info->bifrost.blend[i].type); 313 } 314#endif 315} 316