radv_shader.h revision ed98bd31
1/* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright © 2015 Intel Corporation 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the next 16 * paragraph) shall be included in all copies or substantial portions of the 17 * Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 * IN THE SOFTWARE. 26 */ 27 28#ifndef RADV_SHADER_H 29#define RADV_SHADER_H 30 31#include "radv_debug.h" 32#include "radv_private.h" 33 34#include "nir/nir.h" 35 36/* descriptor index into scratch ring offsets */ 37#define RING_SCRATCH 0 38#define RING_ESGS_VS 1 39#define RING_ESGS_GS 2 40#define RING_GSVS_VS 3 41#define RING_GSVS_GS 4 42#define RING_HS_TESS_FACTOR 5 43#define RING_HS_TESS_OFFCHIP 6 44#define RING_PS_SAMPLE_POSITIONS 7 45 46// Match MAX_SETS from radv_descriptor_set.h 47#define RADV_UD_MAX_SETS MAX_SETS 48 49#define RADV_NUM_PHYSICAL_VGPRS 256 50 51struct radv_shader_module { 52 struct nir_shader *nir; 53 unsigned char sha1[20]; 54 uint32_t size; 55 char data[0]; 56}; 57 58enum { 59 RADV_ALPHA_ADJUST_NONE = 0, 60 RADV_ALPHA_ADJUST_SNORM = 1, 61 RADV_ALPHA_ADJUST_SINT = 2, 62 RADV_ALPHA_ADJUST_SSCALED = 3, 63}; 64 65struct radv_vs_variant_key { 66 uint32_t instance_rate_inputs; 67 uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; 68 uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS]; 69 uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS]; 70 uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS]; 71 uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS]; 72 73 /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. 74 * so we may need to fix it up. */ 75 uint64_t alpha_adjust; 76 77 /* For some formats the channels have to be shuffled. */ 78 uint32_t post_shuffle; 79 80 uint32_t as_es:1; 81 uint32_t as_ls:1; 82 uint32_t export_prim_id:1; 83 uint32_t export_layer_id:1; 84}; 85 86struct radv_tes_variant_key { 87 uint32_t as_es:1; 88 uint32_t export_prim_id:1; 89 uint32_t export_layer_id:1; 90 uint8_t num_patches; 91 uint8_t tcs_num_outputs; 92}; 93 94struct radv_tcs_variant_key { 95 struct radv_vs_variant_key vs_key; 96 unsigned primitive_mode; 97 unsigned input_vertices; 98 unsigned num_inputs; 99 uint32_t tes_reads_tess_factors:1; 100}; 101 102struct radv_fs_variant_key { 103 uint32_t col_format; 104 uint8_t log2_ps_iter_samples; 105 uint8_t num_samples; 106 uint32_t is_int8; 107 uint32_t is_int10; 108}; 109 110struct radv_shader_variant_key { 111 union { 112 struct radv_vs_variant_key vs; 113 struct radv_fs_variant_key fs; 114 struct radv_tes_variant_key tes; 115 struct radv_tcs_variant_key tcs; 116 }; 117 bool has_multiview_view_index; 118}; 119 120struct radv_nir_compiler_options { 121 struct radv_pipeline_layout *layout; 122 struct radv_shader_variant_key key; 123 bool unsafe_math; 124 bool supports_spill; 125 bool clamp_shadow_reference; 126 bool dump_shader; 127 bool dump_preoptir; 128 bool record_llvm_ir; 129 bool check_ir; 130 enum radeon_family family; 131 enum chip_class chip_class; 132 uint32_t tess_offchip_block_dw_size; 133 uint32_t address32_hi; 134}; 135 136enum radv_ud_index { 137 AC_UD_SCRATCH_RING_OFFSETS = 0, 138 AC_UD_PUSH_CONSTANTS = 1, 139 AC_UD_INLINE_PUSH_CONSTANTS = 2, 140 AC_UD_INDIRECT_DESCRIPTOR_SETS = 3, 141 AC_UD_VIEW_INDEX = 4, 142 AC_UD_STREAMOUT_BUFFERS = 5, 143 AC_UD_SHADER_START = 6, 144 AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START, 145 AC_UD_VS_BASE_VERTEX_START_INSTANCE, 146 AC_UD_VS_MAX_UD, 147 AC_UD_PS_MAX_UD, 148 AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START, 149 AC_UD_CS_MAX_UD, 150 AC_UD_GS_MAX_UD, 151 AC_UD_TCS_MAX_UD, 152 AC_UD_TES_MAX_UD, 153 AC_UD_MAX_UD = AC_UD_TCS_MAX_UD, 154}; 155 156struct radv_stream_output { 157 uint8_t location; 158 uint8_t buffer; 159 uint16_t offset; 160 uint8_t component_mask; 161 uint8_t stream; 162}; 163 164struct radv_streamout_info { 165 uint16_t num_outputs; 166 struct radv_stream_output outputs[MAX_SO_OUTPUTS]; 167 uint16_t strides[MAX_SO_BUFFERS]; 168 uint32_t enabled_stream_buffers_mask; 169}; 170 171struct radv_shader_info { 172 bool loads_push_constants; 173 bool loads_dynamic_offsets; 174 uint8_t min_push_constant_used; 175 uint8_t max_push_constant_used; 176 bool has_only_32bit_push_constants; 177 bool has_indirect_push_constants; 178 uint8_t num_inline_push_consts; 179 uint8_t base_inline_push_consts; 180 uint32_t desc_set_used_mask; 181 bool needs_multiview_view_index; 182 bool uses_invocation_id; 183 bool uses_prim_id; 184 struct { 185 uint64_t ls_outputs_written; 186 uint8_t input_usage_mask[VERT_ATTRIB_MAX]; 187 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 188 bool has_vertex_buffers; /* needs vertex buffers and base/start */ 189 bool needs_draw_id; 190 bool needs_instance_id; 191 } vs; 192 struct { 193 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 194 uint8_t num_stream_output_components[4]; 195 uint8_t output_streams[VARYING_SLOT_VAR31 + 1]; 196 uint8_t max_stream; 197 } gs; 198 struct { 199 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 200 } tes; 201 struct { 202 bool force_persample; 203 bool needs_sample_positions; 204 bool uses_input_attachments; 205 bool writes_memory; 206 bool writes_z; 207 bool writes_stencil; 208 bool writes_sample_mask; 209 bool has_pcoord; 210 bool prim_id_input; 211 bool layer_input; 212 uint8_t num_input_clips_culls; 213 } ps; 214 struct { 215 bool uses_grid_size; 216 bool uses_block_id[3]; 217 bool uses_thread_id[3]; 218 bool uses_local_invocation_idx; 219 } cs; 220 struct { 221 uint64_t outputs_written; 222 uint64_t patch_outputs_written; 223 } tcs; 224 225 struct radv_streamout_info so; 226}; 227 228struct radv_userdata_info { 229 int8_t sgpr_idx; 230 uint8_t num_sgprs; 231}; 232 233struct radv_userdata_locations { 234 struct radv_userdata_info descriptor_sets[RADV_UD_MAX_SETS]; 235 struct radv_userdata_info shader_data[AC_UD_MAX_UD]; 236 uint32_t descriptor_sets_enabled; 237}; 238 239struct radv_vs_output_info { 240 uint8_t vs_output_param_offset[VARYING_SLOT_MAX]; 241 uint8_t clip_dist_mask; 242 uint8_t cull_dist_mask; 243 uint8_t param_exports; 244 bool writes_pointsize; 245 bool writes_layer; 246 bool writes_viewport_index; 247 bool export_prim_id; 248 unsigned pos_exports; 249}; 250 251struct radv_es_output_info { 252 uint32_t esgs_itemsize; 253}; 254 255struct radv_shader_variant_info { 256 struct radv_userdata_locations user_sgprs_locs; 257 struct radv_shader_info info; 258 unsigned num_user_sgprs; 259 unsigned num_input_sgprs; 260 unsigned num_input_vgprs; 261 unsigned private_mem_vgprs; 262 bool need_indirect_descriptor_sets; 263 struct { 264 struct { 265 struct radv_vs_output_info outinfo; 266 struct radv_es_output_info es_info; 267 unsigned vgpr_comp_cnt; 268 bool as_es; 269 bool as_ls; 270 } vs; 271 struct { 272 unsigned num_interp; 273 uint32_t input_mask; 274 uint32_t flat_shaded_mask; 275 uint32_t float16_shaded_mask; 276 bool can_discard; 277 bool early_fragment_test; 278 } fs; 279 struct { 280 unsigned block_size[3]; 281 } cs; 282 struct { 283 unsigned vertices_in; 284 unsigned vertices_out; 285 unsigned output_prim; 286 unsigned invocations; 287 unsigned gsvs_vertex_size; 288 unsigned max_gsvs_emit_size; 289 unsigned es_type; /* GFX9: VS or TES */ 290 } gs; 291 struct { 292 unsigned tcs_vertices_out; 293 uint32_t num_patches; 294 uint32_t lds_size; 295 } tcs; 296 struct { 297 struct radv_vs_output_info outinfo; 298 struct radv_es_output_info es_info; 299 bool as_es; 300 unsigned primitive_mode; 301 enum gl_tess_spacing spacing; 302 bool ccw; 303 bool point_mode; 304 } tes; 305 }; 306}; 307 308struct radv_shader_variant { 309 uint32_t ref_count; 310 311 struct radeon_winsys_bo *bo; 312 uint64_t bo_offset; 313 struct ac_shader_config config; 314 uint32_t code_size; 315 struct radv_shader_variant_info info; 316 unsigned rsrc1; 317 unsigned rsrc2; 318 319 /* debug only */ 320 uint32_t *spirv; 321 uint32_t spirv_size; 322 struct nir_shader *nir; 323 char *disasm_string; 324 char *llvm_ir_string; 325 326 struct list_head slab_list; 327}; 328 329struct radv_shader_slab { 330 struct list_head slabs; 331 struct list_head shaders; 332 struct radeon_winsys_bo *bo; 333 uint64_t size; 334 char *ptr; 335}; 336 337void 338radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, 339 bool allow_copies); 340bool 341radv_nir_lower_ycbcr_textures(nir_shader *shader, 342 const struct radv_pipeline_layout *layout); 343 344nir_shader * 345radv_shader_compile_to_nir(struct radv_device *device, 346 struct radv_shader_module *module, 347 const char *entrypoint_name, 348 gl_shader_stage stage, 349 const VkSpecializationInfo *spec_info, 350 const VkPipelineCreateFlags flags, 351 const struct radv_pipeline_layout *layout); 352 353void * 354radv_alloc_shader_memory(struct radv_device *device, 355 struct radv_shader_variant *shader); 356 357void 358radv_destroy_shader_slabs(struct radv_device *device); 359 360struct radv_shader_variant * 361radv_shader_variant_create(struct radv_device *device, 362 struct radv_shader_module *module, 363 struct nir_shader *const *shaders, 364 int shader_count, 365 struct radv_pipeline_layout *layout, 366 const struct radv_shader_variant_key *key, 367 void **code_out, 368 unsigned *code_size_out); 369 370struct radv_shader_variant * 371radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir, 372 void **code_out, unsigned *code_size_out, 373 bool multiview); 374 375void 376radv_shader_variant_destroy(struct radv_device *device, 377 struct radv_shader_variant *variant); 378 379const char * 380radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage); 381 382void 383radv_shader_dump_stats(struct radv_device *device, 384 struct radv_shader_variant *variant, 385 gl_shader_stage stage, 386 FILE *file); 387 388static inline bool 389radv_can_dump_shader(struct radv_device *device, 390 struct radv_shader_module *module, 391 bool is_gs_copy_shader) 392{ 393 if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) 394 return false; 395 396 /* Only dump non-meta shaders, useful for debugging purposes. */ 397 return (module && !module->nir) || is_gs_copy_shader; 398} 399 400static inline bool 401radv_can_dump_shader_stats(struct radv_device *device, 402 struct radv_shader_module *module) 403{ 404 /* Only dump non-meta shader stats. */ 405 return device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS && 406 module && !module->nir; 407} 408 409static inline unsigned shader_io_get_unique_index(gl_varying_slot slot) 410{ 411 /* handle patch indices separate */ 412 if (slot == VARYING_SLOT_TESS_LEVEL_OUTER) 413 return 0; 414 if (slot == VARYING_SLOT_TESS_LEVEL_INNER) 415 return 1; 416 if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX) 417 return 2 + (slot - VARYING_SLOT_PATCH0); 418 if (slot == VARYING_SLOT_POS) 419 return 0; 420 if (slot == VARYING_SLOT_PSIZ) 421 return 1; 422 if (slot == VARYING_SLOT_CLIP_DIST0) 423 return 2; 424 if (slot == VARYING_SLOT_CLIP_DIST1) 425 return 3; 426 /* 3 is reserved for clip dist as well */ 427 if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) 428 return 4 + (slot - VARYING_SLOT_VAR0); 429 unreachable("illegal slot in get unique index\n"); 430} 431 432#endif 433