radv_shader.h revision 993e1d59
1/* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright © 2015 Intel Corporation 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the next 16 * paragraph) shall be included in all copies or substantial portions of the 17 * Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 * IN THE SOFTWARE. 26 */ 27 28#ifndef RADV_SHADER_H 29#define RADV_SHADER_H 30 31#include "radv_debug.h" 32#include "radv_private.h" 33 34#include "nir/nir.h" 35 36/* descriptor index into scratch ring offsets */ 37#define RING_SCRATCH 0 38#define RING_ESGS_VS 1 39#define RING_ESGS_GS 2 40#define RING_GSVS_VS 3 41#define RING_GSVS_GS 4 42#define RING_HS_TESS_FACTOR 5 43#define RING_HS_TESS_OFFCHIP 6 44#define RING_PS_SAMPLE_POSITIONS 7 45 46// Match MAX_SETS from radv_descriptor_set.h 47#define RADV_UD_MAX_SETS MAX_SETS 48 49#define RADV_NUM_PHYSICAL_VGPRS 256 50 51struct radv_shader_module { 52 struct nir_shader *nir; 53 unsigned char sha1[20]; 54 uint32_t size; 55 char data[0]; 56}; 57 58enum { 59 RADV_ALPHA_ADJUST_NONE = 0, 60 RADV_ALPHA_ADJUST_SNORM = 1, 61 RADV_ALPHA_ADJUST_SINT = 2, 62 RADV_ALPHA_ADJUST_SSCALED = 3, 63}; 64 65struct radv_vs_variant_key { 66 uint32_t instance_rate_inputs; 67 uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; 68 69 /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. 70 * so we may need to fix it up. */ 71 uint64_t alpha_adjust; 72 73 uint32_t as_es:1; 74 uint32_t as_ls:1; 75 uint32_t export_prim_id:1; 76 uint32_t export_layer_id:1; 77}; 78 79struct radv_tes_variant_key { 80 uint32_t as_es:1; 81 uint32_t export_prim_id:1; 82 uint32_t export_layer_id:1; 83 uint8_t num_patches; 84 uint8_t tcs_num_outputs; 85}; 86 87struct radv_tcs_variant_key { 88 struct radv_vs_variant_key vs_key; 89 unsigned primitive_mode; 90 unsigned input_vertices; 91 unsigned num_inputs; 92 uint32_t tes_reads_tess_factors:1; 93}; 94 95struct radv_fs_variant_key { 96 uint32_t col_format; 97 uint8_t log2_ps_iter_samples; 98 uint8_t num_samples; 99 uint32_t is_int8; 100 uint32_t is_int10; 101}; 102 103struct radv_shader_variant_key { 104 union { 105 struct radv_vs_variant_key vs; 106 struct radv_fs_variant_key fs; 107 struct radv_tes_variant_key tes; 108 struct radv_tcs_variant_key tcs; 109 }; 110 bool has_multiview_view_index; 111}; 112 113struct radv_nir_compiler_options { 114 struct radv_pipeline_layout *layout; 115 struct radv_shader_variant_key key; 116 bool unsafe_math; 117 bool supports_spill; 118 bool clamp_shadow_reference; 119 bool dump_shader; 120 bool dump_preoptir; 121 bool record_llvm_ir; 122 bool check_ir; 123 enum radeon_family family; 124 enum chip_class chip_class; 125 uint32_t tess_offchip_block_dw_size; 126 uint32_t address32_hi; 127}; 128 129enum radv_ud_index { 130 AC_UD_SCRATCH_RING_OFFSETS = 0, 131 AC_UD_PUSH_CONSTANTS = 1, 132 AC_UD_INDIRECT_DESCRIPTOR_SETS = 2, 133 AC_UD_VIEW_INDEX = 3, 134 AC_UD_STREAMOUT_BUFFERS = 4, 135 AC_UD_SHADER_START = 5, 136 AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START, 137 AC_UD_VS_BASE_VERTEX_START_INSTANCE, 138 AC_UD_VS_MAX_UD, 139 AC_UD_PS_MAX_UD, 140 AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START, 141 AC_UD_CS_MAX_UD, 142 AC_UD_GS_MAX_UD, 143 AC_UD_TCS_MAX_UD, 144 AC_UD_TES_MAX_UD, 145 AC_UD_MAX_UD = AC_UD_TCS_MAX_UD, 146}; 147 148struct radv_stream_output { 149 uint8_t location; 150 uint8_t buffer; 151 uint16_t offset; 152 uint8_t component_mask; 153 uint8_t stream; 154}; 155 156struct radv_streamout_info { 157 uint16_t num_outputs; 158 struct radv_stream_output outputs[MAX_SO_OUTPUTS]; 159 uint16_t strides[MAX_SO_BUFFERS]; 160 uint32_t enabled_stream_buffers_mask; 161}; 162 163struct radv_shader_info { 164 bool loads_push_constants; 165 uint32_t desc_set_used_mask; 166 bool needs_multiview_view_index; 167 bool uses_invocation_id; 168 bool uses_prim_id; 169 struct { 170 uint64_t ls_outputs_written; 171 uint8_t input_usage_mask[VERT_ATTRIB_MAX]; 172 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 173 bool has_vertex_buffers; /* needs vertex buffers and base/start */ 174 bool needs_draw_id; 175 bool needs_instance_id; 176 } vs; 177 struct { 178 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 179 uint8_t num_stream_output_components[4]; 180 uint8_t output_streams[VARYING_SLOT_VAR31 + 1]; 181 uint8_t max_stream; 182 } gs; 183 struct { 184 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 185 } tes; 186 struct { 187 bool force_persample; 188 bool needs_sample_positions; 189 bool uses_input_attachments; 190 bool writes_memory; 191 bool writes_z; 192 bool writes_stencil; 193 bool writes_sample_mask; 194 bool has_pcoord; 195 bool prim_id_input; 196 bool layer_input; 197 uint8_t num_input_clips_culls; 198 } ps; 199 struct { 200 bool uses_grid_size; 201 bool uses_block_id[3]; 202 bool uses_thread_id[3]; 203 bool uses_local_invocation_idx; 204 } cs; 205 struct { 206 uint64_t outputs_written; 207 uint64_t patch_outputs_written; 208 } tcs; 209 210 struct radv_streamout_info so; 211}; 212 213struct radv_userdata_info { 214 int8_t sgpr_idx; 215 uint8_t num_sgprs; 216 bool indirect; 217}; 218 219struct radv_userdata_locations { 220 struct radv_userdata_info descriptor_sets[RADV_UD_MAX_SETS]; 221 struct radv_userdata_info shader_data[AC_UD_MAX_UD]; 222 uint32_t descriptor_sets_enabled; 223}; 224 225struct radv_vs_output_info { 226 uint8_t vs_output_param_offset[VARYING_SLOT_MAX]; 227 uint8_t clip_dist_mask; 228 uint8_t cull_dist_mask; 229 uint8_t param_exports; 230 bool writes_pointsize; 231 bool writes_layer; 232 bool writes_viewport_index; 233 bool export_prim_id; 234 unsigned pos_exports; 235}; 236 237struct radv_es_output_info { 238 uint32_t esgs_itemsize; 239}; 240 241struct radv_shader_variant_info { 242 struct radv_userdata_locations user_sgprs_locs; 243 struct radv_shader_info info; 244 unsigned num_user_sgprs; 245 unsigned num_input_sgprs; 246 unsigned num_input_vgprs; 247 unsigned private_mem_vgprs; 248 bool need_indirect_descriptor_sets; 249 struct { 250 struct { 251 struct radv_vs_output_info outinfo; 252 struct radv_es_output_info es_info; 253 unsigned vgpr_comp_cnt; 254 bool as_es; 255 bool as_ls; 256 } vs; 257 struct { 258 unsigned num_interp; 259 uint32_t input_mask; 260 uint32_t flat_shaded_mask; 261 uint32_t float16_shaded_mask; 262 bool can_discard; 263 bool early_fragment_test; 264 } fs; 265 struct { 266 unsigned block_size[3]; 267 } cs; 268 struct { 269 unsigned vertices_in; 270 unsigned vertices_out; 271 unsigned output_prim; 272 unsigned invocations; 273 unsigned gsvs_vertex_size; 274 unsigned max_gsvs_emit_size; 275 unsigned es_type; /* GFX9: VS or TES */ 276 } gs; 277 struct { 278 unsigned tcs_vertices_out; 279 uint32_t num_patches; 280 uint32_t lds_size; 281 } tcs; 282 struct { 283 struct radv_vs_output_info outinfo; 284 struct radv_es_output_info es_info; 285 bool as_es; 286 unsigned primitive_mode; 287 enum gl_tess_spacing spacing; 288 bool ccw; 289 bool point_mode; 290 } tes; 291 }; 292}; 293 294struct radv_shader_variant { 295 uint32_t ref_count; 296 297 struct radeon_winsys_bo *bo; 298 uint64_t bo_offset; 299 struct ac_shader_config config; 300 uint32_t code_size; 301 struct radv_shader_variant_info info; 302 unsigned rsrc1; 303 unsigned rsrc2; 304 305 /* debug only */ 306 uint32_t *spirv; 307 uint32_t spirv_size; 308 struct nir_shader *nir; 309 char *disasm_string; 310 char *llvm_ir_string; 311 312 struct list_head slab_list; 313}; 314 315struct radv_shader_slab { 316 struct list_head slabs; 317 struct list_head shaders; 318 struct radeon_winsys_bo *bo; 319 uint64_t size; 320 char *ptr; 321}; 322 323void 324radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, 325 bool allow_copies); 326 327nir_shader * 328radv_shader_compile_to_nir(struct radv_device *device, 329 struct radv_shader_module *module, 330 const char *entrypoint_name, 331 gl_shader_stage stage, 332 const VkSpecializationInfo *spec_info, 333 const VkPipelineCreateFlags flags); 334 335void * 336radv_alloc_shader_memory(struct radv_device *device, 337 struct radv_shader_variant *shader); 338 339void 340radv_destroy_shader_slabs(struct radv_device *device); 341 342struct radv_shader_variant * 343radv_shader_variant_create(struct radv_device *device, 344 struct radv_shader_module *module, 345 struct nir_shader *const *shaders, 346 int shader_count, 347 struct radv_pipeline_layout *layout, 348 const struct radv_shader_variant_key *key, 349 void **code_out, 350 unsigned *code_size_out); 351 352struct radv_shader_variant * 353radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir, 354 void **code_out, unsigned *code_size_out, 355 bool multiview); 356 357void 358radv_shader_variant_destroy(struct radv_device *device, 359 struct radv_shader_variant *variant); 360 361const char * 362radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage); 363 364void 365radv_shader_dump_stats(struct radv_device *device, 366 struct radv_shader_variant *variant, 367 gl_shader_stage stage, 368 FILE *file); 369 370static inline bool 371radv_can_dump_shader(struct radv_device *device, 372 struct radv_shader_module *module, 373 bool is_gs_copy_shader) 374{ 375 if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) 376 return false; 377 378 /* Only dump non-meta shaders, useful for debugging purposes. */ 379 return (module && !module->nir) || is_gs_copy_shader; 380} 381 382static inline bool 383radv_can_dump_shader_stats(struct radv_device *device, 384 struct radv_shader_module *module) 385{ 386 /* Only dump non-meta shader stats. */ 387 return device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS && 388 module && !module->nir; 389} 390 391static inline unsigned shader_io_get_unique_index(gl_varying_slot slot) 392{ 393 /* handle patch indices separate */ 394 if (slot == VARYING_SLOT_TESS_LEVEL_OUTER) 395 return 0; 396 if (slot == VARYING_SLOT_TESS_LEVEL_INNER) 397 return 1; 398 if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX) 399 return 2 + (slot - VARYING_SLOT_PATCH0); 400 if (slot == VARYING_SLOT_POS) 401 return 0; 402 if (slot == VARYING_SLOT_PSIZ) 403 return 1; 404 if (slot == VARYING_SLOT_CLIP_DIST0) 405 return 2; 406 /* 3 is reserved for clip dist as well */ 407 if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) 408 return 4 + (slot - VARYING_SLOT_VAR0); 409 unreachable("illegal slot in get unique index\n"); 410} 411 412static inline uint32_t 413radv_get_num_physical_sgprs(struct radv_physical_device *physical_device) 414{ 415 return physical_device->rad_info.chip_class >= VI ? 800 : 512; 416} 417 418#endif 419