1/* 2 * Copyright © 2017 Red Hat 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23#include "radv_private.h" 24#include "radv_shader.h" 25#include "nir/nir.h" 26#include "nir/nir_deref.h" 27#include "nir/nir_xfb_info.h" 28 29static void mark_sampler_desc(const nir_variable *var, 30 struct radv_shader_info *info) 31{ 32 info->desc_set_used_mask |= (1 << var->data.descriptor_set); 33} 34 35static void mark_ls_output(struct radv_shader_info *info, 36 uint32_t param, int num_slots) 37{ 38 uint64_t mask = (1ull << num_slots) - 1ull; 39 info->vs.ls_outputs_written |= (mask << param); 40} 41 42static void mark_tess_output(struct radv_shader_info *info, 43 bool is_patch, uint32_t param, int num_slots) 44{ 45 uint64_t mask = (1ull << num_slots) - 1ull; 46 if (is_patch) 47 info->tcs.patch_outputs_written |= (mask << param); 48 else 49 info->tcs.outputs_written |= (mask << param); 50} 51 52static void 53get_deref_offset(nir_deref_instr *instr, 54 unsigned *const_out) 55{ 56 nir_variable *var = nir_deref_instr_get_variable(instr); 57 nir_deref_path path; 58 unsigned idx_lvl = 1; 59 60 if (var->data.compact) { 61 assert(instr->deref_type == nir_deref_type_array); 62 *const_out = nir_src_as_uint(instr->arr.index); 63 return; 64 } 65 66 nir_deref_path_init(&path, instr, NULL); 67 68 uint32_t const_offset = 0; 69 70 for (; path.path[idx_lvl]; ++idx_lvl) { 71 const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type; 72 if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) { 73 unsigned index = path.path[idx_lvl]->strct.index; 74 75 for (unsigned i = 0; i < index; i++) { 76 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i); 77 const_offset += glsl_count_attribute_slots(ft, false); 78 } 79 } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) { 80 unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, false); 81 if (nir_src_is_const(path.path[idx_lvl]->arr.index)) 82 const_offset += nir_src_as_uint(path.path[idx_lvl]->arr.index) * size; 83 } else 84 unreachable("Uhandled deref type in get_deref_instr_offset"); 85 } 86 87 *const_out = const_offset; 88 89 nir_deref_path_finish(&path); 90} 91 92static void 93gather_intrinsic_load_deref_info(const nir_shader *nir, 94 const nir_intrinsic_instr *instr, 95 struct radv_shader_info *info) 96{ 97 switch (nir->info.stage) { 98 case MESA_SHADER_VERTEX: { 99 nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); 100 101 if (var && var->data.mode == nir_var_shader_in) { 102 unsigned idx = var->data.location; 103 uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa); 104 105 info->vs.input_usage_mask[idx] |= 106 mask << var->data.location_frac; 107 } 108 break; 109 } 110 default: 111 break; 112 } 113} 114 115static uint32_t 116widen_writemask(uint32_t wrmask) 117{ 118 uint32_t new_wrmask = 0; 119 for(unsigned i = 0; i < 4; i++) 120 new_wrmask |= (wrmask & (1 << i) ? 0x3 : 0x0) << (i * 2); 121 return new_wrmask; 122} 123 124static void 125set_output_usage_mask(const nir_shader *nir, const nir_intrinsic_instr *instr, 126 uint8_t *output_usage_mask) 127{ 128 nir_deref_instr *deref_instr = 129 nir_instr_as_deref(instr->src[0].ssa->parent_instr); 130 nir_variable *var = nir_deref_instr_get_variable(deref_instr); 131 unsigned attrib_count = glsl_count_attribute_slots(deref_instr->type, false); 132 unsigned idx = var->data.location; 133 unsigned comp = var->data.location_frac; 134 unsigned const_offset = 0; 135 136 get_deref_offset(deref_instr, &const_offset); 137 138 if (var->data.compact) { 139 assert(!glsl_type_is_64bit(deref_instr->type)); 140 const_offset += comp; 141 output_usage_mask[idx + const_offset / 4] |= 1 << (const_offset % 4); 142 return; 143 } 144 145 uint32_t wrmask = nir_intrinsic_write_mask(instr); 146 if (glsl_type_is_64bit(deref_instr->type)) 147 wrmask = widen_writemask(wrmask); 148 149 for (unsigned i = 0; i < attrib_count; i++) 150 output_usage_mask[idx + i + const_offset] |= 151 ((wrmask >> (i * 4)) & 0xf) << comp; 152} 153 154static void 155gather_intrinsic_store_deref_info(const nir_shader *nir, 156 const nir_intrinsic_instr *instr, 157 struct radv_shader_info *info) 158{ 159 nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); 160 161 if (var && var->data.mode == nir_var_shader_out) { 162 unsigned idx = var->data.location; 163 164 switch (nir->info.stage) { 165 case MESA_SHADER_VERTEX: 166 set_output_usage_mask(nir, instr, 167 info->vs.output_usage_mask); 168 break; 169 case MESA_SHADER_GEOMETRY: 170 set_output_usage_mask(nir, instr, 171 info->gs.output_usage_mask); 172 break; 173 case MESA_SHADER_TESS_EVAL: 174 set_output_usage_mask(nir, instr, 175 info->tes.output_usage_mask); 176 break; 177 case MESA_SHADER_TESS_CTRL: { 178 unsigned param = shader_io_get_unique_index(idx); 179 const struct glsl_type *type = var->type; 180 181 if (!var->data.patch) 182 type = glsl_get_array_element(var->type); 183 184 unsigned slots = 185 var->data.compact ? DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4) 186 : glsl_count_attribute_slots(type, false); 187 188 mark_tess_output(info, var->data.patch, param, slots); 189 break; 190 } 191 default: 192 break; 193 } 194 } 195} 196 197static void 198gather_push_constant_info(const nir_shader *nir, 199 const nir_intrinsic_instr *instr, 200 struct radv_shader_info *info) 201{ 202 int base = nir_intrinsic_base(instr); 203 204 if (!nir_src_is_const(instr->src[0])) { 205 info->has_indirect_push_constants = true; 206 } else { 207 uint32_t min = base + nir_src_as_uint(instr->src[0]); 208 uint32_t max = min + instr->num_components * 4; 209 210 info->max_push_constant_used = 211 MAX2(max, info->max_push_constant_used); 212 info->min_push_constant_used = 213 MIN2(min, info->min_push_constant_used); 214 } 215 216 if (instr->dest.ssa.bit_size != 32) 217 info->has_only_32bit_push_constants = false; 218 219 info->loads_push_constants = true; 220} 221 222static void 223gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, 224 struct radv_shader_info *info) 225{ 226 switch (instr->intrinsic) { 227 case nir_intrinsic_interp_deref_at_sample: 228 info->ps.needs_sample_positions = true; 229 break; 230 case nir_intrinsic_load_draw_id: 231 info->vs.needs_draw_id = true; 232 break; 233 case nir_intrinsic_load_instance_id: 234 info->vs.needs_instance_id = true; 235 break; 236 case nir_intrinsic_load_num_work_groups: 237 info->cs.uses_grid_size = true; 238 break; 239 case nir_intrinsic_load_local_invocation_id: 240 case nir_intrinsic_load_work_group_id: { 241 unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); 242 while (mask) { 243 unsigned i = u_bit_scan(&mask); 244 245 if (instr->intrinsic == nir_intrinsic_load_work_group_id) 246 info->cs.uses_block_id[i] = true; 247 else 248 info->cs.uses_thread_id[i] = true; 249 } 250 break; 251 } 252 case nir_intrinsic_load_local_invocation_index: 253 case nir_intrinsic_load_subgroup_id: 254 case nir_intrinsic_load_num_subgroups: 255 info->cs.uses_local_invocation_idx = true; 256 break; 257 case nir_intrinsic_load_sample_id: 258 info->ps.force_persample = true; 259 break; 260 case nir_intrinsic_load_sample_pos: 261 info->ps.force_persample = true; 262 break; 263 case nir_intrinsic_load_view_index: 264 info->needs_multiview_view_index = true; 265 if (nir->info.stage == MESA_SHADER_FRAGMENT) 266 info->ps.layer_input = true; 267 break; 268 case nir_intrinsic_load_invocation_id: 269 info->uses_invocation_id = true; 270 break; 271 case nir_intrinsic_load_primitive_id: 272 info->uses_prim_id = true; 273 break; 274 case nir_intrinsic_load_push_constant: 275 gather_push_constant_info(nir, instr, info); 276 break; 277 case nir_intrinsic_vulkan_resource_index: 278 info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr)); 279 break; 280 case nir_intrinsic_image_deref_load: 281 case nir_intrinsic_image_deref_store: 282 case nir_intrinsic_image_deref_atomic_add: 283 case nir_intrinsic_image_deref_atomic_min: 284 case nir_intrinsic_image_deref_atomic_max: 285 case nir_intrinsic_image_deref_atomic_and: 286 case nir_intrinsic_image_deref_atomic_or: 287 case nir_intrinsic_image_deref_atomic_xor: 288 case nir_intrinsic_image_deref_atomic_exchange: 289 case nir_intrinsic_image_deref_atomic_comp_swap: 290 case nir_intrinsic_image_deref_size: { 291 nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); 292 const struct glsl_type *type = glsl_without_array(var->type); 293 294 enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); 295 if (dim == GLSL_SAMPLER_DIM_SUBPASS || 296 dim == GLSL_SAMPLER_DIM_SUBPASS_MS) { 297 info->ps.layer_input = true; 298 info->ps.uses_input_attachments = true; 299 } 300 mark_sampler_desc(var, info); 301 302 if (instr->intrinsic == nir_intrinsic_image_deref_store || 303 instr->intrinsic == nir_intrinsic_image_deref_atomic_add || 304 instr->intrinsic == nir_intrinsic_image_deref_atomic_min || 305 instr->intrinsic == nir_intrinsic_image_deref_atomic_max || 306 instr->intrinsic == nir_intrinsic_image_deref_atomic_and || 307 instr->intrinsic == nir_intrinsic_image_deref_atomic_or || 308 instr->intrinsic == nir_intrinsic_image_deref_atomic_xor || 309 instr->intrinsic == nir_intrinsic_image_deref_atomic_exchange || 310 instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) { 311 if (nir->info.stage == MESA_SHADER_FRAGMENT) 312 info->ps.writes_memory = true; 313 } 314 break; 315 } 316 case nir_intrinsic_store_ssbo: 317 case nir_intrinsic_ssbo_atomic_add: 318 case nir_intrinsic_ssbo_atomic_imin: 319 case nir_intrinsic_ssbo_atomic_umin: 320 case nir_intrinsic_ssbo_atomic_imax: 321 case nir_intrinsic_ssbo_atomic_umax: 322 case nir_intrinsic_ssbo_atomic_and: 323 case nir_intrinsic_ssbo_atomic_or: 324 case nir_intrinsic_ssbo_atomic_xor: 325 case nir_intrinsic_ssbo_atomic_exchange: 326 case nir_intrinsic_ssbo_atomic_comp_swap: 327 if (nir->info.stage == MESA_SHADER_FRAGMENT) 328 info->ps.writes_memory = true; 329 break; 330 case nir_intrinsic_load_deref: 331 gather_intrinsic_load_deref_info(nir, instr, info); 332 break; 333 case nir_intrinsic_store_deref: 334 gather_intrinsic_store_deref_info(nir, instr, info); 335 break; 336 default: 337 break; 338 } 339} 340 341static void 342gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr, 343 struct radv_shader_info *info) 344{ 345 for (unsigned i = 0; i < instr->num_srcs; i++) { 346 switch (instr->src[i].src_type) { 347 case nir_tex_src_texture_deref: 348 mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info); 349 break; 350 case nir_tex_src_sampler_deref: 351 mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info); 352 break; 353 default: 354 break; 355 } 356 } 357} 358 359static void 360gather_info_block(const nir_shader *nir, const nir_block *block, 361 struct radv_shader_info *info) 362{ 363 nir_foreach_instr(instr, block) { 364 switch (instr->type) { 365 case nir_instr_type_intrinsic: 366 gather_intrinsic_info(nir, nir_instr_as_intrinsic(instr), info); 367 break; 368 case nir_instr_type_tex: 369 gather_tex_info(nir, nir_instr_as_tex(instr), info); 370 break; 371 default: 372 break; 373 } 374 } 375} 376 377static void 378gather_info_input_decl_vs(const nir_shader *nir, const nir_variable *var, 379 struct radv_shader_info *info) 380{ 381 int idx = var->data.location; 382 383 if (idx >= VERT_ATTRIB_GENERIC0 && idx <= VERT_ATTRIB_GENERIC15) 384 info->vs.has_vertex_buffers = true; 385} 386 387static void 388gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var, 389 struct radv_shader_info *info) 390{ 391 unsigned attrib_count = glsl_count_attribute_slots(var->type, false); 392 const struct glsl_type *type = glsl_without_array(var->type); 393 int idx = var->data.location; 394 395 switch (idx) { 396 case VARYING_SLOT_PNTC: 397 info->ps.has_pcoord = true; 398 break; 399 case VARYING_SLOT_PRIMITIVE_ID: 400 info->ps.prim_id_input = true; 401 break; 402 case VARYING_SLOT_LAYER: 403 info->ps.layer_input = true; 404 break; 405 case VARYING_SLOT_CLIP_DIST0: 406 case VARYING_SLOT_CLIP_DIST1: 407 info->ps.num_input_clips_culls += attrib_count; 408 break; 409 default: 410 break; 411 } 412 413 if (glsl_get_base_type(type) == GLSL_TYPE_FLOAT) { 414 if (var->data.sample) 415 info->ps.force_persample = true; 416 } 417} 418 419static void 420gather_info_input_decl(const nir_shader *nir, const nir_variable *var, 421 struct radv_shader_info *info) 422{ 423 switch (nir->info.stage) { 424 case MESA_SHADER_VERTEX: 425 gather_info_input_decl_vs(nir, var, info); 426 break; 427 case MESA_SHADER_FRAGMENT: 428 gather_info_input_decl_ps(nir, var, info); 429 break; 430 default: 431 break; 432 } 433} 434 435static void 436gather_info_output_decl_ls(const nir_shader *nir, const nir_variable *var, 437 struct radv_shader_info *info) 438{ 439 int idx = var->data.location; 440 unsigned param = shader_io_get_unique_index(idx); 441 int num_slots = glsl_count_attribute_slots(var->type, false); 442 if (var->data.compact) 443 num_slots = DIV_ROUND_UP(var->data.location_frac + glsl_get_length(var->type), 4); 444 mark_ls_output(info, param, num_slots); 445} 446 447static void 448gather_info_output_decl_ps(const nir_shader *nir, const nir_variable *var, 449 struct radv_shader_info *info) 450{ 451 int idx = var->data.location; 452 453 switch (idx) { 454 case FRAG_RESULT_DEPTH: 455 info->ps.writes_z = true; 456 break; 457 case FRAG_RESULT_STENCIL: 458 info->ps.writes_stencil = true; 459 break; 460 case FRAG_RESULT_SAMPLE_MASK: 461 info->ps.writes_sample_mask = true; 462 break; 463 default: 464 break; 465 } 466} 467 468static void 469gather_info_output_decl_gs(const nir_shader *nir, const nir_variable *var, 470 struct radv_shader_info *info) 471{ 472 unsigned num_components = glsl_get_component_slots(var->type); 473 unsigned stream = var->data.stream; 474 unsigned idx = var->data.location; 475 476 assert(stream < 4); 477 478 info->gs.max_stream = MAX2(info->gs.max_stream, stream); 479 info->gs.num_stream_output_components[stream] += num_components; 480 info->gs.output_streams[idx] = stream; 481} 482 483static void 484gather_info_output_decl(const nir_shader *nir, const nir_variable *var, 485 struct radv_shader_info *info, 486 const struct radv_nir_compiler_options *options) 487{ 488 switch (nir->info.stage) { 489 case MESA_SHADER_FRAGMENT: 490 gather_info_output_decl_ps(nir, var, info); 491 break; 492 case MESA_SHADER_VERTEX: 493 if (options->key.vs.as_ls) 494 gather_info_output_decl_ls(nir, var, info); 495 break; 496 case MESA_SHADER_GEOMETRY: 497 gather_info_output_decl_gs(nir, var, info); 498 break; 499 default: 500 break; 501 } 502} 503 504static void 505gather_xfb_info(const nir_shader *nir, struct radv_shader_info *info) 506{ 507 nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL); 508 struct radv_streamout_info *so = &info->so; 509 510 if (!xfb) 511 return; 512 513 assert(xfb->output_count < MAX_SO_OUTPUTS); 514 so->num_outputs = xfb->output_count; 515 516 for (unsigned i = 0; i < xfb->output_count; i++) { 517 struct radv_stream_output *output = &so->outputs[i]; 518 519 output->buffer = xfb->outputs[i].buffer; 520 output->stream = xfb->buffer_to_stream[xfb->outputs[i].buffer]; 521 output->offset = xfb->outputs[i].offset; 522 output->location = xfb->outputs[i].location; 523 output->component_mask = xfb->outputs[i].component_mask; 524 525 so->enabled_stream_buffers_mask |= 526 (1 << output->buffer) << (output->stream * 4); 527 528 } 529 530 for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) { 531 so->strides[i] = xfb->buffers[i].stride / 4; 532 } 533 534 ralloc_free(xfb); 535} 536 537void 538radv_nir_shader_info_init(struct radv_shader_info *info) 539{ 540 /* Assume that shaders only have 32-bit push constants by default. */ 541 info->min_push_constant_used = UINT8_MAX; 542 info->has_only_32bit_push_constants = true; 543} 544 545void 546radv_nir_shader_info_pass(const struct nir_shader *nir, 547 const struct radv_nir_compiler_options *options, 548 struct radv_shader_info *info) 549{ 550 struct nir_function *func = 551 (struct nir_function *)exec_list_get_head_const(&nir->functions); 552 553 if (options->layout && options->layout->dynamic_offset_count && 554 (options->layout->dynamic_shader_stages & mesa_to_vk_shader_stage(nir->info.stage))) { 555 info->loads_push_constants = true; 556 info->loads_dynamic_offsets = true; 557 } 558 559 nir_foreach_variable(variable, &nir->inputs) 560 gather_info_input_decl(nir, variable, info); 561 562 nir_foreach_block(block, func->impl) { 563 gather_info_block(nir, block, info); 564 } 565 566 nir_foreach_variable(variable, &nir->outputs) 567 gather_info_output_decl(nir, variable, info, options); 568 569 if (nir->info.stage == MESA_SHADER_VERTEX || 570 nir->info.stage == MESA_SHADER_TESS_EVAL || 571 nir->info.stage == MESA_SHADER_GEOMETRY) 572 gather_xfb_info(nir, info); 573} 574