/*
 * Copyright 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_shader_internal.h"
#include "si_pipe.h"

#include "ac_nir_to_llvm.h"

#include "tgsi/tgsi_from_mesa.h"

#include "compiler/nir/nir.h"
#include "compiler/nir_types.h"

static nir_variable* tex_get_texture_var(nir_tex_instr *instr)
{
	for (unsigned i = 0; i < instr->num_srcs; i++) {
		switch (instr->src[i].src_type) {
		case nir_tex_src_texture_deref:
			return nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src));
		default:
			break;
		}
	}

	return NULL;
}

static nir_variable* intrinsic_get_var(nir_intrinsic_instr *instr)
{
	return nir_deref_instr_get_variable(nir_src_as_deref(instr->src[0]));
}

static void gather_intrinsic_load_deref_info(const nir_shader *nir,
					     const nir_intrinsic_instr *instr,
					     nir_variable *var,
					     struct tgsi_shader_info *info)
{
	assert(var && var->data.mode == nir_var_shader_in);

	switch (nir->info.stage) {
	case MESA_SHADER_VERTEX: {
		unsigned i = var->data.driver_location;
		unsigned attrib_count = glsl_count_attribute_slots(var->type, false);

		for (unsigned j = 0; j < attrib_count; j++, i++) {
			if (glsl_type_is_64bit(glsl_without_array(var->type))) {
				/* TODO: set usage mask more accurately for doubles */
				info->input_usage_mask[i] = TGSI_WRITEMASK_XYZW;
			} else {
				uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
				info->input_usage_mask[i] |= mask << var->data.location_frac;
			}
		}
		break;
	}
	default: {
		unsigned semantic_name, semantic_index;
		tgsi_get_gl_varying_semantic(var->data.location, true,
					     &semantic_name, &semantic_index);

		if (semantic_name == TGSI_SEMANTIC_COLOR) {
			uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
			info->colors_read |= mask << (semantic_index * 4);
		}
		break;
	}
	}
}

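/* Record the features used by a single NIR instruction into the
 * tgsi_shader_info flags: derivatives, system values, bindless access,
 * memory writes and interpolation modes.
 */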
static void scan_instruction(const struct nir_shader *nir,
			     struct tgsi_shader_info *info,
			     nir_instr *instr)
{
	if (instr->type == nir_instr_type_alu) {
		nir_alu_instr *alu = nir_instr_as_alu(instr);

		switch (alu->op) {
		case nir_op_fddx:
		case nir_op_fddy:
		case nir_op_fddx_fine:
		case nir_op_fddy_fine:
		case nir_op_fddx_coarse:
		case nir_op_fddy_coarse:
			info->uses_derivatives = true;
			break;
		default:
			break;
		}
	} else if (instr->type == nir_instr_type_tex) {
		nir_tex_instr *tex = nir_instr_as_tex(instr);
		nir_variable *texture = tex_get_texture_var(tex);

		if (!texture) {
			info->samplers_declared |=
				u_bit_consecutive(tex->sampler_index, 1);
		} else {
			if (texture->data.bindless)
				info->uses_bindless_samplers = true;
		}

		switch (tex->op) {
		case nir_texop_tex:
		case nir_texop_txb:
		case nir_texop_lod:
			info->uses_derivatives = true;
			break;
		default:
			break;
		}
	} else if (instr->type == nir_instr_type_intrinsic) {
		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_load_front_face:
			info->uses_frontface = 1;
			break;
		case nir_intrinsic_load_instance_id:
			info->uses_instanceid = 1;
			break;
		case nir_intrinsic_load_invocation_id:
			info->uses_invocationid = true;
			break;
		case nir_intrinsic_load_num_work_groups:
			info->uses_grid_size = true;
			break;
		case nir_intrinsic_load_local_group_size:
			/* The block size is translated to an IMM with a fixed block size. */
			if (info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0)
				info->uses_block_size = true;
			break;
		case nir_intrinsic_load_local_invocation_id:
		case nir_intrinsic_load_work_group_id: {
			unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa);
			while (mask) {
				unsigned i = u_bit_scan(&mask);

				if (intr->intrinsic == nir_intrinsic_load_work_group_id)
					info->uses_block_id[i] = true;
				else
					info->uses_thread_id[i] = true;
			}
			break;
		}
		case nir_intrinsic_load_vertex_id:
			info->uses_vertexid = 1;
			break;
		case nir_intrinsic_load_vertex_id_zero_base:
			info->uses_vertexid_nobase = 1;
			break;
		case nir_intrinsic_load_base_vertex:
			info->uses_basevertex = 1;
			break;
		case nir_intrinsic_load_draw_id:
			info->uses_drawid = 1;
			break;
		case nir_intrinsic_load_primitive_id:
			info->uses_primid = 1;
			break;
		case nir_intrinsic_load_sample_mask_in:
			info->reads_samplemask = true;
			break;
		case nir_intrinsic_load_tess_level_inner:
		case nir_intrinsic_load_tess_level_outer:
			info->reads_tess_factors = true;
			break;
		case nir_intrinsic_bindless_image_load:
			info->uses_bindless_images = true;

			if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF)
				info->uses_bindless_buffer_load = true;
			else
				info->uses_bindless_image_load = true;
			break;
		case nir_intrinsic_bindless_image_size:
		case nir_intrinsic_bindless_image_samples:
			info->uses_bindless_images = true;
			break;
		case nir_intrinsic_bindless_image_store:
			info->uses_bindless_images = true;

			if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF)
				info->uses_bindless_buffer_store = true;
			else
				info->uses_bindless_image_store = true;

			info->writes_memory = true;
			break;
		case nir_intrinsic_image_deref_store:
			info->writes_memory = true;
			break;
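		/* Image atomics, bindless or not, read-modify-write memory,
		 * so they count as memory writes.
		 */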
		case nir_intrinsic_bindless_image_atomic_add:
		case nir_intrinsic_bindless_image_atomic_min:
		case nir_intrinsic_bindless_image_atomic_max:
		case nir_intrinsic_bindless_image_atomic_and:
		case nir_intrinsic_bindless_image_atomic_or:
		case nir_intrinsic_bindless_image_atomic_xor:
		case nir_intrinsic_bindless_image_atomic_exchange:
		case nir_intrinsic_bindless_image_atomic_comp_swap:
			info->uses_bindless_images = true;

			if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF)
				info->uses_bindless_buffer_atomic = true;
			else
				info->uses_bindless_image_atomic = true;

			info->writes_memory = true;
			break;
		case nir_intrinsic_image_deref_atomic_add:
		case nir_intrinsic_image_deref_atomic_min:
		case nir_intrinsic_image_deref_atomic_max:
		case nir_intrinsic_image_deref_atomic_and:
		case nir_intrinsic_image_deref_atomic_or:
		case nir_intrinsic_image_deref_atomic_xor:
		case nir_intrinsic_image_deref_atomic_exchange:
		case nir_intrinsic_image_deref_atomic_comp_swap:
			info->writes_memory = true;
			break;
		case nir_intrinsic_store_ssbo:
		case nir_intrinsic_ssbo_atomic_add:
		case nir_intrinsic_ssbo_atomic_imin:
		case nir_intrinsic_ssbo_atomic_umin:
		case nir_intrinsic_ssbo_atomic_imax:
		case nir_intrinsic_ssbo_atomic_umax:
		case nir_intrinsic_ssbo_atomic_and:
		case nir_intrinsic_ssbo_atomic_or:
		case nir_intrinsic_ssbo_atomic_xor:
		case nir_intrinsic_ssbo_atomic_exchange:
		case nir_intrinsic_ssbo_atomic_comp_swap:
			info->writes_memory = true;
			break;
		case nir_intrinsic_load_deref: {
			nir_variable *var = intrinsic_get_var(intr);
			nir_variable_mode mode = var->data.mode;
			enum glsl_base_type base_type =
				glsl_get_base_type(glsl_without_array(var->type));

			if (mode == nir_var_shader_in) {
				gather_intrinsic_load_deref_info(nir, intr, var, info);

				switch (var->data.interpolation) {
				case INTERP_MODE_NONE:
					if (glsl_base_type_is_integer(base_type))
						break;

					/* fall-through */
				case INTERP_MODE_SMOOTH:
					if (var->data.sample)
						info->uses_persp_sample = true;
					else if (var->data.centroid)
						info->uses_persp_centroid = true;
					else
						info->uses_persp_center = true;
					break;

				case INTERP_MODE_NOPERSPECTIVE:
					if (var->data.sample)
						info->uses_linear_sample = true;
					else if (var->data.centroid)
						info->uses_linear_centroid = true;
					else
						info->uses_linear_center = true;
					break;
				}
			}
			break;
		}
		case nir_intrinsic_interp_deref_at_centroid:
		case nir_intrinsic_interp_deref_at_sample:
		case nir_intrinsic_interp_deref_at_offset: {
			enum glsl_interp_mode interp = intrinsic_get_var(intr)->data.interpolation;
			switch (interp) {
			case INTERP_MODE_SMOOTH:
			case INTERP_MODE_NONE:
				if (intr->intrinsic == nir_intrinsic_interp_deref_at_centroid)
					info->uses_persp_opcode_interp_centroid = true;
				else if (intr->intrinsic == nir_intrinsic_interp_deref_at_sample)
					info->uses_persp_opcode_interp_sample = true;
				else
					info->uses_persp_opcode_interp_offset = true;
				break;
			case INTERP_MODE_NOPERSPECTIVE:
				if (intr->intrinsic == nir_intrinsic_interp_deref_at_centroid)
					info->uses_linear_opcode_interp_centroid = true;
				else if (intr->intrinsic == nir_intrinsic_interp_deref_at_sample)
					info->uses_linear_opcode_interp_sample = true;
				else
					info->uses_linear_opcode_interp_offset = true;
				break;
			case INTERP_MODE_FLAT:
				break;
			default:
				unreachable("Unsupported interpolation type");
			}
			break;
		}
		default:
			break;
		}
	}
}

void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
			   struct tgsi_tessctrl_info *out)
{
	memset(out, 0, sizeof(*out));

	if (nir->info.stage != MESA_SHADER_TESS_CTRL)
		return;

	out->tessfactors_are_def_in_all_invocs =
		ac_are_tessfactors_def_in_all_invocs(nir);
}

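/* Fill tgsi_shader_info from a NIR shader: the NIR counterpart of
 * tgsi_scan_shader, gathering everything radeonsi needs to know about
 * the shader's inputs, outputs, resources and properties.
 */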
void si_nir_scan_shader(const struct nir_shader *nir,
			struct tgsi_shader_info *info)
{
	nir_function *func;
	unsigned i;

	info->processor = pipe_shader_type_from_mesa(nir->info.stage);
	info->num_tokens = 2; /* indicate that the shader is non-empty */
	info->num_instructions = 2;

	info->properties[TGSI_PROPERTY_NEXT_SHADER] =
		pipe_shader_type_from_mesa(nir->info.next_stage);

	if (nir->info.stage == MESA_SHADER_VERTEX) {
		info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] =
			nir->info.vs.window_space_position;
	}

	if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
		info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT] =
			nir->info.tess.tcs_vertices_out;
	}

	if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
		if (nir->info.tess.primitive_mode == GL_ISOLINES)
			info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = PIPE_PRIM_LINES;
		else
			info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = nir->info.tess.primitive_mode;

		STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
		STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
			      PIPE_TESS_SPACING_FRACTIONAL_ODD);
		STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
			      PIPE_TESS_SPACING_FRACTIONAL_EVEN);

		info->properties[TGSI_PROPERTY_TES_SPACING] = (nir->info.tess.spacing + 1) % 3;
		info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW] = !nir->info.tess.ccw;
		info->properties[TGSI_PROPERTY_TES_POINT_MODE] = nir->info.tess.point_mode;
	}

	if (nir->info.stage == MESA_SHADER_GEOMETRY) {
		info->properties[TGSI_PROPERTY_GS_INPUT_PRIM] = nir->info.gs.input_primitive;
		info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM] = nir->info.gs.output_primitive;
		info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] = nir->info.gs.vertices_out;
		info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = nir->info.gs.invocations;
	}

	if (nir->info.stage == MESA_SHADER_FRAGMENT) {
		info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] =
			nir->info.fs.early_fragment_tests | nir->info.fs.post_depth_coverage;
		info->properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE] = nir->info.fs.post_depth_coverage;

		if (nir->info.fs.pixel_center_integer) {
			info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] =
				TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
		}

		if (nir->info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
			switch (nir->info.fs.depth_layout) {
			case FRAG_DEPTH_LAYOUT_ANY:
				info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_ANY;
				break;
			case FRAG_DEPTH_LAYOUT_GREATER:
				info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_GREATER;
				break;
			case FRAG_DEPTH_LAYOUT_LESS:
				info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_LESS;
				break;
			case FRAG_DEPTH_LAYOUT_UNCHANGED:
				info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_UNCHANGED;
				break;
			default:
				unreachable("Unknown depth layout");
			}
		}
	}

	if (gl_shader_stage_is_compute(nir->info.stage)) {
		info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] = nir->info.cs.local_size[0];
		info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] = nir->info.cs.local_size[1];
		info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] = nir->info.cs.local_size[2];
	}

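	/* Gather input declarations: semantics, interpolation modes and
	 * locations, counting each slot only once.
	 */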
	i = 0;
	uint64_t processed_inputs = 0;
	unsigned num_inputs = 0;
	nir_foreach_variable(variable, &nir->inputs) {
		unsigned semantic_name, semantic_index;

		const struct glsl_type *type = variable->type;
		if (nir_is_per_vertex_io(variable, nir->info.stage)) {
			assert(glsl_type_is_array(type));
			type = glsl_get_array_element(type);
		}

		unsigned attrib_count = glsl_count_attribute_slots(type,
								   nir->info.stage == MESA_SHADER_VERTEX);

		i = variable->data.driver_location;

		/* Vertex shader inputs don't have semantics. The state
		 * tracker has already mapped them to attributes via
		 * variable->data.driver_location.
		 */
		if (nir->info.stage == MESA_SHADER_VERTEX) {
			if (glsl_type_is_dual_slot(glsl_without_array(variable->type)))
				num_inputs++;

			num_inputs++;
			continue;
		}

		/* Fragment shader position is a system value. */
		if (nir->info.stage == MESA_SHADER_FRAGMENT &&
		    variable->data.location == VARYING_SLOT_POS) {
			if (nir->info.fs.pixel_center_integer)
				info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] =
					TGSI_FS_COORD_PIXEL_CENTER_INTEGER;

			num_inputs++;
			continue;
		}

		for (unsigned j = 0; j < attrib_count; j++, i++) {

			if (processed_inputs & ((uint64_t)1 << i))
				continue;

			processed_inputs |= ((uint64_t)1 << i);
			num_inputs++;

			tgsi_get_gl_varying_semantic(variable->data.location + j, true,
						     &semantic_name, &semantic_index);

			info->input_semantic_name[i] = semantic_name;
			info->input_semantic_index[i] = semantic_index;

			if (semantic_name == TGSI_SEMANTIC_PRIMID)
				info->uses_primid = true;

			if (variable->data.sample)
				info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE;
			else if (variable->data.centroid)
				info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID;
			else
				info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER;

			enum glsl_base_type base_type =
				glsl_get_base_type(glsl_without_array(variable->type));

			switch (variable->data.interpolation) {
			case INTERP_MODE_NONE:
				if (glsl_base_type_is_integer(base_type)) {
					info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
					break;
				}

				if (semantic_name == TGSI_SEMANTIC_COLOR) {
					info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR;
					break;
				}
				/* fall-through */

			case INTERP_MODE_SMOOTH:
				assert(!glsl_base_type_is_integer(base_type));

				info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE;
				break;

			case INTERP_MODE_NOPERSPECTIVE:
				assert(!glsl_base_type_is_integer(base_type));

				info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR;
				break;

			case INTERP_MODE_FLAT:
				info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
				break;
			}
		}
	}

	info->num_inputs = num_inputs;

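	/* Gather output declarations: semantics, usage masks and GS stream
	 * assignments.
	 */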
	i = 0;
	uint64_t processed_outputs = 0;
	unsigned num_outputs = 0;
	nir_foreach_variable(variable, &nir->outputs) {
		unsigned semantic_name, semantic_index;

		i = variable->data.driver_location;

		const struct glsl_type *type = variable->type;
		if (nir_is_per_vertex_io(variable, nir->info.stage)) {
			assert(glsl_type_is_array(type));
			type = glsl_get_array_element(type);
		}

		unsigned attrib_count = glsl_count_attribute_slots(type, false);
		for (unsigned k = 0; k < attrib_count; k++, i++) {

			if (nir->info.stage == MESA_SHADER_FRAGMENT) {
				tgsi_get_gl_frag_result_semantic(variable->data.location + k,
								 &semantic_name, &semantic_index);

				/* Adjust for dual source blending */
				if (variable->data.index > 0) {
					semantic_index++;
				}
			} else {
				tgsi_get_gl_varying_semantic(variable->data.location + k, true,
							     &semantic_name, &semantic_index);
			}

			unsigned num_components = 4;
			unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(variable->type));
			if (vector_elements)
				num_components = vector_elements;

			unsigned component = variable->data.location_frac;
			if (glsl_type_is_64bit(glsl_without_array(variable->type))) {
				if (glsl_type_is_dual_slot(glsl_without_array(variable->type)) && k % 2) {
					num_components = (num_components * 2) - 4;
					component = 0;
				} else {
					num_components = MIN2(num_components * 2, 4);
				}
			}

			ubyte usagemask = 0;
			for (unsigned j = component; j < num_components + component; j++) {
				switch (j) {
				case 0:
					usagemask |= TGSI_WRITEMASK_X;
					break;
				case 1:
					usagemask |= TGSI_WRITEMASK_Y;
					break;
				case 2:
					usagemask |= TGSI_WRITEMASK_Z;
					break;
				case 3:
					usagemask |= TGSI_WRITEMASK_W;
					break;
				default:
					unreachable("error calculating component index");
				}
			}

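			/* The stream field either holds packed per-component
			 * stream indices (bit 31 set, 2 bits per component) or
			 * a single stream index for the whole variable.
			 */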
			unsigned gs_out_streams;
			if (variable->data.stream & (1u << 31)) {
				gs_out_streams = variable->data.stream & ~(1u << 31);
			} else {
				assert(variable->data.stream < 4);
				gs_out_streams = 0;
				for (unsigned j = 0; j < num_components; ++j)
					gs_out_streams |= variable->data.stream << (2 * (component + j));
			}

			unsigned streamx = gs_out_streams & 3;
			unsigned streamy = (gs_out_streams >> 2) & 3;
			unsigned streamz = (gs_out_streams >> 4) & 3;
			unsigned streamw = (gs_out_streams >> 6) & 3;

			if (usagemask & TGSI_WRITEMASK_X) {
				info->output_usagemask[i] |= TGSI_WRITEMASK_X;
				info->output_streams[i] |= streamx;
				info->num_stream_output_components[streamx]++;
			}
			if (usagemask & TGSI_WRITEMASK_Y) {
				info->output_usagemask[i] |= TGSI_WRITEMASK_Y;
				info->output_streams[i] |= streamy << 2;
				info->num_stream_output_components[streamy]++;
			}
			if (usagemask & TGSI_WRITEMASK_Z) {
				info->output_usagemask[i] |= TGSI_WRITEMASK_Z;
				info->output_streams[i] |= streamz << 4;
				info->num_stream_output_components[streamz]++;
			}
			if (usagemask & TGSI_WRITEMASK_W) {
				info->output_usagemask[i] |= TGSI_WRITEMASK_W;
				info->output_streams[i] |= streamw << 6;
				info->num_stream_output_components[streamw]++;
			}

			/* Make sure we only count this location once against
			 * the num_outputs counter.
			 */
			if (processed_outputs & ((uint64_t)1 << i))
				continue;

			processed_outputs |= ((uint64_t)1 << i);
			num_outputs++;

			info->output_semantic_name[i] = semantic_name;
			info->output_semantic_index[i] = semantic_index;

			switch (semantic_name) {
			case TGSI_SEMANTIC_PRIMID:
				info->writes_primid = true;
				break;
			case TGSI_SEMANTIC_VIEWPORT_INDEX:
				info->writes_viewport_index = true;
				break;
			case TGSI_SEMANTIC_LAYER:
				info->writes_layer = true;
				break;
			case TGSI_SEMANTIC_PSIZE:
				info->writes_psize = true;
				break;
			case TGSI_SEMANTIC_CLIPVERTEX:
				info->writes_clipvertex = true;
				break;
			case TGSI_SEMANTIC_COLOR:
				info->colors_written |= 1 << semantic_index;
				break;
			case TGSI_SEMANTIC_STENCIL:
				info->writes_stencil = true;
				break;
			case TGSI_SEMANTIC_SAMPLEMASK:
				info->writes_samplemask = true;
				break;
			case TGSI_SEMANTIC_EDGEFLAG:
				info->writes_edgeflag = true;
				break;
			case TGSI_SEMANTIC_POSITION:
				if (info->processor == PIPE_SHADER_FRAGMENT)
					info->writes_z = true;
				else
					info->writes_position = true;
				break;
			}

			if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
				switch (semantic_name) {
				case TGSI_SEMANTIC_PATCH:
					info->reads_perpatch_outputs = true;
					break;
				case TGSI_SEMANTIC_TESSINNER:
				case TGSI_SEMANTIC_TESSOUTER:
					info->reads_tessfactor_outputs = true;
					break;
				default:
					info->reads_pervertex_outputs = true;
				}
			}
		}

		unsigned loc = variable->data.location;
		if (nir->info.stage == MESA_SHADER_FRAGMENT &&
		    loc == FRAG_RESULT_COLOR &&
		    nir->info.outputs_written & (1ull << loc)) {
			assert(attrib_count == 1);
			info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] = true;
		}
	}

	info->num_outputs = num_outputs;

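	/* Scan uniform variables and classify them into constant buffers,
	 * shader buffers, samplers and images.
	 */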
	struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
					       _mesa_key_pointer_equal);
	struct set *ssbo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
						_mesa_key_pointer_equal);

	/* Initialise const_file_max[0] */
	info->const_file_max[0] = -1;

	/* The first 8 are reserved for atomic counters using ssbo */
	unsigned ssbo_idx = 8;

	unsigned ubo_idx = 1;
	nir_foreach_variable(variable, &nir->uniforms) {
		const struct glsl_type *type = variable->type;
		enum glsl_base_type base_type =
			glsl_get_base_type(glsl_without_array(type));
		unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
		unsigned loc = variable->data.location;
		int slot_count = glsl_count_attribute_slots(type, false);
		int max_slot = MAX2(info->const_file_max[0], (int) loc) + slot_count;

		/* Gather the declared-buffer bitmasks. Note: radeonsi doesn't
		 * really use the masks (other than ubo_idx == 1 for regular
		 * uniforms); they're really only used for getting the buffer
		 * count, so we don't need to worry about the ordering.
		 */
		if (variable->interface_type != NULL) {
			if (variable->data.mode == nir_var_uniform ||
			    variable->data.mode == nir_var_mem_ubo ||
			    variable->data.mode == nir_var_mem_ssbo) {

				struct set *buf_set = variable->data.mode == nir_var_mem_ssbo ?
					ssbo_set : ubo_set;

				unsigned block_count;
				if (base_type != GLSL_TYPE_INTERFACE) {
					struct set_entry *entry =
						_mesa_set_search(buf_set, variable->interface_type);

					/* Check if we have already processed
					 * a member from this ubo.
					 */
					if (entry)
						continue;

					block_count = 1;
				} else {
					block_count = aoa_size;
				}

				if (variable->data.mode == nir_var_uniform ||
				    variable->data.mode == nir_var_mem_ubo) {
					info->const_buffers_declared |= u_bit_consecutive(ubo_idx, block_count);
					ubo_idx += block_count;
				} else {
					assert(variable->data.mode == nir_var_mem_ssbo);

					info->shader_buffers_declared |= u_bit_consecutive(ssbo_idx, block_count);
					ssbo_idx += block_count;
				}

				_mesa_set_add(buf_set, variable->interface_type);
			}

			continue;
		}

		/* We rely on the fact that nir_lower_samplers_as_deref has
		 * eliminated struct dereferences.
		 */
		if (base_type == GLSL_TYPE_SAMPLER) {
			if (variable->data.bindless) {
				info->const_buffers_declared |= 1;
				info->const_file_max[0] = max_slot;
			} else {
				info->samplers_declared |=
					u_bit_consecutive(variable->data.binding, aoa_size);
			}
		} else if (base_type == GLSL_TYPE_IMAGE) {
			if (variable->data.bindless) {
				info->const_buffers_declared |= 1;
				info->const_file_max[0] = max_slot;
			} else {
				info->images_declared |=
					u_bit_consecutive(variable->data.binding, aoa_size);
			}
		} else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
			if (strncmp(variable->name, "state.", 6) == 0 ||
			    strncmp(variable->name, "gl_", 3) == 0) {
				/* FIXME: figure out why piglit tests with builtin
				 * uniforms are failing without this.
				 */
				info->const_buffers_declared =
					u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
			} else {
				info->const_buffers_declared |= 1;
				info->const_file_max[0] = max_slot;
			}
		}
	}

	_mesa_set_destroy(ubo_set, NULL);
	_mesa_set_destroy(ssbo_set, NULL);

	info->num_written_clipdistance = nir->info.clip_distance_array_size;
	info->num_written_culldistance = nir->info.cull_distance_array_size;
	info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
	info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);

	if (info->processor == PIPE_SHADER_FRAGMENT)
		info->uses_kill = nir->info.fs.uses_discard;

	func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
	nir_foreach_block(block, func->impl) {
		nir_foreach_instr(instr, block)
			scan_instruction(nir, info, instr);
	}
}

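/* Run the standard NIR optimization pipeline until no pass reports progress. */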
void
si_nir_opts(struct nir_shader *nir)
{
	bool progress;
	do {
		progress = false;

		NIR_PASS_V(nir, nir_lower_vars_to_ssa);

		NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
		NIR_PASS(progress, nir, nir_opt_dead_write_vars);

		NIR_PASS_V(nir, nir_lower_alu_to_scalar);
		NIR_PASS_V(nir, nir_lower_phis_to_scalar);

		/* (Constant) copy propagation is needed for txf with offsets. */
		NIR_PASS(progress, nir, nir_copy_prop);
		NIR_PASS(progress, nir, nir_opt_remove_phis);
		NIR_PASS(progress, nir, nir_opt_dce);
		if (nir_opt_trivial_continues(nir)) {
			progress = true;
			NIR_PASS(progress, nir, nir_copy_prop);
			NIR_PASS(progress, nir, nir_opt_dce);
		}
		NIR_PASS(progress, nir, nir_opt_if, true);
		NIR_PASS(progress, nir, nir_opt_dead_cf);
		NIR_PASS(progress, nir, nir_opt_cse);
		NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

		/* Needed for algebraic lowering */
		NIR_PASS(progress, nir, nir_opt_algebraic);
		NIR_PASS(progress, nir, nir_opt_constant_folding);

		NIR_PASS(progress, nir, nir_opt_undef);
		NIR_PASS(progress, nir, nir_opt_conditional_discard);
		if (nir->options->max_unroll_iterations) {
			NIR_PASS(progress, nir, nir_opt_loop_unroll, 0);
		}
	} while (progress);
}

/**
 * Perform "lowering" operations on the NIR that are run once when the shader
 * selector is created.
 */
void
si_lower_nir(struct si_shader_selector* sel)
{
	/* Adjust the driver location of inputs and outputs. The state tracker
	 * interprets them as slots, while the ac/nir backend interprets them
	 * as individual components.
	 */
	nir_foreach_variable(variable, &sel->nir->inputs)
		variable->data.driver_location *= 4;

	nir_foreach_variable(variable, &sel->nir->outputs) {
		variable->data.driver_location *= 4;

		if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
			if (variable->data.location == FRAG_RESULT_DEPTH)
				variable->data.driver_location += 2;
			else if (variable->data.location == FRAG_RESULT_STENCIL)
				variable->data.driver_location += 1;
		}
	}

	/* Perform lowerings (and optimizations) of code.
	 *
	 * Performance considerations aside, we must:
	 * - lower certain ALU operations
	 * - ensure constant offsets for texture instructions are folded
	 *   and copy-propagated
	 */

	static const struct nir_lower_tex_options lower_tex_options = {
		.lower_txp = ~0u,
	};
	NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);

	const nir_lower_subgroups_options subgroups_options = {
		.subgroup_size = 64,
		.ballot_bit_size = 64,
		.lower_to_scalar = true,
		.lower_subgroup_masks = true,
		.lower_vote_trivial = false,
		.lower_vote_eq_to_ballot = true,
	};
	NIR_PASS_V(sel->nir, nir_lower_subgroups, &subgroups_options);

	ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class);

	si_nir_opts(sel->nir);

	NIR_PASS_V(sel->nir, nir_lower_bool_to_int32);

	/* Strip the resulting shader so that the shader cache is more likely
	 * to hit from other similar shaders.
	 */
	nir_strip(sel->nir);
}

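/* Helpers that load shader inputs as LLVM values during NIR-to-LLVM
 * translation.
 */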
static void declare_nir_input_vs(struct si_shader_context *ctx,
				 struct nir_variable *variable,
				 unsigned input_index,
				 LLVMValueRef out[4])
{
	si_llvm_load_input_vs(ctx, input_index, out);
}

static void declare_nir_input_fs(struct si_shader_context *ctx,
				 struct nir_variable *variable,
				 unsigned input_index,
				 LLVMValueRef out[4])
{
	unsigned slot = variable->data.location;
	if (slot == VARYING_SLOT_POS) {
		out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT);
		out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
		out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
		out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
				       LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
		return;
	}

	si_llvm_load_input_fs(ctx, input_index, out);
}

LLVMValueRef
si_nir_lookup_interp_param(struct ac_shader_abi *abi,
			   enum glsl_interp_mode interp, unsigned location)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	int interp_param_idx = -1;

	switch (interp) {
	case INTERP_MODE_FLAT:
		return NULL;
	case INTERP_MODE_SMOOTH:
	case INTERP_MODE_NONE:
		if (location == INTERP_CENTER)
			interp_param_idx = SI_PARAM_PERSP_CENTER;
		else if (location == INTERP_CENTROID)
			interp_param_idx = SI_PARAM_PERSP_CENTROID;
		else if (location == INTERP_SAMPLE)
			interp_param_idx = SI_PARAM_PERSP_SAMPLE;
		break;
	case INTERP_MODE_NOPERSPECTIVE:
		if (location == INTERP_CENTER)
			interp_param_idx = SI_PARAM_LINEAR_CENTER;
		else if (location == INTERP_CENTROID)
			interp_param_idx = SI_PARAM_LINEAR_CENTROID;
		else if (location == INTERP_SAMPLE)
			interp_param_idx = SI_PARAM_LINEAR_SAMPLE;
		break;
	default:
		assert(!"Unhandled interpolation mode.");
		return NULL;
	}

	return interp_param_idx != -1 ?
		LLVMGetParam(ctx->main_fn, interp_param_idx) : NULL;
}

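/* Load a sampler or image descriptor, handling both bindless handles and
 * (possibly dynamic) indices into the per-shader descriptor list.
 */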
static LLVMValueRef
si_nir_load_sampler_desc(struct ac_shader_abi *abi,
			 unsigned descriptor_set, unsigned base_index,
			 unsigned constant_index, LLVMValueRef dynamic_index,
			 enum ac_descriptor_type desc_type, bool image,
			 bool write, bool bindless)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
	LLVMBuilderRef builder = ctx->ac.builder;
	unsigned const_index = base_index + constant_index;
	bool dcc_off = write;

	/* TODO: images_store and images_atomic are not set */
	if (!dynamic_index && image &&
	    (info->images_store | info->images_atomic) & (1 << const_index))
		dcc_off = true;

	assert(!descriptor_set);
	assert(!image || desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER);

	if (bindless) {
		LLVMValueRef list =
			LLVMGetParam(ctx->main_fn, ctx->param_bindless_samplers_and_images);

		/* dynamic_index is the bindless handle */
		if (image) {
			/* For simplicity, bindless image descriptors use fixed
			 * 16-dword slots for now.
			 */
			dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
						     LLVMConstInt(ctx->i32, 2, 0), "");

			return si_load_image_desc(ctx, list, dynamic_index, desc_type,
						  dcc_off, true);
		}

		/* Since bindless handle arithmetic can contain an unsigned integer
		 * wraparound and si_load_sampler_desc assumes there isn't any,
		 * use GEP without "inbounds" (inside ac_build_pointer_add)
		 * to prevent incorrect code generation and hangs.
		 */
		dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
					     LLVMConstInt(ctx->i32, 2, 0), "");
		list = ac_build_pointer_add(&ctx->ac, list, dynamic_index);
		return si_load_sampler_desc(ctx, list, ctx->i32_0, desc_type);
	}

	unsigned num_slots = image ? ctx->num_images : ctx->num_samplers;
	assert(const_index < num_slots);

	LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images);
	LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false);

	if (dynamic_index) {
		index = LLVMBuildAdd(builder, index, dynamic_index, "");

		/* From the GL_ARB_shader_image_load_store extension spec:
		 *
		 *    If a shader performs an image load, store, or atomic
		 *    operation using an image variable declared as an array,
		 *    and if the index used to select an individual element is
		 *    negative or greater than or equal to the size of the
		 *    array, the results of the operation are undefined but may
		 *    not lead to termination.
		 */
		index = si_llvm_bound_index(ctx, index, num_slots);
	}

	if (image) {
		/* Image descriptors are stored in reverse order in the list. */
		index = LLVMBuildSub(ctx->ac.builder,
				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
				     index, "");
		return si_load_image_desc(ctx, list, index, desc_type, dcc_off, false);
	}

	/* Sampler descriptors are placed after the images in the list. */
	index = LLVMBuildAdd(ctx->ac.builder, index,
			     LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
	return si_load_sampler_desc(ctx, list, index, desc_type);
}

static void bitcast_inputs(struct si_shader_context *ctx,
			   LLVMValueRef data[4],
			   unsigned input_idx)
{
	for (unsigned chan = 0; chan < 4; chan++) {
		ctx->inputs[input_idx + chan] =
			LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, "");
	}
}

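/* Translate a NIR shader to LLVM IR: pre-load VS/FS inputs, set up the
 * ac_shader_abi callbacks, and hand the shader to ac_nir_translate.
 */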
bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
{
	struct tgsi_shader_info *info = &ctx->shader->selector->info;

	if (nir->info.stage == MESA_SHADER_VERTEX ||
	    nir->info.stage == MESA_SHADER_FRAGMENT) {
		uint64_t processed_inputs = 0;
		nir_foreach_variable(variable, &nir->inputs) {
			unsigned attrib_count = glsl_count_attribute_slots(variable->type,
									   nir->info.stage == MESA_SHADER_VERTEX);
			unsigned input_idx = variable->data.driver_location;

			LLVMValueRef data[4];
			unsigned loc = variable->data.location;

			if (loc >= VARYING_SLOT_VAR0 && nir->info.stage == MESA_SHADER_FRAGMENT)
				ctx->abi.fs_input_attr_indices[loc - VARYING_SLOT_VAR0] = input_idx / 4;

			for (unsigned i = 0; i < attrib_count; i++) {
				/* Packed components share the same location so skip
				 * them if we have already processed the location.
				 */
				if (processed_inputs & ((uint64_t)1 << (loc + i))) {
					input_idx += 4;
					continue;
				}

				if (nir->info.stage == MESA_SHADER_VERTEX) {
					declare_nir_input_vs(ctx, variable, input_idx / 4, data);
					bitcast_inputs(ctx, data, input_idx);
					if (glsl_type_is_dual_slot(variable->type)) {
						input_idx += 4;
						declare_nir_input_vs(ctx, variable, input_idx / 4, data);
						bitcast_inputs(ctx, data, input_idx);
					}
				} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
					declare_nir_input_fs(ctx, variable, input_idx / 4, data);
					bitcast_inputs(ctx, data, input_idx);
				}

				processed_inputs |= ((uint64_t)1 << (loc + i));
				input_idx += 4;
			}
		}
	}

	ctx->abi.inputs = &ctx->inputs[0];
	ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
	ctx->abi.clamp_shadow_reference = true;

	ctx->num_samplers = util_last_bit(info->samplers_declared);
	ctx->num_images = util_last_bit(info->images_declared);

	if (ctx->shader->selector->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]) {
		assert(gl_shader_stage_is_compute(nir->info.stage));
		si_declare_compute_memory(ctx);
	}
	ac_nir_translate(&ctx->ac, &ctx->abi, nir);

	return true;
}