1/* 2 * Copyright © 2015 Red Hat 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "st_nir.h" 25 26#include "pipe/p_defines.h" 27#include "pipe/p_screen.h" 28#include "pipe/p_context.h" 29 30#include "program/program.h" 31#include "program/prog_statevars.h" 32#include "program/prog_parameter.h" 33#include "program/ir_to_mesa.h" 34#include "main/mtypes.h" 35#include "main/errors.h" 36#include "main/shaderapi.h" 37#include "main/uniforms.h" 38 39#include "main/shaderobj.h" 40#include "st_context.h" 41#include "st_glsl_types.h" 42#include "st_program.h" 43 44#include "compiler/nir/nir.h" 45#include "compiler/glsl_types.h" 46#include "compiler/glsl/glsl_to_nir.h" 47#include "compiler/glsl/gl_nir.h" 48#include "compiler/glsl/ir.h" 49#include "compiler/glsl/ir_optimization.h" 50#include "compiler/glsl/string_to_uint_map.h" 51 52static int 53type_size(const struct glsl_type *type) 54{ 55 return type->count_attribute_slots(false); 56} 57 58/* Depending on PIPE_CAP_TGSI_TEXCOORD (st->needs_texcoord_semantic) we 59 * may need to fix up varying slots so the glsl->nir path is aligned 60 * with the anything->tgsi->nir path. 61 */ 62static void 63st_nir_fixup_varying_slots(struct st_context *st, struct exec_list *var_list) 64{ 65 if (st->needs_texcoord_semantic) 66 return; 67 68 nir_foreach_variable(var, var_list) { 69 if (var->data.location >= VARYING_SLOT_VAR0) { 70 var->data.location += 9; 71 } else if ((var->data.location >= VARYING_SLOT_TEX0) && 72 (var->data.location <= VARYING_SLOT_TEX7)) { 73 var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0; 74 } 75 } 76} 77 78/* input location assignment for VS inputs must be handled specially, so 79 * that it is aligned w/ st's vbo state. 80 * (This isn't the case with, for ex, FS inputs, which only need to agree 81 * on varying-slot w/ the VS outputs) 82 */ 83static void 84st_nir_assign_vs_in_locations(nir_shader *nir) 85{ 86 nir->num_inputs = 0; 87 nir_foreach_variable_safe(var, &nir->inputs) { 88 /* NIR already assigns dual-slot inputs to two locations so all we have 89 * to do is compact everything down. 90 */ 91 if (var->data.location == VERT_ATTRIB_EDGEFLAG) { 92 /* bit of a hack, mirroring st_translate_vertex_program */ 93 var->data.driver_location = util_bitcount64(nir->info.inputs_read); 94 } else if (nir->info.inputs_read & BITFIELD64_BIT(var->data.location)) { 95 var->data.driver_location = 96 util_bitcount64(nir->info.inputs_read & 97 BITFIELD64_MASK(var->data.location)); 98 nir->num_inputs++; 99 } else { 100 /* Move unused input variables to the globals list (with no 101 * initialization), to avoid confusing drivers looking through the 102 * inputs array and expecting to find inputs with a driver_location 103 * set. 104 */ 105 exec_node_remove(&var->node); 106 var->data.mode = nir_var_shader_temp; 107 exec_list_push_tail(&nir->globals, &var->node); 108 } 109 } 110} 111 112static void 113st_nir_assign_var_locations(struct exec_list *var_list, unsigned *size, 114 gl_shader_stage stage) 115{ 116 unsigned location = 0; 117 unsigned assigned_locations[VARYING_SLOT_TESS_MAX]; 118 uint64_t processed_locs[2] = {0}; 119 120 const int base = stage == MESA_SHADER_FRAGMENT ? 121 (int) FRAG_RESULT_DATA0 : (int) VARYING_SLOT_VAR0; 122 123 int UNUSED last_loc = 0; 124 nir_foreach_variable(var, var_list) { 125 126 const struct glsl_type *type = var->type; 127 if (nir_is_per_vertex_io(var, stage)) { 128 assert(glsl_type_is_array(type)); 129 type = glsl_get_array_element(type); 130 } 131 132 unsigned var_size = type_size(type); 133 134 /* Builtins don't allow component packing so we only need to worry about 135 * user defined varyings sharing the same location. 136 */ 137 bool processed = false; 138 if (var->data.location >= base) { 139 unsigned glsl_location = var->data.location - base; 140 141 for (unsigned i = 0; i < var_size; i++) { 142 if (processed_locs[var->data.index] & 143 ((uint64_t)1 << (glsl_location + i))) 144 processed = true; 145 else 146 processed_locs[var->data.index] |= 147 ((uint64_t)1 << (glsl_location + i)); 148 } 149 } 150 151 /* Because component packing allows varyings to share the same location 152 * we may have already have processed this location. 153 */ 154 if (processed) { 155 unsigned driver_location = assigned_locations[var->data.location]; 156 var->data.driver_location = driver_location; 157 *size += type_size(type); 158 159 /* An array may be packed such that is crosses multiple other arrays 160 * or variables, we need to make sure we have allocated the elements 161 * consecutively if the previously proccessed var was shorter than 162 * the current array we are processing. 163 * 164 * NOTE: The code below assumes the var list is ordered in ascending 165 * location order. 166 */ 167 assert(last_loc <= var->data.location); 168 last_loc = var->data.location; 169 unsigned last_slot_location = driver_location + var_size; 170 if (last_slot_location > location) { 171 unsigned num_unallocated_slots = last_slot_location - location; 172 unsigned first_unallocated_slot = var_size - num_unallocated_slots; 173 for (unsigned i = first_unallocated_slot; i < num_unallocated_slots; i++) { 174 assigned_locations[var->data.location + i] = location; 175 location++; 176 } 177 } 178 continue; 179 } 180 181 for (unsigned i = 0; i < var_size; i++) { 182 assigned_locations[var->data.location + i] = location + i; 183 } 184 185 var->data.driver_location = location; 186 location += var_size; 187 } 188 189 *size += location; 190} 191 192static int 193st_nir_lookup_parameter_index(const struct gl_program_parameter_list *params, 194 const char *name) 195{ 196 int loc = _mesa_lookup_parameter_index(params, name); 197 198 /* is there a better way to do this? If we have something like: 199 * 200 * struct S { 201 * float f; 202 * vec4 v; 203 * }; 204 * uniform S color; 205 * 206 * Then what we get in prog->Parameters looks like: 207 * 208 * 0: Name=color.f, Type=6, DataType=1406, Size=1 209 * 1: Name=color.v, Type=6, DataType=8b52, Size=4 210 * 211 * So the name doesn't match up and _mesa_lookup_parameter_index() 212 * fails. In this case just find the first matching "color.*".. 213 * 214 * Note for arrays you could end up w/ color[n].f, for example. 215 * 216 * glsl_to_tgsi works slightly differently in this regard. It is 217 * emitting something more low level, so it just translates the 218 * params list 1:1 to CONST[] regs. Going from GLSL IR to TGSI, 219 * it just calculates the additional offset of struct field members 220 * in glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) or 221 * glsl_to_tgsi_visitor::visit(ir_dereference_array *ir). It never 222 * needs to work backwards to get base var loc from the param-list 223 * which already has them separated out. 224 */ 225 if (loc < 0) { 226 int namelen = strlen(name); 227 for (unsigned i = 0; i < params->NumParameters; i++) { 228 struct gl_program_parameter *p = ¶ms->Parameters[i]; 229 if ((strncmp(p->Name, name, namelen) == 0) && 230 ((p->Name[namelen] == '.') || (p->Name[namelen] == '['))) { 231 loc = i; 232 break; 233 } 234 } 235 } 236 237 return loc; 238} 239 240static void 241st_nir_assign_uniform_locations(struct gl_context *ctx, 242 struct gl_program *prog, 243 struct exec_list *uniform_list) 244{ 245 int shaderidx = 0; 246 int imageidx = 0; 247 248 nir_foreach_variable(uniform, uniform_list) { 249 int loc; 250 251 /* 252 * UBO's have their own address spaces, so don't count them towards the 253 * number of global uniforms 254 */ 255 if (uniform->data.mode == nir_var_mem_ubo || uniform->data.mode == nir_var_mem_ssbo) 256 continue; 257 258 const struct glsl_type *type = glsl_without_array(uniform->type); 259 if (!uniform->data.bindless && (type->is_sampler() || type->is_image())) { 260 if (type->is_sampler()) { 261 loc = shaderidx; 262 shaderidx += type_size(uniform->type); 263 } else { 264 loc = imageidx; 265 imageidx += type_size(uniform->type); 266 } 267 } else if (strncmp(uniform->name, "gl_", 3) == 0) { 268 const gl_state_index16 *const stateTokens = uniform->state_slots[0].tokens; 269 /* This state reference has already been setup by ir_to_mesa, but we'll 270 * get the same index back here. 271 */ 272 273 unsigned comps; 274 if (glsl_type_is_struct_or_ifc(type)) { 275 comps = 4; 276 } else { 277 comps = glsl_get_vector_elements(type); 278 } 279 280 if (ctx->Const.PackedDriverUniformStorage) { 281 loc = _mesa_add_sized_state_reference(prog->Parameters, 282 stateTokens, comps, false); 283 loc = prog->Parameters->ParameterValueOffset[loc]; 284 } else { 285 loc = _mesa_add_state_reference(prog->Parameters, stateTokens); 286 } 287 } else { 288 loc = st_nir_lookup_parameter_index(prog->Parameters, uniform->name); 289 290 /* We need to check that loc is not -1 here before accessing the 291 * array. It can be negative for example when we have a struct that 292 * only contains opaque types. 293 */ 294 if (loc >= 0 && ctx->Const.PackedDriverUniformStorage) { 295 loc = prog->Parameters->ParameterValueOffset[loc]; 296 } 297 } 298 299 uniform->data.driver_location = loc; 300 } 301} 302 303void 304st_nir_opts(nir_shader *nir, bool scalar) 305{ 306 bool progress; 307 do { 308 progress = false; 309 310 NIR_PASS_V(nir, nir_lower_vars_to_ssa); 311 312 if (scalar) { 313 NIR_PASS_V(nir, nir_lower_alu_to_scalar); 314 NIR_PASS_V(nir, nir_lower_phis_to_scalar); 315 } 316 317 NIR_PASS_V(nir, nir_lower_alu); 318 NIR_PASS_V(nir, nir_lower_pack); 319 NIR_PASS(progress, nir, nir_copy_prop); 320 NIR_PASS(progress, nir, nir_opt_remove_phis); 321 NIR_PASS(progress, nir, nir_opt_dce); 322 if (nir_opt_trivial_continues(nir)) { 323 progress = true; 324 NIR_PASS(progress, nir, nir_copy_prop); 325 NIR_PASS(progress, nir, nir_opt_dce); 326 } 327 NIR_PASS(progress, nir, nir_opt_if, false); 328 NIR_PASS(progress, nir, nir_opt_dead_cf); 329 NIR_PASS(progress, nir, nir_opt_cse); 330 NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); 331 332 NIR_PASS(progress, nir, nir_opt_algebraic); 333 NIR_PASS(progress, nir, nir_opt_constant_folding); 334 335 NIR_PASS(progress, nir, nir_opt_undef); 336 NIR_PASS(progress, nir, nir_opt_conditional_discard); 337 if (nir->options->max_unroll_iterations) { 338 NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0); 339 } 340 } while (progress); 341} 342 343/* First third of converting glsl_to_nir.. this leaves things in a pre- 344 * nir_lower_io state, so that shader variants can more easily insert/ 345 * replace variables, etc. 346 */ 347static nir_shader * 348st_glsl_to_nir(struct st_context *st, struct gl_program *prog, 349 struct gl_shader_program *shader_program, 350 gl_shader_stage stage) 351{ 352 const nir_shader_compiler_options *options = 353 st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions; 354 enum pipe_shader_type type = pipe_shader_type_from_mesa(stage); 355 struct pipe_screen *screen = st->pipe->screen; 356 bool is_scalar = screen->get_shader_param(screen, type, PIPE_SHADER_CAP_SCALAR_ISA); 357 assert(options); 358 bool lower_64bit = 359 options->lower_int64_options || options->lower_doubles_options; 360 361 if (prog->nir) 362 return prog->nir; 363 364 nir_shader *nir = glsl_to_nir(st->ctx, shader_program, stage, options); 365 366 /* Set the next shader stage hint for VS and TES. */ 367 if (!nir->info.separate_shader && 368 (nir->info.stage == MESA_SHADER_VERTEX || 369 nir->info.stage == MESA_SHADER_TESS_EVAL)) { 370 371 unsigned prev_stages = (1 << (prog->info.stage + 1)) - 1; 372 unsigned stages_mask = 373 ~prev_stages & shader_program->data->linked_stages; 374 375 nir->info.next_stage = stages_mask ? 376 (gl_shader_stage) u_bit_scan(&stages_mask) : MESA_SHADER_FRAGMENT; 377 } else { 378 nir->info.next_stage = MESA_SHADER_FRAGMENT; 379 } 380 381 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 382 nir_shader *softfp64 = NULL; 383 if (nir->info.uses_64bit && 384 (options->lower_doubles_options & nir_lower_fp64_full_software) != 0) { 385 softfp64 = glsl_float64_funcs_to_nir(st->ctx, options); 386 ralloc_steal(ralloc_parent(nir), softfp64); 387 } 388 389 nir_variable_mode mask = 390 (nir_variable_mode) (nir_var_shader_in | nir_var_shader_out); 391 nir_remove_dead_variables(nir, mask); 392 393 if (options->lower_all_io_to_temps || 394 nir->info.stage == MESA_SHADER_VERTEX || 395 nir->info.stage == MESA_SHADER_GEOMETRY) { 396 NIR_PASS_V(nir, nir_lower_io_to_temporaries, 397 nir_shader_get_entrypoint(nir), 398 true, true); 399 } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { 400 NIR_PASS_V(nir, nir_lower_io_to_temporaries, 401 nir_shader_get_entrypoint(nir), 402 true, false); 403 } 404 405 NIR_PASS_V(nir, nir_lower_global_vars_to_local); 406 NIR_PASS_V(nir, nir_split_var_copies); 407 NIR_PASS_V(nir, nir_lower_var_copies); 408 409 if (is_scalar) { 410 NIR_PASS_V(nir, nir_lower_alu_to_scalar); 411 } 412 413 /* before buffers and vars_to_ssa */ 414 NIR_PASS_V(nir, gl_nir_lower_bindless_images); 415 st_nir_opts(nir, is_scalar); 416 417 NIR_PASS_V(nir, gl_nir_lower_buffers, shader_program); 418 /* Do a round of constant folding to clean up address calculations */ 419 NIR_PASS_V(nir, nir_opt_constant_folding); 420 421 if (lower_64bit) { 422 bool lowered_64bit_ops = false; 423 bool progress = false; 424 425 NIR_PASS_V(nir, nir_opt_algebraic); 426 427 do { 428 progress = false; 429 if (options->lower_int64_options) { 430 NIR_PASS(progress, nir, nir_lower_int64, 431 options->lower_int64_options); 432 } 433 if (options->lower_doubles_options) { 434 NIR_PASS(progress, nir, nir_lower_doubles, 435 softfp64, options->lower_doubles_options); 436 } 437 NIR_PASS(progress, nir, nir_opt_algebraic); 438 lowered_64bit_ops |= progress; 439 } while (progress); 440 441 if (lowered_64bit_ops) 442 st_nir_opts(nir, is_scalar); 443 } 444 445 return nir; 446} 447 448/* Second third of converting glsl_to_nir. This creates uniforms, gathers 449 * info on varyings, etc after NIR link time opts have been applied. 450 */ 451static void 452st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog, 453 struct gl_shader_program *shader_program) 454{ 455 nir_shader *nir = prog->nir; 456 457 /* Make a pass over the IR to add state references for any built-in 458 * uniforms that are used. This has to be done now (during linking). 459 * Code generation doesn't happen until the first time this shader is 460 * used for rendering. Waiting until then to generate the parameters is 461 * too late. At that point, the values for the built-in uniforms won't 462 * get sent to the shader. 463 */ 464 nir_foreach_variable(var, &nir->uniforms) { 465 if (strncmp(var->name, "gl_", 3) == 0) { 466 const nir_state_slot *const slots = var->state_slots; 467 assert(var->state_slots != NULL); 468 469 const struct glsl_type *type = glsl_without_array(var->type); 470 for (unsigned int i = 0; i < var->num_state_slots; i++) { 471 unsigned comps; 472 if (glsl_type_is_struct_or_ifc(type)) { 473 /* Builtin struct require specical handling for now we just 474 * make all members vec4. See st_nir_lower_builtin. 475 */ 476 comps = 4; 477 } else { 478 comps = glsl_get_vector_elements(type); 479 } 480 481 if (st->ctx->Const.PackedDriverUniformStorage) { 482 _mesa_add_sized_state_reference(prog->Parameters, 483 slots[i].tokens, 484 comps, false); 485 } else { 486 _mesa_add_state_reference(prog->Parameters, 487 slots[i].tokens); 488 } 489 } 490 } 491 } 492 493 /* Avoid reallocation of the program parameter list, because the uniform 494 * storage is only associated with the original parameter list. 495 * This should be enough for Bitmap and DrawPixels constants. 496 */ 497 _mesa_reserve_parameter_storage(prog->Parameters, 8); 498 499 /* This has to be done last. Any operation the can cause 500 * prog->ParameterValues to get reallocated (e.g., anything that adds a 501 * program constant) has to happen before creating this linkage. 502 */ 503 _mesa_associate_uniform_storage(st->ctx, shader_program, prog); 504 505 st_set_prog_affected_state_flags(prog); 506 507 NIR_PASS_V(nir, st_nir_lower_builtin); 508 NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true); 509 NIR_PASS_V(nir, nir_opt_intrinsics); 510 511 nir_variable_mode mask = nir_var_function_temp; 512 nir_remove_dead_variables(nir, mask); 513 514 if (st->ctx->_Shader->Flags & GLSL_DUMP) { 515 _mesa_log("\n"); 516 _mesa_log("NIR IR for linked %s program %d:\n", 517 _mesa_shader_stage_to_string(prog->info.stage), 518 shader_program->Name); 519 nir_print_shader(nir, _mesa_get_log_file()); 520 _mesa_log("\n\n"); 521 } 522} 523 524/* TODO any better helper somewhere to sort a list? */ 525 526static void 527insert_sorted(struct exec_list *var_list, nir_variable *new_var) 528{ 529 nir_foreach_variable(var, var_list) { 530 if (var->data.location > new_var->data.location) { 531 exec_node_insert_node_before(&var->node, &new_var->node); 532 return; 533 } 534 } 535 exec_list_push_tail(var_list, &new_var->node); 536} 537 538static void 539sort_varyings(struct exec_list *var_list) 540{ 541 struct exec_list new_list; 542 exec_list_make_empty(&new_list); 543 nir_foreach_variable_safe(var, var_list) { 544 exec_node_remove(&var->node); 545 insert_sorted(&new_list, var); 546 } 547 exec_list_move_nodes_to(&new_list, var_list); 548} 549 550static void 551set_st_program(struct gl_program *prog, 552 struct gl_shader_program *shader_program, 553 nir_shader *nir) 554{ 555 struct st_vertex_program *stvp; 556 struct st_common_program *stp; 557 struct st_fragment_program *stfp; 558 struct st_compute_program *stcp; 559 560 switch (prog->info.stage) { 561 case MESA_SHADER_VERTEX: 562 stvp = (struct st_vertex_program *)prog; 563 stvp->shader_program = shader_program; 564 stvp->tgsi.type = PIPE_SHADER_IR_NIR; 565 stvp->tgsi.ir.nir = nir; 566 break; 567 case MESA_SHADER_GEOMETRY: 568 case MESA_SHADER_TESS_CTRL: 569 case MESA_SHADER_TESS_EVAL: 570 stp = (struct st_common_program *)prog; 571 stp->shader_program = shader_program; 572 stp->tgsi.type = PIPE_SHADER_IR_NIR; 573 stp->tgsi.ir.nir = nir; 574 break; 575 case MESA_SHADER_FRAGMENT: 576 stfp = (struct st_fragment_program *)prog; 577 stfp->shader_program = shader_program; 578 stfp->tgsi.type = PIPE_SHADER_IR_NIR; 579 stfp->tgsi.ir.nir = nir; 580 break; 581 case MESA_SHADER_COMPUTE: 582 stcp = (struct st_compute_program *)prog; 583 stcp->shader_program = shader_program; 584 stcp->tgsi.ir_type = PIPE_SHADER_IR_NIR; 585 stcp->tgsi.prog = nir; 586 break; 587 default: 588 unreachable("unknown shader stage"); 589 } 590} 591 592static void 593st_nir_get_mesa_program(struct gl_context *ctx, 594 struct gl_shader_program *shader_program, 595 struct gl_linked_shader *shader) 596{ 597 struct st_context *st = st_context(ctx); 598 struct pipe_screen *pscreen = ctx->st->pipe->screen; 599 struct gl_program *prog; 600 601 validate_ir_tree(shader->ir); 602 603 prog = shader->Program; 604 605 prog->Parameters = _mesa_new_parameter_list(); 606 607 _mesa_copy_linked_program_data(shader_program, shader); 608 _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader, 609 prog->Parameters); 610 611 /* Remove reads from output registers. */ 612 if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS)) 613 lower_output_reads(shader->Stage, shader->ir); 614 615 if (ctx->_Shader->Flags & GLSL_DUMP) { 616 _mesa_log("\n"); 617 _mesa_log("GLSL IR for linked %s program %d:\n", 618 _mesa_shader_stage_to_string(shader->Stage), 619 shader_program->Name); 620 _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL); 621 _mesa_log("\n\n"); 622 } 623 624 prog->ExternalSamplersUsed = gl_external_samplers(prog); 625 _mesa_update_shader_textures_used(shader_program, prog); 626 627 nir_shader *nir = st_glsl_to_nir(st, prog, shader_program, shader->Stage); 628 629 set_st_program(prog, shader_program, nir); 630 prog->nir = nir; 631} 632 633static void 634st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool scalar) 635{ 636 if (scalar) { 637 NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out); 638 NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); 639 } 640 641 nir_lower_io_arrays_to_elements(*producer, *consumer); 642 643 st_nir_opts(*producer, scalar); 644 st_nir_opts(*consumer, scalar); 645 646 if (nir_link_opt_varyings(*producer, *consumer)) 647 st_nir_opts(*consumer, scalar); 648 649 NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out); 650 NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in); 651 652 if (nir_remove_unused_varyings(*producer, *consumer)) { 653 NIR_PASS_V(*producer, nir_lower_global_vars_to_local); 654 NIR_PASS_V(*consumer, nir_lower_global_vars_to_local); 655 656 /* The backend might not be able to handle indirects on 657 * temporaries so we need to lower indirects on any of the 658 * varyings we have demoted here. 659 * 660 * TODO: radeonsi shouldn't need to do this, however LLVM isn't 661 * currently smart enough to handle indirects without causing excess 662 * spilling causing the gpu to hang. 663 * 664 * See the following thread for more details of the problem: 665 * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html 666 */ 667 nir_variable_mode indirect_mask = nir_var_function_temp; 668 669 NIR_PASS_V(*producer, nir_lower_indirect_derefs, indirect_mask); 670 NIR_PASS_V(*consumer, nir_lower_indirect_derefs, indirect_mask); 671 672 st_nir_opts(*producer, scalar); 673 st_nir_opts(*consumer, scalar); 674 675 /* Lowering indirects can cause varying to become unused. 676 * nir_compact_varyings() depends on all dead varyings being removed so 677 * we need to call nir_remove_dead_variables() again here. 678 */ 679 NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out); 680 NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in); 681 } 682} 683 684static void 685st_lower_patch_vertices_in(struct gl_shader_program *shader_prog) 686{ 687 struct gl_linked_shader *linked_tcs = 688 shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]; 689 struct gl_linked_shader *linked_tes = 690 shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; 691 692 /* If we have a TCS and TES linked together, lower TES patch vertices. */ 693 if (linked_tcs && linked_tes) { 694 nir_shader *tcs_nir = linked_tcs->Program->nir; 695 nir_shader *tes_nir = linked_tes->Program->nir; 696 697 /* The TES input vertex count is the TCS output vertex count, 698 * lower TES gl_PatchVerticesIn to a constant. 699 */ 700 uint32_t tes_patch_verts = tcs_nir->info.tess.tcs_vertices_out; 701 NIR_PASS_V(tes_nir, nir_lower_patch_vertices, tes_patch_verts, NULL); 702 } 703} 704 705extern "C" { 706 707void 708st_nir_lower_wpos_ytransform(struct nir_shader *nir, 709 struct gl_program *prog, 710 struct pipe_screen *pscreen) 711{ 712 if (nir->info.stage != MESA_SHADER_FRAGMENT) 713 return; 714 715 static const gl_state_index16 wposTransformState[STATE_LENGTH] = { 716 STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM 717 }; 718 nir_lower_wpos_ytransform_options wpos_options = { { 0 } }; 719 720 memcpy(wpos_options.state_tokens, wposTransformState, 721 sizeof(wpos_options.state_tokens)); 722 wpos_options.fs_coord_origin_upper_left = 723 pscreen->get_param(pscreen, 724 PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT); 725 wpos_options.fs_coord_origin_lower_left = 726 pscreen->get_param(pscreen, 727 PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 728 wpos_options.fs_coord_pixel_center_integer = 729 pscreen->get_param(pscreen, 730 PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 731 wpos_options.fs_coord_pixel_center_half_integer = 732 pscreen->get_param(pscreen, 733 PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER); 734 735 if (nir_lower_wpos_ytransform(nir, &wpos_options)) { 736 nir_validate_shader(nir, "after nir_lower_wpos_ytransform"); 737 _mesa_add_state_reference(prog->Parameters, wposTransformState); 738 } 739} 740 741bool 742st_link_nir(struct gl_context *ctx, 743 struct gl_shader_program *shader_program) 744{ 745 struct st_context *st = st_context(ctx); 746 struct pipe_screen *screen = st->pipe->screen; 747 bool is_scalar[MESA_SHADER_STAGES]; 748 749 unsigned last_stage = 0; 750 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 751 struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; 752 if (shader == NULL) 753 continue; 754 755 /* Determine scalar property of each shader stage */ 756 enum pipe_shader_type type = pipe_shader_type_from_mesa(shader->Stage); 757 is_scalar[i] = screen->get_shader_param(screen, type, 758 PIPE_SHADER_CAP_SCALAR_ISA); 759 760 st_nir_get_mesa_program(ctx, shader_program, shader); 761 last_stage = i; 762 763 if (is_scalar[i]) { 764 NIR_PASS_V(shader->Program->nir, nir_lower_load_const_to_scalar); 765 } 766 } 767 768 /* Linking the stages in the opposite order (from fragment to vertex) 769 * ensures that inter-shader outputs written to in an earlier stage 770 * are eliminated if they are (transitively) not used in a later 771 * stage. 772 */ 773 int next = last_stage; 774 for (int i = next - 1; i >= 0; i--) { 775 struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; 776 if (shader == NULL) 777 continue; 778 779 st_nir_link_shaders(&shader->Program->nir, 780 &shader_program->_LinkedShaders[next]->Program->nir, 781 is_scalar[i]); 782 next = i; 783 } 784 785 int prev = -1; 786 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 787 struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; 788 if (shader == NULL) 789 continue; 790 791 nir_shader *nir = shader->Program->nir; 792 793 NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, shader->Program, 794 st->pipe->screen); 795 796 NIR_PASS_V(nir, nir_lower_system_values); 797 NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); 798 799 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 800 shader->Program->info = nir->info; 801 if (i == MESA_SHADER_VERTEX) { 802 /* NIR expands dual-slot inputs out to two locations. We need to 803 * compact things back down GL-style single-slot inputs to avoid 804 * confusing the state tracker. 805 */ 806 shader->Program->info.inputs_read = 807 nir_get_single_slot_attribs_mask(nir->info.inputs_read, 808 shader->Program->DualSlotInputs); 809 } 810 811 if (prev != -1) { 812 struct gl_program *prev_shader = 813 shader_program->_LinkedShaders[prev]->Program; 814 815 /* We can't use nir_compact_varyings with transform feedback, since 816 * the pipe_stream_output->output_register field is based on the 817 * pre-compacted driver_locations. 818 */ 819 if (!(prev_shader->sh.LinkedTransformFeedback && 820 prev_shader->sh.LinkedTransformFeedback->NumVarying > 0)) 821 nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir, 822 nir, ctx->API != API_OPENGL_COMPAT); 823 } 824 prev = i; 825 } 826 827 st_lower_patch_vertices_in(shader_program); 828 829 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 830 struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; 831 if (shader == NULL) 832 continue; 833 834 st_glsl_to_nir_post_opts(st, shader->Program, shader_program); 835 836 assert(shader->Program); 837 if (!ctx->Driver.ProgramStringNotify(ctx, 838 _mesa_shader_stage_to_program(i), 839 shader->Program)) { 840 _mesa_reference_program(ctx, &shader->Program, NULL); 841 return false; 842 } 843 844 nir_sweep(shader->Program->nir); 845 846 /* The GLSL IR won't be needed anymore. */ 847 ralloc_free(shader->ir); 848 shader->ir = NULL; 849 } 850 851 return true; 852} 853 854void 855st_nir_assign_varying_locations(struct st_context *st, nir_shader *nir) 856{ 857 if (nir->info.stage == MESA_SHADER_VERTEX) { 858 /* Needs special handling so drvloc matches the vbo state: */ 859 st_nir_assign_vs_in_locations(nir); 860 /* Re-lower global vars, to deal with any dead VS inputs. */ 861 NIR_PASS_V(nir, nir_lower_global_vars_to_local); 862 863 sort_varyings(&nir->outputs); 864 st_nir_assign_var_locations(&nir->outputs, 865 &nir->num_outputs, 866 nir->info.stage); 867 st_nir_fixup_varying_slots(st, &nir->outputs); 868 } else if (nir->info.stage == MESA_SHADER_GEOMETRY || 869 nir->info.stage == MESA_SHADER_TESS_CTRL || 870 nir->info.stage == MESA_SHADER_TESS_EVAL) { 871 sort_varyings(&nir->inputs); 872 st_nir_assign_var_locations(&nir->inputs, 873 &nir->num_inputs, 874 nir->info.stage); 875 st_nir_fixup_varying_slots(st, &nir->inputs); 876 877 sort_varyings(&nir->outputs); 878 st_nir_assign_var_locations(&nir->outputs, 879 &nir->num_outputs, 880 nir->info.stage); 881 st_nir_fixup_varying_slots(st, &nir->outputs); 882 } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { 883 sort_varyings(&nir->inputs); 884 st_nir_assign_var_locations(&nir->inputs, 885 &nir->num_inputs, 886 nir->info.stage); 887 st_nir_fixup_varying_slots(st, &nir->inputs); 888 st_nir_assign_var_locations(&nir->outputs, 889 &nir->num_outputs, 890 nir->info.stage); 891 } else if (nir->info.stage == MESA_SHADER_COMPUTE) { 892 /* TODO? */ 893 } else { 894 unreachable("invalid shader type"); 895 } 896} 897 898void 899st_nir_lower_samplers(struct pipe_screen *screen, nir_shader *nir, 900 struct gl_shader_program *shader_program, 901 struct gl_program *prog) 902{ 903 if (screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF)) 904 NIR_PASS_V(nir, gl_nir_lower_samplers_as_deref, shader_program); 905 else 906 NIR_PASS_V(nir, gl_nir_lower_samplers, shader_program); 907 908 if (prog) { 909 prog->info.textures_used = nir->info.textures_used; 910 prog->info.textures_used_by_txf = nir->info.textures_used_by_txf; 911 } 912} 913 914/* Last third of preparing nir from glsl, which happens after shader 915 * variant lowering. 916 */ 917void 918st_finalize_nir(struct st_context *st, struct gl_program *prog, 919 struct gl_shader_program *shader_program, nir_shader *nir) 920{ 921 struct pipe_screen *screen = st->pipe->screen; 922 const nir_shader_compiler_options *options = 923 st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions; 924 925 NIR_PASS_V(nir, nir_split_var_copies); 926 NIR_PASS_V(nir, nir_lower_var_copies); 927 if (options->lower_all_io_to_temps || 928 options->lower_all_io_to_elements || 929 nir->info.stage == MESA_SHADER_VERTEX || 930 nir->info.stage == MESA_SHADER_GEOMETRY) { 931 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 932 } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { 933 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true); 934 } 935 936 st_nir_assign_varying_locations(st, nir); 937 938 NIR_PASS_V(nir, nir_lower_atomics_to_ssbo, 939 st->ctx->Const.Program[nir->info.stage].MaxAtomicBuffers); 940 941 st_nir_assign_uniform_locations(st->ctx, prog, 942 &nir->uniforms); 943 944 /* Set num_uniforms in number of attribute slots (vec4s) */ 945 nir->num_uniforms = DIV_ROUND_UP(prog->Parameters->NumParameterValues, 4); 946 947 if (st->ctx->Const.PackedDriverUniformStorage) { 948 NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_glsl_type_dword_size, 949 (nir_lower_io_options)0); 950 NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 4); 951 } else { 952 NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_glsl_uniforms_type_size, 953 (nir_lower_io_options)0); 954 } 955 956 st_nir_lower_samplers(screen, nir, shader_program, prog); 957} 958 959} /* extern "C" */ 960