1/*
2 * Copyright © 2015 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "st_nir.h"
25
26#include "pipe/p_defines.h"
27#include "pipe/p_screen.h"
28#include "pipe/p_context.h"
29
30#include "program/program.h"
31#include "program/prog_statevars.h"
32#include "program/prog_parameter.h"
33#include "program/ir_to_mesa.h"
34#include "main/mtypes.h"
35#include "main/errors.h"
36#include "main/shaderapi.h"
37#include "main/uniforms.h"
38
39#include "main/shaderobj.h"
40#include "st_context.h"
41#include "st_glsl_types.h"
42#include "st_program.h"
43
44#include "compiler/nir/nir.h"
45#include "compiler/glsl_types.h"
46#include "compiler/glsl/glsl_to_nir.h"
47#include "compiler/glsl/gl_nir.h"
48#include "compiler/glsl/ir.h"
49#include "compiler/glsl/ir_optimization.h"
50#include "compiler/glsl/string_to_uint_map.h"
51
52static int
53type_size(const struct glsl_type *type)
54{
55   return type->count_attribute_slots(false);
56}
57
58/* Depending on PIPE_CAP_TGSI_TEXCOORD (st->needs_texcoord_semantic) we
59 * may need to fix up varying slots so the glsl->nir path is aligned
60 * with the anything->tgsi->nir path.
61 */
62static void
63st_nir_fixup_varying_slots(struct st_context *st, struct exec_list *var_list)
64{
65   if (st->needs_texcoord_semantic)
66      return;
67
68   nir_foreach_variable(var, var_list) {
69      if (var->data.location >= VARYING_SLOT_VAR0) {
70         var->data.location += 9;
71      } else if ((var->data.location >= VARYING_SLOT_TEX0) &&
72               (var->data.location <= VARYING_SLOT_TEX7)) {
73         var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0;
74      }
75   }
76}
77
78/* input location assignment for VS inputs must be handled specially, so
79 * that it is aligned w/ st's vbo state.
80 * (This isn't the case with, for ex, FS inputs, which only need to agree
81 * on varying-slot w/ the VS outputs)
82 */
83static void
84st_nir_assign_vs_in_locations(nir_shader *nir)
85{
86   nir->num_inputs = 0;
87   nir_foreach_variable_safe(var, &nir->inputs) {
88      /* NIR already assigns dual-slot inputs to two locations so all we have
89       * to do is compact everything down.
90       */
91      if (var->data.location == VERT_ATTRIB_EDGEFLAG) {
92         /* bit of a hack, mirroring st_translate_vertex_program */
93         var->data.driver_location = util_bitcount64(nir->info.inputs_read);
94      } else if (nir->info.inputs_read & BITFIELD64_BIT(var->data.location)) {
95         var->data.driver_location =
96            util_bitcount64(nir->info.inputs_read &
97                              BITFIELD64_MASK(var->data.location));
98         nir->num_inputs++;
99      } else {
100         /* Move unused input variables to the globals list (with no
101          * initialization), to avoid confusing drivers looking through the
102          * inputs array and expecting to find inputs with a driver_location
103          * set.
104          */
105         exec_node_remove(&var->node);
106         var->data.mode = nir_var_shader_temp;
107         exec_list_push_tail(&nir->globals, &var->node);
108      }
109   }
110}
111
112static void
113st_nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
114                            gl_shader_stage stage)
115{
116   unsigned location = 0;
117   unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
118   uint64_t processed_locs[2] = {0};
119
120   const int base = stage == MESA_SHADER_FRAGMENT ?
121      (int) FRAG_RESULT_DATA0 : (int) VARYING_SLOT_VAR0;
122
123   int UNUSED last_loc = 0;
124   nir_foreach_variable(var, var_list) {
125
126      const struct glsl_type *type = var->type;
127      if (nir_is_per_vertex_io(var, stage)) {
128         assert(glsl_type_is_array(type));
129         type = glsl_get_array_element(type);
130      }
131
132      unsigned var_size = type_size(type);
133
134      /* Builtins don't allow component packing so we only need to worry about
135       * user defined varyings sharing the same location.
136       */
137      bool processed = false;
138      if (var->data.location >= base) {
139         unsigned glsl_location = var->data.location - base;
140
141         for (unsigned i = 0; i < var_size; i++) {
142            if (processed_locs[var->data.index] &
143                ((uint64_t)1 << (glsl_location + i)))
144               processed = true;
145            else
146               processed_locs[var->data.index] |=
147                  ((uint64_t)1 << (glsl_location + i));
148         }
149      }
150
151      /* Because component packing allows varyings to share the same location
152       * we may have already have processed this location.
153       */
154      if (processed) {
155         unsigned driver_location = assigned_locations[var->data.location];
156         var->data.driver_location = driver_location;
157         *size += type_size(type);
158
159         /* An array may be packed such that is crosses multiple other arrays
160          * or variables, we need to make sure we have allocated the elements
161          * consecutively if the previously proccessed var was shorter than
162          * the current array we are processing.
163          *
164          * NOTE: The code below assumes the var list is ordered in ascending
165          * location order.
166          */
167         assert(last_loc <= var->data.location);
168         last_loc = var->data.location;
169         unsigned last_slot_location = driver_location + var_size;
170         if (last_slot_location > location) {
171            unsigned num_unallocated_slots = last_slot_location - location;
172            unsigned first_unallocated_slot = var_size - num_unallocated_slots;
173            for (unsigned i = first_unallocated_slot; i < num_unallocated_slots; i++) {
174               assigned_locations[var->data.location + i] = location;
175               location++;
176            }
177         }
178         continue;
179      }
180
181      for (unsigned i = 0; i < var_size; i++) {
182         assigned_locations[var->data.location + i] = location + i;
183      }
184
185      var->data.driver_location = location;
186      location += var_size;
187   }
188
189   *size += location;
190}
191
192static int
193st_nir_lookup_parameter_index(const struct gl_program_parameter_list *params,
194                              const char *name)
195{
196   int loc = _mesa_lookup_parameter_index(params, name);
197
198   /* is there a better way to do this?  If we have something like:
199    *
200    *    struct S {
201    *           float f;
202    *           vec4 v;
203    *    };
204    *    uniform S color;
205    *
206    * Then what we get in prog->Parameters looks like:
207    *
208    *    0: Name=color.f, Type=6, DataType=1406, Size=1
209    *    1: Name=color.v, Type=6, DataType=8b52, Size=4
210    *
211    * So the name doesn't match up and _mesa_lookup_parameter_index()
212    * fails.  In this case just find the first matching "color.*"..
213    *
214    * Note for arrays you could end up w/ color[n].f, for example.
215    *
216    * glsl_to_tgsi works slightly differently in this regard.  It is
217    * emitting something more low level, so it just translates the
218    * params list 1:1 to CONST[] regs.  Going from GLSL IR to TGSI,
219    * it just calculates the additional offset of struct field members
220    * in glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) or
221    * glsl_to_tgsi_visitor::visit(ir_dereference_array *ir).  It never
222    * needs to work backwards to get base var loc from the param-list
223    * which already has them separated out.
224    */
225   if (loc < 0) {
226      int namelen = strlen(name);
227      for (unsigned i = 0; i < params->NumParameters; i++) {
228         struct gl_program_parameter *p = &params->Parameters[i];
229         if ((strncmp(p->Name, name, namelen) == 0) &&
230             ((p->Name[namelen] == '.') || (p->Name[namelen] == '['))) {
231            loc = i;
232            break;
233         }
234      }
235   }
236
237   return loc;
238}
239
240static void
241st_nir_assign_uniform_locations(struct gl_context *ctx,
242                                struct gl_program *prog,
243                                struct exec_list *uniform_list)
244{
245   int shaderidx = 0;
246   int imageidx = 0;
247
248   nir_foreach_variable(uniform, uniform_list) {
249      int loc;
250
251      /*
252       * UBO's have their own address spaces, so don't count them towards the
253       * number of global uniforms
254       */
255      if (uniform->data.mode == nir_var_mem_ubo || uniform->data.mode == nir_var_mem_ssbo)
256         continue;
257
258      const struct glsl_type *type = glsl_without_array(uniform->type);
259      if (!uniform->data.bindless && (type->is_sampler() || type->is_image())) {
260         if (type->is_sampler()) {
261            loc = shaderidx;
262            shaderidx += type_size(uniform->type);
263         } else {
264            loc = imageidx;
265            imageidx += type_size(uniform->type);
266         }
267      } else if (strncmp(uniform->name, "gl_", 3) == 0) {
268         const gl_state_index16 *const stateTokens = uniform->state_slots[0].tokens;
269         /* This state reference has already been setup by ir_to_mesa, but we'll
270          * get the same index back here.
271          */
272
273         unsigned comps;
274         if (glsl_type_is_struct_or_ifc(type)) {
275            comps = 4;
276         } else {
277            comps = glsl_get_vector_elements(type);
278         }
279
280         if (ctx->Const.PackedDriverUniformStorage) {
281            loc = _mesa_add_sized_state_reference(prog->Parameters,
282                                                  stateTokens, comps, false);
283            loc = prog->Parameters->ParameterValueOffset[loc];
284         } else {
285            loc = _mesa_add_state_reference(prog->Parameters, stateTokens);
286         }
287      } else {
288         loc = st_nir_lookup_parameter_index(prog->Parameters, uniform->name);
289
290         /* We need to check that loc is not -1 here before accessing the
291          * array. It can be negative for example when we have a struct that
292          * only contains opaque types.
293          */
294         if (loc >= 0 && ctx->Const.PackedDriverUniformStorage) {
295            loc = prog->Parameters->ParameterValueOffset[loc];
296         }
297      }
298
299      uniform->data.driver_location = loc;
300   }
301}
302
303void
304st_nir_opts(nir_shader *nir, bool scalar)
305{
306   bool progress;
307   do {
308      progress = false;
309
310      NIR_PASS_V(nir, nir_lower_vars_to_ssa);
311
312      if (scalar) {
313         NIR_PASS_V(nir, nir_lower_alu_to_scalar);
314         NIR_PASS_V(nir, nir_lower_phis_to_scalar);
315      }
316
317      NIR_PASS_V(nir, nir_lower_alu);
318      NIR_PASS_V(nir, nir_lower_pack);
319      NIR_PASS(progress, nir, nir_copy_prop);
320      NIR_PASS(progress, nir, nir_opt_remove_phis);
321      NIR_PASS(progress, nir, nir_opt_dce);
322      if (nir_opt_trivial_continues(nir)) {
323         progress = true;
324         NIR_PASS(progress, nir, nir_copy_prop);
325         NIR_PASS(progress, nir, nir_opt_dce);
326      }
327      NIR_PASS(progress, nir, nir_opt_if, false);
328      NIR_PASS(progress, nir, nir_opt_dead_cf);
329      NIR_PASS(progress, nir, nir_opt_cse);
330      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
331
332      NIR_PASS(progress, nir, nir_opt_algebraic);
333      NIR_PASS(progress, nir, nir_opt_constant_folding);
334
335      NIR_PASS(progress, nir, nir_opt_undef);
336      NIR_PASS(progress, nir, nir_opt_conditional_discard);
337      if (nir->options->max_unroll_iterations) {
338         NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
339      }
340   } while (progress);
341}
342
343/* First third of converting glsl_to_nir.. this leaves things in a pre-
344 * nir_lower_io state, so that shader variants can more easily insert/
345 * replace variables, etc.
346 */
347static nir_shader *
348st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
349               struct gl_shader_program *shader_program,
350               gl_shader_stage stage)
351{
352   const nir_shader_compiler_options *options =
353      st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions;
354   enum pipe_shader_type type = pipe_shader_type_from_mesa(stage);
355   struct pipe_screen *screen = st->pipe->screen;
356   bool is_scalar = screen->get_shader_param(screen, type, PIPE_SHADER_CAP_SCALAR_ISA);
357   assert(options);
358   bool lower_64bit =
359      options->lower_int64_options || options->lower_doubles_options;
360
361   if (prog->nir)
362      return prog->nir;
363
364   nir_shader *nir = glsl_to_nir(st->ctx, shader_program, stage, options);
365
366   /* Set the next shader stage hint for VS and TES. */
367   if (!nir->info.separate_shader &&
368       (nir->info.stage == MESA_SHADER_VERTEX ||
369        nir->info.stage == MESA_SHADER_TESS_EVAL)) {
370
371      unsigned prev_stages = (1 << (prog->info.stage + 1)) - 1;
372      unsigned stages_mask =
373         ~prev_stages & shader_program->data->linked_stages;
374
375      nir->info.next_stage = stages_mask ?
376         (gl_shader_stage) u_bit_scan(&stages_mask) : MESA_SHADER_FRAGMENT;
377   } else {
378      nir->info.next_stage = MESA_SHADER_FRAGMENT;
379   }
380
381   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
382   nir_shader *softfp64 = NULL;
383   if (nir->info.uses_64bit &&
384       (options->lower_doubles_options & nir_lower_fp64_full_software) != 0) {
385      softfp64 = glsl_float64_funcs_to_nir(st->ctx, options);
386      ralloc_steal(ralloc_parent(nir), softfp64);
387   }
388
389   nir_variable_mode mask =
390      (nir_variable_mode) (nir_var_shader_in | nir_var_shader_out);
391   nir_remove_dead_variables(nir, mask);
392
393   if (options->lower_all_io_to_temps ||
394       nir->info.stage == MESA_SHADER_VERTEX ||
395       nir->info.stage == MESA_SHADER_GEOMETRY) {
396      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
397                 nir_shader_get_entrypoint(nir),
398                 true, true);
399   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
400      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
401                 nir_shader_get_entrypoint(nir),
402                 true, false);
403   }
404
405   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
406   NIR_PASS_V(nir, nir_split_var_copies);
407   NIR_PASS_V(nir, nir_lower_var_copies);
408
409   if (is_scalar) {
410     NIR_PASS_V(nir, nir_lower_alu_to_scalar);
411   }
412
413   /* before buffers and vars_to_ssa */
414   NIR_PASS_V(nir, gl_nir_lower_bindless_images);
415   st_nir_opts(nir, is_scalar);
416
417   NIR_PASS_V(nir, gl_nir_lower_buffers, shader_program);
418   /* Do a round of constant folding to clean up address calculations */
419   NIR_PASS_V(nir, nir_opt_constant_folding);
420
421   if (lower_64bit) {
422      bool lowered_64bit_ops = false;
423      bool progress = false;
424
425      NIR_PASS_V(nir, nir_opt_algebraic);
426
427      do {
428         progress = false;
429         if (options->lower_int64_options) {
430            NIR_PASS(progress, nir, nir_lower_int64,
431                     options->lower_int64_options);
432         }
433         if (options->lower_doubles_options) {
434            NIR_PASS(progress, nir, nir_lower_doubles,
435                     softfp64, options->lower_doubles_options);
436         }
437         NIR_PASS(progress, nir, nir_opt_algebraic);
438         lowered_64bit_ops |= progress;
439      } while (progress);
440
441      if (lowered_64bit_ops)
442         st_nir_opts(nir, is_scalar);
443   }
444
445   return nir;
446}
447
448/* Second third of converting glsl_to_nir. This creates uniforms, gathers
449 * info on varyings, etc after NIR link time opts have been applied.
450 */
451static void
452st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
453                         struct gl_shader_program *shader_program)
454{
455   nir_shader *nir = prog->nir;
456
457   /* Make a pass over the IR to add state references for any built-in
458    * uniforms that are used.  This has to be done now (during linking).
459    * Code generation doesn't happen until the first time this shader is
460    * used for rendering.  Waiting until then to generate the parameters is
461    * too late.  At that point, the values for the built-in uniforms won't
462    * get sent to the shader.
463    */
464   nir_foreach_variable(var, &nir->uniforms) {
465      if (strncmp(var->name, "gl_", 3) == 0) {
466         const nir_state_slot *const slots = var->state_slots;
467         assert(var->state_slots != NULL);
468
469         const struct glsl_type *type = glsl_without_array(var->type);
470         for (unsigned int i = 0; i < var->num_state_slots; i++) {
471            unsigned comps;
472            if (glsl_type_is_struct_or_ifc(type)) {
473               /* Builtin struct require specical handling for now we just
474                * make all members vec4. See st_nir_lower_builtin.
475                */
476               comps = 4;
477            } else {
478               comps = glsl_get_vector_elements(type);
479            }
480
481            if (st->ctx->Const.PackedDriverUniformStorage) {
482               _mesa_add_sized_state_reference(prog->Parameters,
483                                               slots[i].tokens,
484                                               comps, false);
485            } else {
486               _mesa_add_state_reference(prog->Parameters,
487                                         slots[i].tokens);
488            }
489         }
490      }
491   }
492
493   /* Avoid reallocation of the program parameter list, because the uniform
494    * storage is only associated with the original parameter list.
495    * This should be enough for Bitmap and DrawPixels constants.
496    */
497   _mesa_reserve_parameter_storage(prog->Parameters, 8);
498
499   /* This has to be done last.  Any operation the can cause
500    * prog->ParameterValues to get reallocated (e.g., anything that adds a
501    * program constant) has to happen before creating this linkage.
502    */
503   _mesa_associate_uniform_storage(st->ctx, shader_program, prog);
504
505   st_set_prog_affected_state_flags(prog);
506
507   NIR_PASS_V(nir, st_nir_lower_builtin);
508   NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
509   NIR_PASS_V(nir, nir_opt_intrinsics);
510
511   nir_variable_mode mask = nir_var_function_temp;
512   nir_remove_dead_variables(nir, mask);
513
514   if (st->ctx->_Shader->Flags & GLSL_DUMP) {
515      _mesa_log("\n");
516      _mesa_log("NIR IR for linked %s program %d:\n",
517             _mesa_shader_stage_to_string(prog->info.stage),
518             shader_program->Name);
519      nir_print_shader(nir, _mesa_get_log_file());
520      _mesa_log("\n\n");
521   }
522}
523
524/* TODO any better helper somewhere to sort a list? */
525
526static void
527insert_sorted(struct exec_list *var_list, nir_variable *new_var)
528{
529   nir_foreach_variable(var, var_list) {
530      if (var->data.location > new_var->data.location) {
531         exec_node_insert_node_before(&var->node, &new_var->node);
532         return;
533      }
534   }
535   exec_list_push_tail(var_list, &new_var->node);
536}
537
538static void
539sort_varyings(struct exec_list *var_list)
540{
541   struct exec_list new_list;
542   exec_list_make_empty(&new_list);
543   nir_foreach_variable_safe(var, var_list) {
544      exec_node_remove(&var->node);
545      insert_sorted(&new_list, var);
546   }
547   exec_list_move_nodes_to(&new_list, var_list);
548}
549
550static void
551set_st_program(struct gl_program *prog,
552               struct gl_shader_program *shader_program,
553               nir_shader *nir)
554{
555   struct st_vertex_program *stvp;
556   struct st_common_program *stp;
557   struct st_fragment_program *stfp;
558   struct st_compute_program *stcp;
559
560   switch (prog->info.stage) {
561   case MESA_SHADER_VERTEX:
562      stvp = (struct st_vertex_program *)prog;
563      stvp->shader_program = shader_program;
564      stvp->tgsi.type = PIPE_SHADER_IR_NIR;
565      stvp->tgsi.ir.nir = nir;
566      break;
567   case MESA_SHADER_GEOMETRY:
568   case MESA_SHADER_TESS_CTRL:
569   case MESA_SHADER_TESS_EVAL:
570      stp = (struct st_common_program *)prog;
571      stp->shader_program = shader_program;
572      stp->tgsi.type = PIPE_SHADER_IR_NIR;
573      stp->tgsi.ir.nir = nir;
574      break;
575   case MESA_SHADER_FRAGMENT:
576      stfp = (struct st_fragment_program *)prog;
577      stfp->shader_program = shader_program;
578      stfp->tgsi.type = PIPE_SHADER_IR_NIR;
579      stfp->tgsi.ir.nir = nir;
580      break;
581   case MESA_SHADER_COMPUTE:
582      stcp = (struct st_compute_program *)prog;
583      stcp->shader_program = shader_program;
584      stcp->tgsi.ir_type = PIPE_SHADER_IR_NIR;
585      stcp->tgsi.prog = nir;
586      break;
587   default:
588      unreachable("unknown shader stage");
589   }
590}
591
592static void
593st_nir_get_mesa_program(struct gl_context *ctx,
594                        struct gl_shader_program *shader_program,
595                        struct gl_linked_shader *shader)
596{
597   struct st_context *st = st_context(ctx);
598   struct pipe_screen *pscreen = ctx->st->pipe->screen;
599   struct gl_program *prog;
600
601   validate_ir_tree(shader->ir);
602
603   prog = shader->Program;
604
605   prog->Parameters = _mesa_new_parameter_list();
606
607   _mesa_copy_linked_program_data(shader_program, shader);
608   _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
609                                               prog->Parameters);
610
611   /* Remove reads from output registers. */
612   if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS))
613      lower_output_reads(shader->Stage, shader->ir);
614
615   if (ctx->_Shader->Flags & GLSL_DUMP) {
616      _mesa_log("\n");
617      _mesa_log("GLSL IR for linked %s program %d:\n",
618             _mesa_shader_stage_to_string(shader->Stage),
619             shader_program->Name);
620      _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
621      _mesa_log("\n\n");
622   }
623
624   prog->ExternalSamplersUsed = gl_external_samplers(prog);
625   _mesa_update_shader_textures_used(shader_program, prog);
626
627   nir_shader *nir = st_glsl_to_nir(st, prog, shader_program, shader->Stage);
628
629   set_st_program(prog, shader_program, nir);
630   prog->nir = nir;
631}
632
633static void
634st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool scalar)
635{
636   if (scalar) {
637      NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
638      NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
639   }
640
641   nir_lower_io_arrays_to_elements(*producer, *consumer);
642
643   st_nir_opts(*producer, scalar);
644   st_nir_opts(*consumer, scalar);
645
646   if (nir_link_opt_varyings(*producer, *consumer))
647      st_nir_opts(*consumer, scalar);
648
649   NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
650   NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
651
652   if (nir_remove_unused_varyings(*producer, *consumer)) {
653      NIR_PASS_V(*producer, nir_lower_global_vars_to_local);
654      NIR_PASS_V(*consumer, nir_lower_global_vars_to_local);
655
656      /* The backend might not be able to handle indirects on
657       * temporaries so we need to lower indirects on any of the
658       * varyings we have demoted here.
659       *
660       * TODO: radeonsi shouldn't need to do this, however LLVM isn't
661       * currently smart enough to handle indirects without causing excess
662       * spilling causing the gpu to hang.
663       *
664       * See the following thread for more details of the problem:
665       * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
666       */
667      nir_variable_mode indirect_mask = nir_var_function_temp;
668
669      NIR_PASS_V(*producer, nir_lower_indirect_derefs, indirect_mask);
670      NIR_PASS_V(*consumer, nir_lower_indirect_derefs, indirect_mask);
671
672      st_nir_opts(*producer, scalar);
673      st_nir_opts(*consumer, scalar);
674
675      /* Lowering indirects can cause varying to become unused.
676       * nir_compact_varyings() depends on all dead varyings being removed so
677       * we need to call nir_remove_dead_variables() again here.
678       */
679      NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
680      NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
681   }
682}
683
684static void
685st_lower_patch_vertices_in(struct gl_shader_program *shader_prog)
686{
687   struct gl_linked_shader *linked_tcs =
688      shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
689   struct gl_linked_shader *linked_tes =
690      shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
691
692   /* If we have a TCS and TES linked together, lower TES patch vertices. */
693   if (linked_tcs && linked_tes) {
694      nir_shader *tcs_nir = linked_tcs->Program->nir;
695      nir_shader *tes_nir = linked_tes->Program->nir;
696
697      /* The TES input vertex count is the TCS output vertex count,
698       * lower TES gl_PatchVerticesIn to a constant.
699       */
700      uint32_t tes_patch_verts = tcs_nir->info.tess.tcs_vertices_out;
701      NIR_PASS_V(tes_nir, nir_lower_patch_vertices, tes_patch_verts, NULL);
702   }
703}
704
705extern "C" {
706
707void
708st_nir_lower_wpos_ytransform(struct nir_shader *nir,
709                             struct gl_program *prog,
710                             struct pipe_screen *pscreen)
711{
712   if (nir->info.stage != MESA_SHADER_FRAGMENT)
713      return;
714
715   static const gl_state_index16 wposTransformState[STATE_LENGTH] = {
716      STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
717   };
718   nir_lower_wpos_ytransform_options wpos_options = { { 0 } };
719
720   memcpy(wpos_options.state_tokens, wposTransformState,
721          sizeof(wpos_options.state_tokens));
722   wpos_options.fs_coord_origin_upper_left =
723      pscreen->get_param(pscreen,
724                         PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
725   wpos_options.fs_coord_origin_lower_left =
726      pscreen->get_param(pscreen,
727                         PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
728   wpos_options.fs_coord_pixel_center_integer =
729      pscreen->get_param(pscreen,
730                         PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
731   wpos_options.fs_coord_pixel_center_half_integer =
732      pscreen->get_param(pscreen,
733                         PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
734
735   if (nir_lower_wpos_ytransform(nir, &wpos_options)) {
736      nir_validate_shader(nir, "after nir_lower_wpos_ytransform");
737      _mesa_add_state_reference(prog->Parameters, wposTransformState);
738   }
739}
740
741bool
742st_link_nir(struct gl_context *ctx,
743            struct gl_shader_program *shader_program)
744{
745   struct st_context *st = st_context(ctx);
746   struct pipe_screen *screen = st->pipe->screen;
747   bool is_scalar[MESA_SHADER_STAGES];
748
749   unsigned last_stage = 0;
750   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
751      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
752      if (shader == NULL)
753         continue;
754
755      /* Determine scalar property of each shader stage */
756      enum pipe_shader_type type = pipe_shader_type_from_mesa(shader->Stage);
757      is_scalar[i] = screen->get_shader_param(screen, type,
758                                              PIPE_SHADER_CAP_SCALAR_ISA);
759
760      st_nir_get_mesa_program(ctx, shader_program, shader);
761      last_stage = i;
762
763      if (is_scalar[i]) {
764         NIR_PASS_V(shader->Program->nir, nir_lower_load_const_to_scalar);
765      }
766   }
767
768   /* Linking the stages in the opposite order (from fragment to vertex)
769    * ensures that inter-shader outputs written to in an earlier stage
770    * are eliminated if they are (transitively) not used in a later
771    * stage.
772    */
773   int next = last_stage;
774   for (int i = next - 1; i >= 0; i--) {
775      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
776      if (shader == NULL)
777         continue;
778
779      st_nir_link_shaders(&shader->Program->nir,
780                          &shader_program->_LinkedShaders[next]->Program->nir,
781                          is_scalar[i]);
782      next = i;
783   }
784
785   int prev = -1;
786   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
787      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
788      if (shader == NULL)
789         continue;
790
791      nir_shader *nir = shader->Program->nir;
792
793      NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, shader->Program,
794                 st->pipe->screen);
795
796      NIR_PASS_V(nir, nir_lower_system_values);
797      NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
798
799      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
800      shader->Program->info = nir->info;
801      if (i == MESA_SHADER_VERTEX) {
802         /* NIR expands dual-slot inputs out to two locations.  We need to
803          * compact things back down GL-style single-slot inputs to avoid
804          * confusing the state tracker.
805          */
806         shader->Program->info.inputs_read =
807            nir_get_single_slot_attribs_mask(nir->info.inputs_read,
808                                             shader->Program->DualSlotInputs);
809      }
810
811      if (prev != -1) {
812         struct gl_program *prev_shader =
813            shader_program->_LinkedShaders[prev]->Program;
814
815         /* We can't use nir_compact_varyings with transform feedback, since
816          * the pipe_stream_output->output_register field is based on the
817          * pre-compacted driver_locations.
818          */
819         if (!(prev_shader->sh.LinkedTransformFeedback &&
820               prev_shader->sh.LinkedTransformFeedback->NumVarying > 0))
821            nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir,
822                              nir, ctx->API != API_OPENGL_COMPAT);
823      }
824      prev = i;
825   }
826
827   st_lower_patch_vertices_in(shader_program);
828
829   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
830      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
831      if (shader == NULL)
832         continue;
833
834      st_glsl_to_nir_post_opts(st, shader->Program, shader_program);
835
836      assert(shader->Program);
837      if (!ctx->Driver.ProgramStringNotify(ctx,
838                                           _mesa_shader_stage_to_program(i),
839                                           shader->Program)) {
840         _mesa_reference_program(ctx, &shader->Program, NULL);
841         return false;
842      }
843
844      nir_sweep(shader->Program->nir);
845
846      /* The GLSL IR won't be needed anymore. */
847      ralloc_free(shader->ir);
848      shader->ir = NULL;
849   }
850
851   return true;
852}
853
854void
855st_nir_assign_varying_locations(struct st_context *st, nir_shader *nir)
856{
857   if (nir->info.stage == MESA_SHADER_VERTEX) {
858      /* Needs special handling so drvloc matches the vbo state: */
859      st_nir_assign_vs_in_locations(nir);
860      /* Re-lower global vars, to deal with any dead VS inputs. */
861      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
862
863      sort_varyings(&nir->outputs);
864      st_nir_assign_var_locations(&nir->outputs,
865                                  &nir->num_outputs,
866                                  nir->info.stage);
867      st_nir_fixup_varying_slots(st, &nir->outputs);
868   } else if (nir->info.stage == MESA_SHADER_GEOMETRY ||
869              nir->info.stage == MESA_SHADER_TESS_CTRL ||
870              nir->info.stage == MESA_SHADER_TESS_EVAL) {
871      sort_varyings(&nir->inputs);
872      st_nir_assign_var_locations(&nir->inputs,
873                                  &nir->num_inputs,
874                                  nir->info.stage);
875      st_nir_fixup_varying_slots(st, &nir->inputs);
876
877      sort_varyings(&nir->outputs);
878      st_nir_assign_var_locations(&nir->outputs,
879                                  &nir->num_outputs,
880                                  nir->info.stage);
881      st_nir_fixup_varying_slots(st, &nir->outputs);
882   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
883      sort_varyings(&nir->inputs);
884      st_nir_assign_var_locations(&nir->inputs,
885                                  &nir->num_inputs,
886                                  nir->info.stage);
887      st_nir_fixup_varying_slots(st, &nir->inputs);
888      st_nir_assign_var_locations(&nir->outputs,
889                                  &nir->num_outputs,
890                                  nir->info.stage);
891   } else if (nir->info.stage == MESA_SHADER_COMPUTE) {
892       /* TODO? */
893   } else {
894      unreachable("invalid shader type");
895   }
896}
897
898void
899st_nir_lower_samplers(struct pipe_screen *screen, nir_shader *nir,
900                      struct gl_shader_program *shader_program,
901                      struct gl_program *prog)
902{
903   if (screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF))
904      NIR_PASS_V(nir, gl_nir_lower_samplers_as_deref, shader_program);
905   else
906      NIR_PASS_V(nir, gl_nir_lower_samplers, shader_program);
907
908   if (prog) {
909      prog->info.textures_used = nir->info.textures_used;
910      prog->info.textures_used_by_txf = nir->info.textures_used_by_txf;
911   }
912}
913
914/* Last third of preparing nir from glsl, which happens after shader
915 * variant lowering.
916 */
917void
918st_finalize_nir(struct st_context *st, struct gl_program *prog,
919                struct gl_shader_program *shader_program, nir_shader *nir)
920{
921   struct pipe_screen *screen = st->pipe->screen;
922   const nir_shader_compiler_options *options =
923      st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions;
924
925   NIR_PASS_V(nir, nir_split_var_copies);
926   NIR_PASS_V(nir, nir_lower_var_copies);
927   if (options->lower_all_io_to_temps ||
928       options->lower_all_io_to_elements ||
929       nir->info.stage == MESA_SHADER_VERTEX ||
930       nir->info.stage == MESA_SHADER_GEOMETRY) {
931      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
932   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
933      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
934   }
935
936   st_nir_assign_varying_locations(st, nir);
937
938   NIR_PASS_V(nir, nir_lower_atomics_to_ssbo,
939         st->ctx->Const.Program[nir->info.stage].MaxAtomicBuffers);
940
941   st_nir_assign_uniform_locations(st->ctx, prog,
942                                   &nir->uniforms);
943
944   /* Set num_uniforms in number of attribute slots (vec4s) */
945   nir->num_uniforms = DIV_ROUND_UP(prog->Parameters->NumParameterValues, 4);
946
947   if (st->ctx->Const.PackedDriverUniformStorage) {
948      NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_glsl_type_dword_size,
949                 (nir_lower_io_options)0);
950      NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 4);
951   } else {
952      NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_glsl_uniforms_type_size,
953                 (nir_lower_io_options)0);
954   }
955
956   st_nir_lower_samplers(screen, nir, shader_program, prog);
957}
958
959} /* extern "C" */
960