zink_compiler.c revision 7ec681f3
/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir.h"
#include "compiler/nir/nir_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "util/u_memory.h"

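/* the two create_*_pushconst() helpers below mirror the push constant layout
 * that the ntv push constant loader consumes; as a rough sketch (the real
 * definition lives in zink_compiler.h and may contain more members than shown):
 *
 *    struct zink_gfx_push_constant {
 *       unsigned draw_mode_is_indexed;
 *       unsigned draw_id;
 *       ...
 *    };
 */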
static void
create_vs_pushconst(nir_shader *nir)
{
   nir_variable *vs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2);
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed");
   fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
   fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[1].name = ralloc_asprintf(nir, "draw_id");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id);
   vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 2, "struct", false), "vs_pushconst");
   vs_pushconst->data.location = INT_MAX; //doesn't really matter
}

static void
create_cs_pushconst(nir_shader *nir)
{
   nir_variable *cs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field));
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "work_dim");
   fields[0].offset = 0;
   cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 1, "struct", false), "cs_pushconst");
   cs_pushconst->data.location = INT_MAX; //doesn't really matter
}

static bool
reads_work_dim(nir_shader *shader)
{
   return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM);
}

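/* turn a conditional discard into an explicit if block containing an
 * unconditional discard, so ntv only has to handle the plain form; roughly:
 *
 *    discard_if ssa_5     =>     if ssa_5 {
 *                                   discard
 *                                }
 *
 * (NIR sketch only)
 */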
static bool
lower_discard_if_instr(nir_builder *b, nir_instr *instr_, UNUSED void *cb_data)
{
   if (instr_->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(instr_);

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      b->cursor = nir_before_instr(&instr->instr);

      nir_if *if_stmt = nir_push_if(b, nir_ssa_for_src(b, instr->src[0], 1));
      nir_discard(b);
      nir_pop_if(b, if_stmt);
      nir_instr_remove(&instr->instr);
      return true;
   }
   /* a shader like this (shaders@glsl-fs-discard-04):

      uniform int j, k;

      void main()
      {
       for (int i = 0; i < j; i++) {
        if (i > k)
         continue;
        discard;
       }
       gl_FragColor = vec4(0.0, 1.0, 0.0, 0.0);
      }

      will generate nir like:

      loop {
         //snip
         if ssa_11 {
            block block_5:
            / preds: block_4 /
            vec1 32 ssa_17 = iadd ssa_50, ssa_31
            / succs: block_7 /
         } else {
            block block_6:
            / preds: block_4 /
            intrinsic discard () () <-- not the last instruction
            vec1 32 ssa_23 = iadd ssa_50, ssa_31 <-- dead loop iterator increment
            / succs: block_7 /
         }
         //snip
      }

      which means we can't assert like this:

      assert(instr->intrinsic != nir_intrinsic_discard ||
             nir_block_last_instr(instr->instr.block) == &instr->instr);

      and it's unnecessary anyway since post-vtn optimization will dce the instructions following the discard
    */

   return false;
}

static bool
lower_discard_if(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader,
                                       lower_discard_if_instr,
                                       nir_metadata_dominance,
                                       NULL);
}

static bool
lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_work_dim)
      return false;

   /* work_dim is the first (and only) member of the cs push constant block
    * created in create_cs_pushconst(), so load it from offset 0
    */
   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 3 * sizeof(uint32_t));
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_work_dim(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_KERNEL)
      return false;

   if (!reads_work_dim(shader))
      return false;

   return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL);
}

static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_deref)
      return false;
   nir_deref_instr *deref = nir_instr_as_deref(instr);
   if (deref->deref_type != nir_deref_type_var)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   deref->type = var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   /* create deref instr for new variable */
   b->cursor = nir_after_instr(instr);
   nir_deref_instr *deref2 = nir_build_deref_var(b, var2);

   nir_foreach_use_safe(use_src, &deref->dest.ssa) {
      nir_instr *use_instr = use_src->parent_instr;
      assert(use_instr->type == nir_instr_type_intrinsic &&
             nir_instr_as_intrinsic(use_instr)->intrinsic == nir_intrinsic_load_deref);

      /* this is a load instruction for the deref, and we need to split it into two instructions that we can
       * then zip back into a single ssa def */
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr);
      /* clamp the first load to 2 64bit components */
      intr->num_components = intr->dest.ssa.num_components = 2;
      b->cursor = nir_after_instr(use_instr);
      /* this is the second load instruction for the second half of the dvec3/4 components */
      nir_intrinsic_instr *intr2 = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
      intr2->src[0] = nir_src_for_ssa(&deref2->dest.ssa);
      intr2->num_components = total_num_components - 2;
      nir_ssa_dest_init(&intr2->instr, &intr2->dest, intr2->num_components, 64, NULL);
      nir_builder_instr_insert(b, &intr2->instr);

      nir_ssa_def *def[4];
      /* create a new dvec3/4 comprised of all the loaded components from both variables */
      def[0] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 0));
      def[1] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 1));
      def[2] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 0));
      if (total_num_components == 4)
         def[3] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 1));
      nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
      /* use the assembled dvec3/4 for all other uses of the load */
      nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                     new_vec->parent_instr);
   }

   return true;
}

/* "64-bit three- and four-component vectors consume two consecutive locations."
 *  - Vulkan spec, 14.1.4. Location Assignment
 *
 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
 * are assigned to consecutive locations, loaded separately, and then assembled back into a
 * composite value that's used in place of the original loaded ssa src
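 *
 * a rough sketch of the result for a dvec4 input at location 0:
 *
 *    in dvec2 attr@0, in dvec2 attr@1   (split variables)
 *    vec2 64 a = load_deref attr@0
 *    vec2 64 b = load_deref attr@1
 *    ... vec4(a.x, a.y, b.x, b.y) replaces every use of the original load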
 */
static bool
lower_64bit_vertex_attribs(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   return nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
}

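/* in GL, gl_BaseVertex is defined to be zero for non-indexed draws, while the
 * spirv BaseVertex builtin contains the firstVertex draw parameter regardless,
 * so select between the real value and zero based on the draw_mode_is_indexed
 * push constant; a sketch of the emitted NIR:
 *
 *    is_indexed = load_push_constant  (draw_mode_is_indexed)
 *    base_vertex = bcsel(ieq(is_indexed, 1), load_base_vertex, 0)
 */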
static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}


static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}

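/* vulkan does dual-source blending with both outputs on location 0 and
 * distinct component indices (index 1 selects the SRC1 blend factors), so
 * remap FRAG_RESULT_DATA1 onto FRAG_RESULT_DATA0 with index 1
 */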
static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}

void
zink_screen_init_compiler(struct zink_screen *screen)
{
   static const struct nir_shader_compiler_options
   default_options = {
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_scmp = true,
      .lower_fdph = true,
      .lower_flrp32 = true,
      .lower_fpow = true,
      .lower_fsat = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,
      .lower_mul_high = true,
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_pack_64_2x32_split = true,
      .lower_unpack_64_2x32_split = true,
      .lower_pack_32_2x16_split = true,
      .lower_unpack_32_2x16_split = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      .lower_doubles_options = ~nir_lower_fp64_full_software,
      .lower_uniforms_to_ubo = true,
      .has_fsub = true,
      .has_isub = true,
      .lower_mul_2x32_64 = true,
      .support_16bit_alu = true, /* not quite what it sounds like */
   };

   screen->nir_options = default_options;

   if (!screen->info.feats.features.shaderInt64)
      screen->nir_options.lower_int64_options = ~0;

   if (!screen->info.feats.features.shaderFloat64) {
      screen->nir_options.lower_doubles_options = ~0;
      screen->nir_options.lower_flrp64 = true;
      screen->nir_options.lower_ffma64 = true;
   }
}

const void *
zink_get_compiler_options(struct pipe_screen *pscreen,
                          enum pipe_shader_ir ir,
                          enum pipe_shader_type shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &zink_screen(pscreen)->nir_options;
}

struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
{
   if (zink_debug & ZINK_DEBUG_TGSI) {
      fprintf(stderr, "TGSI shader:\n---8<---\n");
      tgsi_dump_to_file(tokens, 0, stderr);
      fprintf(stderr, "---8<---\n\n");
   }

   return tgsi_to_nir(tokens, screen, false);
}

static void
optimize_nir(struct nir_shader *s)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, zink_nir_lower_b2b);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);
}

/* - copy the lowered fbfetch variable
 * - set the new one up as an input attachment for descriptor 0.6
 * - load it as an image
 * - overwrite the previous load
 */
static bool
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   if (var != data)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_variable *fbfetch = nir_variable_clone(data, b->shader);
   /* If Dim is SubpassData, ... Image Format must be Unknown
    * - SPIRV OpTypeImage specification
    */
   fbfetch->data.image.format = 0;
   fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
   fbfetch->data.mode = nir_var_uniform;
   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
   fbfetch->type = glsl_image_type(GLSL_SAMPLER_DIM_SUBPASS, false, GLSL_TYPE_FLOAT);
   nir_shader_add_variable(b->shader, fbfetch);
   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), nir_ssa_undef(b, 1, 32), nir_imm_int(b, 0));
   unsigned swiz[4] = {2, 1, 0, 3};
   nir_ssa_def *swizzle = nir_swizzle(b, load, swiz, 4);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, swizzle);
   return true;
}

static bool
lower_fbfetch(nir_shader *shader, nir_variable **fbfetch)
{
   nir_foreach_shader_out_variable(var, shader) {
      if (var->data.fb_fetch_output) {
         *fbfetch = var;
         break;
      }
   }
   assert(*fbfetch);
   if (!*fbfetch)
      return false;
   return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, *fbfetch);
}

/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
static bool
check_psiz(struct nir_shader *s)
{
   nir_foreach_shader_out_variable(var, s) {
      if (var->data.location == VARYING_SLOT_PSIZ) {
         /* genuine PSIZ outputs will have this set */
         return !!var->data.explicit_location;
      }
   }
   return false;
}

static void
update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info,
               uint64_t outputs_written, bool have_psiz)
{
   uint8_t reverse_map[64] = {0};
   unsigned slot = 0;
   /* semi-copied from iris */
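   /* e.g. if outputs_written contains only POS and VAR0, this produces
    * reverse_map = { VARYING_SLOT_POS, VARYING_SLOT_VAR0 }, i.e. a mapping
    * from gallium's packed register_index values back to varying slots
    */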
   while (outputs_written) {
      int bit = u_bit_scan64(&outputs_written);
      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
         continue;
      reverse_map[slot++] = bit;
   }

   nir_foreach_shader_out_variable(var, zs->nir)
      var->data.explicit_xfb_buffer = 0;

   bool inlined[64] = {0};
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      /* always set stride to be used during draw */
      zs->streamout.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
      if ((zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) &&
          !output->start_component) {
         nir_variable *var = NULL;
         while (!var)
            var = nir_find_variable_with_location(zs->nir, nir_var_shader_out, slot--);
         slot++;
         if (inlined[slot])
            continue;
         assert(var && var->data.location == slot);
         /* if this is the entire variable, try to blast it out during the initial declaration */
         if (glsl_get_components(var->type) == output->num_components) {
            var->data.explicit_xfb_buffer = 1;
            var->data.xfb.buffer = output->output_buffer;
            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
            var->data.offset = output->dst_offset * 4;
            var->data.stream = output->stream;
            inlined[slot] = true;
            continue;
         }
      }
      zs->streamout.so_info.output[zs->streamout.so_info.num_outputs] = *output;
      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      zs->streamout.so_info_slots[zs->streamout.so_info.num_outputs++] = reverse_map[output->register_index];
   }
   zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs;
}

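/* decompose_attribs() below splits a vertex input that requires per-component
 * fetching into one new scalar variable per component, and lower_attrib()
 * then rewrites each load of the original variable, roughly:
 *
 *    load_deref attr  =>  vec4(load split1, load split2, load split3, load split4)
 *
 * (sketch only; in the needs_w case the first split keeps the full vec4 type
 * and the w component is instead read from its .w channel)
 */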
struct decompose_state {
  nir_variable **split;
  bool needs_w;
};

static bool
lower_attrib(nir_builder *b, nir_instr *instr, void *data)
{
   struct decompose_state *state = data;
   nir_variable **split = state->split;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var != split[0])
      return false;
   unsigned num_components = glsl_get_vector_elements(split[0]->type);
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *loads[4];
   for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
      loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
   if (state->needs_w) {
      /* oob load the w component to get the correct value for int/float */
      loads[3] = nir_channel(b, loads[0], 3);
      loads[0] = nir_channel(b, loads[0], 0);
   }
   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
   nir_instr_remove_v(instr);
   return true;
}

static bool
decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
{
   uint32_t bits = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
      bits |= BITFIELD_BIT(var->data.driver_location);
   bits = ~bits;
   u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
      nir_variable *split[5];
      struct decompose_state state;
      state.split = split;
      nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
      assert(var);
      split[0] = var;
      bits |= BITFIELD_BIT(var->data.driver_location);
      const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
      unsigned num_components = glsl_get_vector_elements(var->type);
      state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
      for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
         split[i+1] = nir_variable_clone(var, nir);
         split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
         if (decomposed_attrs_without_w & BITFIELD_BIT(location))
            split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
         else
            split[i+1]->type = new_type;
         split[i+1]->data.driver_location = ffs(bits) - 1;
         bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
         nir_shader_add_variable(nir, split[i+1]);
      }
      var->data.mode = nir_var_shader_temp;
      nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
   }
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir);
   return true;
}

static void
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   unsigned slot = var->data.location;
   switch (var->data.location) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;

   default:
      if (var->data.patch) {
         assert(var->data.location >= VARYING_SLOT_PATCH0);
         slot = var->data.location - VARYING_SLOT_PATCH0;
      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
                 var->data.mode == nir_var_shader_in &&
                 stage == MESA_SHADER_TESS_EVAL) {
         slot = var->data.location - VARYING_SLOT_VAR0;
      } else {
         if (slot_map[var->data.location] == 0xff) {
            assert(*reserved < MAX_VARYING);
            slot_map[var->data.location] = *reserved;
            *reserved += glsl_count_vec4_slots(var->type, false, false);
         }
         slot = slot_map[var->data.location];
         assert(slot < MAX_VARYING);
      }
      var->data.driver_location = slot;
   }
}

ALWAYS_INLINE static bool
is_texcoord(gl_shader_stage stage, const nir_variable *var)
{
   if (stage != MESA_SHADER_FRAGMENT)
      return false;
   return var->data.location >= VARYING_SLOT_TEX0 &&
          var->data.location <= VARYING_SLOT_TEX7;
}

static bool
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   switch (var->data.location) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;
   default:
      if (var->data.patch) {
         assert(var->data.location >= VARYING_SLOT_PATCH0);
         var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
          stage == MESA_SHADER_TESS_CTRL &&
          var->data.mode == nir_var_shader_out)
         var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
      else {
         if (slot_map[var->data.location] == (unsigned char)-1) {
            if (!is_texcoord(stage, var))
               /* dead io */
               return false;
            /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE */
            slot_map[var->data.location] = (*reserved)++;
         }
         var->data.driver_location = slot_map[var->data.location];
      }
   }
   return true;
}


static bool
rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
{
   nir_variable *var = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
   if (deref_var != var)
      return false;
   nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
   return true;
}

void
zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
{
   unsigned reserved = 0;
   unsigned char slot_map[VARYING_SLOT_MAX];
   memset(slot_map, -1, sizeof(slot_map));
   bool do_fixup = false;
   nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      /* never assign from tcs -> tes, always invert */
      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
         assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
         if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
            /* this is an output, nothing more needs to be done for it to be dropped */
            do_fixup = true;
      }
   } else {
      nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
         assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
         if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
            do_fixup = true;
            /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
            nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
         }
      }
   }
   if (!do_fixup)
      return;
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir);
}

VkShaderModule
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key)
{
   VkShaderModule mod = VK_NULL_HANDLE;
   void *streamout = NULL;
   nir_shader *nir = nir_shader_clone(NULL, base_nir);

   if (key) {
      if (key->inline_uniforms) {
         NIR_PASS_V(nir, nir_inline_uniforms,
                    nir->info.num_inlinable_uniforms,
                    key->base.inlined_uniform_values,
                    nir->info.inlinable_uniform_dw_offsets);

         optimize_nir(nir);

         /* This must be done again. */
         NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
                                                          nir_var_shader_out);
      }

      /* TODO: use a separate mem ctx here for ralloc */
      switch (zs->nir->info.stage) {
      case MESA_SHADER_VERTEX: {
         uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
         const struct zink_vs_key *vs_key = zink_vs_key(key);
         switch (vs_key->size) {
         case 4:
            decomposed_attrs = vs_key->u32.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
            break;
         case 2:
            decomposed_attrs = vs_key->u16.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
            break;
         case 1:
            decomposed_attrs = vs_key->u8.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
            break;
         default: break;
         }
         if (decomposed_attrs || decomposed_attrs_without_w)
            NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
         FALLTHROUGH;
      }
      case MESA_SHADER_TESS_EVAL:
      case MESA_SHADER_GEOMETRY:
         if (zink_vs_key_base(key)->last_vertex_stage) {
            if (zs->streamout.have_xfb)
               streamout = &zs->streamout;

            if (!zink_vs_key_base(key)->clip_halfz) {
               NIR_PASS_V(nir, nir_lower_clip_halfz);
            }
            if (zink_vs_key_base(key)->push_drawid) {
               NIR_PASS_V(nir, lower_drawid);
            }
         }
         break;
      case MESA_SHADER_FRAGMENT:
         if (!zink_fs_key(key)->samples &&
             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
            /* VK will always use gl_SampleMask[] values even if sample count is 0,
             * so we need to skip this write here to mimic GL's behavior of ignoring it
             */
            nir_foreach_shader_out_variable(var, nir) {
               if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
                  var->data.mode = nir_var_shader_temp;
            }
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            optimize_nir(nir);
         }
         if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
            NIR_PASS_V(nir, lower_dual_blend);
         }
         if (zink_fs_key(key)->coord_replace_bits) {
            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
                       false, zink_fs_key(key)->coord_replace_yinvert);
         }
         if (nir->info.fs.uses_fbfetch_output) {
            nir_variable *fbfetch = NULL;
            NIR_PASS_V(nir, lower_fbfetch, &fbfetch);
            /* old variable must be deleted to avoid spirv errors */
            fbfetch->data.mode = nir_var_shader_temp;
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            optimize_nir(nir);
         }
         break;
      default: break;
      }
   }
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   struct spirv_shader *spirv = nir_to_spirv(nir, streamout, screen->spirv_version);
   if (!spirv)
      goto done;

   if (zink_debug & ZINK_DEBUG_SPIRV) {
      char buf[256];
      static int i;
      snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
      FILE *fp = fopen(buf, "wb");
      if (fp) {
         fwrite(spirv->words, sizeof(uint32_t), spirv->num_words, fp);
         fclose(fp);
         fprintf(stderr, "wrote '%s'...\n", buf);
      }
   }

   VkShaderModuleCreateInfo smci = {0};
   smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
   smci.codeSize = spirv->num_words * sizeof(uint32_t);
   smci.pCode = spirv->words;

   if (VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod) != VK_SUCCESS)
      mod = VK_NULL_HANDLE;

done:
   ralloc_free(nir);

   /* TODO: determine if there's any reason to cache spirv output? */
   ralloc_free(spirv);
   return mod;
}

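/* GL's gl_InstanceID does not include the base instance, while the
 * corresponding vulkan/spirv InstanceIndex builtin does, so subtract
 * load_base_instance from load_instance_id; a sketch of the emitted NIR:
 *
 *    instance_id = isub(load_instance_id, load_base_instance)
 */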
static bool
lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_instance_id)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
   return true;
}

static bool
lower_baseinstance(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;
   return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
}

bool nir_lower_dynamic_bo_access(nir_shader *shader);

/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
 * so instead we delete all those broken variables and just make new ones
 */
static bool
unbreak_bos(nir_shader *shader)
{
   uint32_t ssbo_used = 0;
   uint32_t ubo_used = 0;
   uint64_t max_ssbo_size = 0;
   uint64_t max_ubo_size = 0;
   bool ssbo_sizes[PIPE_MAX_SHADER_BUFFERS] = {false};

   if (!shader->info.num_ssbos && !shader->info.num_ubos && !shader->num_uniforms)
      return false;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_ssbo:
            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[1]));
            break;

         case nir_intrinsic_get_ssbo_size: {
            uint32_t slot = nir_src_as_uint(intrin->src[0]);
            ssbo_used |= BITFIELD_BIT(slot);
            ssbo_sizes[slot] = true;
            break;
         }
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
            break;
         case nir_intrinsic_load_ubo:
         case nir_intrinsic_load_ubo_vec4:
            ubo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
            break;
         default:
            break;
         }
      }
   }

   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (type_is_counter(type))
         continue;
      unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
      if (var->data.mode == nir_var_mem_ubo)
         max_ubo_size = MAX2(max_ubo_size, size);
      else
         max_ssbo_size = MAX2(max_ssbo_size, size);
      var->data.mode = nir_var_shader_temp;
   }
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader);

   if (!ssbo_used && !ubo_used)
      return false;

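   /* each used slot gets a fresh interface variable, shaped roughly like:
    *
    *    struct { uint base[4 * max_ubo_size]; } ubo_slot_N;
    *    struct { uint base[4 * max_ssbo_size]; uint unsized[]; } ssbo_slot_N;
    *
    * where the unsized member is only added for ssbos that get_ssbo_size is
    * called on
    */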
   struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
   fields[0].name = ralloc_strdup(shader, "base");
   fields[1].name = ralloc_strdup(shader, "unsized");
   if (ubo_used) {
      const struct glsl_type *ubo_type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
      fields[0].type = ubo_type;
      u_foreach_bit(slot, ubo_used) {
         char buf[64];
         snprintf(buf, sizeof(buf), "ubo_slot_%u", slot);
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, glsl_struct_type(fields, 1, "struct", false), buf);
         var->interface_type = var->type;
         var->data.driver_location = slot;
      }
   }
   if (ssbo_used) {
      const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
      const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
      fields[0].type = ssbo_type;
      u_foreach_bit(slot, ssbo_used) {
         char buf[64];
         snprintf(buf, sizeof(buf), "ssbo_slot_%u", slot);
         if (ssbo_sizes[slot])
            fields[1].type = unsized;
         else
            fields[1].type = NULL;
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
                                                 glsl_struct_type(fields, 1 + !!ssbo_sizes[slot], "struct", false), buf);
         var->interface_type = var->type;
         var->data.driver_location = slot;
      }
   }
   return true;
}

/* this is a "default" bindless texture used if the shader has no texture variables */
static nir_variable *
create_bindless_texture(nir_shader *nir, nir_tex_instr *tex)
{
   unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
   nir_variable *var;

   const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
   var->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
   var->data.driver_location = var->data.binding = binding;
   return var;
}

/* this is a "default" bindless image used if the shader has no image variables */
static nir_variable *
create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim)
{
   unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
   nir_variable *var;

   const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
   var->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
   var->data.driver_location = var->data.binding = binding;
   var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
   return var;
}

/* rewrite bindless instructions as array deref instructions */
static bool
lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
{
   nir_variable **bindless = data;

   if (in->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(in);
      int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
      if (idx == -1)
         return false;

      nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless[1] : bindless[0];
      if (!var)
         var = create_bindless_texture(b->shader, tex);
      b->cursor = nir_before_instr(in);
      nir_deref_instr *deref = nir_build_deref_var(b, var);
      if (glsl_type_is_array(var->type))
         deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
      nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);

      /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
       * match up with it in contrast to normal sampler ops where things are a bit more flexible;
       * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
       * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
       *
       * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
       * - Warhammer 40k: Dawn of War III
       */
      unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
      unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
      unsigned coord_components = nir_src_num_components(tex->src[c].src);
      if (coord_components < needed_components) {
         nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
         nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
         tex->coord_components = needed_components;
      }
      return true;
   }
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);

   nir_intrinsic_op op;
#define OP_SWAP(OP) \
   case nir_intrinsic_bindless_image_##OP: \
      op = nir_intrinsic_image_deref_##OP; \
      break;


   /* convert bindless intrinsics to deref intrinsics */
   switch (instr->intrinsic) {
   OP_SWAP(atomic_add)
   OP_SWAP(atomic_and)
   OP_SWAP(atomic_comp_swap)
   OP_SWAP(atomic_dec_wrap)
   OP_SWAP(atomic_exchange)
   OP_SWAP(atomic_fadd)
   OP_SWAP(atomic_fmax)
   OP_SWAP(atomic_fmin)
   OP_SWAP(atomic_imax)
   OP_SWAP(atomic_imin)
   OP_SWAP(atomic_inc_wrap)
   OP_SWAP(atomic_or)
   OP_SWAP(atomic_umax)
   OP_SWAP(atomic_umin)
   OP_SWAP(atomic_xor)
   OP_SWAP(format)
   OP_SWAP(load)
   OP_SWAP(order)
   OP_SWAP(samples)
   OP_SWAP(size)
   OP_SWAP(store)
   default:
      return false;
   }

   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
   nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless[3] : bindless[2];
   if (!var)
      var = create_bindless_image(b->shader, dim);
   instr->intrinsic = op;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (glsl_type_is_array(var->type))
      deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
   nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
   return true;
}

static bool
lower_bindless(nir_shader *shader, nir_variable **bindless)
{
   if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
      return false;
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader);
   return true;
}

/* convert shader image/texture io variables to int64 handles for bindless indexing */
static bool
lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_deref &&
       instr->intrinsic != nir_intrinsic_store_deref)
      return false;

   nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(src_deref);
   if (var->data.bindless)
      return false;
   if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
      return false;
   if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
      return false;

   var->type = glsl_int64_t_type();
   var->data.bindless = 1;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (instr->intrinsic == nir_intrinsic_load_deref) {
      nir_ssa_def *def = nir_load_deref(b, deref);
      nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
   } else {
      nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
   }
   nir_instr_remove(in);
   nir_instr_remove(&src_deref->instr);
   return true;
}

static bool
lower_bindless_io(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
}

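/* flatten a (stage, index) pair into a per-descriptor-type binding namespace;
 * e.g., assuming PIPE_MAX_CONSTANT_BUFFERS is 32, ubo index 2 in
 * MESA_SHADER_FRAGMENT (stage 4) would map to binding 4 * 32 + 2 = 130
 */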
static uint32_t
zink_binding(gl_shader_stage stage, VkDescriptorType type, int index)
{
   if (stage == MESA_SHADER_NONE) {
      unreachable("not supported");
   } else {
      switch (type) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
         assert(index < PIPE_MAX_CONSTANT_BUFFERS);
         return (stage * PIPE_MAX_CONSTANT_BUFFERS) + index;

      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
         assert(index < PIPE_MAX_SAMPLERS);
         return (stage * PIPE_MAX_SAMPLERS) + index;

      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
         assert(index < PIPE_MAX_SHADER_BUFFERS);
         return (stage * PIPE_MAX_SHADER_BUFFERS) + index;

      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         assert(index < PIPE_MAX_SHADER_IMAGES);
         return (stage * PIPE_MAX_SHADER_IMAGES) + index;

      default:
         unreachable("unexpected type");
      }
   }
}

static void
handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, nir_variable **bindless)
{
   if (glsl_type_is_struct(type)) {
      for (unsigned i = 0; i < glsl_get_length(type); i++)
         handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
      return;
   }

   /* just a random scalar in a struct */
   if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
      return;

   VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
   unsigned binding;
   switch (vktype) {
      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
         binding = 0;
         break;
      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
         binding = 1;
         break;
      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
         binding = 2;
         break;
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         binding = 3;
         break;
      default:
         unreachable("unknown");
   }
   if (!bindless[binding]) {
      bindless[binding] = nir_variable_clone(var, nir);
      bindless[binding]->data.bindless = 0;
      bindless[binding]->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
      bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
      bindless[binding]->data.driver_location = bindless[binding]->data.binding = binding;
      if (!bindless[binding]->data.image.format)
         bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
      nir_shader_add_variable(nir, bindless[binding]);
   } else {
      assert(glsl_get_sampler_dim(glsl_without_array(bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
   }
   var->data.mode = nir_var_shader_temp;
}

static enum pipe_prim_type
gl_prim_to_pipe(unsigned primitive_type)
{
   switch (primitive_type) {
   case GL_POINTS:
      return PIPE_PRIM_POINTS;
   case GL_LINES:
   case GL_LINE_LOOP:
   case GL_LINE_STRIP:
   case GL_LINES_ADJACENCY:
   case GL_LINE_STRIP_ADJACENCY:
   case GL_ISOLINES:
      return PIPE_PRIM_LINES;
   default:
      return PIPE_PRIM_TRIANGLES;
   }
}

static enum pipe_prim_type
get_shader_base_prim_type(struct nir_shader *nir)
{
   switch (nir->info.stage) {
   case MESA_SHADER_GEOMETRY:
      return gl_prim_to_pipe(nir->info.gs.output_primitive);
   case MESA_SHADER_TESS_EVAL:
      return nir->info.tess.point_mode ? PIPE_PRIM_POINTS : gl_prim_to_pipe(nir->info.tess.primitive_mode);
   default:
      break;
   }
   return PIPE_PRIM_MAX;
}
1326
1327struct zink_shader *
1328zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
1329                   const struct pipe_stream_output_info *so_info)
1330{
1331   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
1332   bool have_psiz = false;
1333
1334   ret->hash = _mesa_hash_pointer(ret);
1335   ret->reduced_prim = get_shader_base_prim_type(nir);
1336
1337   ret->programs = _mesa_pointer_set_create(NULL);
1338   simple_mtx_init(&ret->lock, mtx_plain);
1339
1340   nir_variable_mode indirect_derefs_modes = nir_var_function_temp;
1341   if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
1342       nir->info.stage == MESA_SHADER_TESS_EVAL)
1343      indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
1344
1345   NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
1346              UINT32_MAX);
1347
1348   if (nir->info.stage == MESA_SHADER_VERTEX)
1349      create_vs_pushconst(nir);
1350   else if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
1351            nir->info.stage == MESA_SHADER_TESS_EVAL)
1352      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
1353   else if (nir->info.stage == MESA_SHADER_KERNEL)
1354      create_cs_pushconst(nir);
1355
1356   if (nir->info.stage < MESA_SHADER_FRAGMENT)
1357      have_psiz = check_psiz(nir);
1358   NIR_PASS_V(nir, lower_basevertex);
1359   NIR_PASS_V(nir, lower_work_dim);
1360   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
1361   NIR_PASS_V(nir, lower_baseinstance);
1362
1363   {
1364      nir_lower_subgroups_options subgroup_options = {0};
1365      subgroup_options.lower_to_scalar = true;
1366      subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
1367      subgroup_options.ballot_bit_size = 32;
1368      subgroup_options.ballot_components = 4;
1369      subgroup_options.lower_subgroup_masks = true;
1370      NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
1371   }
1372
1373   optimize_nir(nir);
1374   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
1375   NIR_PASS_V(nir, lower_discard_if);
1376   NIR_PASS_V(nir, nir_lower_fragcolor,
1377         nir->info.fs.color_is_dual_source ? 1 : 8);
1378   NIR_PASS_V(nir, lower_64bit_vertex_attribs);
1379   NIR_PASS_V(nir, unbreak_bos);
1380
1381   if (zink_debug & ZINK_DEBUG_NIR) {
1382      fprintf(stderr, "NIR shader:\n---8<---\n");
1383      nir_print_shader(nir, stderr);
1384      fprintf(stderr, "---8<---\n");
1385   }
1386
1387   nir_variable *bindless[4] = {0};
1388   bool has_bindless_io = false;
1389   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
1390      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
1391         has_bindless_io = true;
1392         break;
1393      }
1394   }
1395   if (has_bindless_io)
1396      NIR_PASS_V(nir, lower_bindless_io);
1397
   foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
      if (_nir_shader_variable_has_mode(var, nir_var_uniform |
                                        nir_var_mem_ubo |
                                        nir_var_mem_ssbo)) {
         enum zink_descriptor_type ztype;
         const struct glsl_type *type = glsl_without_array(var->type);
         if (var->data.mode == nir_var_mem_ubo) {
            ztype = ZINK_DESCRIPTOR_TYPE_UBO;
            /* buffer 0 is a push descriptor */
            var->data.descriptor_set = !!var->data.driver_location;
            var->data.binding = !var->data.driver_location ? nir->info.stage :
                                zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
                                             var->data.driver_location);
            assert(var->data.driver_location || var->data.binding < 10);
            VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
            int binding = var->data.binding;

            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
            ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
            ret->num_bindings[ztype]++;
         } else if (var->data.mode == nir_var_mem_ssbo) {
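            /* non-push descriptors live in set ztype + 1 */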
            ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
            var->data.descriptor_set = ztype + 1;
            var->data.binding = zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                             var->data.driver_location);
            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->ssbos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
            ret->num_bindings[ztype]++;
         } else {
            assert(var->data.mode == nir_var_uniform);
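            /* remaining uniforms are sampler/image types: bindless ones are
             * deferred to the bindless infrastructure, the rest get normal
             * set/binding slots
             */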
            if (var->data.bindless) {
               ret->bindless = true;
               handle_bindless_var(nir, var, type, bindless);
            } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
               VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
               ztype = zink_desc_type_from_vktype(vktype);
               if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
                  ret->num_texel_buffers++;
               var->data.driver_location = var->data.binding;
               var->data.descriptor_set = ztype + 1;
               var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location);
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
               if (glsl_type_is_array(var->type))
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
               else
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
               ret->num_bindings[ztype]++;
            }
         }
      }
   }
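   /* rewrite bindless handle usage onto the bindless descriptor variables
    * gathered above
    */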
   bool bindless_lowered = false;
   NIR_PASS(bindless_lowered, nir, lower_bindless, bindless);
   ret->bindless |= bindless_lowered;

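   /* translate gallium stream output into the xfb info ntv uses for spirv
    * decorations, accounting for any pointsize export found above
    */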
   ret->nir = nir;
   if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings)
      update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);

   return ret;
}

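/* pipe_screen::finalize_nir hook: runs the screen-dependent passes once per
 * shader so the frontend can cache the result
 */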
char *
zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
{
   struct zink_screen *screen = zink_screen(pscreen);
   nir_shader *nir = nirptr;

   if (!screen->info.feats.features.shaderImageGatherExtended) {
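      /* tg4 with offsets needs shaderImageGatherExtended; lower it away when
       * the feature is missing
       */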
      nir_lower_tex_options tex_opts = {0};
      tex_opts.lower_tg4_offsets = true;
      NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
   }
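   /* pack loose uniforms into ubo0 so everything reaches spirv as a block */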
   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false);
   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
   optimize_nir(nir);
   if (nir->info.num_ubos || nir->info.num_ssbos)
      NIR_PASS_V(nir, nir_lower_dynamic_bo_access);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (screen->driconf.inline_uniforms)
      nir_find_inlinable_uniforms(nir);

   return NULL;
}

void
zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
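   /* unlink this shader from every program that references it, dropping the
    * programs from their caches so stale pipelines can't be fetched
    */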
   set_foreach(shader->programs, entry) {
      if (shader->nir->info.stage == MESA_SHADER_COMPUTE) {
         struct zink_compute_program *comp = (void*)entry->key;
         if (!comp->base.removed) {
            _mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader);
            comp->base.removed = true;
         }
         comp->shader = NULL;
         zink_compute_program_reference(screen, &comp, NULL);
      } else {
         struct zink_gfx_program *prog = (void*)entry->key;
         enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage);
         assert(pstage < ZINK_SHADER_COUNT);
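         /* a generated tcs is owned by its tes and was never entered into the
          * program cache
          */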
         if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) {
            _mesa_hash_table_remove_key(&ctx->program_cache[prog->stages_present >> 2], prog->shaders);
            prog->base.removed = true;
         }
         prog->shaders[pstage] = NULL;
         if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
            /* automatically destroy generated tcs shaders when tes is destroyed */
            zink_shader_free(ctx, shader->generated);
         zink_gfx_program_reference(screen, &prog, NULL);
      }
   }
   _mesa_set_destroy(shader->programs, NULL);
   ralloc_free(shader->nir);
   FREE(shader);
}


/* creating a passthrough tcs shader that's roughly:

#version 150
#extension GL_ARB_tessellation_shader : require

in vec4 some_var[gl_MaxPatchVertices];
out vec4 some_var_out[];

layout(push_constant) uniform tcsPushConstants {
    layout(offset = 0) float TessLevelInner[2];
    layout(offset = 8) float TessLevelOuter[4];
} u_tcsPushConstants;
layout(vertices = $vertices_per_patch) out;
void main()
{
  gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
  gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
  some_var_out[gl_InvocationID] = some_var[gl_InvocationID];
}

*/
struct zink_shader *
zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch)
{
   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
   ret->hash = _mesa_hash_pointer(ret);
   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
   nir_function *fn = nir_function_create(nir, "main");
   fn->is_entrypoint = true;
   nir_function_impl *impl = nir_function_impl_create(fn);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_block(nir_start_block(impl));

   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);

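   /* mirror every vs output as an in/out pair on the tcs and copy the value
    * through for the vertex this invocation owns
    */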
   nir_foreach_shader_out_variable(var, vs->nir) {
      const struct glsl_type *type = var->type;
      const struct glsl_type *in_type = var->type;
      const struct glsl_type *out_type = var->type;
      char buf[1024];
      snprintf(buf, sizeof(buf), "%s_out", var->name);
      in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
      out_type = glsl_array_type(type, vertices_per_patch, 0);

      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
      out->data.location = in->data.location = var->data.location;
      out->data.location_frac = in->data.location_frac = var->data.location_frac;

      /* gl_in[] receives values from equivalent built-in output
         variables written by the vertex shader (section 2.14.7).  Each array
         element of gl_in[] is a structure holding values for a specific vertex of
         the input patch.  The length of gl_in[] is equal to the
         implementation-dependent maximum patch size (gl_MaxPatchVertices).
         - ARB_tessellation_shader
       */
      for (unsigned i = 0; i < vertices_per_patch; i++) {
         /* we need to load the invocation-specific value of the vertex output
          * and then store it to the per-patch output
          */
         nir_if *start_block = nir_push_if(&b, nir_ieq(&b, invocation_id, nir_imm_int(&b, i)));
         nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
         nir_ssa_def *load = nir_load_deref(&b, in_array_var);
         nir_deref_instr *out_array_var = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, out), i);
         nir_store_deref(&b, out_array_var, load, 0xff);
         nir_pop_if(&b, start_block);
      }
   }
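   /* tess levels are per-patch outputs; their values come from the default
    * levels that zink pushes as push constants when no user tcs is bound
    */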
   nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
   gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
   gl_TessLevelInner->data.patch = 1;
   nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
   gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
   gl_TessLevelOuter->data.patch = 1;

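   /* for reference, the push constant layout being mirrored below is roughly
    * (field order inferred from the offsetof() uses in this file):
    *
    * struct zink_gfx_push_constant {
    *    unsigned draw_mode_is_indexed;
    *    unsigned draw_id;
    *    float default_inner_level[2];
    *    float default_outer_level[4];
    * };
    */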
   /* hacks so we can size these right for now */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3);
   /* just use a single blob for padding here because it's easier */
   fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0);
   fields[0].name = ralloc_asprintf(nir, "padding");
   fields[0].offset = 0;
   fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0);
   fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
   fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0);
   fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
   fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level);
   nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                                 glsl_struct_type(fields, 3, "struct", false), "pushconst");
   pushconst->data.location = VARYING_SLOT_VAR0;

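   /* load the default inner/outer levels from the push constant block and
    * copy them into the patch outputs
    */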
   nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8);
   nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16);

   for (unsigned i = 0; i < 2; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
   }
   for (unsigned i = 0; i < 4; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
   }

   nir->info.tess.tcs_vertices_out = vertices_per_patch;
   nir_validate_shader(nir, "created");

   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   optimize_nir(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, lower_discard_if);
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   ret->nir = nir;
   ret->is_generated = true;
   return ret;
}
