1/*
2 * Copyright © 2019 Valve Corporation.
3 * Copyright © 2016 Red Hat.
4 * Copyright © 2016 Bas Nieuwenhuizen
5 *
6 * based in part on anv driver which is:
7 * Copyright © 2015 Intel Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 * IN THE SOFTWARE.
27 */
28
29#include "radv_shader_args.h"
30#include "radv_private.h"
31#include "radv_shader.h"
32
33static void
34set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs)
35{
36   ud_info->sgpr_idx = *sgpr_idx;
37   ud_info->num_sgprs = num_sgprs;
38   *sgpr_idx += num_sgprs;
39}
40
41static void
42set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
43{
44   struct radv_userdata_info *ud_info = &args->shader_info->user_sgprs_locs.shader_data[idx];
45   assert(ud_info);
46
47   set_loc(ud_info, sgpr_idx, num_sgprs);
48}
49
50static void
51set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
52{
53   bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
54
55   set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
56}
57
58static void
59set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
60{
61   struct radv_userdata_locations *locs = &args->shader_info->user_sgprs_locs;
62   struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
63   assert(ud_info);
64
65   set_loc(ud_info, sgpr_idx, 1);
66
67   locs->descriptor_sets_enabled |= 1u << idx;
68}
69
/* Result of the user SGPR budget computation done by allocate_user_sgprs(). */
struct user_sgpr_info {
   /* Set when there are fewer free user SGPRs than used descriptor sets; a
    * single pointer argument is then declared instead of one per set.
    */
   bool indirect_all_descriptor_sets;
   /* User SGPRs still free after the descriptor set pointer(s) are accounted for. */
   uint8_t remaining_sgprs;
   /* Number of push constant dwords passed inline as SGPR arguments. */
   unsigned num_inline_push_consts;
   /* Set when every used push constant is inlined and the shader loads no
    * dynamic offsets; the push constants pointer argument is then omitted.
    */
   bool inlined_all_push_consts;
};
76
77static bool
78needs_view_index_sgpr(struct radv_shader_args *args, gl_shader_stage stage)
79{
80   switch (stage) {
81   case MESA_SHADER_VERTEX:
82      if (args->shader_info->uses_view_index ||
83          (!args->shader_info->vs.as_es && !args->shader_info->vs.as_ls &&
84           args->options->key.has_multiview_view_index))
85         return true;
86      break;
87   case MESA_SHADER_TESS_EVAL:
88      if (args->shader_info->uses_view_index ||
89          (!args->shader_info->tes.as_es && args->options->key.has_multiview_view_index))
90         return true;
91      break;
92   case MESA_SHADER_TESS_CTRL:
93      if (args->shader_info->uses_view_index)
94         return true;
95      break;
96   case MESA_SHADER_GEOMETRY:
97      if (args->shader_info->uses_view_index ||
98          (args->shader_info->is_ngg && args->options->key.has_multiview_view_index))
99         return true;
100      break;
101   default:
102      break;
103   }
104   return false;
105}
106
107static uint8_t
108count_vs_user_sgprs(struct radv_shader_args *args)
109{
110   uint8_t count = 1; /* vertex offset */
111
112   if (args->shader_info->vs.vb_desc_usage_mask)
113      count++;
114   if (args->shader_info->vs.needs_draw_id)
115      count++;
116   if (args->shader_info->vs.needs_base_instance)
117      count++;
118
119   return count;
120}
121
122static unsigned
123count_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
124{
125   unsigned count = 0;
126
127   if (has_api_gs)
128      count += 1; /* ngg_gs_state */
129   if (args->shader_info->has_ngg_culling)
130      count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */
131
132   return count;
133}
134
135static void
136allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
137{
138   uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
139
140   /* Only supported if shaders use push constants. */
141   if (args->shader_info->min_push_constant_used == UINT8_MAX)
142      return;
143
144   /* Only supported if shaders don't have indirect push constants. */
145   if (args->shader_info->has_indirect_push_constants)
146      return;
147
148   /* Only supported for 32-bit push constants. */
149   if (!args->shader_info->has_only_32bit_push_constants)
150      return;
151
152   uint8_t num_push_consts =
153      (args->shader_info->max_push_constant_used - args->shader_info->min_push_constant_used) / 4;
154
155   /* Check if the number of user SGPRs is large enough. */
156   if (num_push_consts < remaining_sgprs) {
157      user_sgpr_info->num_inline_push_consts = num_push_consts;
158   } else {
159      user_sgpr_info->num_inline_push_consts = remaining_sgprs;
160   }
161
162   /* Clamp to the maximum number of allowed inlined push constants. */
163   if (user_sgpr_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
164      user_sgpr_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
165
166   if (user_sgpr_info->num_inline_push_consts == num_push_consts &&
167       !args->shader_info->loads_dynamic_offsets) {
168      /* Disable the default push constants path if all constants are
169       * inlined and if shaders don't use dynamic descriptors.
170       */
171      user_sgpr_info->inlined_all_push_consts = true;
172   }
173}
174
175static void
176allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage,
177                    gl_shader_stage previous_stage, bool needs_view_index, bool has_api_gs,
178                    struct user_sgpr_info *user_sgpr_info)
179{
180   uint8_t user_sgpr_count = 0;
181
182   memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
183
184   /* 2 user sgprs will always be allocated for scratch/rings */
185   user_sgpr_count += 2;
186
187   /* prolog inputs */
188   if (args->shader_info->vs.has_prolog)
189      user_sgpr_count += 2;
190
191   switch (stage) {
192   case MESA_SHADER_COMPUTE:
193      if (args->shader_info->cs.uses_sbt)
194         user_sgpr_count += 1;
195      if (args->shader_info->cs.uses_grid_size)
196         user_sgpr_count += 3;
197      if (args->shader_info->cs.uses_ray_launch_size)
198         user_sgpr_count += 3;
199      break;
200   case MESA_SHADER_FRAGMENT:
201      break;
202   case MESA_SHADER_VERTEX:
203      if (!args->is_gs_copy_shader)
204         user_sgpr_count += count_vs_user_sgprs(args);
205      break;
206   case MESA_SHADER_TESS_CTRL:
207      if (has_previous_stage) {
208         if (previous_stage == MESA_SHADER_VERTEX)
209            user_sgpr_count += count_vs_user_sgprs(args);
210      }
211      break;
212   case MESA_SHADER_TESS_EVAL:
213      break;
214   case MESA_SHADER_GEOMETRY:
215      if (has_previous_stage) {
216         if (args->shader_info->is_ngg)
217            user_sgpr_count += count_ngg_sgprs(args, has_api_gs);
218
219         if (previous_stage == MESA_SHADER_VERTEX) {
220            user_sgpr_count += count_vs_user_sgprs(args);
221         }
222      }
223      break;
224   default:
225      break;
226   }
227
228   if (needs_view_index)
229      user_sgpr_count++;
230
231   if (args->shader_info->loads_push_constants)
232      user_sgpr_count++;
233
234   if (args->shader_info->so.num_outputs)
235      user_sgpr_count++;
236
237   uint32_t available_sgprs =
238      args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
239   uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
240   uint32_t num_desc_set = util_bitcount(args->shader_info->desc_set_used_mask);
241
242   if (remaining_sgprs < num_desc_set) {
243      user_sgpr_info->indirect_all_descriptor_sets = true;
244      user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
245   } else {
246      user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
247   }
248
249   allocate_inline_push_consts(args, user_sgpr_info);
250}
251
252static void
253declare_global_input_sgprs(struct radv_shader_args *args,
254                           const struct user_sgpr_info *user_sgpr_info)
255{
256   /* 1 for each descriptor set */
257   if (!user_sgpr_info->indirect_all_descriptor_sets) {
258      uint32_t mask = args->shader_info->desc_set_used_mask;
259
260      while (mask) {
261         int i = u_bit_scan(&mask);
262
263         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->descriptor_sets[i]);
264      }
265   } else {
266      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]);
267   }
268
269   if (args->shader_info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) {
270      /* 1 for push constants and dynamic descriptors */
271      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
272   }
273
274   for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++) {
275      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]);
276   }
277   args->ac.base_inline_push_consts = args->shader_info->min_push_constant_used / 4;
278
279   if (args->shader_info->so.num_outputs) {
280      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers);
281   }
282}
283
284static void
285declare_vs_specific_input_sgprs(struct radv_shader_args *args, gl_shader_stage stage,
286                                bool has_previous_stage, gl_shader_stage previous_stage)
287{
288   if (args->shader_info->vs.has_prolog)
289      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->prolog_inputs);
290
291   if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
292                                    (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
293      if (args->shader_info->vs.vb_desc_usage_mask) {
294         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers);
295      }
296      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
297      if (args->shader_info->vs.needs_draw_id) {
298         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
299      }
300      if (args->shader_info->vs.needs_base_instance) {
301         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
302      }
303   }
304}
305
306static void
307declare_vs_input_vgprs(struct radv_shader_args *args)
308{
309   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
310   if (!args->is_gs_copy_shader) {
311      if (args->shader_info->vs.as_ls) {
312         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
313         if (args->options->chip_class >= GFX10) {
314            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
315            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
316         } else {
317            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
318            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
319         }
320      } else {
321         if (args->options->chip_class >= GFX10) {
322            if (args->shader_info->is_ngg) {
323               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
324               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
325               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
326            } else {
327               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
328               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
329               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
330            }
331         } else {
332            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
333            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
334            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
335         }
336      }
337   }
338
339   if (args->shader_info->vs.dynamic_inputs) {
340      assert(args->shader_info->vs.use_per_attribute_vb_descs);
341      unsigned num_attributes = util_last_bit(args->shader_info->vs.vb_desc_usage_mask);
342      for (unsigned i = 0; i < num_attributes; i++)
343         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
344      /* Ensure the main shader doesn't use less vgprs than the prolog. The prolog requires one
345       * VGPR more than the number of shader arguments in the case of non-trivial divisors on GFX8.
346       */
347      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
348   }
349}
350
351static void
352declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
353{
354   int i;
355
356   /* Streamout SGPRs. */
357   if (args->shader_info->so.num_outputs) {
358      assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);
359
360      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
361      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
362   } else if (stage == MESA_SHADER_TESS_EVAL) {
363      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
364   }
365
366   /* A streamout buffer offset is loaded if the stride is non-zero. */
367   for (i = 0; i < 4; i++) {
368      if (!args->shader_info->so.strides[i])
369         continue;
370
371      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
372   }
373}
374
375static void
376declare_tes_input_vgprs(struct radv_shader_args *args)
377{
378   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
379   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
380   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
381   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
382}
383
384static void
385declare_ps_input_vgprs(struct radv_shader_args *args)
386{
387   unsigned spi_ps_input = args->shader_info->ps.spi_ps_input;
388
389   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
390   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
391   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
392   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
393   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
394   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
395   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
396   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
397   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
398   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
399   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
400   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
401   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
402   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
403   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
404   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
405
406   if (args->options->remap_spi_ps_input) {
407      /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
408       * communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
409       * VGPR arguments here.
410       */
411      unsigned arg_count = 0;
412      for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) {
413         if (args->ac.args[i].file != AC_ARG_VGPR) {
414            arg_count++;
415            continue;
416         }
417
418         if (!(spi_ps_input & (1 << vgpr_arg))) {
419            args->ac.args[i].skip = true;
420         } else {
421            args->ac.args[i].offset = vgpr_reg;
422            vgpr_reg += args->ac.args[i].size;
423            arg_count++;
424         }
425         vgpr_arg++;
426      }
427   }
428}
429
430static void
431declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
432{
433   if (has_api_gs) {
434      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state);
435   }
436
437   if (args->shader_info->has_ngg_culling) {
438      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings);
439      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[0]);
440      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[1]);
441      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[0]);
442      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[1]);
443   }
444}
445
446static void
447set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
448                      uint8_t *user_sgpr_idx)
449{
450   unsigned num_inline_push_consts = 0;
451
452   if (!user_sgpr_info->indirect_all_descriptor_sets) {
453      for (unsigned i = 0; i < ARRAY_SIZE(args->descriptor_sets); i++) {
454         if (args->descriptor_sets[i].used)
455            set_loc_desc(args, i, user_sgpr_idx);
456      }
457   } else {
458      set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx);
459   }
460
461   if (args->ac.push_constants.used) {
462      set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
463   }
464
465   for (unsigned i = 0; i < ARRAY_SIZE(args->ac.inline_push_consts); i++) {
466      if (args->ac.inline_push_consts[i].used)
467         num_inline_push_consts++;
468   }
469
470   if (num_inline_push_consts) {
471      set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, num_inline_push_consts);
472   }
473
474   if (args->streamout_buffers.used) {
475      set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx);
476   }
477}
478
479static void
480set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
481                           bool has_previous_stage, gl_shader_stage previous_stage,
482                           uint8_t *user_sgpr_idx)
483{
484   if (args->prolog_inputs.used)
485      set_loc_shader(args, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2);
486
487   if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
488                                    (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
489      if (args->ac.vertex_buffers.used) {
490         set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx);
491      }
492
493      unsigned vs_num = args->ac.base_vertex.used + args->ac.draw_id.used +
494                        args->ac.start_instance.used;
495      set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
496   }
497}
498
499/* Returns whether the stage is a stage that can be directly before the GS */
500static bool
501is_pre_gs_stage(gl_shader_stage stage)
502{
503   return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
504}
505
/* Declare the input arguments (SGPRs and VGPRs) of a shader stage and record
 * where each user-data entry lives.
 *
 * The function works in two passes:
 *  1. The arguments are declared with ac_add_arg() in the order required by
 *     the stage.
 *  2. The user SGPR index of every declared user-data entry is stored in
 *     args->shader_info->user_sgprs_locs, following the declaration order of
 *     the first pass.
 *
 * It also fills num_input_sgprs, num_input_vgprs and num_user_sgprs in
 * args->shader_info.
 *
 * args:               shader arguments to fill; options and shader_info are read.
 * stage:              the shader stage being compiled.
 * has_previous_stage: whether the shader is merged with the stage before it.
 * previous_stage:     the stage it is merged with; only meaningful when
 *                     has_previous_stage is true.
 */
void
radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
                         bool has_previous_stage, gl_shader_stage previous_stage)
{
   struct user_sgpr_info user_sgpr_info;
   /* Both values are computed from the stage as passed by the caller, before
    * the NGG remapping below changes it.
    */
   bool needs_view_index = needs_view_index_sgpr(args, stage);
   bool has_api_gs = stage == MESA_SHADER_GEOMETRY;

   if (args->options->chip_class >= GFX10) {
      if (is_pre_gs_stage(stage) && args->shader_info->is_ngg) {
         /* On GFX10, VS is merged into GS for NGG. */
         previous_stage = stage;
         stage = MESA_SHADER_GEOMETRY;
         has_previous_stage = true;
      }
   }

   /* Mark every user-data location as unassigned. */
   for (int i = 0; i < MAX_SETS; i++)
      args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
   for (int i = 0; i < AC_UD_MAX_UD; i++)
      args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;

   allocate_user_sgprs(args, stage, has_previous_stage, previous_stage, needs_view_index,
                       has_api_gs, &user_sgpr_info);

   if (args->options->explicit_scratch_args) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
   }

   /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other
    * sgprs.
    */

   /* First pass: declare the arguments of the stage. */
   switch (stage) {
   case MESA_SHADER_COMPUTE:
      declare_global_input_sgprs(args, &user_sgpr_info);

      if (args->shader_info->cs.uses_sbt) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.sbt_descriptors);
      }

      if (args->shader_info->cs.uses_grid_size) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
      }

      if (args->shader_info->cs.uses_ray_launch_size) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.ray_launch_size);
      }

      for (int i = 0; i < 3; i++) {
         if (args->shader_info->cs.uses_block_id[i]) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
         }
      }

      if (args->shader_info->cs.uses_local_invocation_idx) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
      }

      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
      break;
   case MESA_SHADER_VERTEX:
      /* NGG is handled by the GS case */
      assert(!args->shader_info->is_ngg);

      declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);

      declare_global_input_sgprs(args, &user_sgpr_info);

      if (needs_view_index) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
      }

      if (args->shader_info->vs.as_es) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
      } else if (args->shader_info->vs.as_ls) {
         /* no extra parameters */
      } else {
         declare_streamout_sgprs(args, stage);
      }

      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      declare_vs_input_vgprs(args);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (has_previous_stage) {
         // First 6 system regs
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown

         declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);

         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);

         declare_vs_input_vgprs(args);
      } else {
         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
         if (args->options->explicit_scratch_args) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
      }
      break;
   case MESA_SHADER_TESS_EVAL:
      /* NGG is handled by the GS case */
      assert(!args->shader_info->is_ngg);

      declare_global_input_sgprs(args, &user_sgpr_info);

      if (needs_view_index)
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);

      if (args->shader_info->tes.as_es) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
      } else {
         declare_streamout_sgprs(args, stage);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
      }
      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }
      declare_tes_input_vgprs(args);
      break;
   case MESA_SHADER_GEOMETRY:
      if (has_previous_stage) {
         // First 6 system regs
         if (args->shader_info->is_ngg) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
         } else {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown

         if (previous_stage != MESA_SHADER_TESS_EVAL) {
            declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
         }

         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         if (args->shader_info->is_ngg) {
            declare_ngg_sgprs(args, has_api_gs);
         }

         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);

         /* The VGPRs of the stage merged in front of the GS follow the GS ones. */
         if (previous_stage == MESA_SHADER_VERTEX) {
            declare_vs_input_vgprs(args);
         } else {
            declare_tes_input_vgprs(args);
         }
      } else {
         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
         if (args->options->explicit_scratch_args) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
      }
      break;
   case MESA_SHADER_FRAGMENT:
      declare_global_input_sgprs(args, &user_sgpr_info);

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      declare_ps_input_vgprs(args);
      break;
   default:
      unreachable("Shader stage not implemented");
   }

   /* The input SGPR count is the number of declared SGPRs plus 2; the input
    * VGPR count is the number of declared VGPRs.
    */
   args->shader_info->num_input_vgprs = 0;
   args->shader_info->num_input_sgprs = 2;
   args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
   args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;

   /* Second pass: assign user SGPR indices, in declaration order. */
   uint8_t user_sgpr_idx = 0;

   set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);

   /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
    * the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0.
    */
   if (has_previous_stage)
      user_sgpr_idx = 0;

   if (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))
      set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);

   set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);

   /* Stage-specific user data. */
   switch (stage) {
   case MESA_SHADER_COMPUTE:
      if (args->ac.sbt_descriptors.used) {
         set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx);
      }
      if (args->ac.num_work_groups.used) {
         set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, 3);
      }
      if (args->ac.ray_launch_size.used) {
         set_loc_shader(args, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3);
      }
      break;
   case MESA_SHADER_VERTEX:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_TESS_EVAL:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_GEOMETRY:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);

      if (args->ngg_gs_state.used) {
         set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
      }

      if (args->ngg_culling_settings.used) {
         set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1);
      }

      /* The four viewport SGPRs are declared together and form one entry. */
      if (args->ngg_viewport_scale[0].used) {
         assert(args->ngg_viewport_scale[1].used &&
                args->ngg_viewport_translate[0].used &&
                args->ngg_viewport_translate[1].used);
         set_loc_shader(args, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4);
      }
      break;
   case MESA_SHADER_FRAGMENT:
      break;
   default:
      unreachable("Shader stage not implemented");
   }

   /* The final index is the total number of user SGPRs assigned. */
   args->shader_info->num_user_sgprs = user_sgpr_idx;
}
805