/*
 * Copyright © 2019 Valve Corporation.
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_shader_args.h"
#include "radv_private.h"
#include "radv_shader.h"

/* Record the location of a user-data entry: its first SGPR index and its
 * SGPR count, and advance the running index past it.
 */
static void
set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs)
{
   ud_info->sgpr_idx = *sgpr_idx;
   ud_info->num_sgprs = num_sgprs;
   *sgpr_idx += num_sgprs;
}

/* Assign the location of per-shader user data slot "idx" (AC_UD_*). */
static void
set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
{
   struct radv_userdata_info *ud_info = &args->shader_info->user_sgprs_locs.shader_data[idx];
   assert(ud_info);

   set_loc(ud_info, sgpr_idx, num_sgprs);
}

/* Assign the location of a pointer-typed user data slot. All pointers are
 * 32-bit (1 SGPR) except the scratch ring offsets, which are a full 64-bit
 * pointer (2 SGPRs).
 */
static void
set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
   bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;

   set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
}

/* Assign the location of the pointer to descriptor set "idx" (one 32-bit
 * pointer SGPR) and mark the set as enabled in the bitmask.
 */
static void
set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
   struct radv_userdata_locations *locs = &args->shader_info->user_sgprs_locs;
   struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
   assert(ud_info);

   set_loc(ud_info, sgpr_idx, 1);

   locs->descriptor_sets_enabled |= 1u << idx;
}

/* Result of the user SGPR budgeting pass (allocate_user_sgprs), consumed
 * when the arguments are actually declared.
 */
struct user_sgpr_info {
   /* All descriptor sets are passed through one indirect pointer instead of
    * one SGPR per set (used when the direct SGPRs don't fit).
    */
   bool indirect_all_descriptor_sets;
   /* User SGPRs still free after sets/built-ins have been accounted for. */
   uint8_t remaining_sgprs;
   /* Number of 32-bit push constants passed inline in SGPRs. */
   unsigned num_inline_push_consts;
   /* True if every used push constant is inlined, so the push-constant
    * pointer can be omitted entirely.
    */
   bool inlined_all_push_consts;
};

/* Whether this stage needs an extra SGPR carrying the multiview view index.
 * A stage needs it either because the shader reads gl_ViewIndex, or because
 * it is the last pre-rasterization stage and multiview is enabled (the view
 * index is then needed for per-view exports).
 */
static bool
needs_view_index_sgpr(struct radv_shader_args *args, gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      if (args->shader_info->uses_view_index ||
          (!args->shader_info->vs.as_es && !args->shader_info->vs.as_ls &&
           args->options->key.has_multiview_view_index))
         return true;
      break;
   case MESA_SHADER_TESS_EVAL:
      if (args->shader_info->uses_view_index ||
          (!args->shader_info->tes.as_es && args->options->key.has_multiview_view_index))
         return true;
      break;
   case MESA_SHADER_TESS_CTRL:
      if (args->shader_info->uses_view_index)
         return true;
      break;
   case MESA_SHADER_GEOMETRY:
      if (args->shader_info->uses_view_index ||
          (args->shader_info->is_ngg && args->options->key.has_multiview_view_index))
         return true;
      break;
   default:
      break;
   }
   return false;
}

/* Number of user SGPRs consumed by VS-specific inputs (vertex buffer
 * pointer, base vertex, draw id, base instance), mirroring what
 * declare_vs_specific_input_sgprs() declares.
 */
static uint8_t
count_vs_user_sgprs(struct radv_shader_args *args)
{
   uint8_t count = 1; /* vertex offset */

   if (args->shader_info->vs.vb_desc_usage_mask)
      count++;
   if (args->shader_info->vs.needs_draw_id)
      count++;
   if (args->shader_info->vs.needs_base_instance)
      count++;

   return count;
}

/* Number of user SGPRs consumed by NGG-specific state, mirroring
 * declare_ngg_sgprs().
 */
static unsigned
count_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
{
   unsigned count = 0;

   if (has_api_gs)
      count += 1; /* ngg_gs_state */
   if (args->shader_info->has_ngg_culling)
      count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */

   return count;
}

/* Decide how many push constants can be passed inline in SGPRs instead of
 * through the push-constant pointer, given the SGPRs left over after other
 * user data. Only possible for shaders that use exclusively 32-bit,
 * directly-addressed push constants.
 */
static void
allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
{
   uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;

   /* Only supported if shaders use push constants. */
   if (args->shader_info->min_push_constant_used == UINT8_MAX)
      return;

   /* Only supported if shaders don't have indirect push constants. */
   if (args->shader_info->has_indirect_push_constants)
      return;

   /* Only supported for 32-bit push constants. */
   if (!args->shader_info->has_only_32bit_push_constants)
      return;

   uint8_t num_push_consts =
      (args->shader_info->max_push_constant_used - args->shader_info->min_push_constant_used) / 4;

   /* Check if the number of user SGPRs is large enough. */
   if (num_push_consts < remaining_sgprs) {
      user_sgpr_info->num_inline_push_consts = num_push_consts;
   } else {
      user_sgpr_info->num_inline_push_consts = remaining_sgprs;
   }

   /* Clamp to the maximum number of allowed inlined push constants. */
   if (user_sgpr_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
      user_sgpr_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;

   if (user_sgpr_info->num_inline_push_consts == num_push_consts &&
       !args->shader_info->loads_dynamic_offsets) {
      /* Disable the default push constants path if all constants are
       * inlined and if shaders don't use dynamic descriptors.
       */
      user_sgpr_info->inlined_all_push_consts = true;
   }
}

/* Budget the available user SGPRs for this (possibly merged) stage: count
 * every SGPR the stage will need, decide whether descriptor sets must go
 * indirect, and hand the leftovers to the inline-push-constant allocator.
 * The counts here must stay in sync with what radv_declare_shader_args()
 * actually declares.
 */
static void
allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage,
                    gl_shader_stage previous_stage, bool needs_view_index, bool has_api_gs,
                    struct user_sgpr_info *user_sgpr_info)
{
   uint8_t user_sgpr_count = 0;

   memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));

   /* 2 user sgprs will always be allocated for scratch/rings */
   user_sgpr_count += 2;

   /* prolog inputs */
   if (args->shader_info->vs.has_prolog)
      user_sgpr_count += 2;

   switch (stage) {
   case MESA_SHADER_COMPUTE:
      if (args->shader_info->cs.uses_sbt)
         user_sgpr_count += 1;
      if (args->shader_info->cs.uses_grid_size)
         user_sgpr_count += 3;
      if (args->shader_info->cs.uses_ray_launch_size)
         user_sgpr_count += 3;
      break;
   case MESA_SHADER_FRAGMENT:
      break;
   case MESA_SHADER_VERTEX:
      if (!args->is_gs_copy_shader)
         user_sgpr_count += count_vs_user_sgprs(args);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (has_previous_stage) {
         if (previous_stage == MESA_SHADER_VERTEX)
            user_sgpr_count += count_vs_user_sgprs(args);
      }
      break;
   case MESA_SHADER_TESS_EVAL:
      break;
   case MESA_SHADER_GEOMETRY:
      if (has_previous_stage) {
         if (args->shader_info->is_ngg)
            user_sgpr_count += count_ngg_sgprs(args, has_api_gs);

         if (previous_stage == MESA_SHADER_VERTEX) {
            user_sgpr_count += count_vs_user_sgprs(args);
         }
      }
      break;
   default:
      break;
   }

   if (needs_view_index)
      user_sgpr_count++;

   if (args->shader_info->loads_push_constants)
      user_sgpr_count++;

   if (args->shader_info->so.num_outputs)
      user_sgpr_count++;

   /* Merged stages on GFX9+ have 32 user SGPRs available; otherwise 16. */
   uint32_t available_sgprs =
      args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
   uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
   uint32_t num_desc_set = util_bitcount(args->shader_info->desc_set_used_mask);

   if (remaining_sgprs < num_desc_set) {
      /* Not enough room for one SGPR per set: fall back to a single
       * indirect pointer (costs 1 SGPR).
       */
      user_sgpr_info->indirect_all_descriptor_sets = true;
      user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
   } else {
      user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
   }

   allocate_inline_push_consts(args, user_sgpr_info);
}

/* Declare the user SGPRs common to all stages: descriptor set pointers
 * (direct or indirect), the push-constant pointer and/or inline push
 * constants, and the streamout buffer descriptor pointer. Declaration order
 * defines the SGPR layout and must match set_global_input_locs().
 */
static void
declare_global_input_sgprs(struct radv_shader_args *args,
                           const struct user_sgpr_info *user_sgpr_info)
{
   /* 1 for each descriptor set */
   if (!user_sgpr_info->indirect_all_descriptor_sets) {
      uint32_t mask = args->shader_info->desc_set_used_mask;

      while (mask) {
         int i = u_bit_scan(&mask);

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->descriptor_sets[i]);
      }
   } else {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]);
   }

   if (args->shader_info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) {
      /* 1 for push constants and dynamic descriptors */
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
   }

   for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]);
   }
   /* Dword offset of the first inlined push constant. */
   args->ac.base_inline_push_consts = args->shader_info->min_push_constant_used / 4;

   if (args->shader_info->so.num_outputs) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers);
   }
}

/* Declare VS-specific user SGPRs: prolog inputs, vertex buffer pointer,
 * base vertex, draw id and base instance. Also used for merged stages whose
 * previous stage is the VS. Must match count_vs_user_sgprs() and
 * set_vs_specific_input_locs().
 */
static void
declare_vs_specific_input_sgprs(struct radv_shader_args *args, gl_shader_stage stage,
                                bool has_previous_stage, gl_shader_stage previous_stage)
{
   if (args->shader_info->vs.has_prolog)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->prolog_inputs);

   if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
                                    (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
      if (args->shader_info->vs.vb_desc_usage_mask) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers);
      }
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
      if (args->shader_info->vs.needs_draw_id) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
      }
      if (args->shader_info->vs.needs_base_instance) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
      }
   }
}

/* Declare the VS input VGPRs. The ordering of instance_id/prim_id/unused
 * slots differs per chip generation and per LS/NGG configuration because it
 * mirrors the hardware's VGPR initialization order.
 */
static void
declare_vs_input_vgprs(struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
   if (!args->is_gs_copy_shader) {
      if (args->shader_info->vs.as_ls) {
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
         if (args->options->chip_class >= GFX10) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      } else {
         if (args->options->chip_class >= GFX10) {
            if (args->shader_info->is_ngg) {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            } else {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            }
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      }
   }

   if (args->shader_info->vs.dynamic_inputs) {
      assert(args->shader_info->vs.use_per_attribute_vb_descs);
      unsigned num_attributes = util_last_bit(args->shader_info->vs.vb_desc_usage_mask);
      for (unsigned i = 0; i < num_attributes; i++)
         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
      /* Ensure the main shader doesn't use less vgprs than the prolog. The prolog requires one
       * VGPR more than the number of shader arguments in the case of non-trivial divisors on GFX8.
       */
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
   }
}

/* Declare the streamout (transform feedback) SGPRs for VS/TES. */
static void
declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
{
   int i;

   /* Streamout SGPRs. */
   if (args->shader_info->so.num_outputs) {
      assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
   } else if (stage == MESA_SHADER_TESS_EVAL) {
      /* Keep the SGPR layout stable for TES even without streamout. */
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
   }

   /* A streamout buffer offset is loaded if the stride is non-zero. */
   for (i = 0; i < 4; i++) {
      if (!args->shader_info->so.strides[i])
         continue;

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
   }
}

/* Declare the TES input VGPRs: tess coords, relative patch id, patch id. */
static void
declare_tes_input_vgprs(struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
}

/* Declare the full fixed set of FS input VGPRs (SPI PS input layout), then
 * optionally remap them to match the subset the hardware actually provides
 * based on spi_ps_input.
 */
static void
declare_ps_input_vgprs(struct radv_shader_args *args)
{
   unsigned spi_ps_input = args->shader_info->ps.spi_ps_input;

   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */

   if (args->options->remap_spi_ps_input) {
      /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
       * communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
       * VGPR arguments here.
       */
      /* NOTE(review): arg_count is incremented but never read below. */
      unsigned arg_count = 0;
      for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) {
         if (args->ac.args[i].file != AC_ARG_VGPR) {
            arg_count++;
            continue;
         }

         if (!(spi_ps_input & (1 << vgpr_arg))) {
            /* Hardware won't supply this input: drop the argument. */
            args->ac.args[i].skip = true;
         } else {
            args->ac.args[i].offset = vgpr_reg;
            vgpr_reg += args->ac.args[i].size;
            arg_count++;
         }
         vgpr_arg++;
      }
   }
}

/* Declare NGG-specific user SGPRs (GS state, culling settings, viewport
 * transform). Must match count_ngg_sgprs().
 */
static void
declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
{
   if (has_api_gs) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state);
   }

   if (args->shader_info->has_ngg_culling) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[0]);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[1]);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[0]);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[1]);
   }
}

/* Record the user SGPR locations of the global inputs, in the same order
 * declare_global_input_sgprs() declared them.
 */
static void
set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
                      uint8_t *user_sgpr_idx)
{
   unsigned num_inline_push_consts = 0;

   if (!user_sgpr_info->indirect_all_descriptor_sets) {
      for (unsigned i = 0; i < ARRAY_SIZE(args->descriptor_sets); i++) {
         if (args->descriptor_sets[i].used)
            set_loc_desc(args, i, user_sgpr_idx);
      }
   } else {
      set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx);
   }

   if (args->ac.push_constants.used) {
      set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(args->ac.inline_push_consts); i++) {
      if (args->ac.inline_push_consts[i].used)
         num_inline_push_consts++;
   }

   if (num_inline_push_consts) {
      set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, num_inline_push_consts);
   }

   if (args->streamout_buffers.used) {
      set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx);
   }
}

/* Record the user SGPR locations of the VS-specific inputs, in the same
 * order declare_vs_specific_input_sgprs() declared them.
 */
static void
set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
                           bool has_previous_stage, gl_shader_stage previous_stage,
                           uint8_t *user_sgpr_idx)
{
   if (args->prolog_inputs.used)
      set_loc_shader(args, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2);

   if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
                                    (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
      if (args->ac.vertex_buffers.used) {
         set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx);
      }

      /* base_vertex/draw_id/start_instance share one contiguous slot. */
      unsigned vs_num = args->ac.base_vertex.used + args->ac.draw_id.used +
                        args->ac.start_instance.used;
      set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
   }
}

/* Returns whether the stage is a stage that can be directly before the GS */
static bool
is_pre_gs_stage(gl_shader_stage stage)
{
   return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
}

/* Declare all shader arguments (user SGPRs, system SGPRs and input VGPRs)
 * for the given stage, then record where each piece of user data landed in
 * shader_info->user_sgprs_locs. The declaration order defines the register
 * layout consumed at dispatch/draw time, so it must not change without
 * updating the corresponding emit code.
 */
void
radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
                         bool has_previous_stage, gl_shader_stage previous_stage)
{
   struct user_sgpr_info user_sgpr_info;
   bool needs_view_index = needs_view_index_sgpr(args, stage);
   bool has_api_gs = stage == MESA_SHADER_GEOMETRY;

   if (args->options->chip_class >= GFX10) {
      if (is_pre_gs_stage(stage) && args->shader_info->is_ngg) {
         /* On GFX10, VS is merged into GS for NGG. */
         previous_stage = stage;
         stage = MESA_SHADER_GEOMETRY;
         has_previous_stage = true;
      }
   }

   /* -1 marks "not present" for every user data location. */
   for (int i = 0; i < MAX_SETS; i++)
      args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
   for (int i = 0; i < AC_UD_MAX_UD; i++)
      args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;

   allocate_user_sgprs(args, stage, has_previous_stage, previous_stage, needs_view_index,
                       has_api_gs, &user_sgpr_info);

   if (args->options->explicit_scratch_args) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
   }

   /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other
    * sgprs.
    */

   switch (stage) {
   case MESA_SHADER_COMPUTE:
      declare_global_input_sgprs(args, &user_sgpr_info);

      if (args->shader_info->cs.uses_sbt) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.sbt_descriptors);
      }

      if (args->shader_info->cs.uses_grid_size) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
      }

      if (args->shader_info->cs.uses_ray_launch_size) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.ray_launch_size);
      }

      for (int i = 0; i < 3; i++) {
         if (args->shader_info->cs.uses_block_id[i]) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
         }
      }

      if (args->shader_info->cs.uses_local_invocation_idx) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
      }

      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
      break;
   case MESA_SHADER_VERTEX:
      /* NGG is handled by the GS case */
      assert(!args->shader_info->is_ngg);

      declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);

      declare_global_input_sgprs(args, &user_sgpr_info);

      if (needs_view_index) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
      }

      if (args->shader_info->vs.as_es) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
      } else if (args->shader_info->vs.as_ls) {
         /* no extra parameters */
      } else {
         declare_streamout_sgprs(args, stage);
      }

      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      declare_vs_input_vgprs(args);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (has_previous_stage) {
         // First 6 system regs
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown

         declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);

         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);

         declare_vs_input_vgprs(args);
      } else {
         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
         if (args->options->explicit_scratch_args) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
      }
      break;
   case MESA_SHADER_TESS_EVAL:
      /* NGG is handled by the GS case */
      assert(!args->shader_info->is_ngg);

      declare_global_input_sgprs(args, &user_sgpr_info);

      if (needs_view_index)
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);

      if (args->shader_info->tes.as_es) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
      } else {
         declare_streamout_sgprs(args, stage);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
      }
      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }
      declare_tes_input_vgprs(args);
      break;
   case MESA_SHADER_GEOMETRY:
      if (has_previous_stage) {
         // First 6 system regs
         if (args->shader_info->is_ngg) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
         } else {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown

         if (previous_stage != MESA_SHADER_TESS_EVAL) {
            declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
         }

         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         if (args->shader_info->is_ngg) {
            declare_ngg_sgprs(args, has_api_gs);
         }

         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);

         if (previous_stage == MESA_SHADER_VERTEX) {
            declare_vs_input_vgprs(args);
         } else {
            declare_tes_input_vgprs(args);
         }
      } else {
         declare_global_input_sgprs(args, &user_sgpr_info);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
         if (args->options->explicit_scratch_args) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
      }
      break;
   case MESA_SHADER_FRAGMENT:
      declare_global_input_sgprs(args, &user_sgpr_info);

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
      if (args->options->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      declare_ps_input_vgprs(args);
      break;
   default:
      unreachable("Shader stage not implemented");
   }

   /* NOTE(review): the = 0 store is immediately overwritten below. */
   args->shader_info->num_input_vgprs = 0;
   args->shader_info->num_input_sgprs = 2;
   args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
   args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;

   uint8_t user_sgpr_idx = 0;

   set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);

   /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
    * the rw_buffers at s0/s1). With user SGPR0 = s8, lets restart the count from 0 */
   if (has_previous_stage)
      user_sgpr_idx = 0;

   if (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))
      set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);

   set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);

   switch (stage) {
   case MESA_SHADER_COMPUTE:
      if (args->ac.sbt_descriptors.used) {
         set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx);
      }
      if (args->ac.num_work_groups.used) {
         set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, 3);
      }
      if (args->ac.ray_launch_size.used) {
         set_loc_shader(args, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3);
      }
      break;
   case MESA_SHADER_VERTEX:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_TESS_EVAL:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_GEOMETRY:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);

      if (args->ngg_gs_state.used) {
         set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
      }

      if (args->ngg_culling_settings.used) {
         set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1);
      }

      if (args->ngg_viewport_scale[0].used) {
         assert(args->ngg_viewport_scale[1].used &&
                args->ngg_viewport_translate[0].used &&
                args->ngg_viewport_translate[1].used);
         set_loc_shader(args, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4);
      }
      break;
   case MESA_SHADER_FRAGMENT:
      break;
   default:
      unreachable("Shader stage not implemented");
   }

   args->shader_info->num_user_sgprs = user_sgpr_idx;
}