/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "common/intel_l3_config.h"
#include "common/intel_disasm.h"
#include "common/intel_sample_positions.h"
#include "anv_private.h"
#include "compiler/brw_nir.h"
#include "compiler/brw_nir_rt.h"
#include "anv_nir.h"
#include "nir/nir_xfb_info.h"
#include "spirv/nir_spirv.h"
#include "vk_util.h"

/* Needed for SWIZZLE macros */
#include "program/prog_instruction.h"

// Shader functions
#define SPIR_V_MAGIC_NUMBER 0x07230203

struct anv_spirv_debug_data {
   struct anv_device *device;
   const struct vk_shader_module *module;
};

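/* spirv_to_nir debug callback: forwards SPIR-V parser diagnostics to the
 * Vulkan logging helpers at the matching severity, tagged with the shader
 * module they refer to.
 */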
static void anv_spirv_nir_debug(void *private_data,
                                enum nir_spirv_debug_level level,
                                size_t spirv_offset,
                                const char *message)
{
   struct anv_spirv_debug_data *debug_data = private_data;

   switch (level) {
   case NIR_SPIRV_DEBUG_LEVEL_INFO:
      vk_logi(VK_LOG_OBJS(&debug_data->module->base),
              "SPIR-V offset %lu: %s",
              (unsigned long) spirv_offset, message);
      break;
   case NIR_SPIRV_DEBUG_LEVEL_WARNING:
      vk_logw(VK_LOG_OBJS(&debug_data->module->base),
              "SPIR-V offset %lu: %s",
              (unsigned long) spirv_offset, message);
      break;
   case NIR_SPIRV_DEBUG_LEVEL_ERROR:
      vk_loge(VK_LOG_OBJS(&debug_data->module->base),
              "SPIR-V offset %lu: %s",
              (unsigned long) spirv_offset, message);
      break;
   default:
      break;
   }
}

/* Eventually, this will become part of anv_CreateShader.  Unfortunately,
 * we can't do that yet because we don't have the ability to copy nir.
 */
static nir_shader *
anv_shader_compile_to_nir(struct anv_device *device,
                          void *mem_ctx,
                          const struct vk_shader_module *module,
                          const char *entrypoint_name,
                          gl_shader_stage stage,
                          const VkSpecializationInfo *spec_info)
{
   const struct anv_physical_device *pdevice = device->physical;
   const struct brw_compiler *compiler = pdevice->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->glsl_compiler_options[stage].NirOptions;

   uint32_t *spirv = (uint32_t *) module->data;
   assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
   assert(module->size % 4 == 0);

   uint32_t num_spec_entries = 0;
   struct nir_spirv_specialization *spec_entries =
      vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries);

   struct anv_spirv_debug_data spirv_debug_data = {
      .device = device,
      .module = module,
   };
   struct spirv_to_nir_options spirv_options = {
      .caps = {
         .demote_to_helper_invocation = true,
         .derivative_group = true,
         .descriptor_array_dynamic_indexing = true,
         .descriptor_array_non_uniform_indexing = true,
         .descriptor_indexing = true,
         .device_group = true,
         .draw_parameters = true,
         .float16 = pdevice->info.ver >= 8,
         .float32_atomic_add = pdevice->info.has_lsc,
         .float32_atomic_min_max = pdevice->info.ver >= 9,
         .float64 = pdevice->info.ver >= 8,
         .float64_atomic_min_max = pdevice->info.has_lsc,
         .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
         .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
         .geometry_streams = true,
         /* When KHR_format_feature_flags2 is enabled, support for read/write
          * without format is reported per-format, so just report true here.
          * It's up to the application to check.
          */
         .image_read_without_format = device->vk.enabled_extensions.KHR_format_feature_flags2,
         .image_write_without_format = true,
         .int8 = pdevice->info.ver >= 8,
         .int16 = pdevice->info.ver >= 8,
         .int64 = pdevice->info.ver >= 8,
         .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
         .integer_functions2 = pdevice->info.ver >= 8,
         .min_lod = true,
         .multiview = true,
         .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
         .post_depth_coverage = pdevice->info.ver >= 9,
         .runtime_descriptor_array = true,
         .float_controls = pdevice->info.ver >= 8,
         .ray_tracing = pdevice->info.has_ray_tracing,
         .shader_clock = true,
         .shader_viewport_index_layer = true,
         .stencil_export = pdevice->info.ver >= 9,
         .storage_8bit = pdevice->info.ver >= 8,
         .storage_16bit = pdevice->info.ver >= 8,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_uniform_control_flow = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,
         .tessellation = true,
         .transform_feedback = pdevice->info.ver >= 8,
         .variable_pointers = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .workgroup_memory_explicit_layout = true,
         .fragment_shading_rate = pdevice->info.ver >= 11,
      },
      .ubo_addr_format =
         anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
      .ssbo_addr_format =
         anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .push_const_addr_format = nir_address_format_logical,

      /* TODO: Consider changing this to an address format where the NULL
       * pointer is 0.  That might be a better format to play nice with
       * certain code / code generators.
       */
      .shared_addr_format = nir_address_format_32bit_offset,
      .debug = {
         .func = anv_spirv_nir_debug,
         .private_data = &spirv_debug_data,
      },
   };

   nir_shader *nir =
      spirv_to_nir(spirv, module->size / 4,
                   spec_entries, num_spec_entries,
                   stage, entrypoint_name, &spirv_options, nir_options);
   if (!nir) {
      free(spec_entries);
      return NULL;
   }

   assert(nir->info.stage == stage);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);

   free(spec_entries);

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
      fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
              gl_shader_stage_name(stage));
      nir_print_shader(nir, stderr);
   }

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the constant initializers.  We do this here so that
    * nir_remove_dead_variables and split_per_member_structs below see the
    * corresponding stores.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* Split member structs.  We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
              nir_var_shader_call_data | nir_var_ray_hit_attrib,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant, false);
   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   NIR_PASS_V(nir, nir_lower_frexp);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   brw_preprocess_nir(compiler, nir, NULL);

   return nir;
}

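/* Base initialization shared by graphics, compute, and ray-tracing
 * pipelines.  Note that backing storage for the batch is left to the
 * specific pipeline type (see the comment on pipeline->batch below).
 */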
VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
                  struct anv_device *device,
                  enum anv_pipeline_type type,
                  VkPipelineCreateFlags flags,
                  const VkAllocationCallbacks *pAllocator)
{
   VkResult result;

   memset(pipeline, 0, sizeof(*pipeline));

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   pipeline->device = device;

   /* It's the job of the child class to provide actual backing storage for
    * the batch by setting batch.start, batch.next, and batch.end.
    */
   pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
   pipeline->batch.relocs = &pipeline->batch_relocs;
   pipeline->batch.status = VK_SUCCESS;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pipeline->batch.alloc);
   if (result != VK_SUCCESS)
      return result;

   pipeline->mem_ctx = ralloc_context(NULL);

   pipeline->type = type;
   pipeline->flags = flags;

   util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);

   return VK_SUCCESS;
}

void
anv_pipeline_finish(struct anv_pipeline *pipeline,
                    struct anv_device *device,
                    const VkAllocationCallbacks *pAllocator)
{
   anv_reloc_list_finish(&pipeline->batch_relocs,
                         pAllocator ? pAllocator : &device->vk.alloc);
   ralloc_free(pipeline->mem_ctx);
   vk_object_base_finish(&pipeline->base);
}

void anv_DestroyPipeline(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   if (!pipeline)
      return;

   switch (pipeline->type) {
   case ANV_PIPELINE_GRAPHICS: {
      struct anv_graphics_pipeline *gfx_pipeline =
         anv_pipeline_to_graphics(pipeline);

      if (gfx_pipeline->blend_state.map)
         anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
      if (gfx_pipeline->cps_state.map)
         anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state);

      for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
         if (gfx_pipeline->shaders[s])
            anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
      }
      break;
   }

   case ANV_PIPELINE_COMPUTE: {
      struct anv_compute_pipeline *compute_pipeline =
         anv_pipeline_to_compute(pipeline);

      if (compute_pipeline->cs)
         anv_shader_bin_unref(device, compute_pipeline->cs);

      break;
   }

   case ANV_PIPELINE_RAY_TRACING: {
      struct anv_ray_tracing_pipeline *rt_pipeline =
         anv_pipeline_to_ray_tracing(pipeline);

      util_dynarray_foreach(&rt_pipeline->shaders,
                            struct anv_shader_bin *, shader) {
         anv_shader_bin_unref(device, *shader);
      }
      break;
   }

   default:
      unreachable("invalid pipeline type");
   }

   anv_pipeline_finish(pipeline, device, pAllocator);
   vk_free2(&device->vk.alloc, pAllocator, pipeline);
}

static const uint32_t vk_to_intel_primitive_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};

static void
populate_sampler_prog_key(const struct intel_device_info *devinfo,
                          struct brw_sampler_prog_key_data *key)
{
   /* Almost all multisampled textures are compressed.  The only time when we
    * don't compress a multisampled texture is for 16x MSAA with a surface
    * width greater than 8k, which is a bit of an edge case.  Since the
    * sampler just ignores the MCS parameter to ld2ms when MCS is disabled,
    * it's safe to tell the compiler to always assume compression.
    */
   key->compressed_multisample_layout_mask = ~0;

   /* SkyLake added support for 16x MSAA.  With this came a new message for
    * reading from a 16x MSAA surface with compression.  The new message was
    * needed because now the MCS data is 64 bits instead of 32 or lower as is
    * the case for 8x, 4x, and 2x.  The key->msaa_16 bit-field controls which
    * message we use.  Fortunately, the 16x message works for 8x, 4x, and 2x
    * so we can just use it unconditionally.  This may not be quite as
    * efficient but it saves us from recompiling.
    */
   if (devinfo->ver >= 9)
      key->msaa_16 = ~0;

   /* XXX: Handle texture swizzle on HSW- */
   for (int i = 0; i < MAX_SAMPLERS; i++) {
      /* Assume color sampler, no swizzling. (Works for BDW+) */
      key->swizzles[i] = SWIZZLE_XYZW;
   }
}

static void
populate_base_prog_key(const struct intel_device_info *devinfo,
                       enum brw_subgroup_size_type subgroup_size_type,
                       bool robust_buffer_access,
                       struct brw_base_prog_key *key)
{
   key->subgroup_size_type = subgroup_size_type;
   key->robust_buffer_access = robust_buffer_access;

   populate_sampler_prog_key(devinfo, &key->tex);
}

static void
populate_vs_prog_key(const struct intel_device_info *devinfo,
                     enum brw_subgroup_size_type subgroup_size_type,
                     bool robust_buffer_access,
                     struct brw_vs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_access, &key->base);

   /* XXX: Handle vertex input work-arounds */

   /* XXX: Handle sampler_prog_key */
}

static void
populate_tcs_prog_key(const struct intel_device_info *devinfo,
                      enum brw_subgroup_size_type subgroup_size_type,
                      bool robust_buffer_access,
                      unsigned input_vertices,
                      struct brw_tcs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_access, &key->base);

   key->input_vertices = input_vertices;
}

static void
populate_tes_prog_key(const struct intel_device_info *devinfo,
                      enum brw_subgroup_size_type subgroup_size_type,
                      bool robust_buffer_access,
                      struct brw_tes_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_access, &key->base);
}

static void
populate_gs_prog_key(const struct intel_device_info *devinfo,
                     enum brw_subgroup_size_type subgroup_size_type,
                     bool robust_buffer_access,
                     struct brw_gs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_access, &key->base);
}

static bool
pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
                          const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info)
{
   if (pipeline->sample_shading_enable)
      return false;

   /* Not dynamic & not specified for the pipeline. */
   if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 && !fsr_info)
      return false;

   /* Not dynamic & the pipeline has a 1x1 fragment shading rate with no
    * possibility for any element of the pipeline to change the value.
    */
   if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 &&
       fsr_info->fragmentSize.width <= 1 &&
       fsr_info->fragmentSize.height <= 1 &&
       fsr_info->combinerOps[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
       fsr_info->combinerOps[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
      return false;

   return true;
}

static void
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
                     enum brw_subgroup_size_type subgroup_size_type,
                     bool robust_buffer_access,
                     const struct anv_subpass *subpass,
                     const VkPipelineMultisampleStateCreateInfo *ms_info,
                     const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info,
                     struct brw_wm_prog_key *key)
{
   const struct anv_device *device = pipeline->base.device;
   const struct intel_device_info *devinfo = &device->info;

   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_access, &key->base);

   /* We set this to 0 here and set it to the actual value before we call
    * brw_compile_fs.
    */
   key->input_slots_valid = 0;

   /* Vulkan doesn't specify a default */
   key->high_quality_derivatives = false;

   /* XXX Vulkan doesn't appear to specify */
   key->clamp_fragment_color = false;

   key->ignore_sample_mask_out = false;

   assert(subpass->color_count <= MAX_RTS);
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
         key->color_outputs_valid |= (1 << i);
   }

   key->nr_color_regions = subpass->color_count;

   /* To reduce possible shader recompilations we would need to know whether
    * the shader has a SampleMask output, so we could decide whether to emit
    * code working around the hardware issue that alpha to coverage is
    * disabled when there is a SampleMask output.
    */
   key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;

   /* Vulkan doesn't support fixed-function alpha test */
   key->alpha_test_replicate_alpha = false;

   if (ms_info) {
      /* We should probably pull this out of the shader, but it's fairly
       * harmless to compute it and then let dead-code take care of it.
       */
      if (ms_info->rasterizationSamples > 1) {
         key->persample_interp = ms_info->sampleShadingEnable &&
            (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
         key->multisample_fbo = true;
      }

      key->frag_coord_adds_sample_pos = key->persample_interp;
   }

   key->coarse_pixel =
      device->vk.enabled_extensions.KHR_fragment_shading_rate &&
      pipeline_has_coarse_pixel(pipeline, fsr_info);
}

static void
populate_cs_prog_key(const struct intel_device_info *devinfo,
                     enum brw_subgroup_size_type subgroup_size_type,
                     bool robust_buffer_access,
                     struct brw_cs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_access, &key->base);
}

static void
populate_bs_prog_key(const struct intel_device_info *devinfo,
                     enum brw_subgroup_size_type subgroup_size_type,
                     bool robust_buffer_access,
                     struct brw_bs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_access, &key->base);
}

struct anv_pipeline_stage {
   gl_shader_stage stage;

   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   unsigned char shader_sha1[20];

   union brw_any_prog_key key;

   struct {
      gl_shader_stage stage;
      unsigned char sha1[20];
   } cache_key;

   nir_shader *nir;

   struct anv_pipeline_binding surface_to_descriptor[256];
   struct anv_pipeline_binding sampler_to_descriptor[256];
   struct anv_pipeline_bind_map bind_map;

   union brw_any_prog_data prog_data;

   uint32_t num_stats;
   struct brw_compile_stats stats[3];
   char *disasm[3];

   VkPipelineCreationFeedbackEXT feedback;

   const unsigned *code;

   struct anv_shader_bin *bin;
};

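/* Hashes everything that uniquely identifies a single shader stage: the
 * module's SHA-1, the entrypoint name, the stage, and the specialization
 * constants.
 */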
static void
anv_pipeline_hash_shader(const struct vk_shader_module *module,
                         const char *entrypoint,
                         gl_shader_stage stage,
                         const VkSpecializationInfo *spec_info,
                         unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
   _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
   _mesa_sha1_update(&ctx, &stage, sizeof(stage));
   if (spec_info) {
      _mesa_sha1_update(&ctx, spec_info->pMapEntries,
                        spec_info->mapEntryCount *
                        sizeof(*spec_info->pMapEntries));
      _mesa_sha1_update(&ctx, spec_info->pData,
                        spec_info->dataSize);
   }

   _mesa_sha1_final(&ctx, sha1_out);
}

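/* Hashes all compilation inputs for a graphics pipeline: the subpass view
 * mask, the pipeline layout, the robustness setting, and each active
 * stage's shader hash and program key.
 */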
static void
anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
                           struct anv_pipeline_layout *layout,
                           struct anv_pipeline_stage *stages,
                           unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
                     sizeof(pipeline->subpass->view_mask));

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (stages[s].entrypoint) {
         _mesa_sha1_update(&ctx, stages[s].shader_sha1,
                           sizeof(stages[s].shader_sha1));
         _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
      }
   }

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
                          struct anv_pipeline_layout *layout,
                          struct anv_pipeline_stage *stage,
                          unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, stage->shader_sha1,
                     sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
                                     struct anv_pipeline_layout *layout,
                                     struct anv_pipeline_stage *stage,
                                     unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
                                              struct anv_pipeline_layout *layout,
                                              struct anv_pipeline_stage *intersection,
                                              struct anv_pipeline_stage *any_hit,
                                              unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
   _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
   _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
   _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

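/* Returns the stage's NIR, preferring a cached copy: look the shader up in
 * the pipeline cache by its SHA-1 first and only fall back to compiling the
 * SPIR-V, uploading the freshly compiled NIR for future lookups.
 */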
static nir_shader *
anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
                           struct anv_pipeline_cache *cache,
                           void *mem_ctx,
                           struct anv_pipeline_stage *stage)
{
   const struct brw_compiler *compiler =
      pipeline->device->physical->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->glsl_compiler_options[stage->stage].NirOptions;
   nir_shader *nir;

   nir = anv_device_search_for_nir(pipeline->device, cache,
                                   nir_options,
                                   stage->shader_sha1,
                                   mem_ctx);
   if (nir) {
      assert(nir->info.stage == stage->stage);
      return nir;
   }

   nir = anv_shader_compile_to_nir(pipeline->device,
                                   mem_ctx,
                                   stage->module,
                                   stage->entrypoint,
                                   stage->stage,
                                   stage->spec_info);
   if (nir) {
      anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
      return nir;
   }

   return NULL;
}

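/* Size/alignment callback for workgroup shared-memory variables: booleans
 * take 4 bytes and vec3s are aligned like vec4s, matching std430-style
 * layout rules.
 */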
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}

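/* Lowering that depends on the pipeline layout and device configuration and
 * therefore runs after the NIR cache lookup: input attachments, YCbCr
 * textures, multiview, explicit I/O for each memory mode, push constant
 * layout, and workgroup shared memory.
 */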
static void
anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                       void *mem_ctx,
                       struct anv_pipeline_stage *stage,
                       struct anv_pipeline_layout *layout)
{
   const struct anv_physical_device *pdevice = pipeline->device->physical;
   const struct brw_compiler *compiler = pdevice->compiler;

   struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
   nir_shader *nir = stage->nir;

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* Check if sample shading is enabled in the shader and, if so, toggle
       * it on for the pipeline regardless of whether sampleShadingEnable is
       * set.
       */
      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
      if (nir->info.fs.uses_sample_shading)
         anv_pipeline_to_graphics(pipeline)->sample_shading_enable = true;

      NIR_PASS_V(nir, nir_lower_wpos_center,
                 anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                     .use_fragcoord_sysval = true,
                     .use_layer_id_sysval = true,
                 });
   }

   NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);

   if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
      NIR_PASS_V(nir, anv_nir_lower_multiview,
                 anv_pipeline_to_graphics(pipeline));
   }

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS_V(nir, brw_nir_lower_storage_image, compiler->devinfo);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
              nir_address_format_64bit_global);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
   anv_nir_apply_pipeline_layout(pdevice,
                                 pipeline->device->robust_buffer_access,
                                 layout, nir, &stage->bind_map);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              anv_nir_ubo_addr_format(pdevice,
                 pipeline->device->robust_buffer_access));
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              anv_nir_ssbo_addr_format(pdevice,
                 pipeline->device->robust_buffer_access));

   /* First run copy-prop to get rid of all of the vec() that address
    * calculations often create and then constant-fold so that, when we
    * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
    */
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_constant_folding);

   NIR_PASS_V(nir, anv_nir_lower_ubo_loads);

   /* We don't support non-uniform UBO indexing, and non-uniform SSBO access
    * is handled naturally by falling back to A64 messages.
    */
   NIR_PASS_V(nir, nir_lower_non_uniform_access,
              &(nir_lower_non_uniform_access_options) {
                  .types = nir_lower_non_uniform_texture_access |
                           nir_lower_non_uniform_image_access,
                  .callback = NULL,
              });

   anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
                               nir, prog_data, &stage->bind_map, mem_ctx);

   if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                    nir_var_mem_shared, shared_type_info);
      }

      NIR_PASS_V(nir, nir_lower_explicit_io,
                 nir_var_mem_shared, nir_address_format_32bit_offset);

      if (nir->info.zero_initialize_shared_memory &&
          nir->info.shared_size > 0) {
         /* The effective Shared Local Memory size is at least 1024 bytes and
          * is always rounded to a power of two, so it is OK to align the size
          * used by the shader to chunk_size -- which does simplify the logic.
          */
         const unsigned chunk_size = 16;
         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
         assert(shared_size <=
                intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));

         NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
                    shared_size, chunk_size);
      }
   }

   stage->nir = nir;
}

static void
anv_pipeline_link_vs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *vs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_vs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_graphics_pipeline *pipeline,
                        struct anv_pipeline_stage *vs_stage)
{
   /* When using Primitive Replication for multiview, each view gets its own
    * position slot.
    */
   uint32_t pos_slots = pipeline->use_primitive_replication ?
      anv_subpass_view_count(pipeline->subpass) : 1;

   brw_compute_vue_map(compiler->devinfo,
                       &vs_stage->prog_data.vs.base.vue_map,
                       vs_stage->nir->info.outputs_written,
                       vs_stage->nir->info.separate_shader,
                       pos_slots);

   vs_stage->num_stats = 1;

   struct brw_compile_vs_params params = {
      .nir = vs_stage->nir,
      .key = &vs_stage->key.vs,
      .prog_data = &vs_stage->prog_data.vs,
      .stats = vs_stage->stats,
      .log_data = pipeline->base.device,
   };

   vs_stage->code = brw_compile_vs(compiler, mem_ctx, &params);
}

static void
merge_tess_info(struct shader_info *tes_info,
                const struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    *    "PointMode. Controls generation of points rather than triangles
    *     or lines. This functionality defaults to disabled, and is
    *     enabled if either shader stage includes the execution mode."
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both.  Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 ||
          tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess.primitive_mode == 0 ||
          tes_info->tess.primitive_mode == 0 ||
          tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
   tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}

static void
anv_pipeline_link_tcs(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tcs_stage,
                      struct anv_pipeline_stage *tes_stage)
{
   assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);

   brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);

   nir_lower_patch_vertices(tes_stage->nir,
                            tcs_stage->nir->info.tess.tcs_vertices_out,
                            NULL);

   /* Copy TCS info into the TES info */
   merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);

   /* Whacking the key after cache lookup is a bit sketchy, but all of
    * this comes from the SPIR-V, which is part of the hash used for the
    * pipeline cache.  So it should be safe.
    */
   tcs_stage->key.tcs.tes_primitive_mode =
      tes_stage->nir->info.tess.primitive_mode;
   tcs_stage->key.tcs.quads_workaround =
      compiler->devinfo->ver < 9 &&
      tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
      tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
}

static void
anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tcs_stage,
                         struct anv_pipeline_stage *prev_stage)
{
   tcs_stage->key.tcs.outputs_written =
      tcs_stage->nir->info.outputs_written;
   tcs_stage->key.tcs.patch_outputs_written =
      tcs_stage->nir->info.patch_outputs_written;

   tcs_stage->num_stats = 1;
   tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
                                     &tcs_stage->key.tcs,
                                     &tcs_stage->prog_data.tcs,
                                     tcs_stage->nir, -1,
                                     tcs_stage->stats, NULL);
}

static void
anv_pipeline_link_tes(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tes_stage,
                      struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_tes(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tes_stage,
                         struct anv_pipeline_stage *tcs_stage)
{
   tes_stage->key.tes.inputs_read =
      tcs_stage->nir->info.outputs_written;
   tes_stage->key.tes.patch_inputs_read =
      tcs_stage->nir->info.patch_outputs_written;

   tes_stage->num_stats = 1;
   tes_stage->code = brw_compile_tes(compiler, device, mem_ctx,
                                     &tes_stage->key.tes,
                                     &tcs_stage->prog_data.tcs.base.vue_map,
                                     &tes_stage->prog_data.tes,
                                     tes_stage->nir, -1,
                                     tes_stage->stats, NULL);
}

static void
anv_pipeline_link_gs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *gs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_gs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *gs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   brw_compute_vue_map(compiler->devinfo,
                       &gs_stage->prog_data.gs.base.vue_map,
                       gs_stage->nir->info.outputs_written,
                       gs_stage->nir->info.separate_shader, 1);

   gs_stage->num_stats = 1;
   gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
                                   &gs_stage->key.gs,
                                   &gs_stage->prog_data.gs,
                                   gs_stage->nir, -1,
                                   gs_stage->stats, NULL);
}

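/* Builds the fragment shader's render-target bind map, prunes output
 * variables that map to unused color attachments, and then shrinks the
 * key's color_outputs_valid/nr_color_regions to what actually remains.
 */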
static void
anv_pipeline_link_fs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *stage)
{
   unsigned num_rt_bindings;
   struct anv_pipeline_binding rt_bindings[MAX_RTS];
   if (stage->key.wm.nr_color_regions > 0) {
      assert(stage->key.wm.nr_color_regions <= MAX_RTS);
      for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
         if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = rt,
            };
         } else {
            /* Setup a null render target */
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = UINT32_MAX,
            };
         }
      }
      num_rt_bindings = stage->key.wm.nr_color_regions;
   } else {
      /* Setup a null render target */
      rt_bindings[0] = (struct anv_pipeline_binding) {
         .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
         .index = UINT32_MAX,
      };
      num_rt_bindings = 1;
   }

   assert(num_rt_bindings <= MAX_RTS);
   assert(stage->bind_map.surface_count == 0);
   typed_memcpy(stage->bind_map.surface_to_descriptor,
                rt_bindings, num_rt_bindings);
   stage->bind_map.surface_count += num_rt_bindings;

   /* Now that we've set up the color attachments, we can go through and
    * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
    * hopes that dead code can clean them up in this and any earlier shader
    * stages.
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
   bool deleted_output = false;
   nir_foreach_shader_out_variable_safe(var, stage->nir) {
      /* TODO: We don't delete depth/stencil writes.  We probably could if the
       * subpass doesn't have a depth/stencil attachment.
       */
      if (var->data.location < FRAG_RESULT_DATA0)
         continue;

      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;

      /* If this is the RT at location 0 and we have alpha to coverage
       * enabled, we still need that write because it will affect the
       * coverage mask even if it's never written to a color target.
       */
      if (rt == 0 && stage->key.wm.alpha_to_coverage)
         continue;

      const unsigned array_len =
         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
      assert(rt + array_len <= MAX_RTS);

      if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
                             BITFIELD_RANGE(rt, array_len))) {
         deleted_output = true;
         var->data.mode = nir_var_function_temp;
         exec_node_remove(&var->node);
         exec_list_push_tail(&impl->locals, &var->node);
      }
   }

   if (deleted_output)
      nir_fixup_deref_modes(stage->nir);

   /* Initially the valid outputs value is based on the render pass color
    * attachments (see populate_wm_prog_key()).  Now that we've potentially
    * deleted variables that map to unused attachments, we need to update the
    * valid outputs for the backend compiler based on which output variables
    * are actually used.
    */
   stage->key.wm.color_outputs_valid = 0;
   nir_foreach_shader_out_variable_safe(var, stage->nir) {
      if (var->data.location < FRAG_RESULT_DATA0)
         continue;

      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
      const unsigned array_len =
         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
      assert(rt + array_len <= MAX_RTS);

      stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
   }

   /* We stored the number of subpass color attachments in nr_color_regions
    * when calculating the key for caching.  Now that we've computed the bind
    * map, we can reduce this to the actual max before we go into the back-end
    * compiler.
    */
   stage->key.wm.nr_color_regions =
      util_last_bit(stage->key.wm.color_outputs_valid);
}

static void
anv_pipeline_compile_fs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *fs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   /* TODO: we could set this to 0 based on the information in nir_shader, but
    * we need this before we call spirv_to_nir.
    */
   assert(prev_stage);
   fs_stage->key.wm.input_slots_valid =
      prev_stage->prog_data.vue.vue_map.slots_valid;

   struct brw_compile_fs_params params = {
      .nir = fs_stage->nir,
      .key = &fs_stage->key.wm,
      .prog_data = &fs_stage->prog_data.wm,

      .allow_spilling = true,
      .stats = fs_stage->stats,
      .log_data = device,
   };

   fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);

   fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_32;

   if (fs_stage->key.wm.color_outputs_valid == 0 &&
       !fs_stage->prog_data.wm.has_side_effects &&
       !fs_stage->prog_data.wm.uses_omask &&
       !fs_stage->key.wm.alpha_to_coverage &&
       !fs_stage->prog_data.wm.uses_kill &&
       fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
       !fs_stage->prog_data.wm.computed_stencil) {
      /* This fragment shader has no outputs and no side effects.  Go ahead
       * and return the code pointer so we don't accidentally think the
       * compile failed, but zero out prog_data, which will set program_size
       * to zero and disable the stage.
       */
      memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
   }
}

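/* Records one compiled binary as a pipeline executable.  The statistics are
 * always stored; the NIR and disassembly are captured only when the
 * application set VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR.
 */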
static void
anv_pipeline_add_executable(struct anv_pipeline *pipeline,
                            struct anv_pipeline_stage *stage,
                            struct brw_compile_stats *stats,
                            uint32_t code_offset)
{
   char *nir = NULL;
   if (stage->nir &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
   }

   char *disasm = NULL;
   if (stage->code &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      char *stream_data = NULL;
      size_t stream_size = 0;
      FILE *stream = open_memstream(&stream_data, &stream_size);

      uint32_t push_size = 0;
      for (unsigned i = 0; i < 4; i++)
         push_size += stage->bind_map.push_ranges[i].length;
      if (push_size > 0) {
         fprintf(stream, "Push constant ranges:\n");
         for (unsigned i = 0; i < 4; i++) {
            if (stage->bind_map.push_ranges[i].length == 0)
               continue;

            fprintf(stream, "    RANGE%d (%dB): ", i,
                    stage->bind_map.push_ranges[i].length * 32);

            switch (stage->bind_map.push_ranges[i].set) {
            case ANV_DESCRIPTOR_SET_NULL:
               fprintf(stream, "NULL");
               break;

            case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
               fprintf(stream, "Vulkan push constants and API params");
               break;

            case ANV_DESCRIPTOR_SET_DESCRIPTORS:
               fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
               unreachable("gl_NumWorkgroups is never pushed");

            case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
               fprintf(stream, "Inline shader constant data (start=%dB)",
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
               unreachable("Color attachments can't be pushed");

            default:
               fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
                       stage->bind_map.push_ranges[i].set,
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;
            }
            fprintf(stream, "\n");
         }
         fprintf(stream, "\n");
      }

      /* Creating this is far cheaper than it looks.  It's perfectly fine to
       * do it for every binary.
       */
      intel_disassemble(&pipeline->device->info,
                        stage->code, code_offset, stream);

      fclose(stream);

      /* Copy it to a ralloc'd thing */
      disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
      memcpy(disasm, stream_data, stream_size);
      disasm[stream_size] = 0;

      free(stream_data);
   }

   const struct anv_pipeline_executable exe = {
      .stage = stage->stage,
      .stats = *stats,
      .nir = nir,
      .disasm = disasm,
   };
   util_dynarray_append(&pipeline->executables,
                        struct anv_pipeline_executable, exe);
}

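/* Fragment shaders can yield up to three binaries (SIMD8, SIMD16, SIMD32),
 * each with its own statistics and code offset; every other stage yields
 * exactly one.
 */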
static void
anv_pipeline_add_executables(struct anv_pipeline *pipeline,
                             struct anv_pipeline_stage *stage,
                             struct anv_shader_bin *bin)
{
   if (stage->stage == MESA_SHADER_FRAGMENT) {
      /* We pull the prog data and stats out of the anv_shader_bin because
       * the anv_pipeline_stage may not be fully populated if we successfully
       * looked up the shader in a cache.
       */
      const struct brw_wm_prog_data *wm_prog_data =
         (const struct brw_wm_prog_data *)bin->prog_data;
      struct brw_compile_stats *stats = bin->stats;

      if (wm_prog_data->dispatch_8) {
         anv_pipeline_add_executable(pipeline, stage, stats++, 0);
      }

      if (wm_prog_data->dispatch_16) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_16);
      }

      if (wm_prog_data->dispatch_32) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_32);
      }
   } else {
      anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
   }
}

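/* Maps the Vulkan subgroup-size controls (required size, varying size, full
 * subgroups) onto the compiler's brw_subgroup_size_type.
 */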
static enum brw_subgroup_size_type
anv_subgroup_size_type(gl_shader_stage stage,
                       VkPipelineShaderStageCreateFlags flags,
                       const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info)
{
   enum brw_subgroup_size_type subgroup_size_type;

   if (rss_info) {
      assert(stage == MESA_SHADER_COMPUTE);
      /* These enum values are expressly chosen to be equal to the subgroup
       * size that they require.
       */
      assert(rss_info->requiredSubgroupSize == 8 ||
             rss_info->requiredSubgroupSize == 16 ||
             rss_info->requiredSubgroupSize == 32);
      subgroup_size_type = rss_info->requiredSubgroupSize;
   } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
      subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
   } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
      assert(stage == MESA_SHADER_COMPUTE);
      /* If the client expressly requests full subgroups but neither
       * specifies a subgroup size nor allows varying subgroups, we need to
       * pick one.  So we specify the API value of 32.  Performance will
       * likely be terrible in this case but there's nothing we can do about
       * that.  The client should have chosen a size.
       */
      subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
   } else {
      subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
   }

   return subgroup_size_type;
}

1396static void
1397anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
1398{
1399   /* TODO: Cache this pipeline-wide information. */
1400
1401   if (anv_pipeline_is_primitive(pipeline)) {
1402      /* Primitive replication depends on information from all the shaders.
1403       * Recover this bit from the fact that we have more than one position slot
1404       * in the vertex shader when using it.
1405       */
1406      assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1407      int pos_slots = 0;
1408      const struct brw_vue_prog_data *vue_prog_data =
1409         (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
1410      const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1411      for (int i = 0; i < vue_map->num_slots; i++) {
1412         if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
1413            pos_slots++;
1414      }
1415      pipeline->use_primitive_replication = pos_slots > 1;
1416   }
1417}
1418
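/* Compile all the shader stages of a graphics pipeline: populate the
 * per-stage keys, look the shaders up in the pipeline cache and, on a miss,
 * run the full NIR -> link -> backend-compile -> upload path below.
 */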
1419static VkResult
1420anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
1421                              struct anv_pipeline_cache *cache,
1422                              const VkGraphicsPipelineCreateInfo *info)
1423{
1424   VkPipelineCreationFeedbackEXT pipeline_feedback = {
1425      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1426   };
1427   int64_t pipeline_start = os_time_get_nano();
1428
1429   const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1430   struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
1431
1432   /* Information on which states are considered dynamic. */
1433   const VkPipelineDynamicStateCreateInfo *dyn_info =
1434      info->pDynamicState;
1435   uint32_t dynamic_states = 0;
1436   if (dyn_info) {
1437      for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++)
1438         dynamic_states |=
1439            anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
1440   }
1441
1442   VkResult result;
1443   for (uint32_t i = 0; i < info->stageCount; i++) {
1444      const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
1445      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
1446
1447      int64_t stage_start = os_time_get_nano();
1448
1449      stages[stage].stage = stage;
1450      stages[stage].module = vk_shader_module_from_handle(sinfo->module);
1451      stages[stage].entrypoint = sinfo->pName;
1452      stages[stage].spec_info = sinfo->pSpecializationInfo;
1453      anv_pipeline_hash_shader(stages[stage].module,
1454                               stages[stage].entrypoint,
1455                               stage,
1456                               stages[stage].spec_info,
1457                               stages[stage].shader_sha1);
1458
1459      enum brw_subgroup_size_type subgroup_size_type =
1460         anv_subgroup_size_type(stage, sinfo->flags, NULL);
1461
1462      const struct intel_device_info *devinfo = &pipeline->base.device->info;
1463      switch (stage) {
1464      case MESA_SHADER_VERTEX:
1465         populate_vs_prog_key(devinfo, subgroup_size_type,
1466                              pipeline->base.device->robust_buffer_access,
1467                              &stages[stage].key.vs);
1468         break;
1469      case MESA_SHADER_TESS_CTRL:
1470         populate_tcs_prog_key(devinfo, subgroup_size_type,
1471                               pipeline->base.device->robust_buffer_access,
1472                               info->pTessellationState->patchControlPoints,
1473                               &stages[stage].key.tcs);
1474         break;
1475      case MESA_SHADER_TESS_EVAL:
1476         populate_tes_prog_key(devinfo, subgroup_size_type,
1477                               pipeline->base.device->robust_buffer_access,
1478                               &stages[stage].key.tes);
1479         break;
1480      case MESA_SHADER_GEOMETRY:
1481         populate_gs_prog_key(devinfo, subgroup_size_type,
1482                              pipeline->base.device->robust_buffer_access,
1483                              &stages[stage].key.gs);
1484         break;
1485      case MESA_SHADER_FRAGMENT: {
1486         const bool raster_enabled =
1487            !info->pRasterizationState->rasterizerDiscardEnable ||
1488            dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
1489         populate_wm_prog_key(pipeline, subgroup_size_type,
1490                              pipeline->base.device->robust_buffer_access,
1491                              pipeline->subpass,
1492                              raster_enabled ? info->pMultisampleState : NULL,
1493                              vk_find_struct_const(info->pNext,
1494                                                   PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR),
1495                              &stages[stage].key.wm);
1496         break;
1497      }
1498      default:
1499         unreachable("Invalid graphics shader stage");
1500      }
1501
1502      stages[stage].feedback.duration += os_time_get_nano() - stage_start;
1503      stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
1504   }
1505
1506   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1507
1508   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1509
1510   unsigned char sha1[20];
1511   anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1512
1513   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1514      if (!stages[s].entrypoint)
1515         continue;
1516
1517      stages[s].cache_key.stage = s;
1518      memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1519   }
1520
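   /* Capturing internal representations (for
    * VK_KHR_pipeline_executable_properties) requires output that is only
    * generated during an actual compile, so the cache lookup is skipped.
    */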
1521   const bool skip_cache_lookup =
1522      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1523
1524   if (!skip_cache_lookup) {
1525      unsigned found = 0;
1526      unsigned cache_hits = 0;
1527      for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1528         if (!stages[s].entrypoint)
1529            continue;
1530
1531         int64_t stage_start = os_time_get_nano();
1532
1533         bool cache_hit;
1534         struct anv_shader_bin *bin =
1535            anv_device_search_for_kernel(pipeline->base.device, cache,
1536                                         &stages[s].cache_key,
1537                                         sizeof(stages[s].cache_key), &cache_hit);
1538         if (bin) {
1539            found++;
1540            pipeline->shaders[s] = bin;
1541         }
1542
1543         if (cache_hit) {
1544            cache_hits++;
1545            stages[s].feedback.flags |=
1546               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1547         }
1548         stages[s].feedback.duration += os_time_get_nano() - stage_start;
1549      }
1550
1551      if (found == __builtin_popcount(pipeline->active_stages)) {
1552         if (cache_hits == found) {
1553            pipeline_feedback.flags |=
1554               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1555         }
1556         /* We found all our shaders in the cache.  We're done. */
1557         for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1558            if (!stages[s].entrypoint)
1559               continue;
1560
1561            anv_pipeline_add_executables(&pipeline->base, &stages[s],
1562                                         pipeline->shaders[s]);
1563         }
1564         anv_pipeline_init_from_cached_graphics(pipeline);
1565         goto done;
1566      } else if (found > 0) {
1567         /* We found some but not all of our shaders.  This shouldn't happen
1568          * most of the time but it can if we have a partially populated
1569          * pipeline cache.
1570          */
1571         assert(found < __builtin_popcount(pipeline->active_stages));
1572
1573         vk_perf(VK_LOG_OBJS(&cache->base),
1574                 "Found a partial pipeline in the cache.  This is "
1575                 "most likely caused by an incomplete pipeline cache "
1576                 "import or export");
1577
1578         /* We're going to have to recompile anyway, so just throw away our
1579          * references to the shaders in the cache.  We'll get them out of the
1580          * cache again as part of the compilation process.
1581          */
1582         for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1583            stages[s].feedback.flags = 0;
1584            if (pipeline->shaders[s]) {
1585               anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1586               pipeline->shaders[s] = NULL;
1587            }
1588         }
1589      }
1590   }
1591
1592   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
1593      return VK_PIPELINE_COMPILE_REQUIRED_EXT;
1594
1595   void *pipeline_ctx = ralloc_context(NULL);
1596
1597   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1598      if (!stages[s].entrypoint)
1599         continue;
1600
1601      int64_t stage_start = os_time_get_nano();
1602
1603      assert(stages[s].stage == s);
1604      assert(pipeline->shaders[s] == NULL);
1605
1606      stages[s].bind_map = (struct anv_pipeline_bind_map) {
1607         .surface_to_descriptor = stages[s].surface_to_descriptor,
1608         .sampler_to_descriptor = stages[s].sampler_to_descriptor
1609      };
1610
1611      stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1612                                                 pipeline_ctx,
1613                                                 &stages[s]);
1614      if (stages[s].nir == NULL) {
1615         result = vk_error(pipeline, VK_ERROR_UNKNOWN);
1616         goto fail;
1617      }
1618
1619      /* This is rather ugly.
1620       *
1621       * Any variable annotated as interpolated by sample essentially disables
1622       * coarse pixel shading. Unfortunately the CTS tests exercising this set
1623       * the varying value in the previous stage using a constant. Our NIR
1624       * infrastructure is clever enough to look up variables across stages
1625       * and constant-fold, removing the variable. So in order to comply with
1626       * the CTS we have to check the variables here.
1627       */
1628      if (s == MESA_SHADER_FRAGMENT) {
1629         nir_foreach_variable_in_list(var, &stages[s].nir->variables) {
1630            if (var->data.sample) {
1631               stages[s].key.wm.coarse_pixel = false;
1632               break;
1633            }
1634         }
1635      }
1636
1637      stages[s].feedback.duration += os_time_get_nano() - stage_start;
1638   }
1639
1640   /* Walk backwards so each producer stage links against its consumer. */
1641   struct anv_pipeline_stage *next_stage = NULL;
1642   for (int s = ARRAY_SIZE(pipeline->shaders) - 1; s >= 0; s--) {
1643      if (!stages[s].entrypoint)
1644         continue;
1645
1646      switch (s) {
1647      case MESA_SHADER_VERTEX:
1648         anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1649         break;
1650      case MESA_SHADER_TESS_CTRL:
1651         anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1652         break;
1653      case MESA_SHADER_TESS_EVAL:
1654         anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1655         break;
1656      case MESA_SHADER_GEOMETRY:
1657         anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1658         break;
1659      case MESA_SHADER_FRAGMENT:
1660         anv_pipeline_link_fs(compiler, &stages[s]);
1661         break;
1662      default:
1663         unreachable("Invalid graphics shader stage");
1664      }
1665
1666      next_stage = &stages[s];
1667   }
1668
1669   if (pipeline->base.device->info.ver >= 12 &&
1670       pipeline->subpass->view_mask != 0) {
1671      /* For some pipelines HW Primitive Replication can be used instead of
1672       * instancing to implement Multiview.  This depends on how viewIndex is
1673       * used in all the active shaders, so this check can't be done per
1674       * individual shader.
1675       */
1676      nir_shader *shaders[MESA_SHADER_STAGES] = {};
1677      for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
1678         shaders[s] = stages[s].nir;
1679
1680      pipeline->use_primitive_replication =
1681         anv_check_for_primitive_replication(shaders, pipeline);
1682   } else {
1683      pipeline->use_primitive_replication = false;
1684   }
1685
1686   struct anv_pipeline_stage *prev_stage = NULL;
1687   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1688      if (!stages[s].entrypoint)
1689         continue;
1690
1691      int64_t stage_start = os_time_get_nano();
1692
1693      void *stage_ctx = ralloc_context(NULL);
1694
1695      anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1696
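      /* When the backend asks for unified interfaces, make the outputs
       * written by the previous stage and the inputs read by this stage
       * match exactly (tessellation levels excluded, as they are stored
       * separately).
       */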
1697      if (prev_stage && compiler->glsl_compiler_options[s].NirOptions->unify_interfaces) {
1698         prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
1699                  ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1700         stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
1701                  ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1702         prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
1703         stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
1704      }
1705
1706      ralloc_free(stage_ctx);
1707
1708      stages[s].feedback.duration += os_time_get_nano() - stage_start;
1709
1710      prev_stage = &stages[s];
1711   }
1712
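   /* Compile the stages in pipeline order; each compile may consult the
    * stage compiled just before it (prev_stage).
    */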
1713   prev_stage = NULL;
1714   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1715      if (!stages[s].entrypoint)
1716         continue;
1717
1718      int64_t stage_start = os_time_get_nano();
1719
1720      void *stage_ctx = ralloc_context(NULL);
1721
1722      nir_xfb_info *xfb_info = NULL;
1723      if (s == MESA_SHADER_VERTEX ||
1724          s == MESA_SHADER_TESS_EVAL ||
1725          s == MESA_SHADER_GEOMETRY)
1726         xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);
1727
1728      switch (s) {
1729      case MESA_SHADER_VERTEX:
1730         anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1731                                 &stages[s]);
1732         break;
1733      case MESA_SHADER_TESS_CTRL:
1734         anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1735                                  &stages[s], prev_stage);
1736         break;
1737      case MESA_SHADER_TESS_EVAL:
1738         anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1739                                  &stages[s], prev_stage);
1740         break;
1741      case MESA_SHADER_GEOMETRY:
1742         anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1743                                 &stages[s], prev_stage);
1744         break;
1745      case MESA_SHADER_FRAGMENT:
1746         anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1747                                 &stages[s], prev_stage);
1748         break;
1749      default:
1750         unreachable("Invalid graphics shader stage");
1751      }
1752      if (stages[s].code == NULL) {
1753         ralloc_free(stage_ctx);
1754         result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
1755         goto fail;
1756      }
1757
1758      anv_nir_validate_push_layout(&stages[s].prog_data.base,
1759                                   &stages[s].bind_map);
1760
1761      struct anv_shader_bin *bin =
1762         anv_device_upload_kernel(pipeline->base.device, cache, s,
1763                                  &stages[s].cache_key,
1764                                  sizeof(stages[s].cache_key),
1765                                  stages[s].code,
1766                                  stages[s].prog_data.base.program_size,
1767                                  &stages[s].prog_data.base,
1768                                  brw_prog_data_size(s),
1769                                  stages[s].stats, stages[s].num_stats,
1770                                  xfb_info, &stages[s].bind_map);
1771      if (!bin) {
1772         ralloc_free(stage_ctx);
1773         result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1774         goto fail;
1775      }
1776
1777      anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1778
1779      pipeline->shaders[s] = bin;
1780      ralloc_free(stage_ctx);
1781
1782      stages[s].feedback.duration += os_time_get_nano() - stage_start;
1783
1784      prev_stage = &stages[s];
1785   }
1786
1787   ralloc_free(pipeline_ctx);
1788
1789done:
1790
1791   if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1792       pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1793      /* This can happen if we decided to implicitly disable the fragment
1794       * shader.  See anv_pipeline_compile_fs().
1795       */
1796      anv_shader_bin_unref(pipeline->base.device,
1797                           pipeline->shaders[MESA_SHADER_FRAGMENT]);
1798      pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1799      pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1800   }
1801
1802   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1803
1804   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1805      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1806   if (create_feedback) {
1807      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1808
1809      assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
1810      for (uint32_t i = 0; i < info->stageCount; i++) {
1811         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1812         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1813      }
1814   }
1815
1816   return VK_SUCCESS;
1817
1818fail:
1819   ralloc_free(pipeline_ctx);
1820
1821   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1822      if (pipeline->shaders[s])
1823         anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1824   }
1825
1826   return result;
1827}
1828
1829VkResult
1830anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1831                        struct anv_pipeline_cache *cache,
1832                        const VkComputePipelineCreateInfo *info,
1833                        const struct vk_shader_module *module,
1834                        const char *entrypoint,
1835                        const VkSpecializationInfo *spec_info)
1836{
1837   VkPipelineCreationFeedbackEXT pipeline_feedback = {
1838      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1839   };
1840   int64_t pipeline_start = os_time_get_nano();
1841
1842   const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1843
1844   struct anv_pipeline_stage stage = {
1845      .stage = MESA_SHADER_COMPUTE,
1846      .module = module,
1847      .entrypoint = entrypoint,
1848      .spec_info = spec_info,
1849      .cache_key = {
1850         .stage = MESA_SHADER_COMPUTE,
1851      },
1852      .feedback = {
1853         .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1854      },
1855   };
1856   anv_pipeline_hash_shader(stage.module,
1857                            stage.entrypoint,
1858                            MESA_SHADER_COMPUTE,
1859                            stage.spec_info,
1860                            stage.shader_sha1);
1861
1862   struct anv_shader_bin *bin = NULL;
1863
1864   const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
1865      vk_find_struct_const(info->stage.pNext,
1866                           PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
1867
1868   const enum brw_subgroup_size_type subgroup_size_type =
1869      anv_subgroup_size_type(MESA_SHADER_COMPUTE, info->stage.flags, rss_info);
1870
1871   populate_cs_prog_key(&pipeline->base.device->info, subgroup_size_type,
1872                        pipeline->base.device->robust_buffer_access,
1873                        &stage.key.cs);
1874
1875   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1876
1877   const bool skip_cache_lookup =
1878      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1879
1880   anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1881
1882   bool cache_hit = false;
1883   if (!skip_cache_lookup) {
1884      bin = anv_device_search_for_kernel(pipeline->base.device, cache,
1885                                         &stage.cache_key,
1886                                         sizeof(stage.cache_key),
1887                                         &cache_hit);
1888   }
1889
1890   if (bin == NULL &&
1891       (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT))
1892      return VK_PIPELINE_COMPILE_REQUIRED_EXT;
1893
1894   void *mem_ctx = ralloc_context(NULL);
1895   if (bin == NULL) {
1896      int64_t stage_start = os_time_get_nano();
1897
1898      stage.bind_map = (struct anv_pipeline_bind_map) {
1899         .surface_to_descriptor = stage.surface_to_descriptor,
1900         .sampler_to_descriptor = stage.sampler_to_descriptor
1901      };
1902
1903      /* Set up a binding for the gl_NumWorkGroups */
1904      stage.bind_map.surface_count = 1;
1905      stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1906         .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1907      };
1908
1909      stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1910      if (stage.nir == NULL) {
1911         ralloc_free(mem_ctx);
1912         return vk_error(pipeline, VK_ERROR_UNKNOWN);
1913      }
1914
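      /* Account for the base workgroup ID pushed by vkCmdDispatchBase(). */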
1915      NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);
1916
1917      anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1918
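      /* Lower compute-shader intrinsics (workgroup IDs, local invocation
       * indices and the like) into simpler forms for the backend.
       */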
1919      NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
1920
1921      stage.num_stats = 1;
1922
1923      struct brw_compile_cs_params params = {
1924         .nir = stage.nir,
1925         .key = &stage.key.cs,
1926         .prog_data = &stage.prog_data.cs,
1927         .stats = stage.stats,
1928         .log_data = pipeline->base.device,
1929      };
1930
1931      stage.code = brw_compile_cs(compiler, mem_ctx, &params);
1932      if (stage.code == NULL) {
1933         ralloc_free(mem_ctx);
1934         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1935      }
1936
1937      anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1938
1939      if (!stage.prog_data.cs.uses_num_work_groups) {
1940         assert(stage.bind_map.surface_to_descriptor[0].set ==
1941                ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1942         stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1943      }
1944
1945      const unsigned code_size = stage.prog_data.base.program_size;
1946      bin = anv_device_upload_kernel(pipeline->base.device, cache,
1947                                     MESA_SHADER_COMPUTE,
1948                                     &stage.cache_key, sizeof(stage.cache_key),
1949                                     stage.code, code_size,
1950                                     &stage.prog_data.base,
1951                                     sizeof(stage.prog_data.cs),
1952                                     stage.stats, stage.num_stats,
1953                                     NULL, &stage.bind_map);
1954      if (!bin) {
1955         ralloc_free(mem_ctx);
1956         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1957      }
1958
1959      stage.feedback.duration = os_time_get_nano() - stage_start;
1960   }
1961
1962   anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1963
1964   ralloc_free(mem_ctx);
1965
1966   if (cache_hit) {
1967      stage.feedback.flags |=
1968         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1969      pipeline_feedback.flags |=
1970         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1971   }
1972   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1973
1974   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1975      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1976   if (create_feedback) {
1977      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1978
1979      assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1980      create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1981   }
1982
1983   pipeline->cs = bin;
1984
1985   return VK_SUCCESS;
1986}
1987
1988/**
1989 * Copy pipeline state not marked as dynamic.
1990 * Dynamic state is pipeline state which hasn't been provided at pipeline
1991 * creation time, but is dynamically provided afterwards using various
1992 * vkCmdSet* functions.
1993 *
1994 * The set of state considered "non_dynamic" is determined by the pieces of
1995 * state that have their corresponding VkDynamicState enums omitted from
1996 * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1997 *
1998 * @param[out] pipeline    Destination non_dynamic state.
1999 * @param[in]  pCreateInfo Source of non_dynamic state to be copied.
2000 */
2001static void
2002copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
2003                       const VkGraphicsPipelineCreateInfo *pCreateInfo)
2004{
2005   anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
2006   struct anv_subpass *subpass = pipeline->subpass;
2007
2008   pipeline->dynamic_state = default_dynamic_state;
2009
2010   states &= ~pipeline->dynamic_states;
2011
2012   struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
2013
2014   bool raster_discard =
2015      pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2016      !(pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
2017
2018   /* Section 9.2 of the Vulkan 1.0.15 spec says:
2019    *
2020    *    pViewportState is [...] NULL if the pipeline
2021    *    has rasterization disabled.
2022    */
2023   if (!raster_discard) {
2024      assert(pCreateInfo->pViewportState);
2025
2026      dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
2027      if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
2028         typed_memcpy(dynamic->viewport.viewports,
2029                     pCreateInfo->pViewportState->pViewports,
2030                     pCreateInfo->pViewportState->viewportCount);
2031      }
2032
2033      dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
2034      if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
2035         typed_memcpy(dynamic->scissor.scissors,
2036                     pCreateInfo->pViewportState->pScissors,
2037                     pCreateInfo->pViewportState->scissorCount);
2038      }
2039   }
2040
2041   if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
2042      assert(pCreateInfo->pRasterizationState);
2043      dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
2044   }
2045
2046   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
2047      assert(pCreateInfo->pRasterizationState);
2048      dynamic->depth_bias.bias =
2049         pCreateInfo->pRasterizationState->depthBiasConstantFactor;
2050      dynamic->depth_bias.clamp =
2051         pCreateInfo->pRasterizationState->depthBiasClamp;
2052      dynamic->depth_bias.slope =
2053         pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
2054   }
2055
2056   if (states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
2057      assert(pCreateInfo->pRasterizationState);
2058      dynamic->cull_mode =
2059         pCreateInfo->pRasterizationState->cullMode;
2060   }
2061
2062   if (states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
2063      assert(pCreateInfo->pRasterizationState);
2064      dynamic->front_face =
2065         pCreateInfo->pRasterizationState->frontFace;
2066   }
2067
2068   if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) &&
2069         (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
2070      assert(pCreateInfo->pInputAssemblyState);
2071      dynamic->primitive_topology = pCreateInfo->pInputAssemblyState->topology;
2072   }
2073
2074   if (states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
2075      assert(pCreateInfo->pRasterizationState);
2076      dynamic->raster_discard =
2077         pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2078   }
2079
2080   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE) {
2081      assert(pCreateInfo->pRasterizationState);
2082      dynamic->depth_bias_enable =
2083         pCreateInfo->pRasterizationState->depthBiasEnable;
2084   }
2085
2086   if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE) &&
2087         (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
2088      assert(pCreateInfo->pInputAssemblyState);
2089      dynamic->primitive_restart_enable =
2090         pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
2091   }
2092
2093   /* Section 9.2 of the Vulkan 1.0.15 spec says:
2094    *
2095    *    pColorBlendState is [...] NULL if the pipeline has rasterization
2096    *    disabled or if the subpass of the render pass the pipeline is
2097    *    created against does not use any color attachments.
2098    */
2099   bool uses_color_att = false;
2100   for (unsigned i = 0; i < subpass->color_count; ++i) {
2101      if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
2102         uses_color_att = true;
2103         break;
2104      }
2105   }
2106
2107   if (uses_color_att && !raster_discard) {
2108      assert(pCreateInfo->pColorBlendState);
2109
2110      if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
2111         typed_memcpy(dynamic->blend_constants,
2112                     pCreateInfo->pColorBlendState->blendConstants, 4);
2113   }
2114
2115   /* If there is no depth/stencil attachment, then don't read
2116    * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
2117    * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
2118    * no need to override the depth/stencil defaults in
2119    * anv_pipeline::dynamic_state when there is no depth/stencil attachment.
2120    *
2121    * Section 9.2 of the Vulkan 1.0.15 spec says:
2122    *
2123    *    pDepthStencilState is [...] NULL if the pipeline has rasterization
2124    *    disabled or if the subpass of the render pass the pipeline is created
2125    *    against does not use a depth/stencil attachment.
2126    */
2127   if (!raster_discard && subpass->depth_stencil_attachment) {
2128      assert(pCreateInfo->pDepthStencilState);
2129
2130      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
2131         dynamic->depth_bounds.min =
2132            pCreateInfo->pDepthStencilState->minDepthBounds;
2133         dynamic->depth_bounds.max =
2134            pCreateInfo->pDepthStencilState->maxDepthBounds;
2135      }
2136
2137      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
2138         dynamic->stencil_compare_mask.front =
2139            pCreateInfo->pDepthStencilState->front.compareMask;
2140         dynamic->stencil_compare_mask.back =
2141            pCreateInfo->pDepthStencilState->back.compareMask;
2142      }
2143
2144      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
2145         dynamic->stencil_write_mask.front =
2146            pCreateInfo->pDepthStencilState->front.writeMask;
2147         dynamic->stencil_write_mask.back =
2148            pCreateInfo->pDepthStencilState->back.writeMask;
2149      }
2150
2151      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
2152         dynamic->stencil_reference.front =
2153            pCreateInfo->pDepthStencilState->front.reference;
2154         dynamic->stencil_reference.back =
2155            pCreateInfo->pDepthStencilState->back.reference;
2156      }
2157
2158      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
2159         dynamic->depth_test_enable =
2160            pCreateInfo->pDepthStencilState->depthTestEnable;
2161      }
2162
2163      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
2164         dynamic->depth_write_enable =
2165            pCreateInfo->pDepthStencilState->depthWriteEnable;
2166      }
2167
2168      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
2169         dynamic->depth_compare_op =
2170            pCreateInfo->pDepthStencilState->depthCompareOp;
2171      }
2172
2173      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
2174         dynamic->depth_bounds_test_enable =
2175            pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
2176      }
2177
2178      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
2179         dynamic->stencil_test_enable =
2180            pCreateInfo->pDepthStencilState->stencilTestEnable;
2181      }
2182
2183      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
2184         const VkPipelineDepthStencilStateCreateInfo *info =
2185            pCreateInfo->pDepthStencilState;
2186         memcpy(&dynamic->stencil_op.front, &info->front,
2187                sizeof(dynamic->stencil_op.front));
2188         memcpy(&dynamic->stencil_op.back, &info->back,
2189                sizeof(dynamic->stencil_op.back));
2190      }
2191   }
2192
2193   const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
2194      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2195                           PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
2196   if (!raster_discard && line_state && line_state->stippledLineEnable) {
2197      if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
2198         dynamic->line_stipple.factor = line_state->lineStippleFactor;
2199         dynamic->line_stipple.pattern = line_state->lineStipplePattern;
2200      }
2201   }
2202
2203   const VkPipelineMultisampleStateCreateInfo *ms_info =
2204      pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL :
2205      pCreateInfo->pMultisampleState;
2206   if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
2207      const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
2208         vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;
2209
2210      if (sl_info) {
2211         dynamic->sample_locations.samples =
2212            sl_info->sampleLocationsInfo.sampleLocationsCount;
2213         const VkSampleLocationEXT *positions =
2214            sl_info->sampleLocationsInfo.pSampleLocations;
2215         for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
2216            dynamic->sample_locations.locations[i].x = positions[i].x;
2217            dynamic->sample_locations.locations[i].y = positions[i].y;
2218         }
2219      }
2220   }
2221   /* Ensure we always have valid values for sample_locations. */
2222   if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
2223       dynamic->sample_locations.samples == 0) {
2224      dynamic->sample_locations.samples =
2225         ms_info ? ms_info->rasterizationSamples : 1;
2226      const struct intel_sample_position *positions =
2227         intel_get_sample_positions(dynamic->sample_locations.samples);
2228      for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
2229         dynamic->sample_locations.locations[i].x = positions[i].x;
2230         dynamic->sample_locations.locations[i].y = positions[i].y;
2231      }
2232   }
2233
2234   if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
2235      if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2236          uses_color_att) {
2237         assert(pCreateInfo->pColorBlendState);
2238         const VkPipelineColorWriteCreateInfoEXT *color_write_info =
2239            vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,
2240                                 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);
2241
2242         if (color_write_info) {
2243            dynamic->color_writes = 0;
2244            for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
2245               dynamic->color_writes |=
2246                  color_write_info->pColorWriteEnables[i] ? (1u << i) : 0;
2247            }
2248         }
2249      }
2250   }
2251
2252   const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_state =
2253      vk_find_struct_const(pCreateInfo->pNext,
2254                           PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
2255   if (fsr_state) {
2256      if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
2257         dynamic->fragment_shading_rate = fsr_state->fragmentSize;
2258   }
2259
2260   pipeline->dynamic_state_mask = states;
2261
2262   /* Mark states that can either be dynamic or fully baked into the pipeline.
2263    */
2264   pipeline->static_state_mask = states &
2265      (ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
2266       ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
2267       ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |
2268       ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
2269       ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP |
2270       ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY);
2271}
2272
2273static void
2274anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
2275{
2276#ifdef DEBUG
2277   struct anv_render_pass *renderpass = NULL;
2278   struct anv_subpass *subpass = NULL;
2279
2280   /* Assert that all required members of VkGraphicsPipelineCreateInfo are
2281    * present.  See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
2282    */
2283   assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
2284
2285   renderpass = anv_render_pass_from_handle(info->renderPass);
2286   assert(renderpass);
2287
2288   assert(info->subpass < renderpass->subpass_count);
2289   subpass = &renderpass->subpasses[info->subpass];
2290
2291   assert(info->stageCount >= 1);
2292   assert(info->pRasterizationState);
2293   if (!info->pRasterizationState->rasterizerDiscardEnable) {
2294      assert(info->pViewportState);
2295      assert(info->pMultisampleState);
2296
2297      if (subpass && subpass->depth_stencil_attachment)
2298         assert(info->pDepthStencilState);
2299
2300      if (subpass && subpass->color_count > 0) {
2301         bool all_color_unused = true;
2302         for (int i = 0; i < subpass->color_count; i++) {
2303            if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
2304               all_color_unused = false;
2305         }
2306         /* pColorBlendState is ignored if the pipeline has rasterization
2307          * disabled or if the subpass of the render pass the pipeline is
2308          * created against does not use any color attachments.
2309          */
2310         assert(info->pColorBlendState || all_color_unused);
2311      }
2312   }
2313
2314   for (uint32_t i = 0; i < info->stageCount; ++i) {
2315      switch (info->pStages[i].stage) {
2316      case VK_SHADER_STAGE_VERTEX_BIT:
2317         assert(info->pVertexInputState);
2318         assert(info->pInputAssemblyState);
2319         break;
2320      case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2321      case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2322         assert(info->pTessellationState);
2323         break;
2324      default:
2325         break;
2326      }
2327   }
2328#endif
2329}
2330
2331/**
2332 * Calculate the desired L3 partitioning based on the current state of the
2333 * pipeline.  For now this simply returns the conservative defaults calculated
2334 * by intel_get_default_l3_weights(), but we could probably do better by
2335 * gathering more statistics from the pipeline state (e.g. a guess of expected
2336 * URB usage and bound surfaces), or by using feedback from performance counters.
2337 */
2338void
2339anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
2340{
2341   const struct intel_device_info *devinfo = &pipeline->device->info;
2342
2343   const struct intel_l3_weights w =
2344      intel_get_default_l3_weights(devinfo, true, needs_slm);
2345
2346   pipeline->l3_config = intel_get_l3_config(devinfo, w);
2347}
2348
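/* Resolve VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT the way the spec describes
 * the default: rectangular lines when multisample rasterization is enabled,
 * Bresenham lines otherwise.
 */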
2349static VkLineRasterizationModeEXT
2350vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
2351                           const VkPipelineMultisampleStateCreateInfo *ms_info)
2352{
2353   VkLineRasterizationModeEXT line_mode =
2354      line_info ? line_info->lineRasterizationMode :
2355                  VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
2356
2357   if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
2358      if (ms_info && ms_info->rasterizationSamples > 1) {
2359         return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
2360      } else {
2361         return VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
2362      }
2363   }
2364
2365   return line_mode;
2366}
2367
2368VkResult
2369anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
2370                           struct anv_device *device,
2371                           struct anv_pipeline_cache *cache,
2372                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
2373                           const VkAllocationCallbacks *alloc)
2374{
2375   VkResult result;
2376
2377   anv_pipeline_validate_create_info(pCreateInfo);
2378
2379   result = anv_pipeline_init(&pipeline->base, device,
2380                              ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
2381                              alloc);
2382   if (result != VK_SUCCESS)
2383      return result;
2384
2385   anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
2386                         pipeline->batch_data, sizeof(pipeline->batch_data));
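   /* Pipeline commands are emitted into the pipeline's own fixed-size
    * batch_data storage rather than a GPU buffer, hence the null address.
    */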
2387
2388   ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
2389   assert(pCreateInfo->subpass < render_pass->subpass_count);
2390   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2391
2392   assert(pCreateInfo->pRasterizationState);
2393
2394   if (pCreateInfo->pDynamicState) {
2395      /* Remove all of the states that are marked as dynamic */
2396      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
2397      for (uint32_t s = 0; s < count; s++) {
2398         pipeline->dynamic_states |= anv_cmd_dirty_bit_for_vk_dynamic_state(
2399            pCreateInfo->pDynamicState->pDynamicStates[s]);
2400      }
2401   }
2402
2403   pipeline->active_stages = 0;
2404   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
2405      pipeline->active_stages |= pCreateInfo->pStages[i].stage;
2406
2407   if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2408      pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2409
2410   copy_non_dynamic_state(pipeline, pCreateInfo);
2411
2412   pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
2413
2414   /* Previously we enabled depth clipping when !depthClampEnable.
2415    * DepthClipStateCreateInfo now makes depth clipping explicit, so if the
2416    * clip info is available, use its enable value to determine clipping;
2417    * otherwise fall back to the previous !depthClampEnable logic.
2418    */
2419   const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
2420      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2421                           PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
2422   pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;
2423
2424   pipeline->sample_shading_enable =
2425      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2426      pCreateInfo->pMultisampleState &&
2427      pCreateInfo->pMultisampleState->sampleShadingEnable;
2428
2429   result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
2430   if (result != VK_SUCCESS) {
2431      anv_pipeline_finish(&pipeline->base, device, alloc);
2432      return result;
2433   }
2434
2435   anv_pipeline_setup_l3_config(&pipeline->base, false);
2436
2437   if (anv_pipeline_is_primitive(pipeline)) {
2438      const VkPipelineVertexInputStateCreateInfo *vi_info =
2439         pCreateInfo->pVertexInputState;
2440
2441      const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
2442
2443      for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2444         const VkVertexInputAttributeDescription *desc =
2445            &vi_info->pVertexAttributeDescriptions[i];
2446
2447         if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
2448            pipeline->vb_used |= 1 << desc->binding;
2449      }
2450
2451      for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
2452         const VkVertexInputBindingDescription *desc =
2453            &vi_info->pVertexBindingDescriptions[i];
2454
2455         pipeline->vb[desc->binding].stride = desc->stride;
2456
2457         /* Step rate is programmed per vertex element (attribute), not
2458          * binding. Set up a map of which bindings step per instance, for
2459          * reference by vertex element setup. */
2460         switch (desc->inputRate) {
2461         default:
2462         case VK_VERTEX_INPUT_RATE_VERTEX:
2463            pipeline->vb[desc->binding].instanced = false;
2464            break;
2465         case VK_VERTEX_INPUT_RATE_INSTANCE:
2466            pipeline->vb[desc->binding].instanced = true;
2467            break;
2468         }
2469
2470         pipeline->vb[desc->binding].instance_divisor = 1;
2471      }
2472
2473      const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
2474         vk_find_struct_const(vi_info->pNext,
2475                              PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2476      if (vi_div_state) {
2477         for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
2478            const VkVertexInputBindingDivisorDescriptionEXT *desc =
2479               &vi_div_state->pVertexBindingDivisors[i];
2480
2481            pipeline->vb[desc->binding].instance_divisor = desc->divisor;
2482         }
2483      }
2484
2485      /* Our implementation of VK_KHR_multiview uses instancing to draw the
2486       * different views.  If the client asks for instancing, we need to
2487       * multiply the instance divisor by the number of views to ensure that
2488       * we repeat the client's per-instance data once for each view.
2489       */
2490      if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
2491         const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
2492         for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
2493            if (pipeline->vb[vb].instanced)
2494               pipeline->vb[vb].instance_divisor *= view_count;
2495         }
2496      }
2497
2498      const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2499         pCreateInfo->pInputAssemblyState;
2500      const VkPipelineTessellationStateCreateInfo *tess_info =
2501         pCreateInfo->pTessellationState;
2502
2503      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2504         pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
2505      else
2506         pipeline->topology = vk_to_intel_primitive_type[ia_info->topology];
2507   }
2508
2509   /* If rasterization is not enabled, ms_info must be ignored. */
2510   const bool raster_enabled =
2511      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
2512      (pipeline->dynamic_states &
2513       ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
2514
2515   const VkPipelineMultisampleStateCreateInfo *ms_info =
2516      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2517
2518   const VkPipelineRasterizationLineStateCreateInfoEXT *line_info =
2519      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2520                           PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
2521
2522   /* Store line mode, polygon mode and rasterization samples; these are
2523    * used for dynamic primitive topology.
2524    */
2525   pipeline->line_mode = vk_line_rasterization_mode(line_info, ms_info);
2526   pipeline->polygon_mode = pCreateInfo->pRasterizationState->polygonMode;
2527   pipeline->rasterization_samples =
2528      ms_info ? ms_info->rasterizationSamples : 1;
2529
2530   return VK_SUCCESS;
2531}
2532
2533static VkResult
2534compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
2535                         struct anv_pipeline_cache *cache,
2536                         nir_shader *nir,
2537                         struct anv_pipeline_stage *stage,
2538                         struct anv_shader_bin **shader_out,
2539                         void *mem_ctx)
2540{
2541   const struct brw_compiler *compiler =
2542      pipeline->base.device->physical->compiler;
2543   const struct intel_device_info *devinfo = compiler->devinfo;
2544
2545   nir_shader **resume_shaders = NULL;
2546   uint32_t num_resume_shaders = 0;
2547   if (nir->info.stage != MESA_SHADER_COMPUTE) {
2548      NIR_PASS_V(nir, nir_lower_shader_calls,
2549                 nir_address_format_64bit_global,
2550                 BRW_BTD_STACK_ALIGN,
2551                 &resume_shaders, &num_resume_shaders, mem_ctx);
2552      NIR_PASS_V(nir, brw_nir_lower_shader_calls);
2553      NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
2554   }
2555
2556   for (unsigned i = 0; i < num_resume_shaders; i++) {
2557      NIR_PASS_V(resume_shaders[i], brw_nir_lower_shader_calls);
2558      NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
2559   }
2560
2561   stage->code =
2562      brw_compile_bs(compiler, pipeline->base.device, mem_ctx,
2563                     &stage->key.bs, &stage->prog_data.bs, nir,
2564                     num_resume_shaders, resume_shaders, stage->stats, NULL);
2565   if (stage->code == NULL)
2566      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2567
2568   /* Ray-tracing shaders don't have a "real" bind map */
2569   struct anv_pipeline_bind_map empty_bind_map = {};
2570
2571   const unsigned code_size = stage->prog_data.base.program_size;
2572   struct anv_shader_bin *bin =
2573      anv_device_upload_kernel(pipeline->base.device,
2574                               cache,
2575                               stage->stage,
2576                               &stage->cache_key, sizeof(stage->cache_key),
2577                               stage->code, code_size,
2578                               &stage->prog_data.base,
2579                               sizeof(stage->prog_data.bs),
2580                               stage->stats, 1,
2581                               NULL, &empty_bind_map);
2582   if (bin == NULL)
2583      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2584
2585   /* TODO: Figure out executables for resume shaders */
2586   anv_pipeline_add_executables(&pipeline->base, stage, bin);
2587   util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);
2588
2589   *shader_out = bin;
2590
2591   return VK_SUCCESS;
2592}
2593
2594static bool
2595is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
2596{
2597   if (info->pDynamicState == NULL)
2598      return false;
2599
2600   for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
2601      if (info->pDynamicState->pDynamicStates[i] ==
2602          VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
2603         return true;
2604   }
2605
2606   return false;
2607}
2608
2609static void
2610anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
2611                                        const VkRayTracingPipelineCreateInfoKHR *info,
2612                                        uint32_t *stack_max)
2613{
2614   if (is_rt_stack_size_dynamic(info)) {
2615      pipeline->stack_size = 0; /* 0 means dynamic */
2616   } else {
2617      /* From the Vulkan spec:
2618       *
2619       *    "If the stack size is not set explicitly, the stack size for a
2620       *    pipeline is:
2621       *
2622       *       rayGenStackMax +
2623       *       min(1, maxPipelineRayRecursionDepth) ×
2624       *       max(closestHitStackMax, missStackMax,
2625       *           intersectionStackMax + anyHitStackMax) +
2626       *       max(0, maxPipelineRayRecursionDepth-1) ×
2627       *       max(closestHitStackMax, missStackMax) +
2628       *       2 × callableStackMax"
2629       */
2630      pipeline->stack_size =
2631         stack_max[MESA_SHADER_RAYGEN] +
2632         MIN2(1, info->maxPipelineRayRecursionDepth) *
2633         MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
2634              stack_max[MESA_SHADER_MISS],
2635              stack_max[MESA_SHADER_INTERSECTION],
2636              stack_max[MESA_SHADER_ANY_HIT]) +
2637         MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
2638         MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
2639              stack_max[MESA_SHADER_MISS]) +
2640         2 * stack_max[MESA_SHADER_CALLABLE];
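      /* For example, with maxPipelineRayRecursionDepth = 2 and per-stage
       * maxima rgen = 100, chit = 200, miss = 150, isec = 300, ahit = 50 and
       * callable = 64, this works out to
       * 100 + 1 * max(200, 150, 300 + 50) + 1 * max(200, 150) + 2 * 64 = 778.
       */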
2641
2642      /* This is an extremely unlikely case but we need to set it to some
2643       * non-zero value so that we don't accidentally think it's dynamic.
2644       * Our minimum stack size is 2KB anyway, so we could set it to any
2645       * small value we like.
2646       */
2647      if (pipeline->stack_size == 0)
2648         pipeline->stack_size = 1;
2649   }
2650}
2651
2652static struct anv_pipeline_stage *
2653anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
2654                                     const VkRayTracingPipelineCreateInfoKHR *info,
2655                                     void *pipeline_ctx)
2656{
2657   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2658
2659   /* Create enough stage entries for all shader modules plus potential
2660    * combinations in the groups.
2661    */
2662   struct anv_pipeline_stage *stages =
2663      rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);
2664
2665   for (uint32_t i = 0; i < info->stageCount; i++) {
2666      const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
2667      if (sinfo->module == VK_NULL_HANDLE)
2668         continue;
2669
2670      int64_t stage_start = os_time_get_nano();
2671
2672      stages[i] = (struct anv_pipeline_stage) {
2673         .stage = vk_to_mesa_shader_stage(sinfo->stage),
2674         .module = vk_shader_module_from_handle(sinfo->module),
2675         .entrypoint = sinfo->pName,
2676         .spec_info = sinfo->pSpecializationInfo,
2677         .cache_key = {
2678            .stage = vk_to_mesa_shader_stage(sinfo->stage),
2679         },
2680         .feedback = {
2681            .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2682         },
2683      };
2684
2685      populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags,
2686                           pipeline->base.device->robust_buffer_access,
2687                           &stages[i].key.bs);
2688
2689      anv_pipeline_hash_shader(stages[i].module,
2690                               stages[i].entrypoint,
2691                               stages[i].stage,
2692                               stages[i].spec_info,
2693                               stages[i].shader_sha1);
2694
2695      if (stages[i].stage != MESA_SHADER_INTERSECTION) {
2696         anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
2697                                              stages[i].cache_key.sha1);
2698      }
2699
2700      stages[i].feedback.duration += os_time_get_nano() - stage_start;
2701   }
2702
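   /* For procedural hit groups, the intersection shader may end up fused
    * with an any-hit shader, so its cache key has to cover both shaders.
    */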
2703   for (uint32_t i = 0; i < info->groupCount; i++) {
2704      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2705
2706      if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
2707         continue;
2708
2709      int64_t stage_start = os_time_get_nano();
2710
2711      uint32_t intersection_idx = ginfo->intersectionShader;
2712      assert(intersection_idx < info->stageCount);
2713
2714      uint32_t any_hit_idx = ginfo->anyHitShader;
2715      if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
2716         assert(any_hit_idx < info->stageCount);
2717         anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
2718                                                       layout,
2719                                                       &stages[intersection_idx],
2720                                                       &stages[any_hit_idx],
2721                                                       stages[intersection_idx].cache_key.sha1);
2722      } else {
2723         anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
2724                                              &stages[intersection_idx],
2725                                              stages[intersection_idx].cache_key.sha1);
2726      }
2727
2728      stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
2729   }
2730
2731   return stages;
2732}
2733
static bool
anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
                                 struct anv_pipeline_cache *cache,
                                 const VkRayTracingPipelineCreateInfoKHR *info,
                                 struct anv_pipeline_stage *stages,
                                 uint32_t *stack_max)
{
   uint32_t shaders = 0, cache_hits = 0;
   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].entrypoint == NULL)
         continue;

      shaders++;

      int64_t stage_start = os_time_get_nano();

      bool cache_hit;
      stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
                                                   &stages[i].cache_key,
                                                   sizeof(stages[i].cache_key),
                                                   &cache_hit);
      if (cache_hit) {
         cache_hits++;
         stages[i].feedback.flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
      }

      if (stages[i].bin != NULL) {
         anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
         util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);

         uint32_t stack_size =
            brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
         stack_max[stages[i].stage] =
            MAX2(stack_max[stages[i].stage], stack_size);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   return cache_hits == shaders;
}

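/* Compile and upload all shaders for a ray-tracing pipeline: gather the
 * stages, try the pipeline cache, lower each stage's NIR, compile the
 * general stages individually, and then resolve the shader groups,
 * compiling each procedural hit group with its intersection and any-hit
 * shaders fused into a single binary.
 */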
static VkResult
anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
                                 struct anv_pipeline_cache *cache,
                                 const VkRayTracingPipelineCreateInfoKHR *info)
{
   const struct intel_device_info *devinfo = &pipeline->base.device->info;
   VkResult result;

   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
   };
   int64_t pipeline_start = os_time_get_nano();

   void *pipeline_ctx = ralloc_context(NULL);

   struct anv_pipeline_stage *stages =
      anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};

   if (!skip_cache_lookup &&
       anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
      pipeline_feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
      goto done;
   }

   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
      ralloc_free(pipeline_ctx);
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;
   }

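   /* First pass: get every stage to lowered NIR up front so that the
    * pristine any-hit NIR is still available when procedural hit groups
    * are combined below.
    */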
   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].entrypoint == NULL)
         continue;

      int64_t stage_start = os_time_get_nano();

      stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
                                                 pipeline_ctx, &stages[i]);
      if (stages[i].nir == NULL) {
         ralloc_free(pipeline_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].entrypoint == NULL)
         continue;

      /* Shader found in cache already. */
      if (stages[i].bin != NULL)
         continue;

      /* We handle intersection shaders as part of the group */
      if (stages[i].stage == MESA_SHADER_INTERSECTION)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(pipeline_ctx);

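      /* Clone into the per-stage context so the NIR in pipeline_ctx is left
       * untouched by the per-stage lowering; a procedural hit group may
       * still need the pristine any-hit NIR for combined compilation.
       */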
      nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
      switch (stages[i].stage) {
      case MESA_SHADER_RAYGEN:
         brw_nir_lower_raygen(nir);
         break;

      case MESA_SHADER_ANY_HIT:
         brw_nir_lower_any_hit(nir, devinfo);
         break;

      case MESA_SHADER_CLOSEST_HIT:
         brw_nir_lower_closest_hit(nir);
         break;

      case MESA_SHADER_MISS:
         brw_nir_lower_miss(nir);
         break;

      case MESA_SHADER_INTERSECTION:
         unreachable("These are handled later");

      case MESA_SHADER_CALLABLE:
         brw_nir_lower_callable(nir);
         break;

      default:
         unreachable("Invalid ray-tracing shader stage");
      }

      result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
                                        &stages[i].bin, stage_ctx);
      if (result != VK_SUCCESS) {
         ralloc_free(pipeline_ctx);
         return result;
      }

      uint32_t stack_size =
         brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
      stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);

      ralloc_free(stage_ctx);

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

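   /* Resolve each shader group to its compiled binaries.  General and
    * triangle hit groups simply reference stages compiled above; procedural
    * hit groups get a dedicated binary with the intersection and any-hit
    * shaders combined.
    */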
   for (uint32_t i = 0; i < info->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
      struct anv_rt_shader_group *group = &pipeline->groups[i];
      group->type = ginfo->type;
      switch (ginfo->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         assert(ginfo->generalShader < info->stageCount);
         group->general = stages[ginfo->generalShader].bin;
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         if (ginfo->anyHitShader < info->stageCount)
            group->any_hit = stages[ginfo->anyHitShader].bin;

         if (ginfo->closestHitShader < info->stageCount)
            group->closest_hit = stages[ginfo->closestHitShader].bin;
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
         if (ginfo->closestHitShader < info->stageCount)
            group->closest_hit = stages[ginfo->closestHitShader].bin;

         uint32_t intersection_idx = ginfo->intersectionShader;
         assert(intersection_idx < info->stageCount);

         /* Only compile this stage if not already found in the cache. */
         if (stages[intersection_idx].bin == NULL) {
            /* The any-hit and intersection shaders have to be combined */
            uint32_t any_hit_idx = ginfo->anyHitShader;
            const nir_shader *any_hit = NULL;
            if (any_hit_idx < info->stageCount)
               any_hit = stages[any_hit_idx].nir;

            void *group_ctx = ralloc_context(pipeline_ctx);
            nir_shader *intersection =
               nir_shader_clone(group_ctx, stages[intersection_idx].nir);

            brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
                                                        devinfo);

            result = compile_upload_rt_shader(pipeline, cache,
                                              intersection,
                                              &stages[intersection_idx],
                                              &group->intersection,
                                              group_ctx);
            ralloc_free(group_ctx);
            if (result != VK_SUCCESS) {
               ralloc_free(pipeline_ctx);
               return result;
            }
         } else {
            group->intersection = stages[intersection_idx].bin;
         }

         uint32_t stack_size =
            brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
         stack_max[MESA_SHADER_INTERSECTION] =
            MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);

         break;
      }

      default:
         unreachable("Invalid ray tracing shader group type");
      }
   }

 done:
   anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
      for (uint32_t i = 0; i < info->stageCount; i++) {
         /* stages[] is indexed in info->pStages order, not by
          * gl_shader_stage.
          */
         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[i].feedback;
      }
   }

   /* stages[] was allocated out of pipeline_ctx, so it has to stay alive
    * until the feedback above has been copied out.
    */
   ralloc_free(pipeline_ctx);

   return VK_SUCCESS;
}

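/* Build the two device-wide helper shaders needed for ray tracing: a
 * compute "trampoline" that bridges from a compute dispatch into the
 * bindless raygen shader, and a trivial bindless shader that just returns.
 * Both are looked up in (and uploaded to) the default pipeline cache under
 * fixed string keys.
 */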
VkResult
anv_device_init_rt_shaders(struct anv_device *device)
{
   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
      return VK_SUCCESS;

   bool cache_hit;

   struct brw_rt_trampoline {
      char name[16];
      struct brw_cs_prog_key key;
   } trampoline_key = {
      .name = "rt-trampoline",
      .key = {
         /* TODO: Other subgroup sizes? */
         .base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8,
      },
   };
   device->rt_trampoline =
      anv_device_search_for_kernel(device, &device->default_pipeline_cache,
                                   &trampoline_key, sizeof(trampoline_key),
                                   &cache_hit);
   if (device->rt_trampoline == NULL) {
      void *tmp_ctx = ralloc_context(NULL);
      nir_shader *trampoline_nir =
         brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);

      struct anv_pipeline_bind_map bind_map = {
         .surface_count = 0,
         .sampler_count = 0,
      };
      uint32_t dummy_params[4] = { 0, };
      struct brw_cs_prog_data trampoline_prog_data = {
         .base.nr_params = 4,
         .base.param = dummy_params,
         .uses_inline_data = true,
         .uses_btd_stack_ids = true,
      };
      struct brw_compile_cs_params params = {
         .nir = trampoline_nir,
         .key = &trampoline_key.key,
         .prog_data = &trampoline_prog_data,
         .log_data = device,
      };
      const unsigned *tramp_data =
         brw_compile_cs(device->physical->compiler, tmp_ctx, &params);

      device->rt_trampoline =
         anv_device_upload_kernel(device, &device->default_pipeline_cache,
                                  MESA_SHADER_COMPUTE,
                                  &trampoline_key, sizeof(trampoline_key),
                                  tramp_data,
                                  trampoline_prog_data.base.program_size,
                                  &trampoline_prog_data.base,
                                  sizeof(trampoline_prog_data),
                                  NULL, 0, NULL, &bind_map);

      ralloc_free(tmp_ctx);

      if (device->rt_trampoline == NULL)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

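   /* The trivial-return shader is compiled as a callable stage that simply
    * returns to its caller, giving unused shader slots a valid do-nothing
    * target.
    */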
   struct brw_rt_trivial_return {
      char name[16];
      struct brw_bs_prog_key key;
   } return_key = {
      .name = "rt-trivial-ret",
   };
   device->rt_trivial_return =
      anv_device_search_for_kernel(device, &device->default_pipeline_cache,
                                   &return_key, sizeof(return_key),
                                   &cache_hit);
   if (device->rt_trivial_return == NULL) {
      void *tmp_ctx = ralloc_context(NULL);
      nir_shader *trivial_return_nir =
         brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);

      NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info);

      struct anv_pipeline_bind_map bind_map = {
         .surface_count = 0,
         .sampler_count = 0,
      };
      struct brw_bs_prog_data return_prog_data = { 0, };
      const unsigned *return_data =
         brw_compile_bs(device->physical->compiler, device, tmp_ctx,
                        &return_key.key, &return_prog_data, trivial_return_nir,
                        0, 0, NULL, NULL);

      device->rt_trivial_return =
         anv_device_upload_kernel(device, &device->default_pipeline_cache,
                                  MESA_SHADER_CALLABLE,
                                  &return_key, sizeof(return_key),
                                  return_data, return_prog_data.base.program_size,
                                  &return_prog_data.base, sizeof(return_prog_data),
                                  NULL, 0, NULL, &bind_map);

      ralloc_free(tmp_ctx);

      if (device->rt_trivial_return == NULL) {
         anv_shader_bin_unref(device, device->rt_trampoline);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   }

   return VK_SUCCESS;
}

void
anv_device_finish_rt_shaders(struct anv_device *device)
{
   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
      return;

   anv_shader_bin_unref(device, device->rt_trampoline);
   /* init_rt_shaders took a reference on the trivial-return shader too. */
   anv_shader_bin_unref(device, device->rt_trivial_return);
}

VkResult
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
                              struct anv_device *device,
                              struct anv_pipeline_cache *cache,
                              const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                              const VkAllocationCallbacks *alloc)
{
   VkResult result;

   util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);

   result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
   if (result != VK_SUCCESS)
      goto fail;

   anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);

   return VK_SUCCESS;

fail:
   util_dynarray_foreach(&pipeline->shaders,
                         struct anv_shader_bin *, shader) {
      anv_shader_bin_unref(device, *shader);
   }
   return result;
}

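/* Format a string into a fixed-size char field, zeroing it first and
 * asserting that the formatted text actually fit.
 */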
#define WRITE_STR(field, ...) ({                               \
   memset(field, 0, sizeof(field));                            \
   UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
   assert(i > 0 && i < sizeof(field));                         \
})

VkResult anv_GetPipelineExecutablePropertiesKHR(
    VkDevice                                    device,
    const VkPipelineInfoKHR*                    pPipelineInfo,
    uint32_t*                                   pExecutableCount,
    VkPipelineExecutablePropertiesKHR*          pProperties)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
   VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);

   util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
      vk_outarray_append(&out, props) {
         gl_shader_stage stage = exe->stage;
         props->stages = mesa_to_vk_shader_stage(stage);

         unsigned simd_width = exe->stats.dispatch_width;
         if (stage == MESA_SHADER_FRAGMENT) {
            WRITE_STR(props->name, "%s%d %s",
                      simd_width ? "SIMD" : "vec",
                      simd_width ? simd_width : 4,
                      _mesa_shader_stage_to_string(stage));
         } else {
            WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
         }
         WRITE_STR(props->description, "%s%d %s shader",
                   simd_width ? "SIMD" : "vec",
                   simd_width ? simd_width : 4,
                   _mesa_shader_stage_to_string(stage));

         /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
          * wants a subgroup size of 1.
          */
         props->subgroupSize = MAX2(simd_width, 1);
      }
   }

   return vk_outarray_status(&out);
}

static const struct anv_pipeline_executable *
anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
{
   assert(index < util_dynarray_num_elements(&pipeline->executables,
                                             struct anv_pipeline_executable));
   return util_dynarray_element(
      &pipeline->executables, struct anv_pipeline_executable, index);
}

VkResult anv_GetPipelineExecutableStatisticsKHR(
    VkDevice                                    device,
    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
    uint32_t*                                   pStatisticCount,
    VkPipelineExecutableStatisticKHR*           pStatistics)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);

   const struct anv_pipeline_executable *exe =
      anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   const struct brw_stage_prog_data *prog_data;
   switch (pipeline->type) {
   case ANV_PIPELINE_GRAPHICS: {
      prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
      break;
   }
   case ANV_PIPELINE_COMPUTE: {
      prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
      break;
   }
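   /* Ray-tracing pipelines are not handled here; their per-stage prog_data
    * is not exposed through this query yet.
    */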
   default:
      unreachable("invalid pipeline type");
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Instruction Count");
      WRITE_STR(stat->description,
                "Number of GEN instructions in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.instructions;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "SEND Count");
      WRITE_STR(stat->description,
                "Number of instructions in the final generated shader "
                "executable which access external units such as the "
                "constant cache or the sampler.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.sends;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Loop Count");
      WRITE_STR(stat->description,
                "Number of loops (not unrolled) in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.loops;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Cycle Count");
      WRITE_STR(stat->description,
                "Estimate of the number of EU cycles required to execute "
                "the final generated executable.  This is an estimate only "
                "and may vary greatly from actual run-time performance.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.cycles;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Spill Count");
      WRITE_STR(stat->description,
                "Number of scratch spill operations.  This gives a rough "
                "estimate of the cost incurred due to spilling temporary "
                "values to memory.  If this is non-zero, you may want to "
                "adjust your shader to reduce register pressure.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.spills;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Fill Count");
      WRITE_STR(stat->description,
                "Number of scratch fill operations.  This gives a rough "
                "estimate of the cost incurred due to spilling temporary "
                "values to memory.  If this is non-zero, you may want to "
                "adjust your shader to reduce register pressure.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.fills;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Scratch Memory Size");
      WRITE_STR(stat->description,
                "Number of bytes of scratch memory required by the "
                "generated shader executable.  If this is non-zero, you "
                "may want to adjust your shader to reduce register "
                "pressure.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = prog_data->total_scratch;
   }

   if (gl_shader_stage_uses_workgroup(exe->stage)) {
      vk_outarray_append(&out, stat) {
         WRITE_STR(stat->name, "Workgroup Memory Size");
         WRITE_STR(stat->description,
                   "Number of bytes of workgroup shared memory used by this "
                   "shader including any padding.");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->total_shared;
      }
   }

   return vk_outarray_status(&out);
}

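/* Copy a NUL-terminated IR string into a Vulkan-style output buffer.  A
 * NULL pData means the caller is only querying the required size.  Returns
 * false if the string had to be truncated to fit.
 */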
static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
              const char *data)
{
   ir->isText = VK_TRUE;

   size_t data_len = strlen(data) + 1;

   if (ir->pData == NULL) {
      ir->dataSize = data_len;
      return true;
   }

   strncpy(ir->pData, data, ir->dataSize);
   if (ir->dataSize < data_len) {
      /* strncpy() does not NUL-terminate on truncation, so terminate the
       * partial copy ourselves.
       */
      if (ir->dataSize > 0)
         ((char *)ir->pData)[ir->dataSize - 1] = '\0';
      return false;
   }

   ir->dataSize = data_len;
   return true;
}

VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
    VkDevice                                    device,
    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
    uint32_t*                                   pInternalRepresentationCount,
    VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE(out, pInternalRepresentations,
                    pInternalRepresentationCount);
   bool incomplete_text = false;

   const struct anv_pipeline_executable *exe =
      anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   if (exe->nir) {
      vk_outarray_append(&out, ir) {
         WRITE_STR(ir->name, "Final NIR");
         WRITE_STR(ir->description,
                   "Final NIR before going into the back-end compiler");

         if (!write_ir_text(ir, exe->nir))
            incomplete_text = true;
      }
   }

   if (exe->disasm) {
      vk_outarray_append(&out, ir) {
         WRITE_STR(ir->name, "GEN Assembly");
         WRITE_STR(ir->description,
                   "Final GEN assembly for the generated shader binary");

         if (!write_ir_text(ir, exe->disasm))
            incomplete_text = true;
      }
   }

   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}

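/* Copy the opaque shader-group handles for groups [firstGroup,
 * firstGroup + groupCount) into pData, one fixed-size handle per group.
 */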
VkResult
anv_GetRayTracingShaderGroupHandlesKHR(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    uint32_t                                    firstGroup,
    uint32_t                                    groupCount,
    size_t                                      dataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
      return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);

   struct anv_ray_tracing_pipeline *rt_pipeline =
      anv_pipeline_to_ray_tracing(pipeline);

   assert(firstGroup + groupCount <= rt_pipeline->group_count);

   for (uint32_t i = 0; i < groupCount; i++) {
      struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
      memcpy(pData, group->handle, sizeof(group->handle));
      pData += sizeof(group->handle);
   }

   return VK_SUCCESS;
}

VkResult
anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
    VkDevice                                    _device,
    VkPipeline                                  pipeline,
    uint32_t                                    firstGroup,
    uint32_t                                    groupCount,
    size_t                                      dataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkDeviceSize
anv_GetRayTracingShaderGroupStackSizeKHR(
    VkDevice                                    device,
    VkPipeline                                  _pipeline,
    uint32_t                                    group,
    VkShaderGroupShaderKHR                      groupShader)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
   assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);

   struct anv_ray_tracing_pipeline *rt_pipeline =
      anv_pipeline_to_ray_tracing(pipeline);

   assert(group < rt_pipeline->group_count);

   struct anv_shader_bin *bin;
   switch (groupShader) {
   case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
      bin = rt_pipeline->groups[group].general;
      break;

   case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
      bin = rt_pipeline->groups[group].closest_hit;
      break;

   case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
      bin = rt_pipeline->groups[group].any_hit;
      break;

   case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
      bin = rt_pipeline->groups[group].intersection;
      break;

   default:
      unreachable("Invalid VkShaderGroupShader enum");
   }

   if (bin == NULL)
      return 0;

   return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
}