/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

25#ifndef SHADER_INFO_H
26#define SHADER_INFO_H
27
28#include "util/bitset.h"
29#include "shader_enums.h"
30#include <stdint.h>
31
32#ifdef __cplusplus
33extern "C" {
34#endif
35
/* Capacity of shader_info::inlinable_uniform_dw_offsets[]. */
#define MAX_INLINABLE_UNIFORMS 4

/**
 * Set of independent boolean flags, one per SPIR-V capability or
 * extension feature.
 *
 * Judging by the struct name, a flag set to true means the corresponding
 * capability is supported; who fills in and who consumes these flags is not
 * visible in this header.  Zero-initialising the struct leaves every
 * capability disabled.
 *
 * The member order is kept exactly as-is: it is mostly, but not strictly,
 * alphabetical (e.g. float_controls and float16 are out of place).
 */
struct spirv_supported_capabilities {
   bool address;
   bool atomic_storage;
   bool demote_to_helper_invocation;
   bool derivative_group;
   bool descriptor_array_dynamic_indexing;
   bool descriptor_array_non_uniform_indexing;
   bool descriptor_indexing;
   bool device_group;
   bool draw_parameters;
   bool float16_atomic_add;
   bool float16_atomic_min_max;
   bool float32_atomic_add;
   bool float32_atomic_min_max;
   bool float64;
   bool float64_atomic_add;
   bool float64_atomic_min_max;
   bool fragment_shader_sample_interlock;
   bool fragment_shader_pixel_interlock;
   bool fragment_shading_rate;
   bool generic_pointers;
   bool geometry_streams;
   bool groups;
   bool image_ms_array;
   bool image_read_without_format;
   bool image_write_without_format;
   bool image_atomic_int64;
   bool int8;
   bool int16;
   bool int64;
   bool int64_atomics;
   bool integer_functions2;
   bool kernel;
   bool kernel_image;
   bool kernel_image_read_write;
   bool literal_sampler;
   bool mesh_shading_nv;
   bool min_lod;
   bool multiview;
   bool physical_storage_buffer_address;
   bool post_depth_coverage;
   bool printf;
   bool ray_tracing;
   bool ray_query;
   bool ray_traversal_primitive_culling;
   bool runtime_descriptor_array;
   bool float_controls;
   bool shader_clock;
   bool shader_viewport_index_layer;
   bool sparse_residency;
   bool stencil_export;
   bool storage_8bit;
   bool storage_16bit;
   bool storage_image_ms;
   bool subgroup_arithmetic;
   bool subgroup_ballot;
   bool subgroup_basic;
   bool subgroup_dispatch;
   bool subgroup_quad;
   bool subgroup_shuffle;
   bool subgroup_uniform_control_flow;
   bool subgroup_vote;
   bool tessellation;
   bool transform_feedback;
   bool variable_pointers;
   bool vk_memory_model;
   bool vk_memory_model_device_scope;
   bool workgroup_memory_explicit_layout;
   bool float16;

   /* amd_-prefixed flags (presumably AMD vendor extensions). */
   bool amd_fragment_mask;
   bool amd_gcn_shader;
   bool amd_shader_ballot;
   bool amd_trinary_minmax;
   bool amd_image_read_write_lod;
   bool amd_shader_explicit_vertex_parameter;
   bool amd_image_gather_bias_lod;

   /* intel_-prefixed flags (presumably Intel vendor extensions). */
   bool intel_subgroup_shuffle;
   bool intel_subgroup_buffer_block_io;
};

119typedef struct shader_info {
120   const char *name;
121
122   /* Descriptive name provided by the client; may be NULL */
123   const char *label;
124
125   /* Shader is internal, and should be ignored by things like NIR_PRINT */
126   bool internal;
127
128   /** The shader stage, such as MESA_SHADER_VERTEX. */
129   gl_shader_stage stage:8;
130
131   /** The shader stage in a non SSO linked program that follows this stage,
132     * such as MESA_SHADER_FRAGMENT.
133     */
134   gl_shader_stage next_stage:8;
135
136   /* Number of textures used by this shader */
137   uint8_t num_textures;
138   /* Number of uniform buffers used by this shader */
139   uint8_t num_ubos;
140   /* Number of atomic buffers used by this shader */
141   uint8_t num_abos;
142   /* Number of shader storage buffers (max .driver_location + 1) used by this
143    * shader.  In the case of nir_lower_atomics_to_ssbo being used, this will
144    * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs
145    * and atomic counters in nir_shader->info.
146    */
147   uint8_t num_ssbos;
148   /* Number of images used by this shader */
149   uint8_t num_images;
150
151   /* Which inputs are actually read */
152   uint64_t inputs_read;
153   /* Which outputs are actually written */
154   uint64_t outputs_written;
155   /* Which outputs are actually read */
156   uint64_t outputs_read;
157   /* Which system values are actually read */
158   BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
159
160   /* Which I/O is per-primitive, for read/written information combine with
161    * the fields above.
162    */
163   uint64_t per_primitive_inputs;
164   uint64_t per_primitive_outputs;
165
166   /* Which 16-bit inputs and outputs are used corresponding to
167    * VARYING_SLOT_VARn_16BIT.
168    */
169   uint16_t inputs_read_16bit;
170   uint16_t outputs_written_16bit;
171   uint16_t outputs_read_16bit;
172   uint16_t inputs_read_indirectly_16bit;
173   uint16_t outputs_accessed_indirectly_16bit;
174
175   /* Which patch inputs are actually read */
176   uint32_t patch_inputs_read;
177   /* Which patch outputs are actually written */
178   uint32_t patch_outputs_written;
179   /* Which patch outputs are read */
180   uint32_t patch_outputs_read;
181
182   /* Which inputs are read indirectly (subset of inputs_read) */
183   uint64_t inputs_read_indirectly;
184   /* Which outputs are read or written indirectly */
185   uint64_t outputs_accessed_indirectly;
186   /* Which patch inputs are read indirectly (subset of patch_inputs_read) */
187   uint64_t patch_inputs_read_indirectly;
188   /* Which patch outputs are read or written indirectly */
189   uint64_t patch_outputs_accessed_indirectly;
190
191   /** Bitfield of which textures are used */
192   BITSET_DECLARE(textures_used, 32);
193
194   /** Bitfield of which textures are used by texelFetch() */
195   BITSET_DECLARE(textures_used_by_txf, 32);
196
197   /** Bitfield of which images are used */
198   uint32_t images_used;
199   /** Bitfield of which images are buffers. */
200   uint32_t image_buffers;
201   /** Bitfield of which images are MSAA. */
202   uint32_t msaa_images;
203
204   /* SPV_KHR_float_controls: execution mode for floating point ops */
205   uint16_t float_controls_execution_mode;
206
207   /**
208    * Size of shared variables accessed by compute/task/mesh shaders.
209    */
210   unsigned shared_size;
211
212   /**
213    * Local workgroup size used by compute/task/mesh shaders.
214    */
215   uint16_t workgroup_size[3];
216
217   uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
218   uint8_t num_inlinable_uniforms:4;
219
220   /* The size of the gl_ClipDistance[] array, if declared. */
221   uint8_t clip_distance_array_size:4;
222
223   /* The size of the gl_CullDistance[] array, if declared. */
224   uint8_t cull_distance_array_size:4;
225
226   /* Whether or not this shader ever uses textureGather() */
227   bool uses_texture_gather:1;
228
229   /**
230    * True if this shader uses the fddx/fddy opcodes.
231    *
232    * Note that this does not include the "fine" and "coarse" variants.
233    */
234   bool uses_fddx_fddy:1;
235
236   /* Bitmask of bit-sizes used with ALU instructions. */
237   uint8_t bit_sizes_float;
238   uint8_t bit_sizes_int;
239
240   /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */
241   bool first_ubo_is_default_ubo:1;
242
243   /* Whether or not separate shader objects were used */
244   bool separate_shader:1;
245
246   /** Was this shader linked with any transform feedback varyings? */
247   bool has_transform_feedback_varyings:1;
248
249   /* Whether flrp has been lowered. */
250   bool flrp_lowered:1;
251
252   /* Whether nir_lower_io has been called to lower derefs.
253    * nir_variables for inputs and outputs might not be present in the IR.
254    */
255   bool io_lowered:1;
256
257   /* Whether the shader writes memory, including transform feedback. */
258   bool writes_memory:1;
259
260   /* Whether gl_Layer is viewport-relative */
261   bool layer_viewport_relative:1;
262
263   /* Whether explicit barriers are used */
264   bool uses_control_barrier : 1;
265   bool uses_memory_barrier : 1;
266
267   /**
268    * Shared memory types have explicit layout set.  Used for
269    * SPV_KHR_workgroup_storage_explicit_layout.
270    */
271   bool shared_memory_explicit_layout:1;
272
273   /**
274    * Used for VK_KHR_zero_initialize_workgroup_memory.
275    */
276   bool zero_initialize_shared_memory:1;
277
278   /**
279    * Used for ARB_compute_variable_group_size.
280    */
281   bool workgroup_size_variable:1;
282
283   /**
284     * Is this an ARB assembly-style program.
285     */
286   bool is_arb_asm;
287
288   union {
289      struct {
290         /* Which inputs are doubles */
291         uint64_t double_inputs;
292
293         /* For AMD-specific driver-internal shaders. It replaces vertex
294          * buffer loads with code generating VS inputs from scalar registers.
295          *
296          * Valid values: SI_VS_BLIT_SGPRS_POS_*
297          */
298         uint8_t blit_sgprs_amd:4;
299
300         /* True if the shader writes position in window space coordinates pre-transform */
301         bool window_space_position:1;
302
303         /** Is an edge flag input needed? */
304         bool needs_edge_flag:1;
305      } vs;
306
307      struct {
308         /** The output primitive type (GL enum value) */
309         uint16_t output_primitive;
310
311         /** The input primitive type (GL enum value) */
312         uint16_t input_primitive;
313
314         /** The maximum number of vertices the geometry shader might write. */
315         uint16_t vertices_out;
316
317         /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
318         uint8_t invocations;
319
320         /** The number of vertices received per input primitive (max. 6) */
321         uint8_t vertices_in:3;
322
323         /** Whether or not this shader uses EndPrimitive */
324         bool uses_end_primitive:1;
325
326         /** The streams used in this shaders (max. 4) */
327         uint8_t active_stream_mask:4;
328      } gs;
329
330      struct {
331         bool uses_discard:1;
332         bool uses_demote:1;
333         bool uses_fbfetch_output:1;
334         bool color_is_dual_source:1;
335
336         /**
337          * True if this fragment shader requires helper invocations.  This
338          * can be caused by the use of ALU derivative ops, texture
339          * instructions which do implicit derivatives, and the use of quad
340          * subgroup operations.
341          */
342         bool needs_quad_helper_invocations:1;
343
344         /**
345          * True if this fragment shader requires helper invocations for
346          * all subgroup operations, not just quad ops and derivatives.
347          */
348         bool needs_all_helper_invocations:1;
349
350         /**
351          * Whether any inputs are declared with the "sample" qualifier.
352          */
353         bool uses_sample_qualifier:1;
354
355         /**
356          * Whether sample shading is used.
357          */
358         bool uses_sample_shading:1;
359
360         /**
361          * Whether early fragment tests are enabled as defined by
362          * ARB_shader_image_load_store.
363          */
364         bool early_fragment_tests:1;
365
366         /**
367          * Defined by INTEL_conservative_rasterization.
368          */
369         bool inner_coverage:1;
370
371         bool post_depth_coverage:1;
372
373         /**
374          * \name ARB_fragment_coord_conventions
375          * @{
376          */
377         bool pixel_center_integer:1;
378         bool origin_upper_left:1;
379         /*@}*/
380
381         bool pixel_interlock_ordered:1;
382         bool pixel_interlock_unordered:1;
383         bool sample_interlock_ordered:1;
384         bool sample_interlock_unordered:1;
385
386         /**
387          * Flags whether NIR's base types on the FS color outputs should be
388          * ignored.
389          *
390          * GLSL requires that fragment shader output base types match the
391          * render target's base types for the behavior to be defined.  From
392          * the GL 4.6 spec:
393          *
394          *     "If the values written by the fragment shader do not match the
395          *      format(s) of the corresponding color buffer(s), the result is
396          *      undefined."
397          *
398          * However, for NIR shaders translated from TGSI, we don't have the
399          * output types any more, so the driver will need to do whatever
400          * fixups are necessary to handle effectively untyped data being
401          * output from the FS.
402          */
403         bool untyped_color_outputs:1;
404
405         /** gl_FragDepth layout for ARB_conservative_depth. */
406         enum gl_frag_depth_layout depth_layout:3;
407
408         /**
409          * Interpolation qualifiers for drivers that lowers color inputs
410          * to system values.
411          */
412         unsigned color0_interp:3; /* glsl_interp_mode */
413         bool color0_sample:1;
414         bool color0_centroid:1;
415         unsigned color1_interp:3; /* glsl_interp_mode */
416         bool color1_sample:1;
417         bool color1_centroid:1;
418
419         /* Bitmask of gl_advanced_blend_mode values that may be used with this
420          * shader.
421          */
422         unsigned advanced_blend_modes;
423      } fs;
424
425      struct {
426         uint16_t workgroup_size_hint[3];
427
428         uint8_t user_data_components_amd:3;
429
430         /*
431          * Arrangement of invocations used to calculate derivatives in a compute
432          * shader.  From NV_compute_shader_derivatives.
433          */
434         enum gl_derivative_group derivative_group:2;
435
436         /**
437          * Explicit subgroup size if set by the shader, otherwise 0.
438          */
439         unsigned subgroup_size;
440
441         /**
442          * pointer size is:
443          *   AddressingModelLogical:    0    (default)
444          *   AddressingModelPhysical32: 32
445          *   AddressingModelPhysical64: 64
446          */
447         unsigned ptr_size;
448
449         /**
450          * Uses subgroup intrinsics which can communicate across a quad.
451          */
452         bool uses_wide_subgroup_intrinsics;
453      } cs;
454
455      /* Applies to both TCS and TES. */
456      struct {
457         uint16_t primitive_mode; /* GL_TRIANGLES, GL_QUADS or GL_ISOLINES */
458
459         /** The number of vertices in the TCS output patch. */
460         uint8_t tcs_vertices_out;
461         enum gl_tess_spacing spacing:2;
462
463         /** Is the vertex order counterclockwise? */
464         bool ccw:1;
465         bool point_mode:1;
466
467         /* Bit mask of TCS per-vertex inputs (VS outputs) that are used
468          * with a vertex index that is NOT the invocation id
469          */
470         uint64_t tcs_cross_invocation_inputs_read;
471
472         /* Bit mask of TCS per-vertex outputs that are used
473          * with a vertex index that is NOT the invocation id
474          */
475         uint64_t tcs_cross_invocation_outputs_read;
476      } tess;
477
478      /* Applies to MESH. */
479      struct {
480         uint16_t max_vertices_out;
481         uint16_t max_primitives_out;
482         uint16_t primitive_type;  /* GL_POINTS, GL_LINES or GL_TRIANGLES. */
483      } mesh;
484   };
485} shader_info;
486
487#ifdef __cplusplus
488}
489#endif
490
491#endif /* SHADER_INFO_H */
492