1#
2# Copyright (C) 2018 Red Hat
3# Copyright (C) 2014 Intel Corporation
4#
5# Permission is hereby granted, free of charge, to any person obtaining a
6# copy of this software and associated documentation files (the "Software"),
7# to deal in the Software without restriction, including without limitation
8# the rights to use, copy, modify, merge, publish, distribute, sublicense,
9# and/or sell copies of the Software, and to permit persons to whom the
10# Software is furnished to do so, subject to the following conditions:
11#
12# The above copyright notice and this permission notice (including the next
13# paragraph) shall be included in all copies or substantial portions of the
14# Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22# IN THE SOFTWARE.
23#
24
25# This file defines all the available intrinsics in one place.
26#
27# The Intrinsic class corresponds one-to-one with nir_intrinsic_info
28# structure.
29
class Intrinsic(object):
   """Class that represents all the information about an intrinsic opcode.
   NOTE: this must be kept in sync with nir_intrinsic_info.
   """
   def __init__(self, name, src_components, dest_components,
                indices, flags, sysval, bit_sizes):
       """Parameters:

       - name: the intrinsic name
       - src_components: list of the number of components per src, 0 means
         vectorized instruction with number of components given in the
         num_components field in nir_intrinsic_instr.
         NOTE(review): definitions in this file also pass -1 for some
         sources; that value is not described here -- confirm its meaning.
       - dest_components: number of destination components, -1 means no
         dest, 0 means number of components given in num_components field
         in nir_intrinsic_instr.
       - indices: list of constant indices
       - flags: list of semantic flags
       - sysval: is this a system-value intrinsic
       - bit_sizes: allowed dest bit_sizes

       Derived attributes set here: num_srcs (len of src_components),
       has_dest (dest_components >= 0) and num_indices (len of indices).

       Every element of each list is type-checked, not only the first one,
       so a malformed entry anywhere in a list is caught when the table is
       built.
       """
       assert isinstance(name, str)
       assert isinstance(src_components, list)
       assert all(isinstance(comp, int) for comp in src_components)
       assert isinstance(dest_components, int)
       assert isinstance(indices, list)
       assert all(isinstance(index, str) for index in indices)
       assert isinstance(flags, list)
       assert all(isinstance(flag, str) for flag in flags)
       assert isinstance(sysval, bool)
       # bit_sizes is deliberately not required to be a list (and may be
       # falsy); only its elements are checked when there are any.
       if bit_sizes:
           assert all(isinstance(size, int) for size in bit_sizes)

       self.name = name
       self.num_srcs = len(src_components)
       self.src_components = src_components
       self.has_dest = (dest_components >= 0)
       self.dest_components = dest_components
       self.num_indices = len(indices)
       self.indices = indices
       self.flags = flags
       self.sysval = sysval
       self.bit_sizes = bit_sizes
75
76#
77# Possible indices:
78#
79
# A constant 'base' value that is added to an offset src:
BASE = "NIR_INTRINSIC_BASE"
# For store instructions, a writemask:
WRMASK = "NIR_INTRINSIC_WRMASK"
# The stream-id for GS emit_vertex/end_primitive intrinsics:
STREAM_ID = "NIR_INTRINSIC_STREAM_ID"
# The clip-plane id for load_user_clip_plane intrinsics:
UCP_ID = "NIR_INTRINSIC_UCP_ID"
# The amount of data, starting from BASE, that this instruction
# may access.  This is used to provide bounds if the offset is
# not constant.
RANGE = "NIR_INTRINSIC_RANGE"
# The vulkan descriptor set for the vulkan_resource_index
# intrinsic
DESC_SET = "NIR_INTRINSIC_DESC_SET"
# The vulkan descriptor set binding for the vulkan_resource_index
# intrinsic
BINDING = "NIR_INTRINSIC_BINDING"
# Component offset
COMPONENT = "NIR_INTRINSIC_COMPONENT"
# Interpolation mode (only meaningful for FS inputs)
INTERP_MODE = "NIR_INTRINSIC_INTERP_MODE"
# A binary nir_op to use when performing a reduction or scan operation
REDUCTION_OP = "NIR_INTRINSIC_REDUCTION_OP"
# Cluster size for reduction operations
CLUSTER_SIZE = "NIR_INTRINSIC_CLUSTER_SIZE"
# Parameter index for a load_param intrinsic
PARAM_IDX = "NIR_INTRINSIC_PARAM_IDX"
# Image dimensionality for image intrinsics
IMAGE_DIM = "NIR_INTRINSIC_IMAGE_DIM"
# Non-zero if we are accessing an array image
IMAGE_ARRAY = "NIR_INTRINSIC_IMAGE_ARRAY"
# Access qualifiers for image and memory access intrinsics.  copy_deref
# carries the separate DST_ACCESS and SRC_ACCESS indices instead of ACCESS.
ACCESS = "NIR_INTRINSIC_ACCESS"
DST_ACCESS = "NIR_INTRINSIC_DST_ACCESS"
SRC_ACCESS = "NIR_INTRINSIC_SRC_ACCESS"
# Image format for image intrinsics
FORMAT = "NIR_INTRINSIC_FORMAT"
# Offset or address alignment
ALIGN_MUL = "NIR_INTRINSIC_ALIGN_MUL"
ALIGN_OFFSET = "NIR_INTRINSIC_ALIGN_OFFSET"
# The vulkan descriptor type for vulkan_resource_index
DESC_TYPE = "NIR_INTRINSIC_DESC_TYPE"
123
124#
125# Possible flags:
126#
127
# NOTE(review): going by the names, CAN_ELIMINATE presumably marks intrinsics
# that may be dropped when their result is unused and CAN_REORDER those that
# may be moved relative to other instructions -- confirm against
# nir_intrinsic_info before relying on this.
CAN_ELIMINATE = "NIR_INTRINSIC_CAN_ELIMINATE"
CAN_REORDER   = "NIR_INTRINSIC_CAN_REORDER"

# Registry of every intrinsic defined in this file, keyed by intrinsic name
# with the corresponding Intrinsic object as value.  Filled in by intrinsic().
INTR_OPCODES = {}
132
# Defines a new NIR intrinsic.  By default, the intrinsic will have no sources
# and no destination.
#
# You can set dest_comp=n to enable a destination for the intrinsic, in which
# case it will have that many components, or =0 for "as many components as the
# NIR destination value."
#
# Set src_comp=n to enable sources for the instruction.  It can be an array of
# component counts, or (for convenience) a scalar component count if there's
# only one source.  If a component count is 0, it will be as many components as
# the intrinsic has based on the dest_comp.
def intrinsic(name, src_comp=None, dest_comp=-1, indices=None,
              flags=None, sysval=False, bit_sizes=None):
    """Create an Intrinsic and register it in INTR_OPCODES under `name`.

    Parameters:

    - name: the intrinsic name; must not have been registered already
    - src_comp: list of per-source component counts, or a single int as
      shorthand for a one-element list; omitted means no sources
    - dest_comp: destination component count, -1 (the default) for none
    - indices: list of constant-index names; omitted means none
    - flags: list of semantic flag names; omitted means none
    - sysval: whether this is a system-value intrinsic
    - bit_sizes: allowed destination bit sizes; omitted means an empty list

    Raises AssertionError if `name` is already registered or if Intrinsic
    rejects one of the arguments.
    """
    # Omitted list arguments get a fresh list per call.  Literal [] defaults
    # would be one shared object stored on every Intrinsic built with them.
    if src_comp is None:
        src_comp = []
    elif isinstance(src_comp, int):
        # Scalar convenience form promised by the comment above.
        src_comp = [src_comp]
    if indices is None:
        indices = []
    if flags is None:
        flags = []
    if bit_sizes is None:
        bit_sizes = []

    assert name not in INTR_OPCODES
    INTR_OPCODES[name] = Intrinsic(name, src_comp, dest_comp,
                                   indices, flags, sysval, bit_sizes)
149
# No sources, no destination, no indices.
intrinsic(
    "nop",
    flags=[CAN_ELIMINATE],
)

# The parameter to load is selected by the PARAM_IDX index.
intrinsic(
    "load_param",
    dest_comp=0,
    indices=[PARAM_IDX],
    flags=[CAN_ELIMINATE],
)

# Deref-based load, store and copy.
intrinsic(
    "load_deref",
    src_comp=[-1],
    dest_comp=0,
    indices=[ACCESS],
    flags=[CAN_ELIMINATE],
)
intrinsic(
    "store_deref",
    src_comp=[-1, 0],
    indices=[WRMASK, ACCESS],
)
intrinsic(
    "copy_deref",
    src_comp=[-1, -1],
    indices=[DST_ACCESS, SRC_ACCESS],
)
158
# Interpolation of input.  The interp_deref_at* intrinsics behave like a
# plain load of a shader input, except that the input is interpolated
# differently.  at_sample and at_offset take one extra source: an integer
# sample id and a vec2 position offset, respectively.
_INTERP_FLAGS = [CAN_ELIMINATE, CAN_REORDER]

intrinsic(
    "interp_deref_at_centroid",
    src_comp=[1],
    dest_comp=0,
    flags=list(_INTERP_FLAGS),
)
intrinsic(
    "interp_deref_at_sample",
    src_comp=[1, 1],
    dest_comp=0,
    flags=list(_INTERP_FLAGS),
)
intrinsic(
    "interp_deref_at_offset",
    src_comp=[1, 2],
    dest_comp=0,
    flags=list(_INTERP_FLAGS),
)

# Gets the length of an unsized array at the end of a buffer
intrinsic(
    "deref_buffer_array_length",
    src_comp=[-1],
    dest_comp=1,
    flags=list(_INTERP_FLAGS),
)

# Ask the driver for the size of a given buffer.  The buffer index is the
# only source.
intrinsic(
    "get_buffer_size",
    src_comp=[-1],
    dest_comp=1,
    flags=list(_INTERP_FLAGS),
)
del _INTERP_FLAGS
180
# a barrier is an intrinsic with no inputs/outputs but which can't be moved
# around/optimized in general
def barrier(name):
    """Register `name` through intrinsic() with every argument left at its
    default: no sources, no destination, no indices and no flags.
    """
    intrinsic(name)
185
for _barrier_name in (
    "barrier",
    "discard",
    # Memory barrier with semantics analogous to the memoryBarrier() GLSL
    # intrinsic.
    "memory_barrier",
):
    barrier(_barrier_name)
del _barrier_name
192
# Shader clock intrinsic; its semantics mirror the clock2x32ARB() GLSL
# intrinsic.
# The GLSL one can be used as a code motion barrier, which is currently not
# feasible with NIR.
intrinsic(
    "shader_clock",
    dest_comp=2,
    flags=[CAN_ELIMINATE],
)
198
# Shader ballot intrinsics.  Their semantics are analogous to these GLSL
# functions from ARB_shader_ballot:
#
#    ballotARB()
#    readInvocationARB()
#    readFirstInvocationARB()
for _ballot_name, _ballot_srcs in (
    ("ballot", [1]),
    ("read_invocation", [0, 1]),
    ("read_first_invocation", [0]),
):
    intrinsic(_ballot_name, src_comp=_ballot_srcs, dest_comp=0,
              flags=[CAN_ELIMINATE])
del _ballot_name, _ballot_srcs

# Additional SPIR-V ballot intrinsics
#
# These correspond to the SPIR-V opcodes
#
#    OpGroupUniformElect
#    OpSubgroupFirstInvocationKHR
for _ballot_name in ("elect", "first_invocation"):
    intrinsic(_ballot_name, dest_comp=1, flags=[CAN_ELIMINATE])
del _ballot_name
218
# Memory barriers with semantics analogous to the compute shader
# groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(),
# memoryBarrierImage() and memoryBarrierShared() GLSL intrinsics, followed by
# the two invocation-interlock barriers.
for _barrier_name in (
    "group_memory_barrier",
    "memory_barrier_atomic_counter",
    "memory_barrier_buffer",
    "memory_barrier_image",
    "memory_barrier_shared",
    "begin_invocation_interlock",
    "end_invocation_interlock",
):
    barrier(_barrier_name)
del _barrier_name
229
# A conditional discard, with a single boolean source.
intrinsic(
    "discard_if",
    src_comp=[1],
)

# ARB_shader_group_vote intrinsics.  Each has one source (with the component
# count paired with it below) and a one-component destination.
for _vote, _vote_src in (
    ("any", 1),
    ("all", 1),
    ("feq", 0),
    ("ieq", 0),
):
    intrinsic("vote_" + _vote, src_comp=[_vote_src], dest_comp=1,
              flags=[CAN_ELIMINATE])
del _vote, _vote_src
238
# Ballot ALU operations from SPIR-V.
#
# These operations work like their ALU counterparts except that they operate
# on a uvec4 which is treated as a 128bit integer.  Also, they are, in
# general, free to ignore any bits which are above the subgroup size.
intrinsic(
    "ballot_bitfield_extract",
    src_comp=[4, 1],
    dest_comp=1,
    flags=[CAN_ELIMINATE],
)
for _ballot_op in (
    "bit_count_reduce",
    "bit_count_inclusive",
    "bit_count_exclusive",
    "find_lsb",
    "find_msb",
):
    intrinsic("ballot_" + _ballot_op, src_comp=[4], dest_comp=1,
              flags=[CAN_ELIMINATE])
del _ballot_op
250
# Shuffle operations from SPIR-V.
for _shuffle in ("shuffle", "shuffle_xor", "shuffle_up", "shuffle_down"):
    intrinsic(_shuffle, src_comp=[0, 1], dest_comp=0, flags=[CAN_ELIMINATE])
del _shuffle

# Quad operations from SPIR-V.
intrinsic(
    "quad_broadcast",
    src_comp=[0, 1],
    dest_comp=0,
    flags=[CAN_ELIMINATE],
)
for _direction in ("horizontal", "vertical", "diagonal"):
    intrinsic("quad_swap_" + _direction, src_comp=[0], dest_comp=0,
              flags=[CAN_ELIMINATE])
del _direction
262
# Reduction and scans.  All three carry a REDUCTION_OP index; only reduce
# additionally carries CLUSTER_SIZE.
intrinsic(
    "reduce",
    src_comp=[0],
    dest_comp=0,
    indices=[REDUCTION_OP, CLUSTER_SIZE],
    flags=[CAN_ELIMINATE],
)
for _scan in ("inclusive_scan", "exclusive_scan"):
    intrinsic(_scan, src_comp=[0], dest_comp=0, indices=[REDUCTION_OP],
              flags=[CAN_ELIMINATE])
del _scan
269
# Basic Geometry Shader intrinsics.
#
# emit_vertex implements GLSL's EmitStreamVertex() built-in.  It takes a single
# index, which is the stream ID to write to.
#
# end_primitive implements GLSL's EndPrimitive() built-in.
for _gs_op in ("emit_vertex", "end_primitive"):
    intrinsic(_gs_op, indices=[STREAM_ID])

# Geometry Shader intrinsics with a vertex count.
#
# Alternatively, drivers may implement these intrinsics, and use
# nir_lower_gs_intrinsics() to convert from the basic intrinsics.
#
# These maintain a count of the number of vertices emitted, as an additional
# unsigned integer source.
for _gs_op in ("emit_vertex", "end_primitive"):
    intrinsic(_gs_op + "_with_counter", src_comp=[1], indices=[STREAM_ID])
del _gs_op
intrinsic(
    "set_vertex_count",
    src_comp=[1],
)
289
# Atomic counters
#
# The *_deref variants take a deref as their first source (presumably of the
# atomic_uint variable), while the other, lowered, variants take a constant
# buffer index and register offset.

def atomic(name, flags=None):
    """Register the no-operand counter atomic `name` in both of its forms.

    - name + "_deref": one source declared as -1, one-component destination.
    - name: one single-component source, one-component destination and a
      BASE index.

    `flags` is the list of semantic flags applied to both forms; omitted
    means no flags.
    """
    # A literal [] default would be a single list shared by every intrinsic
    # registered through this helper.
    if flags is None:
        flags = []
    # Slice-copy so the two registered intrinsics do not alias one list; a
    # slice keeps the argument's type, so Intrinsic still rejects non-lists.
    intrinsic(name + "_deref", src_comp=[-1], dest_comp=1, flags=flags[:])
    intrinsic(name, src_comp=[1], dest_comp=1, indices=[BASE], flags=flags[:])
298
def atomic2(name):
    """Register the two-source counter atomic `name` in both of its forms.

    - name + "_deref": sources [-1, 1], one-component destination.
    - name: sources [1, 1], one-component destination and a BASE index.

    Neither form gets any flags.
    """
    intrinsic(name + "_deref", src_comp=[-1, 1], dest_comp=1)
    intrinsic(name, src_comp=[1, 1], dest_comp=1, indices=[BASE])
302
def atomic3(name):
    """Register the three-source counter atomic `name` in both of its forms.

    - name + "_deref": sources [-1, 1, 1], one-component destination.
    - name: sources [1, 1, 1], one-component destination and a BASE index.

    Neither form gets any flags.
    """
    intrinsic(name + "_deref", src_comp=[-1, 1, 1], dest_comp=1)
    intrinsic(name, src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
306
# Counter atomics without a data operand; only the read may be eliminated.
for _counter_op in ("inc", "pre_dec", "post_dec"):
    atomic("atomic_counter_" + _counter_op)
atomic("atomic_counter_read", flags=[CAN_ELIMINATE])

# Counter atomics registered through atomic2().
for _counter_op in ("add", "min", "max", "and", "or", "xor", "exchange"):
    atomic2("atomic_counter_" + _counter_op)
del _counter_op

# Compare-and-swap is the only one registered through atomic3().
atomic3("atomic_counter_comp_swap")
319
# Image load, store and atomic intrinsics.
#
# All image intrinsics come in three versions.  One which takes an image
# target passed as a deref chain as the first source, one which takes an
# index as the first source, and one which takes a bindless handle as the
# first source.  In the first version, the image variable contains the memory
# and layout qualifiers that influence the semantics of the intrinsic.  In the
# second and third, the image format and access qualifiers are provided as
# constant indices.
#
# All image intrinsics take a four-coordinate vector and a sample index as
# 2nd and 3rd sources, determining the location within the image that will be
# accessed by the intrinsic.  Components not applicable to the image target
# in use are undefined.  Image store takes an additional four-component
# argument with the value to be written, and image atomic operations take
# either one or two additional scalar arguments with the same meaning as in
# the ARB_shader_image_load_store specification.
def image(name, src_comp=[], **kwargs):
    """Register the deref, index and bindless versions of image op `name`.

    A single-component first source (the image) is prepended to `src_comp`
    for every version.  Remaining keyword arguments are forwarded unchanged
    to intrinsic().
    """
    lowered_indices = (IMAGE_DIM, IMAGE_ARRAY, FORMAT, ACCESS)
    versions = (
        ("image_deref_", (ACCESS,)),
        ("image_", lowered_indices),
        ("bindless_image_", lowered_indices),
    )
    for prefix, index_names in versions:
        # Build new source and index lists for each registered intrinsic.
        intrinsic(prefix + name, src_comp=[1] + src_comp,
                  indices=list(index_names), **kwargs)
344
# src_comp below lists only the sources after the image itself; image()
# prepends the image source.
image("load", src_comp=[4, 1], dest_comp=0, flags=[CAN_ELIMINATE])
image("store", src_comp=[4, 1, 0])
image("atomic_add",  src_comp=[4, 1, 1], dest_comp=1)
image("atomic_min",  src_comp=[4, 1, 1], dest_comp=1)
image("atomic_max",  src_comp=[4, 1, 1], dest_comp=1)
image("atomic_and",  src_comp=[4, 1, 1], dest_comp=1)
image("atomic_or",   src_comp=[4, 1, 1], dest_comp=1)
image("atomic_xor",  src_comp=[4, 1, 1], dest_comp=1)
image("atomic_exchange",  src_comp=[4, 1, 1], dest_comp=1)
image("atomic_comp_swap", src_comp=[4, 1, 1, 1], dest_comp=1)
# Same source layout as atomic_add.  This used to spell out the image source
# as well ([1, 4, 1, 1]), which declared five sources once image() had
# prepended its own.
image("atomic_fadd",  src_comp=[4, 1, 1], dest_comp=1)
image("size",    dest_comp=0, flags=[CAN_ELIMINATE, CAN_REORDER])
image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
358
# Intel-specific query for loading from the brw_image_param struct passed
# into the shader as a uniform.  The source is a deref to the image
# variable.  The const index specifies which of the six parameters to load.
intrinsic(
    "image_deref_load_param_intel",
    src_comp=[1],
    dest_comp=0,
    indices=[BASE],
    flags=[CAN_ELIMINATE, CAN_REORDER],
)

# Intel raw image load/store, registered in all three image versions.
image(
    "load_raw_intel",
    src_comp=[1],
    dest_comp=0,
    flags=[CAN_ELIMINATE],
)
image(
    "store_raw_intel",
    src_comp=[1, 0],
)
367
# Vulkan descriptor set intrinsics
#
# The Vulkan API uses a different binding model from GL.  In the Vulkan
# API, all external resources are represented by a tuple:
#
# (descriptor set, binding, array index)
#
# where the array index is the only thing allowed to be indirect.  The
# vulkan_resource_index intrinsic takes the descriptor set and binding as
# its first two indices and the array index as its source.  The third
# index is the descriptor type.
#
# The intended usage is that the shader will call vulkan_resource_index to
# get an index and then pass that as the buffer index to ubo/ssbo calls.
#
# The vulkan_resource_reindex intrinsic takes a resource index in src0
# (the result of a vulkan_resource_index or vulkan_resource_reindex) which
# corresponds to the tuple (set, binding, index) and computes an index
# corresponding to tuple (set, binding, idx + src1).
intrinsic(
    "vulkan_resource_index",
    src_comp=[1],
    dest_comp=0,
    indices=[DESC_SET, BINDING, DESC_TYPE],
    flags=[CAN_ELIMINATE, CAN_REORDER],
)
intrinsic(
    "vulkan_resource_reindex",
    src_comp=[0, 1],
    dest_comp=0,
    indices=[DESC_TYPE],
    flags=[CAN_ELIMINATE, CAN_REORDER],
)
intrinsic(
    "load_vulkan_descriptor",
    src_comp=[-1],
    dest_comp=0,
    indices=[DESC_TYPE],
    flags=[CAN_ELIMINATE, CAN_REORDER],
)
394
# variable atomic intrinsics
#
# Each of these variable atomic memory operations reads a value from memory,
# computes a new value using the named operation, writes the new value to
# memory, and returns the original value read.
#
# All operations take 2 sources except CompSwap that takes 3. These sources
# represent:
#
# 0: A deref to the memory on which to perform the atomic
# 1: The data parameter to the atomic function (i.e. the value to add
#    in deref_atomic_add, etc).
# 2: For CompSwap only: the second data parameter.
#
# The table pairs each operation with its number of data parameters.
for _atomic_op, _num_data in (
    ("add", 1),
    ("imin", 1),
    ("umin", 1),
    ("imax", 1),
    ("umax", 1),
    ("and", 1),
    ("or", 1),
    ("xor", 1),
    ("exchange", 1),
    ("comp_swap", 2),
    ("fadd", 1),
    ("fmin", 1),
    ("fmax", 1),
    ("fcomp_swap", 2),
):
    intrinsic("deref_atomic_" + _atomic_op,
              src_comp=[-1] + [1] * _num_data,
              dest_comp=1, indices=[ACCESS])
del _atomic_op, _num_data
422
# SSBO atomic intrinsics
#
# Each SSBO atomic memory operation reads a value from memory, computes a
# new value using the named operation, writes the new value to memory, and
# returns the original value read.
#
# All operations take 3 sources except CompSwap that takes 4. These
# sources represent:
#
# 0: The SSBO buffer index.
# 1: The offset into the SSBO buffer of the variable that the atomic
#    operation will operate on.
# 2: The data parameter to the atomic function (i.e. the value to add
#    in ssbo_atomic_add, etc).
# 3: For CompSwap only: the second data parameter.
#
# The table pairs each operation with its number of data parameters.
for _atomic_op, _num_data in (
    ("add", 1),
    ("imin", 1),
    ("umin", 1),
    ("imax", 1),
    ("umax", 1),
    ("and", 1),
    ("or", 1),
    ("xor", 1),
    ("exchange", 1),
    ("comp_swap", 2),
    ("fadd", 1),
    ("fmin", 1),
    ("fmax", 1),
    ("fcomp_swap", 2),
):
    intrinsic("ssbo_atomic_" + _atomic_op,
              src_comp=[1, 1] + [1] * _num_data,
              dest_comp=1, indices=[ACCESS])
del _atomic_op, _num_data
452
# CS shared variable atomic intrinsics
#
# Each shared variable atomic memory operation reads a value from memory,
# computes a new value using the named operation, writes the new value to
# memory, and returns the original value read.
#
# All operations take 2 sources except CompSwap that takes 3. These
# sources represent:
#
# 0: The offset into the shared variable storage region that the atomic
#    operation will operate on.
# 1: The data parameter to the atomic function (i.e. the value to add
#    in shared_atomic_add, etc).
# 2: For CompSwap only: the second data parameter.
#
# The table pairs each operation with its number of data parameters.
for _atomic_op, _num_data in (
    ("add", 1),
    ("imin", 1),
    ("umin", 1),
    ("imax", 1),
    ("umax", 1),
    ("and", 1),
    ("or", 1),
    ("xor", 1),
    ("exchange", 1),
    ("comp_swap", 2),
    ("fadd", 1),
    ("fmin", 1),
    ("fmax", 1),
    ("fcomp_swap", 2),
):
    intrinsic("shared_atomic_" + _atomic_op,
              src_comp=[1] + [1] * _num_data,
              dest_comp=1, indices=[BASE])
del _atomic_op, _num_data
481
# Global atomic intrinsics
#
# Each global atomic memory operation reads a value from memory, computes a
# new value using the named operation, writes the new value to memory, and
# returns the original value read.
#
# All operations take 2 sources except CompSwap that takes 3. These
# sources represent:
#
# 0: The memory address that the atomic operation will operate on.
# 1: The data parameter to the atomic function (i.e. the value to add
#    in global_atomic_add, etc).
# 2: For CompSwap only: the second data parameter.
#
# The table pairs each operation with its number of data parameters.
for _atomic_op, _num_data in (
    ("add", 1),
    ("imin", 1),
    ("umin", 1),
    ("imax", 1),
    ("umax", 1),
    ("and", 1),
    ("or", 1),
    ("xor", 1),
    ("exchange", 1),
    ("comp_swap", 2),
    ("fadd", 1),
    ("fmin", 1),
    ("fmax", 1),
    ("fcomp_swap", 2),
):
    intrinsic("global_atomic_" + _atomic_op,
              src_comp=[1] + [1] * _num_data,
              dest_comp=1, indices=[BASE])
del _atomic_op, _num_data
509
def system_value(name, dest_comp, indices=None, bit_sizes=None):
    """Register the system-value intrinsic "load_" + name.

    The intrinsic has no sources, `dest_comp` destination components, the
    CAN_ELIMINATE and CAN_REORDER flags, and sysval=True.

    - indices: list of constant-index names; omitted means none
    - bit_sizes: allowed destination bit sizes; omitted means [32]
    """
    # Build fresh lists per call.  Literal list defaults would be single
    # objects shared by every system value relying on them.
    if indices is None:
        indices = []
    if bit_sizes is None:
        bit_sizes = [32]
    intrinsic("load_" + name, [], dest_comp, indices,
              flags=[CAN_ELIMINATE, CAN_REORDER], sysval=True,
              bit_sizes=bit_sizes)
514
# Table of (name, destination components, extra keyword arguments), in
# registration order.
for _sysval_name, _sysval_comps, _sysval_opts in (
    ("frag_coord", 4, {}),
    ("front_face", 1, {"bit_sizes": [1, 32]}),
    ("vertex_id", 1, {}),
    ("vertex_id_zero_base", 1, {}),
    ("first_vertex", 1, {}),
    ("is_indexed_draw", 1, {}),
    ("base_vertex", 1, {}),
    ("instance_id", 1, {}),
    ("base_instance", 1, {}),
    ("draw_id", 1, {}),
    ("sample_id", 1, {}),
    # sample_id_no_per_sample is like sample_id but does not imply per-
    # sample shading.  See the lower_helper_invocation option.
    ("sample_id_no_per_sample", 1, {}),
    ("sample_pos", 2, {}),
    ("sample_mask_in", 1, {}),
    ("primitive_id", 1, {}),
    ("invocation_id", 1, {}),
    ("tess_coord", 3, {}),
    ("tess_level_outer", 4, {}),
    ("tess_level_inner", 2, {}),
    ("patch_vertices_in", 1, {}),
    ("local_invocation_id", 3, {}),
    ("local_invocation_index", 1, {}),
    ("work_group_id", 3, {}),
    ("user_clip_plane", 4, {"indices": [UCP_ID]}),
    ("num_work_groups", 3, {}),
    ("helper_invocation", 1, {"bit_sizes": [1, 32]}),
    ("alpha_ref_float", 1, {}),
    ("layer_id", 1, {}),
    ("view_index", 1, {}),
    ("subgroup_size", 1, {}),
    ("subgroup_invocation", 1, {}),
    ("subgroup_eq_mask", 0, {"bit_sizes": [32, 64]}),
    ("subgroup_ge_mask", 0, {"bit_sizes": [32, 64]}),
    ("subgroup_gt_mask", 0, {"bit_sizes": [32, 64]}),
    ("subgroup_le_mask", 0, {"bit_sizes": [32, 64]}),
    ("subgroup_lt_mask", 0, {"bit_sizes": [32, 64]}),
    ("num_subgroups", 1, {}),
    ("subgroup_id", 1, {}),
    ("local_group_size", 3, {}),
    ("global_invocation_id", 3, {"bit_sizes": [32, 64]}),
    ("global_invocation_index", 1, {"bit_sizes": [32, 64]}),
    ("work_dim", 1, {}),
):
    system_value(_sysval_name, _sysval_comps, **_sysval_opts)
del _sysval_name, _sysval_comps, _sysval_opts
# Driver-specific viewport scale/offset parameters.
#
# VC4 and V3D need to emit a scaled version of the position in the vertex
# shaders for binning, and having system values lets us move the math for that
# into NIR.
#
# Panfrost needs to implement all coordinate transformation in the
# vertex shader; system values allow us to share this routine in NIR.
for _viewport_param in ("x_scale", "y_scale", "z_scale", "z_offset"):
    system_value("viewport_" + _viewport_param, 1)
for _viewport_param in ("scale", "offset"):
    system_value("viewport_" + _viewport_param, 3)
del _viewport_param
573
# Blend constant color values.  Float values are clamped.
for _blend_suffix in (
    "r_float",
    "g_float",
    "b_float",
    "a_float",
    "rgba8888_unorm",
    "aaaa8888_unorm",
):
    system_value("blend_const_color_" + _blend_suffix, 1)
del _blend_suffix
581
# Barycentric coordinate intrinsics.
#
# These set up the barycentric coordinates for a particular interpolation.
# The first three are for the simple cases: pixel, centroid, or per-sample
# (at gl_SampleID).  The next two handle interpolating at a specified
# sample location, or interpolating with a vec2 offset.
#
# The interp_mode index should be either the INTERP_MODE_SMOOTH or
# INTERP_MODE_NOPERSPECTIVE enum values.
#
# The vec2 value produced by these intrinsics is intended for use as the
# barycoord source of a load_interpolated_input intrinsic.

def barycentric(name, src_comp=None):
    """Register the intrinsic "load_barycentric_" + name.

    It has a two-component destination, an INTERP_MODE index and the
    CAN_ELIMINATE and CAN_REORDER flags.  `src_comp` is the list of
    per-source component counts; omitted means no sources.
    """
    # A literal [] default would be one list shared by every source-less
    # barycentric intrinsic.
    if src_comp is None:
        src_comp = []
    intrinsic("load_barycentric_" + name, src_comp=src_comp, dest_comp=2,
              indices=[INTERP_MODE], flags=[CAN_ELIMINATE, CAN_REORDER])
598
# no sources.
for _bary_name in ("pixel", "centroid", "sample"):
    barycentric(_bary_name)
del _bary_name
# src[] = { sample_id }.
barycentric("at_sample", src_comp=[1])
# src[] = { offset.xy }.
barycentric("at_offset", src_comp=[2])
607
# Load sample position:
#
# Takes a sample # and returns a sample position.  Used for lowering
# interpolateAtSample() to interpolateAtOffset()
intrinsic(
    "load_sample_pos_from_id",
    src_comp=[1],
    dest_comp=2,
    flags=[CAN_ELIMINATE, CAN_REORDER],
)

# Loads what I believe is the primitive size, for scaling ij to pixel size:
intrinsic(
    "load_size_ir3",
    dest_comp=1,
    flags=[CAN_ELIMINATE, CAN_REORDER],
)
617
# Load operations pull data from some piece of GPU memory.  All load
# operations operate in terms of offsets into some piece of theoretical
# memory.  Loads from externally visible memory (UBO and SSBO) simply take a
# byte offset as a source.  Loads from opaque memory (uniforms, inputs, etc.)
# take a base+offset pair where the nir_intrinsic_base() gives the location
# of the start of the variable being loaded and the offset source is an
# offset into that variable.
#
# Uniform load operations have a nir_intrinsic_range() index that specifies the
# range (starting at base) of the data from which we are loading.  If
# range == 0, then the range is unknown.
#
# Some load operations such as UBO/SSBO load and per_vertex loads take an
# additional source to specify which UBO/SSBO/vertex to load from.
#
# The exact address type depends on the lowering pass that generates the
# load/store intrinsics.  Typically, this is vec4 units for things such as
# varying slots and float units for fragment shader inputs.  UBO and SSBO
# offsets are always in bytes.

def load(name, num_srcs, indices=None, flags=None):
    """Register the intrinsic "load_" + name.

    It has `num_srcs` single-component sources and a destination whose
    component count is declared as 0.

    - indices: list of constant-index names; omitted means none
    - flags: list of semantic flag names; omitted means none
    """
    # Fresh lists per call rather than literal [] defaults, which would be
    # shared by every intrinsic registered without them.
    if indices is None:
        indices = []
    if flags is None:
        flags = []
    intrinsic("load_" + name, [1] * num_srcs, dest_comp=0, indices=indices,
              flags=flags)
641
# src[] = { offset }.
load("uniform", num_srcs=1,
     indices=[BASE, RANGE],
     flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { buffer_index, offset }.
load("ubo", num_srcs=2,
     indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET],
     flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { offset }.
load("input", num_srcs=1,
     indices=[BASE, COMPONENT],
     flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { vertex, offset }.
load("per_vertex_input", num_srcs=2,
     indices=[BASE, COMPONENT],
     flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { barycoord, offset }.
intrinsic(
    "load_interpolated_input",
    src_comp=[2, 1],
    dest_comp=0,
    indices=[BASE, COMPONENT],
    flags=[CAN_ELIMINATE, CAN_REORDER],
)

# src[] = { buffer_index, offset }.
load("ssbo", num_srcs=2,
     indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET],
     flags=[CAN_ELIMINATE])
# src[] = { offset }.
load("output", num_srcs=1,
     indices=[BASE, COMPONENT],
     flags=[CAN_ELIMINATE])
# src[] = { vertex, offset }.
load("per_vertex_output", num_srcs=2,
     indices=[BASE, COMPONENT],
     flags=[CAN_ELIMINATE])
# src[] = { offset }.
load("shared", num_srcs=1,
     indices=[BASE, ALIGN_MUL, ALIGN_OFFSET],
     flags=[CAN_ELIMINATE])
# src[] = { offset }.
load("push_constant", num_srcs=1,
     indices=[BASE, RANGE],
     flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { offset }.
load("constant", num_srcs=1,
     indices=[BASE, RANGE],
     flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { address }.
load("global", num_srcs=1,
     indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET],
     flags=[CAN_ELIMINATE])
# src[] = { address }.
load("kernel_input", num_srcs=1,
     indices=[BASE, RANGE, ALIGN_MUL, ALIGN_OFFSET],
     flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { offset }.
load("scratch", num_srcs=1,
     indices=[ALIGN_MUL, ALIGN_OFFSET],
     flags=[CAN_ELIMINATE])
672
# Stores work the same way as loads, except now the first source is the value
# to store and the second (and possibly third) source specify where to store
# the value.  SSBO and shared memory stores also have a
# nir_intrinsic_write_mask()

def store(name, num_srcs, indices=None, flags=None):
    """Register the intrinsic "store_" + name, which has no destination.

    Of its `num_srcs` sources, the first (the value) has its component
    count declared as 0 and each remaining one is single-component.

    - indices: list of constant-index names; omitted means none
    - flags: list of semantic flag names; omitted means none
    """
    # Fresh lists per call rather than literal [] defaults, which would be
    # shared by every intrinsic registered without them.
    if indices is None:
        indices = []
    if flags is None:
        flags = []
    intrinsic("store_" + name, [0] + ([1] * (num_srcs - 1)), indices=indices, flags=flags)
680
# src[] = { value, offset }.
store("output", num_srcs=2,
      indices=[BASE, WRMASK, COMPONENT])
# src[] = { value, vertex, offset }.
store("per_vertex_output", num_srcs=3,
      indices=[BASE, WRMASK, COMPONENT])
# src[] = { value, block_index, offset }
store("ssbo", num_srcs=3,
      indices=[WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
# src[] = { value, offset }.
store("shared", num_srcs=2,
      indices=[BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
# src[] = { value, address }.
store("global", num_srcs=2,
      indices=[WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
# src[] = { value, offset }.
store("scratch", num_srcs=2,
      indices=[ALIGN_MUL, ALIGN_OFFSET, WRMASK])
693
# IR3-specific version of most SSBO intrinsics. The only difference
# compared to the originals is that they add an extra source to hold
# the dword-offset, which is needed by the backend code apart from
# the byte-offset already provided by NIR in one of the sources.
#
# NIR lowering pass 'ir3_nir_lower_io_offset' will replace the
# original SSBO intrinsics by these, placing the computed
# dword-offset always in the last source.
#
# The float versions are not handled because those are not supported
# by the backend.
intrinsic(
    "store_ssbo_ir3",
    src_comp=[0, 1, 1, 1],
    indices=[WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET],
)
intrinsic(
    "load_ssbo_ir3",
    src_comp=[1, 1, 1],
    dest_comp=0,
    indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET],
    flags=[CAN_ELIMINATE],
)
# The atomics take four single-component sources, comp_swap five.
for _ir3_op, _ir3_num_srcs in (
    ("add", 4),
    ("imin", 4),
    ("umin", 4),
    ("imax", 4),
    ("umax", 4),
    ("and", 4),
    ("or", 4),
    ("xor", 4),
    ("exchange", 4),
    ("comp_swap", 5),
):
    intrinsic("ssbo_atomic_" + _ir3_op + "_ir3",
              src_comp=[1] * _ir3_num_srcs, dest_comp=1)
del _ir3_op, _ir3_num_srcs
719