1/*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "nir_to_dxil.h"
25
26#include "dxil_container.h"
27#include "dxil_dump.h"
28#include "dxil_enums.h"
29#include "dxil_function.h"
30#include "dxil_module.h"
31#include "dxil_nir.h"
32#include "dxil_signature.h"
33
34#include "nir/nir_builder.h"
35#include "util/u_debug.h"
36#include "util/u_dynarray.h"
37#include "util/u_math.h"
38
39#include "git_sha1.h"
40
41#include "vulkan/vulkan_core.h"
42
43#include <stdint.h>
44
/* Bitmask of DXIL_DEBUG_* flags; presumably populated from the DXIL_DEBUG
 * environment variable via the getter generated below — confirm at init site. */
int debug_dxil = 0;

/* Flag names accepted in the DXIL_DEBUG environment variable. */
static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob",  DXIL_DEBUG_DUMP_BLOB , "Write shader blobs" },
   { "trace",  DXIL_DEBUG_TRACE , "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "dump module tree to stderr"},
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)
57
/* Print @instr to stderr when verbose debugging is enabled.
 * The whole construct is wrapped in do/while(0) so the macro expands to a
 * single statement; the previous form left the `if` outside the loop, which
 * created a dangling-else hazard at use sites like
 * `if (x) NIR_INSTR_UNSUPPORTED(i); else ...`. */
#define NIR_INSTR_UNSUPPORTED(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_VERBOSE) { \
         fprintf(stderr, "Unsupported instruction:"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "\n"); \
      } \
   } while (0)
65
/* Trace the NIR instruction about to be converted when DXIL_DEBUG=trace.
 * Wrapped entirely in do/while(0) — the previous form placed the `if`
 * outside the loop, a dangling-else hazard at use sites. */
#define TRACE_CONVERSION(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_TRACE) { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } \
   } while (0)
73
/* NIR compiler options for the DXIL backend: every operation DXIL has no
 * direct encoding for is requested to be lowered in NIR instead. */
static const nir_shader_compiler_options
nir_options = {
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract_to_shifts = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_all_io_to_temps = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_mul_high = true,
   .lower_rotate = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .has_fsub = true,
   .has_isub = true,
   .use_scoped_barrier = true,
   /* DXIL's vertex/instance IDs are zero-based; base vertex is applied in NIR */
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .has_cs_global_id = true,
   .has_txs = true,
};
115
116const nir_shader_compiler_options*
117dxil_get_nir_compiler_options(void)
118{
119   return &nir_options;
120}
121
122static bool
123emit_llvm_ident(struct dxil_module *m)
124{
125   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
126   if (!compiler)
127      return false;
128
129   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
130   return llvm_ident &&
131          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
132}
133
134static bool
135emit_named_version(struct dxil_module *m, const char *name,
136                   int major, int minor)
137{
138   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
139   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
140   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
141   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
142                                                     ARRAY_SIZE(version_nodes));
143   return dxil_add_metadata_named_node(m, name, &version, 1);
144}
145
146static const char *
147get_shader_kind_str(enum dxil_shader_kind kind)
148{
149   switch (kind) {
150   case DXIL_PIXEL_SHADER:
151      return "ps";
152   case DXIL_VERTEX_SHADER:
153      return "vs";
154   case DXIL_GEOMETRY_SHADER:
155      return "gs";
156   case DXIL_HULL_SHADER:
157      return "hs";
158   case DXIL_DOMAIN_SHADER:
159      return "ds";
160   case DXIL_COMPUTE_SHADER:
161      return "cs";
162   default:
163      unreachable("invalid shader kind");
164   }
165}
166
167static bool
168emit_dx_shader_model(struct dxil_module *m)
169{
170   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
171   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
172   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
173   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
174                                                minor_node };
175   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));
176
177   return dxil_add_metadata_named_node(m, "dx.shaderModel",
178                                       &dx_shader_model, 1);
179}
180
/* Tag keys used in a resource's extended-property metadata list. */
enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};
185
/* Opcode values for DXIL intrinsics — the constant i32 first argument of
 * every dx.op.* call.  Values must match the DXIL opcode table; gaps are
 * opcodes this backend does not emit. */
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_HI = 33,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_SAMPLE_INDEX = 90,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
};
270
/* Operation selector passed as the third argument of dx.op.atomicBinOp;
 * values follow the DXIL atomic-op encoding. */
enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};
282
/* Describes one declared range of resource binding slots. */
typedef struct {
   unsigned id;      /* record ID, relative to the resource class */
   unsigned binding; /* first binding slot (lower bound) */
   unsigned size;    /* slot count; 0 means unbounded (see add_resource) */
   unsigned space;   /* register space / descriptor set */
} resource_array_layout;
289
/* Populate fields[0..5] — the prefix common to every resource-class
 * metadata record (ID, symbol, name, space, lower bound, range size).
 * Callers fill in the class-specific fields that follow. */
static void
fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
                       const struct dxil_type *struct_type,
                       const char *name, const resource_array_layout *layout)
{
   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);

   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
}
305
306static const struct dxil_mdnode *
307emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
308                  const char *name, const resource_array_layout *layout,
309                  enum dxil_component_type comp_type,
310                  enum dxil_resource_kind res_kind)
311{
312   const struct dxil_mdnode *fields[9];
313
314   const struct dxil_mdnode *metadata_tag_nodes[2];
315
316   fill_resource_metadata(m, fields, elem_type, name, layout);
317   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
318   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
319   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
320       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
321      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
322      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
323      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
324   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
325      fields[8] = NULL;
326   else
327      unreachable("Structured buffers not supported yet");
328
329   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
330}
331
332static const struct dxil_mdnode *
333emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
334                  const char *name, const resource_array_layout *layout,
335                  enum dxil_component_type comp_type,
336                  enum dxil_resource_kind res_kind)
337{
338   const struct dxil_mdnode *fields[11];
339
340   const struct dxil_mdnode *metadata_tag_nodes[2];
341
342   fill_resource_metadata(m, fields, struct_type, name, layout);
343   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
344   fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent
345   fields[8] = dxil_get_metadata_int1(m, false); // has counter
346   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
347   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
348       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
349      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
350      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
351      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
352   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
353      fields[10] = NULL;
354   else
355      unreachable("Structured buffers not supported yet");
356
357   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
358}
359
360static const struct dxil_mdnode *
361emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
362                  const char *name, const resource_array_layout *layout,
363                  unsigned size)
364{
365   const struct dxil_mdnode *fields[8];
366
367   fill_resource_metadata(m, fields, struct_type, name, layout);
368   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
369   fields[7] = NULL; // metadata
370
371   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
372}
373
374static const struct dxil_mdnode *
375emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
376                      nir_variable *var, const resource_array_layout *layout)
377{
378   const struct dxil_mdnode *fields[8];
379   const struct glsl_type *type = glsl_without_array(var->type);
380
381   fill_resource_metadata(m, fields, struct_type, var->name, layout);
382   fields[6] = dxil_get_metadata_int32(m, DXIL_SAMPLER_KIND_DEFAULT); // sampler kind
383   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
384          DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
385   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
386   fields[7] = NULL; // metadata
387
388   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
389}
390
391
/* Upper bounds for the per-class handle arrays in ntd_context.  The CBV
 * and sampler limits are conservative guesses (hence the original "??"). */
#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??
396
/* The emitted DXIL value for each component of one NIR def. */
struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};
400
/* State for one NIR→DXIL conversion, threaded through all emit_* helpers. */
struct ntd_context {
   void *ralloc_ctx;                        /* allocation parent */
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;               /* shader being converted */

   struct dxil_module mod;                  /* DXIL module under construction */

   /* Per-class resource metadata records plus pre-created access handles. */
   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *uav_handles[MAX_UAVS];

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   /* Flat list of dxil_resource ranges; ordering matters to get_resource_id. */
   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;                   /* emitted values, indexed by def */
   unsigned num_defs;
   struct hash_table *phis;                 /* phi bookkeeping (see uses below) */

   const struct dxil_value *sharedvars;     /* workgroup-shared storage */
   const struct dxil_value *scratchvars;    /* scratch storage */
   struct hash_table *consts;

   nir_variable *ps_front_face;
   nir_variable *system_value[SYSTEM_VALUE_MAX];
};
436
437static const char*
438unary_func_name(enum dxil_intr intr)
439{
440   switch (intr) {
441   case DXIL_INTR_COUNTBITS:
442   case DXIL_INTR_FIRSTBIT_HI:
443      return "dx.op.unaryBits";
444   case DXIL_INTR_ISFINITE:
445   case DXIL_INTR_ISNORMAL:
446      return "dx.op.isSpecialFloat";
447   default:
448      return "dx.op.unary";
449   }
450}
451
452static const struct dxil_value *
453emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
454                enum dxil_intr intr,
455                const struct dxil_value *op0)
456{
457   const struct dxil_func *func = dxil_get_function(&ctx->mod,
458                                                    unary_func_name(intr),
459                                                    overload);
460   if (!func)
461      return NULL;
462
463   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
464   if (!opcode)
465      return NULL;
466
467   const struct dxil_value *args[] = {
468     opcode,
469     op0
470   };
471
472   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
473}
474
475static const struct dxil_value *
476emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
477                 enum dxil_intr intr,
478                 const struct dxil_value *op0, const struct dxil_value *op1)
479{
480   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
481   if (!func)
482      return NULL;
483
484   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
485   if (!opcode)
486      return NULL;
487
488   const struct dxil_value *args[] = {
489     opcode,
490     op0,
491     op1
492   };
493
494   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
495}
496
497static const struct dxil_value *
498emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
499                   enum dxil_intr intr,
500                   const struct dxil_value *op0,
501                   const struct dxil_value *op1,
502                   const struct dxil_value *op2)
503{
504   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
505   if (!func)
506      return NULL;
507
508   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
509   if (!opcode)
510      return NULL;
511
512   const struct dxil_value *args[] = {
513     opcode,
514     op0,
515     op1,
516     op2
517   };
518
519   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
520}
521
522static const struct dxil_value *
523emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
524{
525   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
526   if (!func)
527      return NULL;
528
529   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
530       DXIL_INTR_THREAD_ID);
531   if (!opcode)
532      return NULL;
533
534   const struct dxil_value *args[] = {
535     opcode,
536     comp
537   };
538
539   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
540}
541
542static const struct dxil_value *
543emit_threadidingroup_call(struct ntd_context *ctx,
544                          const struct dxil_value *comp)
545{
546   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);
547
548   if (!func)
549      return NULL;
550
551   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
552       DXIL_INTR_THREAD_ID_IN_GROUP);
553   if (!opcode)
554      return NULL;
555
556   const struct dxil_value *args[] = {
557     opcode,
558     comp
559   };
560
561   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
562}
563
564static const struct dxil_value *
565emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
566{
567   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);
568
569   if (!func)
570      return NULL;
571
572   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
573      DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
574   if (!opcode)
575      return NULL;
576
577   const struct dxil_value *args[] = {
578     opcode
579   };
580
581   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
582}
583
584static const struct dxil_value *
585emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
586{
587   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);
588
589   if (!func)
590      return NULL;
591
592   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
593       DXIL_INTR_GROUP_ID);
594   if (!opcode)
595      return NULL;
596
597   const struct dxil_value *args[] = {
598     opcode,
599     comp
600   };
601
602   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
603}
604
605static const struct dxil_value *
606emit_bufferload_call(struct ntd_context *ctx,
607                     const struct dxil_value *handle,
608                     const struct dxil_value *coord[2],
609                     enum overload_type overload)
610{
611   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
612   if (!func)
613      return NULL;
614
615   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
616      DXIL_INTR_BUFFER_LOAD);
617   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };
618
619   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
620}
621
622static bool
623emit_bufferstore_call(struct ntd_context *ctx,
624                      const struct dxil_value *handle,
625                      const struct dxil_value *coord[2],
626                      const struct dxil_value *value[4],
627                      const struct dxil_value *write_mask,
628                      enum overload_type overload)
629{
630   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);
631
632   if (!func)
633      return false;
634
635   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
636      DXIL_INTR_BUFFER_STORE);
637   const struct dxil_value *args[] = {
638      opcode, handle, coord[0], coord[1],
639      value[0], value[1], value[2], value[3],
640      write_mask
641   };
642
643   return dxil_emit_call_void(&ctx->mod, func,
644                              args, ARRAY_SIZE(args));
645}
646
647static const struct dxil_value *
648emit_textureload_call(struct ntd_context *ctx,
649                      const struct dxil_value *handle,
650                      const struct dxil_value *coord[3],
651                      enum overload_type overload)
652{
653   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
654   if (!func)
655      return NULL;
656   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
657   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
658
659   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
660      DXIL_INTR_TEXTURE_LOAD);
661   const struct dxil_value *args[] = { opcode, handle,
662      /*lod_or_sample*/ int_undef,
663      coord[0], coord[1], coord[2],
664      /* offsets */ int_undef, int_undef, int_undef};
665
666   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
667}
668
669static bool
670emit_texturestore_call(struct ntd_context *ctx,
671                       const struct dxil_value *handle,
672                       const struct dxil_value *coord[3],
673                       const struct dxil_value *value[4],
674                       const struct dxil_value *write_mask,
675                       enum overload_type overload)
676{
677   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);
678
679   if (!func)
680      return false;
681
682   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
683      DXIL_INTR_TEXTURE_STORE);
684   const struct dxil_value *args[] = {
685      opcode, handle, coord[0], coord[1], coord[2],
686      value[0], value[1], value[2], value[3],
687      write_mask
688   };
689
690   return dxil_emit_call_void(&ctx->mod, func,
691                              args, ARRAY_SIZE(args));
692}
693
694static const struct dxil_value *
695emit_atomic_binop(struct ntd_context *ctx,
696                  const struct dxil_value *handle,
697                  enum dxil_atomic_op atomic_op,
698                  const struct dxil_value *coord[3],
699                  const struct dxil_value *value)
700{
701   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);
702
703   if (!func)
704      return false;
705
706   const struct dxil_value *opcode =
707      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
708   const struct dxil_value *atomic_op_value =
709      dxil_module_get_int32_const(&ctx->mod, atomic_op);
710   const struct dxil_value *args[] = {
711      opcode, handle, atomic_op_value,
712      coord[0], coord[1], coord[2], value
713   };
714
715   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
716}
717
718static const struct dxil_value *
719emit_atomic_cmpxchg(struct ntd_context *ctx,
720                    const struct dxil_value *handle,
721                    const struct dxil_value *coord[3],
722                    const struct dxil_value *cmpval,
723                    const struct dxil_value *newval)
724{
725   const struct dxil_func *func =
726      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);
727
728   if (!func)
729      return false;
730
731   const struct dxil_value *opcode =
732      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
733   const struct dxil_value *args[] = {
734      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
735   };
736
737   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
738}
739
740static const struct dxil_value *
741emit_createhandle_call(struct ntd_context *ctx,
742                       enum dxil_resource_class resource_class,
743                       unsigned resource_range_id,
744                       const struct dxil_value *resource_range_index,
745                       bool non_uniform_resource_index)
746{
747   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
748   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
749   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
750   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
751   if (!opcode || !resource_class_value || !resource_range_id_value ||
752       !non_uniform_resource_index_value)
753      return NULL;
754
755   const struct dxil_value *args[] = {
756      opcode,
757      resource_class_value,
758      resource_range_id_value,
759      resource_range_index,
760      non_uniform_resource_index_value
761   };
762
763   const struct dxil_func *func =
764         dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);
765
766   if (!func)
767         return NULL;
768
769   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
770}
771
772static const struct dxil_value *
773emit_createhandle_call_const_index(struct ntd_context *ctx,
774                                   enum dxil_resource_class resource_class,
775                                   unsigned resource_range_id,
776                                   unsigned resource_range_index,
777                                   bool non_uniform_resource_index)
778{
779
780   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
781   if (!resource_range_index_value)
782      return NULL;
783
784   return emit_createhandle_call(ctx, resource_class, resource_range_id,
785                                 resource_range_index_value,
786                                 non_uniform_resource_index);
787}
788
/* Append one declared resource range to ctx->resources.  A zero-sized
 * layout denotes an unbounded array and is recorded with upper_bound
 * clamped to UINT_MAX; the same clamp guards binding+size overflow.
 * NOTE(review): util_dynarray_grow's result is used unchecked — presumably
 * allocation failure aborts elsewhere; confirm. */
static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             const resource_array_layout *layout)
{
   struct dxil_resource *resource = util_dynarray_grow(&ctx->resources, struct dxil_resource, 1);
   resource->resource_type = type;
   resource->space = layout->space;
   resource->lower_bound = layout->binding;
   /* size == 0 means unbounded; also clamp if the end would overflow */
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource->upper_bound = UINT_MAX;
   else
      resource->upper_bound = layout->binding + layout->size - 1;
}
802
/* Return the class-relative index of the declared resource range covering
 * @binding in @space.
 *
 * The offsets below assume ctx->resources was filled in the fixed order
 * CBVs, samplers, SRVs, UAVs — TODO confirm this matches the declaration
 * order used elsewhere in this file. */
static unsigned
get_resource_id(struct ntd_context *ctx, enum dxil_resource_class class,
                unsigned space, unsigned binding)
{
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);

   switch (class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   assert(offset + count <= util_dynarray_num_elements(&ctx->resources, struct dxil_resource));
   /* Linear scan of this class's ranges for one containing @binding. */
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource *resource = util_dynarray_element(&ctx->resources, struct dxil_resource, i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         return i - offset;
      }
   }

   unreachable("Resource access for undeclared range");
   return 0;
}
847
/* Declare the SRV described by @var, spanning @count consecutive binding
 * slots; SSBO variables become raw buffers, everything else typed
 * resources.  Outside the Vulkan environment the access handles for each
 * slot are created eagerly.  Returns false on failure. */
static bool
emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   /* Record ID is the current number of SRV metadata records. */
   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};

   enum dxil_component_type comp_type;
   enum dxil_resource_kind res_kind;
   enum dxil_resource_type res_type;
   if (var->data.mode == nir_var_mem_ssbo) {
      comp_type = DXIL_COMP_TYPE_INVALID;
      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      res_type = DXIL_RES_SRV_RAW;
   } else {
      comp_type = dxil_get_comp_type(var->type);
      res_kind = dxil_get_resource_kind(var->type);
      res_type = DXIL_RES_SRV_TYPED;
   }
   const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, false /* readwrite */);
   const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
                                                          &layout, comp_type, res_kind);

   if (!srv_meta)
      return false;

   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
   add_resource(ctx, res_type, &layout);
   if (res_type == DXIL_RES_SRV_RAW)
      ctx->mod.raw_and_structured_buffers = true;

   if (!ctx->opts->vulkan_environment) {
      /* Pre-create one handle per binding slot in the range. */
      for (unsigned i = 0; i < count; ++i) {
         const struct dxil_value *handle =
            emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SRV,
                                               id, binding + i, false);
         if (!handle)
            return false;

         int idx = var->data.binding + i;
         ctx->srv_handles[idx] = handle;
      }
   }

   return true;
}
894
/* Declare a single UAV array named "globals": @size extra raw buffers plus
 * one per SSBO variable in the shader.  Handles are created on demand (see
 * note below).  Returns true when there is nothing to declare. */
static bool
emit_globals(struct ntd_context *ctx, unsigned size)
{
   /* Each SSBO variable contributes one slot to the array. */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
      size++;

   if (!size)
      return true;

   const struct dxil_type *struct_type = dxil_module_get_res_type(&ctx->mod,
      DXIL_RESOURCE_KIND_RAW_BUFFER, DXIL_COMP_TYPE_INVALID, true /* readwrite */);
   if (!struct_type)
      return false;

   const struct dxil_type *array_type =
      dxil_module_get_array_type(&ctx->mod, struct_type, size);
   if (!array_type)
      return false;

   resource_array_layout layout = {0, 0, size, 0};
   const struct dxil_mdnode *uav_meta =
      emit_uav_metadata(&ctx->mod, array_type,
                                   "globals", &layout,
                                   DXIL_COMP_TYPE_INVALID,
                                   DXIL_RESOURCE_KIND_RAW_BUFFER);
   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   /* More than 8 UAVs requires the 64-UAV capability bit. */
   if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;
   /* Handles to UAVs used for kernel globals are created on-demand */
   add_resource(ctx, DXIL_RES_UAV_RAW, &layout);
   ctx->mod.raw_and_structured_buffers = true;
   return true;
}
931
932static bool
933emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
934         enum dxil_component_type comp_type, enum dxil_resource_kind res_kind, const char *name)
935{
936   unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
937   resource_array_layout layout = { id, binding, count, space };
938
939   const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, true /* readwrite */);
940   const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
941                                                          &layout, comp_type, res_kind);
942
943   if (!uav_meta)
944      return false;
945
946   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
947   if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
948      ctx->mod.feats.use_64uavs = 1;
949
950   add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, &layout);
951   if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
952      ctx->mod.raw_and_structured_buffers = true;
953
954   if (!ctx->opts->vulkan_environment) {
955      for (unsigned i = 0; i < count; ++i) {
956         const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_UAV,
957                                                                              id, binding + i, false);
958         if (!handle)
959            return false;
960
961         ctx->uav_handles[binding + i] = handle;
962      }
963   }
964
965   return true;
966}
967
968static bool
969emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
970{
971   unsigned binding = var->data.binding;
972   unsigned space = var->data.descriptor_set;
973   enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
974   enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
975   const char *name = var->name;
976
977   return emit_uav(ctx, binding, space, count, comp_type, res_kind, name);
978}
979
980static unsigned get_dword_size(const struct glsl_type *type)
981{
982   if (glsl_type_is_array(type)) {
983      type = glsl_without_array(type);
984   }
985   assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
986   return glsl_get_explicit_size(type, false);
987}
988
989static void
990var_fill_const_array_with_vector_or_scalar(struct ntd_context *ctx,
991                                           const struct nir_constant *c,
992                                           const struct glsl_type *type,
993                                           void *const_vals,
994                                           unsigned int offset)
995{
996   assert(glsl_type_is_vector_or_scalar(type));
997   unsigned int components = glsl_get_vector_elements(type);
998   unsigned bit_size = glsl_get_bit_size(type);
999   unsigned int increment = bit_size / 8;
1000
1001   for (unsigned int comp = 0; comp < components; comp++) {
1002      uint8_t *dst = (uint8_t *)const_vals + offset;
1003
1004      switch (bit_size) {
1005      case 64:
1006         memcpy(dst, &c->values[comp].u64, sizeof(c->values[0].u64));
1007         break;
1008      case 32:
1009         memcpy(dst, &c->values[comp].u32, sizeof(c->values[0].u32));
1010         break;
1011      case 16:
1012         memcpy(dst, &c->values[comp].u16, sizeof(c->values[0].u16));
1013         break;
1014      case 8:
1015         assert(glsl_base_type_is_integer(glsl_get_base_type(type)));
1016         memcpy(dst, &c->values[comp].u8, sizeof(c->values[0].u8));
1017         break;
1018      default:
1019         unreachable("unexpeted bit-size");
1020      }
1021
1022      offset += increment;
1023   }
1024}
1025
/* Recursively serialize a nir_constant of arbitrary GLSL type into the flat
 * byte buffer `const_vals`, starting at byte `offset`.  Vectors/scalars are
 * the leaf case; arrays and structs recurse per element/field.
 */
static void
var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c,
                     const struct glsl_type *type, void *const_vals,
                     unsigned int offset)
{
   assert(!glsl_type_is_interface(type));

   if (glsl_type_is_vector_or_scalar(type)) {
      var_fill_const_array_with_vector_or_scalar(ctx, c, type,
                                                 const_vals,
                                                 offset);
   } else if (glsl_type_is_array(type)) {
      assert(!glsl_type_is_unsized_array(type));
      const struct glsl_type *without = glsl_without_array(type);
      unsigned stride = glsl_get_explicit_stride(without);

      /* NOTE(review): each element's base is computed as
       * offset + elt*stride, yet `offset` is ALSO advanced by the CL size
       * of the element every iteration — these look like two competing
       * layout schemes and may double-count unless one of `stride` or the
       * increment is zero in practice.  TODO: confirm against
       * glsl_get_explicit_stride() semantics for the element type. */
      for (unsigned elt = 0; elt < glsl_get_length(type); elt++) {
         var_fill_const_array(ctx, c->elements[elt], without,
                              const_vals, offset + (elt * stride));
         offset += glsl_get_cl_size(without);
      }
   } else if (glsl_type_is_struct(type)) {
      /* Struct fields land at their explicit field offsets relative to the
       * struct's own base offset. */
      for (unsigned int elt = 0; elt < glsl_get_length(type); elt++) {
         const struct glsl_type *elt_type = glsl_get_struct_field(type, elt);
         unsigned field_offset = glsl_get_struct_field_offset(type, elt);

         var_fill_const_array(ctx, c->elements[elt],
                              elt_type, const_vals,
                              offset + field_offset);
      }
   } else
      unreachable("unknown GLSL type in var_fill_const_array");
}
1059
1060static bool
1061emit_global_consts(struct ntd_context *ctx)
1062{
1063   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_temp) {
1064      assert(var->constant_initializer);
1065
1066      unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4);
1067      uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members);
1068      var_fill_const_array(ctx, var->constant_initializer, var->type,
1069                                 const_ints, 0);
1070      const struct dxil_value **const_vals =
1071         ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members);
1072      if (!const_vals)
1073         return false;
1074      for (int i = 0; i < num_members; i++)
1075         const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]);
1076
1077      const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32);
1078      if (!elt_type)
1079         return false;
1080      const struct dxil_type *type =
1081         dxil_module_get_array_type(&ctx->mod, elt_type, num_members);
1082      if (!type)
1083         return false;
1084      const struct dxil_value *agg_vals =
1085         dxil_module_get_array_const(&ctx->mod, type, const_vals);
1086      if (!agg_vals)
1087         return false;
1088
1089      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, type,
1090                                                              DXIL_AS_DEFAULT, 4,
1091                                                              agg_vals);
1092      if (!gvar)
1093         return false;
1094
1095      if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar))
1096         return false;
1097   }
1098
1099   return true;
1100}
1101
1102static bool
1103emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
1104         unsigned size, unsigned count, char *name)
1105{
1106   unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
1107
1108   const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
1109   const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
1110   const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
1111                                                                     &array_type, 1);
1112   const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
1113   resource_array_layout layout = {idx, binding, count, space};
1114   const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
1115                                                          name, &layout, 4 * size);
1116
1117   if (!cbv_meta)
1118      return false;
1119
1120   util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
1121   add_resource(ctx, DXIL_RES_CBV, &layout);
1122
1123   if (!ctx->opts->vulkan_environment) {
1124      for (unsigned i = 0; i < count; ++i) {
1125         const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_CBV,
1126                                                                              idx, binding + i, false);
1127         if (!handle)
1128            return false;
1129
1130         assert(!ctx->cbv_handles[binding + i]);
1131         ctx->cbv_handles[binding + i] = handle;
1132      }
1133   }
1134
1135   return true;
1136}
1137
1138static bool
1139emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
1140{
1141   unsigned count = 1;
1142   if (glsl_type_is_array(var->type))
1143      count = glsl_get_length(var->type);
1144   return emit_cbv(ctx, var->data.binding, var->data.descriptor_set, get_dword_size(var->type), count, var->name);
1145}
1146
1147static bool
1148emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
1149{
1150   unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
1151   unsigned binding = var->data.binding;
1152   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
1153   const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
1154   const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);
1155   const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);
1156
1157   if (!sampler_meta)
1158      return false;
1159
1160   util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
1161   add_resource(ctx, DXIL_RES_SAMPLER, &layout);
1162
1163   if (!ctx->opts->vulkan_environment) {
1164      for (unsigned i = 0; i < count; ++i) {
1165         const struct dxil_value *handle =
1166            emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
1167                                               id, binding + i, false);
1168         if (!handle)
1169            return false;
1170
1171         unsigned idx = var->data.binding + i;
1172         ctx->sampler_handles[idx] = handle;
1173      }
1174   }
1175
1176   return true;
1177}
1178
1179static const struct dxil_mdnode *
1180emit_gs_state(struct ntd_context *ctx)
1181{
1182   const struct dxil_mdnode *gs_state_nodes[5];
1183   const nir_shader *s = ctx->shader;
1184
1185   gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
1186   gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
1187   gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.active_stream_mask);
1188   gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
1189   gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);
1190
1191   for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
1192      if (!gs_state_nodes[i])
1193         return NULL;
1194   }
1195
1196   return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
1197}
1198
1199static const struct dxil_mdnode *
1200emit_threads(struct ntd_context *ctx)
1201{
1202   const nir_shader *s = ctx->shader;
1203   const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
1204   const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
1205   const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
1206   if (!threads_x || !threads_y || !threads_z)
1207      return false;
1208
1209   const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
1210   return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
1211}
1212
1213static int64_t
1214get_module_flags(struct ntd_context *ctx)
1215{
1216   /* See the DXIL documentation for the definition of these flags:
1217    *
1218    * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
1219    */
1220
1221   uint64_t flags = 0;
1222   if (ctx->mod.feats.doubles)
1223      flags |= (1 << 2);
1224   if (ctx->mod.raw_and_structured_buffers)
1225      flags |= (1 << 4);
1226   if (ctx->mod.feats.min_precision)
1227      flags |= (1 << 5);
1228   if (ctx->mod.feats.dx11_1_double_extensions)
1229      flags |= (1 << 6);
1230   if (ctx->mod.feats.inner_coverage)
1231      flags |= (1 << 10);
1232   if (ctx->mod.feats.typed_uav_load_additional_formats)
1233      flags |= (1 << 13);
1234   if (ctx->mod.feats.use_64uavs)
1235      flags |= (1 << 15);
1236   if (ctx->mod.feats.cs_4x_raw_sb)
1237      flags |= (1 << 17);
1238   if (ctx->mod.feats.wave_ops)
1239      flags |= (1 << 19);
1240   if (ctx->mod.feats.int64_ops)
1241      flags |= (1 << 20);
1242   if (ctx->mod.feats.stencil_ref)
1243      flags |= (1 << 11);
1244   if (ctx->mod.feats.native_low_precision)
1245      flags |= (1 << 23) | (1 << 5);
1246
1247   if (ctx->opts->disable_math_refactoring)
1248      flags |= (1 << 1);
1249
1250   return flags;
1251}
1252
1253static const struct dxil_mdnode *
1254emit_entrypoint(struct ntd_context *ctx,
1255                const struct dxil_func *func, const char *name,
1256                const struct dxil_mdnode *signatures,
1257                const struct dxil_mdnode *resources,
1258                const struct dxil_mdnode *shader_props)
1259{
1260   const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
1261   const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, name);
1262   const struct dxil_mdnode *nodes[] = {
1263      func_md,
1264      name_md,
1265      signatures,
1266      resources,
1267      shader_props
1268   };
1269   return dxil_get_metadata_node(&ctx->mod, nodes,
1270                                 ARRAY_SIZE(nodes));
1271}
1272
1273static const struct dxil_mdnode *
1274emit_resources(struct ntd_context *ctx)
1275{
1276   bool emit_resources = false;
1277   const struct dxil_mdnode *resources_nodes[] = {
1278      NULL, NULL, NULL, NULL
1279   };
1280
1281#define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)
1282
1283   if (ctx->srv_metadata_nodes.size) {
1284      resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
1285      emit_resources = true;
1286   }
1287
1288   if (ctx->uav_metadata_nodes.size) {
1289      resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
1290      emit_resources = true;
1291   }
1292
1293   if (ctx->cbv_metadata_nodes.size) {
1294      resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
1295      emit_resources = true;
1296   }
1297
1298   if (ctx->sampler_metadata_nodes.size) {
1299      resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
1300      emit_resources = true;
1301   }
1302
1303#undef ARRAY_AND_SIZE
1304
1305   return emit_resources ?
1306      dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL;
1307}
1308
1309static boolean
1310emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
1311         const struct dxil_mdnode *value_node)
1312{
1313   const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
1314   if (!tag_node || !value_node)
1315      return false;
1316   assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
1317   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
1318   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;
1319
1320   return true;
1321}
1322
/* Emit all module-level metadata: version records, the "main" entry point,
 * type annotations, resource tables, and per-stage shader properties.
 * Order-sensitive: resource metadata must already be collected in ctx.
 */
static bool
emit_metadata(struct ntd_context *ctx)
{
   unsigned dxilMinor = ctx->mod.minor_version;
   /* Identification and versioning named nodes come first. */
   if (!emit_llvm_ident(&ctx->mod) ||
       !emit_named_version(&ctx->mod, "dx.version", 1, dxilMinor) ||
       !emit_named_version(&ctx->mod, "dx.valver", 1, 4) ||
       !emit_dx_shader_model(&ctx->mod))
      return false;

   /* The entry point is always a void main(void) function. */
   const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
   const struct dxil_type *main_func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
   const struct dxil_func *main_func = dxil_add_function_def(&ctx->mod, "main", main_func_type);
   if (!main_func)
      return false;

   /* May be NULL when the shader uses no resources at all. */
   const struct dxil_mdnode *resources_node = emit_resources(ctx);

   const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
   const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);

   /* Minimal dx.typeAnnotations skeleton for the entry point; the numeric
    * node names mirror the node IDs in reference DXC output. */
   const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
   const struct dxil_mdnode *nodes_4_27_27[] = {
      node4, node27, node27
   };
   const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
                                                      ARRAY_SIZE(nodes_4_27_27));

   const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);

   const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
   const struct dxil_mdnode *main_type_annotation_nodes[] = {
      node3, main_entrypoint, node29
   };
   const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
                                                                           ARRAY_SIZE(main_type_annotation_nodes));

   /* Stage-specific shader property tags. */
   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
         return false;
   }

   /* Shader flags tag is only emitted when at least one flag is set. */
   uint64_t flags = get_module_flags(ctx);
   if (flags != 0) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
         return false;
   }
   const struct dxil_mdnode *shader_properties = NULL;
   if (ctx->num_shader_property_nodes > 0) {
      shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
                                                 ctx->num_shader_property_nodes);
      if (!shader_properties)
         return false;
   }

   const struct dxil_mdnode *signatures = get_signatures(&ctx->mod, ctx->shader,
                                                         ctx->opts->vulkan_environment);

   const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
       "main", signatures, resources_node, shader_properties);
   if (!dx_entry_point)
      return false;

   /* dx.resources is optional; only emit it when resources exist. */
   if (resources_node) {
      const struct dxil_mdnode *dx_resources = resources_node;
      dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
                                       &dx_resources, 1);
   }

   const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
   return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
                                       dx_type_annotations,
                                       ARRAY_SIZE(dx_type_annotations)) &&
          dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
                                       &dx_entry_point, 1);
}
1402
1403static const struct dxil_value *
1404bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
1405               const struct dxil_value *value)
1406{
1407   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
1408   if (!type)
1409      return NULL;
1410
1411   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
1412}
1413
1414static const struct dxil_value *
1415bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
1416                 const struct dxil_value *value)
1417{
1418   const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
1419   if (!type)
1420      return NULL;
1421
1422   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
1423}
1424
/* Record `value` as the DXIL value for channel `chan` of SSA def `ssa`.
 * If a placeholder was already created (by a phi that referenced this def
 * before it was defined), bitcast the incoming value to the placeholder's
 * type when their NIR base types differ.
 */
static void
store_ssa_def(struct ntd_context *ctx, nir_ssa_def *ssa, unsigned chan,
              const struct dxil_value *value)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   /* We pre-defined the dest value because of a phi node, so bitcast while storing if the
    * base type differs */
   if (ctx->defs[ssa->index].chans[chan]) {
      const struct dxil_type *expect_type = dxil_value_get_type(ctx->defs[ssa->index].chans[chan]);
      const struct dxil_type *value_type = dxil_value_get_type(value);
      if (dxil_type_to_nir_type(expect_type) != dxil_type_to_nir_type(value_type))
         value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, expect_type, value);
   }
   ctx->defs[ssa->index].chans[chan] = value;
}
1441
/* Store `value` for channel `chan` of an instruction destination.
 * Only SSA destinations are supported (registers were lowered earlier).
 */
static void
store_dest_value(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
                 const struct dxil_value *value)
{
   assert(dest->is_ssa);
   assert(value);
   store_ssa_def(ctx, &dest->ssa, chan, value);
}
1450
1451static void
1452store_dest(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
1453           const struct dxil_value *value, nir_alu_type type)
1454{
1455   switch (nir_alu_type_get_base_type(type)) {
1456   case nir_type_float:
1457      if (nir_dest_bit_size(*dest) == 64)
1458         ctx->mod.feats.doubles = true;
1459      FALLTHROUGH;
1460   case nir_type_uint:
1461   case nir_type_int:
1462      if (nir_dest_bit_size(*dest) == 16)
1463         ctx->mod.feats.native_low_precision = true;
1464      if (nir_dest_bit_size(*dest) == 64)
1465         ctx->mod.feats.int64_ops = true;
1466      FALLTHROUGH;
1467   case nir_type_bool:
1468      store_dest_value(ctx, dest, chan, value);
1469      break;
1470   default:
1471      unreachable("unexpected nir_alu_type");
1472   }
1473}
1474
/* Store an ALU result channel, typed by the op's declared output type.
 * Saturate modifiers must have been lowered away before this point.
 */
static void
store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
               const struct dxil_value *value)
{
   assert(!alu->dest.saturate);
   store_dest(ctx, &alu->dest.dest, chan, value,
              nir_op_infos[alu->op].output_type);
}
1483
/* Look up the already-emitted DXIL value for channel `chan` of SSA def
 * `ssa`; the def must have been stored (or pre-defined for a phi) first.
 */
static const struct dxil_value *
get_src_ssa(struct ntd_context *ctx, const nir_ssa_def *ssa, unsigned chan)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   assert(ctx->defs[ssa->index].chans[chan]);
   return ctx->defs[ssa->index].chans[chan];
}
1492
1493static const struct dxil_value *
1494get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
1495        nir_alu_type type)
1496{
1497   assert(src->is_ssa);
1498   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);
1499
1500   const int bit_size = nir_src_bit_size(*src);
1501
1502   switch (nir_alu_type_get_base_type(type)) {
1503   case nir_type_int:
1504   case nir_type_uint: {
1505      assert(bit_size != 64 || ctx->mod.feats.int64_ops);
1506      const struct dxil_type *expect_type =  dxil_module_get_int_type(&ctx->mod, bit_size);
1507      /* nohing to do */
1508      if (dxil_value_type_equal_to(value, expect_type))
1509         return value;
1510      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
1511      return bitcast_to_int(ctx,  bit_size, value);
1512      }
1513
1514   case nir_type_float:
1515      assert(nir_src_bit_size(*src) >= 16);
1516      assert(nir_src_bit_size(*src) != 64 || (ctx->mod.feats.doubles &&
1517                                              ctx->mod.feats.int64_ops));
1518      if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size)))
1519         return value;
1520      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
1521      return bitcast_to_float(ctx, bit_size, value);
1522
1523   case nir_type_bool:
1524      if (!dxil_value_type_bitsize_equal_to(value, 1)) {
1525         return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
1526                               dxil_module_get_int_type(&ctx->mod, 1), value);
1527      }
1528      return value;
1529
1530   default:
1531      unreachable("unexpected nir_alu_type");
1532   }
1533}
1534
1535static const struct dxil_type *
1536get_alu_src_type(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
1537{
1538   assert(!alu->src[src].abs);
1539   assert(!alu->src[src].negate);
1540   nir_ssa_def *ssa_src = alu->src[src].src.ssa;
1541   unsigned chan = alu->src[src].swizzle[0];
1542   const struct dxil_value *value = get_src_ssa(ctx, ssa_src, chan);
1543   return dxil_value_get_type(value);
1544}
1545
1546static const struct dxil_value *
1547get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
1548{
1549   assert(!alu->src[src].abs);
1550   assert(!alu->src[src].negate);
1551
1552   unsigned chan = alu->src[src].swizzle[0];
1553   return get_src(ctx, &alu->src[src].src, chan,
1554                  nir_op_infos[alu->op].input_types[src]);
1555}
1556
1557static bool
1558emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
1559           enum dxil_bin_opcode opcode,
1560           const struct dxil_value *op0, const struct dxil_value *op1)
1561{
1562   bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;
1563
1564   enum dxil_opt_flags flags = 0;
1565   if (is_float_op && !alu->exact)
1566      flags |= DXIL_UNSAFE_ALGEBRA;
1567
1568   const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
1569   if (!v)
1570      return false;
1571   store_alu_dest(ctx, alu, 0, v);
1572   return true;
1573}
1574
1575static bool
1576emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
1577           enum dxil_bin_opcode opcode,
1578           const struct dxil_value *op0, const struct dxil_value *op1)
1579{
1580   unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
1581   unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);
1582   if (op0_bit_size != op1_bit_size) {
1583      const struct dxil_type *type =
1584         dxil_module_get_int_type(&ctx->mod, op0_bit_size);
1585      enum dxil_cast_opcode cast_op =
1586         op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
1587      op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
1588   }
1589
1590   const struct dxil_value *v =
1591      dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
1592   if (!v)
1593      return false;
1594   store_alu_dest(ctx, alu, 0, v);
1595   return true;
1596}
1597
1598static bool
1599emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
1600         enum dxil_cmp_pred pred,
1601         const struct dxil_value *op0, const struct dxil_value *op1)
1602{
1603   const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
1604   if (!v)
1605      return false;
1606   store_alu_dest(ctx, alu, 0, v);
1607   return true;
1608}
1609
/* Map a NIR conversion op to the corresponding DXIL/LLVM cast opcode.
 * For same-domain conversions (int->int, uint->uint, float->float) the
 * source/destination bit sizes decide between extend and truncate; the
 * sizes are otherwise only used in sanity asserts.
 */
static enum dxil_cast_opcode
get_cast_op(nir_alu_instr *alu)
{
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);

   switch (alu->op) {
   /* bool -> int */
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      return DXIL_CAST_ZEXT;

   /* float -> float */
   case nir_op_f2f16_rtz:
   case nir_op_f2f32:
   case nir_op_f2f64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_FPTRUNC;
      else
         return DXIL_CAST_FPEXT;

   /* int -> int */
   case nir_op_i2i16:
   case nir_op_i2i32:
   case nir_op_i2i64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_SEXT;

   /* uint -> uint */
   case nir_op_u2u16:
   case nir_op_u2u32:
   case nir_op_u2u64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_ZEXT;

   /* float -> int */
   case nir_op_f2i16:
   case nir_op_f2i32:
   case nir_op_f2i64:
      return DXIL_CAST_FPTOSI;

   /* float -> uint */
   case nir_op_f2u16:
   case nir_op_f2u32:
   case nir_op_f2u64:
      return DXIL_CAST_FPTOUI;

   /* int -> float */
   case nir_op_i2f16:
   case nir_op_i2f32:
   case nir_op_i2f64:
      return DXIL_CAST_SITOFP;

   /* uint -> float */
   case nir_op_u2f16:
   case nir_op_u2f32:
   case nir_op_u2f64:
      return DXIL_CAST_UITOFP;

   default:
      unreachable("unexpected cast op");
   }
}
1681
1682static const struct dxil_type *
1683get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
1684{
1685   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1686   switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
1687   case nir_type_bool:
1688      assert(dst_bits == 1);
1689      FALLTHROUGH;
1690   case nir_type_int:
1691   case nir_type_uint:
1692      return dxil_module_get_int_type(&ctx->mod, dst_bits);
1693
1694   case nir_type_float:
1695      return dxil_module_get_float_type(&ctx->mod, dst_bits);
1696
1697   default:
1698      unreachable("unknown nir_alu_type");
1699   }
1700}
1701
1702static bool
1703is_double(nir_alu_type alu_type, unsigned bit_size)
1704{
1705   return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
1706          bit_size == 64;
1707}
1708
1709static bool
1710emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
1711          const struct dxil_value *value)
1712{
1713   enum dxil_cast_opcode opcode = get_cast_op(alu);
1714   const struct dxil_type *type = get_cast_dest_type(ctx, alu);
1715   if (!type)
1716      return false;
1717
1718   const nir_op_info *info = &nir_op_infos[alu->op];
1719   switch (opcode) {
1720   case DXIL_CAST_UITOFP:
1721   case DXIL_CAST_SITOFP:
1722      if (is_double(info->output_type, nir_dest_bit_size(alu->dest.dest)))
1723         ctx->mod.feats.dx11_1_double_extensions = true;
1724      break;
1725   case DXIL_CAST_FPTOUI:
1726   case DXIL_CAST_FPTOSI:
1727      if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
1728         ctx->mod.feats.dx11_1_double_extensions = true;
1729      break;
1730   default:
1731      break;
1732   }
1733
1734   const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
1735                                               value);
1736   if (!v)
1737      return false;
1738   store_alu_dest(ctx, alu, 0, v);
1739   return true;
1740}
1741
1742static enum overload_type
1743get_overload(nir_alu_type alu_type, unsigned bit_size)
1744{
1745   switch (nir_alu_type_get_base_type(alu_type)) {
1746   case nir_type_int:
1747   case nir_type_uint:
1748      switch (bit_size) {
1749      case 16: return DXIL_I16;
1750      case 32: return DXIL_I32;
1751      case 64: return DXIL_I64;
1752      default:
1753         unreachable("unexpected bit_size");
1754      }
1755   case nir_type_float:
1756      switch (bit_size) {
1757      case 16: return DXIL_F16;
1758      case 32: return DXIL_F32;
1759      case 64: return DXIL_F64;
1760      default:
1761         unreachable("unexpected bit_size");
1762      }
1763   default:
1764      unreachable("unexpected output type");
1765   }
1766}
1767
1768static bool
1769emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
1770                 enum dxil_intr intr, const struct dxil_value *op)
1771{
1772   const nir_op_info *info = &nir_op_infos[alu->op];
1773   unsigned src_bits = nir_src_bit_size(alu->src[0].src);
1774   enum overload_type overload = get_overload(info->input_types[0], src_bits);
1775
1776   const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
1777   if (!v)
1778      return false;
1779   store_alu_dest(ctx, alu, 0, v);
1780   return true;
1781}
1782
1783static bool
1784emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
1785                  enum dxil_intr intr,
1786                  const struct dxil_value *op0, const struct dxil_value *op1)
1787{
1788   const nir_op_info *info = &nir_op_infos[alu->op];
1789   assert(info->output_type == info->input_types[0]);
1790   assert(info->output_type == info->input_types[1]);
1791   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1792   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
1793   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
1794   enum overload_type overload = get_overload(info->output_type, dst_bits);
1795
1796   const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
1797                                                 op0, op1);
1798   if (!v)
1799      return false;
1800   store_alu_dest(ctx, alu, 0, v);
1801   return true;
1802}
1803
1804static bool
1805emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
1806                    enum dxil_intr intr,
1807                    const struct dxil_value *op0,
1808                    const struct dxil_value *op1,
1809                    const struct dxil_value *op2)
1810{
1811   const nir_op_info *info = &nir_op_infos[alu->op];
1812   assert(info->output_type == info->input_types[0]);
1813   assert(info->output_type == info->input_types[1]);
1814   assert(info->output_type == info->input_types[2]);
1815
1816   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1817   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
1818   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
1819   assert(nir_src_bit_size(alu->src[2].src) == dst_bits);
1820
1821   enum overload_type overload = get_overload(info->output_type, dst_bits);
1822
1823   const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
1824                                                   op0, op1, op2);
1825   if (!v)
1826      return false;
1827   store_alu_dest(ctx, alu, 0, v);
1828   return true;
1829}
1830
1831static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
1832                        const struct dxil_value *sel,
1833                        const struct dxil_value *val_true,
1834                        const struct dxil_value *val_false)
1835{
1836   assert(sel);
1837   assert(val_true);
1838   assert(val_false);
1839
1840   const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
1841   if (!v)
1842      return false;
1843
1844   store_alu_dest(ctx, alu, 0, v);
1845   return true;
1846}
1847
1848static bool
1849emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
1850{
1851   assert(val);
1852
1853   struct dxil_module *m = &ctx->mod;
1854
1855   const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
1856   const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);
1857
1858   if (!c0 || !c1)
1859      return false;
1860
1861   return emit_select(ctx, alu, val, c1, c0);
1862}
1863
1864static bool
1865emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
1866{
1867   assert(val);
1868
1869   struct dxil_module *m = &ctx->mod;
1870
1871   const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
1872   const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);
1873
1874   if (!c0 || !c1)
1875      return false;
1876
1877   return emit_select(ctx, alu, val, c1, c0);
1878}
1879
1880static bool
1881emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
1882{
1883   assert(val);
1884
1885   const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f);
1886   return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero);
1887}
1888
/* nir_op_ufind_msb: bit index of the most-significant set bit, or -1 when
 * the input is zero.  DXIL's FirstbitHi counts from the top of the word,
 * so the result is remapped as (src_bits - 1) - firstbit_hi, and a select
 * substitutes -1 for a zero input.
 */
static bool
emit_ufind_msb(struct ntd_context *ctx, nir_alu_instr *alu,
               const struct dxil_value *val)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);
   /* NOTE(review): the overload is picked from src_bits while the final
    * select mixes the firstbit result with a dst_bits-wide -1; this
    * presumably assumes src and dst widths agree (32-bit) — confirm for
    * other sizes. */
   enum overload_type overload = get_overload(info->output_type, src_bits);

   const struct dxil_value *v = emit_unary_call(ctx, overload,
                                                DXIL_INTR_FIRSTBIT_HI, val);
   if (!v)
      return false;

   const struct dxil_value *size = dxil_module_get_int32_const(&ctx->mod,
      src_bits - 1);
   const struct dxil_value *zero = dxil_module_get_int_const(&ctx->mod, 0,
                                                             src_bits);
   if (!size || !zero)
      return false;

   /* v = (src_bits - 1) - firstbit_hi(val) */
   v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SUB, size, v, 0);
   /* cnd = (val != 0) */
   const struct dxil_value *cnd = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_NE,
                                                val, zero);
   if (!v || !cnd)
      return false;

   const struct dxil_value *minus_one =
      dxil_module_get_int_const(&ctx->mod, -1, dst_bits);
   if (!minus_one)
      return false;

   /* result = val != 0 ? v : -1 */
   v = dxil_emit_select(&ctx->mod, cnd, v, minus_one);
   if (!v)
      return false;

   store_alu_dest(ctx, alu, 0, v);
   return true;
}
1928
1929static bool
1930emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
1931{
1932   const struct dxil_func *func = dxil_get_function(&ctx->mod,
1933                                                    "dx.op.legacyF16ToF32",
1934                                                    DXIL_NONE);
1935   if (!func)
1936      return false;
1937
1938   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
1939   if (!opcode)
1940      return false;
1941
1942   const struct dxil_value *args[] = {
1943     opcode,
1944     val
1945   };
1946
1947   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1948   if (!v)
1949      return false;
1950   store_alu_dest(ctx, alu, 0, v);
1951   return true;
1952}
1953
1954static bool
1955emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
1956{
1957   const struct dxil_func *func = dxil_get_function(&ctx->mod,
1958                                                    "dx.op.legacyF32ToF16",
1959                                                    DXIL_NONE);
1960   if (!func)
1961      return false;
1962
1963   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
1964   if (!opcode)
1965      return false;
1966
1967   const struct dxil_value *args[] = {
1968     opcode,
1969     val
1970   };
1971
1972   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1973   if (!v)
1974      return false;
1975   store_alu_dest(ctx, alu, 0, v);
1976   return true;
1977}
1978
1979static bool
1980emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
1981{
1982   const struct dxil_type *type = get_alu_src_type(ctx, alu, 0);
1983   nir_alu_type t = dxil_type_to_nir_type(type);
1984
1985   for (unsigned i = 0; i < num_inputs; i++) {
1986      const struct dxil_value *src =
1987         get_src(ctx, &alu->src[i].src, alu->src[i].swizzle[0], t);
1988      if (!src)
1989         return false;
1990
1991      store_alu_dest(ctx, alu, i, src);
1992   }
1993   return true;
1994}
1995
1996static bool
1997emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
1998{
1999   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
2000   if (!func)
2001      return false;
2002
2003   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
2004   if (!opcode)
2005      return false;
2006
2007   const struct dxil_value *args[3] = {
2008      opcode,
2009      get_src(ctx, &alu->src[0].src, 0, nir_type_uint32),
2010      get_src(ctx, &alu->src[0].src, 1, nir_type_uint32),
2011   };
2012   if (!args[1] || !args[2])
2013      return false;
2014
2015   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2016   if (!v)
2017      return false;
2018   store_dest(ctx, &alu->dest.dest, 0, v, nir_type_float64);
2019   return true;
2020}
2021
2022static bool
2023emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
2024{
2025   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
2026   if (!func)
2027      return false;
2028
2029   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
2030   if (!opcode)
2031      return false;
2032
2033   const struct dxil_value *args[] = {
2034      opcode,
2035      get_src(ctx, &alu->src[0].src, 0, nir_type_float64)
2036   };
2037   if (!args[1])
2038      return false;
2039
2040   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2041   if (!v)
2042      return false;
2043
2044   const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
2045   const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
2046   if (!hi || !lo)
2047      return false;
2048
2049   store_dest_value(ctx, &alu->dest.dest, 0, hi);
2050   store_dest_value(ctx, &alu->dest.dest, 1, lo);
2051   return true;
2052}
2053
2054static bool
2055emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
2056{
2057   /* handle vec-instructions first; they are the only ones that produce
2058    * vector results.
2059    */
2060   switch (alu->op) {
2061   case nir_op_vec2:
2062   case nir_op_vec3:
2063   case nir_op_vec4:
2064   case nir_op_vec8:
2065   case nir_op_vec16:
2066      return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
2067   case nir_op_mov: {
2068         assert(nir_dest_num_components(alu->dest.dest) == 1);
2069         store_ssa_def(ctx, &alu->dest.dest.ssa, 0, get_src_ssa(ctx,
2070                        alu->src->src.ssa, alu->src->swizzle[0]));
2071         return true;
2072      }
2073   case nir_op_pack_double_2x32_dxil:
2074      return emit_make_double(ctx, alu);
2075   case nir_op_unpack_double_2x32_dxil:
2076      return emit_split_double(ctx, alu);
2077   default:
2078      /* silence warnings */
2079      ;
2080   }
2081
2082   /* other ops should be scalar */
2083   assert(alu->dest.write_mask == 1);
2084   const struct dxil_value *src[4];
2085   assert(nir_op_infos[alu->op].num_inputs <= 4);
2086   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2087      src[i] = get_alu_src(ctx, alu, i);
2088      if (!src[i])
2089         return false;
2090   }
2091
2092   switch (alu->op) {
2093   case nir_op_iadd:
2094   case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);
2095
2096   case nir_op_isub:
2097   case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);
2098
2099   case nir_op_imul:
2100   case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);
2101
2102   case nir_op_idiv:
2103   case nir_op_fdiv: return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);
2104
2105   case nir_op_udiv: return emit_binop(ctx, alu, DXIL_BINOP_UDIV, src[0], src[1]);
2106   case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
2107   case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2108   case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2109   case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
2110   case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
2111   case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
2112   case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
2113   case nir_op_ior:  return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
2114   case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
2115   case nir_op_inot: {
2116      unsigned bit_size = alu->dest.dest.ssa.bit_size;
2117      intmax_t val = bit_size == 1 ? 1 : -1;
2118      const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
2119      return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
2120   }
2121   case nir_op_ieq:  return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
2122   case nir_op_ine:  return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
2123   case nir_op_ige:  return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
2124   case nir_op_uge:  return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
2125   case nir_op_ilt:  return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
2126   case nir_op_ult:  return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
2127   case nir_op_feq:  return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
2128   case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
2129   case nir_op_flt:  return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
2130   case nir_op_fge:  return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
2131   case nir_op_bcsel: return emit_select(ctx, alu, src[0], src[1], src[2]);
2132   case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
2133   case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
2134   case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
2135   case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
2136   case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
2137   case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
2138   case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
2139   case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
2140   case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
2141   case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
2142   case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
2143
2144   case nir_op_fddx:
2145   case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]);
2146   case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]);
2147   case nir_op_fddy:
2148   case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]);
2149   case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]);
2150
2151   case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2152   case nir_op_frcp: {
2153         const struct dxil_value *one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2154         return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
2155      }
2156   case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
2157   case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
2158   case nir_op_ufind_msb: return emit_ufind_msb(ctx, alu, src[0]);
2159   case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
2160   case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
2161   case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
2162   case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
2163   case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
2164   case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
2165   case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
2166   case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
2167   case nir_op_ffma: return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
2168
2169   case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0]);
2170   case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0]);
2171
2172   case nir_op_b2i16:
2173   case nir_op_i2i16:
2174   case nir_op_f2i16:
2175   case nir_op_f2u16:
2176   case nir_op_u2u16:
2177   case nir_op_u2f16:
2178   case nir_op_i2f16:
2179   case nir_op_f2f16_rtz:
2180   case nir_op_b2i32:
2181   case nir_op_f2f32:
2182   case nir_op_f2i32:
2183   case nir_op_f2u32:
2184   case nir_op_i2f32:
2185   case nir_op_i2i32:
2186   case nir_op_u2f32:
2187   case nir_op_u2u32:
2188   case nir_op_b2i64:
2189   case nir_op_f2f64:
2190   case nir_op_f2i64:
2191   case nir_op_f2u64:
2192   case nir_op_i2f64:
2193   case nir_op_i2i64:
2194   case nir_op_u2f64:
2195   case nir_op_u2u64:
2196      return emit_cast(ctx, alu, src[0]);
2197
2198   case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]);
2199   case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
2200   case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
2201   default:
2202      NIR_INSTR_UNSUPPORTED(&alu->instr);
2203      assert("Unimplemented ALU instruction");
2204      return false;
2205   }
2206}
2207
2208static const struct dxil_value *
2209load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
2210         const struct dxil_value *offset, enum overload_type overload)
2211{
2212   assert(handle && offset);
2213
2214   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
2215   if (!opcode)
2216      return NULL;
2217
2218   const struct dxil_value *args[] = {
2219      opcode, handle, offset
2220   };
2221
2222   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
2223   if (!func)
2224      return NULL;
2225   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2226}
2227
2228static bool
2229emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2230{
2231   const struct dxil_value *opcode, *mode;
2232   const struct dxil_func *func;
2233   uint32_t flags = 0;
2234
2235   if (nir_intrinsic_execution_scope(intr) == NIR_SCOPE_WORKGROUP)
2236      flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
2237
2238   nir_variable_mode modes = nir_intrinsic_memory_modes(intr);
2239   nir_scope mem_scope = nir_intrinsic_memory_scope(intr);
2240
2241   /* Currently vtn uses uniform to indicate image memory, which DXIL considers global */
2242   if (modes & nir_var_uniform)
2243      modes |= nir_var_mem_global;
2244
2245   if (modes & (nir_var_mem_ssbo | nir_var_mem_global)) {
2246      if (mem_scope > NIR_SCOPE_WORKGROUP)
2247         flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
2248      else
2249         flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
2250   }
2251
2252   if (modes & nir_var_mem_shared)
2253      flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
2254
2255   func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
2256   if (!func)
2257      return false;
2258
2259   opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
2260   if (!opcode)
2261      return false;
2262
2263   mode = dxil_module_get_int32_const(&ctx->mod, flags);
2264   if (!mode)
2265      return false;
2266
2267   const struct dxil_value *args[] = { opcode, mode };
2268
2269   return dxil_emit_call_void(&ctx->mod, func,
2270                              args, ARRAY_SIZE(args));
2271}
2272
2273static bool
2274emit_load_global_invocation_id(struct ntd_context *ctx,
2275                                    nir_intrinsic_instr *intr)
2276{
2277   assert(intr->dest.is_ssa);
2278   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2279
2280   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2281      if (comps & (1 << i)) {
2282         const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
2283         if (!idx)
2284            return false;
2285         const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
2286
2287         if (!globalid)
2288            return false;
2289
2290         store_dest_value(ctx, &intr->dest, i, globalid);
2291      }
2292   }
2293   return true;
2294}
2295
2296static bool
2297emit_load_local_invocation_id(struct ntd_context *ctx,
2298                              nir_intrinsic_instr *intr)
2299{
2300   assert(intr->dest.is_ssa);
2301   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2302
2303   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2304      if (comps & (1 << i)) {
2305         const struct dxil_value
2306            *idx = dxil_module_get_int32_const(&ctx->mod, i);
2307         if (!idx)
2308            return false;
2309         const struct dxil_value
2310            *threadidingroup = emit_threadidingroup_call(ctx, idx);
2311         if (!threadidingroup)
2312            return false;
2313         store_dest_value(ctx, &intr->dest, i, threadidingroup);
2314      }
2315   }
2316   return true;
2317}
2318
2319static bool
2320emit_load_local_invocation_index(struct ntd_context *ctx,
2321                                 nir_intrinsic_instr *intr)
2322{
2323   assert(intr->dest.is_ssa);
2324
2325   const struct dxil_value
2326      *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
2327   if (!flattenedthreadidingroup)
2328      return false;
2329   store_dest_value(ctx, &intr->dest, 0, flattenedthreadidingroup);
2330
2331   return true;
2332}
2333
2334static bool
2335emit_load_local_workgroup_id(struct ntd_context *ctx,
2336                              nir_intrinsic_instr *intr)
2337{
2338   assert(intr->dest.is_ssa);
2339   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2340
2341   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2342      if (comps & (1 << i)) {
2343         const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
2344         if (!idx)
2345            return false;
2346         const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
2347         if (!groupid)
2348            return false;
2349         store_dest_value(ctx, &intr->dest, i, groupid);
2350      }
2351   }
2352   return true;
2353}
2354
2355static bool
2356emit_load_unary_external_function(struct ntd_context *ctx,
2357                                  nir_intrinsic_instr *intr, const char *name,
2358                                  int32_t dxil_intr)
2359{
2360   const struct dxil_func *func =
2361      dxil_get_function(&ctx->mod, name, DXIL_I32);
2362   if (!func)
2363      return false;
2364
2365   const struct dxil_value *opcode =
2366      dxil_module_get_int32_const(&ctx->mod, dxil_intr);
2367   if (!opcode)
2368      return false;
2369
2370   const struct dxil_value *args[] = {opcode};
2371
2372   const struct dxil_value *value =
2373      dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2374   store_dest_value(ctx, &intr->dest, 0, value);
2375
2376   return true;
2377}
2378
2379static const struct dxil_value *
2380get_int32_undef(struct dxil_module *m)
2381{
2382   const struct dxil_type *int32_type =
2383      dxil_module_get_int_type(m, 32);
2384   if (!int32_type)
2385      return NULL;
2386
2387   return dxil_module_get_undef(m, int32_type);
2388}
2389
2390static const struct dxil_value *
2391emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var,
2392                   const struct dxil_value *index)
2393{
2394   assert(var->data.mode == nir_var_shader_temp);
2395
2396   struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var);
2397   assert(he != NULL);
2398   const struct dxil_value *ptr = he->data;
2399
2400   const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0);
2401   if (!zero)
2402      return NULL;
2403
2404   const struct dxil_value *ops[] = { ptr, zero, index };
2405   return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
2406}
2407
static const struct dxil_value *
get_ubo_ssbo_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class, unsigned base_binding)
{
   /* This source might be one of:
    * 1. Constant resource index - just look it up in precomputed handle arrays
    *    If it's null in that array, create a handle, and store the result
    * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
    * 3. Dynamic resource index - create a handle for it here
    */
   assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
   nir_const_value *const_block_index = nir_src_as_const_value(*src);
   /* Cache slot for case 1; stays NULL for a dynamic index (case 3). */
   const struct dxil_value **handle_entry = NULL;
   if (const_block_index) {
      /* Constant-index caching is only used outside Vulkan mode. */
      assert(!ctx->opts->vulkan_environment);
      switch (class) {
      case DXIL_RESOURCE_CLASS_CBV:
         handle_entry = &ctx->cbv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_CLASS_UAV:
         handle_entry = &ctx->uav_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_CLASS_SRV:
         handle_entry = &ctx->srv_handles[const_block_index->u32];
         break;
      default:
         unreachable("Unexpected resource class");
      }
   }

   /* Fast path: a handle was already created for this constant index. */
   if (handle_entry && *handle_entry)
      return *handle_entry;

   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, 0);
   if (ctx->opts->vulkan_environment) {
      /* Case 2: the SSA value already is the handle. */
      return value;
   }

   /* Cases 1 (miss) and 3: create the handle; the index is flagged as
    * non-uniform only when it is not a compile-time constant. */
   const struct dxil_value *handle = emit_createhandle_call(ctx, class,
      get_resource_id(ctx, class, 0, base_binding), value, !const_block_index);
   if (handle_entry)
      *handle_entry = handle;

   return handle;
}
2452
2453static bool
2454emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2455{
2456   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
2457
2458   nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
2459   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
2460   if (var && var->data.access & ACCESS_NON_WRITEABLE)
2461      class = DXIL_RESOURCE_CLASS_SRV;
2462
2463   const struct dxil_value *handle = get_ubo_ssbo_handle(ctx, &intr->src[0], class, 0);
2464   const struct dxil_value *offset =
2465      get_src(ctx, &intr->src[1], 0, nir_type_uint);
2466   if (!int32_undef || !handle || !offset)
2467      return false;
2468
2469   assert(nir_src_bit_size(intr->src[0]) == 32);
2470   assert(nir_intrinsic_dest_components(intr) <= 4);
2471
2472   const struct dxil_value *coord[2] = {
2473      offset,
2474      int32_undef
2475   };
2476
2477   const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord, DXIL_I32);
2478   if (!load)
2479      return false;
2480
2481   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2482      const struct dxil_value *val =
2483         dxil_emit_extractval(&ctx->mod, load, i);
2484      if (!val)
2485         return false;
2486      store_dest_value(ctx, &intr->dest, i, val);
2487   }
2488   return true;
2489}
2490
2491static bool
2492emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2493{
2494   const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, 0);
2495   const struct dxil_value *offset =
2496      get_src(ctx, &intr->src[2], 0, nir_type_uint);
2497   if (!handle || !offset)
2498      return false;
2499
2500   assert(nir_src_bit_size(intr->src[0]) == 32);
2501   unsigned num_components = nir_src_num_components(intr->src[0]);
2502   assert(num_components <= 4);
2503   const struct dxil_value *value[4];
2504   for (unsigned i = 0; i < num_components; ++i) {
2505      value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint);
2506      if (!value[i])
2507         return false;
2508   }
2509
2510   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
2511   if (!int32_undef)
2512      return false;
2513
2514   const struct dxil_value *coord[2] = {
2515      offset,
2516      int32_undef
2517   };
2518
2519   for (int i = num_components; i < 4; ++i)
2520      value[i] = int32_undef;
2521
2522   const struct dxil_value *write_mask =
2523      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
2524   if (!write_mask)
2525      return false;
2526
2527   return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32);
2528}
2529
2530static bool
2531emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2532{
2533   const struct dxil_value *value =
2534      get_src(ctx, &intr->src[0], 0, nir_type_uint);
2535   const struct dxil_value *mask =
2536      get_src(ctx, &intr->src[1], 0, nir_type_uint);
2537   const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[2], DXIL_RESOURCE_CLASS_UAV, 0);
2538   const struct dxil_value *offset =
2539      get_src(ctx, &intr->src[3], 0, nir_type_uint);
2540   if (!value || !mask || !handle || !offset)
2541      return false;
2542
2543   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
2544   if (!int32_undef)
2545      return false;
2546
2547   const struct dxil_value *coord[3] = {
2548      offset, int32_undef, int32_undef
2549   };
2550
2551   return
2552      emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL &&
2553      emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL;
2554}
2555
/* Store a 32-bit scalar to workgroup-shared memory.  The plain variant
 * (nir_intrinsic_store_shared_dxil) becomes a GEP plus an ordinary store;
 * the masked variant first clears the masked bits with an atomic AND and
 * then ORs in the value, so concurrent lanes writing disjoint bits of the
 * same word don't race.
 */
static bool
emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;

   /* All shared mem accesses should have been lowered to scalar 32bit
    * accesses.
    */
   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_src_num_components(intr->src[0]) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   /* The masked variant carries the mask in src[1] and the index in
    * src[2]; the plain variant has the index in src[1]. */
   if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
      index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   else
      index = get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!index)
      return false;

   /* GEP into the shared-memory array at 'index'. */
   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
   const struct dxil_value *ptr, *value;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!value)
      return false;

   if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
      return dxil_emit_store(&ctx->mod, value, ptr, 4, false);

   const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!mask)
      return false;

   /* Clear the bits selected by the mask... */
   if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false,
                            DXIL_ATOMIC_ORDERING_ACQREL,
                            DXIL_SYNC_SCOPE_CROSSTHREAD))
      return false;

   /* ...then merge in the new value. */
   if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false,
                            DXIL_ATOMIC_ORDERING_ACQREL,
                            DXIL_SYNC_SCOPE_CROSSTHREAD))
      return false;

   return true;
}
2608
/* Store one scalar 32-bit value into the scratch array.
 * Sources: [0]=value, [1]=index into the scratch global.
 */
static bool
emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;

   /* All scratch mem accesses should have been lowered to scalar 32bit
    * accesses.
    */
   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_src_num_components(intr->src[0]) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!index)
      return false;

   /* GEP into the scratch global: scratchvars[0][index]. */
   const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
   const struct dxil_value *ptr, *value;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!value)
      return false;

   /* 4-byte-aligned, non-volatile store. */
   return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
}
2641
/* Load from a UBO via cbufferLoadLegacy.
 * Sources: [0]=buffer handle, [1]=byte offset.  The legacy cbuffer load
 * addresses 16-byte registers, so the byte offset is divided by 16
 * (constant fold when possible, otherwise an arithmetic shift by 4).
 */
static bool
emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, 0);
   if (!handle)
      return false;

   const struct dxil_value *offset;
   nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]);
   if (const_offset) {
      /* Fold the byte->register conversion into the constant. */
      offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4);
   } else {
      const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint);
      const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4);
      if (!offset_src || !c4)
         return false;

      /* offset >> 4: byte offset to 16-byte register index. */
      offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0);
   }

   /* The load returns an aggregate of four components. */
   const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_F32);

   if (!agg)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i);
      /* 1-bit destinations are booleans; everything else is stored as
       * float (the load overload above is F32).
       */
      store_dest(ctx, &intr->dest, i, retval,
                 nir_dest_bit_size(intr->dest) > 1 ? nir_type_float : nir_type_bool);
   }
   return true;
}
2674
/* Load from a UBO where the offset is already a 16-byte register index
 * (the _dxil variant of the intrinsic).  Sources: [0]=buffer handle,
 * [1]=register index.  Results are stored raw as i32.
 */
static bool
emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(nir_dest_num_components(intr->dest) <= 4);
   assert(nir_dest_bit_size(intr->dest) == 32);

   const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, 0);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);

   if (!handle || !offset)
      return false;

   const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_I32);
   if (!agg)
      return false;

   /* Unpack the aggregate into the destination components. */
   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++)
      store_dest_value(ctx, &intr->dest, i,
                       dxil_emit_extractval(&ctx->mod, agg, i));

   return true;
}
2698
/* Emit dx.op.storeOutput calls for a store to a shader output variable.
 *
 * Compact outputs store a single scalar whose column is the constant
 * array index taken from the deref in src[0]; regular outputs store one
 * call per component selected by the intrinsic's write mask, with the
 * value taken from src[1].
 */
static bool
emit_store_output(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                  nir_variable *output)
{
   nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(output->type));
   enum overload_type overload = DXIL_F32;
   /* Compact outputs are always stored as 32-bit floats. */
   if (output->data.compact)
      out_type = nir_type_float;
   else
      overload = get_overload(out_type, glsl_get_bit_size(output->type));
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.storeOutput", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_STORE_OUTPUT);
   const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, (int)output->data.driver_location);
   const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);

   bool success = true;
   if (output->data.compact) {
      /* The constant array index of the deref selects the column. */
      nir_deref_instr *array_deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
      unsigned array_index = nir_src_as_uint(array_deref->arr.index);

      const struct dxil_value *col = dxil_module_get_int8_const(&ctx->mod, array_index);
      const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, out_type);
      if (!col || !value)
         return false;

      const struct dxil_value *args[] = {
         opcode, output_id, row, col, value
      };
      success = dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
   } else {
      /* One storeOutput per component enabled in the write mask. */
      uint32_t writemask = nir_intrinsic_write_mask(intr);
      for (unsigned i = 0; i < nir_src_num_components(intr->src[1]) && success; ++i) {
         if (writemask & (1 << i)) {
            const struct dxil_value *col = dxil_module_get_int8_const(&ctx->mod, i);
            const struct dxil_value *value = get_src(ctx, &intr->src[1], i, out_type);
            if (!col || !value)
               return false;

            const struct dxil_value *args[] = {
               opcode, output_id, row, col, value
            };
            success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
         }
      }
   }
   return success;
}
2750
2751static bool
2752emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2753{
2754   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2755   nir_variable *var = nir_deref_instr_get_variable(deref);
2756
2757   switch (var->data.mode) {
2758   case nir_var_shader_out:
2759      return emit_store_output(ctx, intr, var);
2760
2761   default:
2762      unreachable("unsupported nir_variable_mode");
2763   }
2764}
2765
/* Load all destination components of an arrayed input via dx.op.loadInput.
 *
 * For geometry shaders the array index selects the input vertex (row 0);
 * for other stages the index selects the row and the vertex id is undef.
 */
static bool
emit_load_input_array(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var, nir_src *index)
{
   assert(var);
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);
   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);
   const struct dxil_value *vertex_id;
   const struct dxil_value *row;

   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
      /* GS: array index is the vertex; rows are not arrayed. */
      vertex_id = get_src(ctx, index, 0, nir_type_int);
      row = dxil_module_get_int32_const(&ctx->mod, 0);
   } else {
      const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
      vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
      row = get_src(ctx, index, 0, nir_type_int);
   }

   if (!opcode || !input_id || !vertex_id || !row)
      return false;

   /* Type/overload come from the array's element type. */
   nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(glsl_get_array_element(var->type)));
   enum overload_type overload = get_overload(out_type, glsl_get_bit_size(glsl_get_array_element(var->type)));

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);

   if (!func)
      return false;

   /* One loadInput per destination component (column). */
   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
      const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);
      if (!comp)
         return false;

      const struct dxil_value *args[] = {
         opcode, input_id, row, comp, vertex_id
      };

      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
      if (!retval)
         return false;
      store_dest(ctx, &intr->dest, i, retval, out_type);
   }
   return true;
}
2811
2812static bool
2813emit_load_compact_input_array(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var, nir_deref_instr *deref)
2814{
2815   assert(var);
2816   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);
2817   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);
2818   const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);
2819   const struct dxil_value *vertex_id;
2820
2821   nir_src *col = &deref->arr.index;
2822   nir_src_is_const(*col);
2823
2824   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
2825      nir_deref_instr *deref_parent = nir_deref_instr_parent(deref);
2826      assert(deref_parent->deref_type == nir_deref_type_array);
2827
2828      vertex_id = get_src(ctx, &deref_parent->arr.index, 0, nir_type_int);
2829   } else {
2830      const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
2831      vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
2832   }
2833
2834   if (!opcode || !input_id || !row || !vertex_id)
2835      return false;
2836
2837   nir_alu_type out_type = nir_type_float;
2838   enum overload_type overload = get_overload(out_type, 32);
2839
2840   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);
2841
2842   if (!func)
2843      return false;
2844
2845   const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, nir_src_as_int(*col));
2846   if (!comp)
2847      return false;
2848
2849   const struct dxil_value *args[] = {
2850      opcode, input_id, row, comp, vertex_id
2851   };
2852
2853   const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2854   if (!retval)
2855      return false;
2856   store_dest(ctx, &intr->dest, 0, retval, out_type);
2857   return true;
2858}
2859
2860static bool
2861emit_load_input_interpolated(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var)
2862{
2863   assert(var);
2864   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);
2865   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);
2866   const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);
2867   const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
2868   const struct dxil_value *vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
2869
2870   if (!opcode || !input_id || !row || !int32_type || !vertex_id)
2871      return false;
2872
2873   nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type));
2874   enum overload_type overload = get_overload(out_type, glsl_get_bit_size(var->type));
2875
2876   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);
2877
2878   if (!func)
2879      return false;
2880
2881   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
2882      const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);
2883
2884      const struct dxil_value *args[] = {
2885         opcode, input_id, row, comp, vertex_id
2886      };
2887
2888      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2889      if (!retval)
2890         return false;
2891      store_dest(ctx, &intr->dest, i, retval, out_type);
2892   }
2893   return true;
2894}
2895
2896static bool
2897emit_load_input_flat(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable* var)
2898{
2899   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATTRIBUTE_AT_VERTEX);
2900   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, (int)var->data.driver_location);
2901   const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);
2902   const struct dxil_value *vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
2903
2904   nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type));
2905   enum overload_type overload = get_overload(out_type, glsl_get_bit_size(var->type));
2906
2907   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.attributeAtVertex", overload);
2908   if (!func)
2909      return false;
2910
2911   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
2912      const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);
2913      const struct dxil_value *args[] = {
2914         opcode, input_id, row, comp, vertex_id
2915      };
2916
2917      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2918      if (!retval)
2919         return false;
2920
2921      store_dest(ctx, &intr->dest, i, retval, out_type);
2922   }
2923   return true;
2924}
2925
2926static bool
2927emit_load_input(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2928                nir_variable *input)
2929{
2930   if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER ||
2931       input->data.interpolation != INTERP_MODE_FLAT ||
2932       !ctx->opts->interpolate_at_vertex ||
2933       ctx->opts->provoking_vertex == 0 ||
2934       glsl_type_is_integer(input->type))
2935      return emit_load_input_interpolated(ctx, intr, input);
2936   else
2937      return emit_load_input_flat(ctx, intr, input);
2938}
2939
/* Load one scalar through a variable pointer: GEP into the variable by
 * the index in src[1], then load 32 bits.  The result is stored as uint.
 */
static bool
emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   struct nir_variable *var =
      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));

   const struct dxil_value *index =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index);
   if (!ptr)
      return false;

   /* 4-byte-aligned, non-volatile load. */
   const struct dxil_value *retval =
      dxil_emit_load(&ctx->mod, ptr, 4, false);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}
2963
/* Load one scalar 32-bit value from the shared-memory array.
 * Source: [0]=index into the shared global.
 */
static bool
emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;
   unsigned bit_size = nir_dest_bit_size(intr->dest);
   unsigned align = bit_size / 8;

   /* All shared mem accesses should have been lowered to scalar 32bit
    * accesses.
    */
   assert(bit_size == 32);
   assert(nir_dest_num_components(intr->dest) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!index)
      return false;

   /* GEP into the shared-memory global: sharedvars[0][index]. */
   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
   const struct dxil_value *ptr, *retval;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   retval = dxil_emit_load(&ctx->mod, ptr, align, false);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}
2999
/* Load one scalar 32-bit value from the scratch array.
 * Source: [0]=index into the scratch global.
 */
static bool
emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;
   unsigned bit_size = nir_dest_bit_size(intr->dest);
   unsigned align = bit_size / 8;

   /* All scratch mem accesses should have been lowered to scalar 32bit
    * accesses.
    */
   assert(bit_size == 32);
   assert(nir_dest_num_components(intr->dest) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!index)
      return false;

   /* GEP into the scratch global: scratchvars[0][index]. */
   const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
   const struct dxil_value *ptr, *retval;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   retval = dxil_emit_load(&ctx->mod, ptr, align, false);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}
3035
3036static bool
3037emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3038{
3039   assert(intr->src[0].is_ssa);
3040   nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
3041   nir_variable *var = nir_deref_instr_get_variable(deref);
3042
3043   switch (var->data.mode) {
3044   case nir_var_shader_in:
3045      if (glsl_type_is_array(var->type)) {
3046         if (var->data.compact)
3047            return emit_load_compact_input_array(ctx, intr, var, deref);
3048         else
3049            return emit_load_input_array(ctx, intr, var, &deref->arr.index);
3050      }
3051      return emit_load_input(ctx, intr, var);
3052
3053   default:
3054      unreachable("unsupported nir_variable_mode");
3055   }
3056}
3057
3058static bool
3059emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
3060{
3061   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
3062   if (!opcode)
3063      return false;
3064
3065   const struct dxil_value *args[] = {
3066     opcode,
3067     value
3068   };
3069
3070   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
3071   if (!func)
3072      return false;
3073
3074   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3075}
3076
3077static bool
3078emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3079{
3080   const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
3081   if (!value)
3082      return false;
3083
3084   return emit_discard_if_with_value(ctx, value);
3085}
3086
3087static bool
3088emit_discard(struct ntd_context *ctx)
3089{
3090   const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
3091   return emit_discard_if_with_value(ctx, value);
3092}
3093
3094static bool
3095emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3096{
3097   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
3098   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
3099   if (!opcode || !stream_id)
3100      return false;
3101
3102   const struct dxil_value *args[] = {
3103     opcode,
3104     stream_id
3105   };
3106
3107   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
3108   if (!func)
3109      return false;
3110
3111   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3112}
3113
3114static bool
3115emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3116{
3117   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
3118   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
3119   if (!opcode || !stream_id)
3120      return false;
3121
3122   const struct dxil_value *args[] = {
3123     opcode,
3124     stream_id
3125   };
3126
3127   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
3128   if (!func)
3129      return false;
3130
3131   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3132}
3133
/* Emit an image store (image_store / image_deref_store).
 *
 * Resolves the UAV handle (from the SSA deref in a Vulkan environment,
 * from the binding-indexed handle table otherwise), gathers the needed
 * coordinate components, pads unused value components with undef, and
 * dispatches to bufferStore or textureStore depending on the image
 * dimension.
 */
static bool
emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle;
   bool is_array = false;
   if (ctx->opts->vulkan_environment) {
      assert(intr->intrinsic == nir_intrinsic_image_deref_store);
      handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   } else {
      assert(intr->intrinsic == nir_intrinsic_image_store);
      int binding = nir_src_as_int(intr->src[0]);
      is_array = nir_intrinsic_image_array(intr);
      handle = ctx->uav_handles[binding];
   }
   if (!handle)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Only the first num_coords coordinates are meaningful; the rest stay
    * undef.
    */
   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_store ?
      nir_intrinsic_image_dim(intr) :
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   /* Array images take the layer as an extra coordinate. */
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type in_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(in_type, 32);

   assert(nir_src_bit_size(intr->src[3]) == 32);
   unsigned num_components = nir_src_num_components(intr->src[3]);
   assert(num_components <= 4);
   const struct dxil_value *value[4];
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[3], i, in_type);
      if (!value[i])
         return false;
   }

   /* Pad unused value slots; the write mask below excludes them anyway. */
   for (int i = num_components; i < 4; ++i)
      value[i] = int32_undef;

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      coord[1] = int32_undef;
      return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
   } else
      return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
}
3198
/* Emit an image load (image_load / image_deref_load).
 *
 * Mirrors emit_image_store: resolve the UAV handle, gather coordinates,
 * dispatch to bufferLoad or textureLoad, then unpack the aggregate
 * result into the destination components.  Loading more than one
 * component requires the typed-UAV-load-additional-formats feature.
 */
static bool
emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle;
   bool is_array = false;
   if (ctx->opts->vulkan_environment) {
      assert(intr->intrinsic == nir_intrinsic_image_deref_load);
      handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   } else {
      assert(intr->intrinsic == nir_intrinsic_image_load);
      int binding = nir_src_as_int(intr->src[0]);
      is_array = nir_intrinsic_image_array(intr);
      handle = ctx->uav_handles[binding];
   }
   if (!handle)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Only the first num_coords coordinates are meaningful; the rest stay
    * undef.
    */
   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_load ?
      nir_intrinsic_image_dim(intr) :
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   /* Array images take the layer as an extra coordinate. */
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, 32);

   const struct dxil_value *load_result;
   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      coord[1] = int32_undef;
      load_result = emit_bufferload_call(ctx, handle, coord, overload);
   } else
      load_result = emit_textureload_call(ctx, handle, coord, overload);

   if (!load_result)
      return false;

   assert(nir_dest_bit_size(intr->dest) == 32);
   unsigned num_components = nir_dest_num_components(intr->dest);
   assert(num_components <= 4);
   for (unsigned i = 0; i < num_components; ++i) {
      const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
      if (!component)
         return false;
      store_dest(ctx, &intr->dest, i, component, out_type);
   }

   /* Multi-component typed UAV loads need an optional hardware feature;
    * record that this module uses it.
    */
   if (num_components > 1)
      ctx->mod.feats.typed_uav_load_additional_formats = true;

   return true;
}
3264
/* Bundled operands for texture sample/load/query emission.  Only the
 * fields relevant to a given operation need to be set; the rest may be
 * left NULL/zero (see e.g. emit_texture_size, which only uses 'tex' and
 * 'lod_or_sample').
 */
struct texop_parameters {
   const struct dxil_value *tex;      /* resource handle */
   const struct dxil_value *sampler;  /* sampler handle */
   const struct dxil_value *bias, *lod_or_sample, *min_lod;
   const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
   const struct dxil_value *cmp;      /* comparison value for shadow ops */
   enum overload_type overload;
};
3273
3274static const struct dxil_value *
3275emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
3276{
3277   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
3278   if (!func)
3279      return false;
3280
3281   const struct dxil_value *args[] = {
3282      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
3283      params->tex,
3284      params->lod_or_sample
3285   };
3286
3287   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3288}
3289
/* Emit an image-size query (image_size / image_deref_size) via
 * dx.op.getDimensions, storing each returned dimension into the
 * destination as uint.  Source [1] is the LOD to query.
 */
static bool
emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle;
   if (ctx->opts->vulkan_environment) {
      assert(intr->intrinsic == nir_intrinsic_image_deref_size);
      handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
   }
   else {
      assert(intr->intrinsic == nir_intrinsic_image_size);
      int binding = nir_src_as_int(intr->src[0]);
      handle = ctx->uav_handles[binding];
   }
   if (!handle)
      return false;

   const struct dxil_value *lod = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!lod)
      return false;

   struct texop_parameters params = {
      .tex = handle,
      .lod_or_sample = lod
   };
   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
   if (!dimensions)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
      store_dest(ctx, &intr->dest, i, retval, nir_type_uint);
   }

   return true;
}
3325
/* Query an SSBO's size via dx.op.getDimensions: the first component of
 * the result holds the buffer size.  The LOD operand is irrelevant for
 * buffers, so undef is passed.
 */
static bool
emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value* handle = NULL;
   if (ctx->opts->vulkan_environment) {
      handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
   } else {
      int binding = nir_src_as_int(intr->src[0]);
      handle = ctx->uav_handles[binding];
   }

   if (!handle)
     return false;

   struct texop_parameters params = {
      .tex = handle,
      .lod_or_sample = dxil_module_get_undef(
                        &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
   };

   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
   if (!dimensions)
      return false;

   const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);

   return true;
}
3355
/* Emit a UAV atomic read-modify-write on an SSBO.
 * Sources: [0]=buffer handle, [1]=offset, [2]=operand value.  The result
 * (the pre-op value, per DXIL atomic semantics -- NOTE(review): confirm
 * in emit_atomic_binop) is stored as 'type'.
 */
static bool
emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                   enum dxil_atomic_op op, nir_alu_type type)
{
   const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, 0);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *value =
      get_src(ctx, &intr->src[2], 0, type);

   if (!value || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Raw-buffer atomics only use the first coordinate; pad with undef. */
   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   const struct dxil_value *retval =
      emit_atomic_binop(ctx, handle, op, coord, value);

   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, type);
   return true;
}
3386
/* Emit a UAV atomic compare-and-swap on an SSBO.
 * Sources: [0]=buffer handle, [1]=offset, [2]=compare value, [3]=new
 * value.  The original memory value is stored to the destination.
 */
static bool
emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, 0);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *cmpval =
      get_src(ctx, &intr->src[2], 0, nir_type_int);
   const struct dxil_value *newval =
      get_src(ctx, &intr->src[3], 0, nir_type_int);

   if (!cmpval || !newval || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Raw-buffer atomics only use the first coordinate; pad with undef. */
   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   const struct dxil_value *retval =
      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);

   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_int);
   return true;
}
3418
/* Emit an atomicrmw on the shared-memory array.
 * Sources: [0]=index into the shared global, [1]=operand value.  The
 * previous memory value is stored to the destination.
 */
static bool
emit_shared_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                   enum dxil_rmw_op op, nir_alu_type type)
{
   const struct dxil_value *zero, *index;

   assert(nir_src_bit_size(intr->src[1]) == 32);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!index)
      return false;

   /* GEP into the shared-memory global: sharedvars[0][index]. */
   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
   const struct dxil_value *ptr, *value, *retval;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   value = get_src(ctx, &intr->src[1], 0, type);
   if (!value)
      return false;

   retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, op, false,
                                DXIL_ATOMIC_ORDERING_ACQREL,
                                DXIL_SYNC_SCOPE_CROSSTHREAD);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, type);
   return true;
}
3455
3456static bool
3457emit_shared_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3458{
3459   const struct dxil_value *zero, *index;
3460
3461   assert(nir_src_bit_size(intr->src[1]) == 32);
3462
3463   zero = dxil_module_get_int32_const(&ctx->mod, 0);
3464   if (!zero)
3465      return false;
3466
3467   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3468   if (!index)
3469      return false;
3470
3471   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
3472   const struct dxil_value *ptr, *cmpval, *newval, *retval;
3473
3474   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3475   if (!ptr)
3476      return false;
3477
3478   cmpval = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3479   newval = get_src(ctx, &intr->src[2], 0, nir_type_uint);
3480   if (!cmpval || !newval)
3481      return false;
3482
3483   retval = dxil_emit_cmpxchg(&ctx->mod, cmpval, newval, ptr, false,
3484                              DXIL_ATOMIC_ORDERING_ACQREL,
3485                              DXIL_SYNC_SCOPE_CROSSTHREAD);
3486   if (!retval)
3487      return false;
3488
3489   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3490   return true;
3491}
3492
3493static bool
3494emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3495{
3496   unsigned int binding = nir_intrinsic_binding(intr);
3497
3498   bool const_index = nir_src_is_const(intr->src[0]);
3499   if (const_index) {
3500      binding += nir_src_as_const_value(intr->src[0])->u32;
3501   }
3502
3503   const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
3504   if (!index_value)
3505      return false;
3506
3507   if (!const_index) {
3508      const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
3509      if (!offset)
3510         return false;
3511
3512      index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
3513      if (!index_value)
3514         return false;
3515   }
3516
3517   store_dest(ctx, &intr->dest, 0, index_value, nir_type_uint32);
3518   store_dest(ctx, &intr->dest, 1, dxil_module_get_int32_const(&ctx->mod, 0), nir_type_uint32);
3519   return true;
3520}
3521
3522static bool
3523emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3524{
3525   nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);
3526   /* We currently do not support reindex */
3527   assert(index && index->intrinsic == nir_intrinsic_vulkan_resource_index);
3528
3529   unsigned binding = nir_intrinsic_binding(index);
3530   unsigned space = nir_intrinsic_desc_set(index);
3531
3532   /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
3533   assert(space < 32);
3534
3535   nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
3536
3537   const struct dxil_value *handle = NULL;
3538   enum dxil_resource_class resource_class;
3539
3540   switch (nir_intrinsic_desc_type(intr)) {
3541   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
3542      resource_class = DXIL_RESOURCE_CLASS_CBV;
3543      break;
3544   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
3545      if (var->data.access & ACCESS_NON_WRITEABLE)
3546         resource_class = DXIL_RESOURCE_CLASS_SRV;
3547      else
3548         resource_class = DXIL_RESOURCE_CLASS_UAV;
3549      break;
3550   default:
3551      unreachable("unknown descriptor type");
3552      return false;
3553   }
3554
3555   const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
3556   if (!index_value)
3557      return false;
3558
3559   handle = emit_createhandle_call(ctx, resource_class,
3560      get_resource_id(ctx, resource_class, space, binding),
3561      index_value, false);
3562
3563   store_dest_value(ctx, &intr->dest, 0, handle);
3564   store_dest(ctx, &intr->dest, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32), nir_type_uint32);
3565
3566   return true;
3567}
3568
3569static bool
3570emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3571{
3572   switch (intr->intrinsic) {
3573   case nir_intrinsic_load_global_invocation_id:
3574   case nir_intrinsic_load_global_invocation_id_zero_base:
3575      return emit_load_global_invocation_id(ctx, intr);
3576   case nir_intrinsic_load_local_invocation_id:
3577      return emit_load_local_invocation_id(ctx, intr);
3578   case nir_intrinsic_load_local_invocation_index:
3579      return emit_load_local_invocation_index(ctx, intr);
3580   case nir_intrinsic_load_workgroup_id:
3581   case nir_intrinsic_load_workgroup_id_zero_base:
3582      return emit_load_local_workgroup_id(ctx, intr);
3583   case nir_intrinsic_load_ssbo:
3584      return emit_load_ssbo(ctx, intr);
3585   case nir_intrinsic_store_ssbo:
3586      return emit_store_ssbo(ctx, intr);
3587   case nir_intrinsic_store_ssbo_masked_dxil:
3588      return emit_store_ssbo_masked(ctx, intr);
3589   case nir_intrinsic_store_deref:
3590      return emit_store_deref(ctx, intr);
3591   case nir_intrinsic_store_shared_dxil:
3592   case nir_intrinsic_store_shared_masked_dxil:
3593      return emit_store_shared(ctx, intr);
3594   case nir_intrinsic_store_scratch_dxil:
3595      return emit_store_scratch(ctx, intr);
3596   case nir_intrinsic_load_deref:
3597      return emit_load_deref(ctx, intr);
3598   case nir_intrinsic_load_ptr_dxil:
3599      return emit_load_ptr(ctx, intr);
3600   case nir_intrinsic_load_ubo:
3601      return emit_load_ubo(ctx, intr);
3602   case nir_intrinsic_load_ubo_dxil:
3603      return emit_load_ubo_dxil(ctx, intr);
3604   case nir_intrinsic_load_front_face:
3605      return emit_load_input_interpolated(ctx, intr,
3606                                          ctx->system_value[SYSTEM_VALUE_FRONT_FACE]);
3607   case nir_intrinsic_load_vertex_id_zero_base:
3608      return emit_load_input_interpolated(ctx, intr,
3609                                          ctx->system_value[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
3610   case nir_intrinsic_load_instance_id:
3611      return emit_load_input_interpolated(ctx, intr,
3612                                          ctx->system_value[SYSTEM_VALUE_INSTANCE_ID]);
3613   case nir_intrinsic_load_primitive_id:
3614      return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
3615                                               DXIL_INTR_PRIMITIVE_ID);
3616   case nir_intrinsic_load_sample_id:
3617      return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
3618                                               DXIL_INTR_SAMPLE_INDEX);
3619   case nir_intrinsic_load_shared_dxil:
3620      return emit_load_shared(ctx, intr);
3621   case nir_intrinsic_load_scratch_dxil:
3622      return emit_load_scratch(ctx, intr);
3623   case nir_intrinsic_discard_if:
3624      return emit_discard_if(ctx, intr);
3625   case nir_intrinsic_discard:
3626      return emit_discard(ctx);
3627   case nir_intrinsic_emit_vertex:
3628      return emit_emit_vertex(ctx, intr);
3629   case nir_intrinsic_end_primitive:
3630      return emit_end_primitive(ctx, intr);
3631   case nir_intrinsic_scoped_barrier:
3632      return emit_barrier(ctx, intr);
3633   case nir_intrinsic_ssbo_atomic_add:
3634      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);
3635   case nir_intrinsic_ssbo_atomic_imin:
3636      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);
3637   case nir_intrinsic_ssbo_atomic_umin:
3638      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);
3639   case nir_intrinsic_ssbo_atomic_imax:
3640      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);
3641   case nir_intrinsic_ssbo_atomic_umax:
3642      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint);
3643   case nir_intrinsic_ssbo_atomic_and:
3644      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);
3645   case nir_intrinsic_ssbo_atomic_or:
3646      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);
3647   case nir_intrinsic_ssbo_atomic_xor:
3648      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);
3649   case nir_intrinsic_ssbo_atomic_exchange:
3650      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int);
3651   case nir_intrinsic_ssbo_atomic_comp_swap:
3652      return emit_ssbo_atomic_comp_swap(ctx, intr);
3653   case nir_intrinsic_shared_atomic_add_dxil:
3654      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int);
3655   case nir_intrinsic_shared_atomic_imin_dxil:
3656      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int);
3657   case nir_intrinsic_shared_atomic_umin_dxil:
3658      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint);
3659   case nir_intrinsic_shared_atomic_imax_dxil:
3660      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int);
3661   case nir_intrinsic_shared_atomic_umax_dxil:
3662      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint);
3663   case nir_intrinsic_shared_atomic_and_dxil:
3664      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint);
3665   case nir_intrinsic_shared_atomic_or_dxil:
3666      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint);
3667   case nir_intrinsic_shared_atomic_xor_dxil:
3668      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint);
3669   case nir_intrinsic_shared_atomic_exchange_dxil:
3670      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int);
3671   case nir_intrinsic_shared_atomic_comp_swap_dxil:
3672      return emit_shared_atomic_comp_swap(ctx, intr);
3673   case nir_intrinsic_image_store:
3674   case nir_intrinsic_image_deref_store:
3675      return emit_image_store(ctx, intr);
3676   case nir_intrinsic_image_load:
3677   case nir_intrinsic_image_deref_load:
3678      return emit_image_load(ctx, intr);
3679   case nir_intrinsic_image_size:
3680   case nir_intrinsic_image_deref_size:
3681      return emit_image_size(ctx, intr);
3682   case nir_intrinsic_get_ssbo_size:
3683      return emit_get_ssbo_size(ctx, intr);
3684
3685   case nir_intrinsic_vulkan_resource_index:
3686      return emit_vulkan_resource_index(ctx, intr);
3687   case nir_intrinsic_load_vulkan_descriptor:
3688      return emit_load_vulkan_descriptor(ctx, intr);
3689
3690   case nir_intrinsic_load_num_workgroups:
3691   case nir_intrinsic_load_workgroup_size:
3692   default:
3693      NIR_INSTR_UNSUPPORTED(&intr->instr);
3694      assert("Unimplemented intrinsic instruction");
3695      return false;
3696   }
3697}
3698
3699static bool
3700emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
3701{
3702   for (int i = 0; i < load_const->def.num_components; ++i) {
3703      const struct dxil_value *value;
3704      switch (load_const->def.bit_size) {
3705      case 1:
3706         value = dxil_module_get_int1_const(&ctx->mod,
3707                                            load_const->value[i].b);
3708         break;
3709      case 16:
3710         ctx->mod.feats.native_low_precision = true;
3711         value = dxil_module_get_int16_const(&ctx->mod,
3712                                             load_const->value[i].u16);
3713         break;
3714      case 32:
3715         value = dxil_module_get_int32_const(&ctx->mod,
3716                                             load_const->value[i].u32);
3717         break;
3718      case 64:
3719         ctx->mod.feats.int64_ops = true;
3720         value = dxil_module_get_int64_const(&ctx->mod,
3721                                             load_const->value[i].u64);
3722         break;
3723      default:
3724         unreachable("unexpected bit_size");
3725      }
3726      if (!value)
3727         return false;
3728
3729      store_ssa_def(ctx, &load_const->def, i, value);
3730   }
3731   return true;
3732}
3733
static bool
emit_deref(struct ntd_context* ctx, nir_deref_instr* instr)
{
   assert(instr->deref_type == nir_deref_type_var ||
          instr->deref_type == nir_deref_type_array);

   /* In the non-Vulkan environment, there's nothing to emit. Any references to
    * derefs will emit the necessary logic to handle scratch/shared GEP addressing
    */
   if (!ctx->opts->vulkan_environment)
      return true;

   /* In the Vulkan environment, we don't have cached handles for textures or
    * samplers, so let's use the opportunity of walking through the derefs to
    * emit those.
    */
   nir_variable *var = nir_deref_instr_get_variable(instr);
   assert(var);

   /* Only sampler/image variables get handles here; other deref types
    * are handled at their point of use. */
   if (!glsl_type_is_sampler(glsl_without_array(var->type)) &&
       !glsl_type_is_image(glsl_without_array(var->type)))
      return true;

   const struct glsl_type *type = instr->type;
   const struct dxil_value *binding;

   if (instr->deref_type == nir_deref_type_var) {
      binding = dxil_module_get_int32_const(&ctx->mod, var->data.binding);
   } else {
      /* Array deref: accumulate the binding as parent-value + array index */
      const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
      const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
      if (!base || !offset)
         return false;

      binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
   }

   if (!binding)
      return false;

   /* Haven't finished chasing the deref chain yet, just store the value */
   if (glsl_type_is_array(type)) {
      store_dest(ctx, &instr->dest, 0, binding, nir_type_uint32);
      return true;
   }

   assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
   enum dxil_resource_class res_class;
   if (glsl_type_is_image(type))
      res_class = DXIL_RESOURCE_CLASS_UAV;
   else if (glsl_get_sampler_result_type(type) == GLSL_TYPE_VOID)
      res_class = DXIL_RESOURCE_CLASS_SAMPLER;
   else
      res_class = DXIL_RESOURCE_CLASS_SRV;

   /* Create the resource handle now and store it as the deref's SSA value,
    * so later texture/image instructions can pick it up via get_src_ssa(). */
   const struct dxil_value *handle = emit_createhandle_call(ctx, res_class,
      get_resource_id(ctx, res_class, var->data.descriptor_set, var->data.binding), binding, false);
   if (!handle)
      return false;

   store_dest_value(ctx, &instr->dest, 0, handle);
   return true;
}
3797
3798static bool
3799emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
3800                 int true_block, int false_block)
3801{
3802   assert(cond);
3803   assert(true_block >= 0);
3804   assert(false_block >= 0);
3805   return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
3806}
3807
3808static bool
3809emit_branch(struct ntd_context *ctx, int block)
3810{
3811   assert(block >= 0);
3812   return dxil_emit_branch(&ctx->mod, NULL, block, -1);
3813}
3814
3815static bool
3816emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
3817{
3818   switch (instr->type) {
3819   case nir_jump_break:
3820   case nir_jump_continue:
3821      assert(instr->instr.block->successors[0]);
3822      assert(!instr->instr.block->successors[1]);
3823      return emit_branch(ctx, instr->instr.block->successors[0]->index);
3824
3825   default:
3826      unreachable("Unsupported jump type\n");
3827   }
3828}
3829
/* Bookkeeping for one NIR phi: a scalar DXIL phi instruction per destination
 * component, created empty by emit_phi() and populated by fixup_phi() once
 * every predecessor block has been emitted. */
struct phi_block {
   unsigned num_components;
   struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
};
3834
3835static bool
3836emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
3837{
3838   unsigned bit_size = nir_dest_bit_size(instr->dest);
3839   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod,
3840                                                           bit_size);
3841
3842   struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
3843   vphi->num_components = nir_dest_num_components(instr->dest);
3844
3845   for (unsigned i = 0; i < vphi->num_components; ++i) {
3846      struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
3847      if (!phi)
3848         return false;
3849      store_dest_value(ctx, &instr->dest, i, dxil_instr_get_return_value(phi));
3850   }
3851   _mesa_hash_table_insert(ctx->phis, instr, vphi);
3852   return true;
3853}
3854
3855static void
3856fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
3857          struct phi_block *vphi)
3858{
3859   const struct dxil_value *values[128];
3860   unsigned blocks[128];
3861   for (unsigned i = 0; i < vphi->num_components; ++i) {
3862      size_t num_incoming = 0;
3863      nir_foreach_phi_src(src, instr) {
3864         assert(src->src.is_ssa);
3865         const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
3866         assert(num_incoming < ARRAY_SIZE(values));
3867         values[num_incoming] = val;
3868         assert(num_incoming < ARRAY_SIZE(blocks));
3869         blocks[num_incoming] = src->pred->index;
3870         ++num_incoming;
3871      }
3872      dxil_phi_set_incoming(vphi->comp[i], values, blocks, num_incoming);
3873   }
3874}
3875
3876static unsigned
3877get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
3878          unsigned max_components, nir_tex_src *src, nir_alu_type type)
3879{
3880   unsigned num_components = nir_src_num_components(src->src);
3881   unsigned i = 0;
3882
3883   assert(num_components <= max_components);
3884
3885   for (i = 0; i < num_components; ++i) {
3886      values[i] = get_src(ctx, &src->src, i, type);
3887      if (!values[i])
3888         return 0;
3889   }
3890
3891   return num_components;
3892}
3893
/* Fill the unused tail of a fixed-size operand array with `undef`, so every
 * slot handed to a dx.op call is a valid dxil_value. */
#define PAD_SRC(ctx, array, components, undef) \
   for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
      array[i] = undef; \
   }
3898
3899static const struct dxil_value *
3900emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
3901{
3902   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
3903   if (!func)
3904      return NULL;
3905
3906   const struct dxil_value *args[11] = {
3907      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
3908      params->tex, params->sampler,
3909      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3910      params->offset[0], params->offset[1], params->offset[2],
3911      params->min_lod
3912   };
3913
3914   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3915}
3916
3917static const struct dxil_value *
3918emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
3919{
3920   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
3921   if (!func)
3922      return NULL;
3923
3924   assert(params->bias != NULL);
3925
3926   const struct dxil_value *args[12] = {
3927      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
3928      params->tex, params->sampler,
3929      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3930      params->offset[0], params->offset[1], params->offset[2],
3931      params->bias, params->min_lod
3932   };
3933
3934   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3935}
3936
3937static const struct dxil_value *
3938emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
3939{
3940   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
3941   if (!func)
3942      return NULL;
3943
3944   assert(params->lod_or_sample != NULL);
3945
3946   const struct dxil_value *args[11] = {
3947      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
3948      params->tex, params->sampler,
3949      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3950      params->offset[0], params->offset[1], params->offset[2],
3951      params->lod_or_sample
3952   };
3953
3954   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3955}
3956
3957static const struct dxil_value *
3958emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
3959{
3960   const struct dxil_func *func;
3961   enum dxil_intr opcode;
3962   int numparam;
3963
3964   if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER)  {
3965      func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
3966      opcode = DXIL_INTR_SAMPLE_CMP;
3967      numparam = 12;
3968   } else {
3969      func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
3970      opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
3971      numparam = 11;
3972   }
3973
3974   if (!func)
3975      return NULL;
3976
3977   const struct dxil_value *args[12] = {
3978      dxil_module_get_int32_const(&ctx->mod, opcode),
3979      params->tex, params->sampler,
3980      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3981      params->offset[0], params->offset[1], params->offset[2],
3982      params->cmp, params->min_lod
3983   };
3984
3985   return dxil_emit_call(&ctx->mod, func, args, numparam);
3986}
3987
3988static const struct dxil_value *
3989emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
3990{
3991   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
3992   if (!func)
3993      return false;
3994
3995   const struct dxil_value *args[17] = {
3996      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
3997      params->tex, params->sampler,
3998      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3999      params->offset[0], params->offset[1], params->offset[2],
4000      params->dx[0], params->dx[1], params->dx[2],
4001      params->dy[0], params->dy[1], params->dy[2],
4002      params->min_lod
4003   };
4004
4005   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4006}
4007
4008static const struct dxil_value *
4009emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
4010{
4011   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
4012   if (!func)
4013      return false;
4014
4015   if (!params->lod_or_sample)
4016      params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));
4017
4018   const struct dxil_value *args[] = {
4019      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
4020      params->tex,
4021      params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
4022      params->offset[0], params->offset[1], params->offset[2]
4023   };
4024
4025   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4026}
4027
4028static const struct dxil_value *
4029emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params)
4030{
4031   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
4032   if (!func)
4033      return false;
4034
4035   const struct dxil_value *args[] = {
4036      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
4037      params->tex,
4038      params->sampler,
4039      params->coord[0],
4040      params->coord[1],
4041      params->coord[2],
4042      dxil_module_get_int1_const(&ctx->mod, 1)
4043   };
4044
4045   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4046}
4047
/* Translate a NIR texture instruction into the matching dx.op call.
 * Gathers all sources into texop_parameters first, pads unused coord/offset
 * slots with undef, then dispatches on the texture op. */
static bool
emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
{
   struct texop_parameters params;
   memset(&params, 0, sizeof(struct texop_parameters));
   /* Outside Vulkan, texture/sampler handles were created up front and are
    * looked up by index; in Vulkan they arrive as deref sources below. */
   if (!ctx->opts->vulkan_environment) {
      params.tex = ctx->srv_handles[instr->texture_index];
      params.sampler = ctx->sampler_handles[instr->sampler_index];
   }

   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
   const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);

   unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
   params.overload = get_overload(instr->dest_type, 32);

   /* Collect every source into params; component counts are kept so the
    * unused tails can be padded with undef afterwards. */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      nir_alu_type type = nir_tex_instr_src_type(instr, i);

      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
                                      &instr->src[i], type);
         if (!coord_components)
            return false;
         break;

      case nir_tex_src_offset:
         offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
                                       &instr->src[i],  nir_type_int);
         if (!offset_components)
            return false;
         break;

      case nir_tex_src_bias:
         assert(instr->op == nir_texop_txb);
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.bias)
            return false;
         break;

      case nir_tex_src_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         /* Buffers don't have a LOD */
         if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
            params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
         else
            params.lod_or_sample = int_undef;
         if (!params.lod_or_sample)
            return false;
         break;

      case nir_tex_src_min_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
         if (!params.min_lod)
            return false;
         break;

      case nir_tex_src_comparator:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.cmp)
            return false;
         break;

      case nir_tex_src_ddx:
         dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
                                   &instr->src[i], nir_type_float);
         if (!dx_components)
            return false;
         break;

      case nir_tex_src_ddy:
         dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
                                   &instr->src[i], nir_type_float);
         if (!dy_components)
            return false;
         break;

      case nir_tex_src_ms_index:
         /* Sample index shares the lod_or_sample slot with txf's LOD */
         params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
         if (!params.lod_or_sample)
            return false;
         break;

      case nir_tex_src_texture_deref:
         /* Vulkan path: the handle was stored as the deref's SSA value
          * by emit_deref() */
         assert(ctx->opts->vulkan_environment);
         params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_sampler_deref:
         assert(ctx->opts->vulkan_environment);
         params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_projector:
         unreachable("Texture projector should have been lowered");

      default:
         fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
         unreachable("unknown texture source");
      }
   }

   /* A sampler is only required for actual sampling ops, not fetches or
    * queries. */
   assert(params.tex != NULL);
   assert(instr->op == nir_texop_txf ||
          instr->op == nir_texop_txf_ms ||
          nir_tex_instr_is_query(instr) ||
          params.sampler != NULL);

   PAD_SRC(ctx, params.coord, coord_components, float_undef);
   PAD_SRC(ctx, params.offset, offset_components, int_undef);
   if (!params.min_lod) params.min_lod = float_undef;

   const struct dxil_value *sample = NULL;
   switch (instr->op) {
   case nir_texop_txb:
      sample = emit_sample_bias(ctx, &params);
      break;

   case nir_texop_tex:
      if (params.cmp != NULL) {
         sample = emit_sample_cmp(ctx, &params);
         break;
      } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
         sample = emit_sample(ctx, &params);
         break;
      }
      /* Non-pixel stages fall back to an explicit LOD-0 sample */
      params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
      FALLTHROUGH;
   case nir_texop_txl:
      sample = emit_sample_level(ctx, &params);
      break;

   case nir_texop_txd:
      PAD_SRC(ctx, params.dx, dx_components, float_undef);
      PAD_SRC(ctx, params.dy, dy_components,float_undef);
      sample = emit_sample_grad(ctx, &params);
      break;

   case nir_texop_txf:
   case nir_texop_txf_ms:
      if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
         params.coord[1] = int_undef;
         sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
      } else {
         /* Fetch coordinates are integers, so re-pad with int undef */
         PAD_SRC(ctx, params.coord, coord_components, int_undef);
         sample = emit_texel_fetch(ctx, &params);
      }
      break;

   case nir_texop_txs:
      sample = emit_texture_size(ctx, &params);
      break;

   case nir_texop_lod:
      sample = emit_texture_lod(ctx, &params);
      store_dest(ctx, &instr->dest, 0, sample, nir_alu_type_get_base_type(instr->dest_type));
      return true;

   case nir_texop_query_levels:
      /* Mip count is element 3 of the dx.op.getDimensions result */
      params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
      sample = emit_texture_size(ctx, &params);
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
      store_dest(ctx, &instr->dest, 0, retval, nir_alu_type_get_base_type(instr->dest_type));
      return true;

   default:
      fprintf(stderr, "texture op: %d\n", instr->op);
      unreachable("unknown texture op");
   }

   if (!sample)
      return false;

   /* Scalarize the result struct into the destination components */
   for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
      store_dest(ctx, &instr->dest, i, retval, nir_alu_type_get_base_type(instr->dest_type));
   }

   return true;
}
4234
4235static bool
4236emit_undefined(struct ntd_context *ctx, nir_ssa_undef_instr *undef)
4237{
4238   for (unsigned i = 0; i < undef->def.num_components; ++i)
4239      store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
4240   return true;
4241}
4242
4243static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr)
4244{
4245   switch (instr->type) {
4246   case nir_instr_type_alu:
4247      return emit_alu(ctx, nir_instr_as_alu(instr));
4248   case nir_instr_type_intrinsic:
4249      return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
4250   case nir_instr_type_load_const:
4251      return emit_load_const(ctx, nir_instr_as_load_const(instr));
4252   case nir_instr_type_deref:
4253      return emit_deref(ctx, nir_instr_as_deref(instr));
4254   case nir_instr_type_jump:
4255      return emit_jump(ctx, nir_instr_as_jump(instr));
4256   case nir_instr_type_phi:
4257      return emit_phi(ctx, nir_instr_as_phi(instr));
4258   case nir_instr_type_tex:
4259      return emit_tex(ctx, nir_instr_as_tex(instr));
4260   case nir_instr_type_ssa_undef:
4261      return emit_undefined(ctx, nir_instr_as_ssa_undef(instr));
4262   default:
4263      NIR_INSTR_UNSUPPORTED(instr);
4264      unreachable("Unimplemented instruction type");
4265      return false;
4266   }
4267}
4268
4269
4270static bool
4271emit_block(struct ntd_context *ctx, struct nir_block *block)
4272{
4273   assert(block->index < ctx->mod.num_basic_block_ids);
4274   ctx->mod.basic_block_ids[block->index] = ctx->mod.curr_block;
4275
4276   nir_foreach_instr(instr, block) {
4277      TRACE_CONVERSION(instr);
4278
4279      if (!emit_instr(ctx, instr))  {
4280         return false;
4281      }
4282   }
4283   return true;
4284}
4285
4286static bool
4287emit_cf_list(struct ntd_context *ctx, struct exec_list *list);
4288
/* Lower a NIR if-statement to a conditional branch plus explicit jumps.
 * The condition selects between the then-block and either the else-block,
 * or directly the merge block when there is no else-list. */
static bool
emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
{
   assert(nir_src_num_components(if_stmt->condition) == 1);
   const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
                                           nir_type_bool);
   if (!cond)
      return false;

   /* prepare blocks: the successor of the last then-block is the block
    * following the whole if (the merge point) */
   nir_block *then_block = nir_if_first_then_block(if_stmt);
   assert(nir_if_last_then_block(if_stmt)->successors[0]);
   assert(!nir_if_last_then_block(if_stmt)->successors[1]);
   int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;

   nir_block *else_block = NULL;
   int else_succ = -1;
   if (!exec_list_is_empty(&if_stmt->else_list)) {
      else_block = nir_if_first_else_block(if_stmt);
      assert(nir_if_last_else_block(if_stmt)->successors[0]);
      assert(!nir_if_last_else_block(if_stmt)->successors[1]);
      else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
   }

   /* without an else-list a false condition jumps straight to the merge
    * block (then_succ) */
   if (!emit_cond_branch(ctx, cond, then_block->index,
                         else_block ? else_block->index : then_succ))
      return false;

   /* handle then-block; skip the trailing branch to the merge block when
    * the block already ends in an explicit jump (break/continue/return) */
   if (!emit_cf_list(ctx, &if_stmt->then_list) ||
       (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
        !emit_branch(ctx, then_succ)))
      return false;

   if (else_block) {
      /* handle else-block, same jump handling as the then-block */
      if (!emit_cf_list(ctx, &if_stmt->else_list) ||
          (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
           !emit_branch(ctx, else_succ)))
         return false;
   }

   return true;
}
4333
4334static bool
4335emit_loop(struct ntd_context *ctx, nir_loop *loop)
4336{
4337   nir_block *first_block = nir_loop_first_block(loop);
4338
4339   assert(nir_loop_last_block(loop)->successors[0]);
4340   assert(!nir_loop_last_block(loop)->successors[1]);
4341
4342   if (!emit_branch(ctx, first_block->index))
4343      return false;
4344
4345   if (!emit_cf_list(ctx, &loop->body))
4346      return false;
4347
4348   if (!emit_branch(ctx, first_block->index))
4349      return false;
4350
4351   return true;
4352}
4353
4354static bool
4355emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
4356{
4357   foreach_list_typed(nir_cf_node, node, node, list) {
4358      switch (node->type) {
4359      case nir_cf_node_block:
4360         if (!emit_block(ctx, nir_cf_node_as_block(node)))
4361            return false;
4362         break;
4363
4364      case nir_cf_node_if:
4365         if (!emit_if(ctx, nir_cf_node_as_if(node)))
4366            return false;
4367         break;
4368
4369      case nir_cf_node_loop:
4370         if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
4371            return false;
4372         break;
4373
4374      default:
4375         unreachable("unsupported cf-list node");
4376         break;
4377      }
4378   }
4379   return true;
4380}
4381
4382static void
4383insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
4384{
4385   nir_foreach_variable_in_list(var, var_list) {
4386      if (var->data.binding > new_var->data.binding) {
4387         exec_node_insert_node_before(&var->node, &new_var->node);
4388         return;
4389      }
4390   }
4391   exec_list_push_tail(var_list, &new_var->node);
4392}
4393
4394
4395static void
4396sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
4397{
4398   struct exec_list new_list;
4399   exec_list_make_empty(&new_list);
4400
4401   nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
4402      exec_node_remove(&var->node);
4403      const struct glsl_type *type = glsl_without_array(var->type);
4404      if (!glsl_type_is_struct(type))
4405         insert_sorted_by_binding(&new_list, var);
4406   }
4407   exec_list_append(&s->variables, &new_list);
4408}
4409
static void
prepare_phi_values(struct ntd_context *ctx)
{
   /* PHI nodes are difficult to get right when tracking the types:
    * Since the incoming sources are linked to blocks, we can't bitcast
    * on the fly while loading. So scan the shader and insert a typed dummy
    * value for each phi source, and when storing we convert if the incoming
    * value has a different type than the one expected by the phi node.
    * We choose int as default, because it supports more bit sizes.
    */
   nir_foreach_function(function, ctx->shader) {
      if (function->impl) {
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr(instr, block) {
               if (instr->type == nir_instr_type_phi) {
                  nir_phi_instr *ir = nir_instr_as_phi(instr);
                  /* Dummy int constant matching the phi's bit size; stored
                   * for every component of every incoming SSA source so the
                   * defs all carry a known type before real emission. */
                  unsigned bitsize = nir_dest_bit_size(ir->dest);
                  const struct dxil_value *dummy = dxil_module_get_int_const(&ctx->mod, 0, bitsize);
                  nir_foreach_phi_src(src, ir) {
                     for(unsigned int i = 0; i < ir->dest.ssa.num_components; ++i)
                        store_ssa_def(ctx, src->src.ssa, i, dummy);
                  }
               }
            }
         }
      }
   }
}
4438
4439static bool
4440emit_cbvs(struct ntd_context *ctx)
4441{
4442   if (ctx->shader->info.stage == MESA_SHADER_KERNEL || ctx->opts->vulkan_environment) {
4443      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
4444         if (!emit_ubo_var(ctx, var))
4445            return false;
4446      }
4447   } else {
4448      for (int i = ctx->opts->ubo_binding_offset; i < ctx->shader->info.num_ubos; ++i) {
4449         char name[64];
4450         snprintf(name, sizeof(name), "__ubo%d", i);
4451         if (!emit_cbv(ctx, i, 0, 16384 /*4096 vec4's*/, 1, name))
4452            return false;
4453      }
4454   }
4455
4456   return true;
4457}
4458
4459static bool
4460emit_scratch(struct ntd_context *ctx)
4461{
4462   if (ctx->shader->scratch_size) {
4463      /*
4464       * We always allocate an u32 array, no matter the actual variable types.
4465       * According to the DXIL spec, the minimum load/store granularity is
4466       * 32-bit, anything smaller requires using a read-extract/read-write-modify
4467       * approach.
4468       */
4469      unsigned size = ALIGN_POT(ctx->shader->scratch_size, sizeof(uint32_t));
4470      const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32);
4471      const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t));
4472      if (!int32 || !array_length)
4473         return false;
4474
4475      const struct dxil_type *type = dxil_module_get_array_type(
4476         &ctx->mod, int32, size / sizeof(uint32_t));
4477      if (!type)
4478         return false;
4479
4480      ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4);
4481      if (!ctx->scratchvars)
4482         return false;
4483   }
4484
4485   return true;
4486}
4487
4488/* The validator complains if we don't have ops that reference a global variable. */
4489static bool
4490shader_has_shared_ops(struct nir_shader *s)
4491{
4492   nir_foreach_function(func, s) {
4493      if (!func->impl)
4494         continue;
4495      nir_foreach_block(block, func->impl) {
4496         nir_foreach_instr(instr, block) {
4497            if (instr->type != nir_instr_type_intrinsic)
4498               continue;
4499            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
4500            switch (intrin->intrinsic) {
4501            case nir_intrinsic_load_shared_dxil:
4502            case nir_intrinsic_store_shared_dxil:
4503            case nir_intrinsic_shared_atomic_add_dxil:
4504            case nir_intrinsic_shared_atomic_and_dxil:
4505            case nir_intrinsic_shared_atomic_comp_swap_dxil:
4506            case nir_intrinsic_shared_atomic_exchange_dxil:
4507            case nir_intrinsic_shared_atomic_imax_dxil:
4508            case nir_intrinsic_shared_atomic_imin_dxil:
4509            case nir_intrinsic_shared_atomic_or_dxil:
4510            case nir_intrinsic_shared_atomic_umax_dxil:
4511            case nir_intrinsic_shared_atomic_umin_dxil:
4512            case nir_intrinsic_shared_atomic_xor_dxil:
4513               return true;
4514            default: break;
4515            }
4516         }
4517      }
4518   }
4519   return false;
4520}
4521
/* Emit the whole DXIL module body: resources (in validator-mandated order),
 * shared/scratch storage, then the entry point's instructions, phi fixups
 * and metadata. Returns false on any allocation or emission failure. */
static bool
emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
{
   /* The validator forces us to emit resources in a specific order:
    * CBVs, Samplers, SRVs, UAVs. While we are at it also remove
    * stale struct uniforms, they are lowered but might not have been removed */
   sort_uniforms_by_binding_and_remove_structs(ctx->shader);

   /* CBVs */
   if (!emit_cbvs(ctx))
      return false;

   /* Samplers: uniforms of sampler type with a VOID result type are pure
    * sampler states (no bound texture). */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_sampler_count(var->type);
      const struct glsl_type *without_array = glsl_without_array(var->type);
      if (var->data.mode == nir_var_uniform && glsl_type_is_sampler(without_array) &&
          glsl_get_sampler_result_type(without_array) == GLSL_TYPE_VOID) {
         if (!emit_sampler(ctx, var, count))
            return false;
      }
   }

   /* SRVs: sampler-typed uniforms with a non-VOID result type are textures. */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_sampler_count(var->type);
      const struct glsl_type *without_array = glsl_without_array(var->type);
      if (var->data.mode == nir_var_uniform && glsl_type_is_sampler(without_array) &&
          glsl_get_sampler_result_type(without_array) != GLSL_TYPE_VOID) {
         if (!emit_srv(ctx, var, count))
            return false;
      }
   }
   /* Handle read-only SSBOs as SRVs */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
      if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
         unsigned count = 1;
         if (glsl_type_is_array(var->type))
            count = glsl_get_length(var->type);
         if (!emit_srv(ctx, var, count))
            return false;
      }
   }

   if (ctx->shader->info.shared_size && shader_has_shared_ops(ctx->shader)) {
      const struct dxil_type *type;
      unsigned size;

     /*
      * We always allocate an u32 array, no matter the actual variable types.
      * According to the DXIL spec, the minimum load/store granularity is
      * 32-bit, anything smaller requires using a read-extract/read-write-modify
      * approach. Non-atomic 64-bit accesses are allowed, but the
      * GEP(cast(gvar, u64[] *), offset) and cast(GEP(gvar, offset), u64 *))
      * sequences don't seem to be accepted by the DXIL validator when the
      * pointer is in the groupshared address space, making the 32-bit -> 64-bit
      * pointer cast impossible.
      */
      size = ALIGN_POT(ctx->shader->info.shared_size, sizeof(uint32_t));
      type = dxil_module_get_array_type(&ctx->mod,
                                        dxil_module_get_int_type(&ctx->mod, 32),
                                        size / sizeof(uint32_t));
      /* ffs(sizeof(uint64_t)) encodes an 8-byte alignment for the global. */
      ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type,
                                                DXIL_AS_GROUPSHARED,
                                                ffs(sizeof(uint64_t)),
                                                NULL);
   }

   if (!emit_scratch(ctx))
      return false;

   /* UAVs */
   if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
      if (!emit_globals(ctx, opts->num_kernel_globals))
         return false;

      /* Cache for global constant emission (deduplication). */
      ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
      if (!ctx->consts)
         return false;
      if (!emit_global_consts(ctx))
         return false;
   } else {
      /* Handle read/write SSBOs as UAVs */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
                        count, DXIL_COMP_TYPE_INVALID,
                        DXIL_RESOURCE_KIND_RAW_BUFFER, var->name))
               return false;

         }
      }
   }

   /* Image uniforms are also UAVs. */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      if (var->data.mode == nir_var_uniform && glsl_type_is_image(glsl_without_array(var->type))) {
         if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
            return false;
      }
   }

   /* Block indices are needed to map NIR blocks to DXIL basic blocks. */
   nir_function_impl *entry = nir_shader_get_entrypoint(ctx->shader);
   nir_metadata_require(entry, nir_metadata_block_index);

   assert(entry->num_blocks > 0);
   ctx->mod.basic_block_ids = rzalloc_array(ctx->ralloc_ctx, int,
                                            entry->num_blocks);
   if (!ctx->mod.basic_block_ids)
      return false;

   /* -1 marks "not yet emitted"; filled in by emit_block(). */
   for (int i = 0; i < entry->num_blocks; ++i)
      ctx->mod.basic_block_ids[i] = -1;
   ctx->mod.num_basic_block_ids = entry->num_blocks;

   /* One dxil_def slot per SSA index in the entry point. */
   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def,
                             entry->ssa_alloc);
   if (!ctx->defs)
      return false;
   ctx->num_defs = entry->ssa_alloc;

   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
   if (!ctx->phis)
      return false;

   prepare_phi_values(ctx);

   if (!emit_cf_list(ctx, &entry->body))
      return false;

   /* Patch phi incoming values now that all blocks have been emitted.
    * (The loop variable intentionally shadows the nir_function_impl
    * "entry" above; hash_table_foreach declares its own iterator.) */
   hash_table_foreach(ctx->phis, entry) {
      fixup_phi(ctx, (nir_phi_instr *)entry->key,
                (struct phi_block *)entry->data);
   }

   if (!dxil_emit_ret_void(&ctx->mod))
      return false;

   /* Writing SV_StencilRef requires the corresponding feature flag. */
   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
         if (var->data.location == FRAG_RESULT_STENCIL) {
            ctx->mod.feats.stencil_ref = true;
         }
      }
   }

   /* Native 16-bit types require shader model 6.2 or later. */
   if (ctx->mod.feats.native_low_precision)
      ctx->mod.minor_version = MAX2(ctx->mod.minor_version, 2);

   return emit_metadata(ctx) &&
          dxil_emit_module(&ctx->mod);
}
4676
4677static unsigned int
4678get_dxil_shader_kind(struct nir_shader *s)
4679{
4680   switch (s->info.stage) {
4681   case MESA_SHADER_VERTEX:
4682      return DXIL_VERTEX_SHADER;
4683   case MESA_SHADER_GEOMETRY:
4684      return DXIL_GEOMETRY_SHADER;
4685   case MESA_SHADER_FRAGMENT:
4686      return DXIL_PIXEL_SHADER;
4687   case MESA_SHADER_KERNEL:
4688   case MESA_SHADER_COMPUTE:
4689      return DXIL_COMPUTE_SHADER;
4690   default:
4691      unreachable("unknown shader stage in nir_to_dxil");
4692      return DXIL_COMPUTE_SHADER;
4693   }
4694}
4695
4696static unsigned
4697lower_bit_size_callback(const nir_instr* instr, void *data)
4698{
4699   if (instr->type != nir_instr_type_alu)
4700      return 0;
4701   const nir_alu_instr *alu = nir_instr_as_alu(instr);
4702
4703   if (nir_op_infos[alu->op].is_conversion)
4704      return 0;
4705
4706   unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
4707   const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data;
4708   unsigned min_bit_size = opts->lower_int16 ? 32 : 16;
4709
4710   unsigned ret = 0;
4711   for (unsigned i = 0; i < num_inputs; i++) {
4712      unsigned bit_size = nir_src_bit_size(alu->src[i].src);
4713      if (bit_size != 1 && bit_size < min_bit_size)
4714         ret = min_bit_size;
4715   }
4716
4717   return ret;
4718}
4719
/* Run the standard NIR optimization loop, with DXIL-specific lowering
 * passes mixed in, until no pass reports progress. A second fixed-point
 * loop runs the late algebraic optimizations. */
static void
optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, UINT32_MAX);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      /* Widen sub-minimum ALU ops; see lower_bit_size_callback(). */
      NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts);
      NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
      if (opts->lower_int16)
         NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_if, true);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, dxil_nir_lower_x2b);
      if (s->options->lower_int64_options)
         NIR_PASS(progress, s, nir_lower_int64);
      NIR_PASS(progress, s, nir_lower_alu);
      NIR_PASS(progress, s, dxil_nir_lower_inot);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_lower_undef_to_zero);
      NIR_PASS(progress, s, nir_opt_deref);
      NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
      NIR_PASS(progress, s, nir_lower_64bit_phis);
      NIR_PASS_V(s, nir_lower_system_values);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
   } while (progress);
}
4761
/* Populate the PSV0 (pipeline state validation) data the DXIL container
 * carries, from the emitted module and shader info. */
static
void dxil_fill_validation_state(struct ntd_context *ctx,
                                struct dxil_validation_state *state)
{
   state->num_resources = util_dynarray_num_elements(&ctx->resources, struct dxil_resource);
   state->resources = (struct dxil_resource*)ctx->resources.data;
   state->state.psv0.max_expected_wave_lane_count = UINT_MAX;
   state->state.shader_stage = (uint8_t)ctx->mod.shader_kind;
   state->state.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
   state->state.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
   //state->state.sig_patch_const_or_prim_elements = 0;

   /* Stage-specific PSV0 fields. */
   switch (ctx->mod.shader_kind) {
   case DXIL_VERTEX_SHADER:
      state->state.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_PIXEL_SHADER:
      /* TODO: handle depth outputs */
      state->state.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
      state->state.psv0.ps.sample_frequency =
         ctx->mod.info.has_per_sample_input;
      break;
   case DXIL_COMPUTE_SHADER:
      /* Compute has no stage-specific PSV0 state to fill. */
      break;
   case DXIL_GEOMETRY_SHADER:
      state->state.max_vertex_count = ctx->shader->info.gs.vertices_out;
      state->state.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
      state->state.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
      state->state.psv0.gs.output_stream_mask = ctx->shader->info.gs.active_stream_mask;
      state->state.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
      break;
   default:
      assert(0 && "Shader type not (yet) supported");
   }
}
4797
4798static nir_variable *
4799add_sysvalue(struct ntd_context *ctx,
4800              uint8_t value, char *name,
4801              int driver_location)
4802{
4803
4804   nir_variable *var = rzalloc(ctx->shader, nir_variable);
4805   if (!var)
4806      return NULL;
4807   var->data.driver_location = driver_location;
4808   var->data.location = value;
4809   var->type = glsl_uint_type();
4810   var->name = name;
4811   var->data.mode = nir_var_system_value;
4812   var->data.interpolation = INTERP_MODE_FLAT;
4813   return var;
4814}
4815
4816static bool
4817append_input_or_sysvalue(struct ntd_context *ctx,
4818                         int input_loc,  int sv_slot,
4819                         char *name, int driver_location)
4820{
4821   if (input_loc >= 0) {
4822      /* Check inputs whether a variable is available the corresponds
4823       * to the sysvalue */
4824      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
4825         if (var->data.location == input_loc) {
4826            ctx->system_value[sv_slot] = var;
4827            return true;
4828         }
4829      }
4830   }
4831
4832   ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
4833   if (!ctx->system_value[sv_slot])
4834      return false;
4835
4836   nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
4837   return true;
4838}
4839
/* Table of the gl_system_values we can provide: the NIR system value, the
 * varying slot that may already carry it as a regular input (-1 if none),
 * and the DXIL semantic name to emit. */
struct sysvalue_name {
   gl_system_value value;   /* NIR system value this entry handles */
   int slot;                /* matching varying slot, or -1 */
   char *name;              /* DXIL semantic name */
} possible_sysvalues[] = {
   {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID"},
   {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID"},
   {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace"},
   {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID"},
   {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex"},
};
4851
4852static bool
4853allocate_sysvalues(struct ntd_context *ctx)
4854{
4855   unsigned driver_location = 0;
4856   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
4857      driver_location++;
4858   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
4859      driver_location++;
4860
4861   for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
4862      struct sysvalue_name *info = &possible_sysvalues[i];
4863      if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
4864         if (!append_input_or_sysvalue(ctx, info->slot,
4865                                       info->value, info->name,
4866                                       driver_location++))
4867            return false;
4868      }
4869   }
4870   return true;
4871}
4872
4873bool
4874nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
4875            struct blob *blob)
4876{
4877   assert(opts);
4878   bool retval = true;
4879   debug_dxil = (int)debug_get_option_debug_dxil();
4880   blob_init(blob);
4881
4882   struct ntd_context *ctx = calloc(1, sizeof(*ctx));
4883   if (!ctx)
4884      return false;
4885
4886   ctx->opts = opts;
4887   ctx->shader = s;
4888
4889   ctx->ralloc_ctx = ralloc_context(NULL);
4890   if (!ctx->ralloc_ctx) {
4891      retval = false;
4892      goto out;
4893   }
4894
4895   util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
4896   util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
4897   util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
4898   util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
4899   util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
4900   dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
4901   ctx->mod.shader_kind = get_dxil_shader_kind(s);
4902   ctx->mod.major_version = 6;
4903   ctx->mod.minor_version = 1;
4904
4905   NIR_PASS_V(s, nir_lower_pack);
4906   NIR_PASS_V(s, nir_lower_frexp);
4907   NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
4908
4909   optimize_nir(s, opts);
4910
4911   NIR_PASS_V(s, nir_remove_dead_variables,
4912              nir_var_function_temp | nir_var_shader_temp, NULL);
4913
4914   if (!allocate_sysvalues(ctx))
4915      return false;
4916
4917   if (debug_dxil & DXIL_DEBUG_VERBOSE)
4918      nir_print_shader(s, stderr);
4919
4920   if (!emit_module(ctx, opts)) {
4921      debug_printf("D3D12: dxil_container_add_module failed\n");
4922      retval = false;
4923      goto out;
4924   }
4925
4926   if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
4927      struct dxil_dumper *dumper = dxil_dump_create();
4928      dxil_dump_module(dumper, &ctx->mod);
4929      fprintf(stderr, "\n");
4930      dxil_dump_buf_to_file(dumper, stderr);
4931      fprintf(stderr, "\n\n");
4932      dxil_dump_free(dumper);
4933   }
4934
4935   struct dxil_container container;
4936   dxil_container_init(&container);
4937   if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
4938      debug_printf("D3D12: dxil_container_add_features failed\n");
4939      retval = false;
4940      goto out;
4941   }
4942
4943   if (!dxil_container_add_io_signature(&container,
4944                                        DXIL_ISG1,
4945                                        ctx->mod.num_sig_inputs,
4946                                        ctx->mod.inputs)) {
4947      debug_printf("D3D12: failed to write input signature\n");
4948      retval = false;
4949      goto out;
4950   }
4951
4952   if (!dxil_container_add_io_signature(&container,
4953                                        DXIL_OSG1,
4954                                        ctx->mod.num_sig_outputs,
4955                                        ctx->mod.outputs)) {
4956      debug_printf("D3D12: failed to write output signature\n");
4957      retval = false;
4958      goto out;
4959   }
4960
4961   struct dxil_validation_state validation_state;
4962   memset(&validation_state, 0, sizeof(validation_state));
4963   dxil_fill_validation_state(ctx, &validation_state);
4964
4965   if (!dxil_container_add_state_validation(&container,&ctx->mod,
4966                                            &validation_state)) {
4967      debug_printf("D3D12: failed to write state-validation\n");
4968      retval = false;
4969      goto out;
4970   }
4971
4972   if (!dxil_container_add_module(&container, &ctx->mod)) {
4973      debug_printf("D3D12: failed to write module\n");
4974      retval = false;
4975      goto out;
4976   }
4977
4978   if (!dxil_container_write(&container, blob)) {
4979      debug_printf("D3D12: dxil_container_write failed\n");
4980      retval = false;
4981      goto out;
4982   }
4983   dxil_container_finish(&container);
4984
4985   if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
4986      static int shader_id = 0;
4987      char buffer[64];
4988      snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
4989               get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
4990      debug_printf("Try to write blob to %s\n", buffer);
4991      FILE *f = fopen(buffer, "wb");
4992      if (f) {
4993         fwrite(blob->data, 1, blob->size, f);
4994         fclose(f);
4995      }
4996   }
4997
4998out:
4999   dxil_module_release(&ctx->mod);
5000   ralloc_free(ctx->ralloc_ctx);
5001   free(ctx);
5002   return retval;
5003}
5004
5005enum dxil_sysvalue_type
5006nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask)
5007{
5008   switch (var->data.location) {
5009   case VARYING_SLOT_FACE:
5010      return DXIL_GENERATED_SYSVALUE;
5011   case VARYING_SLOT_POS:
5012   case VARYING_SLOT_PRIMITIVE_ID:
5013   case VARYING_SLOT_CLIP_DIST0:
5014   case VARYING_SLOT_CLIP_DIST1:
5015   case VARYING_SLOT_PSIZ:
5016      if (!((1ull << var->data.location) & other_stage_mask))
5017         return DXIL_SYSVALUE;
5018      FALLTHROUGH;
5019   default:
5020      return DXIL_NO_SYSVALUE;
5021   }
5022}
5023