1/* 2 * Copyright © Microsoft Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir_to_dxil.h" 25 26#include "dxil_container.h" 27#include "dxil_dump.h" 28#include "dxil_enums.h" 29#include "dxil_function.h" 30#include "dxil_module.h" 31#include "dxil_nir.h" 32#include "dxil_signature.h" 33 34#include "nir/nir_builder.h" 35#include "util/u_debug.h" 36#include "util/u_dynarray.h" 37#include "util/u_math.h" 38 39#include "git_sha1.h" 40 41#include "vulkan/vulkan_core.h" 42 43#include <stdint.h> 44 45int debug_dxil = 0; 46 47static const struct debug_named_value 48dxil_debug_options[] = { 49 { "verbose", DXIL_DEBUG_VERBOSE, NULL }, 50 { "dump_blob", DXIL_DEBUG_DUMP_BLOB , "Write shader blobs" }, 51 { "trace", DXIL_DEBUG_TRACE , "Trace instruction conversion" }, 52 { "dump_module", DXIL_DEBUG_DUMP_MODULE, "dump module tree to stderr"}, 53 DEBUG_NAMED_VALUE_END 54}; 55 56DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0) 57 58#define NIR_INSTR_UNSUPPORTED(instr) \ 59 if (debug_dxil & DXIL_DEBUG_VERBOSE) \ 60 do { \ 61 fprintf(stderr, "Unsupported instruction:"); \ 62 nir_print_instr(instr, stderr); \ 63 fprintf(stderr, "\n"); \ 64 } while (0) 65 66#define TRACE_CONVERSION(instr) \ 67 if (debug_dxil & DXIL_DEBUG_TRACE) \ 68 do { \ 69 fprintf(stderr, "Convert '"); \ 70 nir_print_instr(instr, stderr); \ 71 fprintf(stderr, "'\n"); \ 72 } while (0) 73 74static const nir_shader_compiler_options 75nir_options = { 76 .lower_ineg = true, 77 .lower_fneg = true, 78 .lower_ffma16 = true, 79 .lower_ffma32 = true, 80 .lower_isign = true, 81 .lower_fsign = true, 82 .lower_iabs = true, 83 .lower_fmod = true, 84 .lower_fpow = true, 85 .lower_scmp = true, 86 .lower_ldexp = true, 87 .lower_flrp16 = true, 88 .lower_flrp32 = true, 89 .lower_flrp64 = true, 90 .lower_bitfield_extract_to_shifts = true, 91 .lower_extract_word = true, 92 .lower_extract_byte = true, 93 .lower_insert_word = true, 94 .lower_insert_byte = true, 95 .lower_all_io_to_elements = true, 96 .lower_all_io_to_temps = true, 97 .lower_hadd = true, 98 .lower_uadd_sat = true, 99 .lower_iadd_sat = true, 100 .lower_uadd_carry = true, 101 .lower_mul_high = true, 102 .lower_rotate = true, 103 .lower_pack_64_2x32_split = true, 104 .lower_pack_32_2x16_split = true, 105 .lower_unpack_64_2x32_split = true, 106 .lower_unpack_32_2x16_split = true, 107 .has_fsub = true, 108 .has_isub = true, 109 .use_scoped_barrier = true, 110 .vertex_id_zero_based = true, 111 .lower_base_vertex = true, 112 .has_cs_global_id = true, 113 .has_txs = true, 114}; 115 116const nir_shader_compiler_options* 117dxil_get_nir_compiler_options(void) 118{ 119 return &nir_options; 120} 121 122static bool 123emit_llvm_ident(struct dxil_module *m) 124{ 125 const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1); 126 if (!compiler) 127 return false; 128 129 const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1); 130 return llvm_ident && 131 dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1); 132} 133 134static bool 135emit_named_version(struct dxil_module *m, const char *name, 136 int major, int minor) 137{ 138 const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major); 139 const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor); 140 const struct dxil_mdnode *version_nodes[] = { major_node, minor_node }; 141 const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes, 142 ARRAY_SIZE(version_nodes)); 143 return dxil_add_metadata_named_node(m, name, &version, 1); 144} 145 146static const char * 147get_shader_kind_str(enum dxil_shader_kind kind) 148{ 149 switch (kind) { 150 case DXIL_PIXEL_SHADER: 151 return "ps"; 152 case DXIL_VERTEX_SHADER: 153 return "vs"; 154 case DXIL_GEOMETRY_SHADER: 155 return "gs"; 156 case DXIL_HULL_SHADER: 157 return "hs"; 158 case DXIL_DOMAIN_SHADER: 159 return "ds"; 160 case DXIL_COMPUTE_SHADER: 161 return "cs"; 162 default: 163 unreachable("invalid shader kind"); 164 } 165} 166 167static bool 168emit_dx_shader_model(struct dxil_module *m) 169{ 170 const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind)); 171 const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version); 172 const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version); 173 const struct dxil_mdnode *shader_model[] = { type_node, major_node, 174 minor_node }; 175 const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model)); 176 177 return dxil_add_metadata_named_node(m, "dx.shaderModel", 178 &dx_shader_model, 1); 179} 180 181enum { 182 DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0, 183 DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1 184}; 185 186enum dxil_intr { 187 DXIL_INTR_LOAD_INPUT = 4, 188 DXIL_INTR_STORE_OUTPUT = 5, 189 DXIL_INTR_FABS = 6, 190 DXIL_INTR_SATURATE = 7, 191 192 DXIL_INTR_ISFINITE = 10, 193 DXIL_INTR_ISNORMAL = 11, 194 195 DXIL_INTR_FCOS = 12, 196 DXIL_INTR_FSIN = 13, 197 198 DXIL_INTR_FEXP2 = 21, 199 DXIL_INTR_FRC = 22, 200 DXIL_INTR_FLOG2 = 23, 201 202 DXIL_INTR_SQRT = 24, 203 DXIL_INTR_RSQRT = 25, 204 DXIL_INTR_ROUND_NE = 26, 205 DXIL_INTR_ROUND_NI = 27, 206 DXIL_INTR_ROUND_PI = 28, 207 DXIL_INTR_ROUND_Z = 29, 208 209 DXIL_INTR_COUNTBITS = 31, 210 DXIL_INTR_FIRSTBIT_HI = 33, 211 212 DXIL_INTR_FMAX = 35, 213 DXIL_INTR_FMIN = 36, 214 DXIL_INTR_IMAX = 37, 215 DXIL_INTR_IMIN = 38, 216 DXIL_INTR_UMAX = 39, 217 DXIL_INTR_UMIN = 40, 218 219 DXIL_INTR_FMA = 47, 220 221 DXIL_INTR_CREATE_HANDLE = 57, 222 DXIL_INTR_CBUFFER_LOAD_LEGACY = 59, 223 224 DXIL_INTR_SAMPLE = 60, 225 DXIL_INTR_SAMPLE_BIAS = 61, 226 DXIL_INTR_SAMPLE_LEVEL = 62, 227 DXIL_INTR_SAMPLE_GRAD = 63, 228 DXIL_INTR_SAMPLE_CMP = 64, 229 DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65, 230 231 DXIL_INTR_TEXTURE_LOAD = 66, 232 DXIL_INTR_TEXTURE_STORE = 67, 233 234 DXIL_INTR_BUFFER_LOAD = 68, 235 DXIL_INTR_BUFFER_STORE = 69, 236 237 DXIL_INTR_TEXTURE_SIZE = 72, 238 239 DXIL_INTR_ATOMIC_BINOP = 78, 240 DXIL_INTR_ATOMIC_CMPXCHG = 79, 241 DXIL_INTR_BARRIER = 80, 242 DXIL_INTR_TEXTURE_LOD = 81, 243 244 DXIL_INTR_DISCARD = 82, 245 DXIL_INTR_DDX_COARSE = 83, 246 DXIL_INTR_DDY_COARSE = 84, 247 DXIL_INTR_DDX_FINE = 85, 248 DXIL_INTR_DDY_FINE = 86, 249 250 DXIL_INTR_SAMPLE_INDEX = 90, 251 252 DXIL_INTR_THREAD_ID = 93, 253 DXIL_INTR_GROUP_ID = 94, 254 DXIL_INTR_THREAD_ID_IN_GROUP = 95, 255 DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96, 256 257 DXIL_INTR_EMIT_STREAM = 97, 258 DXIL_INTR_CUT_STREAM = 98, 259 260 DXIL_INTR_MAKE_DOUBLE = 101, 261 DXIL_INTR_SPLIT_DOUBLE = 102, 262 263 DXIL_INTR_PRIMITIVE_ID = 108, 264 265 DXIL_INTR_LEGACY_F32TOF16 = 130, 266 DXIL_INTR_LEGACY_F16TOF32 = 131, 267 268 DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137, 269}; 270 271enum dxil_atomic_op { 272 DXIL_ATOMIC_ADD = 0, 273 DXIL_ATOMIC_AND = 1, 274 DXIL_ATOMIC_OR = 2, 275 DXIL_ATOMIC_XOR = 3, 276 DXIL_ATOMIC_IMIN = 4, 277 DXIL_ATOMIC_IMAX = 5, 278 DXIL_ATOMIC_UMIN = 6, 279 DXIL_ATOMIC_UMAX = 7, 280 DXIL_ATOMIC_EXCHANGE = 8, 281}; 282 283typedef struct { 284 unsigned id; 285 unsigned binding; 286 unsigned size; 287 unsigned space; 288} resource_array_layout; 289 290static void 291fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields, 292 const struct dxil_type *struct_type, 293 const char *name, const resource_array_layout *layout) 294{ 295 const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type); 296 const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type); 297 298 fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID 299 fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol 300 fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name 301 fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID 302 fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound 303 fields[5] = dxil_get_metadata_int32(m, layout->size); // range size 304} 305 306static const struct dxil_mdnode * 307emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type, 308 const char *name, const resource_array_layout *layout, 309 enum dxil_component_type comp_type, 310 enum dxil_resource_kind res_kind) 311{ 312 const struct dxil_mdnode *fields[9]; 313 314 const struct dxil_mdnode *metadata_tag_nodes[2]; 315 316 fill_resource_metadata(m, fields, elem_type, name, layout); 317 fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape 318 fields[7] = dxil_get_metadata_int1(m, 0); // sample count 319 if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER && 320 res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) { 321 metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG); 322 metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type); 323 fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata 324 } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER) 325 fields[8] = NULL; 326 else 327 unreachable("Structured buffers not supported yet"); 328 329 return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); 330} 331 332static const struct dxil_mdnode * 333emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type, 334 const char *name, const resource_array_layout *layout, 335 enum dxil_component_type comp_type, 336 enum dxil_resource_kind res_kind) 337{ 338 const struct dxil_mdnode *fields[11]; 339 340 const struct dxil_mdnode *metadata_tag_nodes[2]; 341 342 fill_resource_metadata(m, fields, struct_type, name, layout); 343 fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape 344 fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent 345 fields[8] = dxil_get_metadata_int1(m, false); // has counter 346 fields[9] = dxil_get_metadata_int1(m, false); // is ROV 347 if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER && 348 res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) { 349 metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG); 350 metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type); 351 fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata 352 } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER) 353 fields[10] = NULL; 354 else 355 unreachable("Structured buffers not supported yet"); 356 357 return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); 358} 359 360static const struct dxil_mdnode * 361emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type, 362 const char *name, const resource_array_layout *layout, 363 unsigned size) 364{ 365 const struct dxil_mdnode *fields[8]; 366 367 fill_resource_metadata(m, fields, struct_type, name, layout); 368 fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size 369 fields[7] = NULL; // metadata 370 371 return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); 372} 373 374static const struct dxil_mdnode * 375emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type, 376 nir_variable *var, const resource_array_layout *layout) 377{ 378 const struct dxil_mdnode *fields[8]; 379 const struct glsl_type *type = glsl_without_array(var->type); 380 381 fill_resource_metadata(m, fields, struct_type, var->name, layout); 382 fields[6] = dxil_get_metadata_int32(m, DXIL_SAMPLER_KIND_DEFAULT); // sampler kind 383 enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ? 384 DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT; 385 fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind 386 fields[7] = NULL; // metadata 387 388 return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); 389} 390 391 392#define MAX_SRVS 128 393#define MAX_UAVS 64 394#define MAX_CBVS 64 // ?? 395#define MAX_SAMPLERS 64 // ?? 396 397struct dxil_def { 398 const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS]; 399}; 400 401struct ntd_context { 402 void *ralloc_ctx; 403 const struct nir_to_dxil_options *opts; 404 struct nir_shader *shader; 405 406 struct dxil_module mod; 407 408 struct util_dynarray srv_metadata_nodes; 409 const struct dxil_value *srv_handles[MAX_SRVS]; 410 411 struct util_dynarray uav_metadata_nodes; 412 const struct dxil_value *uav_handles[MAX_UAVS]; 413 414 struct util_dynarray cbv_metadata_nodes; 415 const struct dxil_value *cbv_handles[MAX_CBVS]; 416 417 struct util_dynarray sampler_metadata_nodes; 418 const struct dxil_value *sampler_handles[MAX_SAMPLERS]; 419 420 struct util_dynarray resources; 421 422 const struct dxil_mdnode *shader_property_nodes[6]; 423 size_t num_shader_property_nodes; 424 425 struct dxil_def *defs; 426 unsigned num_defs; 427 struct hash_table *phis; 428 429 const struct dxil_value *sharedvars; 430 const struct dxil_value *scratchvars; 431 struct hash_table *consts; 432 433 nir_variable *ps_front_face; 434 nir_variable *system_value[SYSTEM_VALUE_MAX]; 435}; 436 437static const char* 438unary_func_name(enum dxil_intr intr) 439{ 440 switch (intr) { 441 case DXIL_INTR_COUNTBITS: 442 case DXIL_INTR_FIRSTBIT_HI: 443 return "dx.op.unaryBits"; 444 case DXIL_INTR_ISFINITE: 445 case DXIL_INTR_ISNORMAL: 446 return "dx.op.isSpecialFloat"; 447 default: 448 return "dx.op.unary"; 449 } 450} 451 452static const struct dxil_value * 453emit_unary_call(struct ntd_context *ctx, enum overload_type overload, 454 enum dxil_intr intr, 455 const struct dxil_value *op0) 456{ 457 const struct dxil_func *func = dxil_get_function(&ctx->mod, 458 unary_func_name(intr), 459 overload); 460 if (!func) 461 return NULL; 462 463 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); 464 if (!opcode) 465 return NULL; 466 467 const struct dxil_value *args[] = { 468 opcode, 469 op0 470 }; 471 472 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 473} 474 475static const struct dxil_value * 476emit_binary_call(struct ntd_context *ctx, enum overload_type overload, 477 enum dxil_intr intr, 478 const struct dxil_value *op0, const struct dxil_value *op1) 479{ 480 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload); 481 if (!func) 482 return NULL; 483 484 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); 485 if (!opcode) 486 return NULL; 487 488 const struct dxil_value *args[] = { 489 opcode, 490 op0, 491 op1 492 }; 493 494 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 495} 496 497static const struct dxil_value * 498emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload, 499 enum dxil_intr intr, 500 const struct dxil_value *op0, 501 const struct dxil_value *op1, 502 const struct dxil_value *op2) 503{ 504 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload); 505 if (!func) 506 return NULL; 507 508 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); 509 if (!opcode) 510 return NULL; 511 512 const struct dxil_value *args[] = { 513 opcode, 514 op0, 515 op1, 516 op2 517 }; 518 519 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 520} 521 522static const struct dxil_value * 523emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp) 524{ 525 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32); 526 if (!func) 527 return NULL; 528 529 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 530 DXIL_INTR_THREAD_ID); 531 if (!opcode) 532 return NULL; 533 534 const struct dxil_value *args[] = { 535 opcode, 536 comp 537 }; 538 539 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 540} 541 542static const struct dxil_value * 543emit_threadidingroup_call(struct ntd_context *ctx, 544 const struct dxil_value *comp) 545{ 546 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32); 547 548 if (!func) 549 return NULL; 550 551 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 552 DXIL_INTR_THREAD_ID_IN_GROUP); 553 if (!opcode) 554 return NULL; 555 556 const struct dxil_value *args[] = { 557 opcode, 558 comp 559 }; 560 561 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 562} 563 564static const struct dxil_value * 565emit_flattenedthreadidingroup_call(struct ntd_context *ctx) 566{ 567 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32); 568 569 if (!func) 570 return NULL; 571 572 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 573 DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP); 574 if (!opcode) 575 return NULL; 576 577 const struct dxil_value *args[] = { 578 opcode 579 }; 580 581 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 582} 583 584static const struct dxil_value * 585emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp) 586{ 587 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32); 588 589 if (!func) 590 return NULL; 591 592 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 593 DXIL_INTR_GROUP_ID); 594 if (!opcode) 595 return NULL; 596 597 const struct dxil_value *args[] = { 598 opcode, 599 comp 600 }; 601 602 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 603} 604 605static const struct dxil_value * 606emit_bufferload_call(struct ntd_context *ctx, 607 const struct dxil_value *handle, 608 const struct dxil_value *coord[2], 609 enum overload_type overload) 610{ 611 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload); 612 if (!func) 613 return NULL; 614 615 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 616 DXIL_INTR_BUFFER_LOAD); 617 const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] }; 618 619 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 620} 621 622static bool 623emit_bufferstore_call(struct ntd_context *ctx, 624 const struct dxil_value *handle, 625 const struct dxil_value *coord[2], 626 const struct dxil_value *value[4], 627 const struct dxil_value *write_mask, 628 enum overload_type overload) 629{ 630 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload); 631 632 if (!func) 633 return false; 634 635 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 636 DXIL_INTR_BUFFER_STORE); 637 const struct dxil_value *args[] = { 638 opcode, handle, coord[0], coord[1], 639 value[0], value[1], value[2], value[3], 640 write_mask 641 }; 642 643 return dxil_emit_call_void(&ctx->mod, func, 644 args, ARRAY_SIZE(args)); 645} 646 647static const struct dxil_value * 648emit_textureload_call(struct ntd_context *ctx, 649 const struct dxil_value *handle, 650 const struct dxil_value *coord[3], 651 enum overload_type overload) 652{ 653 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload); 654 if (!func) 655 return NULL; 656 const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32); 657 const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type); 658 659 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 660 DXIL_INTR_TEXTURE_LOAD); 661 const struct dxil_value *args[] = { opcode, handle, 662 /*lod_or_sample*/ int_undef, 663 coord[0], coord[1], coord[2], 664 /* offsets */ int_undef, int_undef, int_undef}; 665 666 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 667} 668 669static bool 670emit_texturestore_call(struct ntd_context *ctx, 671 const struct dxil_value *handle, 672 const struct dxil_value *coord[3], 673 const struct dxil_value *value[4], 674 const struct dxil_value *write_mask, 675 enum overload_type overload) 676{ 677 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload); 678 679 if (!func) 680 return false; 681 682 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 683 DXIL_INTR_TEXTURE_STORE); 684 const struct dxil_value *args[] = { 685 opcode, handle, coord[0], coord[1], coord[2], 686 value[0], value[1], value[2], value[3], 687 write_mask 688 }; 689 690 return dxil_emit_call_void(&ctx->mod, func, 691 args, ARRAY_SIZE(args)); 692} 693 694static const struct dxil_value * 695emit_atomic_binop(struct ntd_context *ctx, 696 const struct dxil_value *handle, 697 enum dxil_atomic_op atomic_op, 698 const struct dxil_value *coord[3], 699 const struct dxil_value *value) 700{ 701 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32); 702 703 if (!func) 704 return false; 705 706 const struct dxil_value *opcode = 707 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP); 708 const struct dxil_value *atomic_op_value = 709 dxil_module_get_int32_const(&ctx->mod, atomic_op); 710 const struct dxil_value *args[] = { 711 opcode, handle, atomic_op_value, 712 coord[0], coord[1], coord[2], value 713 }; 714 715 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 716} 717 718static const struct dxil_value * 719emit_atomic_cmpxchg(struct ntd_context *ctx, 720 const struct dxil_value *handle, 721 const struct dxil_value *coord[3], 722 const struct dxil_value *cmpval, 723 const struct dxil_value *newval) 724{ 725 const struct dxil_func *func = 726 dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32); 727 728 if (!func) 729 return false; 730 731 const struct dxil_value *opcode = 732 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG); 733 const struct dxil_value *args[] = { 734 opcode, handle, coord[0], coord[1], coord[2], cmpval, newval 735 }; 736 737 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 738} 739 740static const struct dxil_value * 741emit_createhandle_call(struct ntd_context *ctx, 742 enum dxil_resource_class resource_class, 743 unsigned resource_range_id, 744 const struct dxil_value *resource_range_index, 745 bool non_uniform_resource_index) 746{ 747 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE); 748 const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class); 749 const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id); 750 const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index); 751 if (!opcode || !resource_class_value || !resource_range_id_value || 752 !non_uniform_resource_index_value) 753 return NULL; 754 755 const struct dxil_value *args[] = { 756 opcode, 757 resource_class_value, 758 resource_range_id_value, 759 resource_range_index, 760 non_uniform_resource_index_value 761 }; 762 763 const struct dxil_func *func = 764 dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE); 765 766 if (!func) 767 return NULL; 768 769 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 770} 771 772static const struct dxil_value * 773emit_createhandle_call_const_index(struct ntd_context *ctx, 774 enum dxil_resource_class resource_class, 775 unsigned resource_range_id, 776 unsigned resource_range_index, 777 bool non_uniform_resource_index) 778{ 779 780 const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index); 781 if (!resource_range_index_value) 782 return NULL; 783 784 return emit_createhandle_call(ctx, resource_class, resource_range_id, 785 resource_range_index_value, 786 non_uniform_resource_index); 787} 788 789static void 790add_resource(struct ntd_context *ctx, enum dxil_resource_type type, 791 const resource_array_layout *layout) 792{ 793 struct dxil_resource *resource = util_dynarray_grow(&ctx->resources, struct dxil_resource, 1); 794 resource->resource_type = type; 795 resource->space = layout->space; 796 resource->lower_bound = layout->binding; 797 if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX) 798 resource->upper_bound = UINT_MAX; 799 else 800 resource->upper_bound = layout->binding + layout->size - 1; 801} 802 803static unsigned 804get_resource_id(struct ntd_context *ctx, enum dxil_resource_class class, 805 unsigned space, unsigned binding) 806{ 807 unsigned offset = 0; 808 unsigned count = 0; 809 810 unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *); 811 unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *); 812 unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *); 813 unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *); 814 815 switch (class) { 816 case DXIL_RESOURCE_CLASS_UAV: 817 offset = num_srvs + num_samplers + num_cbvs; 818 count = num_uavs; 819 break; 820 case DXIL_RESOURCE_CLASS_SRV: 821 offset = num_samplers + num_cbvs; 822 count = num_srvs; 823 break; 824 case DXIL_RESOURCE_CLASS_SAMPLER: 825 offset = num_cbvs; 826 count = num_samplers; 827 break; 828 case DXIL_RESOURCE_CLASS_CBV: 829 offset = 0; 830 count = num_cbvs; 831 break; 832 } 833 834 assert(offset + count <= util_dynarray_num_elements(&ctx->resources, struct dxil_resource)); 835 for (unsigned i = offset; i < offset + count; ++i) { 836 const struct dxil_resource *resource = util_dynarray_element(&ctx->resources, struct dxil_resource, i); 837 if (resource->space == space && 838 resource->lower_bound <= binding && 839 resource->upper_bound >= binding) { 840 return i - offset; 841 } 842 } 843 844 unreachable("Resource access for undeclared range"); 845 return 0; 846} 847 848static bool 849emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count) 850{ 851 unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *); 852 unsigned binding = var->data.binding; 853 resource_array_layout layout = {id, binding, count, var->data.descriptor_set}; 854 855 enum dxil_component_type comp_type; 856 enum dxil_resource_kind res_kind; 857 enum dxil_resource_type res_type; 858 if (var->data.mode == nir_var_mem_ssbo) { 859 comp_type = DXIL_COMP_TYPE_INVALID; 860 res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER; 861 res_type = DXIL_RES_SRV_RAW; 862 } else { 863 comp_type = dxil_get_comp_type(var->type); 864 res_kind = dxil_get_resource_kind(var->type); 865 res_type = DXIL_RES_SRV_TYPED; 866 } 867 const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, false /* readwrite */); 868 const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name, 869 &layout, comp_type, res_kind); 870 871 if (!srv_meta) 872 return false; 873 874 util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta); 875 add_resource(ctx, res_type, &layout); 876 if (res_type == DXIL_RES_SRV_RAW) 877 ctx->mod.raw_and_structured_buffers = true; 878 879 if (!ctx->opts->vulkan_environment) { 880 for (unsigned i = 0; i < count; ++i) { 881 const struct dxil_value *handle = 882 emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SRV, 883 id, binding + i, false); 884 if (!handle) 885 return false; 886 887 int idx = var->data.binding + i; 888 ctx->srv_handles[idx] = handle; 889 } 890 } 891 892 return true; 893} 894 895static bool 896emit_globals(struct ntd_context *ctx, unsigned size) 897{ 898 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) 899 size++; 900 901 if (!size) 902 return true; 903 904 const struct dxil_type *struct_type = dxil_module_get_res_type(&ctx->mod, 905 DXIL_RESOURCE_KIND_RAW_BUFFER, DXIL_COMP_TYPE_INVALID, true /* readwrite */); 906 if (!struct_type) 907 return false; 908 909 const struct dxil_type *array_type = 910 dxil_module_get_array_type(&ctx->mod, struct_type, size); 911 if (!array_type) 912 return false; 913 914 resource_array_layout layout = {0, 0, size, 0}; 915 const struct dxil_mdnode *uav_meta = 916 emit_uav_metadata(&ctx->mod, array_type, 917 "globals", &layout, 918 DXIL_COMP_TYPE_INVALID, 919 DXIL_RESOURCE_KIND_RAW_BUFFER); 920 if (!uav_meta) 921 return false; 922 923 util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta); 924 if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8) 925 ctx->mod.feats.use_64uavs = 1; 926 /* Handles to UAVs used for kernel globals are created on-demand */ 927 add_resource(ctx, DXIL_RES_UAV_RAW, &layout); 928 ctx->mod.raw_and_structured_buffers = true; 929 return true; 930} 931 932static bool 933emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count, 934 enum dxil_component_type comp_type, enum dxil_resource_kind res_kind, const char *name) 935{ 936 unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *); 937 resource_array_layout layout = { id, binding, count, space }; 938 939 const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, true /* readwrite */); 940 const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name, 941 &layout, comp_type, res_kind); 942 943 if (!uav_meta) 944 return false; 945 946 util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta); 947 if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8) 948 ctx->mod.feats.use_64uavs = 1; 949 950 add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, &layout); 951 if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER) 952 ctx->mod.raw_and_structured_buffers = true; 953 954 if (!ctx->opts->vulkan_environment) { 955 for (unsigned i = 0; i < count; ++i) { 956 const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_UAV, 957 id, binding + i, false); 958 if (!handle) 959 return false; 960 961 ctx->uav_handles[binding + i] = handle; 962 } 963 } 964 965 return true; 966} 967 968static bool 969emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count) 970{ 971 unsigned binding = var->data.binding; 972 unsigned space = var->data.descriptor_set; 973 enum dxil_component_type comp_type = dxil_get_comp_type(var->type); 974 enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type); 975 const char *name = var->name; 976 977 return emit_uav(ctx, binding, space, count, comp_type, res_kind, name); 978} 979 980static unsigned get_dword_size(const struct glsl_type *type) 981{ 982 if (glsl_type_is_array(type)) { 983 type = glsl_without_array(type); 984 } 985 assert(glsl_type_is_struct(type) || glsl_type_is_interface(type)); 986 return glsl_get_explicit_size(type, false); 987} 988 989static void 990var_fill_const_array_with_vector_or_scalar(struct ntd_context *ctx, 991 const struct nir_constant *c, 992 const struct glsl_type *type, 993 void *const_vals, 994 unsigned int offset) 995{ 996 assert(glsl_type_is_vector_or_scalar(type)); 997 unsigned int components = glsl_get_vector_elements(type); 998 unsigned bit_size = glsl_get_bit_size(type); 999 unsigned int increment = bit_size / 8; 1000 1001 for (unsigned int comp = 0; comp < components; comp++) { 1002 uint8_t *dst = (uint8_t *)const_vals + offset; 1003 1004 switch (bit_size) { 1005 case 64: 1006 memcpy(dst, &c->values[comp].u64, sizeof(c->values[0].u64)); 1007 break; 1008 case 32: 1009 memcpy(dst, &c->values[comp].u32, sizeof(c->values[0].u32)); 1010 break; 1011 case 16: 1012 memcpy(dst, &c->values[comp].u16, sizeof(c->values[0].u16)); 1013 break; 1014 case 8: 1015 assert(glsl_base_type_is_integer(glsl_get_base_type(type))); 1016 memcpy(dst, &c->values[comp].u8, sizeof(c->values[0].u8)); 1017 break; 1018 default: 1019 unreachable("unexpeted bit-size"); 1020 } 1021 1022 offset += increment; 1023 } 1024} 1025 1026static void 1027var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c, 1028 const struct glsl_type *type, void *const_vals, 1029 unsigned int offset) 1030{ 1031 assert(!glsl_type_is_interface(type)); 1032 1033 if (glsl_type_is_vector_or_scalar(type)) { 1034 var_fill_const_array_with_vector_or_scalar(ctx, c, type, 1035 const_vals, 1036 offset); 1037 } else if (glsl_type_is_array(type)) { 1038 assert(!glsl_type_is_unsized_array(type)); 1039 const struct glsl_type *without = glsl_without_array(type); 1040 unsigned stride = glsl_get_explicit_stride(without); 1041 1042 for (unsigned elt = 0; elt < glsl_get_length(type); elt++) { 1043 var_fill_const_array(ctx, c->elements[elt], without, 1044 const_vals, offset + (elt * stride)); 1045 offset += glsl_get_cl_size(without); 1046 } 1047 } else if (glsl_type_is_struct(type)) { 1048 for (unsigned int elt = 0; elt < glsl_get_length(type); elt++) { 1049 const struct glsl_type *elt_type = glsl_get_struct_field(type, elt); 1050 unsigned field_offset = glsl_get_struct_field_offset(type, elt); 1051 1052 var_fill_const_array(ctx, c->elements[elt], 1053 elt_type, const_vals, 1054 offset + field_offset); 1055 } 1056 } else 1057 unreachable("unknown GLSL type in var_fill_const_array"); 1058} 1059 1060static bool 1061emit_global_consts(struct ntd_context *ctx) 1062{ 1063 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_temp) { 1064 assert(var->constant_initializer); 1065 1066 unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4); 1067 uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members); 1068 var_fill_const_array(ctx, var->constant_initializer, var->type, 1069 const_ints, 0); 1070 const struct dxil_value **const_vals = 1071 ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members); 1072 if (!const_vals) 1073 return false; 1074 for (int i = 0; i < num_members; i++) 1075 const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]); 1076 1077 const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32); 1078 if (!elt_type) 1079 return false; 1080 const struct dxil_type *type = 1081 dxil_module_get_array_type(&ctx->mod, elt_type, num_members); 1082 if (!type) 1083 return false; 1084 const struct dxil_value *agg_vals = 1085 dxil_module_get_array_const(&ctx->mod, type, const_vals); 1086 if (!agg_vals) 1087 return false; 1088 1089 const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, type, 1090 DXIL_AS_DEFAULT, 4, 1091 agg_vals); 1092 if (!gvar) 1093 return false; 1094 1095 if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar)) 1096 return false; 1097 } 1098 1099 return true; 1100} 1101 1102static bool 1103emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space, 1104 unsigned size, unsigned count, char *name) 1105{ 1106 unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *); 1107 1108 const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32); 1109 const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size); 1110 const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name, 1111 &array_type, 1); 1112 const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type; 1113 resource_array_layout layout = {idx, binding, count, space}; 1114 const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type, 1115 name, &layout, 4 * size); 1116 1117 if (!cbv_meta) 1118 return false; 1119 1120 util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta); 1121 add_resource(ctx, DXIL_RES_CBV, &layout); 1122 1123 if (!ctx->opts->vulkan_environment) { 1124 for (unsigned i = 0; i < count; ++i) { 1125 const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_CBV, 1126 idx, binding + i, false); 1127 if (!handle) 1128 return false; 1129 1130 assert(!ctx->cbv_handles[binding + i]); 1131 ctx->cbv_handles[binding + i] = handle; 1132 } 1133 } 1134 1135 return true; 1136} 1137 1138static bool 1139emit_ubo_var(struct ntd_context *ctx, nir_variable *var) 1140{ 1141 unsigned count = 1; 1142 if (glsl_type_is_array(var->type)) 1143 count = glsl_get_length(var->type); 1144 return emit_cbv(ctx, var->data.binding, var->data.descriptor_set, get_dword_size(var->type), count, var->name); 1145} 1146 1147static bool 1148emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count) 1149{ 1150 unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *); 1151 unsigned binding = var->data.binding; 1152 resource_array_layout layout = {id, binding, count, var->data.descriptor_set}; 1153 const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); 1154 const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1); 1155 const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout); 1156 1157 if (!sampler_meta) 1158 return false; 1159 1160 util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta); 1161 add_resource(ctx, DXIL_RES_SAMPLER, &layout); 1162 1163 if (!ctx->opts->vulkan_environment) { 1164 for (unsigned i = 0; i < count; ++i) { 1165 const struct dxil_value *handle = 1166 emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SAMPLER, 1167 id, binding + i, false); 1168 if (!handle) 1169 return false; 1170 1171 unsigned idx = var->data.binding + i; 1172 ctx->sampler_handles[idx] = handle; 1173 } 1174 } 1175 1176 return true; 1177} 1178 1179static const struct dxil_mdnode * 1180emit_gs_state(struct ntd_context *ctx) 1181{ 1182 const struct dxil_mdnode *gs_state_nodes[5]; 1183 const nir_shader *s = ctx->shader; 1184 1185 gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive)); 1186 gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out); 1187 gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.active_stream_mask); 1188 gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive)); 1189 gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations); 1190 1191 for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) { 1192 if (!gs_state_nodes[i]) 1193 return NULL; 1194 } 1195 1196 return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes)); 1197} 1198 1199static const struct dxil_mdnode * 1200emit_threads(struct ntd_context *ctx) 1201{ 1202 const nir_shader *s = ctx->shader; 1203 const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1)); 1204 const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1)); 1205 const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1)); 1206 if (!threads_x || !threads_y || !threads_z) 1207 return false; 1208 1209 const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z }; 1210 return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes)); 1211} 1212 1213static int64_t 1214get_module_flags(struct ntd_context *ctx) 1215{ 1216 /* See the DXIL documentation for the definition of these flags: 1217 * 1218 * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags 1219 */ 1220 1221 uint64_t flags = 0; 1222 if (ctx->mod.feats.doubles) 1223 flags |= (1 << 2); 1224 if (ctx->mod.raw_and_structured_buffers) 1225 flags |= (1 << 4); 1226 if (ctx->mod.feats.min_precision) 1227 flags |= (1 << 5); 1228 if (ctx->mod.feats.dx11_1_double_extensions) 1229 flags |= (1 << 6); 1230 if (ctx->mod.feats.inner_coverage) 1231 flags |= (1 << 10); 1232 if (ctx->mod.feats.typed_uav_load_additional_formats) 1233 flags |= (1 << 13); 1234 if (ctx->mod.feats.use_64uavs) 1235 flags |= (1 << 15); 1236 if (ctx->mod.feats.cs_4x_raw_sb) 1237 flags |= (1 << 17); 1238 if (ctx->mod.feats.wave_ops) 1239 flags |= (1 << 19); 1240 if (ctx->mod.feats.int64_ops) 1241 flags |= (1 << 20); 1242 if (ctx->mod.feats.stencil_ref) 1243 flags |= (1 << 11); 1244 if (ctx->mod.feats.native_low_precision) 1245 flags |= (1 << 23) | (1 << 5); 1246 1247 if (ctx->opts->disable_math_refactoring) 1248 flags |= (1 << 1); 1249 1250 return flags; 1251} 1252 1253static const struct dxil_mdnode * 1254emit_entrypoint(struct ntd_context *ctx, 1255 const struct dxil_func *func, const char *name, 1256 const struct dxil_mdnode *signatures, 1257 const struct dxil_mdnode *resources, 1258 const struct dxil_mdnode *shader_props) 1259{ 1260 const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func); 1261 const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, name); 1262 const struct dxil_mdnode *nodes[] = { 1263 func_md, 1264 name_md, 1265 signatures, 1266 resources, 1267 shader_props 1268 }; 1269 return dxil_get_metadata_node(&ctx->mod, nodes, 1270 ARRAY_SIZE(nodes)); 1271} 1272 1273static const struct dxil_mdnode * 1274emit_resources(struct ntd_context *ctx) 1275{ 1276 bool emit_resources = false; 1277 const struct dxil_mdnode *resources_nodes[] = { 1278 NULL, NULL, NULL, NULL 1279 }; 1280 1281#define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *) 1282 1283 if (ctx->srv_metadata_nodes.size) { 1284 resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes)); 1285 emit_resources = true; 1286 } 1287 1288 if (ctx->uav_metadata_nodes.size) { 1289 resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes)); 1290 emit_resources = true; 1291 } 1292 1293 if (ctx->cbv_metadata_nodes.size) { 1294 resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes)); 1295 emit_resources = true; 1296 } 1297 1298 if (ctx->sampler_metadata_nodes.size) { 1299 resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes)); 1300 emit_resources = true; 1301 } 1302 1303#undef ARRAY_AND_SIZE 1304 1305 return emit_resources ? 1306 dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL; 1307} 1308 1309static boolean 1310emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag, 1311 const struct dxil_mdnode *value_node) 1312{ 1313 const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag); 1314 if (!tag_node || !value_node) 1315 return false; 1316 assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2); 1317 ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node; 1318 ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node; 1319 1320 return true; 1321} 1322 1323static bool 1324emit_metadata(struct ntd_context *ctx) 1325{ 1326 unsigned dxilMinor = ctx->mod.minor_version; 1327 if (!emit_llvm_ident(&ctx->mod) || 1328 !emit_named_version(&ctx->mod, "dx.version", 1, dxilMinor) || 1329 !emit_named_version(&ctx->mod, "dx.valver", 1, 4) || 1330 !emit_dx_shader_model(&ctx->mod)) 1331 return false; 1332 1333 const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod); 1334 const struct dxil_type *main_func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0); 1335 const struct dxil_func *main_func = dxil_add_function_def(&ctx->mod, "main", main_func_type); 1336 if (!main_func) 1337 return false; 1338 1339 const struct dxil_mdnode *resources_node = emit_resources(ctx); 1340 1341 const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func); 1342 const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0); 1343 1344 const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0); 1345 const struct dxil_mdnode *nodes_4_27_27[] = { 1346 node4, node27, node27 1347 }; 1348 const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27, 1349 ARRAY_SIZE(nodes_4_27_27)); 1350 1351 const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1); 1352 1353 const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1); 1354 const struct dxil_mdnode *main_type_annotation_nodes[] = { 1355 node3, main_entrypoint, node29 1356 }; 1357 const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes, 1358 ARRAY_SIZE(main_type_annotation_nodes)); 1359 1360 if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) { 1361 if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx))) 1362 return false; 1363 } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) { 1364 if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx))) 1365 return false; 1366 } 1367 1368 uint64_t flags = get_module_flags(ctx); 1369 if (flags != 0) { 1370 if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags))) 1371 return false; 1372 } 1373 const struct dxil_mdnode *shader_properties = NULL; 1374 if (ctx->num_shader_property_nodes > 0) { 1375 shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes, 1376 ctx->num_shader_property_nodes); 1377 if (!shader_properties) 1378 return false; 1379 } 1380 1381 const struct dxil_mdnode *signatures = get_signatures(&ctx->mod, ctx->shader, 1382 ctx->opts->vulkan_environment); 1383 1384 const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func, 1385 "main", signatures, resources_node, shader_properties); 1386 if (!dx_entry_point) 1387 return false; 1388 1389 if (resources_node) { 1390 const struct dxil_mdnode *dx_resources = resources_node; 1391 dxil_add_metadata_named_node(&ctx->mod, "dx.resources", 1392 &dx_resources, 1); 1393 } 1394 1395 const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation }; 1396 return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations", 1397 dx_type_annotations, 1398 ARRAY_SIZE(dx_type_annotations)) && 1399 dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints", 1400 &dx_entry_point, 1); 1401} 1402 1403static const struct dxil_value * 1404bitcast_to_int(struct ntd_context *ctx, unsigned bit_size, 1405 const struct dxil_value *value) 1406{ 1407 const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size); 1408 if (!type) 1409 return NULL; 1410 1411 return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value); 1412} 1413 1414static const struct dxil_value * 1415bitcast_to_float(struct ntd_context *ctx, unsigned bit_size, 1416 const struct dxil_value *value) 1417{ 1418 const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size); 1419 if (!type) 1420 return NULL; 1421 1422 return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value); 1423} 1424 1425static void 1426store_ssa_def(struct ntd_context *ctx, nir_ssa_def *ssa, unsigned chan, 1427 const struct dxil_value *value) 1428{ 1429 assert(ssa->index < ctx->num_defs); 1430 assert(chan < ssa->num_components); 1431 /* We pre-defined the dest value because of a phi node, so bitcast while storing if the 1432 * base type differs */ 1433 if (ctx->defs[ssa->index].chans[chan]) { 1434 const struct dxil_type *expect_type = dxil_value_get_type(ctx->defs[ssa->index].chans[chan]); 1435 const struct dxil_type *value_type = dxil_value_get_type(value); 1436 if (dxil_type_to_nir_type(expect_type) != dxil_type_to_nir_type(value_type)) 1437 value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, expect_type, value); 1438 } 1439 ctx->defs[ssa->index].chans[chan] = value; 1440} 1441 1442static void 1443store_dest_value(struct ntd_context *ctx, nir_dest *dest, unsigned chan, 1444 const struct dxil_value *value) 1445{ 1446 assert(dest->is_ssa); 1447 assert(value); 1448 store_ssa_def(ctx, &dest->ssa, chan, value); 1449} 1450 1451static void 1452store_dest(struct ntd_context *ctx, nir_dest *dest, unsigned chan, 1453 const struct dxil_value *value, nir_alu_type type) 1454{ 1455 switch (nir_alu_type_get_base_type(type)) { 1456 case nir_type_float: 1457 if (nir_dest_bit_size(*dest) == 64) 1458 ctx->mod.feats.doubles = true; 1459 FALLTHROUGH; 1460 case nir_type_uint: 1461 case nir_type_int: 1462 if (nir_dest_bit_size(*dest) == 16) 1463 ctx->mod.feats.native_low_precision = true; 1464 if (nir_dest_bit_size(*dest) == 64) 1465 ctx->mod.feats.int64_ops = true; 1466 FALLTHROUGH; 1467 case nir_type_bool: 1468 store_dest_value(ctx, dest, chan, value); 1469 break; 1470 default: 1471 unreachable("unexpected nir_alu_type"); 1472 } 1473} 1474 1475static void 1476store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan, 1477 const struct dxil_value *value) 1478{ 1479 assert(!alu->dest.saturate); 1480 store_dest(ctx, &alu->dest.dest, chan, value, 1481 nir_op_infos[alu->op].output_type); 1482} 1483 1484static const struct dxil_value * 1485get_src_ssa(struct ntd_context *ctx, const nir_ssa_def *ssa, unsigned chan) 1486{ 1487 assert(ssa->index < ctx->num_defs); 1488 assert(chan < ssa->num_components); 1489 assert(ctx->defs[ssa->index].chans[chan]); 1490 return ctx->defs[ssa->index].chans[chan]; 1491} 1492 1493static const struct dxil_value * 1494get_src(struct ntd_context *ctx, nir_src *src, unsigned chan, 1495 nir_alu_type type) 1496{ 1497 assert(src->is_ssa); 1498 const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan); 1499 1500 const int bit_size = nir_src_bit_size(*src); 1501 1502 switch (nir_alu_type_get_base_type(type)) { 1503 case nir_type_int: 1504 case nir_type_uint: { 1505 assert(bit_size != 64 || ctx->mod.feats.int64_ops); 1506 const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size); 1507 /* nohing to do */ 1508 if (dxil_value_type_equal_to(value, expect_type)) 1509 return value; 1510 assert(dxil_value_type_bitsize_equal_to(value, bit_size)); 1511 return bitcast_to_int(ctx, bit_size, value); 1512 } 1513 1514 case nir_type_float: 1515 assert(nir_src_bit_size(*src) >= 16); 1516 assert(nir_src_bit_size(*src) != 64 || (ctx->mod.feats.doubles && 1517 ctx->mod.feats.int64_ops)); 1518 if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) 1519 return value; 1520 assert(dxil_value_type_bitsize_equal_to(value, bit_size)); 1521 return bitcast_to_float(ctx, bit_size, value); 1522 1523 case nir_type_bool: 1524 if (!dxil_value_type_bitsize_equal_to(value, 1)) { 1525 return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC, 1526 dxil_module_get_int_type(&ctx->mod, 1), value); 1527 } 1528 return value; 1529 1530 default: 1531 unreachable("unexpected nir_alu_type"); 1532 } 1533} 1534 1535static const struct dxil_type * 1536get_alu_src_type(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src) 1537{ 1538 assert(!alu->src[src].abs); 1539 assert(!alu->src[src].negate); 1540 nir_ssa_def *ssa_src = alu->src[src].src.ssa; 1541 unsigned chan = alu->src[src].swizzle[0]; 1542 const struct dxil_value *value = get_src_ssa(ctx, ssa_src, chan); 1543 return dxil_value_get_type(value); 1544} 1545 1546static const struct dxil_value * 1547get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src) 1548{ 1549 assert(!alu->src[src].abs); 1550 assert(!alu->src[src].negate); 1551 1552 unsigned chan = alu->src[src].swizzle[0]; 1553 return get_src(ctx, &alu->src[src].src, chan, 1554 nir_op_infos[alu->op].input_types[src]); 1555} 1556 1557static bool 1558emit_binop(struct ntd_context *ctx, nir_alu_instr *alu, 1559 enum dxil_bin_opcode opcode, 1560 const struct dxil_value *op0, const struct dxil_value *op1) 1561{ 1562 bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float; 1563 1564 enum dxil_opt_flags flags = 0; 1565 if (is_float_op && !alu->exact) 1566 flags |= DXIL_UNSAFE_ALGEBRA; 1567 1568 const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags); 1569 if (!v) 1570 return false; 1571 store_alu_dest(ctx, alu, 0, v); 1572 return true; 1573} 1574 1575static bool 1576emit_shift(struct ntd_context *ctx, nir_alu_instr *alu, 1577 enum dxil_bin_opcode opcode, 1578 const struct dxil_value *op0, const struct dxil_value *op1) 1579{ 1580 unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src); 1581 unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src); 1582 if (op0_bit_size != op1_bit_size) { 1583 const struct dxil_type *type = 1584 dxil_module_get_int_type(&ctx->mod, op0_bit_size); 1585 enum dxil_cast_opcode cast_op = 1586 op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC; 1587 op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1); 1588 } 1589 1590 const struct dxil_value *v = 1591 dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0); 1592 if (!v) 1593 return false; 1594 store_alu_dest(ctx, alu, 0, v); 1595 return true; 1596} 1597 1598static bool 1599emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu, 1600 enum dxil_cmp_pred pred, 1601 const struct dxil_value *op0, const struct dxil_value *op1) 1602{ 1603 const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1); 1604 if (!v) 1605 return false; 1606 store_alu_dest(ctx, alu, 0, v); 1607 return true; 1608} 1609 1610static enum dxil_cast_opcode 1611get_cast_op(nir_alu_instr *alu) 1612{ 1613 unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); 1614 unsigned src_bits = nir_src_bit_size(alu->src[0].src); 1615 1616 switch (alu->op) { 1617 /* bool -> int */ 1618 case nir_op_b2i16: 1619 case nir_op_b2i32: 1620 case nir_op_b2i64: 1621 return DXIL_CAST_ZEXT; 1622 1623 /* float -> float */ 1624 case nir_op_f2f16_rtz: 1625 case nir_op_f2f32: 1626 case nir_op_f2f64: 1627 assert(dst_bits != src_bits); 1628 if (dst_bits < src_bits) 1629 return DXIL_CAST_FPTRUNC; 1630 else 1631 return DXIL_CAST_FPEXT; 1632 1633 /* int -> int */ 1634 case nir_op_i2i16: 1635 case nir_op_i2i32: 1636 case nir_op_i2i64: 1637 assert(dst_bits != src_bits); 1638 if (dst_bits < src_bits) 1639 return DXIL_CAST_TRUNC; 1640 else 1641 return DXIL_CAST_SEXT; 1642 1643 /* uint -> uint */ 1644 case nir_op_u2u16: 1645 case nir_op_u2u32: 1646 case nir_op_u2u64: 1647 assert(dst_bits != src_bits); 1648 if (dst_bits < src_bits) 1649 return DXIL_CAST_TRUNC; 1650 else 1651 return DXIL_CAST_ZEXT; 1652 1653 /* float -> int */ 1654 case nir_op_f2i16: 1655 case nir_op_f2i32: 1656 case nir_op_f2i64: 1657 return DXIL_CAST_FPTOSI; 1658 1659 /* float -> uint */ 1660 case nir_op_f2u16: 1661 case nir_op_f2u32: 1662 case nir_op_f2u64: 1663 return DXIL_CAST_FPTOUI; 1664 1665 /* int -> float */ 1666 case nir_op_i2f16: 1667 case nir_op_i2f32: 1668 case nir_op_i2f64: 1669 return DXIL_CAST_SITOFP; 1670 1671 /* uint -> float */ 1672 case nir_op_u2f16: 1673 case nir_op_u2f32: 1674 case nir_op_u2f64: 1675 return DXIL_CAST_UITOFP; 1676 1677 default: 1678 unreachable("unexpected cast op"); 1679 } 1680} 1681 1682static const struct dxil_type * 1683get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu) 1684{ 1685 unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); 1686 switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) { 1687 case nir_type_bool: 1688 assert(dst_bits == 1); 1689 FALLTHROUGH; 1690 case nir_type_int: 1691 case nir_type_uint: 1692 return dxil_module_get_int_type(&ctx->mod, dst_bits); 1693 1694 case nir_type_float: 1695 return dxil_module_get_float_type(&ctx->mod, dst_bits); 1696 1697 default: 1698 unreachable("unknown nir_alu_type"); 1699 } 1700} 1701 1702static bool 1703is_double(nir_alu_type alu_type, unsigned bit_size) 1704{ 1705 return nir_alu_type_get_base_type(alu_type) == nir_type_float && 1706 bit_size == 64; 1707} 1708 1709static bool 1710emit_cast(struct ntd_context *ctx, nir_alu_instr *alu, 1711 const struct dxil_value *value) 1712{ 1713 enum dxil_cast_opcode opcode = get_cast_op(alu); 1714 const struct dxil_type *type = get_cast_dest_type(ctx, alu); 1715 if (!type) 1716 return false; 1717 1718 const nir_op_info *info = &nir_op_infos[alu->op]; 1719 switch (opcode) { 1720 case DXIL_CAST_UITOFP: 1721 case DXIL_CAST_SITOFP: 1722 if (is_double(info->output_type, nir_dest_bit_size(alu->dest.dest))) 1723 ctx->mod.feats.dx11_1_double_extensions = true; 1724 break; 1725 case DXIL_CAST_FPTOUI: 1726 case DXIL_CAST_FPTOSI: 1727 if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src))) 1728 ctx->mod.feats.dx11_1_double_extensions = true; 1729 break; 1730 default: 1731 break; 1732 } 1733 1734 const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type, 1735 value); 1736 if (!v) 1737 return false; 1738 store_alu_dest(ctx, alu, 0, v); 1739 return true; 1740} 1741 1742static enum overload_type 1743get_overload(nir_alu_type alu_type, unsigned bit_size) 1744{ 1745 switch (nir_alu_type_get_base_type(alu_type)) { 1746 case nir_type_int: 1747 case nir_type_uint: 1748 switch (bit_size) { 1749 case 16: return DXIL_I16; 1750 case 32: return DXIL_I32; 1751 case 64: return DXIL_I64; 1752 default: 1753 unreachable("unexpected bit_size"); 1754 } 1755 case nir_type_float: 1756 switch (bit_size) { 1757 case 16: return DXIL_F16; 1758 case 32: return DXIL_F32; 1759 case 64: return DXIL_F64; 1760 default: 1761 unreachable("unexpected bit_size"); 1762 } 1763 default: 1764 unreachable("unexpected output type"); 1765 } 1766} 1767 1768static bool 1769emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu, 1770 enum dxil_intr intr, const struct dxil_value *op) 1771{ 1772 const nir_op_info *info = &nir_op_infos[alu->op]; 1773 unsigned src_bits = nir_src_bit_size(alu->src[0].src); 1774 enum overload_type overload = get_overload(info->input_types[0], src_bits); 1775 1776 const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op); 1777 if (!v) 1778 return false; 1779 store_alu_dest(ctx, alu, 0, v); 1780 return true; 1781} 1782 1783static bool 1784emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu, 1785 enum dxil_intr intr, 1786 const struct dxil_value *op0, const struct dxil_value *op1) 1787{ 1788 const nir_op_info *info = &nir_op_infos[alu->op]; 1789 assert(info->output_type == info->input_types[0]); 1790 assert(info->output_type == info->input_types[1]); 1791 unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); 1792 assert(nir_src_bit_size(alu->src[0].src) == dst_bits); 1793 assert(nir_src_bit_size(alu->src[1].src) == dst_bits); 1794 enum overload_type overload = get_overload(info->output_type, dst_bits); 1795 1796 const struct dxil_value *v = emit_binary_call(ctx, overload, intr, 1797 op0, op1); 1798 if (!v) 1799 return false; 1800 store_alu_dest(ctx, alu, 0, v); 1801 return true; 1802} 1803 1804static bool 1805emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu, 1806 enum dxil_intr intr, 1807 const struct dxil_value *op0, 1808 const struct dxil_value *op1, 1809 const struct dxil_value *op2) 1810{ 1811 const nir_op_info *info = &nir_op_infos[alu->op]; 1812 assert(info->output_type == info->input_types[0]); 1813 assert(info->output_type == info->input_types[1]); 1814 assert(info->output_type == info->input_types[2]); 1815 1816 unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); 1817 assert(nir_src_bit_size(alu->src[0].src) == dst_bits); 1818 assert(nir_src_bit_size(alu->src[1].src) == dst_bits); 1819 assert(nir_src_bit_size(alu->src[2].src) == dst_bits); 1820 1821 enum overload_type overload = get_overload(info->output_type, dst_bits); 1822 1823 const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr, 1824 op0, op1, op2); 1825 if (!v) 1826 return false; 1827 store_alu_dest(ctx, alu, 0, v); 1828 return true; 1829} 1830 1831static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu, 1832 const struct dxil_value *sel, 1833 const struct dxil_value *val_true, 1834 const struct dxil_value *val_false) 1835{ 1836 assert(sel); 1837 assert(val_true); 1838 assert(val_false); 1839 1840 const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false); 1841 if (!v) 1842 return false; 1843 1844 store_alu_dest(ctx, alu, 0, v); 1845 return true; 1846} 1847 1848static bool 1849emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) 1850{ 1851 assert(val); 1852 1853 struct dxil_module *m = &ctx->mod; 1854 1855 const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00); 1856 const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0); 1857 1858 if (!c0 || !c1) 1859 return false; 1860 1861 return emit_select(ctx, alu, val, c1, c0); 1862} 1863 1864static bool 1865emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) 1866{ 1867 assert(val); 1868 1869 struct dxil_module *m = &ctx->mod; 1870 1871 const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f); 1872 const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f); 1873 1874 if (!c0 || !c1) 1875 return false; 1876 1877 return emit_select(ctx, alu, val, c1, c0); 1878} 1879 1880static bool 1881emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) 1882{ 1883 assert(val); 1884 1885 const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f); 1886 return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero); 1887} 1888 1889static bool 1890emit_ufind_msb(struct ntd_context *ctx, nir_alu_instr *alu, 1891 const struct dxil_value *val) 1892{ 1893 const nir_op_info *info = &nir_op_infos[alu->op]; 1894 unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); 1895 unsigned src_bits = nir_src_bit_size(alu->src[0].src); 1896 enum overload_type overload = get_overload(info->output_type, src_bits); 1897 1898 const struct dxil_value *v = emit_unary_call(ctx, overload, 1899 DXIL_INTR_FIRSTBIT_HI, val); 1900 if (!v) 1901 return false; 1902 1903 const struct dxil_value *size = dxil_module_get_int32_const(&ctx->mod, 1904 src_bits - 1); 1905 const struct dxil_value *zero = dxil_module_get_int_const(&ctx->mod, 0, 1906 src_bits); 1907 if (!size || !zero) 1908 return false; 1909 1910 v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SUB, size, v, 0); 1911 const struct dxil_value *cnd = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_NE, 1912 val, zero); 1913 if (!v || !cnd) 1914 return false; 1915 1916 const struct dxil_value *minus_one = 1917 dxil_module_get_int_const(&ctx->mod, -1, dst_bits); 1918 if (!minus_one) 1919 return false; 1920 1921 v = dxil_emit_select(&ctx->mod, cnd, v, minus_one); 1922 if (!v) 1923 return false; 1924 1925 store_alu_dest(ctx, alu, 0, v); 1926 return true; 1927} 1928 1929static bool 1930emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) 1931{ 1932 const struct dxil_func *func = dxil_get_function(&ctx->mod, 1933 "dx.op.legacyF16ToF32", 1934 DXIL_NONE); 1935 if (!func) 1936 return false; 1937 1938 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32); 1939 if (!opcode) 1940 return false; 1941 1942 const struct dxil_value *args[] = { 1943 opcode, 1944 val 1945 }; 1946 1947 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 1948 if (!v) 1949 return false; 1950 store_alu_dest(ctx, alu, 0, v); 1951 return true; 1952} 1953 1954static bool 1955emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) 1956{ 1957 const struct dxil_func *func = dxil_get_function(&ctx->mod, 1958 "dx.op.legacyF32ToF16", 1959 DXIL_NONE); 1960 if (!func) 1961 return false; 1962 1963 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16); 1964 if (!opcode) 1965 return false; 1966 1967 const struct dxil_value *args[] = { 1968 opcode, 1969 val 1970 }; 1971 1972 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 1973 if (!v) 1974 return false; 1975 store_alu_dest(ctx, alu, 0, v); 1976 return true; 1977} 1978 1979static bool 1980emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs) 1981{ 1982 const struct dxil_type *type = get_alu_src_type(ctx, alu, 0); 1983 nir_alu_type t = dxil_type_to_nir_type(type); 1984 1985 for (unsigned i = 0; i < num_inputs; i++) { 1986 const struct dxil_value *src = 1987 get_src(ctx, &alu->src[i].src, alu->src[i].swizzle[0], t); 1988 if (!src) 1989 return false; 1990 1991 store_alu_dest(ctx, alu, i, src); 1992 } 1993 return true; 1994} 1995 1996static bool 1997emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu) 1998{ 1999 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64); 2000 if (!func) 2001 return false; 2002 2003 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE); 2004 if (!opcode) 2005 return false; 2006 2007 const struct dxil_value *args[3] = { 2008 opcode, 2009 get_src(ctx, &alu->src[0].src, 0, nir_type_uint32), 2010 get_src(ctx, &alu->src[0].src, 1, nir_type_uint32), 2011 }; 2012 if (!args[1] || !args[2]) 2013 return false; 2014 2015 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2016 if (!v) 2017 return false; 2018 store_dest(ctx, &alu->dest.dest, 0, v, nir_type_float64); 2019 return true; 2020} 2021 2022static bool 2023emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu) 2024{ 2025 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64); 2026 if (!func) 2027 return false; 2028 2029 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE); 2030 if (!opcode) 2031 return false; 2032 2033 const struct dxil_value *args[] = { 2034 opcode, 2035 get_src(ctx, &alu->src[0].src, 0, nir_type_float64) 2036 }; 2037 if (!args[1]) 2038 return false; 2039 2040 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2041 if (!v) 2042 return false; 2043 2044 const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0); 2045 const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1); 2046 if (!hi || !lo) 2047 return false; 2048 2049 store_dest_value(ctx, &alu->dest.dest, 0, hi); 2050 store_dest_value(ctx, &alu->dest.dest, 1, lo); 2051 return true; 2052} 2053 2054static bool 2055emit_alu(struct ntd_context *ctx, nir_alu_instr *alu) 2056{ 2057 /* handle vec-instructions first; they are the only ones that produce 2058 * vector results. 2059 */ 2060 switch (alu->op) { 2061 case nir_op_vec2: 2062 case nir_op_vec3: 2063 case nir_op_vec4: 2064 case nir_op_vec8: 2065 case nir_op_vec16: 2066 return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs); 2067 case nir_op_mov: { 2068 assert(nir_dest_num_components(alu->dest.dest) == 1); 2069 store_ssa_def(ctx, &alu->dest.dest.ssa, 0, get_src_ssa(ctx, 2070 alu->src->src.ssa, alu->src->swizzle[0])); 2071 return true; 2072 } 2073 case nir_op_pack_double_2x32_dxil: 2074 return emit_make_double(ctx, alu); 2075 case nir_op_unpack_double_2x32_dxil: 2076 return emit_split_double(ctx, alu); 2077 default: 2078 /* silence warnings */ 2079 ; 2080 } 2081 2082 /* other ops should be scalar */ 2083 assert(alu->dest.write_mask == 1); 2084 const struct dxil_value *src[4]; 2085 assert(nir_op_infos[alu->op].num_inputs <= 4); 2086 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 2087 src[i] = get_alu_src(ctx, alu, i); 2088 if (!src[i]) 2089 return false; 2090 } 2091 2092 switch (alu->op) { 2093 case nir_op_iadd: 2094 case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]); 2095 2096 case nir_op_isub: 2097 case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]); 2098 2099 case nir_op_imul: 2100 case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]); 2101 2102 case nir_op_idiv: 2103 case nir_op_fdiv: return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]); 2104 2105 case nir_op_udiv: return emit_binop(ctx, alu, DXIL_BINOP_UDIV, src[0], src[1]); 2106 case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]); 2107 case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]); 2108 case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]); 2109 case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]); 2110 case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]); 2111 case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]); 2112 case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]); 2113 case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]); 2114 case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]); 2115 case nir_op_inot: { 2116 unsigned bit_size = alu->dest.dest.ssa.bit_size; 2117 intmax_t val = bit_size == 1 ? 1 : -1; 2118 const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size); 2119 return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one); 2120 } 2121 case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]); 2122 case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]); 2123 case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]); 2124 case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]); 2125 case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]); 2126 case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]); 2127 case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]); 2128 case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]); 2129 case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]); 2130 case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]); 2131 case nir_op_bcsel: return emit_select(ctx, alu, src[0], src[1], src[2]); 2132 case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]); 2133 case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]); 2134 case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]); 2135 case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]); 2136 case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]); 2137 case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]); 2138 case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]); 2139 case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]); 2140 case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]); 2141 case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]); 2142 case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]); 2143 2144 case nir_op_fddx: 2145 case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]); 2146 case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]); 2147 case nir_op_fddy: 2148 case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]); 2149 case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]); 2150 2151 case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]); 2152 case nir_op_frcp: { 2153 const struct dxil_value *one = dxil_module_get_float_const(&ctx->mod, 1.0f); 2154 return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]); 2155 } 2156 case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]); 2157 case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]); 2158 case nir_op_ufind_msb: return emit_ufind_msb(ctx, alu, src[0]); 2159 case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]); 2160 case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]); 2161 case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]); 2162 case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]); 2163 case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]); 2164 case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]); 2165 case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]); 2166 case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]); 2167 case nir_op_ffma: return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]); 2168 2169 case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0]); 2170 case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0]); 2171 2172 case nir_op_b2i16: 2173 case nir_op_i2i16: 2174 case nir_op_f2i16: 2175 case nir_op_f2u16: 2176 case nir_op_u2u16: 2177 case nir_op_u2f16: 2178 case nir_op_i2f16: 2179 case nir_op_f2f16_rtz: 2180 case nir_op_b2i32: 2181 case nir_op_f2f32: 2182 case nir_op_f2i32: 2183 case nir_op_f2u32: 2184 case nir_op_i2f32: 2185 case nir_op_i2i32: 2186 case nir_op_u2f32: 2187 case nir_op_u2u32: 2188 case nir_op_b2i64: 2189 case nir_op_f2f64: 2190 case nir_op_f2i64: 2191 case nir_op_f2u64: 2192 case nir_op_i2f64: 2193 case nir_op_i2i64: 2194 case nir_op_u2f64: 2195 case nir_op_u2u64: 2196 return emit_cast(ctx, alu, src[0]); 2197 2198 case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]); 2199 case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]); 2200 case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]); 2201 default: 2202 NIR_INSTR_UNSUPPORTED(&alu->instr); 2203 assert("Unimplemented ALU instruction"); 2204 return false; 2205 } 2206} 2207 2208static const struct dxil_value * 2209load_ubo(struct ntd_context *ctx, const struct dxil_value *handle, 2210 const struct dxil_value *offset, enum overload_type overload) 2211{ 2212 assert(handle && offset); 2213 2214 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY); 2215 if (!opcode) 2216 return NULL; 2217 2218 const struct dxil_value *args[] = { 2219 opcode, handle, offset 2220 }; 2221 2222 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload); 2223 if (!func) 2224 return NULL; 2225 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2226} 2227 2228static bool 2229emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2230{ 2231 const struct dxil_value *opcode, *mode; 2232 const struct dxil_func *func; 2233 uint32_t flags = 0; 2234 2235 if (nir_intrinsic_execution_scope(intr) == NIR_SCOPE_WORKGROUP) 2236 flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP; 2237 2238 nir_variable_mode modes = nir_intrinsic_memory_modes(intr); 2239 nir_scope mem_scope = nir_intrinsic_memory_scope(intr); 2240 2241 /* Currently vtn uses uniform to indicate image memory, which DXIL considers global */ 2242 if (modes & nir_var_uniform) 2243 modes |= nir_var_mem_global; 2244 2245 if (modes & (nir_var_mem_ssbo | nir_var_mem_global)) { 2246 if (mem_scope > NIR_SCOPE_WORKGROUP) 2247 flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL; 2248 else 2249 flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP; 2250 } 2251 2252 if (modes & nir_var_mem_shared) 2253 flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE; 2254 2255 func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE); 2256 if (!func) 2257 return false; 2258 2259 opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER); 2260 if (!opcode) 2261 return false; 2262 2263 mode = dxil_module_get_int32_const(&ctx->mod, flags); 2264 if (!mode) 2265 return false; 2266 2267 const struct dxil_value *args[] = { opcode, mode }; 2268 2269 return dxil_emit_call_void(&ctx->mod, func, 2270 args, ARRAY_SIZE(args)); 2271} 2272 2273static bool 2274emit_load_global_invocation_id(struct ntd_context *ctx, 2275 nir_intrinsic_instr *intr) 2276{ 2277 assert(intr->dest.is_ssa); 2278 nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa); 2279 2280 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { 2281 if (comps & (1 << i)) { 2282 const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i); 2283 if (!idx) 2284 return false; 2285 const struct dxil_value *globalid = emit_threadid_call(ctx, idx); 2286 2287 if (!globalid) 2288 return false; 2289 2290 store_dest_value(ctx, &intr->dest, i, globalid); 2291 } 2292 } 2293 return true; 2294} 2295 2296static bool 2297emit_load_local_invocation_id(struct ntd_context *ctx, 2298 nir_intrinsic_instr *intr) 2299{ 2300 assert(intr->dest.is_ssa); 2301 nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa); 2302 2303 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { 2304 if (comps & (1 << i)) { 2305 const struct dxil_value 2306 *idx = dxil_module_get_int32_const(&ctx->mod, i); 2307 if (!idx) 2308 return false; 2309 const struct dxil_value 2310 *threadidingroup = emit_threadidingroup_call(ctx, idx); 2311 if (!threadidingroup) 2312 return false; 2313 store_dest_value(ctx, &intr->dest, i, threadidingroup); 2314 } 2315 } 2316 return true; 2317} 2318 2319static bool 2320emit_load_local_invocation_index(struct ntd_context *ctx, 2321 nir_intrinsic_instr *intr) 2322{ 2323 assert(intr->dest.is_ssa); 2324 2325 const struct dxil_value 2326 *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx); 2327 if (!flattenedthreadidingroup) 2328 return false; 2329 store_dest_value(ctx, &intr->dest, 0, flattenedthreadidingroup); 2330 2331 return true; 2332} 2333 2334static bool 2335emit_load_local_workgroup_id(struct ntd_context *ctx, 2336 nir_intrinsic_instr *intr) 2337{ 2338 assert(intr->dest.is_ssa); 2339 nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa); 2340 2341 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { 2342 if (comps & (1 << i)) { 2343 const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i); 2344 if (!idx) 2345 return false; 2346 const struct dxil_value *groupid = emit_groupid_call(ctx, idx); 2347 if (!groupid) 2348 return false; 2349 store_dest_value(ctx, &intr->dest, i, groupid); 2350 } 2351 } 2352 return true; 2353} 2354 2355static bool 2356emit_load_unary_external_function(struct ntd_context *ctx, 2357 nir_intrinsic_instr *intr, const char *name, 2358 int32_t dxil_intr) 2359{ 2360 const struct dxil_func *func = 2361 dxil_get_function(&ctx->mod, name, DXIL_I32); 2362 if (!func) 2363 return false; 2364 2365 const struct dxil_value *opcode = 2366 dxil_module_get_int32_const(&ctx->mod, dxil_intr); 2367 if (!opcode) 2368 return false; 2369 2370 const struct dxil_value *args[] = {opcode}; 2371 2372 const struct dxil_value *value = 2373 dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2374 store_dest_value(ctx, &intr->dest, 0, value); 2375 2376 return true; 2377} 2378 2379static const struct dxil_value * 2380get_int32_undef(struct dxil_module *m) 2381{ 2382 const struct dxil_type *int32_type = 2383 dxil_module_get_int_type(m, 32); 2384 if (!int32_type) 2385 return NULL; 2386 2387 return dxil_module_get_undef(m, int32_type); 2388} 2389 2390static const struct dxil_value * 2391emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var, 2392 const struct dxil_value *index) 2393{ 2394 assert(var->data.mode == nir_var_shader_temp); 2395 2396 struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var); 2397 assert(he != NULL); 2398 const struct dxil_value *ptr = he->data; 2399 2400 const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0); 2401 if (!zero) 2402 return NULL; 2403 2404 const struct dxil_value *ops[] = { ptr, zero, index }; 2405 return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 2406} 2407 2408static const struct dxil_value * 2409get_ubo_ssbo_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class, unsigned base_binding) 2410{ 2411 /* This source might be one of: 2412 * 1. Constant resource index - just look it up in precomputed handle arrays 2413 * If it's null in that array, create a handle, and store the result 2414 * 2. A handle from load_vulkan_descriptor - just get the stored SSA value 2415 * 3. Dynamic resource index - create a handle for it here 2416 */ 2417 assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32); 2418 nir_const_value *const_block_index = nir_src_as_const_value(*src); 2419 const struct dxil_value **handle_entry = NULL; 2420 if (const_block_index) { 2421 assert(!ctx->opts->vulkan_environment); 2422 switch (class) { 2423 case DXIL_RESOURCE_CLASS_CBV: 2424 handle_entry = &ctx->cbv_handles[const_block_index->u32]; 2425 break; 2426 case DXIL_RESOURCE_CLASS_UAV: 2427 handle_entry = &ctx->uav_handles[const_block_index->u32]; 2428 break; 2429 case DXIL_RESOURCE_CLASS_SRV: 2430 handle_entry = &ctx->srv_handles[const_block_index->u32]; 2431 break; 2432 default: 2433 unreachable("Unexpected resource class"); 2434 } 2435 } 2436 2437 if (handle_entry && *handle_entry) 2438 return *handle_entry; 2439 2440 const struct dxil_value *value = get_src_ssa(ctx, src->ssa, 0); 2441 if (ctx->opts->vulkan_environment) { 2442 return value; 2443 } 2444 2445 const struct dxil_value *handle = emit_createhandle_call(ctx, class, 2446 get_resource_id(ctx, class, 0, base_binding), value, !const_block_index); 2447 if (handle_entry) 2448 *handle_entry = handle; 2449 2450 return handle; 2451} 2452 2453static bool 2454emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2455{ 2456 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 2457 2458 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0])); 2459 enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV; 2460 if (var && var->data.access & ACCESS_NON_WRITEABLE) 2461 class = DXIL_RESOURCE_CLASS_SRV; 2462 2463 const struct dxil_value *handle = get_ubo_ssbo_handle(ctx, &intr->src[0], class, 0); 2464 const struct dxil_value *offset = 2465 get_src(ctx, &intr->src[1], 0, nir_type_uint); 2466 if (!int32_undef || !handle || !offset) 2467 return false; 2468 2469 assert(nir_src_bit_size(intr->src[0]) == 32); 2470 assert(nir_intrinsic_dest_components(intr) <= 4); 2471 2472 const struct dxil_value *coord[2] = { 2473 offset, 2474 int32_undef 2475 }; 2476 2477 const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord, DXIL_I32); 2478 if (!load) 2479 return false; 2480 2481 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { 2482 const struct dxil_value *val = 2483 dxil_emit_extractval(&ctx->mod, load, i); 2484 if (!val) 2485 return false; 2486 store_dest_value(ctx, &intr->dest, i, val); 2487 } 2488 return true; 2489} 2490 2491static bool 2492emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2493{ 2494 const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, 0); 2495 const struct dxil_value *offset = 2496 get_src(ctx, &intr->src[2], 0, nir_type_uint); 2497 if (!handle || !offset) 2498 return false; 2499 2500 assert(nir_src_bit_size(intr->src[0]) == 32); 2501 unsigned num_components = nir_src_num_components(intr->src[0]); 2502 assert(num_components <= 4); 2503 const struct dxil_value *value[4]; 2504 for (unsigned i = 0; i < num_components; ++i) { 2505 value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint); 2506 if (!value[i]) 2507 return false; 2508 } 2509 2510 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 2511 if (!int32_undef) 2512 return false; 2513 2514 const struct dxil_value *coord[2] = { 2515 offset, 2516 int32_undef 2517 }; 2518 2519 for (int i = num_components; i < 4; ++i) 2520 value[i] = int32_undef; 2521 2522 const struct dxil_value *write_mask = 2523 dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1); 2524 if (!write_mask) 2525 return false; 2526 2527 return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32); 2528} 2529 2530static bool 2531emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2532{ 2533 const struct dxil_value *value = 2534 get_src(ctx, &intr->src[0], 0, nir_type_uint); 2535 const struct dxil_value *mask = 2536 get_src(ctx, &intr->src[1], 0, nir_type_uint); 2537 const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[2], DXIL_RESOURCE_CLASS_UAV, 0); 2538 const struct dxil_value *offset = 2539 get_src(ctx, &intr->src[3], 0, nir_type_uint); 2540 if (!value || !mask || !handle || !offset) 2541 return false; 2542 2543 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 2544 if (!int32_undef) 2545 return false; 2546 2547 const struct dxil_value *coord[3] = { 2548 offset, int32_undef, int32_undef 2549 }; 2550 2551 return 2552 emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL && 2553 emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL; 2554} 2555 2556static bool 2557emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2558{ 2559 const struct dxil_value *zero, *index; 2560 2561 /* All shared mem accesses should have been lowered to scalar 32bit 2562 * accesses. 2563 */ 2564 assert(nir_src_bit_size(intr->src[0]) == 32); 2565 assert(nir_src_num_components(intr->src[0]) == 1); 2566 2567 zero = dxil_module_get_int32_const(&ctx->mod, 0); 2568 if (!zero) 2569 return false; 2570 2571 if (intr->intrinsic == nir_intrinsic_store_shared_dxil) 2572 index = get_src(ctx, &intr->src[1], 0, nir_type_uint); 2573 else 2574 index = get_src(ctx, &intr->src[2], 0, nir_type_uint); 2575 if (!index) 2576 return false; 2577 2578 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; 2579 const struct dxil_value *ptr, *value; 2580 2581 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 2582 if (!ptr) 2583 return false; 2584 2585 value = get_src(ctx, &intr->src[0], 0, nir_type_uint); 2586 if (!value) 2587 return false; 2588 2589 if (intr->intrinsic == nir_intrinsic_store_shared_dxil) 2590 return dxil_emit_store(&ctx->mod, value, ptr, 4, false); 2591 2592 const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint); 2593 if (!mask) 2594 return false; 2595 2596 if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false, 2597 DXIL_ATOMIC_ORDERING_ACQREL, 2598 DXIL_SYNC_SCOPE_CROSSTHREAD)) 2599 return false; 2600 2601 if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false, 2602 DXIL_ATOMIC_ORDERING_ACQREL, 2603 DXIL_SYNC_SCOPE_CROSSTHREAD)) 2604 return false; 2605 2606 return true; 2607} 2608 2609static bool 2610emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2611{ 2612 const struct dxil_value *zero, *index; 2613 2614 /* All scratch mem accesses should have been lowered to scalar 32bit 2615 * accesses. 2616 */ 2617 assert(nir_src_bit_size(intr->src[0]) == 32); 2618 assert(nir_src_num_components(intr->src[0]) == 1); 2619 2620 zero = dxil_module_get_int32_const(&ctx->mod, 0); 2621 if (!zero) 2622 return false; 2623 2624 index = get_src(ctx, &intr->src[1], 0, nir_type_uint); 2625 if (!index) 2626 return false; 2627 2628 const struct dxil_value *ops[] = { ctx->scratchvars, zero, index }; 2629 const struct dxil_value *ptr, *value; 2630 2631 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 2632 if (!ptr) 2633 return false; 2634 2635 value = get_src(ctx, &intr->src[0], 0, nir_type_uint); 2636 if (!value) 2637 return false; 2638 2639 return dxil_emit_store(&ctx->mod, value, ptr, 4, false); 2640} 2641 2642static bool 2643emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2644{ 2645 const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, 0); 2646 if (!handle) 2647 return false; 2648 2649 const struct dxil_value *offset; 2650 nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]); 2651 if (const_offset) { 2652 offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4); 2653 } else { 2654 const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint); 2655 const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4); 2656 if (!offset_src || !c4) 2657 return false; 2658 2659 offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0); 2660 } 2661 2662 const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_F32); 2663 2664 if (!agg) 2665 return false; 2666 2667 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { 2668 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i); 2669 store_dest(ctx, &intr->dest, i, retval, 2670 nir_dest_bit_size(intr->dest) > 1 ? nir_type_float : nir_type_bool); 2671 } 2672 return true; 2673} 2674 2675static bool 2676emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2677{ 2678 assert(nir_dest_num_components(intr->dest) <= 4); 2679 assert(nir_dest_bit_size(intr->dest) == 32); 2680 2681 const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, 0); 2682 const struct dxil_value *offset = 2683 get_src(ctx, &intr->src[1], 0, nir_type_uint); 2684 2685 if (!handle || !offset) 2686 return false; 2687 2688 const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_I32); 2689 if (!agg) 2690 return false; 2691 2692 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++) 2693 store_dest_value(ctx, &intr->dest, i, 2694 dxil_emit_extractval(&ctx->mod, agg, i)); 2695 2696 return true; 2697} 2698 2699static bool 2700emit_store_output(struct ntd_context *ctx, nir_intrinsic_instr *intr, 2701 nir_variable *output) 2702{ 2703 nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(output->type)); 2704 enum overload_type overload = DXIL_F32; 2705 if (output->data.compact) 2706 out_type = nir_type_float; 2707 else 2708 overload = get_overload(out_type, glsl_get_bit_size(output->type)); 2709 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.storeOutput", overload); 2710 2711 if (!func) 2712 return false; 2713 2714 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_STORE_OUTPUT); 2715 const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, (int)output->data.driver_location); 2716 const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0); 2717 2718 bool success = true; 2719 if (output->data.compact) { 2720 nir_deref_instr *array_deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr); 2721 unsigned array_index = nir_src_as_uint(array_deref->arr.index); 2722 2723 const struct dxil_value *col = dxil_module_get_int8_const(&ctx->mod, array_index); 2724 const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, out_type); 2725 if (!col || !value) 2726 return false; 2727 2728 const struct dxil_value *args[] = { 2729 opcode, output_id, row, col, value 2730 }; 2731 success = dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); 2732 } else { 2733 uint32_t writemask = nir_intrinsic_write_mask(intr); 2734 for (unsigned i = 0; i < nir_src_num_components(intr->src[1]) && success; ++i) { 2735 if (writemask & (1 << i)) { 2736 const struct dxil_value *col = dxil_module_get_int8_const(&ctx->mod, i); 2737 const struct dxil_value *value = get_src(ctx, &intr->src[1], i, out_type); 2738 if (!col || !value) 2739 return false; 2740 2741 const struct dxil_value *args[] = { 2742 opcode, output_id, row, col, value 2743 }; 2744 success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); 2745 } 2746 } 2747 } 2748 return success; 2749} 2750 2751static bool 2752emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2753{ 2754 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); 2755 nir_variable *var = nir_deref_instr_get_variable(deref); 2756 2757 switch (var->data.mode) { 2758 case nir_var_shader_out: 2759 return emit_store_output(ctx, intr, var); 2760 2761 default: 2762 unreachable("unsupported nir_variable_mode"); 2763 } 2764} 2765 2766static bool 2767emit_load_input_array(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var, nir_src *index) 2768{ 2769 assert(var); 2770 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT); 2771 const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location); 2772 const struct dxil_value *vertex_id; 2773 const struct dxil_value *row; 2774 2775 if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) { 2776 vertex_id = get_src(ctx, index, 0, nir_type_int); 2777 row = dxil_module_get_int32_const(&ctx->mod, 0); 2778 } else { 2779 const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); 2780 vertex_id = dxil_module_get_undef(&ctx->mod, int32_type); 2781 row = get_src(ctx, index, 0, nir_type_int); 2782 } 2783 2784 if (!opcode || !input_id || !vertex_id || !row) 2785 return false; 2786 2787 nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(glsl_get_array_element(var->type))); 2788 enum overload_type overload = get_overload(out_type, glsl_get_bit_size(glsl_get_array_element(var->type))); 2789 2790 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload); 2791 2792 if (!func) 2793 return false; 2794 2795 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { 2796 const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i); 2797 if (!comp) 2798 return false; 2799 2800 const struct dxil_value *args[] = { 2801 opcode, input_id, row, comp, vertex_id 2802 }; 2803 2804 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2805 if (!retval) 2806 return false; 2807 store_dest(ctx, &intr->dest, i, retval, out_type); 2808 } 2809 return true; 2810} 2811 2812static bool 2813emit_load_compact_input_array(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var, nir_deref_instr *deref) 2814{ 2815 assert(var); 2816 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT); 2817 const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location); 2818 const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0); 2819 const struct dxil_value *vertex_id; 2820 2821 nir_src *col = &deref->arr.index; 2822 nir_src_is_const(*col); 2823 2824 if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) { 2825 nir_deref_instr *deref_parent = nir_deref_instr_parent(deref); 2826 assert(deref_parent->deref_type == nir_deref_type_array); 2827 2828 vertex_id = get_src(ctx, &deref_parent->arr.index, 0, nir_type_int); 2829 } else { 2830 const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); 2831 vertex_id = dxil_module_get_undef(&ctx->mod, int32_type); 2832 } 2833 2834 if (!opcode || !input_id || !row || !vertex_id) 2835 return false; 2836 2837 nir_alu_type out_type = nir_type_float; 2838 enum overload_type overload = get_overload(out_type, 32); 2839 2840 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload); 2841 2842 if (!func) 2843 return false; 2844 2845 const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, nir_src_as_int(*col)); 2846 if (!comp) 2847 return false; 2848 2849 const struct dxil_value *args[] = { 2850 opcode, input_id, row, comp, vertex_id 2851 }; 2852 2853 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2854 if (!retval) 2855 return false; 2856 store_dest(ctx, &intr->dest, 0, retval, out_type); 2857 return true; 2858} 2859 2860static bool 2861emit_load_input_interpolated(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var) 2862{ 2863 assert(var); 2864 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT); 2865 const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location); 2866 const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0); 2867 const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); 2868 const struct dxil_value *vertex_id = dxil_module_get_undef(&ctx->mod, int32_type); 2869 2870 if (!opcode || !input_id || !row || !int32_type || !vertex_id) 2871 return false; 2872 2873 nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type)); 2874 enum overload_type overload = get_overload(out_type, glsl_get_bit_size(var->type)); 2875 2876 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload); 2877 2878 if (!func) 2879 return false; 2880 2881 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { 2882 const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i); 2883 2884 const struct dxil_value *args[] = { 2885 opcode, input_id, row, comp, vertex_id 2886 }; 2887 2888 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2889 if (!retval) 2890 return false; 2891 store_dest(ctx, &intr->dest, i, retval, out_type); 2892 } 2893 return true; 2894} 2895 2896static bool 2897emit_load_input_flat(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable* var) 2898{ 2899 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATTRIBUTE_AT_VERTEX); 2900 const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, (int)var->data.driver_location); 2901 const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0); 2902 const struct dxil_value *vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex); 2903 2904 nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type)); 2905 enum overload_type overload = get_overload(out_type, glsl_get_bit_size(var->type)); 2906 2907 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.attributeAtVertex", overload); 2908 if (!func) 2909 return false; 2910 2911 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { 2912 const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i); 2913 const struct dxil_value *args[] = { 2914 opcode, input_id, row, comp, vertex_id 2915 }; 2916 2917 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2918 if (!retval) 2919 return false; 2920 2921 store_dest(ctx, &intr->dest, i, retval, out_type); 2922 } 2923 return true; 2924} 2925 2926static bool 2927emit_load_input(struct ntd_context *ctx, nir_intrinsic_instr *intr, 2928 nir_variable *input) 2929{ 2930 if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER || 2931 input->data.interpolation != INTERP_MODE_FLAT || 2932 !ctx->opts->interpolate_at_vertex || 2933 ctx->opts->provoking_vertex == 0 || 2934 glsl_type_is_integer(input->type)) 2935 return emit_load_input_interpolated(ctx, intr, input); 2936 else 2937 return emit_load_input_flat(ctx, intr, input); 2938} 2939 2940static bool 2941emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2942{ 2943 struct nir_variable *var = 2944 nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); 2945 2946 const struct dxil_value *index = 2947 get_src(ctx, &intr->src[1], 0, nir_type_uint); 2948 if (!index) 2949 return false; 2950 2951 const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index); 2952 if (!ptr) 2953 return false; 2954 2955 const struct dxil_value *retval = 2956 dxil_emit_load(&ctx->mod, ptr, 4, false); 2957 if (!retval) 2958 return false; 2959 2960 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); 2961 return true; 2962} 2963 2964static bool 2965emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2966{ 2967 const struct dxil_value *zero, *index; 2968 unsigned bit_size = nir_dest_bit_size(intr->dest); 2969 unsigned align = bit_size / 8; 2970 2971 /* All shared mem accesses should have been lowered to scalar 32bit 2972 * accesses. 2973 */ 2974 assert(bit_size == 32); 2975 assert(nir_dest_num_components(intr->dest) == 1); 2976 2977 zero = dxil_module_get_int32_const(&ctx->mod, 0); 2978 if (!zero) 2979 return false; 2980 2981 index = get_src(ctx, &intr->src[0], 0, nir_type_uint); 2982 if (!index) 2983 return false; 2984 2985 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; 2986 const struct dxil_value *ptr, *retval; 2987 2988 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 2989 if (!ptr) 2990 return false; 2991 2992 retval = dxil_emit_load(&ctx->mod, ptr, align, false); 2993 if (!retval) 2994 return false; 2995 2996 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); 2997 return true; 2998} 2999 3000static bool 3001emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3002{ 3003 const struct dxil_value *zero, *index; 3004 unsigned bit_size = nir_dest_bit_size(intr->dest); 3005 unsigned align = bit_size / 8; 3006 3007 /* All scratch mem accesses should have been lowered to scalar 32bit 3008 * accesses. 3009 */ 3010 assert(bit_size == 32); 3011 assert(nir_dest_num_components(intr->dest) == 1); 3012 3013 zero = dxil_module_get_int32_const(&ctx->mod, 0); 3014 if (!zero) 3015 return false; 3016 3017 index = get_src(ctx, &intr->src[0], 0, nir_type_uint); 3018 if (!index) 3019 return false; 3020 3021 const struct dxil_value *ops[] = { ctx->scratchvars, zero, index }; 3022 const struct dxil_value *ptr, *retval; 3023 3024 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 3025 if (!ptr) 3026 return false; 3027 3028 retval = dxil_emit_load(&ctx->mod, ptr, align, false); 3029 if (!retval) 3030 return false; 3031 3032 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); 3033 return true; 3034} 3035 3036static bool 3037emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3038{ 3039 assert(intr->src[0].is_ssa); 3040 nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr); 3041 nir_variable *var = nir_deref_instr_get_variable(deref); 3042 3043 switch (var->data.mode) { 3044 case nir_var_shader_in: 3045 if (glsl_type_is_array(var->type)) { 3046 if (var->data.compact) 3047 return emit_load_compact_input_array(ctx, intr, var, deref); 3048 else 3049 return emit_load_input_array(ctx, intr, var, &deref->arr.index); 3050 } 3051 return emit_load_input(ctx, intr, var); 3052 3053 default: 3054 unreachable("unsupported nir_variable_mode"); 3055 } 3056} 3057 3058static bool 3059emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value) 3060{ 3061 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD); 3062 if (!opcode) 3063 return false; 3064 3065 const struct dxil_value *args[] = { 3066 opcode, 3067 value 3068 }; 3069 3070 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE); 3071 if (!func) 3072 return false; 3073 3074 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); 3075} 3076 3077static bool 3078emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3079{ 3080 const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool); 3081 if (!value) 3082 return false; 3083 3084 return emit_discard_if_with_value(ctx, value); 3085} 3086 3087static bool 3088emit_discard(struct ntd_context *ctx) 3089{ 3090 const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true); 3091 return emit_discard_if_with_value(ctx, value); 3092} 3093 3094static bool 3095emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3096{ 3097 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM); 3098 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr)); 3099 if (!opcode || !stream_id) 3100 return false; 3101 3102 const struct dxil_value *args[] = { 3103 opcode, 3104 stream_id 3105 }; 3106 3107 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE); 3108 if (!func) 3109 return false; 3110 3111 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); 3112} 3113 3114static bool 3115emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3116{ 3117 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM); 3118 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr)); 3119 if (!opcode || !stream_id) 3120 return false; 3121 3122 const struct dxil_value *args[] = { 3123 opcode, 3124 stream_id 3125 }; 3126 3127 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE); 3128 if (!func) 3129 return false; 3130 3131 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); 3132} 3133 3134static bool 3135emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3136{ 3137 const struct dxil_value *handle; 3138 bool is_array = false; 3139 if (ctx->opts->vulkan_environment) { 3140 assert(intr->intrinsic == nir_intrinsic_image_deref_store); 3141 handle = get_src_ssa(ctx, intr->src[0].ssa, 0); 3142 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); 3143 } else { 3144 assert(intr->intrinsic == nir_intrinsic_image_store); 3145 int binding = nir_src_as_int(intr->src[0]); 3146 is_array = nir_intrinsic_image_array(intr); 3147 handle = ctx->uav_handles[binding]; 3148 } 3149 if (!handle) 3150 return false; 3151 3152 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 3153 if (!int32_undef) 3154 return false; 3155 3156 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; 3157 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_store ? 3158 nir_intrinsic_image_dim(intr) : 3159 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type); 3160 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); 3161 if (is_array) 3162 ++num_coords; 3163 3164 assert(num_coords <= nir_src_num_components(intr->src[1])); 3165 for (unsigned i = 0; i < num_coords; ++i) { 3166 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); 3167 if (!coord[i]) 3168 return false; 3169 } 3170 3171 nir_alu_type in_type = nir_intrinsic_src_type(intr); 3172 enum overload_type overload = get_overload(in_type, 32); 3173 3174 assert(nir_src_bit_size(intr->src[3]) == 32); 3175 unsigned num_components = nir_src_num_components(intr->src[3]); 3176 assert(num_components <= 4); 3177 const struct dxil_value *value[4]; 3178 for (unsigned i = 0; i < num_components; ++i) { 3179 value[i] = get_src(ctx, &intr->src[3], i, in_type); 3180 if (!value[i]) 3181 return false; 3182 } 3183 3184 for (int i = num_components; i < 4; ++i) 3185 value[i] = int32_undef; 3186 3187 const struct dxil_value *write_mask = 3188 dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1); 3189 if (!write_mask) 3190 return false; 3191 3192 if (image_dim == GLSL_SAMPLER_DIM_BUF) { 3193 coord[1] = int32_undef; 3194 return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload); 3195 } else 3196 return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload); 3197} 3198 3199static bool 3200emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3201{ 3202 const struct dxil_value *handle; 3203 bool is_array = false; 3204 if (ctx->opts->vulkan_environment) { 3205 assert(intr->intrinsic == nir_intrinsic_image_deref_load); 3206 handle = get_src_ssa(ctx, intr->src[0].ssa, 0); 3207 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); 3208 } else { 3209 assert(intr->intrinsic == nir_intrinsic_image_load); 3210 int binding = nir_src_as_int(intr->src[0]); 3211 is_array = nir_intrinsic_image_array(intr); 3212 handle = ctx->uav_handles[binding]; 3213 } 3214 if (!handle) 3215 return false; 3216 3217 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 3218 if (!int32_undef) 3219 return false; 3220 3221 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; 3222 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_load ? 3223 nir_intrinsic_image_dim(intr) : 3224 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type); 3225 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); 3226 if (is_array) 3227 ++num_coords; 3228 3229 assert(num_coords <= nir_src_num_components(intr->src[1])); 3230 for (unsigned i = 0; i < num_coords; ++i) { 3231 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); 3232 if (!coord[i]) 3233 return false; 3234 } 3235 3236 nir_alu_type out_type = nir_intrinsic_dest_type(intr); 3237 enum overload_type overload = get_overload(out_type, 32); 3238 3239 const struct dxil_value *load_result; 3240 if (image_dim == GLSL_SAMPLER_DIM_BUF) { 3241 coord[1] = int32_undef; 3242 load_result = emit_bufferload_call(ctx, handle, coord, overload); 3243 } else 3244 load_result = emit_textureload_call(ctx, handle, coord, overload); 3245 3246 if (!load_result) 3247 return false; 3248 3249 assert(nir_dest_bit_size(intr->dest) == 32); 3250 unsigned num_components = nir_dest_num_components(intr->dest); 3251 assert(num_components <= 4); 3252 for (unsigned i = 0; i < num_components; ++i) { 3253 const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i); 3254 if (!component) 3255 return false; 3256 store_dest(ctx, &intr->dest, i, component, out_type); 3257 } 3258 3259 if (num_components > 1) 3260 ctx->mod.feats.typed_uav_load_additional_formats = true; 3261 3262 return true; 3263} 3264 3265struct texop_parameters { 3266 const struct dxil_value *tex; 3267 const struct dxil_value *sampler; 3268 const struct dxil_value *bias, *lod_or_sample, *min_lod; 3269 const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3]; 3270 const struct dxil_value *cmp; 3271 enum overload_type overload; 3272}; 3273 3274static const struct dxil_value * 3275emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params) 3276{ 3277 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE); 3278 if (!func) 3279 return false; 3280 3281 const struct dxil_value *args[] = { 3282 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE), 3283 params->tex, 3284 params->lod_or_sample 3285 }; 3286 3287 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 3288} 3289 3290static bool 3291emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3292{ 3293 const struct dxil_value *handle; 3294 if (ctx->opts->vulkan_environment) { 3295 assert(intr->intrinsic == nir_intrinsic_image_deref_size); 3296 handle = get_src_ssa(ctx, intr->src[0].ssa, 0); 3297 } 3298 else { 3299 assert(intr->intrinsic == nir_intrinsic_image_size); 3300 int binding = nir_src_as_int(intr->src[0]); 3301 handle = ctx->uav_handles[binding]; 3302 } 3303 if (!handle) 3304 return false; 3305 3306 const struct dxil_value *lod = get_src(ctx, &intr->src[1], 0, nir_type_uint); 3307 if (!lod) 3308 return false; 3309 3310 struct texop_parameters params = { 3311 .tex = handle, 3312 .lod_or_sample = lod 3313 }; 3314 const struct dxil_value *dimensions = emit_texture_size(ctx, ¶ms); 3315 if (!dimensions) 3316 return false; 3317 3318 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { 3319 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i); 3320 store_dest(ctx, &intr->dest, i, retval, nir_type_uint); 3321 } 3322 3323 return true; 3324} 3325 3326static bool 3327emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3328{ 3329 const struct dxil_value* handle = NULL; 3330 if (ctx->opts->vulkan_environment) { 3331 handle = get_src_ssa(ctx, intr->src[0].ssa, 0); 3332 } else { 3333 int binding = nir_src_as_int(intr->src[0]); 3334 handle = ctx->uav_handles[binding]; 3335 } 3336 3337 if (!handle) 3338 return false; 3339 3340 struct texop_parameters params = { 3341 .tex = handle, 3342 .lod_or_sample = dxil_module_get_undef( 3343 &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) 3344 }; 3345 3346 const struct dxil_value *dimensions = emit_texture_size(ctx, ¶ms); 3347 if (!dimensions) 3348 return false; 3349 3350 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0); 3351 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); 3352 3353 return true; 3354} 3355 3356static bool 3357emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr, 3358 enum dxil_atomic_op op, nir_alu_type type) 3359{ 3360 const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, 0); 3361 const struct dxil_value *offset = 3362 get_src(ctx, &intr->src[1], 0, nir_type_uint); 3363 const struct dxil_value *value = 3364 get_src(ctx, &intr->src[2], 0, type); 3365 3366 if (!value || !handle || !offset) 3367 return false; 3368 3369 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 3370 if (!int32_undef) 3371 return false; 3372 3373 const struct dxil_value *coord[3] = { 3374 offset, int32_undef, int32_undef 3375 }; 3376 3377 const struct dxil_value *retval = 3378 emit_atomic_binop(ctx, handle, op, coord, value); 3379 3380 if (!retval) 3381 return false; 3382 3383 store_dest(ctx, &intr->dest, 0, retval, type); 3384 return true; 3385} 3386 3387static bool 3388emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3389{ 3390 const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, 0); 3391 const struct dxil_value *offset = 3392 get_src(ctx, &intr->src[1], 0, nir_type_uint); 3393 const struct dxil_value *cmpval = 3394 get_src(ctx, &intr->src[2], 0, nir_type_int); 3395 const struct dxil_value *newval = 3396 get_src(ctx, &intr->src[3], 0, nir_type_int); 3397 3398 if (!cmpval || !newval || !handle || !offset) 3399 return false; 3400 3401 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 3402 if (!int32_undef) 3403 return false; 3404 3405 const struct dxil_value *coord[3] = { 3406 offset, int32_undef, int32_undef 3407 }; 3408 3409 const struct dxil_value *retval = 3410 emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval); 3411 3412 if (!retval) 3413 return false; 3414 3415 store_dest(ctx, &intr->dest, 0, retval, nir_type_int); 3416 return true; 3417} 3418 3419static bool 3420emit_shared_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr, 3421 enum dxil_rmw_op op, nir_alu_type type) 3422{ 3423 const struct dxil_value *zero, *index; 3424 3425 assert(nir_src_bit_size(intr->src[1]) == 32); 3426 3427 zero = dxil_module_get_int32_const(&ctx->mod, 0); 3428 if (!zero) 3429 return false; 3430 3431 index = get_src(ctx, &intr->src[0], 0, nir_type_uint); 3432 if (!index) 3433 return false; 3434 3435 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; 3436 const struct dxil_value *ptr, *value, *retval; 3437 3438 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 3439 if (!ptr) 3440 return false; 3441 3442 value = get_src(ctx, &intr->src[1], 0, type); 3443 if (!value) 3444 return false; 3445 3446 retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, op, false, 3447 DXIL_ATOMIC_ORDERING_ACQREL, 3448 DXIL_SYNC_SCOPE_CROSSTHREAD); 3449 if (!retval) 3450 return false; 3451 3452 store_dest(ctx, &intr->dest, 0, retval, type); 3453 return true; 3454} 3455 3456static bool 3457emit_shared_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3458{ 3459 const struct dxil_value *zero, *index; 3460 3461 assert(nir_src_bit_size(intr->src[1]) == 32); 3462 3463 zero = dxil_module_get_int32_const(&ctx->mod, 0); 3464 if (!zero) 3465 return false; 3466 3467 index = get_src(ctx, &intr->src[0], 0, nir_type_uint); 3468 if (!index) 3469 return false; 3470 3471 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; 3472 const struct dxil_value *ptr, *cmpval, *newval, *retval; 3473 3474 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 3475 if (!ptr) 3476 return false; 3477 3478 cmpval = get_src(ctx, &intr->src[1], 0, nir_type_uint); 3479 newval = get_src(ctx, &intr->src[2], 0, nir_type_uint); 3480 if (!cmpval || !newval) 3481 return false; 3482 3483 retval = dxil_emit_cmpxchg(&ctx->mod, cmpval, newval, ptr, false, 3484 DXIL_ATOMIC_ORDERING_ACQREL, 3485 DXIL_SYNC_SCOPE_CROSSTHREAD); 3486 if (!retval) 3487 return false; 3488 3489 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); 3490 return true; 3491} 3492 3493static bool 3494emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3495{ 3496 unsigned int binding = nir_intrinsic_binding(intr); 3497 3498 bool const_index = nir_src_is_const(intr->src[0]); 3499 if (const_index) { 3500 binding += nir_src_as_const_value(intr->src[0])->u32; 3501 } 3502 3503 const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding); 3504 if (!index_value) 3505 return false; 3506 3507 if (!const_index) { 3508 const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32); 3509 if (!offset) 3510 return false; 3511 3512 index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0); 3513 if (!index_value) 3514 return false; 3515 } 3516 3517 store_dest(ctx, &intr->dest, 0, index_value, nir_type_uint32); 3518 store_dest(ctx, &intr->dest, 1, dxil_module_get_int32_const(&ctx->mod, 0), nir_type_uint32); 3519 return true; 3520} 3521 3522static bool 3523emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3524{ 3525 nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]); 3526 /* We currently do not support reindex */ 3527 assert(index && index->intrinsic == nir_intrinsic_vulkan_resource_index); 3528 3529 unsigned binding = nir_intrinsic_binding(index); 3530 unsigned space = nir_intrinsic_desc_set(index); 3531 3532 /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */ 3533 assert(space < 32); 3534 3535 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0])); 3536 3537 const struct dxil_value *handle = NULL; 3538 enum dxil_resource_class resource_class; 3539 3540 switch (nir_intrinsic_desc_type(intr)) { 3541 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 3542 resource_class = DXIL_RESOURCE_CLASS_CBV; 3543 break; 3544 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 3545 if (var->data.access & ACCESS_NON_WRITEABLE) 3546 resource_class = DXIL_RESOURCE_CLASS_SRV; 3547 else 3548 resource_class = DXIL_RESOURCE_CLASS_UAV; 3549 break; 3550 default: 3551 unreachable("unknown descriptor type"); 3552 return false; 3553 } 3554 3555 const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32); 3556 if (!index_value) 3557 return false; 3558 3559 handle = emit_createhandle_call(ctx, resource_class, 3560 get_resource_id(ctx, resource_class, space, binding), 3561 index_value, false); 3562 3563 store_dest_value(ctx, &intr->dest, 0, handle); 3564 store_dest(ctx, &intr->dest, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32), nir_type_uint32); 3565 3566 return true; 3567} 3568 3569static bool 3570emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3571{ 3572 switch (intr->intrinsic) { 3573 case nir_intrinsic_load_global_invocation_id: 3574 case nir_intrinsic_load_global_invocation_id_zero_base: 3575 return emit_load_global_invocation_id(ctx, intr); 3576 case nir_intrinsic_load_local_invocation_id: 3577 return emit_load_local_invocation_id(ctx, intr); 3578 case nir_intrinsic_load_local_invocation_index: 3579 return emit_load_local_invocation_index(ctx, intr); 3580 case nir_intrinsic_load_workgroup_id: 3581 case nir_intrinsic_load_workgroup_id_zero_base: 3582 return emit_load_local_workgroup_id(ctx, intr); 3583 case nir_intrinsic_load_ssbo: 3584 return emit_load_ssbo(ctx, intr); 3585 case nir_intrinsic_store_ssbo: 3586 return emit_store_ssbo(ctx, intr); 3587 case nir_intrinsic_store_ssbo_masked_dxil: 3588 return emit_store_ssbo_masked(ctx, intr); 3589 case nir_intrinsic_store_deref: 3590 return emit_store_deref(ctx, intr); 3591 case nir_intrinsic_store_shared_dxil: 3592 case nir_intrinsic_store_shared_masked_dxil: 3593 return emit_store_shared(ctx, intr); 3594 case nir_intrinsic_store_scratch_dxil: 3595 return emit_store_scratch(ctx, intr); 3596 case nir_intrinsic_load_deref: 3597 return emit_load_deref(ctx, intr); 3598 case nir_intrinsic_load_ptr_dxil: 3599 return emit_load_ptr(ctx, intr); 3600 case nir_intrinsic_load_ubo: 3601 return emit_load_ubo(ctx, intr); 3602 case nir_intrinsic_load_ubo_dxil: 3603 return emit_load_ubo_dxil(ctx, intr); 3604 case nir_intrinsic_load_front_face: 3605 return emit_load_input_interpolated(ctx, intr, 3606 ctx->system_value[SYSTEM_VALUE_FRONT_FACE]); 3607 case nir_intrinsic_load_vertex_id_zero_base: 3608 return emit_load_input_interpolated(ctx, intr, 3609 ctx->system_value[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]); 3610 case nir_intrinsic_load_instance_id: 3611 return emit_load_input_interpolated(ctx, intr, 3612 ctx->system_value[SYSTEM_VALUE_INSTANCE_ID]); 3613 case nir_intrinsic_load_primitive_id: 3614 return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID", 3615 DXIL_INTR_PRIMITIVE_ID); 3616 case nir_intrinsic_load_sample_id: 3617 return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex", 3618 DXIL_INTR_SAMPLE_INDEX); 3619 case nir_intrinsic_load_shared_dxil: 3620 return emit_load_shared(ctx, intr); 3621 case nir_intrinsic_load_scratch_dxil: 3622 return emit_load_scratch(ctx, intr); 3623 case nir_intrinsic_discard_if: 3624 return emit_discard_if(ctx, intr); 3625 case nir_intrinsic_discard: 3626 return emit_discard(ctx); 3627 case nir_intrinsic_emit_vertex: 3628 return emit_emit_vertex(ctx, intr); 3629 case nir_intrinsic_end_primitive: 3630 return emit_end_primitive(ctx, intr); 3631 case nir_intrinsic_scoped_barrier: 3632 return emit_barrier(ctx, intr); 3633 case nir_intrinsic_ssbo_atomic_add: 3634 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int); 3635 case nir_intrinsic_ssbo_atomic_imin: 3636 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int); 3637 case nir_intrinsic_ssbo_atomic_umin: 3638 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint); 3639 case nir_intrinsic_ssbo_atomic_imax: 3640 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int); 3641 case nir_intrinsic_ssbo_atomic_umax: 3642 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint); 3643 case nir_intrinsic_ssbo_atomic_and: 3644 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint); 3645 case nir_intrinsic_ssbo_atomic_or: 3646 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint); 3647 case nir_intrinsic_ssbo_atomic_xor: 3648 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint); 3649 case nir_intrinsic_ssbo_atomic_exchange: 3650 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int); 3651 case nir_intrinsic_ssbo_atomic_comp_swap: 3652 return emit_ssbo_atomic_comp_swap(ctx, intr); 3653 case nir_intrinsic_shared_atomic_add_dxil: 3654 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int); 3655 case nir_intrinsic_shared_atomic_imin_dxil: 3656 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int); 3657 case nir_intrinsic_shared_atomic_umin_dxil: 3658 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint); 3659 case nir_intrinsic_shared_atomic_imax_dxil: 3660 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int); 3661 case nir_intrinsic_shared_atomic_umax_dxil: 3662 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint); 3663 case nir_intrinsic_shared_atomic_and_dxil: 3664 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint); 3665 case nir_intrinsic_shared_atomic_or_dxil: 3666 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint); 3667 case nir_intrinsic_shared_atomic_xor_dxil: 3668 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint); 3669 case nir_intrinsic_shared_atomic_exchange_dxil: 3670 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int); 3671 case nir_intrinsic_shared_atomic_comp_swap_dxil: 3672 return emit_shared_atomic_comp_swap(ctx, intr); 3673 case nir_intrinsic_image_store: 3674 case nir_intrinsic_image_deref_store: 3675 return emit_image_store(ctx, intr); 3676 case nir_intrinsic_image_load: 3677 case nir_intrinsic_image_deref_load: 3678 return emit_image_load(ctx, intr); 3679 case nir_intrinsic_image_size: 3680 case nir_intrinsic_image_deref_size: 3681 return emit_image_size(ctx, intr); 3682 case nir_intrinsic_get_ssbo_size: 3683 return emit_get_ssbo_size(ctx, intr); 3684 3685 case nir_intrinsic_vulkan_resource_index: 3686 return emit_vulkan_resource_index(ctx, intr); 3687 case nir_intrinsic_load_vulkan_descriptor: 3688 return emit_load_vulkan_descriptor(ctx, intr); 3689 3690 case nir_intrinsic_load_num_workgroups: 3691 case nir_intrinsic_load_workgroup_size: 3692 default: 3693 NIR_INSTR_UNSUPPORTED(&intr->instr); 3694 assert("Unimplemented intrinsic instruction"); 3695 return false; 3696 } 3697} 3698 3699static bool 3700emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const) 3701{ 3702 for (int i = 0; i < load_const->def.num_components; ++i) { 3703 const struct dxil_value *value; 3704 switch (load_const->def.bit_size) { 3705 case 1: 3706 value = dxil_module_get_int1_const(&ctx->mod, 3707 load_const->value[i].b); 3708 break; 3709 case 16: 3710 ctx->mod.feats.native_low_precision = true; 3711 value = dxil_module_get_int16_const(&ctx->mod, 3712 load_const->value[i].u16); 3713 break; 3714 case 32: 3715 value = dxil_module_get_int32_const(&ctx->mod, 3716 load_const->value[i].u32); 3717 break; 3718 case 64: 3719 ctx->mod.feats.int64_ops = true; 3720 value = dxil_module_get_int64_const(&ctx->mod, 3721 load_const->value[i].u64); 3722 break; 3723 default: 3724 unreachable("unexpected bit_size"); 3725 } 3726 if (!value) 3727 return false; 3728 3729 store_ssa_def(ctx, &load_const->def, i, value); 3730 } 3731 return true; 3732} 3733 3734static bool 3735emit_deref(struct ntd_context* ctx, nir_deref_instr* instr) 3736{ 3737 assert(instr->deref_type == nir_deref_type_var || 3738 instr->deref_type == nir_deref_type_array); 3739 3740 /* In the non-Vulkan environment, there's nothing to emit. Any references to 3741 * derefs will emit the necessary logic to handle scratch/shared GEP addressing 3742 */ 3743 if (!ctx->opts->vulkan_environment) 3744 return true; 3745 3746 /* In the Vulkan environment, we don't have cached handles for textures or 3747 * samplers, so let's use the opportunity of walking through the derefs to 3748 * emit those. 3749 */ 3750 nir_variable *var = nir_deref_instr_get_variable(instr); 3751 assert(var); 3752 3753 if (!glsl_type_is_sampler(glsl_without_array(var->type)) && 3754 !glsl_type_is_image(glsl_without_array(var->type))) 3755 return true; 3756 3757 const struct glsl_type *type = instr->type; 3758 const struct dxil_value *binding; 3759 3760 if (instr->deref_type == nir_deref_type_var) { 3761 binding = dxil_module_get_int32_const(&ctx->mod, var->data.binding); 3762 } else { 3763 const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32); 3764 const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32); 3765 if (!base || !offset) 3766 return false; 3767 3768 binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0); 3769 } 3770 3771 if (!binding) 3772 return false; 3773 3774 /* Haven't finished chasing the deref chain yet, just store the value */ 3775 if (glsl_type_is_array(type)) { 3776 store_dest(ctx, &instr->dest, 0, binding, nir_type_uint32); 3777 return true; 3778 } 3779 3780 assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); 3781 enum dxil_resource_class res_class; 3782 if (glsl_type_is_image(type)) 3783 res_class = DXIL_RESOURCE_CLASS_UAV; 3784 else if (glsl_get_sampler_result_type(type) == GLSL_TYPE_VOID) 3785 res_class = DXIL_RESOURCE_CLASS_SAMPLER; 3786 else 3787 res_class = DXIL_RESOURCE_CLASS_SRV; 3788 3789 const struct dxil_value *handle = emit_createhandle_call(ctx, res_class, 3790 get_resource_id(ctx, res_class, var->data.descriptor_set, var->data.binding), binding, false); 3791 if (!handle) 3792 return false; 3793 3794 store_dest_value(ctx, &instr->dest, 0, handle); 3795 return true; 3796} 3797 3798static bool 3799emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond, 3800 int true_block, int false_block) 3801{ 3802 assert(cond); 3803 assert(true_block >= 0); 3804 assert(false_block >= 0); 3805 return dxil_emit_branch(&ctx->mod, cond, true_block, false_block); 3806} 3807 3808static bool 3809emit_branch(struct ntd_context *ctx, int block) 3810{ 3811 assert(block >= 0); 3812 return dxil_emit_branch(&ctx->mod, NULL, block, -1); 3813} 3814 3815static bool 3816emit_jump(struct ntd_context *ctx, nir_jump_instr *instr) 3817{ 3818 switch (instr->type) { 3819 case nir_jump_break: 3820 case nir_jump_continue: 3821 assert(instr->instr.block->successors[0]); 3822 assert(!instr->instr.block->successors[1]); 3823 return emit_branch(ctx, instr->instr.block->successors[0]->index); 3824 3825 default: 3826 unreachable("Unsupported jump type\n"); 3827 } 3828} 3829 3830struct phi_block { 3831 unsigned num_components; 3832 struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS]; 3833}; 3834 3835static bool 3836emit_phi(struct ntd_context *ctx, nir_phi_instr *instr) 3837{ 3838 unsigned bit_size = nir_dest_bit_size(instr->dest); 3839 const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, 3840 bit_size); 3841 3842 struct phi_block *vphi = ralloc(ctx->phis, struct phi_block); 3843 vphi->num_components = nir_dest_num_components(instr->dest); 3844 3845 for (unsigned i = 0; i < vphi->num_components; ++i) { 3846 struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type); 3847 if (!phi) 3848 return false; 3849 store_dest_value(ctx, &instr->dest, i, dxil_instr_get_return_value(phi)); 3850 } 3851 _mesa_hash_table_insert(ctx->phis, instr, vphi); 3852 return true; 3853} 3854 3855static void 3856fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr, 3857 struct phi_block *vphi) 3858{ 3859 const struct dxil_value *values[128]; 3860 unsigned blocks[128]; 3861 for (unsigned i = 0; i < vphi->num_components; ++i) { 3862 size_t num_incoming = 0; 3863 nir_foreach_phi_src(src, instr) { 3864 assert(src->src.is_ssa); 3865 const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i); 3866 assert(num_incoming < ARRAY_SIZE(values)); 3867 values[num_incoming] = val; 3868 assert(num_incoming < ARRAY_SIZE(blocks)); 3869 blocks[num_incoming] = src->pred->index; 3870 ++num_incoming; 3871 } 3872 dxil_phi_set_incoming(vphi->comp[i], values, blocks, num_incoming); 3873 } 3874} 3875 3876static unsigned 3877get_n_src(struct ntd_context *ctx, const struct dxil_value **values, 3878 unsigned max_components, nir_tex_src *src, nir_alu_type type) 3879{ 3880 unsigned num_components = nir_src_num_components(src->src); 3881 unsigned i = 0; 3882 3883 assert(num_components <= max_components); 3884 3885 for (i = 0; i < num_components; ++i) { 3886 values[i] = get_src(ctx, &src->src, i, type); 3887 if (!values[i]) 3888 return 0; 3889 } 3890 3891 return num_components; 3892} 3893 3894#define PAD_SRC(ctx, array, components, undef) \ 3895 for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \ 3896 array[i] = undef; \ 3897 } 3898 3899static const struct dxil_value * 3900emit_sample(struct ntd_context *ctx, struct texop_parameters *params) 3901{ 3902 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload); 3903 if (!func) 3904 return NULL; 3905 3906 const struct dxil_value *args[11] = { 3907 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE), 3908 params->tex, params->sampler, 3909 params->coord[0], params->coord[1], params->coord[2], params->coord[3], 3910 params->offset[0], params->offset[1], params->offset[2], 3911 params->min_lod 3912 }; 3913 3914 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 3915} 3916 3917static const struct dxil_value * 3918emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params) 3919{ 3920 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload); 3921 if (!func) 3922 return NULL; 3923 3924 assert(params->bias != NULL); 3925 3926 const struct dxil_value *args[12] = { 3927 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS), 3928 params->tex, params->sampler, 3929 params->coord[0], params->coord[1], params->coord[2], params->coord[3], 3930 params->offset[0], params->offset[1], params->offset[2], 3931 params->bias, params->min_lod 3932 }; 3933 3934 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 3935} 3936 3937static const struct dxil_value * 3938emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params) 3939{ 3940 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload); 3941 if (!func) 3942 return NULL; 3943 3944 assert(params->lod_or_sample != NULL); 3945 3946 const struct dxil_value *args[11] = { 3947 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL), 3948 params->tex, params->sampler, 3949 params->coord[0], params->coord[1], params->coord[2], params->coord[3], 3950 params->offset[0], params->offset[1], params->offset[2], 3951 params->lod_or_sample 3952 }; 3953 3954 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 3955} 3956 3957static const struct dxil_value * 3958emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params) 3959{ 3960 const struct dxil_func *func; 3961 enum dxil_intr opcode; 3962 int numparam; 3963 3964 if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) { 3965 func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32); 3966 opcode = DXIL_INTR_SAMPLE_CMP; 3967 numparam = 12; 3968 } else { 3969 func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32); 3970 opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO; 3971 numparam = 11; 3972 } 3973 3974 if (!func) 3975 return NULL; 3976 3977 const struct dxil_value *args[12] = { 3978 dxil_module_get_int32_const(&ctx->mod, opcode), 3979 params->tex, params->sampler, 3980 params->coord[0], params->coord[1], params->coord[2], params->coord[3], 3981 params->offset[0], params->offset[1], params->offset[2], 3982 params->cmp, params->min_lod 3983 }; 3984 3985 return dxil_emit_call(&ctx->mod, func, args, numparam); 3986} 3987 3988static const struct dxil_value * 3989emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params) 3990{ 3991 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload); 3992 if (!func) 3993 return false; 3994 3995 const struct dxil_value *args[17] = { 3996 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD), 3997 params->tex, params->sampler, 3998 params->coord[0], params->coord[1], params->coord[2], params->coord[3], 3999 params->offset[0], params->offset[1], params->offset[2], 4000 params->dx[0], params->dx[1], params->dx[2], 4001 params->dy[0], params->dy[1], params->dy[2], 4002 params->min_lod 4003 }; 4004 4005 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 4006} 4007 4008static const struct dxil_value * 4009emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params) 4010{ 4011 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload); 4012 if (!func) 4013 return false; 4014 4015 if (!params->lod_or_sample) 4016 params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)); 4017 4018 const struct dxil_value *args[] = { 4019 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD), 4020 params->tex, 4021 params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2], 4022 params->offset[0], params->offset[1], params->offset[2] 4023 }; 4024 4025 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 4026} 4027 4028static const struct dxil_value * 4029emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params) 4030{ 4031 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32); 4032 if (!func) 4033 return false; 4034 4035 const struct dxil_value *args[] = { 4036 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD), 4037 params->tex, 4038 params->sampler, 4039 params->coord[0], 4040 params->coord[1], 4041 params->coord[2], 4042 dxil_module_get_int1_const(&ctx->mod, 1) 4043 }; 4044 4045 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 4046} 4047 4048static bool 4049emit_tex(struct ntd_context *ctx, nir_tex_instr *instr) 4050{ 4051 struct texop_parameters params; 4052 memset(¶ms, 0, sizeof(struct texop_parameters)); 4053 if (!ctx->opts->vulkan_environment) { 4054 params.tex = ctx->srv_handles[instr->texture_index]; 4055 params.sampler = ctx->sampler_handles[instr->sampler_index]; 4056 } 4057 4058 const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32); 4059 const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32); 4060 const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type); 4061 const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type); 4062 4063 unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0; 4064 params.overload = get_overload(instr->dest_type, 32); 4065 4066 for (unsigned i = 0; i < instr->num_srcs; i++) { 4067 nir_alu_type type = nir_tex_instr_src_type(instr, i); 4068 4069 switch (instr->src[i].src_type) { 4070 case nir_tex_src_coord: 4071 coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord), 4072 &instr->src[i], type); 4073 if (!coord_components) 4074 return false; 4075 break; 4076 4077 case nir_tex_src_offset: 4078 offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset), 4079 &instr->src[i], nir_type_int); 4080 if (!offset_components) 4081 return false; 4082 break; 4083 4084 case nir_tex_src_bias: 4085 assert(instr->op == nir_texop_txb); 4086 assert(nir_src_num_components(instr->src[i].src) == 1); 4087 params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float); 4088 if (!params.bias) 4089 return false; 4090 break; 4091 4092 case nir_tex_src_lod: 4093 assert(nir_src_num_components(instr->src[i].src) == 1); 4094 /* Buffers don't have a LOD */ 4095 if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) 4096 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type); 4097 else 4098 params.lod_or_sample = int_undef; 4099 if (!params.lod_or_sample) 4100 return false; 4101 break; 4102 4103 case nir_tex_src_min_lod: 4104 assert(nir_src_num_components(instr->src[i].src) == 1); 4105 params.min_lod = get_src(ctx, &instr->src[i].src, 0, type); 4106 if (!params.min_lod) 4107 return false; 4108 break; 4109 4110 case nir_tex_src_comparator: 4111 assert(nir_src_num_components(instr->src[i].src) == 1); 4112 params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float); 4113 if (!params.cmp) 4114 return false; 4115 break; 4116 4117 case nir_tex_src_ddx: 4118 dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx), 4119 &instr->src[i], nir_type_float); 4120 if (!dx_components) 4121 return false; 4122 break; 4123 4124 case nir_tex_src_ddy: 4125 dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy), 4126 &instr->src[i], nir_type_float); 4127 if (!dy_components) 4128 return false; 4129 break; 4130 4131 case nir_tex_src_ms_index: 4132 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int); 4133 if (!params.lod_or_sample) 4134 return false; 4135 break; 4136 4137 case nir_tex_src_texture_deref: 4138 assert(ctx->opts->vulkan_environment); 4139 params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0); 4140 break; 4141 4142 case nir_tex_src_sampler_deref: 4143 assert(ctx->opts->vulkan_environment); 4144 params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0); 4145 break; 4146 4147 case nir_tex_src_projector: 4148 unreachable("Texture projector should have been lowered"); 4149 4150 default: 4151 fprintf(stderr, "texture source: %d\n", instr->src[i].src_type); 4152 unreachable("unknown texture source"); 4153 } 4154 } 4155 4156 assert(params.tex != NULL); 4157 assert(instr->op == nir_texop_txf || 4158 instr->op == nir_texop_txf_ms || 4159 nir_tex_instr_is_query(instr) || 4160 params.sampler != NULL); 4161 4162 PAD_SRC(ctx, params.coord, coord_components, float_undef); 4163 PAD_SRC(ctx, params.offset, offset_components, int_undef); 4164 if (!params.min_lod) params.min_lod = float_undef; 4165 4166 const struct dxil_value *sample = NULL; 4167 switch (instr->op) { 4168 case nir_texop_txb: 4169 sample = emit_sample_bias(ctx, ¶ms); 4170 break; 4171 4172 case nir_texop_tex: 4173 if (params.cmp != NULL) { 4174 sample = emit_sample_cmp(ctx, ¶ms); 4175 break; 4176 } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) { 4177 sample = emit_sample(ctx, ¶ms); 4178 break; 4179 } 4180 params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0); 4181 FALLTHROUGH; 4182 case nir_texop_txl: 4183 sample = emit_sample_level(ctx, ¶ms); 4184 break; 4185 4186 case nir_texop_txd: 4187 PAD_SRC(ctx, params.dx, dx_components, float_undef); 4188 PAD_SRC(ctx, params.dy, dy_components,float_undef); 4189 sample = emit_sample_grad(ctx, ¶ms); 4190 break; 4191 4192 case nir_texop_txf: 4193 case nir_texop_txf_ms: 4194 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { 4195 params.coord[1] = int_undef; 4196 sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload); 4197 } else { 4198 PAD_SRC(ctx, params.coord, coord_components, int_undef); 4199 sample = emit_texel_fetch(ctx, ¶ms); 4200 } 4201 break; 4202 4203 case nir_texop_txs: 4204 sample = emit_texture_size(ctx, ¶ms); 4205 break; 4206 4207 case nir_texop_lod: 4208 sample = emit_texture_lod(ctx, ¶ms); 4209 store_dest(ctx, &instr->dest, 0, sample, nir_alu_type_get_base_type(instr->dest_type)); 4210 return true; 4211 4212 case nir_texop_query_levels: 4213 params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32); 4214 sample = emit_texture_size(ctx, ¶ms); 4215 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3); 4216 store_dest(ctx, &instr->dest, 0, retval, nir_alu_type_get_base_type(instr->dest_type)); 4217 return true; 4218 4219 default: 4220 fprintf(stderr, "texture op: %d\n", instr->op); 4221 unreachable("unknown texture op"); 4222 } 4223 4224 if (!sample) 4225 return false; 4226 4227 for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { 4228 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i); 4229 store_dest(ctx, &instr->dest, i, retval, nir_alu_type_get_base_type(instr->dest_type)); 4230 } 4231 4232 return true; 4233} 4234 4235static bool 4236emit_undefined(struct ntd_context *ctx, nir_ssa_undef_instr *undef) 4237{ 4238 for (unsigned i = 0; i < undef->def.num_components; ++i) 4239 store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0)); 4240 return true; 4241} 4242 4243static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr) 4244{ 4245 switch (instr->type) { 4246 case nir_instr_type_alu: 4247 return emit_alu(ctx, nir_instr_as_alu(instr)); 4248 case nir_instr_type_intrinsic: 4249 return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); 4250 case nir_instr_type_load_const: 4251 return emit_load_const(ctx, nir_instr_as_load_const(instr)); 4252 case nir_instr_type_deref: 4253 return emit_deref(ctx, nir_instr_as_deref(instr)); 4254 case nir_instr_type_jump: 4255 return emit_jump(ctx, nir_instr_as_jump(instr)); 4256 case nir_instr_type_phi: 4257 return emit_phi(ctx, nir_instr_as_phi(instr)); 4258 case nir_instr_type_tex: 4259 return emit_tex(ctx, nir_instr_as_tex(instr)); 4260 case nir_instr_type_ssa_undef: 4261 return emit_undefined(ctx, nir_instr_as_ssa_undef(instr)); 4262 default: 4263 NIR_INSTR_UNSUPPORTED(instr); 4264 unreachable("Unimplemented instruction type"); 4265 return false; 4266 } 4267} 4268 4269 4270static bool 4271emit_block(struct ntd_context *ctx, struct nir_block *block) 4272{ 4273 assert(block->index < ctx->mod.num_basic_block_ids); 4274 ctx->mod.basic_block_ids[block->index] = ctx->mod.curr_block; 4275 4276 nir_foreach_instr(instr, block) { 4277 TRACE_CONVERSION(instr); 4278 4279 if (!emit_instr(ctx, instr)) { 4280 return false; 4281 } 4282 } 4283 return true; 4284} 4285 4286static bool 4287emit_cf_list(struct ntd_context *ctx, struct exec_list *list); 4288 4289static bool 4290emit_if(struct ntd_context *ctx, struct nir_if *if_stmt) 4291{ 4292 assert(nir_src_num_components(if_stmt->condition) == 1); 4293 const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0, 4294 nir_type_bool); 4295 if (!cond) 4296 return false; 4297 4298 /* prepare blocks */ 4299 nir_block *then_block = nir_if_first_then_block(if_stmt); 4300 assert(nir_if_last_then_block(if_stmt)->successors[0]); 4301 assert(!nir_if_last_then_block(if_stmt)->successors[1]); 4302 int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index; 4303 4304 nir_block *else_block = NULL; 4305 int else_succ = -1; 4306 if (!exec_list_is_empty(&if_stmt->else_list)) { 4307 else_block = nir_if_first_else_block(if_stmt); 4308 assert(nir_if_last_else_block(if_stmt)->successors[0]); 4309 assert(!nir_if_last_else_block(if_stmt)->successors[1]); 4310 else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index; 4311 } 4312 4313 if (!emit_cond_branch(ctx, cond, then_block->index, 4314 else_block ? else_block->index : then_succ)) 4315 return false; 4316 4317 /* handle then-block */ 4318 if (!emit_cf_list(ctx, &if_stmt->then_list) || 4319 (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) && 4320 !emit_branch(ctx, then_succ))) 4321 return false; 4322 4323 if (else_block) { 4324 /* handle else-block */ 4325 if (!emit_cf_list(ctx, &if_stmt->else_list) || 4326 (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) && 4327 !emit_branch(ctx, else_succ))) 4328 return false; 4329 } 4330 4331 return true; 4332} 4333 4334static bool 4335emit_loop(struct ntd_context *ctx, nir_loop *loop) 4336{ 4337 nir_block *first_block = nir_loop_first_block(loop); 4338 4339 assert(nir_loop_last_block(loop)->successors[0]); 4340 assert(!nir_loop_last_block(loop)->successors[1]); 4341 4342 if (!emit_branch(ctx, first_block->index)) 4343 return false; 4344 4345 if (!emit_cf_list(ctx, &loop->body)) 4346 return false; 4347 4348 if (!emit_branch(ctx, first_block->index)) 4349 return false; 4350 4351 return true; 4352} 4353 4354static bool 4355emit_cf_list(struct ntd_context *ctx, struct exec_list *list) 4356{ 4357 foreach_list_typed(nir_cf_node, node, node, list) { 4358 switch (node->type) { 4359 case nir_cf_node_block: 4360 if (!emit_block(ctx, nir_cf_node_as_block(node))) 4361 return false; 4362 break; 4363 4364 case nir_cf_node_if: 4365 if (!emit_if(ctx, nir_cf_node_as_if(node))) 4366 return false; 4367 break; 4368 4369 case nir_cf_node_loop: 4370 if (!emit_loop(ctx, nir_cf_node_as_loop(node))) 4371 return false; 4372 break; 4373 4374 default: 4375 unreachable("unsupported cf-list node"); 4376 break; 4377 } 4378 } 4379 return true; 4380} 4381 4382static void 4383insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var) 4384{ 4385 nir_foreach_variable_in_list(var, var_list) { 4386 if (var->data.binding > new_var->data.binding) { 4387 exec_node_insert_node_before(&var->node, &new_var->node); 4388 return; 4389 } 4390 } 4391 exec_list_push_tail(var_list, &new_var->node); 4392} 4393 4394 4395static void 4396sort_uniforms_by_binding_and_remove_structs(nir_shader *s) 4397{ 4398 struct exec_list new_list; 4399 exec_list_make_empty(&new_list); 4400 4401 nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) { 4402 exec_node_remove(&var->node); 4403 const struct glsl_type *type = glsl_without_array(var->type); 4404 if (!glsl_type_is_struct(type)) 4405 insert_sorted_by_binding(&new_list, var); 4406 } 4407 exec_list_append(&s->variables, &new_list); 4408} 4409 4410static void 4411prepare_phi_values(struct ntd_context *ctx) 4412{ 4413 /* PHI nodes are difficult to get right when tracking the types: 4414 * Since the incoming sources are linked to blocks, we can't bitcast 4415 * on the fly while loading. So scan the shader and insert a typed dummy 4416 * value for each phi source, and when storing we convert if the incoming 4417 * value has a different type then the one expected by the phi node. 4418 * We choose int as default, because it supports more bit sizes. 4419 */ 4420 nir_foreach_function(function, ctx->shader) { 4421 if (function->impl) { 4422 nir_foreach_block(block, function->impl) { 4423 nir_foreach_instr(instr, block) { 4424 if (instr->type == nir_instr_type_phi) { 4425 nir_phi_instr *ir = nir_instr_as_phi(instr); 4426 unsigned bitsize = nir_dest_bit_size(ir->dest); 4427 const struct dxil_value *dummy = dxil_module_get_int_const(&ctx->mod, 0, bitsize); 4428 nir_foreach_phi_src(src, ir) { 4429 for(unsigned int i = 0; i < ir->dest.ssa.num_components; ++i) 4430 store_ssa_def(ctx, src->src.ssa, i, dummy); 4431 } 4432 } 4433 } 4434 } 4435 } 4436 } 4437} 4438 4439static bool 4440emit_cbvs(struct ntd_context *ctx) 4441{ 4442 if (ctx->shader->info.stage == MESA_SHADER_KERNEL || ctx->opts->vulkan_environment) { 4443 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) { 4444 if (!emit_ubo_var(ctx, var)) 4445 return false; 4446 } 4447 } else { 4448 for (int i = ctx->opts->ubo_binding_offset; i < ctx->shader->info.num_ubos; ++i) { 4449 char name[64]; 4450 snprintf(name, sizeof(name), "__ubo%d", i); 4451 if (!emit_cbv(ctx, i, 0, 16384 /*4096 vec4's*/, 1, name)) 4452 return false; 4453 } 4454 } 4455 4456 return true; 4457} 4458 4459static bool 4460emit_scratch(struct ntd_context *ctx) 4461{ 4462 if (ctx->shader->scratch_size) { 4463 /* 4464 * We always allocate an u32 array, no matter the actual variable types. 4465 * According to the DXIL spec, the minimum load/store granularity is 4466 * 32-bit, anything smaller requires using a read-extract/read-write-modify 4467 * approach. 4468 */ 4469 unsigned size = ALIGN_POT(ctx->shader->scratch_size, sizeof(uint32_t)); 4470 const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32); 4471 const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t)); 4472 if (!int32 || !array_length) 4473 return false; 4474 4475 const struct dxil_type *type = dxil_module_get_array_type( 4476 &ctx->mod, int32, size / sizeof(uint32_t)); 4477 if (!type) 4478 return false; 4479 4480 ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4); 4481 if (!ctx->scratchvars) 4482 return false; 4483 } 4484 4485 return true; 4486} 4487 4488/* The validator complains if we don't have ops that reference a global variable. */ 4489static bool 4490shader_has_shared_ops(struct nir_shader *s) 4491{ 4492 nir_foreach_function(func, s) { 4493 if (!func->impl) 4494 continue; 4495 nir_foreach_block(block, func->impl) { 4496 nir_foreach_instr(instr, block) { 4497 if (instr->type != nir_instr_type_intrinsic) 4498 continue; 4499 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 4500 switch (intrin->intrinsic) { 4501 case nir_intrinsic_load_shared_dxil: 4502 case nir_intrinsic_store_shared_dxil: 4503 case nir_intrinsic_shared_atomic_add_dxil: 4504 case nir_intrinsic_shared_atomic_and_dxil: 4505 case nir_intrinsic_shared_atomic_comp_swap_dxil: 4506 case nir_intrinsic_shared_atomic_exchange_dxil: 4507 case nir_intrinsic_shared_atomic_imax_dxil: 4508 case nir_intrinsic_shared_atomic_imin_dxil: 4509 case nir_intrinsic_shared_atomic_or_dxil: 4510 case nir_intrinsic_shared_atomic_umax_dxil: 4511 case nir_intrinsic_shared_atomic_umin_dxil: 4512 case nir_intrinsic_shared_atomic_xor_dxil: 4513 return true; 4514 default: break; 4515 } 4516 } 4517 } 4518 } 4519 return false; 4520} 4521 4522static bool 4523emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts) 4524{ 4525 /* The validator forces us to emit resources in a specific order: 4526 * CBVs, Samplers, SRVs, UAVs. While we are at it also remove 4527 * stale struct uniforms, they are lowered but might not have been removed */ 4528 sort_uniforms_by_binding_and_remove_structs(ctx->shader); 4529 4530 /* CBVs */ 4531 if (!emit_cbvs(ctx)) 4532 return false; 4533 4534 /* Samplers */ 4535 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) { 4536 unsigned count = glsl_type_get_sampler_count(var->type); 4537 const struct glsl_type *without_array = glsl_without_array(var->type); 4538 if (var->data.mode == nir_var_uniform && glsl_type_is_sampler(without_array) && 4539 glsl_get_sampler_result_type(without_array) == GLSL_TYPE_VOID) { 4540 if (!emit_sampler(ctx, var, count)) 4541 return false; 4542 } 4543 } 4544 4545 /* SRVs */ 4546 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) { 4547 unsigned count = glsl_type_get_sampler_count(var->type); 4548 const struct glsl_type *without_array = glsl_without_array(var->type); 4549 if (var->data.mode == nir_var_uniform && glsl_type_is_sampler(without_array) && 4550 glsl_get_sampler_result_type(without_array) != GLSL_TYPE_VOID) { 4551 if (!emit_srv(ctx, var, count)) 4552 return false; 4553 } 4554 } 4555 /* Handle read-only SSBOs as SRVs */ 4556 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) { 4557 if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) { 4558 unsigned count = 1; 4559 if (glsl_type_is_array(var->type)) 4560 count = glsl_get_length(var->type); 4561 if (!emit_srv(ctx, var, count)) 4562 return false; 4563 } 4564 } 4565 4566 if (ctx->shader->info.shared_size && shader_has_shared_ops(ctx->shader)) { 4567 const struct dxil_type *type; 4568 unsigned size; 4569 4570 /* 4571 * We always allocate an u32 array, no matter the actual variable types. 4572 * According to the DXIL spec, the minimum load/store granularity is 4573 * 32-bit, anything smaller requires using a read-extract/read-write-modify 4574 * approach. Non-atomic 64-bit accesses are allowed, but the 4575 * GEP(cast(gvar, u64[] *), offset) and cast(GEP(gvar, offset), u64 *)) 4576 * sequences don't seem to be accepted by the DXIL validator when the 4577 * pointer is in the groupshared address space, making the 32-bit -> 64-bit 4578 * pointer cast impossible. 4579 */ 4580 size = ALIGN_POT(ctx->shader->info.shared_size, sizeof(uint32_t)); 4581 type = dxil_module_get_array_type(&ctx->mod, 4582 dxil_module_get_int_type(&ctx->mod, 32), 4583 size / sizeof(uint32_t)); 4584 ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type, 4585 DXIL_AS_GROUPSHARED, 4586 ffs(sizeof(uint64_t)), 4587 NULL); 4588 } 4589 4590 if (!emit_scratch(ctx)) 4591 return false; 4592 4593 /* UAVs */ 4594 if (ctx->shader->info.stage == MESA_SHADER_KERNEL) { 4595 if (!emit_globals(ctx, opts->num_kernel_globals)) 4596 return false; 4597 4598 ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx); 4599 if (!ctx->consts) 4600 return false; 4601 if (!emit_global_consts(ctx)) 4602 return false; 4603 } else { 4604 /* Handle read/write SSBOs as UAVs */ 4605 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) { 4606 if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) { 4607 unsigned count = 1; 4608 if (glsl_type_is_array(var->type)) 4609 count = glsl_get_length(var->type); 4610 if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set, 4611 count, DXIL_COMP_TYPE_INVALID, 4612 DXIL_RESOURCE_KIND_RAW_BUFFER, var->name)) 4613 return false; 4614 4615 } 4616 } 4617 } 4618 4619 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) { 4620 if (var->data.mode == nir_var_uniform && glsl_type_is_image(glsl_without_array(var->type))) { 4621 if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type))) 4622 return false; 4623 } 4624 } 4625 4626 nir_function_impl *entry = nir_shader_get_entrypoint(ctx->shader); 4627 nir_metadata_require(entry, nir_metadata_block_index); 4628 4629 assert(entry->num_blocks > 0); 4630 ctx->mod.basic_block_ids = rzalloc_array(ctx->ralloc_ctx, int, 4631 entry->num_blocks); 4632 if (!ctx->mod.basic_block_ids) 4633 return false; 4634 4635 for (int i = 0; i < entry->num_blocks; ++i) 4636 ctx->mod.basic_block_ids[i] = -1; 4637 ctx->mod.num_basic_block_ids = entry->num_blocks; 4638 4639 ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, 4640 entry->ssa_alloc); 4641 if (!ctx->defs) 4642 return false; 4643 ctx->num_defs = entry->ssa_alloc; 4644 4645 ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx); 4646 if (!ctx->phis) 4647 return false; 4648 4649 prepare_phi_values(ctx); 4650 4651 if (!emit_cf_list(ctx, &entry->body)) 4652 return false; 4653 4654 hash_table_foreach(ctx->phis, entry) { 4655 fixup_phi(ctx, (nir_phi_instr *)entry->key, 4656 (struct phi_block *)entry->data); 4657 } 4658 4659 if (!dxil_emit_ret_void(&ctx->mod)) 4660 return false; 4661 4662 if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) { 4663 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) { 4664 if (var->data.location == FRAG_RESULT_STENCIL) { 4665 ctx->mod.feats.stencil_ref = true; 4666 } 4667 } 4668 } 4669 4670 if (ctx->mod.feats.native_low_precision) 4671 ctx->mod.minor_version = MAX2(ctx->mod.minor_version, 2); 4672 4673 return emit_metadata(ctx) && 4674 dxil_emit_module(&ctx->mod); 4675} 4676 4677static unsigned int 4678get_dxil_shader_kind(struct nir_shader *s) 4679{ 4680 switch (s->info.stage) { 4681 case MESA_SHADER_VERTEX: 4682 return DXIL_VERTEX_SHADER; 4683 case MESA_SHADER_GEOMETRY: 4684 return DXIL_GEOMETRY_SHADER; 4685 case MESA_SHADER_FRAGMENT: 4686 return DXIL_PIXEL_SHADER; 4687 case MESA_SHADER_KERNEL: 4688 case MESA_SHADER_COMPUTE: 4689 return DXIL_COMPUTE_SHADER; 4690 default: 4691 unreachable("unknown shader stage in nir_to_dxil"); 4692 return DXIL_COMPUTE_SHADER; 4693 } 4694} 4695 4696static unsigned 4697lower_bit_size_callback(const nir_instr* instr, void *data) 4698{ 4699 if (instr->type != nir_instr_type_alu) 4700 return 0; 4701 const nir_alu_instr *alu = nir_instr_as_alu(instr); 4702 4703 if (nir_op_infos[alu->op].is_conversion) 4704 return 0; 4705 4706 unsigned num_inputs = nir_op_infos[alu->op].num_inputs; 4707 const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data; 4708 unsigned min_bit_size = opts->lower_int16 ? 32 : 16; 4709 4710 unsigned ret = 0; 4711 for (unsigned i = 0; i < num_inputs; i++) { 4712 unsigned bit_size = nir_src_bit_size(alu->src[i].src); 4713 if (bit_size != 1 && bit_size < min_bit_size) 4714 ret = min_bit_size; 4715 } 4716 4717 return ret; 4718} 4719 4720static void 4721optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts) 4722{ 4723 bool progress; 4724 do { 4725 progress = false; 4726 NIR_PASS_V(s, nir_lower_vars_to_ssa); 4727 NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, UINT32_MAX); 4728 NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); 4729 NIR_PASS(progress, s, nir_copy_prop); 4730 NIR_PASS(progress, s, nir_opt_copy_prop_vars); 4731 NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts); 4732 NIR_PASS(progress, s, dxil_nir_lower_8bit_conv); 4733 if (opts->lower_int16) 4734 NIR_PASS(progress, s, dxil_nir_lower_16bit_conv); 4735 NIR_PASS(progress, s, nir_opt_remove_phis); 4736 NIR_PASS(progress, s, nir_opt_dce); 4737 NIR_PASS(progress, s, nir_opt_if, true); 4738 NIR_PASS(progress, s, nir_opt_dead_cf); 4739 NIR_PASS(progress, s, nir_opt_cse); 4740 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); 4741 NIR_PASS(progress, s, nir_opt_algebraic); 4742 NIR_PASS(progress, s, dxil_nir_lower_x2b); 4743 if (s->options->lower_int64_options) 4744 NIR_PASS(progress, s, nir_lower_int64); 4745 NIR_PASS(progress, s, nir_lower_alu); 4746 NIR_PASS(progress, s, dxil_nir_lower_inot); 4747 NIR_PASS(progress, s, nir_opt_constant_folding); 4748 NIR_PASS(progress, s, nir_opt_undef); 4749 NIR_PASS(progress, s, nir_lower_undef_to_zero); 4750 NIR_PASS(progress, s, nir_opt_deref); 4751 NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16); 4752 NIR_PASS(progress, s, nir_lower_64bit_phis); 4753 NIR_PASS_V(s, nir_lower_system_values); 4754 } while (progress); 4755 4756 do { 4757 progress = false; 4758 NIR_PASS(progress, s, nir_opt_algebraic_late); 4759 } while (progress); 4760} 4761 4762static 4763void dxil_fill_validation_state(struct ntd_context *ctx, 4764 struct dxil_validation_state *state) 4765{ 4766 state->num_resources = util_dynarray_num_elements(&ctx->resources, struct dxil_resource); 4767 state->resources = (struct dxil_resource*)ctx->resources.data; 4768 state->state.psv0.max_expected_wave_lane_count = UINT_MAX; 4769 state->state.shader_stage = (uint8_t)ctx->mod.shader_kind; 4770 state->state.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs; 4771 state->state.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs; 4772 //state->state.sig_patch_const_or_prim_elements = 0; 4773 4774 switch (ctx->mod.shader_kind) { 4775 case DXIL_VERTEX_SHADER: 4776 state->state.psv0.vs.output_position_present = ctx->mod.info.has_out_position; 4777 break; 4778 case DXIL_PIXEL_SHADER: 4779 /* TODO: handle depth outputs */ 4780 state->state.psv0.ps.depth_output = ctx->mod.info.has_out_depth; 4781 state->state.psv0.ps.sample_frequency = 4782 ctx->mod.info.has_per_sample_input; 4783 break; 4784 case DXIL_COMPUTE_SHADER: 4785 break; 4786 case DXIL_GEOMETRY_SHADER: 4787 state->state.max_vertex_count = ctx->shader->info.gs.vertices_out; 4788 state->state.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive); 4789 state->state.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive); 4790 state->state.psv0.gs.output_stream_mask = ctx->shader->info.gs.active_stream_mask; 4791 state->state.psv0.gs.output_position_present = ctx->mod.info.has_out_position; 4792 break; 4793 default: 4794 assert(0 && "Shader type not (yet) supported"); 4795 } 4796} 4797 4798static nir_variable * 4799add_sysvalue(struct ntd_context *ctx, 4800 uint8_t value, char *name, 4801 int driver_location) 4802{ 4803 4804 nir_variable *var = rzalloc(ctx->shader, nir_variable); 4805 if (!var) 4806 return NULL; 4807 var->data.driver_location = driver_location; 4808 var->data.location = value; 4809 var->type = glsl_uint_type(); 4810 var->name = name; 4811 var->data.mode = nir_var_system_value; 4812 var->data.interpolation = INTERP_MODE_FLAT; 4813 return var; 4814} 4815 4816static bool 4817append_input_or_sysvalue(struct ntd_context *ctx, 4818 int input_loc, int sv_slot, 4819 char *name, int driver_location) 4820{ 4821 if (input_loc >= 0) { 4822 /* Check inputs whether a variable is available the corresponds 4823 * to the sysvalue */ 4824 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) { 4825 if (var->data.location == input_loc) { 4826 ctx->system_value[sv_slot] = var; 4827 return true; 4828 } 4829 } 4830 } 4831 4832 ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location); 4833 if (!ctx->system_value[sv_slot]) 4834 return false; 4835 4836 nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]); 4837 return true; 4838} 4839 4840struct sysvalue_name { 4841 gl_system_value value; 4842 int slot; 4843 char *name; 4844} possible_sysvalues[] = { 4845 {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID"}, 4846 {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID"}, 4847 {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace"}, 4848 {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID"}, 4849 {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex"}, 4850}; 4851 4852static bool 4853allocate_sysvalues(struct ntd_context *ctx) 4854{ 4855 unsigned driver_location = 0; 4856 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) 4857 driver_location++; 4858 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value) 4859 driver_location++; 4860 4861 for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) { 4862 struct sysvalue_name *info = &possible_sysvalues[i]; 4863 if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) { 4864 if (!append_input_or_sysvalue(ctx, info->slot, 4865 info->value, info->name, 4866 driver_location++)) 4867 return false; 4868 } 4869 } 4870 return true; 4871} 4872 4873bool 4874nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts, 4875 struct blob *blob) 4876{ 4877 assert(opts); 4878 bool retval = true; 4879 debug_dxil = (int)debug_get_option_debug_dxil(); 4880 blob_init(blob); 4881 4882 struct ntd_context *ctx = calloc(1, sizeof(*ctx)); 4883 if (!ctx) 4884 return false; 4885 4886 ctx->opts = opts; 4887 ctx->shader = s; 4888 4889 ctx->ralloc_ctx = ralloc_context(NULL); 4890 if (!ctx->ralloc_ctx) { 4891 retval = false; 4892 goto out; 4893 } 4894 4895 util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx); 4896 util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx); 4897 util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx); 4898 util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx); 4899 util_dynarray_init(&ctx->resources, ctx->ralloc_ctx); 4900 dxil_module_init(&ctx->mod, ctx->ralloc_ctx); 4901 ctx->mod.shader_kind = get_dxil_shader_kind(s); 4902 ctx->mod.major_version = 6; 4903 ctx->mod.minor_version = 1; 4904 4905 NIR_PASS_V(s, nir_lower_pack); 4906 NIR_PASS_V(s, nir_lower_frexp); 4907 NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true); 4908 4909 optimize_nir(s, opts); 4910 4911 NIR_PASS_V(s, nir_remove_dead_variables, 4912 nir_var_function_temp | nir_var_shader_temp, NULL); 4913 4914 if (!allocate_sysvalues(ctx)) 4915 return false; 4916 4917 if (debug_dxil & DXIL_DEBUG_VERBOSE) 4918 nir_print_shader(s, stderr); 4919 4920 if (!emit_module(ctx, opts)) { 4921 debug_printf("D3D12: dxil_container_add_module failed\n"); 4922 retval = false; 4923 goto out; 4924 } 4925 4926 if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) { 4927 struct dxil_dumper *dumper = dxil_dump_create(); 4928 dxil_dump_module(dumper, &ctx->mod); 4929 fprintf(stderr, "\n"); 4930 dxil_dump_buf_to_file(dumper, stderr); 4931 fprintf(stderr, "\n\n"); 4932 dxil_dump_free(dumper); 4933 } 4934 4935 struct dxil_container container; 4936 dxil_container_init(&container); 4937 if (!dxil_container_add_features(&container, &ctx->mod.feats)) { 4938 debug_printf("D3D12: dxil_container_add_features failed\n"); 4939 retval = false; 4940 goto out; 4941 } 4942 4943 if (!dxil_container_add_io_signature(&container, 4944 DXIL_ISG1, 4945 ctx->mod.num_sig_inputs, 4946 ctx->mod.inputs)) { 4947 debug_printf("D3D12: failed to write input signature\n"); 4948 retval = false; 4949 goto out; 4950 } 4951 4952 if (!dxil_container_add_io_signature(&container, 4953 DXIL_OSG1, 4954 ctx->mod.num_sig_outputs, 4955 ctx->mod.outputs)) { 4956 debug_printf("D3D12: failed to write output signature\n"); 4957 retval = false; 4958 goto out; 4959 } 4960 4961 struct dxil_validation_state validation_state; 4962 memset(&validation_state, 0, sizeof(validation_state)); 4963 dxil_fill_validation_state(ctx, &validation_state); 4964 4965 if (!dxil_container_add_state_validation(&container,&ctx->mod, 4966 &validation_state)) { 4967 debug_printf("D3D12: failed to write state-validation\n"); 4968 retval = false; 4969 goto out; 4970 } 4971 4972 if (!dxil_container_add_module(&container, &ctx->mod)) { 4973 debug_printf("D3D12: failed to write module\n"); 4974 retval = false; 4975 goto out; 4976 } 4977 4978 if (!dxil_container_write(&container, blob)) { 4979 debug_printf("D3D12: dxil_container_write failed\n"); 4980 retval = false; 4981 goto out; 4982 } 4983 dxil_container_finish(&container); 4984 4985 if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) { 4986 static int shader_id = 0; 4987 char buffer[64]; 4988 snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob", 4989 get_shader_kind_str(ctx->mod.shader_kind), shader_id++); 4990 debug_printf("Try to write blob to %s\n", buffer); 4991 FILE *f = fopen(buffer, "wb"); 4992 if (f) { 4993 fwrite(blob->data, 1, blob->size, f); 4994 fclose(f); 4995 } 4996 } 4997 4998out: 4999 dxil_module_release(&ctx->mod); 5000 ralloc_free(ctx->ralloc_ctx); 5001 free(ctx); 5002 return retval; 5003} 5004 5005enum dxil_sysvalue_type 5006nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask) 5007{ 5008 switch (var->data.location) { 5009 case VARYING_SLOT_FACE: 5010 return DXIL_GENERATED_SYSVALUE; 5011 case VARYING_SLOT_POS: 5012 case VARYING_SLOT_PRIMITIVE_ID: 5013 case VARYING_SLOT_CLIP_DIST0: 5014 case VARYING_SLOT_CLIP_DIST1: 5015 case VARYING_SLOT_PSIZ: 5016 if (!((1ull << var->data.location) & other_stage_mask)) 5017 return DXIL_SYSVALUE; 5018 FALLTHROUGH; 5019 default: 5020 return DXIL_NO_SYSVALUE; 5021 } 5022} 5023