/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "vtn_private.h"
#include "nir/nir_vla.h"
#include "nir/nir_control_flow.h"
#include "nir/nir_constant_expressions.h"
#include "nir/nir_deref.h"
#include "spirv_info.h"

#include "util/format/u_format.h"
#include "util/u_math.h"
#include "util/u_string.h"

#include <stdio.h>

#ifndef NDEBUG
static enum nir_spirv_debug_level
vtn_default_log_level(void)
{
   enum nir_spirv_debug_level level = NIR_SPIRV_DEBUG_LEVEL_WARNING;
   const char *vtn_log_level_strings[] = {
      [NIR_SPIRV_DEBUG_LEVEL_WARNING] = "warning",
      [NIR_SPIRV_DEBUG_LEVEL_INFO] = "info",
      [NIR_SPIRV_DEBUG_LEVEL_ERROR] = "error",
   };
   const char *str = getenv("MESA_SPIRV_LOG_LEVEL");

   if (str == NULL)
      return NIR_SPIRV_DEBUG_LEVEL_WARNING;

   for (int i = 0; i < ARRAY_SIZE(vtn_log_level_strings); i++) {
      if (strcasecmp(str, vtn_log_level_strings[i]) == 0) {
         level = i;
         break;
      }
   }

   return level;
}
#endif

void
vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level,
        size_t spirv_offset, const char *message)
{
   if (b->options->debug.func) {
      b->options->debug.func(b->options->debug.private_data,
                             level, spirv_offset, message);
   }

#ifndef NDEBUG
   static enum nir_spirv_debug_level default_level =
      NIR_SPIRV_DEBUG_LEVEL_INVALID;

   if (default_level == NIR_SPIRV_DEBUG_LEVEL_INVALID)
      default_level = vtn_default_log_level();

   if (level >= default_level)
      fprintf(stderr, "%s\n", message);
#endif
}
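/* vtn_logf() is the printf-style wrapper around vtn_log().  Messages flow to
 * the debug callback the driver installed in the options (b->options->debug),
 * together with the byte offset of the instruction currently being parsed.
 */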
void
vtn_logf(struct vtn_builder *b, enum nir_spirv_debug_level level,
         size_t spirv_offset, const char *fmt, ...)
{
   va_list args;
   char *msg;

   va_start(args, fmt);
   msg = ralloc_vasprintf(NULL, fmt, args);
   va_end(args);

   vtn_log(b, level, spirv_offset, msg);

   ralloc_free(msg);
}

static void
vtn_log_err(struct vtn_builder *b,
            enum nir_spirv_debug_level level, const char *prefix,
            const char *file, unsigned line,
            const char *fmt, va_list args)
{
   char *msg;

   msg = ralloc_strdup(NULL, prefix);

#ifndef NDEBUG
   ralloc_asprintf_append(&msg, "    In file %s:%u\n", file, line);
#endif

   ralloc_asprintf_append(&msg, "    ");

   ralloc_vasprintf_append(&msg, fmt, args);

   ralloc_asprintf_append(&msg, "\n    %zu bytes into the SPIR-V binary",
                          b->spirv_offset);

   if (b->file) {
      ralloc_asprintf_append(&msg,
                             "\n    in SPIR-V source file %s, line %d, col %d",
                             b->file, b->line, b->col);
   }

   vtn_log(b, level, b->spirv_offset, msg);

   ralloc_free(msg);
}

static void
vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix)
{
   static int idx = 0;

   char filename[1024];
   int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv",
                      path, prefix, idx++);
   if (len < 0 || len >= sizeof(filename))
      return;

   FILE *f = fopen(filename, "w");
   if (f == NULL)
      return;

   fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f);
   fclose(f);

   vtn_info("SPIR-V shader dumped to %s", filename);
}

void
_vtn_warn(struct vtn_builder *b, const char *file, unsigned line,
          const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_WARNING, "SPIR-V WARNING:\n",
               file, line, fmt, args);
   va_end(args);
}

void
_vtn_err(struct vtn_builder *b, const char *file, unsigned line,
         const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V ERROR:\n",
               file, line, fmt, args);
   va_end(args);
}
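/* Unlike _vtn_warn() and _vtn_err(), failing aborts the current parse: after
 * logging (and optionally dumping the offending binary when
 * MESA_SPIRV_FAIL_DUMP_PATH is set), we longjmp back to the setjmp in the
 * top-level entry point via b->fail_jump.
 */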
void
_vtn_fail(struct vtn_builder *b, const char *file, unsigned line,
          const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V parsing FAILED:\n",
               file, line, fmt, args);
   va_end(args);

   const char *dump_path = getenv("MESA_SPIRV_FAIL_DUMP_PATH");
   if (dump_path)
      vtn_dump_shader(b, dump_path, "fail");

   vtn_longjmp(b->fail_jump, 1);
}

static struct vtn_ssa_value *
vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
{
   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
   val->type = glsl_get_bare_type(type);

   if (glsl_type_is_vector_or_scalar(type)) {
      unsigned num_components = glsl_get_vector_elements(val->type);
      unsigned bit_size = glsl_get_bit_size(val->type);
      val->def = nir_ssa_undef(&b->nb, num_components, bit_size);
   } else {
      unsigned elems = glsl_get_length(val->type);
      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
      if (glsl_type_is_array_or_matrix(type)) {
         const struct glsl_type *elem_type = glsl_get_array_element(type);
         for (unsigned i = 0; i < elems; i++)
            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
      } else {
         vtn_assert(glsl_type_is_struct_or_ifc(type));
         for (unsigned i = 0; i < elems; i++) {
            const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
         }
      }
   }

   return val;
}

struct vtn_ssa_value *
vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
                    const struct glsl_type *type)
{
   struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant);

   if (entry)
      return entry->data;

   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
   val->type = glsl_get_bare_type(type);

   if (glsl_type_is_vector_or_scalar(type)) {
      unsigned num_components = glsl_get_vector_elements(val->type);
      unsigned bit_size = glsl_get_bit_size(type);
      nir_load_const_instr *load =
         nir_load_const_instr_create(b->shader, num_components, bit_size);

      memcpy(load->value, constant->values,
             sizeof(nir_const_value) * num_components);

      nir_instr_insert_before_cf_list(&b->nb.impl->body, &load->instr);
      val->def = &load->def;
   } else {
      unsigned elems = glsl_get_length(val->type);
      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
      if (glsl_type_is_array_or_matrix(type)) {
         const struct glsl_type *elem_type = glsl_get_array_element(type);
         for (unsigned i = 0; i < elems; i++) {
            val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
                                                elem_type);
         }
      } else {
         vtn_assert(glsl_type_is_struct_or_ifc(type));
         for (unsigned i = 0; i < elems; i++) {
            const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
            val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
                                                elem_type);
         }
      }
   }

   return val;
}
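/* Materializes a vtn_ssa_value for any value kind that can appear where an
 * SSA value is expected: undefs and constants are built on demand (constants
 * are memoized through b->const_table above) and pointers are converted to
 * their SSA form.
 */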
struct vtn_ssa_value *
vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_value *val = vtn_untyped_value(b, value_id);
   switch (val->value_type) {
   case vtn_value_type_undef:
      return vtn_undef_ssa_value(b, val->type->type);

   case vtn_value_type_constant:
      return vtn_const_ssa_value(b, val->constant, val->type->type);

   case vtn_value_type_ssa:
      return val->ssa;

   case vtn_value_type_pointer:
      vtn_assert(val->pointer->ptr_type && val->pointer->ptr_type->type);
      struct vtn_ssa_value *ssa =
         vtn_create_ssa_value(b, val->pointer->ptr_type->type);
      ssa->def = vtn_pointer_to_ssa(b, val->pointer);
      return ssa;

   default:
      vtn_fail("Invalid type for an SSA value");
   }
}

struct vtn_value *
vtn_push_ssa_value(struct vtn_builder *b, uint32_t value_id,
                   struct vtn_ssa_value *ssa)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);

   /* See vtn_create_ssa_value */
   vtn_fail_if(ssa->type != glsl_get_bare_type(type->type),
               "Type mismatch for SPIR-V SSA value");

   struct vtn_value *val;
   if (type->base_type == vtn_base_type_pointer) {
      val = vtn_push_pointer(b, value_id, vtn_pointer_from_ssa(b, ssa->def, type));
   } else {
      /* Don't trip the value_type_ssa check in vtn_push_value */
      val = vtn_push_value(b, value_id, vtn_value_type_invalid);
      val->value_type = vtn_value_type_ssa;
      val->ssa = ssa;
   }

   return val;
}

nir_ssa_def *
vtn_get_nir_ssa(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_ssa_value *ssa = vtn_ssa_value(b, value_id);
   vtn_fail_if(!glsl_type_is_vector_or_scalar(ssa->type),
               "Expected a vector or scalar type");
   return ssa->def;
}

struct vtn_value *
vtn_push_nir_ssa(struct vtn_builder *b, uint32_t value_id, nir_ssa_def *def)
{
   /* Types for all SPIR-V SSA values are set as part of a pre-pass so the
    * type will be valid by the time we get here.
    */
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_fail_if(def->num_components != glsl_get_vector_elements(type->type) ||
               def->bit_size != glsl_get_bit_size(type->type),
               "Mismatch between NIR and SPIR-V type.");
   struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
   ssa->def = def;
   return vtn_push_ssa_value(b, value_id, ssa);
}

static enum gl_access_qualifier
spirv_to_gl_access_qualifier(struct vtn_builder *b,
                             SpvAccessQualifier access_qualifier)
{
   switch (access_qualifier) {
   case SpvAccessQualifierReadOnly:
      return ACCESS_NON_WRITEABLE;
   case SpvAccessQualifierWriteOnly:
      return ACCESS_NON_READABLE;
   case SpvAccessQualifierReadWrite:
      return 0;
   default:
      vtn_fail("Invalid image access qualifier");
   }
}

static nir_deref_instr *
vtn_get_image(struct vtn_builder *b, uint32_t value_id,
              enum gl_access_qualifier *access)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_image);
   if (access)
      *access |= spirv_to_gl_access_qualifier(b, type->access_qualifier);
   return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
                               nir_var_uniform, type->glsl_image, 0);
}

static void
vtn_push_image(struct vtn_builder *b, uint32_t value_id,
               nir_deref_instr *deref, bool propagate_non_uniform)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_image);
   struct vtn_value *value = vtn_push_nir_ssa(b, value_id, &deref->dest.ssa);
   value->propagated_non_uniform = propagate_non_uniform;
}

static nir_deref_instr *
vtn_get_sampler(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_sampler);
   return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
                               nir_var_uniform, glsl_bare_sampler_type(), 0);
}
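/* A vtn_sampled_image travels through NIR as a vec2: component 0 is the
 * image deref and component 1 is the sampler deref (see the
 * OpTypeSampledImage handling in vtn_handle_type() below).
 */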
nir_ssa_def *
vtn_sampled_image_to_nir_ssa(struct vtn_builder *b,
                             struct vtn_sampled_image si)
{
   return nir_vec2(&b->nb, &si.image->dest.ssa, &si.sampler->dest.ssa);
}

static void
vtn_push_sampled_image(struct vtn_builder *b, uint32_t value_id,
                       struct vtn_sampled_image si, bool propagate_non_uniform)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_sampled_image);
   struct vtn_value *value = vtn_push_nir_ssa(b, value_id,
                                              vtn_sampled_image_to_nir_ssa(b, si));
   value->propagated_non_uniform = propagate_non_uniform;
}

static struct vtn_sampled_image
vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_sampled_image);
   nir_ssa_def *si_vec2 = vtn_get_nir_ssa(b, value_id);

   struct vtn_sampled_image si = { NULL, };
   si.image = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 0),
                                   nir_var_uniform,
                                   type->image->glsl_image, 0);
   si.sampler = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 1),
                                     nir_var_uniform,
                                     glsl_bare_sampler_type(), 0);
   return si;
}

static const char *
vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
                   unsigned word_count, unsigned *words_used)
{
   /* From the SPIR-V spec:
    *
    *    "A string is interpreted as a nul-terminated stream of characters.
    *    The character set is Unicode in the UTF-8 encoding scheme. The UTF-8
    *    octets (8-bit bytes) are packed four per word, following the
    *    little-endian convention (i.e., the first octet is in the
    *    lowest-order 8 bits of the word). The final word contains the
    *    string’s nul-termination character (0), and all contents past the
    *    end of the string in the final word are padded with 0."
    *
    * On big-endian, we need to byte-swap.
    */
#if UTIL_ARCH_BIG_ENDIAN
   {
      uint32_t *copy = ralloc_array(b, uint32_t, word_count);
      for (unsigned i = 0; i < word_count; i++)
         copy[i] = util_bswap32(words[i]);
      words = copy;
   }
#endif

   const char *str = (char *)words;
   const char *end = memchr(str, 0, word_count * 4);
   vtn_fail_if(end == NULL, "String is not null-terminated");

   if (words_used)
      *words_used = DIV_ROUND_UP(end - str + 1, sizeof(*words));

   return str;
}

const uint32_t *
vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
                        const uint32_t *end, vtn_instruction_handler handler)
{
   b->file = NULL;
   b->line = -1;
   b->col = -1;

   const uint32_t *w = start;
   while (w < end) {
      /* Word 0 of each instruction packs the opcode in the low 16 bits and
       * the total word count (including word 0) in the high 16 bits.
       */
      SpvOp opcode = w[0] & SpvOpCodeMask;
      unsigned count = w[0] >> SpvWordCountShift;
      vtn_assert(count >= 1 && w + count <= end);

      b->spirv_offset = (uint8_t *)w - (uint8_t *)b->spirv;

      switch (opcode) {
      case SpvOpNop:
         break; /* Do nothing */

      case SpvOpLine:
         b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
         b->line = w[2];
         b->col = w[3];
         break;

      case SpvOpNoLine:
         b->file = NULL;
         b->line = -1;
         b->col = -1;
         break;

      default:
         if (!handler(b, opcode, w, count))
            return w;
         break;
      }

      w += count;
   }

   b->spirv_offset = 0;
   b->file = NULL;
   b->line = -1;
   b->col = -1;

   assert(w == end);
   return w;
}
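/* Instructions from NonSemantic.* extended instruction sets (e.g. debug
 * information) have, by definition, no effect on the shader's semantics, so
 * it is safe to accept them and do nothing.
 */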
static bool
vtn_handle_non_semantic_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                    const uint32_t *w, unsigned count)
{
   /* Do nothing. */
   return true;
}

static void
vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
                     const uint32_t *w, unsigned count)
{
   switch (opcode) {
   case SpvOpExtInstImport: {
      struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
      const char *ext = vtn_string_literal(b, &w[2], count - 2, NULL);
      if (strcmp(ext, "GLSL.std.450") == 0) {
         val->ext_handler = vtn_handle_glsl450_instruction;
      } else if ((strcmp(ext, "SPV_AMD_gcn_shader") == 0)
                 && (b->options && b->options->caps.amd_gcn_shader)) {
         val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
      } else if ((strcmp(ext, "SPV_AMD_shader_ballot") == 0)
                 && (b->options && b->options->caps.amd_shader_ballot)) {
         val->ext_handler = vtn_handle_amd_shader_ballot_instruction;
      } else if ((strcmp(ext, "SPV_AMD_shader_trinary_minmax") == 0)
                 && (b->options && b->options->caps.amd_trinary_minmax)) {
         val->ext_handler = vtn_handle_amd_shader_trinary_minmax_instruction;
      } else if ((strcmp(ext, "SPV_AMD_shader_explicit_vertex_parameter") == 0)
                 && (b->options && b->options->caps.amd_shader_explicit_vertex_parameter)) {
         val->ext_handler = vtn_handle_amd_shader_explicit_vertex_parameter_instruction;
      } else if (strcmp(ext, "OpenCL.std") == 0) {
         val->ext_handler = vtn_handle_opencl_instruction;
      } else if (strstr(ext, "NonSemantic.") == ext) {
         val->ext_handler = vtn_handle_non_semantic_instruction;
      } else {
         vtn_fail("Unsupported extension: %s", ext);
      }
      break;
   }

   case SpvOpExtInst: {
      struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
      bool handled = val->ext_handler(b, w[4], w, count);
      vtn_assert(handled);
      break;
   }

   default:
      vtn_fail_with_opcode("Unhandled opcode", opcode);
   }
}

static void
_foreach_decoration_helper(struct vtn_builder *b,
                           struct vtn_value *base_value,
                           int parent_member,
                           struct vtn_value *value,
                           vtn_decoration_foreach_cb cb, void *data)
{
   for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
      int member;
      if (dec->scope == VTN_DEC_DECORATION) {
         member = parent_member;
      } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
         vtn_fail_if(value->value_type != vtn_value_type_type ||
                     value->type->base_type != vtn_base_type_struct,
                     "OpMemberDecorate and OpGroupMemberDecorate are only "
                     "allowed on OpTypeStruct");
         /* This means we haven't recursed yet */
         assert(value == base_value);

         member = dec->scope - VTN_DEC_STRUCT_MEMBER0;

         vtn_fail_if(member >= base_value->type->length,
                     "OpMemberDecorate specifies member %d but the "
                     "OpTypeStruct has only %u members",
                     member, base_value->type->length);
      } else {
         /* Not a decoration */
         assert(dec->scope == VTN_DEC_EXECUTION_MODE);
         continue;
      }

      if (dec->group) {
         assert(dec->group->value_type == vtn_value_type_decoration_group);
         _foreach_decoration_helper(b, base_value, member, dec->group,
                                    cb, data);
      } else {
         cb(b, base_value, member, dec, data);
      }
   }
}
/** Iterates (recursively if needed) over all of the decorations on a value
 *
 * This function iterates over all of the decorations applied to a given
 * value.  If it encounters a decoration group, it recurses into the group
 * and iterates over all of those decorations as well.
 */
void
vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
                       vtn_decoration_foreach_cb cb, void *data)
{
   _foreach_decoration_helper(b, value, -1, value, cb, data);
}

void
vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
                           vtn_execution_mode_foreach_cb cb, void *data)
{
   for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
      if (dec->scope != VTN_DEC_EXECUTION_MODE)
         continue;

      assert(dec->group == NULL);
      cb(b, value, dec, data);
   }
}

void
vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
                      const uint32_t *w, unsigned count)
{
   const uint32_t *w_end = w + count;
   const uint32_t target = w[1];
   w += 2;

   switch (opcode) {
   case SpvOpDecorationGroup:
      vtn_push_value(b, target, vtn_value_type_decoration_group);
      break;

   case SpvOpDecorate:
   case SpvOpDecorateId:
   case SpvOpMemberDecorate:
   case SpvOpDecorateString:
   case SpvOpMemberDecorateString:
   case SpvOpExecutionMode:
   case SpvOpExecutionModeId: {
      struct vtn_value *val = vtn_untyped_value(b, target);

      struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
      switch (opcode) {
      case SpvOpDecorate:
      case SpvOpDecorateId:
      case SpvOpDecorateString:
         dec->scope = VTN_DEC_DECORATION;
         break;
      case SpvOpMemberDecorate:
      case SpvOpMemberDecorateString:
         dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
         vtn_fail_if(dec->scope < VTN_DEC_STRUCT_MEMBER0, /* overflow */
                     "Member argument of OpMemberDecorate too large");
         break;
      case SpvOpExecutionMode:
      case SpvOpExecutionModeId:
         dec->scope = VTN_DEC_EXECUTION_MODE;
         break;
      default:
         unreachable("Invalid decoration opcode");
      }
      dec->decoration = *(w++);
      dec->operands = w;

      /* Link into the list */
      dec->next = val->decoration;
      val->decoration = dec;
      break;
   }

   case SpvOpGroupMemberDecorate:
   case SpvOpGroupDecorate: {
      struct vtn_value *group =
         vtn_value(b, target, vtn_value_type_decoration_group);

      for (; w < w_end; w++) {
         struct vtn_value *val = vtn_untyped_value(b, *w);
         struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);

         dec->group = group;
         if (opcode == SpvOpGroupDecorate) {
            dec->scope = VTN_DEC_DECORATION;
         } else {
            dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
            vtn_fail_if(dec->scope < 0, /* Check for overflow */
                        "Member argument of OpGroupMemberDecorate too large");
         }

         /* Link into the list */
         dec->next = val->decoration;
         val->decoration = dec;
      }
      break;
   }

   default:
      unreachable("Unhandled opcode");
   }
}

struct member_decoration_ctx {
   unsigned num_fields;
   struct glsl_struct_field *fields;
   struct vtn_type *type;
};

/**
 * Returns true if the given type contains a struct decorated Block or
 * BufferBlock
 */
bool
vtn_type_contains_block(struct vtn_builder *b, struct vtn_type *type)
{
   switch (type->base_type) {
   case vtn_base_type_array:
      return vtn_type_contains_block(b, type->array_element);
   case vtn_base_type_struct:
      if (type->block || type->buffer_block)
         return true;
      for (unsigned i = 0; i < type->length; i++) {
         if (vtn_type_contains_block(b, type->members[i]))
            return true;
      }
      return false;
   default:
      return false;
   }
}
/** Returns true if two types are "compatible", i.e. you can do an OpLoad,
 * OpStore, or OpCopyMemory between them without breaking anything.
 * Technically, the SPIR-V rules require the exact same type ID but this lets
 * us internally be a bit looser.
 */
bool
vtn_types_compatible(struct vtn_builder *b,
                     struct vtn_type *t1, struct vtn_type *t2)
{
   if (t1->id == t2->id)
      return true;

   if (t1->base_type != t2->base_type)
      return false;

   switch (t1->base_type) {
   case vtn_base_type_void:
   case vtn_base_type_scalar:
   case vtn_base_type_vector:
   case vtn_base_type_matrix:
   case vtn_base_type_image:
   case vtn_base_type_sampler:
   case vtn_base_type_sampled_image:
   case vtn_base_type_event:
      return t1->type == t2->type;

   case vtn_base_type_array:
      return t1->length == t2->length &&
             vtn_types_compatible(b, t1->array_element, t2->array_element);

   case vtn_base_type_pointer:
      return vtn_types_compatible(b, t1->deref, t2->deref);

   case vtn_base_type_struct:
      if (t1->length != t2->length)
         return false;

      for (unsigned i = 0; i < t1->length; i++) {
         if (!vtn_types_compatible(b, t1->members[i], t2->members[i]))
            return false;
      }
      return true;

   case vtn_base_type_accel_struct:
      return true;

   case vtn_base_type_function:
      /* This case shouldn't get hit since you can't copy around function
       * types.  Just require them to be identical.
       */
      return false;
   }

   vtn_fail("Invalid base type");
}

struct vtn_type *
vtn_type_without_array(struct vtn_type *type)
{
   while (type->base_type == vtn_base_type_array)
      type = type->array_element;
   return type;
}

/* does a shallow copy of a vtn_type */

static struct vtn_type *
vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
{
   struct vtn_type *dest = ralloc(b, struct vtn_type);
   *dest = *src;

   switch (src->base_type) {
   case vtn_base_type_void:
   case vtn_base_type_scalar:
   case vtn_base_type_vector:
   case vtn_base_type_matrix:
   case vtn_base_type_array:
   case vtn_base_type_pointer:
   case vtn_base_type_image:
   case vtn_base_type_sampler:
   case vtn_base_type_sampled_image:
   case vtn_base_type_event:
   case vtn_base_type_accel_struct:
      /* Nothing more to do */
      break;

   case vtn_base_type_struct:
      dest->members = ralloc_array(b, struct vtn_type *, src->length);
      memcpy(dest->members, src->members,
             src->length * sizeof(src->members[0]));

      dest->offsets = ralloc_array(b, unsigned, src->length);
      memcpy(dest->offsets, src->offsets,
             src->length * sizeof(src->offsets[0]));
      break;

   case vtn_base_type_function:
      dest->params = ralloc_array(b, struct vtn_type *, src->length);
      memcpy(dest->params, src->params, src->length * sizeof(src->params[0]));
      break;
   }

   return dest;
}

static const struct glsl_type *
wrap_type_in_array(const struct glsl_type *type,
                   const struct glsl_type *array_type)
{
   if (!glsl_type_is_array(array_type))
      return type;

   const struct glsl_type *elem_type =
      wrap_type_in_array(type, glsl_get_array_element(array_type));
   return glsl_array_type(elem_type, glsl_get_length(array_type),
                          glsl_get_explicit_stride(array_type));
}
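/* For example, wrap_type_in_array(atomic_uint, uint[4][2]) yields
 * atomic_uint[4][2]: the array lengths and explicit strides come from the
 * second argument while the innermost element type is replaced.
 */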
static bool
vtn_type_needs_explicit_layout(struct vtn_builder *b, struct vtn_type *type,
                               enum vtn_variable_mode mode)
{
   /* For OpenCL we never want to strip the info from the types, and it
    * makes type comparisons easier in later stages.
    */
   if (b->options->environment == NIR_SPIRV_OPENCL)
      return true;

   switch (mode) {
   case vtn_variable_mode_input:
   case vtn_variable_mode_output:
      /* Layout decorations kept because we need offsets for XFB arrays of
       * blocks.
       */
      return b->shader->info.has_transform_feedback_varyings;

   case vtn_variable_mode_ssbo:
   case vtn_variable_mode_phys_ssbo:
   case vtn_variable_mode_ubo:
   case vtn_variable_mode_push_constant:
   case vtn_variable_mode_shader_record:
      return true;

   case vtn_variable_mode_workgroup:
      return b->options->caps.workgroup_memory_explicit_layout;

   default:
      return false;
   }
}

const struct glsl_type *
vtn_type_get_nir_type(struct vtn_builder *b, struct vtn_type *type,
                      enum vtn_variable_mode mode)
{
   if (mode == vtn_variable_mode_atomic_counter) {
      vtn_fail_if(glsl_without_array(type->type) != glsl_uint_type(),
                  "Variables in the AtomicCounter storage class should be "
                  "(possibly arrays of arrays of) uint.");
      return wrap_type_in_array(glsl_atomic_uint_type(), type->type);
   }

   if (mode == vtn_variable_mode_uniform) {
      switch (type->base_type) {
      case vtn_base_type_array: {
         const struct glsl_type *elem_type =
            vtn_type_get_nir_type(b, type->array_element, mode);

         return glsl_array_type(elem_type, type->length,
                                glsl_get_explicit_stride(type->type));
      }

      case vtn_base_type_struct: {
         bool need_new_struct = false;
         const uint32_t num_fields = type->length;
         NIR_VLA(struct glsl_struct_field, fields, num_fields);
         for (unsigned i = 0; i < num_fields; i++) {
            fields[i] = *glsl_get_struct_field_data(type->type, i);
            const struct glsl_type *field_nir_type =
               vtn_type_get_nir_type(b, type->members[i], mode);
            if (fields[i].type != field_nir_type) {
               fields[i].type = field_nir_type;
               need_new_struct = true;
            }
         }
         if (need_new_struct) {
            if (glsl_type_is_interface(type->type)) {
               return glsl_interface_type(fields, num_fields,
                                          /* packing */ 0, false,
                                          glsl_get_type_name(type->type));
            } else {
               return glsl_struct_type(fields, num_fields,
                                       glsl_get_type_name(type->type),
                                       glsl_struct_type_is_packed(type->type));
            }
         } else {
            /* No changes, just pass it on */
            return type->type;
         }
      }

      case vtn_base_type_image:
         return type->glsl_image;

      case vtn_base_type_sampler:
         return glsl_bare_sampler_type();

      case vtn_base_type_sampled_image:
         return type->image->glsl_image;

      default:
         return type->type;
      }
   }

   /* Layout decorations are allowed but ignored in certain conditions,
    * to allow SPIR-V generators to perform type deduplication.  Discard
    * unnecessary ones when passing to NIR.
    */
   if (!vtn_type_needs_explicit_layout(b, type, mode))
      return glsl_get_bare_type(type->type);

   return type->type;
}
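/* vtn_type objects may be shared between SPIR-V ids, so decorations that
 * mutate a member type (RowMajor, MatrixStride, access qualifiers) first
 * clone the types along the path with vtn_type_copy() before writing.
 */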
static struct vtn_type *
mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
{
   type->members[member] = vtn_type_copy(b, type->members[member]);
   type = type->members[member];

   /* We may have an array of matrices.... Oh, joy! */
   while (glsl_type_is_array(type->type)) {
      type->array_element = vtn_type_copy(b, type->array_element);
      type = type->array_element;
   }

   vtn_assert(glsl_type_is_matrix(type->type));

   return type;
}

static void
vtn_handle_access_qualifier(struct vtn_builder *b, struct vtn_type *type,
                            int member, enum gl_access_qualifier access)
{
   type->members[member] = vtn_type_copy(b, type->members[member]);
   type = type->members[member];

   type->access |= access;
}

static void
array_stride_decoration_cb(struct vtn_builder *b,
                           struct vtn_value *val, int member,
                           const struct vtn_decoration *dec, void *void_ctx)
{
   struct vtn_type *type = val->type;

   if (dec->decoration == SpvDecorationArrayStride) {
      if (vtn_type_contains_block(b, type)) {
         vtn_warn("The ArrayStride decoration cannot be applied to an array "
                  "type which contains a structure type decorated Block "
                  "or BufferBlock");
         /* Ignore the decoration */
      } else {
         vtn_fail_if(dec->operands[0] == 0, "ArrayStride must be non-zero");
         type->stride = dec->operands[0];
      }
   }
}

static void
struct_member_decoration_cb(struct vtn_builder *b,
                            UNUSED struct vtn_value *val, int member,
                            const struct vtn_decoration *dec, void *void_ctx)
{
   struct member_decoration_ctx *ctx = void_ctx;

   if (member < 0)
      return;

   assert(member < ctx->num_fields);

   switch (dec->decoration) {
   case SpvDecorationRelaxedPrecision:
   case SpvDecorationUniform:
   case SpvDecorationUniformId:
      break; /* FIXME: Do nothing with this for now. */
   case SpvDecorationNonWritable:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_WRITEABLE);
      break;
   case SpvDecorationNonReadable:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_READABLE);
      break;
   case SpvDecorationVolatile:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_VOLATILE);
      break;
   case SpvDecorationCoherent:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_COHERENT);
      break;
   case SpvDecorationNoPerspective:
      ctx->fields[member].interpolation = INTERP_MODE_NOPERSPECTIVE;
      break;
   case SpvDecorationFlat:
      ctx->fields[member].interpolation = INTERP_MODE_FLAT;
      break;
   case SpvDecorationExplicitInterpAMD:
      ctx->fields[member].interpolation = INTERP_MODE_EXPLICIT;
      break;
   case SpvDecorationCentroid:
      ctx->fields[member].centroid = true;
      break;
   case SpvDecorationSample:
      ctx->fields[member].sample = true;
      break;
   case SpvDecorationStream:
      /* This is handled later by var_decoration_cb in vtn_variables.c */
      break;
   case SpvDecorationLocation:
      ctx->fields[member].location = dec->operands[0];
      break;
   case SpvDecorationComponent:
      break; /* FIXME: What should we do with these? */
   case SpvDecorationBuiltIn:
      ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
      ctx->type->members[member]->is_builtin = true;
      ctx->type->members[member]->builtin = dec->operands[0];
      ctx->type->builtin_block = true;
      break;
   case SpvDecorationOffset:
      ctx->type->offsets[member] = dec->operands[0];
      ctx->fields[member].offset = dec->operands[0];
      break;
   case SpvDecorationMatrixStride:
      /* Handled as a second pass */
      break;
   case SpvDecorationColMajor:
      break; /* Nothing to do here.  Column-major is the default. */
   case SpvDecorationRowMajor:
      mutable_matrix_member(b, ctx->type, member)->row_major = true;
      break;

   case SpvDecorationPatch:
   case SpvDecorationPerPrimitiveNV:
   case SpvDecorationPerTaskNV:
      break;

   case SpvDecorationSpecId:
   case SpvDecorationBlock:
   case SpvDecorationBufferBlock:
   case SpvDecorationArrayStride:
   case SpvDecorationGLSLShared:
   case SpvDecorationGLSLPacked:
   case SpvDecorationInvariant:
   case SpvDecorationRestrict:
   case SpvDecorationAliased:
   case SpvDecorationConstant:
   case SpvDecorationIndex:
   case SpvDecorationBinding:
   case SpvDecorationDescriptorSet:
   case SpvDecorationLinkageAttributes:
   case SpvDecorationNoContraction:
   case SpvDecorationInputAttachmentIndex:
   case SpvDecorationCPacked:
      vtn_warn("Decoration not allowed on struct members: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationXfbBuffer:
   case SpvDecorationXfbStride:
      /* This is handled later by var_decoration_cb in vtn_variables.c */
      break;

   case SpvDecorationSaturatedConversion:
   case SpvDecorationFuncParamAttr:
   case SpvDecorationFPRoundingMode:
   case SpvDecorationFPFastMathMode:
   case SpvDecorationAlignment:
      if (b->shader->info.stage != MESA_SHADER_KERNEL) {
         vtn_warn("Decoration only allowed for CL-style kernels: %s",
                  spirv_decoration_to_string(dec->decoration));
      }
      break;

   case SpvDecorationUserSemantic:
   case SpvDecorationUserTypeGOOGLE:
      /* User semantic decorations can safely be ignored by the driver. */
      break;

   case SpvDecorationPerViewNV:
      /* TODO(mesh): Handle multiview. */
      vtn_warn("Mesh multiview not yet supported. Needed for decoration PerViewNV.");
      break;

   default:
      vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
   }
}

/** Chases the array type all the way down to the tail and rewrites the
 * glsl_types to be based off the tail's glsl_type.
 */
static void
vtn_array_type_rewrite_glsl_type(struct vtn_type *type)
{
   if (type->base_type != vtn_base_type_array)
      return;

   vtn_array_type_rewrite_glsl_type(type->array_element);

   type->type = glsl_array_type(type->array_element->type,
                                type->length, type->stride);
}
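/* For a row-major matrix, MatrixStride is the distance between rows; in the
 * pass below it lands on the column-vector element type, while the matrix
 * itself keeps the old vector stride as the distance between columns.
 */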
/* Matrix strides are handled as a separate pass because we need to know
 * whether the matrix is row-major or not first.
 */
static void
struct_member_matrix_stride_cb(struct vtn_builder *b,
                               UNUSED struct vtn_value *val, int member,
                               const struct vtn_decoration *dec,
                               void *void_ctx)
{
   if (dec->decoration != SpvDecorationMatrixStride)
      return;

   vtn_fail_if(member < 0,
               "The MatrixStride decoration is only allowed on members "
               "of OpTypeStruct");
   vtn_fail_if(dec->operands[0] == 0, "MatrixStride must be non-zero");

   struct member_decoration_ctx *ctx = void_ctx;

   struct vtn_type *mat_type = mutable_matrix_member(b, ctx->type, member);
   if (mat_type->row_major) {
      mat_type->array_element = vtn_type_copy(b, mat_type->array_element);
      mat_type->stride = mat_type->array_element->stride;
      mat_type->array_element->stride = dec->operands[0];

      mat_type->type = glsl_explicit_matrix_type(mat_type->type,
                                                 dec->operands[0], true);
      mat_type->array_element->type = glsl_get_column_type(mat_type->type);
   } else {
      vtn_assert(mat_type->array_element->stride > 0);
      mat_type->stride = dec->operands[0];

      mat_type->type = glsl_explicit_matrix_type(mat_type->type,
                                                 dec->operands[0], false);
   }

   /* Now that we've replaced the glsl_type with a properly strided matrix
    * type, rewrite the member type so that it's an array of the proper kind
    * of glsl_type.
    */
   vtn_array_type_rewrite_glsl_type(ctx->type->members[member]);
   ctx->fields[member].type = ctx->type->members[member]->type;
}

static void
struct_packed_decoration_cb(struct vtn_builder *b,
                            struct vtn_value *val, int member,
                            const struct vtn_decoration *dec, void *void_ctx)
{
   vtn_assert(val->type->base_type == vtn_base_type_struct);
   if (dec->decoration == SpvDecorationCPacked) {
      if (b->shader->info.stage != MESA_SHADER_KERNEL) {
         vtn_warn("Decoration only allowed for CL-style kernels: %s",
                  spirv_decoration_to_string(dec->decoration));
      }
      val->type->packed = true;
   }
}

static void
struct_block_decoration_cb(struct vtn_builder *b,
                           struct vtn_value *val, int member,
                           const struct vtn_decoration *dec, void *ctx)
{
   if (member != -1)
      return;

   struct vtn_type *type = val->type;
   if (dec->decoration == SpvDecorationBlock)
      type->block = true;
   else if (dec->decoration == SpvDecorationBufferBlock)
      type->buffer_block = true;
}

static void
type_decoration_cb(struct vtn_builder *b,
                   struct vtn_value *val, int member,
                   const struct vtn_decoration *dec, UNUSED void *ctx)
{
   struct vtn_type *type = val->type;

   if (member != -1) {
      /* This should have been handled by OpTypeStruct */
      assert(val->type->base_type == vtn_base_type_struct);
      assert(member >= 0 && member < val->type->length);
      return;
   }

   switch (dec->decoration) {
   case SpvDecorationArrayStride:
      vtn_assert(type->base_type == vtn_base_type_array ||
                 type->base_type == vtn_base_type_pointer);
      break;
   case SpvDecorationBlock:
      vtn_assert(type->base_type == vtn_base_type_struct);
      vtn_assert(type->block);
      break;
   case SpvDecorationBufferBlock:
      vtn_assert(type->base_type == vtn_base_type_struct);
      vtn_assert(type->buffer_block);
      break;
   case SpvDecorationGLSLShared:
   case SpvDecorationGLSLPacked:
      /* Ignore these, since we get explicit offsets anyways */
      break;

   case SpvDecorationRowMajor:
   case SpvDecorationColMajor:
   case SpvDecorationMatrixStride:
   case SpvDecorationBuiltIn:
   case SpvDecorationNoPerspective:
   case SpvDecorationFlat:
   case SpvDecorationPatch:
   case SpvDecorationCentroid:
   case SpvDecorationSample:
   case SpvDecorationExplicitInterpAMD:
   case SpvDecorationVolatile:
   case SpvDecorationCoherent:
   case SpvDecorationNonWritable:
   case SpvDecorationNonReadable:
   case SpvDecorationUniform:
   case SpvDecorationUniformId:
   case SpvDecorationLocation:
   case SpvDecorationComponent:
   case SpvDecorationOffset:
   case SpvDecorationXfbBuffer:
   case SpvDecorationXfbStride:
   case SpvDecorationUserSemantic:
      vtn_warn("Decoration only allowed for struct members: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationStream:
      /* We don't need to do anything here: the stream is filled in when
       * applying the decoration to a variable.  Just check that, when the
       * decoration is not on a struct member, the target is a struct.
       */
      vtn_assert(type->base_type == vtn_base_type_struct);
      break;

   case SpvDecorationRelaxedPrecision:
   case SpvDecorationSpecId:
   case SpvDecorationInvariant:
   case SpvDecorationRestrict:
   case SpvDecorationAliased:
   case SpvDecorationConstant:
   case SpvDecorationIndex:
   case SpvDecorationBinding:
   case SpvDecorationDescriptorSet:
   case SpvDecorationLinkageAttributes:
   case SpvDecorationNoContraction:
   case SpvDecorationInputAttachmentIndex:
      vtn_warn("Decoration not allowed on types: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationCPacked:
      /* Handled when parsing a struct type, nothing to do here. */
      break;

   case SpvDecorationSaturatedConversion:
   case SpvDecorationFuncParamAttr:
   case SpvDecorationFPRoundingMode:
   case SpvDecorationFPFastMathMode:
   case SpvDecorationAlignment:
      vtn_warn("Decoration only allowed for CL-style kernels: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationUserTypeGOOGLE:
      /* User semantic decorations can safely be ignored by the driver. */
      break;

   default:
      vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
   }
}

static unsigned
translate_image_format(struct vtn_builder *b, SpvImageFormat format)
{
   switch (format) {
   case SpvImageFormatUnknown:      return PIPE_FORMAT_NONE;
   case SpvImageFormatRgba32f:      return PIPE_FORMAT_R32G32B32A32_FLOAT;
   case SpvImageFormatRgba16f:      return PIPE_FORMAT_R16G16B16A16_FLOAT;
   case SpvImageFormatR32f:         return PIPE_FORMAT_R32_FLOAT;
   case SpvImageFormatRgba8:        return PIPE_FORMAT_R8G8B8A8_UNORM;
   case SpvImageFormatRgba8Snorm:   return PIPE_FORMAT_R8G8B8A8_SNORM;
   case SpvImageFormatRg32f:        return PIPE_FORMAT_R32G32_FLOAT;
   case SpvImageFormatRg16f:        return PIPE_FORMAT_R16G16_FLOAT;
   case SpvImageFormatR11fG11fB10f: return PIPE_FORMAT_R11G11B10_FLOAT;
   case SpvImageFormatR16f:         return PIPE_FORMAT_R16_FLOAT;
   case SpvImageFormatRgba16:       return PIPE_FORMAT_R16G16B16A16_UNORM;
   case SpvImageFormatRgb10A2:      return PIPE_FORMAT_R10G10B10A2_UNORM;
   case SpvImageFormatRg16:         return PIPE_FORMAT_R16G16_UNORM;
   case SpvImageFormatRg8:          return PIPE_FORMAT_R8G8_UNORM;
   case SpvImageFormatR16:          return PIPE_FORMAT_R16_UNORM;
   case SpvImageFormatR8:           return PIPE_FORMAT_R8_UNORM;
   case SpvImageFormatRgba16Snorm:  return PIPE_FORMAT_R16G16B16A16_SNORM;
   case SpvImageFormatRg16Snorm:    return PIPE_FORMAT_R16G16_SNORM;
   case SpvImageFormatRg8Snorm:     return PIPE_FORMAT_R8G8_SNORM;
   case SpvImageFormatR16Snorm:     return PIPE_FORMAT_R16_SNORM;
   case SpvImageFormatR8Snorm:      return PIPE_FORMAT_R8_SNORM;
   case SpvImageFormatRgba32i:      return PIPE_FORMAT_R32G32B32A32_SINT;
   case SpvImageFormatRgba16i:      return PIPE_FORMAT_R16G16B16A16_SINT;
   case SpvImageFormatRgba8i:       return PIPE_FORMAT_R8G8B8A8_SINT;
   case SpvImageFormatR32i:         return PIPE_FORMAT_R32_SINT;
   case SpvImageFormatRg32i:        return PIPE_FORMAT_R32G32_SINT;
   case SpvImageFormatRg16i:        return PIPE_FORMAT_R16G16_SINT;
   case SpvImageFormatRg8i:         return PIPE_FORMAT_R8G8_SINT;
   case SpvImageFormatR16i:         return PIPE_FORMAT_R16_SINT;
   case SpvImageFormatR8i:          return PIPE_FORMAT_R8_SINT;
   case SpvImageFormatRgba32ui:     return PIPE_FORMAT_R32G32B32A32_UINT;
   case SpvImageFormatRgba16ui:     return PIPE_FORMAT_R16G16B16A16_UINT;
   case SpvImageFormatRgba8ui:      return PIPE_FORMAT_R8G8B8A8_UINT;
   case SpvImageFormatR32ui:        return PIPE_FORMAT_R32_UINT;
   case SpvImageFormatRgb10a2ui:    return PIPE_FORMAT_R10G10B10A2_UINT;
   case SpvImageFormatRg32ui:       return PIPE_FORMAT_R32G32_UINT;
   case SpvImageFormatRg16ui:       return PIPE_FORMAT_R16G16_UINT;
   case SpvImageFormatRg8ui:        return PIPE_FORMAT_R8G8_UINT;
   case SpvImageFormatR16ui:        return PIPE_FORMAT_R16_UINT;
   case SpvImageFormatR8ui:         return PIPE_FORMAT_R8_UINT;
   case SpvImageFormatR64ui:        return PIPE_FORMAT_R64_UINT;
   case SpvImageFormatR64i:         return PIPE_FORMAT_R64_SINT;
   default:
      vtn_fail("Invalid image format: %s (%u)",
               spirv_imageformat_to_string(format), format);
   }
}
static void
vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
                const uint32_t *w, unsigned count)
{
   struct vtn_value *val = NULL;

   /* In order to properly handle forward declarations, we have to defer
    * allocation for pointer types.
    */
   if (opcode != SpvOpTypePointer && opcode != SpvOpTypeForwardPointer) {
      val = vtn_push_value(b, w[1], vtn_value_type_type);
      vtn_fail_if(val->type != NULL,
                  "Only pointers can have forward declarations");
      val->type = rzalloc(b, struct vtn_type);
      val->type->id = w[1];
   }

   switch (opcode) {
   case SpvOpTypeVoid:
      val->type->base_type = vtn_base_type_void;
      val->type->type = glsl_void_type();
      break;
   case SpvOpTypeBool:
      val->type->base_type = vtn_base_type_scalar;
      val->type->type = glsl_bool_type();
      val->type->length = 1;
      break;
   case SpvOpTypeInt: {
      int bit_size = w[2];
      const bool signedness = w[3];
      vtn_fail_if(bit_size != 8 && bit_size != 16 &&
                  bit_size != 32 && bit_size != 64,
                  "Invalid int bit size: %u", bit_size);
      val->type->base_type = vtn_base_type_scalar;
      val->type->type = signedness ? glsl_intN_t_type(bit_size) :
                                     glsl_uintN_t_type(bit_size);
      val->type->length = 1;
      break;
   }

   case SpvOpTypeFloat: {
      int bit_size = w[2];
      val->type->base_type = vtn_base_type_scalar;
      vtn_fail_if(bit_size != 16 && bit_size != 32 && bit_size != 64,
                  "Invalid float bit size: %u", bit_size);
      val->type->type = glsl_floatN_t_type(bit_size);
      val->type->length = 1;
      break;
   }
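   /* Note: boolean vectors below get a 4-byte component stride; vtn treats
    * booleans as 32-bit values whenever they have to live in memory.
    */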
   case SpvOpTypeVector: {
      struct vtn_type *base = vtn_get_type(b, w[2]);
      unsigned elems = w[3];

      vtn_fail_if(base->base_type != vtn_base_type_scalar,
                  "Base type for OpTypeVector must be a scalar");
      vtn_fail_if((elems < 2 || elems > 4) && (elems != 8) && (elems != 16),
                  "Invalid component count for OpTypeVector");

      val->type->base_type = vtn_base_type_vector;
      val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
      val->type->length = elems;
      val->type->stride = glsl_type_is_boolean(val->type->type)
                          ? 4 : glsl_get_bit_size(base->type) / 8;
      val->type->array_element = base;
      break;
   }

   case SpvOpTypeMatrix: {
      struct vtn_type *base = vtn_get_type(b, w[2]);
      unsigned columns = w[3];

      vtn_fail_if(base->base_type != vtn_base_type_vector,
                  "Base type for OpTypeMatrix must be a vector");
      vtn_fail_if(columns < 2 || columns > 4,
                  "Invalid column count for OpTypeMatrix");

      val->type->base_type = vtn_base_type_matrix;
      val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
                                         glsl_get_vector_elements(base->type),
                                         columns);
      vtn_fail_if(glsl_type_is_error(val->type->type),
                  "Unsupported base type for OpTypeMatrix");
      assert(!glsl_type_is_error(val->type->type));
      val->type->length = columns;
      val->type->array_element = base;
      val->type->row_major = false;
      val->type->stride = 0;
      break;
   }

   case SpvOpTypeRuntimeArray:
   case SpvOpTypeArray: {
      struct vtn_type *array_element = vtn_get_type(b, w[2]);

      if (opcode == SpvOpTypeRuntimeArray) {
         /* A length of 0 is used to denote unsized arrays */
         val->type->length = 0;
      } else {
         val->type->length = vtn_constant_uint(b, w[3]);
      }

      val->type->base_type = vtn_base_type_array;
      val->type->array_element = array_element;

      vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
      val->type->type = glsl_array_type(array_element->type, val->type->length,
                                        val->type->stride);
      break;
   }

   case SpvOpTypeStruct: {
      unsigned num_fields = count - 2;
      val->type->base_type = vtn_base_type_struct;
      val->type->length = num_fields;
      val->type->members = ralloc_array(b, struct vtn_type *, num_fields);
      val->type->offsets = ralloc_array(b, unsigned, num_fields);
      val->type->packed = false;

      NIR_VLA(struct glsl_struct_field, fields, count);
      for (unsigned i = 0; i < num_fields; i++) {
         val->type->members[i] = vtn_get_type(b, w[i + 2]);
         fields[i] = (struct glsl_struct_field) {
            .type = val->type->members[i]->type,
            .name = ralloc_asprintf(b, "field%d", i),
            .location = -1,
            .offset = -1,
         };
      }

      vtn_foreach_decoration(b, val, struct_packed_decoration_cb, NULL);

      struct member_decoration_ctx ctx = {
         .num_fields = num_fields,
         .fields = fields,
         .type = val->type
      };

      vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);

      /* Propagate access specifiers that are present on all members to the overall type */
      enum gl_access_qualifier overall_access = ACCESS_COHERENT | ACCESS_VOLATILE |
                                                ACCESS_NON_READABLE | ACCESS_NON_WRITEABLE;
      for (unsigned i = 0; i < num_fields; ++i)
         overall_access &= val->type->members[i]->access;
      val->type->access = overall_access;

      vtn_foreach_decoration(b, val, struct_member_matrix_stride_cb, &ctx);

      vtn_foreach_decoration(b, val, struct_block_decoration_cb, NULL);

      const char *name = val->name;

      if (val->type->block || val->type->buffer_block) {
         /* Packing will be ignored since types coming from SPIR-V are
          * explicitly laid out.
          */
         val->type->type = glsl_interface_type(fields, num_fields,
                                               /* packing */ 0, false,
                                               name ? name : "block");
      } else {
         val->type->type = glsl_struct_type(fields, num_fields,
                                            name ? name : "struct",
                                            val->type->packed);
      }
      break;
   }

   case SpvOpTypeFunction: {
      val->type->base_type = vtn_base_type_function;
      val->type->type = NULL;

      val->type->return_type = vtn_get_type(b, w[2]);

      const unsigned num_params = count - 3;
      val->type->length = num_params;
      val->type->params = ralloc_array(b, struct vtn_type *, num_params);
      for (unsigned i = 0; i < count - 3; i++) {
         val->type->params[i] = vtn_get_type(b, w[i + 3]);
      }
      break;
   }

   case SpvOpTypePointer:
   case SpvOpTypeForwardPointer: {
      /* We can't blindly push the value because it might be a forward
       * declaration.
       */
      val = vtn_untyped_value(b, w[1]);

      SpvStorageClass storage_class = w[2];

      vtn_fail_if(opcode == SpvOpTypeForwardPointer &&
                  b->shader->info.stage != MESA_SHADER_KERNEL &&
                  storage_class != SpvStorageClassPhysicalStorageBuffer,
                  "OpTypeForwardPointer is only allowed in Vulkan with "
                  "the PhysicalStorageBuffer storage class");

      struct vtn_type *deref_type = NULL;
      if (opcode == SpvOpTypePointer)
         deref_type = vtn_get_type(b, w[3]);

      if (val->value_type == vtn_value_type_invalid) {
         val->value_type = vtn_value_type_type;
         val->type = rzalloc(b, struct vtn_type);
         val->type->id = w[1];
         val->type->base_type = vtn_base_type_pointer;
         val->type->storage_class = storage_class;

         /* These can actually be stored to nir_variables and used as SSA
          * values so they need a real glsl_type.
          */
         enum vtn_variable_mode mode = vtn_storage_class_to_mode(
            b, storage_class, deref_type, NULL);

         /* The deref type should only matter for the UniformConstant storage
          * class.  In particular, it should never matter for any storage
          * classes that are allowed in combination with OpTypeForwardPointer.
          */
         if (storage_class != SpvStorageClassUniform &&
             storage_class != SpvStorageClassUniformConstant) {
            assert(mode == vtn_storage_class_to_mode(b, storage_class,
                                                     NULL, NULL));
         }

         val->type->type = nir_address_format_to_glsl_type(
            vtn_mode_to_address_format(b, mode));
      } else {
         vtn_fail_if(val->type->storage_class != storage_class,
                     "The storage classes of an OpTypePointer and any "
                     "OpTypeForwardPointers that provide forward "
                     "declarations of it must match.");
      }

      if (opcode == SpvOpTypePointer) {
         vtn_fail_if(val->type->deref != NULL,
                     "While OpTypeForwardPointer can be used to provide a "
                     "forward declaration of a pointer, OpTypePointer can "
                     "only be used once for a given id.");

         val->type->deref = deref_type;

         /* Only certain storage classes use ArrayStride. */
         switch (storage_class) {
         case SpvStorageClassWorkgroup:
            if (!b->options->caps.workgroup_memory_explicit_layout)
               break;
            FALLTHROUGH;

         case SpvStorageClassUniform:
         case SpvStorageClassPushConstant:
         case SpvStorageClassStorageBuffer:
         case SpvStorageClassPhysicalStorageBuffer:
            vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
            break;

         default:
            /* Nothing to do. */
            break;
         }
      }
      break;
   }
   case SpvOpTypeImage: {
      val->type->base_type = vtn_base_type_image;

      /* Images are represented in NIR as a scalar SSA value that is the
       * result of a deref instruction.  An OpLoad on an OpTypeImage pointer
       * from UniformConstant memory just takes the NIR deref from the pointer
       * and turns it into an SSA value.
       */
      val->type->type = nir_address_format_to_glsl_type(
         vtn_mode_to_address_format(b, vtn_variable_mode_function));

      const struct vtn_type *sampled_type = vtn_get_type(b, w[2]);
      if (b->shader->info.stage == MESA_SHADER_KERNEL) {
         vtn_fail_if(sampled_type->base_type != vtn_base_type_void,
                     "Sampled type of OpTypeImage must be void for kernels");
      } else {
         vtn_fail_if(sampled_type->base_type != vtn_base_type_scalar,
                     "Sampled type of OpTypeImage must be a scalar");
         if (b->options->caps.image_atomic_int64) {
            vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32 &&
                        glsl_get_bit_size(sampled_type->type) != 64,
                        "Sampled type of OpTypeImage must be a 32 or 64-bit "
                        "scalar");
         } else {
            vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32,
                        "Sampled type of OpTypeImage must be a 32-bit scalar");
         }
      }

      enum glsl_sampler_dim dim;
      switch ((SpvDim)w[3]) {
      case SpvDim1D:          dim = GLSL_SAMPLER_DIM_1D;      break;
      case SpvDim2D:          dim = GLSL_SAMPLER_DIM_2D;      break;
      case SpvDim3D:          dim = GLSL_SAMPLER_DIM_3D;      break;
      case SpvDimCube:        dim = GLSL_SAMPLER_DIM_CUBE;    break;
      case SpvDimRect:        dim = GLSL_SAMPLER_DIM_RECT;    break;
      case SpvDimBuffer:      dim = GLSL_SAMPLER_DIM_BUF;     break;
      case SpvDimSubpassData: dim = GLSL_SAMPLER_DIM_SUBPASS; break;
      default:
         vtn_fail("Invalid SPIR-V image dimensionality: %s (%u)",
                  spirv_dim_to_string((SpvDim)w[3]), w[3]);
      }

      /* w[4]: as per Vulkan spec "Validation Rules within a Module",
       * The “Depth” operand of OpTypeImage is ignored.
       */
      bool is_array = w[5];
      bool multisampled = w[6];
      unsigned sampled = w[7];
      SpvImageFormat format = w[8];

      if (count > 9)
         val->type->access_qualifier = w[9];
      else if (b->shader->info.stage == MESA_SHADER_KERNEL)
         /* Per the CL C spec: If no qualifier is provided, read_only is assumed. */
         val->type->access_qualifier = SpvAccessQualifierReadOnly;
      else
         val->type->access_qualifier = SpvAccessQualifierReadWrite;

      if (multisampled) {
         if (dim == GLSL_SAMPLER_DIM_2D)
            dim = GLSL_SAMPLER_DIM_MS;
         else if (dim == GLSL_SAMPLER_DIM_SUBPASS)
            dim = GLSL_SAMPLER_DIM_SUBPASS_MS;
         else
            vtn_fail("Unsupported multisampled image type");
      }

      val->type->image_format = translate_image_format(b, format);

      enum glsl_base_type sampled_base_type =
         glsl_get_base_type(sampled_type->type);
      if (sampled == 1) {
         val->type->glsl_image = glsl_sampler_type(dim, false, is_array,
                                                   sampled_base_type);
      } else if (sampled == 2) {
         val->type->glsl_image = glsl_image_type(dim, is_array,
                                                 sampled_base_type);
      } else if (b->shader->info.stage == MESA_SHADER_KERNEL) {
         val->type->glsl_image = glsl_image_type(dim, is_array,
                                                 GLSL_TYPE_VOID);
      } else {
         vtn_fail("We need to know if the image will be sampled");
      }
      break;
   }
An OpLoad on an 1746 * OpTypeSampledImage pointer from UniformConstant memory just takes 1747 * the NIR deref from the pointer and duplicates it to both vector 1748 * components. 1749 */ 1750 nir_address_format addr_format = 1751 vtn_mode_to_address_format(b, vtn_variable_mode_function); 1752 assert(nir_address_format_num_components(addr_format) == 1); 1753 unsigned bit_size = nir_address_format_bit_size(addr_format); 1754 assert(bit_size == 32 || bit_size == 64); 1755 1756 enum glsl_base_type base_type = 1757 bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64; 1758 val->type->type = glsl_vector_type(base_type, 2); 1759 break; 1760 } 1761 1762 case SpvOpTypeSampler: 1763 val->type->base_type = vtn_base_type_sampler; 1764 1765 /* Samplers are represented in NIR as a scalar SSA value that is the 1766 * result of a deref instruction. An OpLoad on an OpTypeSampler pointer 1767 * from UniformConstant memory just takes the NIR deref from the pointer 1768 * and turns it into an SSA value. 1769 */ 1770 val->type->type = nir_address_format_to_glsl_type( 1771 vtn_mode_to_address_format(b, vtn_variable_mode_function)); 1772 break; 1773 1774 case SpvOpTypeAccelerationStructureKHR: 1775 val->type->base_type = vtn_base_type_accel_struct; 1776 val->type->type = glsl_uint64_t_type(); 1777 break; 1778 1779 case SpvOpTypeOpaque: 1780 val->type->base_type = vtn_base_type_struct; 1781 const char *name = vtn_string_literal(b, &w[2], count - 2, NULL); 1782 val->type->type = glsl_struct_type(NULL, 0, name, false); 1783 break; 1784 1785 case SpvOpTypeEvent: 1786 val->type->base_type = vtn_base_type_event; 1787 val->type->type = glsl_int_type(); 1788 break; 1789 1790 case SpvOpTypeDeviceEvent: 1791 case SpvOpTypeReserveId: 1792 case SpvOpTypeQueue: 1793 case SpvOpTypePipe: 1794 default: 1795 vtn_fail_with_opcode("Unhandled opcode", opcode); 1796 } 1797 1798 vtn_foreach_decoration(b, val, type_decoration_cb, NULL); 1799 1800 if (val->type->base_type == vtn_base_type_struct && 1801 (val->type->block || val->type->buffer_block)) { 1802 for (unsigned i = 0; i < val->type->length; i++) { 1803 vtn_fail_if(vtn_type_contains_block(b, val->type->members[i]), 1804 "Block and BufferBlock decorations cannot decorate a " 1805 "structure type that is nested at any level inside " 1806 "another structure type decorated with Block or " 1807 "BufferBlock."); 1808 } 1809 } 1810} 1811 1812static nir_constant * 1813vtn_null_constant(struct vtn_builder *b, struct vtn_type *type) 1814{ 1815 nir_constant *c = rzalloc(b, nir_constant); 1816 1817 switch (type->base_type) { 1818 case vtn_base_type_scalar: 1819 case vtn_base_type_vector: 1820 /* Nothing to do here. It's already initialized to zero */ 1821 break; 1822 1823 case vtn_base_type_pointer: { 1824 enum vtn_variable_mode mode = vtn_storage_class_to_mode( 1825 b, type->storage_class, type->deref, NULL); 1826 nir_address_format addr_format = vtn_mode_to_address_format(b, mode); 1827 1828 const nir_const_value *null_value = nir_address_format_null_value(addr_format); 1829 memcpy(c->values, null_value, 1830 sizeof(nir_const_value) * nir_address_format_num_components(addr_format)); 1831 break; 1832 } 1833 1834 case vtn_base_type_void: 1835 case vtn_base_type_image: 1836 case vtn_base_type_sampler: 1837 case vtn_base_type_sampled_image: 1838 case vtn_base_type_function: 1839 case vtn_base_type_event: 1840 /* For those we have to return something but it doesn't matter what. 
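       * For instance, OpConstantNull of an OpTypeEvent is legal in OpenCL
       * SPIR-V, but no particular bit pattern is meaningful for it, so the
       * zero-filled nir_constant from the rzalloc above is as good as
       * anything.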
*/ 1841 break; 1842 1843 case vtn_base_type_matrix: 1844 case vtn_base_type_array: 1845 vtn_assert(type->length > 0); 1846 c->num_elements = type->length; 1847 c->elements = ralloc_array(b, nir_constant *, c->num_elements); 1848 1849 c->elements[0] = vtn_null_constant(b, type->array_element); 1850 for (unsigned i = 1; i < c->num_elements; i++) 1851 c->elements[i] = c->elements[0]; 1852 break; 1853 1854 case vtn_base_type_struct: 1855 c->num_elements = type->length; 1856 c->elements = ralloc_array(b, nir_constant *, c->num_elements); 1857 for (unsigned i = 0; i < c->num_elements; i++) 1858 c->elements[i] = vtn_null_constant(b, type->members[i]); 1859 break; 1860 1861 default: 1862 vtn_fail("Invalid type for null constant"); 1863 } 1864 1865 return c; 1866} 1867 1868static void 1869spec_constant_decoration_cb(struct vtn_builder *b, UNUSED struct vtn_value *val, 1870 ASSERTED int member, 1871 const struct vtn_decoration *dec, void *data) 1872{ 1873 vtn_assert(member == -1); 1874 if (dec->decoration != SpvDecorationSpecId) 1875 return; 1876 1877 nir_const_value *value = data; 1878 for (unsigned i = 0; i < b->num_specializations; i++) { 1879 if (b->specializations[i].id == dec->operands[0]) { 1880 *value = b->specializations[i].value; 1881 return; 1882 } 1883 } 1884} 1885 1886static void 1887handle_workgroup_size_decoration_cb(struct vtn_builder *b, 1888 struct vtn_value *val, 1889 ASSERTED int member, 1890 const struct vtn_decoration *dec, 1891 UNUSED void *data) 1892{ 1893 vtn_assert(member == -1); 1894 if (dec->decoration != SpvDecorationBuiltIn || 1895 dec->operands[0] != SpvBuiltInWorkgroupSize) 1896 return; 1897 1898 vtn_assert(val->type->type == glsl_vector_type(GLSL_TYPE_UINT, 3)); 1899 b->workgroup_size_builtin = val; 1900} 1901 1902static void 1903vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, 1904 const uint32_t *w, unsigned count) 1905{ 1906 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); 1907 val->constant = rzalloc(b, nir_constant); 1908 switch (opcode) { 1909 case SpvOpConstantTrue: 1910 case SpvOpConstantFalse: 1911 case SpvOpSpecConstantTrue: 1912 case SpvOpSpecConstantFalse: { 1913 vtn_fail_if(val->type->type != glsl_bool_type(), 1914 "Result type of %s must be OpTypeBool", 1915 spirv_op_to_string(opcode)); 1916 1917 bool bval = (opcode == SpvOpConstantTrue || 1918 opcode == SpvOpSpecConstantTrue); 1919 1920 nir_const_value u32val = nir_const_value_for_uint(bval, 32); 1921 1922 if (opcode == SpvOpSpecConstantTrue || 1923 opcode == SpvOpSpecConstantFalse) 1924 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32val); 1925 1926 val->constant->values[0].b = u32val.u32 != 0; 1927 break; 1928 } 1929 1930 case SpvOpConstant: 1931 case SpvOpSpecConstant: { 1932 vtn_fail_if(val->type->base_type != vtn_base_type_scalar, 1933 "Result type of %s must be a scalar", 1934 spirv_op_to_string(opcode)); 1935 int bit_size = glsl_get_bit_size(val->type->type); 1936 switch (bit_size) { 1937 case 64: 1938 val->constant->values[0].u64 = vtn_u64_literal(&w[3]); 1939 break; 1940 case 32: 1941 val->constant->values[0].u32 = w[3]; 1942 break; 1943 case 16: 1944 val->constant->values[0].u16 = w[3]; 1945 break; 1946 case 8: 1947 val->constant->values[0].u8 = w[3]; 1948 break; 1949 default: 1950 vtn_fail("Unsupported SpvOpConstant bit size: %u", bit_size); 1951 } 1952 1953 if (opcode == SpvOpSpecConstant) 1954 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, 1955 &val->constant->values[0]); 1956 break; 1957 } 1958 1959 case 
SpvOpSpecConstantComposite: 1960 case SpvOpConstantComposite: { 1961 unsigned elem_count = count - 3; 1962 vtn_fail_if(elem_count != val->type->length, 1963 "%s has %u constituents, expected %u", 1964 spirv_op_to_string(opcode), elem_count, val->type->length); 1965 1966 nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); 1967 val->is_undef_constant = true; 1968 for (unsigned i = 0; i < elem_count; i++) { 1969 struct vtn_value *elem_val = vtn_untyped_value(b, w[i + 3]); 1970 1971 if (elem_val->value_type == vtn_value_type_constant) { 1972 elems[i] = elem_val->constant; 1973 val->is_undef_constant = val->is_undef_constant && 1974 elem_val->is_undef_constant; 1975 } else { 1976 vtn_fail_if(elem_val->value_type != vtn_value_type_undef, 1977 "only constants or undefs allowed for " 1978 "SpvOpConstantComposite"); 1979 /* to make it easier, just insert a NULL constant for now */ 1980 elems[i] = vtn_null_constant(b, elem_val->type); 1981 } 1982 } 1983 1984 switch (val->type->base_type) { 1985 case vtn_base_type_vector: { 1986 assert(glsl_type_is_vector(val->type->type)); 1987 for (unsigned i = 0; i < elem_count; i++) 1988 val->constant->values[i] = elems[i]->values[0]; 1989 break; 1990 } 1991 1992 case vtn_base_type_matrix: 1993 case vtn_base_type_struct: 1994 case vtn_base_type_array: 1995 ralloc_steal(val->constant, elems); 1996 val->constant->num_elements = elem_count; 1997 val->constant->elements = elems; 1998 break; 1999 2000 default: 2001 vtn_fail("Result type of %s must be a composite type", 2002 spirv_op_to_string(opcode)); 2003 } 2004 break; 2005 } 2006 2007 case SpvOpSpecConstantOp: { 2008 nir_const_value u32op = nir_const_value_for_uint(w[3], 32); 2009 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32op); 2010 SpvOp opcode = u32op.u32; 2011 switch (opcode) { 2012 case SpvOpVectorShuffle: { 2013 struct vtn_value *v0 = &b->values[w[4]]; 2014 struct vtn_value *v1 = &b->values[w[5]]; 2015 2016 vtn_assert(v0->value_type == vtn_value_type_constant || 2017 v0->value_type == vtn_value_type_undef); 2018 vtn_assert(v1->value_type == vtn_value_type_constant || 2019 v1->value_type == vtn_value_type_undef); 2020 2021 unsigned len0 = glsl_get_vector_elements(v0->type->type); 2022 unsigned len1 = glsl_get_vector_elements(v1->type->type); 2023 2024 vtn_assert(len0 + len1 < 16); 2025 2026 unsigned bit_size = glsl_get_bit_size(val->type->type); 2027 unsigned bit_size0 = glsl_get_bit_size(v0->type->type); 2028 unsigned bit_size1 = glsl_get_bit_size(v1->type->type); 2029 2030 vtn_assert(bit_size == bit_size0 && bit_size == bit_size1); 2031 (void)bit_size0; (void)bit_size1; 2032 2033 nir_const_value undef = { .u64 = 0xdeadbeefdeadbeef }; 2034 nir_const_value combined[NIR_MAX_VEC_COMPONENTS * 2]; 2035 2036 if (v0->value_type == vtn_value_type_constant) { 2037 for (unsigned i = 0; i < len0; i++) 2038 combined[i] = v0->constant->values[i]; 2039 } 2040 if (v1->value_type == vtn_value_type_constant) { 2041 for (unsigned i = 0; i < len1; i++) 2042 combined[len0 + i] = v1->constant->values[i]; 2043 } 2044 2045 for (unsigned i = 0, j = 0; i < count - 6; i++, j++) { 2046 uint32_t comp = w[i + 6]; 2047 if (comp == (uint32_t)-1) { 2048 /* If component is not used, set the value to a known constant 2049 * to detect if it is wrongly used. 
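                * 0xdeadbeef is just an arbitrary, recognizable bit pattern:
                * any consumer that wrongly reads an unused component will
                * see it consistently instead of uninitialized memory.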
2050 */ 2051 val->constant->values[j] = undef; 2052 } else { 2053 vtn_fail_if(comp >= len0 + len1, 2054 "All Component literals must either be FFFFFFFF " 2055 "or in [0, N - 1] (inclusive)."); 2056 val->constant->values[j] = combined[comp]; 2057 } 2058 } 2059 break; 2060 } 2061 2062 case SpvOpCompositeExtract: 2063 case SpvOpCompositeInsert: { 2064 struct vtn_value *comp; 2065 unsigned deref_start; 2066 struct nir_constant **c; 2067 if (opcode == SpvOpCompositeExtract) { 2068 comp = vtn_value(b, w[4], vtn_value_type_constant); 2069 deref_start = 5; 2070 c = &comp->constant; 2071 } else { 2072 comp = vtn_value(b, w[5], vtn_value_type_constant); 2073 deref_start = 6; 2074 val->constant = nir_constant_clone(comp->constant, 2075 (nir_variable *)b); 2076 c = &val->constant; 2077 } 2078 2079 int elem = -1; 2080 const struct vtn_type *type = comp->type; 2081 for (unsigned i = deref_start; i < count; i++) { 2082 vtn_fail_if(w[i] > type->length, 2083 "%uth index of %s is %u but the type has only " 2084 "%u elements", i - deref_start, 2085 spirv_op_to_string(opcode), w[i], type->length); 2086 2087 switch (type->base_type) { 2088 case vtn_base_type_vector: 2089 elem = w[i]; 2090 type = type->array_element; 2091 break; 2092 2093 case vtn_base_type_matrix: 2094 case vtn_base_type_array: 2095 c = &(*c)->elements[w[i]]; 2096 type = type->array_element; 2097 break; 2098 2099 case vtn_base_type_struct: 2100 c = &(*c)->elements[w[i]]; 2101 type = type->members[w[i]]; 2102 break; 2103 2104 default: 2105 vtn_fail("%s must only index into composite types", 2106 spirv_op_to_string(opcode)); 2107 } 2108 } 2109 2110 if (opcode == SpvOpCompositeExtract) { 2111 if (elem == -1) { 2112 val->constant = *c; 2113 } else { 2114 unsigned num_components = type->length; 2115 for (unsigned i = 0; i < num_components; i++) 2116 val->constant->values[i] = (*c)->values[elem + i]; 2117 } 2118 } else { 2119 struct vtn_value *insert = 2120 vtn_value(b, w[4], vtn_value_type_constant); 2121 vtn_assert(insert->type == type); 2122 if (elem == -1) { 2123 *c = insert->constant; 2124 } else { 2125 unsigned num_components = type->length; 2126 for (unsigned i = 0; i < num_components; i++) 2127 (*c)->values[elem + i] = insert->constant->values[i]; 2128 } 2129 } 2130 break; 2131 } 2132 2133 default: { 2134 bool swap; 2135 nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(val->type->type); 2136 nir_alu_type src_alu_type = dst_alu_type; 2137 unsigned num_components = glsl_get_vector_elements(val->type->type); 2138 unsigned bit_size; 2139 2140 vtn_assert(count <= 7); 2141 2142 switch (opcode) { 2143 case SpvOpSConvert: 2144 case SpvOpFConvert: 2145 case SpvOpUConvert: 2146 /* We have a source in a conversion */ 2147 src_alu_type = 2148 nir_get_nir_type_for_glsl_type(vtn_get_value_type(b, w[4])->type); 2149 /* We use the bitsize of the conversion source to evaluate the opcode later */ 2150 bit_size = glsl_get_bit_size(vtn_get_value_type(b, w[4])->type); 2151 break; 2152 default: 2153 bit_size = glsl_get_bit_size(val->type->type); 2154 }; 2155 2156 bool exact; 2157 nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact, 2158 nir_alu_type_get_type_size(src_alu_type), 2159 nir_alu_type_get_type_size(dst_alu_type)); 2160 2161 /* No SPIR-V opcodes handled through this path should set exact. 2162 * Since it is ignored, assert on it. 
          */
         assert(!exact);

         nir_const_value src[3][NIR_MAX_VEC_COMPONENTS];

         for (unsigned i = 0; i < count - 4; i++) {
            struct vtn_value *src_val =
               vtn_value(b, w[4 + i], vtn_value_type_constant);

            /* If this is an unsized source, pull the bit size from the
             * source; otherwise, we'll use the bit size from the destination.
             */
            if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]))
               bit_size = glsl_get_bit_size(src_val->type->type);

            unsigned src_comps = nir_op_infos[op].input_sizes[i] ?
                                 nir_op_infos[op].input_sizes[i] :
                                 num_components;

            unsigned j = swap ? 1 - i : i;
            for (unsigned c = 0; c < src_comps; c++)
               src[j][c] = src_val->constant->values[c];
         }

         /* fix up fixed size sources */
         switch (op) {
         case nir_op_ishl:
         case nir_op_ishr:
         case nir_op_ushr: {
            if (bit_size == 32)
               break;
            for (unsigned i = 0; i < num_components; ++i) {
               switch (bit_size) {
               case 64: src[1][i].u32 = src[1][i].u64; break;
               case 16: src[1][i].u32 = src[1][i].u16; break;
               case 8:  src[1][i].u32 = src[1][i].u8;  break;
               }
            }
            break;
         }
         default:
            break;
         }

         nir_const_value *srcs[3] = {
            src[0], src[1], src[2],
         };
         nir_eval_const_opcode(op, val->constant->values,
                               num_components, bit_size, srcs,
                               b->shader->info.float_controls_execution_mode);
         break;
      } /* default */
      }
      break;
   }

   case SpvOpConstantNull:
      val->constant = vtn_null_constant(b, val->type);
      val->is_null_constant = true;
      break;

   default:
      vtn_fail_with_opcode("Unhandled opcode", opcode);
   }

   /* Now that we have the value, update the workgroup size if needed */
   if (gl_shader_stage_uses_workgroup(b->entry_point_stage))
      vtn_foreach_decoration(b, val, handle_workgroup_size_decoration_cb,
                             NULL);
}

static void
vtn_split_barrier_semantics(struct vtn_builder *b,
                            SpvMemorySemanticsMask semantics,
                            SpvMemorySemanticsMask *before,
                            SpvMemorySemanticsMask *after)
{
   /* For memory semantics embedded in operations, we split them into up to
    * two barriers, to be added before and after the operation. This is less
    * strict than if we propagated them all the way to the final backend
    * stage, but it still results in correct execution.
    *
    * A further improvement would be to pipe this information (and use it!)
    * through the next compiler layers, at the expense of making the handling
    * of barriers more complicated.
    */

   *before = SpvMemorySemanticsMaskNone;
   *after = SpvMemorySemanticsMaskNone;

   SpvMemorySemanticsMask order_semantics =
      semantics & (SpvMemorySemanticsAcquireMask |
                   SpvMemorySemanticsReleaseMask |
                   SpvMemorySemanticsAcquireReleaseMask |
                   SpvMemorySemanticsSequentiallyConsistentMask);

   if (util_bitcount(order_semantics) > 1) {
      /* Old GLSLang versions incorrectly set all the ordering bits. This was
       * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
       * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
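       * Assuming AcquireRelease is the safe choice here: it is at least as
       * strong as any single ordering bit the module could have meant, and
       * SequentiallyConsistent is treated as AcquireRelease anyway (see
       * below).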
2263 */ 2264 vtn_warn("Multiple memory ordering semantics specified, " 2265 "assuming AcquireRelease."); 2266 order_semantics = SpvMemorySemanticsAcquireReleaseMask; 2267 } 2268 2269 const SpvMemorySemanticsMask av_vis_semantics = 2270 semantics & (SpvMemorySemanticsMakeAvailableMask | 2271 SpvMemorySemanticsMakeVisibleMask); 2272 2273 const SpvMemorySemanticsMask storage_semantics = 2274 semantics & (SpvMemorySemanticsUniformMemoryMask | 2275 SpvMemorySemanticsSubgroupMemoryMask | 2276 SpvMemorySemanticsWorkgroupMemoryMask | 2277 SpvMemorySemanticsCrossWorkgroupMemoryMask | 2278 SpvMemorySemanticsAtomicCounterMemoryMask | 2279 SpvMemorySemanticsImageMemoryMask | 2280 SpvMemorySemanticsOutputMemoryMask); 2281 2282 const SpvMemorySemanticsMask other_semantics = 2283 semantics & ~(order_semantics | av_vis_semantics | storage_semantics | 2284 SpvMemorySemanticsVolatileMask); 2285 2286 if (other_semantics) 2287 vtn_warn("Ignoring unhandled memory semantics: %u\n", other_semantics); 2288 2289 /* SequentiallyConsistent is treated as AcquireRelease. */ 2290 2291 /* The RELEASE barrier happens BEFORE the operation, and it is usually 2292 * associated with a Store. All the write operations with a matching 2293 * semantics will not be reordered after the Store. 2294 */ 2295 if (order_semantics & (SpvMemorySemanticsReleaseMask | 2296 SpvMemorySemanticsAcquireReleaseMask | 2297 SpvMemorySemanticsSequentiallyConsistentMask)) { 2298 *before |= SpvMemorySemanticsReleaseMask | storage_semantics; 2299 } 2300 2301 /* The ACQUIRE barrier happens AFTER the operation, and it is usually 2302 * associated with a Load. All the operations with a matching semantics 2303 * will not be reordered before the Load. 2304 */ 2305 if (order_semantics & (SpvMemorySemanticsAcquireMask | 2306 SpvMemorySemanticsAcquireReleaseMask | 2307 SpvMemorySemanticsSequentiallyConsistentMask)) { 2308 *after |= SpvMemorySemanticsAcquireMask | storage_semantics; 2309 } 2310 2311 if (av_vis_semantics & SpvMemorySemanticsMakeVisibleMask) 2312 *before |= SpvMemorySemanticsMakeVisibleMask | storage_semantics; 2313 2314 if (av_vis_semantics & SpvMemorySemanticsMakeAvailableMask) 2315 *after |= SpvMemorySemanticsMakeAvailableMask | storage_semantics; 2316} 2317 2318static nir_memory_semantics 2319vtn_mem_semantics_to_nir_mem_semantics(struct vtn_builder *b, 2320 SpvMemorySemanticsMask semantics) 2321{ 2322 nir_memory_semantics nir_semantics = 0; 2323 2324 SpvMemorySemanticsMask order_semantics = 2325 semantics & (SpvMemorySemanticsAcquireMask | 2326 SpvMemorySemanticsReleaseMask | 2327 SpvMemorySemanticsAcquireReleaseMask | 2328 SpvMemorySemanticsSequentiallyConsistentMask); 2329 2330 if (util_bitcount(order_semantics) > 1) { 2331 /* Old GLSLang versions incorrectly set all the ordering bits. This was 2332 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo, 2333 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016). 2334 */ 2335 vtn_warn("Multiple memory ordering semantics bits specified, " 2336 "assuming AcquireRelease."); 2337 order_semantics = SpvMemorySemanticsAcquireReleaseMask; 2338 } 2339 2340 switch (order_semantics) { 2341 case 0: 2342 /* Not an ordering barrier. */ 2343 break; 2344 2345 case SpvMemorySemanticsAcquireMask: 2346 nir_semantics = NIR_MEMORY_ACQUIRE; 2347 break; 2348 2349 case SpvMemorySemanticsReleaseMask: 2350 nir_semantics = NIR_MEMORY_RELEASE; 2351 break; 2352 2353 case SpvMemorySemanticsSequentiallyConsistentMask: 2354 FALLTHROUGH; /* Treated as AcquireRelease in Vulkan. 
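       * nir_memory_semantics has no separate sequentially-consistent mode,
       * so NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE (set in the case below)
       * is the strongest ordering we can express at the NIR level.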
*/ 2355 case SpvMemorySemanticsAcquireReleaseMask: 2356 nir_semantics = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE; 2357 break; 2358 2359 default: 2360 unreachable("Invalid memory order semantics"); 2361 } 2362 2363 if (semantics & SpvMemorySemanticsMakeAvailableMask) { 2364 vtn_fail_if(!b->options->caps.vk_memory_model, 2365 "To use MakeAvailable memory semantics the VulkanMemoryModel " 2366 "capability must be declared."); 2367 nir_semantics |= NIR_MEMORY_MAKE_AVAILABLE; 2368 } 2369 2370 if (semantics & SpvMemorySemanticsMakeVisibleMask) { 2371 vtn_fail_if(!b->options->caps.vk_memory_model, 2372 "To use MakeVisible memory semantics the VulkanMemoryModel " 2373 "capability must be declared."); 2374 nir_semantics |= NIR_MEMORY_MAKE_VISIBLE; 2375 } 2376 2377 return nir_semantics; 2378} 2379 2380static nir_variable_mode 2381vtn_mem_semantics_to_nir_var_modes(struct vtn_builder *b, 2382 SpvMemorySemanticsMask semantics) 2383{ 2384 /* Vulkan Environment for SPIR-V says "SubgroupMemory, CrossWorkgroupMemory, 2385 * and AtomicCounterMemory are ignored". 2386 */ 2387 if (b->options->environment == NIR_SPIRV_VULKAN) { 2388 semantics &= ~(SpvMemorySemanticsSubgroupMemoryMask | 2389 SpvMemorySemanticsCrossWorkgroupMemoryMask | 2390 SpvMemorySemanticsAtomicCounterMemoryMask); 2391 } 2392 2393 /* TODO: Consider adding nir_var_mem_image mode to NIR so it can be used 2394 * for SpvMemorySemanticsImageMemoryMask. 2395 */ 2396 2397 nir_variable_mode modes = 0; 2398 if (semantics & (SpvMemorySemanticsUniformMemoryMask | 2399 SpvMemorySemanticsImageMemoryMask)) { 2400 modes |= nir_var_uniform | 2401 nir_var_mem_ubo | 2402 nir_var_mem_ssbo | 2403 nir_var_mem_global; 2404 } 2405 if (semantics & SpvMemorySemanticsWorkgroupMemoryMask) 2406 modes |= nir_var_mem_shared; 2407 if (semantics & SpvMemorySemanticsCrossWorkgroupMemoryMask) 2408 modes |= nir_var_mem_global; 2409 if (semantics & SpvMemorySemanticsOutputMemoryMask) { 2410 modes |= nir_var_shader_out; 2411 } 2412 2413 return modes; 2414} 2415 2416static nir_scope 2417vtn_scope_to_nir_scope(struct vtn_builder *b, SpvScope scope) 2418{ 2419 nir_scope nir_scope; 2420 switch (scope) { 2421 case SpvScopeDevice: 2422 vtn_fail_if(b->options->caps.vk_memory_model && 2423 !b->options->caps.vk_memory_model_device_scope, 2424 "If the Vulkan memory model is declared and any instruction " 2425 "uses Device scope, the VulkanMemoryModelDeviceScope " 2426 "capability must be declared."); 2427 nir_scope = NIR_SCOPE_DEVICE; 2428 break; 2429 2430 case SpvScopeQueueFamily: 2431 vtn_fail_if(!b->options->caps.vk_memory_model, 2432 "To use Queue Family scope, the VulkanMemoryModel capability " 2433 "must be declared."); 2434 nir_scope = NIR_SCOPE_QUEUE_FAMILY; 2435 break; 2436 2437 case SpvScopeWorkgroup: 2438 nir_scope = NIR_SCOPE_WORKGROUP; 2439 break; 2440 2441 case SpvScopeSubgroup: 2442 nir_scope = NIR_SCOPE_SUBGROUP; 2443 break; 2444 2445 case SpvScopeInvocation: 2446 nir_scope = NIR_SCOPE_INVOCATION; 2447 break; 2448 2449 case SpvScopeShaderCallKHR: 2450 nir_scope = NIR_SCOPE_SHADER_CALL; 2451 break; 2452 2453 default: 2454 vtn_fail("Invalid memory scope"); 2455 } 2456 2457 return nir_scope; 2458} 2459 2460static void 2461vtn_emit_scoped_control_barrier(struct vtn_builder *b, SpvScope exec_scope, 2462 SpvScope mem_scope, 2463 SpvMemorySemanticsMask semantics) 2464{ 2465 nir_memory_semantics nir_semantics = 2466 vtn_mem_semantics_to_nir_mem_semantics(b, semantics); 2467 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics); 2468 nir_scope nir_exec_scope = 
vtn_scope_to_nir_scope(b, exec_scope); 2469 2470 /* Memory semantics is optional for OpControlBarrier. */ 2471 nir_scope nir_mem_scope; 2472 if (nir_semantics == 0 || modes == 0) 2473 nir_mem_scope = NIR_SCOPE_NONE; 2474 else 2475 nir_mem_scope = vtn_scope_to_nir_scope(b, mem_scope); 2476 2477 nir_scoped_barrier(&b->nb, .execution_scope=nir_exec_scope, .memory_scope=nir_mem_scope, 2478 .memory_semantics=nir_semantics, .memory_modes=modes); 2479} 2480 2481static void 2482vtn_emit_scoped_memory_barrier(struct vtn_builder *b, SpvScope scope, 2483 SpvMemorySemanticsMask semantics) 2484{ 2485 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics); 2486 nir_memory_semantics nir_semantics = 2487 vtn_mem_semantics_to_nir_mem_semantics(b, semantics); 2488 2489 /* No barrier to add. */ 2490 if (nir_semantics == 0 || modes == 0) 2491 return; 2492 2493 nir_scoped_barrier(&b->nb, .memory_scope=vtn_scope_to_nir_scope(b, scope), 2494 .memory_semantics=nir_semantics, 2495 .memory_modes=modes); 2496} 2497 2498struct vtn_ssa_value * 2499vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) 2500{ 2501 /* Always use bare types for SSA values for a couple of reasons: 2502 * 2503 * 1. Code which emits deref chains should never listen to the explicit 2504 * layout information on the SSA value if any exists. If we've 2505 * accidentally been relying on this, we want to find those bugs. 2506 * 2507 * 2. We want to be able to quickly check that an SSA value being assigned 2508 * to a SPIR-V value has the right type. Using bare types everywhere 2509 * ensures that we can pointer-compare. 2510 */ 2511 struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); 2512 val->type = glsl_get_bare_type(type); 2513 2514 2515 if (!glsl_type_is_vector_or_scalar(type)) { 2516 unsigned elems = glsl_get_length(val->type); 2517 val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); 2518 if (glsl_type_is_array_or_matrix(type)) { 2519 const struct glsl_type *elem_type = glsl_get_array_element(type); 2520 for (unsigned i = 0; i < elems; i++) 2521 val->elems[i] = vtn_create_ssa_value(b, elem_type); 2522 } else { 2523 vtn_assert(glsl_type_is_struct_or_ifc(type)); 2524 for (unsigned i = 0; i < elems; i++) { 2525 const struct glsl_type *elem_type = glsl_get_struct_field(type, i); 2526 val->elems[i] = vtn_create_ssa_value(b, elem_type); 2527 } 2528 } 2529 } 2530 2531 return val; 2532} 2533 2534static nir_tex_src 2535vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) 2536{ 2537 nir_tex_src src; 2538 src.src = nir_src_for_ssa(vtn_get_nir_ssa(b, index)); 2539 src.src_type = type; 2540 return src; 2541} 2542 2543static uint32_t 2544image_operand_arg(struct vtn_builder *b, const uint32_t *w, uint32_t count, 2545 uint32_t mask_idx, SpvImageOperandsMask op) 2546{ 2547 static const SpvImageOperandsMask ops_with_arg = 2548 SpvImageOperandsBiasMask | 2549 SpvImageOperandsLodMask | 2550 SpvImageOperandsGradMask | 2551 SpvImageOperandsConstOffsetMask | 2552 SpvImageOperandsOffsetMask | 2553 SpvImageOperandsConstOffsetsMask | 2554 SpvImageOperandsSampleMask | 2555 SpvImageOperandsMinLodMask | 2556 SpvImageOperandsMakeTexelAvailableMask | 2557 SpvImageOperandsMakeTexelVisibleMask; 2558 2559 assert(util_bitcount(op) == 1); 2560 assert(w[mask_idx] & op); 2561 assert(op & ops_with_arg); 2562 2563 uint32_t idx = util_bitcount(w[mask_idx] & (op - 1) & ops_with_arg) + 1; 2564 2565 /* Adjust indices for operands with two arguments. 
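    * Worked example: with w[mask_idx] == Bias|Grad|ConstOffset and
    * op == ConstOffset, the util_bitcount above counts Bias and Grad,
    * giving idx == 3; Grad consumes two words (ddx, ddy), so the bitcount
    * below adds one more and the ConstOffset argument ends up at
    * w[mask_idx + 4].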
    */
   static const SpvImageOperandsMask ops_with_two_args =
      SpvImageOperandsGradMask;
   idx += util_bitcount(w[mask_idx] & (op - 1) & ops_with_two_args);

   idx += mask_idx;

   vtn_fail_if(idx + (op & ops_with_two_args ? 1 : 0) >= count,
               "Image op claims to have %s but does not have enough "
               "following operands", spirv_imageoperands_to_string(op));

   return idx;
}

static void
non_uniform_decoration_cb(struct vtn_builder *b,
                          struct vtn_value *val, int member,
                          const struct vtn_decoration *dec, void *void_ctx)
{
   enum gl_access_qualifier *access = void_ctx;
   switch (dec->decoration) {
   case SpvDecorationNonUniformEXT:
      *access |= ACCESS_NON_UNIFORM;
      break;

   default:
      break;
   }
}

/* Apply SignExtend/ZeroExtend operands to get the actual result type for
 * image read/sample operations and source type for write operations.
 */
static nir_alu_type
get_image_type(struct vtn_builder *b, nir_alu_type type, unsigned operands)
{
   unsigned extend_operands =
      operands & (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask);
   vtn_fail_if(nir_alu_type_get_base_type(type) == nir_type_float && extend_operands,
               "SignExtend/ZeroExtend used on floating-point texel type");
   vtn_fail_if(extend_operands ==
               (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask),
               "SignExtend and ZeroExtend both specified");

   if (operands & SpvImageOperandsSignExtendMask)
      return nir_type_int | nir_alu_type_get_type_size(type);
   if (operands & SpvImageOperandsZeroExtendMask)
      return nir_type_uint | nir_alu_type_get_type_size(type);

   return type;
}

static void
vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
                   const uint32_t *w, unsigned count)
{
   if (opcode == SpvOpSampledImage) {
      struct vtn_sampled_image si = {
         .image = vtn_get_image(b, w[3], NULL),
         .sampler = vtn_get_sampler(b, w[4]),
      };

      enum gl_access_qualifier access = 0;
      vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
                             non_uniform_decoration_cb, &access);
      vtn_foreach_decoration(b, vtn_untyped_value(b, w[4]),
                             non_uniform_decoration_cb, &access);

      vtn_push_sampled_image(b, w[2], si, access & ACCESS_NON_UNIFORM);
      return;
   } else if (opcode == SpvOpImage) {
      struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);

      enum gl_access_qualifier access = 0;
      vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
                             non_uniform_decoration_cb, &access);

      vtn_push_image(b, w[2], si.image, access & ACCESS_NON_UNIFORM);
      return;
   } else if (opcode == SpvOpImageSparseTexelsResident) {
      nir_ssa_def *code = vtn_get_nir_ssa(b, w[3]);
      vtn_push_nir_ssa(b, w[2], nir_is_sparse_texels_resident(&b->nb, code));
      return;
   }

   nir_deref_instr *image = NULL, *sampler = NULL;
   struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
   if (sampled_val->type->base_type == vtn_base_type_sampled_image) {
      struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
      image = si.image;
      sampler = si.sampler;
   } else {
      image = vtn_get_image(b, w[3], NULL);
   }

   const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image->type);
   const bool is_array = glsl_sampler_type_is_array(image->type);
   nir_alu_type dest_type = nir_type_invalid;

   /* Figure out the base
texture operation */ 2665 nir_texop texop; 2666 switch (opcode) { 2667 case SpvOpImageSampleImplicitLod: 2668 case SpvOpImageSparseSampleImplicitLod: 2669 case SpvOpImageSampleDrefImplicitLod: 2670 case SpvOpImageSparseSampleDrefImplicitLod: 2671 case SpvOpImageSampleProjImplicitLod: 2672 case SpvOpImageSampleProjDrefImplicitLod: 2673 texop = nir_texop_tex; 2674 break; 2675 2676 case SpvOpImageSampleExplicitLod: 2677 case SpvOpImageSparseSampleExplicitLod: 2678 case SpvOpImageSampleDrefExplicitLod: 2679 case SpvOpImageSparseSampleDrefExplicitLod: 2680 case SpvOpImageSampleProjExplicitLod: 2681 case SpvOpImageSampleProjDrefExplicitLod: 2682 texop = nir_texop_txl; 2683 break; 2684 2685 case SpvOpImageFetch: 2686 case SpvOpImageSparseFetch: 2687 if (sampler_dim == GLSL_SAMPLER_DIM_MS) { 2688 texop = nir_texop_txf_ms; 2689 } else { 2690 texop = nir_texop_txf; 2691 } 2692 break; 2693 2694 case SpvOpImageGather: 2695 case SpvOpImageSparseGather: 2696 case SpvOpImageDrefGather: 2697 case SpvOpImageSparseDrefGather: 2698 texop = nir_texop_tg4; 2699 break; 2700 2701 case SpvOpImageQuerySizeLod: 2702 case SpvOpImageQuerySize: 2703 texop = nir_texop_txs; 2704 dest_type = nir_type_int32; 2705 break; 2706 2707 case SpvOpImageQueryLod: 2708 texop = nir_texop_lod; 2709 dest_type = nir_type_float32; 2710 break; 2711 2712 case SpvOpImageQueryLevels: 2713 texop = nir_texop_query_levels; 2714 dest_type = nir_type_int32; 2715 break; 2716 2717 case SpvOpImageQuerySamples: 2718 texop = nir_texop_texture_samples; 2719 dest_type = nir_type_int32; 2720 break; 2721 2722 case SpvOpFragmentFetchAMD: 2723 texop = nir_texop_fragment_fetch_amd; 2724 break; 2725 2726 case SpvOpFragmentMaskFetchAMD: 2727 texop = nir_texop_fragment_mask_fetch_amd; 2728 dest_type = nir_type_uint32; 2729 break; 2730 2731 default: 2732 vtn_fail_with_opcode("Unhandled opcode", opcode); 2733 } 2734 2735 nir_tex_src srcs[10]; /* 10 should be enough */ 2736 nir_tex_src *p = srcs; 2737 2738 p->src = nir_src_for_ssa(&image->dest.ssa); 2739 p->src_type = nir_tex_src_texture_deref; 2740 p++; 2741 2742 switch (texop) { 2743 case nir_texop_tex: 2744 case nir_texop_txb: 2745 case nir_texop_txl: 2746 case nir_texop_txd: 2747 case nir_texop_tg4: 2748 case nir_texop_lod: 2749 vtn_fail_if(sampler == NULL, 2750 "%s requires an image of type OpTypeSampledImage", 2751 spirv_op_to_string(opcode)); 2752 p->src = nir_src_for_ssa(&sampler->dest.ssa); 2753 p->src_type = nir_tex_src_sampler_deref; 2754 p++; 2755 break; 2756 case nir_texop_txf: 2757 case nir_texop_txf_ms: 2758 case nir_texop_txs: 2759 case nir_texop_query_levels: 2760 case nir_texop_texture_samples: 2761 case nir_texop_samples_identical: 2762 case nir_texop_fragment_fetch_amd: 2763 case nir_texop_fragment_mask_fetch_amd: 2764 /* These don't */ 2765 break; 2766 case nir_texop_txf_ms_fb: 2767 vtn_fail("unexpected nir_texop_txf_ms_fb"); 2768 break; 2769 case nir_texop_txf_ms_mcs_intel: 2770 vtn_fail("unexpected nir_texop_txf_ms_mcs"); 2771 case nir_texop_tex_prefetch: 2772 vtn_fail("unexpected nir_texop_tex_prefetch"); 2773 } 2774 2775 unsigned idx = 4; 2776 2777 struct nir_ssa_def *coord; 2778 unsigned coord_components; 2779 switch (opcode) { 2780 case SpvOpImageSampleImplicitLod: 2781 case SpvOpImageSparseSampleImplicitLod: 2782 case SpvOpImageSampleExplicitLod: 2783 case SpvOpImageSparseSampleExplicitLod: 2784 case SpvOpImageSampleDrefImplicitLod: 2785 case SpvOpImageSparseSampleDrefImplicitLod: 2786 case SpvOpImageSampleDrefExplicitLod: 2787 case SpvOpImageSparseSampleDrefExplicitLod: 2788 case 
SpvOpImageSampleProjImplicitLod:
   case SpvOpImageSampleProjExplicitLod:
   case SpvOpImageSampleProjDrefImplicitLod:
   case SpvOpImageSampleProjDrefExplicitLod:
   case SpvOpImageFetch:
   case SpvOpImageSparseFetch:
   case SpvOpImageGather:
   case SpvOpImageSparseGather:
   case SpvOpImageDrefGather:
   case SpvOpImageSparseDrefGather:
   case SpvOpImageQueryLod:
   case SpvOpFragmentFetchAMD:
   case SpvOpFragmentMaskFetchAMD: {
      /* All these types have the coordinate as their first real argument */
      coord_components = glsl_get_sampler_dim_coordinate_components(sampler_dim);

      if (is_array && texop != nir_texop_lod)
         coord_components++;

      struct vtn_ssa_value *coord_val = vtn_ssa_value(b, w[idx++]);
      coord = coord_val->def;
      /* From the SPIR-V spec version 1.5, rev. 5:
       *
       *    "Coordinate must be a scalar or vector of floating-point type. It
       *    contains (u[, v] ... [, array layer]) as needed by the definition
       *    of Sampled Image. It may be a vector larger than needed, but all
       *    unused components appear after all used components."
       */
      vtn_fail_if(coord->num_components < coord_components,
                  "Coordinate value passed has fewer components than sampler dimensionality.");
      p->src = nir_src_for_ssa(nir_channels(&b->nb, coord,
                                            (1 << coord_components) - 1));

      /* OpenCL allows integer sampling coordinates */
      if (glsl_type_is_integer(coord_val->type) &&
          opcode == SpvOpImageSampleExplicitLod) {
         vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
                     "Unless the Kernel capability is being used, the "
                     "coordinate parameter of OpImageSampleExplicitLod "
                     "must be floating point.");

         nir_ssa_def *coords[4];
         nir_ssa_def *f0_5 = nir_imm_float(&b->nb, 0.5);
         for (unsigned i = 0; i < coord_components; i++) {
            coords[i] = nir_i2f32(&b->nb, nir_channel(&b->nb, p->src.ssa, i));

            if (!is_array || i != coord_components - 1)
               coords[i] = nir_fadd(&b->nb, coords[i], f0_5);
         }

         p->src = nir_src_for_ssa(nir_vec(&b->nb, coords, coord_components));
      }

      p->src_type = nir_tex_src_coord;
      p++;
      break;
   }

   default:
      coord = NULL;
      coord_components = 0;
      break;
   }

   switch (opcode) {
   case SpvOpImageSampleProjImplicitLod:
   case SpvOpImageSampleProjExplicitLod:
   case SpvOpImageSampleProjDrefImplicitLod:
   case SpvOpImageSampleProjDrefExplicitLod:
      /* These have the projector as the last coordinate component */
      p->src = nir_src_for_ssa(nir_channel(&b->nb, coord, coord_components));
      p->src_type = nir_tex_src_projector;
      p++;
      break;

   default:
      break;
   }

   bool is_shadow = false;
   unsigned gather_component = 0;
   switch (opcode) {
   case SpvOpImageSampleDrefImplicitLod:
   case SpvOpImageSparseSampleDrefImplicitLod:
   case SpvOpImageSampleDrefExplicitLod:
   case SpvOpImageSparseSampleDrefExplicitLod:
   case SpvOpImageSampleProjDrefImplicitLod:
   case SpvOpImageSampleProjDrefExplicitLod:
   case SpvOpImageDrefGather:
   case SpvOpImageSparseDrefGather:
      /* These all have an explicit depth value as their next source */
      is_shadow = true;
      (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
      break;

   case SpvOpImageGather:
   case SpvOpImageSparseGather:
      /* This has a component as its next source */
      gather_component = vtn_constant_uint(b, w[idx++]);
      break;
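
   /* Everything else (plain samples, fetches and queries) has neither a
    * depth comparison reference nor a gather component.
    */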
2888 default: 2889 break; 2890 } 2891 2892 bool is_sparse = false; 2893 switch (opcode) { 2894 case SpvOpImageSparseSampleImplicitLod: 2895 case SpvOpImageSparseSampleExplicitLod: 2896 case SpvOpImageSparseSampleDrefImplicitLod: 2897 case SpvOpImageSparseSampleDrefExplicitLod: 2898 case SpvOpImageSparseFetch: 2899 case SpvOpImageSparseGather: 2900 case SpvOpImageSparseDrefGather: 2901 is_sparse = true; 2902 break; 2903 default: 2904 break; 2905 } 2906 2907 /* For OpImageQuerySizeLod, we always have an LOD */ 2908 if (opcode == SpvOpImageQuerySizeLod) 2909 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); 2910 2911 /* For OpFragmentFetchAMD, we always have a multisample index */ 2912 if (opcode == SpvOpFragmentFetchAMD) 2913 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); 2914 2915 /* Now we need to handle some number of optional arguments */ 2916 struct vtn_value *gather_offsets = NULL; 2917 uint32_t operands = SpvImageOperandsMaskNone; 2918 if (idx < count) { 2919 operands = w[idx]; 2920 2921 if (operands & SpvImageOperandsBiasMask) { 2922 vtn_assert(texop == nir_texop_tex || 2923 texop == nir_texop_tg4); 2924 if (texop == nir_texop_tex) 2925 texop = nir_texop_txb; 2926 uint32_t arg = image_operand_arg(b, w, count, idx, 2927 SpvImageOperandsBiasMask); 2928 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_bias); 2929 } 2930 2931 if (operands & SpvImageOperandsLodMask) { 2932 vtn_assert(texop == nir_texop_txl || texop == nir_texop_txf || 2933 texop == nir_texop_txs || texop == nir_texop_tg4); 2934 uint32_t arg = image_operand_arg(b, w, count, idx, 2935 SpvImageOperandsLodMask); 2936 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_lod); 2937 } 2938 2939 if (operands & SpvImageOperandsGradMask) { 2940 vtn_assert(texop == nir_texop_txl); 2941 texop = nir_texop_txd; 2942 uint32_t arg = image_operand_arg(b, w, count, idx, 2943 SpvImageOperandsGradMask); 2944 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ddx); 2945 (*p++) = vtn_tex_src(b, w[arg + 1], nir_tex_src_ddy); 2946 } 2947 2948 vtn_fail_if(util_bitcount(operands & (SpvImageOperandsConstOffsetsMask | 2949 SpvImageOperandsOffsetMask | 2950 SpvImageOperandsConstOffsetMask)) > 1, 2951 "At most one of the ConstOffset, Offset, and ConstOffsets " 2952 "image operands can be used on a given instruction."); 2953 2954 if (operands & SpvImageOperandsOffsetMask) { 2955 uint32_t arg = image_operand_arg(b, w, count, idx, 2956 SpvImageOperandsOffsetMask); 2957 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset); 2958 } 2959 2960 if (operands & SpvImageOperandsConstOffsetMask) { 2961 uint32_t arg = image_operand_arg(b, w, count, idx, 2962 SpvImageOperandsConstOffsetMask); 2963 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset); 2964 } 2965 2966 if (operands & SpvImageOperandsConstOffsetsMask) { 2967 vtn_assert(texop == nir_texop_tg4); 2968 uint32_t arg = image_operand_arg(b, w, count, idx, 2969 SpvImageOperandsConstOffsetsMask); 2970 gather_offsets = vtn_value(b, w[arg], vtn_value_type_constant); 2971 } 2972 2973 if (operands & SpvImageOperandsSampleMask) { 2974 vtn_assert(texop == nir_texop_txf_ms); 2975 uint32_t arg = image_operand_arg(b, w, count, idx, 2976 SpvImageOperandsSampleMask); 2977 texop = nir_texop_txf_ms; 2978 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ms_index); 2979 } 2980 2981 if (operands & SpvImageOperandsMinLodMask) { 2982 vtn_assert(texop == nir_texop_tex || 2983 texop == nir_texop_txb || 2984 texop == nir_texop_txd); 2985 uint32_t arg = image_operand_arg(b, w, count, idx, 2986 SpvImageOperandsMinLodMask); 2987 (*p++) = 
vtn_tex_src(b, w[arg], nir_tex_src_min_lod); 2988 } 2989 } 2990 2991 struct vtn_type *ret_type = vtn_get_type(b, w[1]); 2992 struct vtn_type *struct_type = NULL; 2993 if (is_sparse) { 2994 vtn_assert(glsl_type_is_struct_or_ifc(ret_type->type)); 2995 struct_type = ret_type; 2996 ret_type = struct_type->members[1]; 2997 } 2998 2999 nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); 3000 instr->op = texop; 3001 3002 memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); 3003 3004 instr->coord_components = coord_components; 3005 instr->sampler_dim = sampler_dim; 3006 instr->is_array = is_array; 3007 instr->is_shadow = is_shadow; 3008 instr->is_sparse = is_sparse; 3009 instr->is_new_style_shadow = 3010 is_shadow && glsl_get_components(ret_type->type) == 1; 3011 instr->component = gather_component; 3012 3013 /* The Vulkan spec says: 3014 * 3015 * "If an instruction loads from or stores to a resource (including 3016 * atomics and image instructions) and the resource descriptor being 3017 * accessed is not dynamically uniform, then the operand corresponding 3018 * to that resource (e.g. the pointer or sampled image operand) must be 3019 * decorated with NonUniform." 3020 * 3021 * It's very careful to specify that the exact operand must be decorated 3022 * NonUniform. The SPIR-V parser is not expected to chase through long 3023 * chains to find the NonUniform decoration. It's either right there or we 3024 * can assume it doesn't exist. 3025 */ 3026 enum gl_access_qualifier access = 0; 3027 vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access); 3028 3029 if (sampled_val->propagated_non_uniform) 3030 access |= ACCESS_NON_UNIFORM; 3031 3032 if (image && (access & ACCESS_NON_UNIFORM)) 3033 instr->texture_non_uniform = true; 3034 3035 if (sampler && (access & ACCESS_NON_UNIFORM)) 3036 instr->sampler_non_uniform = true; 3037 3038 /* for non-query ops, get dest_type from SPIR-V return type */ 3039 if (dest_type == nir_type_invalid) { 3040 /* the return type should match the image type, unless the image type is 3041 * VOID (CL image), in which case the return type dictates the sampler 3042 */ 3043 enum glsl_base_type sampler_base = 3044 glsl_get_sampler_result_type(image->type); 3045 enum glsl_base_type ret_base = glsl_get_base_type(ret_type->type); 3046 vtn_fail_if(sampler_base != ret_base && sampler_base != GLSL_TYPE_VOID, 3047 "SPIR-V return type mismatches image type. 
This is only valid " 3048 "for untyped images (OpenCL)."); 3049 dest_type = nir_get_nir_type_for_glsl_base_type(ret_base); 3050 dest_type = get_image_type(b, dest_type, operands); 3051 } 3052 3053 instr->dest_type = dest_type; 3054 3055 nir_ssa_dest_init(&instr->instr, &instr->dest, 3056 nir_tex_instr_dest_size(instr), 32, NULL); 3057 3058 vtn_assert(glsl_get_vector_elements(ret_type->type) == 3059 nir_tex_instr_result_size(instr)); 3060 3061 if (gather_offsets) { 3062 vtn_fail_if(gather_offsets->type->base_type != vtn_base_type_array || 3063 gather_offsets->type->length != 4, 3064 "ConstOffsets must be an array of size four of vectors " 3065 "of two integer components"); 3066 3067 struct vtn_type *vec_type = gather_offsets->type->array_element; 3068 vtn_fail_if(vec_type->base_type != vtn_base_type_vector || 3069 vec_type->length != 2 || 3070 !glsl_type_is_integer(vec_type->type), 3071 "ConstOffsets must be an array of size four of vectors " 3072 "of two integer components"); 3073 3074 unsigned bit_size = glsl_get_bit_size(vec_type->type); 3075 for (uint32_t i = 0; i < 4; i++) { 3076 const nir_const_value *cvec = 3077 gather_offsets->constant->elements[i]->values; 3078 for (uint32_t j = 0; j < 2; j++) { 3079 switch (bit_size) { 3080 case 8: instr->tg4_offsets[i][j] = cvec[j].i8; break; 3081 case 16: instr->tg4_offsets[i][j] = cvec[j].i16; break; 3082 case 32: instr->tg4_offsets[i][j] = cvec[j].i32; break; 3083 case 64: instr->tg4_offsets[i][j] = cvec[j].i64; break; 3084 default: 3085 vtn_fail("Unsupported bit size: %u", bit_size); 3086 } 3087 } 3088 } 3089 } 3090 3091 nir_builder_instr_insert(&b->nb, &instr->instr); 3092 3093 if (is_sparse) { 3094 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type); 3095 unsigned result_size = glsl_get_vector_elements(ret_type->type); 3096 dest->elems[0]->def = nir_channel(&b->nb, &instr->dest.ssa, result_size); 3097 dest->elems[1]->def = nir_channels(&b->nb, &instr->dest.ssa, 3098 (nir_component_mask_t) 3099 BITFIELD_MASK(result_size)); 3100 vtn_push_ssa_value(b, w[2], dest); 3101 } else { 3102 vtn_push_nir_ssa(b, w[2], &instr->dest.ssa); 3103 } 3104} 3105 3106static void 3107fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode, 3108 const uint32_t *w, nir_src *src) 3109{ 3110 const struct glsl_type *type = vtn_get_type(b, w[1])->type; 3111 unsigned bit_size = glsl_get_bit_size(type); 3112 3113 switch (opcode) { 3114 case SpvOpAtomicIIncrement: 3115 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 1, bit_size)); 3116 break; 3117 3118 case SpvOpAtomicIDecrement: 3119 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, bit_size)); 3120 break; 3121 3122 case SpvOpAtomicISub: 3123 src[0] = 3124 nir_src_for_ssa(nir_ineg(&b->nb, vtn_get_nir_ssa(b, w[6]))); 3125 break; 3126 3127 case SpvOpAtomicCompareExchange: 3128 case SpvOpAtomicCompareExchangeWeak: 3129 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[8])); 3130 src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[7])); 3131 break; 3132 3133 case SpvOpAtomicExchange: 3134 case SpvOpAtomicIAdd: 3135 case SpvOpAtomicSMin: 3136 case SpvOpAtomicUMin: 3137 case SpvOpAtomicSMax: 3138 case SpvOpAtomicUMax: 3139 case SpvOpAtomicAnd: 3140 case SpvOpAtomicOr: 3141 case SpvOpAtomicXor: 3142 case SpvOpAtomicFAddEXT: 3143 case SpvOpAtomicFMinEXT: 3144 case SpvOpAtomicFMaxEXT: 3145 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6])); 3146 break; 3147 3148 default: 3149 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode); 3150 } 3151} 3152 3153static nir_ssa_def * 3154get_image_coord(struct vtn_builder 
*b, uint32_t value) 3155{ 3156 nir_ssa_def *coord = vtn_get_nir_ssa(b, value); 3157 /* The image_load_store intrinsics assume a 4-dim coordinate */ 3158 return nir_pad_vec4(&b->nb, coord); 3159} 3160 3161static void 3162vtn_handle_image(struct vtn_builder *b, SpvOp opcode, 3163 const uint32_t *w, unsigned count) 3164{ 3165 /* Just get this one out of the way */ 3166 if (opcode == SpvOpImageTexelPointer) { 3167 struct vtn_value *val = 3168 vtn_push_value(b, w[2], vtn_value_type_image_pointer); 3169 val->image = ralloc(b, struct vtn_image_pointer); 3170 3171 val->image->image = vtn_nir_deref(b, w[3]); 3172 val->image->coord = get_image_coord(b, w[4]); 3173 val->image->sample = vtn_get_nir_ssa(b, w[5]); 3174 val->image->lod = nir_imm_int(&b->nb, 0); 3175 return; 3176 } 3177 3178 struct vtn_image_pointer image; 3179 SpvScope scope = SpvScopeInvocation; 3180 SpvMemorySemanticsMask semantics = 0; 3181 SpvImageOperandsMask operands = SpvImageOperandsMaskNone; 3182 3183 enum gl_access_qualifier access = 0; 3184 3185 struct vtn_value *res_val; 3186 switch (opcode) { 3187 case SpvOpAtomicExchange: 3188 case SpvOpAtomicCompareExchange: 3189 case SpvOpAtomicCompareExchangeWeak: 3190 case SpvOpAtomicIIncrement: 3191 case SpvOpAtomicIDecrement: 3192 case SpvOpAtomicIAdd: 3193 case SpvOpAtomicISub: 3194 case SpvOpAtomicLoad: 3195 case SpvOpAtomicSMin: 3196 case SpvOpAtomicUMin: 3197 case SpvOpAtomicSMax: 3198 case SpvOpAtomicUMax: 3199 case SpvOpAtomicAnd: 3200 case SpvOpAtomicOr: 3201 case SpvOpAtomicXor: 3202 case SpvOpAtomicFAddEXT: 3203 case SpvOpAtomicFMinEXT: 3204 case SpvOpAtomicFMaxEXT: 3205 res_val = vtn_value(b, w[3], vtn_value_type_image_pointer); 3206 image = *res_val->image; 3207 scope = vtn_constant_uint(b, w[4]); 3208 semantics = vtn_constant_uint(b, w[5]); 3209 access |= ACCESS_COHERENT; 3210 break; 3211 3212 case SpvOpAtomicStore: 3213 res_val = vtn_value(b, w[1], vtn_value_type_image_pointer); 3214 image = *res_val->image; 3215 scope = vtn_constant_uint(b, w[2]); 3216 semantics = vtn_constant_uint(b, w[3]); 3217 access |= ACCESS_COHERENT; 3218 break; 3219 3220 case SpvOpImageQuerySizeLod: 3221 res_val = vtn_untyped_value(b, w[3]); 3222 image.image = vtn_get_image(b, w[3], &access); 3223 image.coord = NULL; 3224 image.sample = NULL; 3225 image.lod = vtn_ssa_value(b, w[4])->def; 3226 break; 3227 3228 case SpvOpImageQuerySize: 3229 case SpvOpImageQuerySamples: 3230 res_val = vtn_untyped_value(b, w[3]); 3231 image.image = vtn_get_image(b, w[3], &access); 3232 image.coord = NULL; 3233 image.sample = NULL; 3234 image.lod = NULL; 3235 break; 3236 3237 case SpvOpImageQueryFormat: 3238 case SpvOpImageQueryOrder: 3239 res_val = vtn_untyped_value(b, w[3]); 3240 image.image = vtn_get_image(b, w[3], &access); 3241 image.coord = NULL; 3242 image.sample = NULL; 3243 image.lod = NULL; 3244 break; 3245 3246 case SpvOpImageRead: 3247 case SpvOpImageSparseRead: { 3248 res_val = vtn_untyped_value(b, w[3]); 3249 image.image = vtn_get_image(b, w[3], &access); 3250 image.coord = get_image_coord(b, w[4]); 3251 3252 operands = count > 5 ? 
w[5] : SpvImageOperandsMaskNone; 3253 3254 if (operands & SpvImageOperandsSampleMask) { 3255 uint32_t arg = image_operand_arg(b, w, count, 5, 3256 SpvImageOperandsSampleMask); 3257 image.sample = vtn_get_nir_ssa(b, w[arg]); 3258 } else { 3259 image.sample = nir_ssa_undef(&b->nb, 1, 32); 3260 } 3261 3262 if (operands & SpvImageOperandsMakeTexelVisibleMask) { 3263 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0, 3264 "MakeTexelVisible requires NonPrivateTexel to also be set."); 3265 uint32_t arg = image_operand_arg(b, w, count, 5, 3266 SpvImageOperandsMakeTexelVisibleMask); 3267 semantics = SpvMemorySemanticsMakeVisibleMask; 3268 scope = vtn_constant_uint(b, w[arg]); 3269 } 3270 3271 if (operands & SpvImageOperandsLodMask) { 3272 uint32_t arg = image_operand_arg(b, w, count, 5, 3273 SpvImageOperandsLodMask); 3274 image.lod = vtn_get_nir_ssa(b, w[arg]); 3275 } else { 3276 image.lod = nir_imm_int(&b->nb, 0); 3277 } 3278 3279 if (operands & SpvImageOperandsVolatileTexelMask) 3280 access |= ACCESS_VOLATILE; 3281 3282 break; 3283 } 3284 3285 case SpvOpImageWrite: { 3286 res_val = vtn_untyped_value(b, w[1]); 3287 image.image = vtn_get_image(b, w[1], &access); 3288 image.coord = get_image_coord(b, w[2]); 3289 3290 /* texel = w[3] */ 3291 3292 operands = count > 4 ? w[4] : SpvImageOperandsMaskNone; 3293 3294 if (operands & SpvImageOperandsSampleMask) { 3295 uint32_t arg = image_operand_arg(b, w, count, 4, 3296 SpvImageOperandsSampleMask); 3297 image.sample = vtn_get_nir_ssa(b, w[arg]); 3298 } else { 3299 image.sample = nir_ssa_undef(&b->nb, 1, 32); 3300 } 3301 3302 if (operands & SpvImageOperandsMakeTexelAvailableMask) { 3303 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0, 3304 "MakeTexelAvailable requires NonPrivateTexel to also be set."); 3305 uint32_t arg = image_operand_arg(b, w, count, 4, 3306 SpvImageOperandsMakeTexelAvailableMask); 3307 semantics = SpvMemorySemanticsMakeAvailableMask; 3308 scope = vtn_constant_uint(b, w[arg]); 3309 } 3310 3311 if (operands & SpvImageOperandsLodMask) { 3312 uint32_t arg = image_operand_arg(b, w, count, 4, 3313 SpvImageOperandsLodMask); 3314 image.lod = vtn_get_nir_ssa(b, w[arg]); 3315 } else { 3316 image.lod = nir_imm_int(&b->nb, 0); 3317 } 3318 3319 if (operands & SpvImageOperandsVolatileTexelMask) 3320 access |= ACCESS_VOLATILE; 3321 3322 break; 3323 } 3324 3325 default: 3326 vtn_fail_with_opcode("Invalid image opcode", opcode); 3327 } 3328 3329 if (semantics & SpvMemorySemanticsVolatileMask) 3330 access |= ACCESS_VOLATILE; 3331 3332 nir_intrinsic_op op; 3333 switch (opcode) { 3334#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_deref_##N; break; 3335 OP(ImageQuerySize, size) 3336 OP(ImageQuerySizeLod, size) 3337 OP(ImageRead, load) 3338 OP(ImageSparseRead, sparse_load) 3339 OP(ImageWrite, store) 3340 OP(AtomicLoad, load) 3341 OP(AtomicStore, store) 3342 OP(AtomicExchange, atomic_exchange) 3343 OP(AtomicCompareExchange, atomic_comp_swap) 3344 OP(AtomicCompareExchangeWeak, atomic_comp_swap) 3345 OP(AtomicIIncrement, atomic_add) 3346 OP(AtomicIDecrement, atomic_add) 3347 OP(AtomicIAdd, atomic_add) 3348 OP(AtomicISub, atomic_add) 3349 OP(AtomicSMin, atomic_imin) 3350 OP(AtomicUMin, atomic_umin) 3351 OP(AtomicSMax, atomic_imax) 3352 OP(AtomicUMax, atomic_umax) 3353 OP(AtomicAnd, atomic_and) 3354 OP(AtomicOr, atomic_or) 3355 OP(AtomicXor, atomic_xor) 3356 OP(AtomicFAddEXT, atomic_fadd) 3357 OP(AtomicFMinEXT, atomic_fmin) 3358 OP(AtomicFMaxEXT, atomic_fmax) 3359 OP(ImageQueryFormat, format) 3360 OP(ImageQueryOrder, 
order) 3361 OP(ImageQuerySamples, samples) 3362#undef OP 3363 default: 3364 vtn_fail_with_opcode("Invalid image opcode", opcode); 3365 } 3366 3367 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); 3368 3369 intrin->src[0] = nir_src_for_ssa(&image.image->dest.ssa); 3370 nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(image.image->type)); 3371 nir_intrinsic_set_image_array(intrin, 3372 glsl_sampler_type_is_array(image.image->type)); 3373 3374 switch (opcode) { 3375 case SpvOpImageQuerySamples: 3376 case SpvOpImageQuerySize: 3377 case SpvOpImageQuerySizeLod: 3378 case SpvOpImageQueryFormat: 3379 case SpvOpImageQueryOrder: 3380 break; 3381 default: 3382 /* The image coordinate is always 4 components but we may not have that 3383 * many. Swizzle to compensate. 3384 */ 3385 intrin->src[1] = nir_src_for_ssa(nir_pad_vec4(&b->nb, image.coord)); 3386 intrin->src[2] = nir_src_for_ssa(image.sample); 3387 break; 3388 } 3389 3390 /* The Vulkan spec says: 3391 * 3392 * "If an instruction loads from or stores to a resource (including 3393 * atomics and image instructions) and the resource descriptor being 3394 * accessed is not dynamically uniform, then the operand corresponding 3395 * to that resource (e.g. the pointer or sampled image operand) must be 3396 * decorated with NonUniform." 3397 * 3398 * It's very careful to specify that the exact operand must be decorated 3399 * NonUniform. The SPIR-V parser is not expected to chase through long 3400 * chains to find the NonUniform decoration. It's either right there or we 3401 * can assume it doesn't exist. 3402 */ 3403 vtn_foreach_decoration(b, res_val, non_uniform_decoration_cb, &access); 3404 nir_intrinsic_set_access(intrin, access); 3405 3406 switch (opcode) { 3407 case SpvOpImageQuerySamples: 3408 case SpvOpImageQueryFormat: 3409 case SpvOpImageQueryOrder: 3410 /* No additional sources */ 3411 break; 3412 case SpvOpImageQuerySize: 3413 intrin->src[1] = nir_src_for_ssa(nir_imm_int(&b->nb, 0)); 3414 break; 3415 case SpvOpImageQuerySizeLod: 3416 intrin->src[1] = nir_src_for_ssa(image.lod); 3417 break; 3418 case SpvOpAtomicLoad: 3419 case SpvOpImageRead: 3420 case SpvOpImageSparseRead: 3421 /* Only OpImageRead can support a lod parameter if 3422 * SPV_AMD_shader_image_load_store_lod is used but the current NIR 3423 * intrinsics definition for atomics requires us to set it for 3424 * OpAtomicLoad. 3425 */ 3426 intrin->src[3] = nir_src_for_ssa(image.lod); 3427 break; 3428 case SpvOpAtomicStore: 3429 case SpvOpImageWrite: { 3430 const uint32_t value_id = opcode == SpvOpAtomicStore ? w[4] : w[3]; 3431 struct vtn_ssa_value *value = vtn_ssa_value(b, value_id); 3432 /* nir_intrinsic_image_deref_store always takes a vec4 value */ 3433 assert(op == nir_intrinsic_image_deref_store); 3434 intrin->num_components = 4; 3435 intrin->src[3] = nir_src_for_ssa(nir_pad_vec4(&b->nb, value->def)); 3436 /* Only OpImageWrite can support a lod parameter if 3437 * SPV_AMD_shader_image_load_store_lod is used but the current NIR 3438 * intrinsics definition for atomics requires us to set it for 3439 * OpAtomicStore. 
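       * For OpAtomicStore, image.lod is the nir_imm_int(&b->nb, 0) set up
       * when the OpImageTexelPointer was handled, so this only keeps the
       * intrinsic well-formed; it does not change behaviour.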
3440 */ 3441 intrin->src[4] = nir_src_for_ssa(image.lod); 3442 3443 if (opcode == SpvOpImageWrite) { 3444 nir_alu_type src_type = 3445 get_image_type(b, nir_get_nir_type_for_glsl_type(value->type), operands); 3446 nir_intrinsic_set_src_type(intrin, src_type); 3447 } 3448 break; 3449 } 3450 3451 case SpvOpAtomicCompareExchange: 3452 case SpvOpAtomicCompareExchangeWeak: 3453 case SpvOpAtomicIIncrement: 3454 case SpvOpAtomicIDecrement: 3455 case SpvOpAtomicExchange: 3456 case SpvOpAtomicIAdd: 3457 case SpvOpAtomicISub: 3458 case SpvOpAtomicSMin: 3459 case SpvOpAtomicUMin: 3460 case SpvOpAtomicSMax: 3461 case SpvOpAtomicUMax: 3462 case SpvOpAtomicAnd: 3463 case SpvOpAtomicOr: 3464 case SpvOpAtomicXor: 3465 case SpvOpAtomicFAddEXT: 3466 case SpvOpAtomicFMinEXT: 3467 case SpvOpAtomicFMaxEXT: 3468 fill_common_atomic_sources(b, opcode, w, &intrin->src[3]); 3469 break; 3470 3471 default: 3472 vtn_fail_with_opcode("Invalid image opcode", opcode); 3473 } 3474 3475 /* Image operations implicitly have the Image storage memory semantics. */ 3476 semantics |= SpvMemorySemanticsImageMemoryMask; 3477 3478 SpvMemorySemanticsMask before_semantics; 3479 SpvMemorySemanticsMask after_semantics; 3480 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics); 3481 3482 if (before_semantics) 3483 vtn_emit_memory_barrier(b, scope, before_semantics); 3484 3485 if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) { 3486 struct vtn_type *type = vtn_get_type(b, w[1]); 3487 struct vtn_type *struct_type = NULL; 3488 if (opcode == SpvOpImageSparseRead) { 3489 vtn_assert(glsl_type_is_struct_or_ifc(type->type)); 3490 struct_type = type; 3491 type = struct_type->members[1]; 3492 } 3493 3494 unsigned dest_components = glsl_get_vector_elements(type->type); 3495 if (opcode == SpvOpImageSparseRead) 3496 dest_components++; 3497 3498 if (nir_intrinsic_infos[op].dest_components == 0) 3499 intrin->num_components = dest_components; 3500 3501 nir_ssa_dest_init(&intrin->instr, &intrin->dest, 3502 nir_intrinsic_dest_components(intrin), 3503 glsl_get_bit_size(type->type), NULL); 3504 3505 nir_builder_instr_insert(&b->nb, &intrin->instr); 3506 3507 nir_ssa_def *result = &intrin->dest.ssa; 3508 if (nir_intrinsic_dest_components(intrin) != dest_components) 3509 result = nir_channels(&b->nb, result, (1 << dest_components) - 1); 3510 3511 if (opcode == SpvOpImageSparseRead) { 3512 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type); 3513 unsigned res_type_size = glsl_get_vector_elements(type->type); 3514 dest->elems[0]->def = nir_channel(&b->nb, result, res_type_size); 3515 if (intrin->dest.ssa.bit_size != 32) 3516 dest->elems[0]->def = nir_u2u32(&b->nb, dest->elems[0]->def); 3517 dest->elems[1]->def = nir_channels(&b->nb, result, 3518 (nir_component_mask_t) 3519 BITFIELD_MASK(res_type_size)); 3520 vtn_push_ssa_value(b, w[2], dest); 3521 } else { 3522 vtn_push_nir_ssa(b, w[2], result); 3523 } 3524 3525 if (opcode == SpvOpImageRead || opcode == SpvOpImageSparseRead) { 3526 nir_alu_type dest_type = 3527 get_image_type(b, nir_get_nir_type_for_glsl_type(type->type), operands); 3528 nir_intrinsic_set_dest_type(intrin, dest_type); 3529 } 3530 } else { 3531 nir_builder_instr_insert(&b->nb, &intrin->instr); 3532 } 3533 3534 if (after_semantics) 3535 vtn_emit_memory_barrier(b, scope, after_semantics); 3536} 3537 3538static nir_intrinsic_op 3539get_uniform_nir_atomic_op(struct vtn_builder *b, SpvOp opcode) 3540{ 3541 switch (opcode) { 3542#define OP(S, N) case SpvOp##S: return 
nir_intrinsic_atomic_counter_ ##N; 3543 OP(AtomicLoad, read_deref) 3544 OP(AtomicExchange, exchange) 3545 OP(AtomicCompareExchange, comp_swap) 3546 OP(AtomicCompareExchangeWeak, comp_swap) 3547 OP(AtomicIIncrement, inc_deref) 3548 OP(AtomicIDecrement, post_dec_deref) 3549 OP(AtomicIAdd, add_deref) 3550 OP(AtomicISub, add_deref) 3551 OP(AtomicUMin, min_deref) 3552 OP(AtomicUMax, max_deref) 3553 OP(AtomicAnd, and_deref) 3554 OP(AtomicOr, or_deref) 3555 OP(AtomicXor, xor_deref) 3556#undef OP 3557 default: 3558 /* We left out AtomicStore, AtomicSMin and AtomicSMax: right now 3559 * there are no NIR intrinsics for them. Atomic counter support is 3560 * currently only needed for ARB_gl_spirv, so we only need to support 3561 * GLSL atomic counters, which are uints and don't allow direct 3562 * storage. 3563 */ 3564 vtn_fail("Invalid uniform atomic"); 3565 } 3566} 3567 3568static nir_intrinsic_op 3569get_deref_nir_atomic_op(struct vtn_builder *b, SpvOp opcode) 3570{ 3571 switch (opcode) { 3572 case SpvOpAtomicLoad: return nir_intrinsic_load_deref; 3573 case SpvOpAtomicFlagClear: 3574 case SpvOpAtomicStore: return nir_intrinsic_store_deref; 3575#define OP(S, N) case SpvOp##S: return nir_intrinsic_deref_##N; 3576 OP(AtomicExchange, atomic_exchange) 3577 OP(AtomicCompareExchange, atomic_comp_swap) 3578 OP(AtomicCompareExchangeWeak, atomic_comp_swap) 3579 OP(AtomicIIncrement, atomic_add) 3580 OP(AtomicIDecrement, atomic_add) 3581 OP(AtomicIAdd, atomic_add) 3582 OP(AtomicISub, atomic_add) 3583 OP(AtomicSMin, atomic_imin) 3584 OP(AtomicUMin, atomic_umin) 3585 OP(AtomicSMax, atomic_imax) 3586 OP(AtomicUMax, atomic_umax) 3587 OP(AtomicAnd, atomic_and) 3588 OP(AtomicOr, atomic_or) 3589 OP(AtomicXor, atomic_xor) 3590 OP(AtomicFAddEXT, atomic_fadd) 3591 OP(AtomicFMinEXT, atomic_fmin) 3592 OP(AtomicFMaxEXT, atomic_fmax) 3593 OP(AtomicFlagTestAndSet, atomic_comp_swap) 3594#undef OP 3595 default: 3596 vtn_fail_with_opcode("Invalid shared atomic", opcode); 3597 } 3598} 3599 3600/* 3601 * Handles shared atomics, SSBO atomics and atomic counters.
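 *
 * Operand layout, as consumed by the switch below: for the value-returning
 * atomics the pointer, scope and semantics are w[3], w[4] and w[5]; for
 * OpAtomicStore and OpAtomicFlagClear they are w[1], w[2] and w[3]. An
 * illustrative instruction of the first form would be:
 *
 *    %old = OpAtomicIAdd %uint %ptr %scope %semantics %value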
3602 */ 3603static void 3604vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode, 3605 const uint32_t *w, UNUSED unsigned count) 3606{ 3607 struct vtn_pointer *ptr; 3608 nir_intrinsic_instr *atomic; 3609 3610 SpvScope scope = SpvScopeInvocation; 3611 SpvMemorySemanticsMask semantics = 0; 3612 enum gl_access_qualifier access = 0; 3613 3614 switch (opcode) { 3615 case SpvOpAtomicLoad: 3616 case SpvOpAtomicExchange: 3617 case SpvOpAtomicCompareExchange: 3618 case SpvOpAtomicCompareExchangeWeak: 3619 case SpvOpAtomicIIncrement: 3620 case SpvOpAtomicIDecrement: 3621 case SpvOpAtomicIAdd: 3622 case SpvOpAtomicISub: 3623 case SpvOpAtomicSMin: 3624 case SpvOpAtomicUMin: 3625 case SpvOpAtomicSMax: 3626 case SpvOpAtomicUMax: 3627 case SpvOpAtomicAnd: 3628 case SpvOpAtomicOr: 3629 case SpvOpAtomicXor: 3630 case SpvOpAtomicFAddEXT: 3631 case SpvOpAtomicFMinEXT: 3632 case SpvOpAtomicFMaxEXT: 3633 case SpvOpAtomicFlagTestAndSet: 3634 ptr = vtn_pointer(b, w[3]); 3635 scope = vtn_constant_uint(b, w[4]); 3636 semantics = vtn_constant_uint(b, w[5]); 3637 break; 3638 case SpvOpAtomicFlagClear: 3639 case SpvOpAtomicStore: 3640 ptr = vtn_pointer(b, w[1]); 3641 scope = vtn_constant_uint(b, w[2]); 3642 semantics = vtn_constant_uint(b, w[3]); 3643 break; 3644 3645 default: 3646 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode); 3647 } 3648 3649 if (semantics & SpvMemorySemanticsVolatileMask) 3650 access |= ACCESS_VOLATILE; 3651 3652 /* uniform as "atomic counter uniform" */ 3653 if (ptr->mode == vtn_variable_mode_atomic_counter) { 3654 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr); 3655 nir_intrinsic_op op = get_uniform_nir_atomic_op(b, opcode); 3656 atomic = nir_intrinsic_instr_create(b->nb.shader, op); 3657 atomic->src[0] = nir_src_for_ssa(&deref->dest.ssa); 3658 3659 /* SSBO needs to initialize index/offset. 
In this case we don't need to, 3660 * as that info is already stored on the ptr->var->var nir_variable (see 3661 * vtn_create_variable). 3662 */ 3663 3664 switch (opcode) { 3665 case SpvOpAtomicLoad: 3666 case SpvOpAtomicExchange: 3667 case SpvOpAtomicCompareExchange: 3668 case SpvOpAtomicCompareExchangeWeak: 3669 case SpvOpAtomicIIncrement: 3670 case SpvOpAtomicIDecrement: 3671 case SpvOpAtomicIAdd: 3672 case SpvOpAtomicISub: 3673 case SpvOpAtomicSMin: 3674 case SpvOpAtomicUMin: 3675 case SpvOpAtomicSMax: 3676 case SpvOpAtomicUMax: 3677 case SpvOpAtomicAnd: 3678 case SpvOpAtomicOr: 3679 case SpvOpAtomicXor: 3680 /* Nothing: we don't need to call fill_common_atomic_sources here, as 3681 * atomic counter uniforms don't have sources. 3682 */ 3683 break; 3684 3685 default: 3686 unreachable("Invalid SPIR-V atomic"); 3687 3688 } 3689 } else { 3690 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr); 3691 const struct glsl_type *deref_type = deref->type; 3692 nir_intrinsic_op op = get_deref_nir_atomic_op(b, opcode); 3693 atomic = nir_intrinsic_instr_create(b->nb.shader, op); 3694 atomic->src[0] = nir_src_for_ssa(&deref->dest.ssa); 3695 3696 if (ptr->mode != vtn_variable_mode_workgroup) 3697 access |= ACCESS_COHERENT; 3698 3699 nir_intrinsic_set_access(atomic, access); 3700 3701 switch (opcode) { 3702 case SpvOpAtomicLoad: 3703 atomic->num_components = glsl_get_vector_elements(deref_type); 3704 break; 3705 3706 case SpvOpAtomicStore: 3707 atomic->num_components = glsl_get_vector_elements(deref_type); 3708 nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1); 3709 atomic->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4])); 3710 break; 3711 3712 case SpvOpAtomicFlagClear: 3713 atomic->num_components = 1; 3714 nir_intrinsic_set_write_mask(atomic, 1); 3715 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32)); 3716 break; 3717 case SpvOpAtomicFlagTestAndSet: 3718 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32)); 3719 atomic->src[2] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, 32)); 3720 break; 3721 case SpvOpAtomicExchange: 3722 case SpvOpAtomicCompareExchange: 3723 case SpvOpAtomicCompareExchangeWeak: 3724 case SpvOpAtomicIIncrement: 3725 case SpvOpAtomicIDecrement: 3726 case SpvOpAtomicIAdd: 3727 case SpvOpAtomicISub: 3728 case SpvOpAtomicSMin: 3729 case SpvOpAtomicUMin: 3730 case SpvOpAtomicSMax: 3731 case SpvOpAtomicUMax: 3732 case SpvOpAtomicAnd: 3733 case SpvOpAtomicOr: 3734 case SpvOpAtomicXor: 3735 case SpvOpAtomicFAddEXT: 3736 case SpvOpAtomicFMinEXT: 3737 case SpvOpAtomicFMaxEXT: 3738 fill_common_atomic_sources(b, opcode, w, &atomic->src[1]); 3739 break; 3740 3741 default: 3742 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode); 3743 } 3744 } 3745 3746 /* Atomic ordering operations will implicitly apply to the atomic operation 3747 * storage class, so include that too. 3748 */ 3749 semantics |= vtn_mode_to_memory_semantics(ptr->mode); 3750 3751 SpvMemorySemanticsMask before_semantics; 3752 SpvMemorySemanticsMask after_semantics; 3753 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics); 3754 3755 if (before_semantics) 3756 vtn_emit_memory_barrier(b, scope, before_semantics); 3757 3758 if (opcode != SpvOpAtomicStore && opcode != SpvOpAtomicFlagClear) { 3759 struct vtn_type *type = vtn_get_type(b, w[1]); 3760 3761 if (opcode == SpvOpAtomicFlagTestAndSet) { 3762 /* Map atomic flag to a 32-bit atomic integer.
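 * OpAtomicFlagTestAndSet was set up above as a comp_swap of 0 with ~0, so
 * the returned previous value is non-zero exactly when the flag was
 * already set; it is converted back to a boolean with nir_i2b1 below.
 * OpAtomicFlagClear was likewise set up as a single-component store of 0.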
*/ 3763 nir_ssa_dest_init(&atomic->instr, &atomic->dest, 3764 1, 32, NULL); 3765 } else { 3766 nir_ssa_dest_init(&atomic->instr, &atomic->dest, 3767 glsl_get_vector_elements(type->type), 3768 glsl_get_bit_size(type->type), NULL); 3769 3770 vtn_push_nir_ssa(b, w[2], &atomic->dest.ssa); 3771 } 3772 } 3773 3774 nir_builder_instr_insert(&b->nb, &atomic->instr); 3775 3776 if (opcode == SpvOpAtomicFlagTestAndSet) { 3777 vtn_push_nir_ssa(b, w[2], nir_i2b1(&b->nb, &atomic->dest.ssa)); 3778 } 3779 if (after_semantics) 3780 vtn_emit_memory_barrier(b, scope, after_semantics); 3781} 3782 3783static nir_alu_instr * 3784create_vec(struct vtn_builder *b, unsigned num_components, unsigned bit_size) 3785{ 3786 nir_op op = nir_op_vec(num_components); 3787 nir_alu_instr *vec = nir_alu_instr_create(b->shader, op); 3788 nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, 3789 bit_size, NULL); 3790 vec->dest.write_mask = (1 << num_components) - 1; 3791 3792 return vec; 3793} 3794 3795struct vtn_ssa_value * 3796vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) 3797{ 3798 if (src->transposed) 3799 return src->transposed; 3800 3801 struct vtn_ssa_value *dest = 3802 vtn_create_ssa_value(b, glsl_transposed_type(src->type)); 3803 3804 for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { 3805 nir_alu_instr *vec = create_vec(b, glsl_get_matrix_columns(src->type), 3806 glsl_get_bit_size(src->type)); 3807 if (glsl_type_is_vector_or_scalar(src->type)) { 3808 vec->src[0].src = nir_src_for_ssa(src->def); 3809 vec->src[0].swizzle[0] = i; 3810 } else { 3811 for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { 3812 vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); 3813 vec->src[j].swizzle[0] = i; 3814 } 3815 } 3816 nir_builder_instr_insert(&b->nb, &vec->instr); 3817 dest->elems[i]->def = &vec->dest.dest.ssa; 3818 } 3819 3820 dest->transposed = src; 3821 3822 return dest; 3823} 3824 3825static nir_ssa_def * 3826vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, 3827 nir_ssa_def *src0, nir_ssa_def *src1, 3828 const uint32_t *indices) 3829{ 3830 nir_alu_instr *vec = create_vec(b, num_components, src0->bit_size); 3831 3832 for (unsigned i = 0; i < num_components; i++) { 3833 uint32_t index = indices[i]; 3834 if (index == 0xffffffff) { 3835 vec->src[i].src = 3836 nir_src_for_ssa(nir_ssa_undef(&b->nb, 1, src0->bit_size)); 3837 } else if (index < src0->num_components) { 3838 vec->src[i].src = nir_src_for_ssa(src0); 3839 vec->src[i].swizzle[0] = index; 3840 } else { 3841 vec->src[i].src = nir_src_for_ssa(src1); 3842 vec->src[i].swizzle[0] = index - src0->num_components; 3843 } 3844 } 3845 3846 nir_builder_instr_insert(&b->nb, &vec->instr); 3847 3848 return &vec->dest.dest.ssa; 3849} 3850 3851/* 3852 * Concatenates a number of vectors/scalars together to produce a vector. 3853 */ 3854static nir_ssa_def * 3855vtn_vector_construct(struct vtn_builder *b, unsigned num_components, 3856 unsigned num_srcs, nir_ssa_def **srcs) 3857{ 3858 nir_alu_instr *vec = create_vec(b, num_components, srcs[0]->bit_size); 3859 3860 /* From the SPIR-V 1.1 spec for OpCompositeConstruct: 3861 * 3862 * "When constructing a vector, there must be at least two Constituent 3863 * operands."
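 *
 * (Each scalar constituent contributes one component and each vector
 * constituent contributes all of its components; the flattening loop below
 * handles both cases uniformly.)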
3864 */ 3865 vtn_assert(num_srcs >= 2); 3866 3867 unsigned dest_idx = 0; 3868 for (unsigned i = 0; i < num_srcs; i++) { 3869 nir_ssa_def *src = srcs[i]; 3870 vtn_assert(dest_idx + src->num_components <= num_components); 3871 for (unsigned j = 0; j < src->num_components; j++) { 3872 vec->src[dest_idx].src = nir_src_for_ssa(src); 3873 vec->src[dest_idx].swizzle[0] = j; 3874 dest_idx++; 3875 } 3876 } 3877 3878 /* From the SPIR-V 1.1 spec for OpCompositeConstruct: 3879 * 3880 * "When constructing a vector, the total number of components in all 3881 * the operands must equal the number of components in Result Type." 3882 */ 3883 vtn_assert(dest_idx == num_components); 3884 3885 nir_builder_instr_insert(&b->nb, &vec->instr); 3886 3887 return &vec->dest.dest.ssa; 3888} 3889 3890static struct vtn_ssa_value * 3891vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) 3892{ 3893 struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); 3894 dest->type = src->type; 3895 3896 if (glsl_type_is_vector_or_scalar(src->type)) { 3897 dest->def = src->def; 3898 } else { 3899 unsigned elems = glsl_get_length(src->type); 3900 3901 dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); 3902 for (unsigned i = 0; i < elems; i++) 3903 dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); 3904 } 3905 3906 return dest; 3907} 3908 3909static struct vtn_ssa_value * 3910vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, 3911 struct vtn_ssa_value *insert, const uint32_t *indices, 3912 unsigned num_indices) 3913{ 3914 struct vtn_ssa_value *dest = vtn_composite_copy(b, src); 3915 3916 struct vtn_ssa_value *cur = dest; 3917 unsigned i; 3918 for (i = 0; i < num_indices - 1; i++) { 3919 /* If we got a vector here, that means the next index will be trying to 3920 * dereference a scalar. 3921 */ 3922 vtn_fail_if(glsl_type_is_vector_or_scalar(cur->type), 3923 "OpCompositeInsert has too many indices."); 3924 vtn_fail_if(indices[i] >= glsl_get_length(cur->type), 3925 "All indices in an OpCompositeInsert must be in-bounds"); 3926 cur = cur->elems[indices[i]]; 3927 } 3928 3929 if (glsl_type_is_vector_or_scalar(cur->type)) { 3930 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type), 3931 "All indices in an OpCompositeInsert must be in-bounds"); 3932 3933 /* According to the SPIR-V spec, OpCompositeInsert may work down to 3934 * the component granularity. In that case, the last index will be 3935 * the index to insert the scalar into the vector. 3936 */ 3937 3938 cur->def = nir_vector_insert_imm(&b->nb, cur->def, insert->def, indices[i]); 3939 } else { 3940 vtn_fail_if(indices[i] >= glsl_get_length(cur->type), 3941 "All indices in an OpCompositeInsert must be in-bounds"); 3942 cur->elems[indices[i]] = insert; 3943 } 3944 3945 return dest; 3946} 3947 3948static struct vtn_ssa_value * 3949vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, 3950 const uint32_t *indices, unsigned num_indices) 3951{ 3952 struct vtn_ssa_value *cur = src; 3953 for (unsigned i = 0; i < num_indices; i++) { 3954 if (glsl_type_is_vector_or_scalar(cur->type)) { 3955 vtn_assert(i == num_indices - 1); 3956 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type), 3957 "All indices in an OpCompositeExtract must be in-bounds"); 3958 3959 /* According to the SPIR-V spec, OpCompositeExtract may work down to 3960 * the component granularity. The last index will be the index of the 3961 * vector to extract. 
3962 */ 3963 3964 const struct glsl_type *scalar_type = 3965 glsl_scalar_type(glsl_get_base_type(cur->type)); 3966 struct vtn_ssa_value *ret = vtn_create_ssa_value(b, scalar_type); 3967 ret->def = nir_channel(&b->nb, cur->def, indices[i]); 3968 return ret; 3969 } else { 3970 vtn_fail_if(indices[i] >= glsl_get_length(cur->type), 3971 "All indices in an OpCompositeExtract must be in-bounds"); 3972 cur = cur->elems[indices[i]]; 3973 } 3974 } 3975 3976 return cur; 3977} 3978 3979static void 3980vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, 3981 const uint32_t *w, unsigned count) 3982{ 3983 struct vtn_type *type = vtn_get_type(b, w[1]); 3984 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type); 3985 3986 switch (opcode) { 3987 case SpvOpVectorExtractDynamic: 3988 ssa->def = nir_vector_extract(&b->nb, vtn_get_nir_ssa(b, w[3]), 3989 vtn_get_nir_ssa(b, w[4])); 3990 break; 3991 3992 case SpvOpVectorInsertDynamic: 3993 ssa->def = nir_vector_insert(&b->nb, vtn_get_nir_ssa(b, w[3]), 3994 vtn_get_nir_ssa(b, w[4]), 3995 vtn_get_nir_ssa(b, w[5])); 3996 break; 3997 3998 case SpvOpVectorShuffle: 3999 ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type->type), 4000 vtn_get_nir_ssa(b, w[3]), 4001 vtn_get_nir_ssa(b, w[4]), 4002 w + 5); 4003 break; 4004 4005 case SpvOpCompositeConstruct: { 4006 unsigned elems = count - 3; 4007 assume(elems >= 1); 4008 if (glsl_type_is_vector_or_scalar(type->type)) { 4009 nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS]; 4010 for (unsigned i = 0; i < elems; i++) 4011 srcs[i] = vtn_get_nir_ssa(b, w[3 + i]); 4012 ssa->def = 4013 vtn_vector_construct(b, glsl_get_vector_elements(type->type), 4014 elems, srcs); 4015 } else { 4016 ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); 4017 for (unsigned i = 0; i < elems; i++) 4018 ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); 4019 } 4020 break; 4021 } 4022 case SpvOpCompositeExtract: 4023 ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), 4024 w + 4, count - 4); 4025 break; 4026 4027 case SpvOpCompositeInsert: 4028 ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), 4029 vtn_ssa_value(b, w[3]), 4030 w + 5, count - 5); 4031 break; 4032 4033 case SpvOpCopyLogical: 4034 ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); 4035 break; 4036 case SpvOpCopyObject: 4037 vtn_copy_value(b, w[3], w[2]); 4038 return; 4039 4040 default: 4041 vtn_fail_with_opcode("unknown composite operation", opcode); 4042 } 4043 4044 vtn_push_ssa_value(b, w[2], ssa); 4045} 4046 4047void 4048vtn_emit_memory_barrier(struct vtn_builder *b, SpvScope scope, 4049 SpvMemorySemanticsMask semantics) 4050{ 4051 if (b->shader->options->use_scoped_barrier) { 4052 vtn_emit_scoped_memory_barrier(b, scope, semantics); 4053 return; 4054 } 4055 4056 static const SpvMemorySemanticsMask all_memory_semantics = 4057 SpvMemorySemanticsUniformMemoryMask | 4058 SpvMemorySemanticsWorkgroupMemoryMask | 4059 SpvMemorySemanticsAtomicCounterMemoryMask | 4060 SpvMemorySemanticsImageMemoryMask | 4061 SpvMemorySemanticsOutputMemoryMask; 4062 4063 /* If we're not actually doing a memory barrier, bail */ 4064 if (!(semantics & all_memory_semantics)) 4065 return; 4066 4067 /* GL and Vulkan don't have these */ 4068 vtn_assert(scope != SpvScopeCrossDevice); 4069 4070 if (scope == SpvScopeSubgroup) 4071 return; /* Nothing to do here */ 4072 4073 if (scope == SpvScopeWorkgroup) { 4074 nir_group_memory_barrier(&b->nb); 4075 return; 4076 } 4077 4078 /* There's only two scopes thing left */ 4079 vtn_assert(scope == SpvScopeInvocation || scope == SpvScopeDevice); 4080 
4081 /* Map the GLSL memoryBarrier() construct and any barriers with more than one 4082 * semantic to the corresponding NIR one. 4083 */ 4084 if (util_bitcount(semantics & all_memory_semantics) > 1) { 4085 nir_memory_barrier(&b->nb); 4086 if (semantics & SpvMemorySemanticsOutputMemoryMask) { 4087 /* GLSL memoryBarrier() (and the corresponding NIR one) doesn't include 4088 * TCS outputs, so we have to emit its own intrinsic for that. We 4089 * then need to emit another memory_barrier to prevent moving 4090 * non-output operations to before the tcs_patch barrier. 4091 */ 4092 nir_memory_barrier_tcs_patch(&b->nb); 4093 nir_memory_barrier(&b->nb); 4094 } 4095 return; 4096 } 4097 4098 /* Issue a more specific barrier */ 4099 switch (semantics & all_memory_semantics) { 4100 case SpvMemorySemanticsUniformMemoryMask: 4101 nir_memory_barrier_buffer(&b->nb); 4102 break; 4103 case SpvMemorySemanticsWorkgroupMemoryMask: 4104 nir_memory_barrier_shared(&b->nb); 4105 break; 4106 case SpvMemorySemanticsAtomicCounterMemoryMask: 4107 nir_memory_barrier_atomic_counter(&b->nb); 4108 break; 4109 case SpvMemorySemanticsImageMemoryMask: 4110 nir_memory_barrier_image(&b->nb); 4111 break; 4112 case SpvMemorySemanticsOutputMemoryMask: 4113 if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL) 4114 nir_memory_barrier_tcs_patch(&b->nb); 4115 break; 4116 default: 4117 break; 4118 } 4119} 4120 4121static void 4122vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, 4123 const uint32_t *w, UNUSED unsigned count) 4124{ 4125 switch (opcode) { 4126 case SpvOpEmitVertex: 4127 case SpvOpEmitStreamVertex: 4128 case SpvOpEndPrimitive: 4129 case SpvOpEndStreamPrimitive: { 4130 unsigned stream = 0; 4131 if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) 4132 stream = vtn_constant_uint(b, w[1]); 4133 4134 switch (opcode) { 4135 case SpvOpEmitStreamVertex: 4136 case SpvOpEmitVertex: 4137 nir_emit_vertex(&b->nb, stream); 4138 break; 4139 case SpvOpEndPrimitive: 4140 case SpvOpEndStreamPrimitive: 4141 nir_end_primitive(&b->nb, stream); 4142 break; 4143 default: 4144 unreachable("Invalid opcode"); 4145 } 4146 break; 4147 } 4148 4149 case SpvOpMemoryBarrier: { 4150 SpvScope scope = vtn_constant_uint(b, w[1]); 4151 SpvMemorySemanticsMask semantics = vtn_constant_uint(b, w[2]); 4152 vtn_emit_memory_barrier(b, scope, semantics); 4153 return; 4154 } 4155 4156 case SpvOpControlBarrier: { 4157 SpvScope execution_scope = vtn_constant_uint(b, w[1]); 4158 SpvScope memory_scope = vtn_constant_uint(b, w[2]); 4159 SpvMemorySemanticsMask memory_semantics = vtn_constant_uint(b, w[3]); 4160 4161 /* GLSLang, prior to commit 8297936dd6eb3, emitted OpControlBarrier with 4162 * memory semantics of None for GLSL barrier(). 4163 * And before that, prior to c3f1cdfa, emitted OpControlBarrier with 4164 * Device instead of Workgroup for execution scope.
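 * The workaround below rewrites such a barrier() in a compute shader to
 * Workgroup execution and memory scope with AcquireRelease and
 * WorkgroupMemory semantics, matching what newer glslang emits.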
4165 */ 4166 if (b->wa_glslang_cs_barrier && 4167 b->nb.shader->info.stage == MESA_SHADER_COMPUTE && 4168 (execution_scope == SpvScopeWorkgroup || 4169 execution_scope == SpvScopeDevice) && 4170 memory_semantics == SpvMemorySemanticsMaskNone) { 4171 execution_scope = SpvScopeWorkgroup; 4172 memory_scope = SpvScopeWorkgroup; 4173 memory_semantics = SpvMemorySemanticsAcquireReleaseMask | 4174 SpvMemorySemanticsWorkgroupMemoryMask; 4175 } 4176 4177 /* From the SPIR-V spec: 4178 * 4179 * "When used with the TessellationControl execution model, it also 4180 * implicitly synchronizes the Output Storage Class: Writes to Output 4181 * variables performed by any invocation executed prior to a 4182 * OpControlBarrier will be visible to any other invocation after 4183 * return from that OpControlBarrier." 4184 * 4185 * The same applies to VK_NV_mesh_shader. 4186 */ 4187 if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL || 4188 b->nb.shader->info.stage == MESA_SHADER_TASK || 4189 b->nb.shader->info.stage == MESA_SHADER_MESH) { 4190 memory_semantics &= ~(SpvMemorySemanticsAcquireMask | 4191 SpvMemorySemanticsReleaseMask | 4192 SpvMemorySemanticsAcquireReleaseMask | 4193 SpvMemorySemanticsSequentiallyConsistentMask); 4194 memory_semantics |= SpvMemorySemanticsAcquireReleaseMask | 4195 SpvMemorySemanticsOutputMemoryMask; 4196 } 4197 4198 if (b->shader->options->use_scoped_barrier) { 4199 vtn_emit_scoped_control_barrier(b, execution_scope, memory_scope, 4200 memory_semantics); 4201 } else { 4202 vtn_emit_memory_barrier(b, memory_scope, memory_semantics); 4203 4204 if (execution_scope == SpvScopeWorkgroup) 4205 nir_control_barrier(&b->nb); 4206 } 4207 break; 4208 } 4209 4210 default: 4211 unreachable("unknown barrier instruction"); 4212 } 4213} 4214 4215static unsigned 4216gl_primitive_from_spv_execution_mode(struct vtn_builder *b, 4217 SpvExecutionMode mode) 4218{ 4219 switch (mode) { 4220 case SpvExecutionModeInputPoints: 4221 case SpvExecutionModeOutputPoints: 4222 return 0; /* GL_POINTS */ 4223 case SpvExecutionModeInputLines: 4224 case SpvExecutionModeOutputLinesNV: 4225 return 1; /* GL_LINES */ 4226 case SpvExecutionModeInputLinesAdjacency: 4227 return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */ 4228 case SpvExecutionModeTriangles: 4229 case SpvExecutionModeOutputTrianglesNV: 4230 return 4; /* GL_TRIANGLES */ 4231 case SpvExecutionModeInputTrianglesAdjacency: 4232 return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ 4233 case SpvExecutionModeQuads: 4234 return 7; /* GL_QUADS */ 4235 case SpvExecutionModeIsolines: 4236 return 0x8E7A; /* GL_ISOLINES */ 4237 case SpvExecutionModeOutputLineStrip: 4238 return 3; /* GL_LINE_STRIP */ 4239 case SpvExecutionModeOutputTriangleStrip: 4240 return 5; /* GL_TRIANGLE_STRIP */ 4241 default: 4242 vtn_fail("Invalid primitive type: %s (%u)", 4243 spirv_executionmode_to_string(mode), mode); 4244 } 4245} 4246 4247static unsigned 4248vertices_in_from_spv_execution_mode(struct vtn_builder *b, 4249 SpvExecutionMode mode) 4250{ 4251 switch (mode) { 4252 case SpvExecutionModeInputPoints: 4253 return 1; 4254 case SpvExecutionModeInputLines: 4255 return 2; 4256 case SpvExecutionModeInputLinesAdjacency: 4257 return 4; 4258 case SpvExecutionModeTriangles: 4259 return 3; 4260 case SpvExecutionModeInputTrianglesAdjacency: 4261 return 6; 4262 default: 4263 vtn_fail("Invalid GS input mode: %s (%u)", 4264 spirv_executionmode_to_string(mode), mode); 4265 } 4266} 4267 4268static gl_shader_stage 4269stage_for_execution_model(struct vtn_builder *b, SpvExecutionModel model) 4270{ 4271 switch 
(model) { 4272 case SpvExecutionModelVertex: 4273 return MESA_SHADER_VERTEX; 4274 case SpvExecutionModelTessellationControl: 4275 return MESA_SHADER_TESS_CTRL; 4276 case SpvExecutionModelTessellationEvaluation: 4277 return MESA_SHADER_TESS_EVAL; 4278 case SpvExecutionModelGeometry: 4279 return MESA_SHADER_GEOMETRY; 4280 case SpvExecutionModelFragment: 4281 return MESA_SHADER_FRAGMENT; 4282 case SpvExecutionModelGLCompute: 4283 return MESA_SHADER_COMPUTE; 4284 case SpvExecutionModelKernel: 4285 return MESA_SHADER_KERNEL; 4286 case SpvExecutionModelRayGenerationKHR: 4287 return MESA_SHADER_RAYGEN; 4288 case SpvExecutionModelAnyHitKHR: 4289 return MESA_SHADER_ANY_HIT; 4290 case SpvExecutionModelClosestHitKHR: 4291 return MESA_SHADER_CLOSEST_HIT; 4292 case SpvExecutionModelMissKHR: 4293 return MESA_SHADER_MISS; 4294 case SpvExecutionModelIntersectionKHR: 4295 return MESA_SHADER_INTERSECTION; 4296 case SpvExecutionModelCallableKHR: 4297 return MESA_SHADER_CALLABLE; 4298 case SpvExecutionModelTaskNV: 4299 return MESA_SHADER_TASK; 4300 case SpvExecutionModelMeshNV: 4301 return MESA_SHADER_MESH; 4302 default: 4303 vtn_fail("Unsupported execution model: %s (%u)", 4304 spirv_executionmodel_to_string(model), model); 4305 } 4306} 4307 4308#define spv_check_supported(name, cap) do { \ 4309 if (!(b->options && b->options->caps.name)) \ 4310 vtn_warn("Unsupported SPIR-V capability: %s (%u)", \ 4311 spirv_capability_to_string(cap), cap); \ 4312 } while(0) 4313 4314 4315void 4316vtn_handle_entry_point(struct vtn_builder *b, const uint32_t *w, 4317 unsigned count) 4318{ 4319 struct vtn_value *entry_point = &b->values[w[2]]; 4320 /* Let this be a name label regardless */ 4321 unsigned name_words; 4322 entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words); 4323 4324 if (strcmp(entry_point->name, b->entry_point_name) != 0 || 4325 stage_for_execution_model(b, w[1]) != b->entry_point_stage) 4326 return; 4327 4328 vtn_assert(b->entry_point == NULL); 4329 b->entry_point = entry_point; 4330 4331 /* Entry points enumerate which global variables are used. */ 4332 size_t start = 3 + name_words; 4333 b->interface_ids_count = count - start; 4334 b->interface_ids = ralloc_array(b, uint32_t, b->interface_ids_count); 4335 memcpy(b->interface_ids, &w[start], b->interface_ids_count * 4); 4336 qsort(b->interface_ids, b->interface_ids_count, 4, cmp_uint32_t); 4337} 4338 4339static bool 4340vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, 4341 const uint32_t *w, unsigned count) 4342{ 4343 switch (opcode) { 4344 case SpvOpSource: { 4345 const char *lang; 4346 switch (w[1]) { 4347 default: 4348 case SpvSourceLanguageUnknown: lang = "unknown"; break; 4349 case SpvSourceLanguageESSL: lang = "ESSL"; break; 4350 case SpvSourceLanguageGLSL: lang = "GLSL"; break; 4351 case SpvSourceLanguageOpenCL_C: lang = "OpenCL C"; break; 4352 case SpvSourceLanguageOpenCL_CPP: lang = "OpenCL C++"; break; 4353 case SpvSourceLanguageHLSL: lang = "HLSL"; break; 4354 } 4355 4356 uint32_t version = w[2]; 4357 4358 const char *file = 4359 (count > 3) ? vtn_value(b, w[3], vtn_value_type_string)->str : ""; 4360 4361 vtn_info("Parsing SPIR-V from %s %u source file %s", lang, version, file); 4362 4363 b->source_lang = w[1]; 4364 break; 4365 } 4366 4367 case SpvOpSourceExtension: 4368 case SpvOpSourceContinued: 4369 case SpvOpExtension: 4370 case SpvOpModuleProcessed: 4371 /* Unhandled, but these are for debug so that's ok. 
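 * (OpModuleProcessed, for instance, merely records a tool pass that was
 * applied to the module, and OpSourceExtension names a source-language
 * extension; neither affects the generated NIR.)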
*/ 4372 break; 4373 4374 case SpvOpCapability: { 4375 SpvCapability cap = w[1]; 4376 switch (cap) { 4377 case SpvCapabilityMatrix: 4378 case SpvCapabilityShader: 4379 case SpvCapabilityGeometry: 4380 case SpvCapabilityGeometryPointSize: 4381 case SpvCapabilityUniformBufferArrayDynamicIndexing: 4382 case SpvCapabilitySampledImageArrayDynamicIndexing: 4383 case SpvCapabilityStorageBufferArrayDynamicIndexing: 4384 case SpvCapabilityStorageImageArrayDynamicIndexing: 4385 case SpvCapabilityImageRect: 4386 case SpvCapabilitySampledRect: 4387 case SpvCapabilitySampled1D: 4388 case SpvCapabilityImage1D: 4389 case SpvCapabilitySampledCubeArray: 4390 case SpvCapabilityImageCubeArray: 4391 case SpvCapabilitySampledBuffer: 4392 case SpvCapabilityImageBuffer: 4393 case SpvCapabilityImageQuery: 4394 case SpvCapabilityDerivativeControl: 4395 case SpvCapabilityInterpolationFunction: 4396 case SpvCapabilityMultiViewport: 4397 case SpvCapabilitySampleRateShading: 4398 case SpvCapabilityClipDistance: 4399 case SpvCapabilityCullDistance: 4400 case SpvCapabilityInputAttachment: 4401 case SpvCapabilityImageGatherExtended: 4402 case SpvCapabilityStorageImageExtendedFormats: 4403 case SpvCapabilityVector16: 4404 case SpvCapabilityDotProductKHR: 4405 case SpvCapabilityDotProductInputAllKHR: 4406 case SpvCapabilityDotProductInput4x8BitKHR: 4407 case SpvCapabilityDotProductInput4x8BitPackedKHR: 4408 break; 4409 4410 case SpvCapabilityLinkage: 4411 if (!b->options->create_library) 4412 vtn_warn("Unsupported SPIR-V capability: %s", 4413 spirv_capability_to_string(cap)); 4414 break; 4415 4416 case SpvCapabilitySparseResidency: 4417 spv_check_supported(sparse_residency, cap); 4418 break; 4419 4420 case SpvCapabilityMinLod: 4421 spv_check_supported(min_lod, cap); 4422 break; 4423 4424 case SpvCapabilityAtomicStorage: 4425 spv_check_supported(atomic_storage, cap); 4426 break; 4427 4428 case SpvCapabilityFloat64: 4429 spv_check_supported(float64, cap); 4430 break; 4431 case SpvCapabilityInt64: 4432 spv_check_supported(int64, cap); 4433 break; 4434 case SpvCapabilityInt16: 4435 spv_check_supported(int16, cap); 4436 break; 4437 case SpvCapabilityInt8: 4438 spv_check_supported(int8, cap); 4439 break; 4440 4441 case SpvCapabilityTransformFeedback: 4442 spv_check_supported(transform_feedback, cap); 4443 break; 4444 4445 case SpvCapabilityGeometryStreams: 4446 spv_check_supported(geometry_streams, cap); 4447 break; 4448 4449 case SpvCapabilityInt64Atomics: 4450 spv_check_supported(int64_atomics, cap); 4451 break; 4452 4453 case SpvCapabilityStorageImageMultisample: 4454 spv_check_supported(storage_image_ms, cap); 4455 break; 4456 4457 case SpvCapabilityAddresses: 4458 spv_check_supported(address, cap); 4459 break; 4460 4461 case SpvCapabilityKernel: 4462 case SpvCapabilityFloat16Buffer: 4463 spv_check_supported(kernel, cap); 4464 break; 4465 4466 case SpvCapabilityGenericPointer: 4467 spv_check_supported(generic_pointers, cap); 4468 break; 4469 4470 case SpvCapabilityImageBasic: 4471 spv_check_supported(kernel_image, cap); 4472 break; 4473 4474 case SpvCapabilityImageReadWrite: 4475 spv_check_supported(kernel_image_read_write, cap); 4476 break; 4477 4478 case SpvCapabilityLiteralSampler: 4479 spv_check_supported(literal_sampler, cap); 4480 break; 4481 4482 case SpvCapabilityImageMipmap: 4483 case SpvCapabilityPipes: 4484 case SpvCapabilityDeviceEnqueue: 4485 vtn_warn("Unsupported OpenCL-style SPIR-V capability: %s", 4486 spirv_capability_to_string(cap)); 4487 break; 4488 4489 case SpvCapabilityImageMSArray: 4490 
spv_check_supported(image_ms_array, cap); 4491 break; 4492 4493 case SpvCapabilityTessellation: 4494 case SpvCapabilityTessellationPointSize: 4495 spv_check_supported(tessellation, cap); 4496 break; 4497 4498 case SpvCapabilityDrawParameters: 4499 spv_check_supported(draw_parameters, cap); 4500 break; 4501 4502 case SpvCapabilityStorageImageReadWithoutFormat: 4503 spv_check_supported(image_read_without_format, cap); 4504 break; 4505 4506 case SpvCapabilityStorageImageWriteWithoutFormat: 4507 spv_check_supported(image_write_without_format, cap); 4508 break; 4509 4510 case SpvCapabilityDeviceGroup: 4511 spv_check_supported(device_group, cap); 4512 break; 4513 4514 case SpvCapabilityMultiView: 4515 spv_check_supported(multiview, cap); 4516 break; 4517 4518 case SpvCapabilityGroupNonUniform: 4519 spv_check_supported(subgroup_basic, cap); 4520 break; 4521 4522 case SpvCapabilitySubgroupVoteKHR: 4523 case SpvCapabilityGroupNonUniformVote: 4524 spv_check_supported(subgroup_vote, cap); 4525 break; 4526 4527 case SpvCapabilitySubgroupBallotKHR: 4528 case SpvCapabilityGroupNonUniformBallot: 4529 spv_check_supported(subgroup_ballot, cap); 4530 break; 4531 4532 case SpvCapabilityGroupNonUniformShuffle: 4533 case SpvCapabilityGroupNonUniformShuffleRelative: 4534 spv_check_supported(subgroup_shuffle, cap); 4535 break; 4536 4537 case SpvCapabilityGroupNonUniformQuad: 4538 spv_check_supported(subgroup_quad, cap); 4539 break; 4540 4541 case SpvCapabilityGroupNonUniformArithmetic: 4542 case SpvCapabilityGroupNonUniformClustered: 4543 spv_check_supported(subgroup_arithmetic, cap); 4544 break; 4545 4546 case SpvCapabilityGroups: 4547 spv_check_supported(groups, cap); 4548 break; 4549 4550 case SpvCapabilitySubgroupDispatch: 4551 spv_check_supported(subgroup_dispatch, cap); 4552 /* Missing : 4553 * - SpvOpGetKernelLocalSizeForSubgroupCount 4554 * - SpvOpGetKernelMaxNumSubgroups 4555 * - SpvExecutionModeSubgroupsPerWorkgroup 4556 * - SpvExecutionModeSubgroupsPerWorkgroupId 4557 */ 4558 vtn_warn("Not fully supported capability: %s", 4559 spirv_capability_to_string(cap)); 4560 break; 4561 4562 case SpvCapabilityVariablePointersStorageBuffer: 4563 case SpvCapabilityVariablePointers: 4564 spv_check_supported(variable_pointers, cap); 4565 b->variable_pointers = true; 4566 break; 4567 4568 case SpvCapabilityStorageUniformBufferBlock16: 4569 case SpvCapabilityStorageUniform16: 4570 case SpvCapabilityStoragePushConstant16: 4571 case SpvCapabilityStorageInputOutput16: 4572 spv_check_supported(storage_16bit, cap); 4573 break; 4574 4575 case SpvCapabilityShaderLayer: 4576 case SpvCapabilityShaderViewportIndex: 4577 case SpvCapabilityShaderViewportIndexLayerEXT: 4578 spv_check_supported(shader_viewport_index_layer, cap); 4579 break; 4580 4581 case SpvCapabilityStorageBuffer8BitAccess: 4582 case SpvCapabilityUniformAndStorageBuffer8BitAccess: 4583 case SpvCapabilityStoragePushConstant8: 4584 spv_check_supported(storage_8bit, cap); 4585 break; 4586 4587 case SpvCapabilityShaderNonUniformEXT: 4588 spv_check_supported(descriptor_indexing, cap); 4589 break; 4590 4591 case SpvCapabilityInputAttachmentArrayDynamicIndexingEXT: 4592 case SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT: 4593 case SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT: 4594 spv_check_supported(descriptor_array_dynamic_indexing, cap); 4595 break; 4596 4597 case SpvCapabilityUniformBufferArrayNonUniformIndexingEXT: 4598 case SpvCapabilitySampledImageArrayNonUniformIndexingEXT: 4599 case SpvCapabilityStorageBufferArrayNonUniformIndexingEXT: 4600 
case SpvCapabilityStorageImageArrayNonUniformIndexingEXT: 4601 case SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT: 4602 case SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT: 4603 case SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT: 4604 spv_check_supported(descriptor_array_non_uniform_indexing, cap); 4605 break; 4606 4607 case SpvCapabilityRuntimeDescriptorArrayEXT: 4608 spv_check_supported(runtime_descriptor_array, cap); 4609 break; 4610 4611 case SpvCapabilityStencilExportEXT: 4612 spv_check_supported(stencil_export, cap); 4613 break; 4614 4615 case SpvCapabilitySampleMaskPostDepthCoverage: 4616 spv_check_supported(post_depth_coverage, cap); 4617 break; 4618 4619 case SpvCapabilityDenormFlushToZero: 4620 case SpvCapabilityDenormPreserve: 4621 case SpvCapabilitySignedZeroInfNanPreserve: 4622 case SpvCapabilityRoundingModeRTE: 4623 case SpvCapabilityRoundingModeRTZ: 4624 spv_check_supported(float_controls, cap); 4625 break; 4626 4627 case SpvCapabilityPhysicalStorageBufferAddresses: 4628 spv_check_supported(physical_storage_buffer_address, cap); 4629 break; 4630 4631 case SpvCapabilityComputeDerivativeGroupQuadsNV: 4632 case SpvCapabilityComputeDerivativeGroupLinearNV: 4633 spv_check_supported(derivative_group, cap); 4634 break; 4635 4636 case SpvCapabilityFloat16: 4637 spv_check_supported(float16, cap); 4638 break; 4639 4640 case SpvCapabilityFragmentShaderSampleInterlockEXT: 4641 spv_check_supported(fragment_shader_sample_interlock, cap); 4642 break; 4643 4644 case SpvCapabilityFragmentShaderPixelInterlockEXT: 4645 spv_check_supported(fragment_shader_pixel_interlock, cap); 4646 break; 4647 4648 case SpvCapabilityDemoteToHelperInvocationEXT: 4649 spv_check_supported(demote_to_helper_invocation, cap); 4650 b->uses_demote_to_helper_invocation = true; 4651 break; 4652 4653 case SpvCapabilityShaderClockKHR: 4654 spv_check_supported(shader_clock, cap); 4655 break; 4656 4657 case SpvCapabilityVulkanMemoryModel: 4658 spv_check_supported(vk_memory_model, cap); 4659 break; 4660 4661 case SpvCapabilityVulkanMemoryModelDeviceScope: 4662 spv_check_supported(vk_memory_model_device_scope, cap); 4663 break; 4664 4665 case SpvCapabilityImageReadWriteLodAMD: 4666 spv_check_supported(amd_image_read_write_lod, cap); 4667 break; 4668 4669 case SpvCapabilityIntegerFunctions2INTEL: 4670 spv_check_supported(integer_functions2, cap); 4671 break; 4672 4673 case SpvCapabilityFragmentMaskAMD: 4674 spv_check_supported(amd_fragment_mask, cap); 4675 break; 4676 4677 case SpvCapabilityImageGatherBiasLodAMD: 4678 spv_check_supported(amd_image_gather_bias_lod, cap); 4679 break; 4680 4681 case SpvCapabilityAtomicFloat16AddEXT: 4682 spv_check_supported(float16_atomic_add, cap); 4683 break; 4684 4685 case SpvCapabilityAtomicFloat32AddEXT: 4686 spv_check_supported(float32_atomic_add, cap); 4687 break; 4688 4689 case SpvCapabilityAtomicFloat64AddEXT: 4690 spv_check_supported(float64_atomic_add, cap); 4691 break; 4692 4693 case SpvCapabilitySubgroupShuffleINTEL: 4694 spv_check_supported(intel_subgroup_shuffle, cap); 4695 break; 4696 4697 case SpvCapabilitySubgroupBufferBlockIOINTEL: 4698 spv_check_supported(intel_subgroup_buffer_block_io, cap); 4699 break; 4700 4701 case SpvCapabilityRayTracingKHR: 4702 spv_check_supported(ray_tracing, cap); 4703 break; 4704 4705 case SpvCapabilityRayQueryKHR: 4706 spv_check_supported(ray_query, cap); 4707 break; 4708 4709 case SpvCapabilityRayTraversalPrimitiveCullingKHR: 4710 spv_check_supported(ray_traversal_primitive_culling, cap); 4711 break; 4712 4713 case 
SpvCapabilityInt64ImageEXT: 4714 spv_check_supported(image_atomic_int64, cap); 4715 break; 4716 4717 case SpvCapabilityFragmentShadingRateKHR: 4718 spv_check_supported(fragment_shading_rate, cap); 4719 break; 4720 4721 case SpvCapabilityWorkgroupMemoryExplicitLayoutKHR: 4722 spv_check_supported(workgroup_memory_explicit_layout, cap); 4723 break; 4724 4725 case SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR: 4726 spv_check_supported(workgroup_memory_explicit_layout, cap); 4727 spv_check_supported(storage_8bit, cap); 4728 break; 4729 4730 case SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR: 4731 spv_check_supported(workgroup_memory_explicit_layout, cap); 4732 spv_check_supported(storage_16bit, cap); 4733 break; 4734 4735 case SpvCapabilityAtomicFloat16MinMaxEXT: 4736 spv_check_supported(float16_atomic_min_max, cap); 4737 break; 4738 4739 case SpvCapabilityAtomicFloat32MinMaxEXT: 4740 spv_check_supported(float32_atomic_min_max, cap); 4741 break; 4742 4743 case SpvCapabilityAtomicFloat64MinMaxEXT: 4744 spv_check_supported(float64_atomic_min_max, cap); 4745 break; 4746 4747 case SpvCapabilityMeshShadingNV: 4748 spv_check_supported(mesh_shading_nv, cap); 4749 break; 4750 4751 default: 4752 vtn_fail("Unhandled capability: %s (%u)", 4753 spirv_capability_to_string(cap), cap); 4754 } 4755 break; 4756 } 4757 4758 case SpvOpExtInstImport: 4759 vtn_handle_extension(b, opcode, w, count); 4760 break; 4761 4762 case SpvOpMemoryModel: 4763 switch (w[1]) { 4764 case SpvAddressingModelPhysical32: 4765 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL, 4766 "AddressingModelPhysical32 only supported for kernels"); 4767 b->shader->info.cs.ptr_size = 32; 4768 b->physical_ptrs = true; 4769 assert(nir_address_format_bit_size(b->options->global_addr_format) == 32); 4770 assert(nir_address_format_num_components(b->options->global_addr_format) == 1); 4771 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 32); 4772 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1); 4773 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 32); 4774 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1); 4775 break; 4776 case SpvAddressingModelPhysical64: 4777 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL, 4778 "AddressingModelPhysical64 only supported for kernels"); 4779 b->shader->info.cs.ptr_size = 64; 4780 b->physical_ptrs = true; 4781 assert(nir_address_format_bit_size(b->options->global_addr_format) == 64); 4782 assert(nir_address_format_num_components(b->options->global_addr_format) == 1); 4783 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 64); 4784 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1); 4785 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 64); 4786 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1); 4787 break; 4788 case SpvAddressingModelLogical: 4789 vtn_fail_if(b->shader->info.stage == MESA_SHADER_KERNEL, 4790 "AddressingModelLogical only supported for shaders"); 4791 b->physical_ptrs = false; 4792 break; 4793 case SpvAddressingModelPhysicalStorageBuffer64: 4794 vtn_fail_if(!b->options || 4795 !b->options->caps.physical_storage_buffer_address, 4796 "AddressingModelPhysicalStorageBuffer64 not supported"); 4797 break; 4798 default: 4799 vtn_fail("Unknown addressing model: %s (%u)", 4800 spirv_addressingmodel_to_string(w[1]), w[1]); 4801 break; 4802 } 4803 4804 b->mem_model = w[2]; 
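   /* An illustrative header instruction (not from any particular shader)
    * would be "OpMemoryModel Logical GLSL450": w[1] selects the addressing
    * model validated above, w[2] the memory model validated below.
    */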
4805 switch (w[2]) { 4806 case SpvMemoryModelSimple: 4807 case SpvMemoryModelGLSL450: 4808 case SpvMemoryModelOpenCL: 4809 break; 4810 case SpvMemoryModelVulkan: 4811 vtn_fail_if(!b->options->caps.vk_memory_model, 4812 "Vulkan memory model is unsupported by this driver"); 4813 break; 4814 default: 4815 vtn_fail("Unsupported memory model: %s", 4816 spirv_memorymodel_to_string(w[2])); 4817 break; 4818 } 4819 break; 4820 4821 case SpvOpEntryPoint: 4822 vtn_handle_entry_point(b, w, count); 4823 break; 4824 4825 case SpvOpString: 4826 vtn_push_value(b, w[1], vtn_value_type_string)->str = 4827 vtn_string_literal(b, &w[2], count - 2, NULL); 4828 break; 4829 4830 case SpvOpName: 4831 b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL); 4832 break; 4833 4834 case SpvOpMemberName: 4835 /* TODO */ 4836 break; 4837 4838 case SpvOpExecutionMode: 4839 case SpvOpExecutionModeId: 4840 case SpvOpDecorationGroup: 4841 case SpvOpDecorate: 4842 case SpvOpDecorateId: 4843 case SpvOpMemberDecorate: 4844 case SpvOpGroupDecorate: 4845 case SpvOpGroupMemberDecorate: 4846 case SpvOpDecorateString: 4847 case SpvOpMemberDecorateString: 4848 vtn_handle_decoration(b, opcode, w, count); 4849 break; 4850 4851 case SpvOpExtInst: { 4852 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); 4853 if (val->ext_handler == vtn_handle_non_semantic_instruction) { 4854 /* NonSemantic extended instructions are acceptable in preamble. */ 4855 vtn_handle_non_semantic_instruction(b, w[4], w, count); 4856 return true; 4857 } else { 4858 return false; /* End of preamble. */ 4859 } 4860 } 4861 4862 default: 4863 return false; /* End of preamble */ 4864 } 4865 4866 return true; 4867} 4868 4869static void 4870vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point, 4871 const struct vtn_decoration *mode, UNUSED void *data) 4872{ 4873 vtn_assert(b->entry_point == entry_point); 4874 4875 switch(mode->exec_mode) { 4876 case SpvExecutionModeOriginUpperLeft: 4877 case SpvExecutionModeOriginLowerLeft: 4878 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 4879 b->shader->info.fs.origin_upper_left = 4880 (mode->exec_mode == SpvExecutionModeOriginUpperLeft); 4881 break; 4882 4883 case SpvExecutionModeEarlyFragmentTests: 4884 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 4885 b->shader->info.fs.early_fragment_tests = true; 4886 break; 4887 4888 case SpvExecutionModePostDepthCoverage: 4889 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 4890 b->shader->info.fs.post_depth_coverage = true; 4891 break; 4892 4893 case SpvExecutionModeInvocations: 4894 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY); 4895 b->shader->info.gs.invocations = MAX2(1, mode->operands[0]); 4896 break; 4897 4898 case SpvExecutionModeDepthReplacing: 4899 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 4900 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; 4901 break; 4902 case SpvExecutionModeDepthGreater: 4903 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 4904 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; 4905 break; 4906 case SpvExecutionModeDepthLess: 4907 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 4908 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; 4909 break; 4910 case SpvExecutionModeDepthUnchanged: 4911 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 4912 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; 4913 break; 4914 4915 case SpvExecutionModeLocalSizeHint: 4916 
vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL); 4917 b->shader->info.cs.workgroup_size_hint[0] = mode->operands[0]; 4918 b->shader->info.cs.workgroup_size_hint[1] = mode->operands[1]; 4919 b->shader->info.cs.workgroup_size_hint[2] = mode->operands[2]; 4920 break; 4921 4922 case SpvExecutionModeLocalSize: 4923 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) { 4924 b->shader->info.workgroup_size[0] = mode->operands[0]; 4925 b->shader->info.workgroup_size[1] = mode->operands[1]; 4926 b->shader->info.workgroup_size[2] = mode->operands[2]; 4927 } else { 4928 vtn_fail("Execution mode LocalSize not supported in stage %s", 4929 _mesa_shader_stage_to_string(b->shader->info.stage)); 4930 } 4931 break; 4932 4933 case SpvExecutionModeOutputVertices: 4934 switch (b->shader->info.stage) { 4935 case MESA_SHADER_TESS_CTRL: 4936 case MESA_SHADER_TESS_EVAL: 4937 b->shader->info.tess.tcs_vertices_out = mode->operands[0]; 4938 break; 4939 case MESA_SHADER_GEOMETRY: 4940 b->shader->info.gs.vertices_out = mode->operands[0]; 4941 break; 4942 case MESA_SHADER_MESH: 4943 b->shader->info.mesh.max_vertices_out = mode->operands[0]; 4944 break; 4945 default: 4946 vtn_fail("Execution mode OutputVertices not supported in stage %s", 4947 _mesa_shader_stage_to_string(b->shader->info.stage)); 4948 break; 4949 } 4950 break; 4951 4952 case SpvExecutionModeInputPoints: 4953 case SpvExecutionModeInputLines: 4954 case SpvExecutionModeInputLinesAdjacency: 4955 case SpvExecutionModeTriangles: 4956 case SpvExecutionModeInputTrianglesAdjacency: 4957 case SpvExecutionModeQuads: 4958 case SpvExecutionModeIsolines: 4959 if (b->shader->info.stage == MESA_SHADER_TESS_CTRL || 4960 b->shader->info.stage == MESA_SHADER_TESS_EVAL) { 4961 b->shader->info.tess.primitive_mode = 4962 gl_primitive_from_spv_execution_mode(b, mode->exec_mode); 4963 } else { 4964 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY); 4965 b->shader->info.gs.vertices_in = 4966 vertices_in_from_spv_execution_mode(b, mode->exec_mode); 4967 b->shader->info.gs.input_primitive = 4968 gl_primitive_from_spv_execution_mode(b, mode->exec_mode); 4969 } 4970 break; 4971 4972 case SpvExecutionModeOutputPrimitivesNV: 4973 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH); 4974 b->shader->info.mesh.max_primitives_out = mode->operands[0]; 4975 break; 4976 4977 case SpvExecutionModeOutputLinesNV: 4978 case SpvExecutionModeOutputTrianglesNV: 4979 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH); 4980 b->shader->info.mesh.primitive_type = 4981 gl_primitive_from_spv_execution_mode(b, mode->exec_mode); 4982 break; 4983 4984 case SpvExecutionModeOutputPoints: { 4985 const unsigned primitive = 4986 gl_primitive_from_spv_execution_mode(b, mode->exec_mode); 4987 4988 switch (b->shader->info.stage) { 4989 case MESA_SHADER_GEOMETRY: 4990 b->shader->info.gs.output_primitive = primitive; 4991 break; 4992 case MESA_SHADER_MESH: 4993 b->shader->info.mesh.primitive_type = primitive; 4994 break; 4995 default: 4996 vtn_fail("Execution mode OutputPoints not supported in stage %s", 4997 _mesa_shader_stage_to_string(b->shader->info.stage)); 4998 break; 4999 } 5000 break; 5001 } 5002 5003 case SpvExecutionModeOutputLineStrip: 5004 case SpvExecutionModeOutputTriangleStrip: 5005 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY); 5006 b->shader->info.gs.output_primitive = 5007 gl_primitive_from_spv_execution_mode(b, mode->exec_mode); 5008 break; 5009 5010 case SpvExecutionModeSpacingEqual: 5011 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5012 
b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5013 b->shader->info.tess.spacing = TESS_SPACING_EQUAL; 5014 break; 5015 case SpvExecutionModeSpacingFractionalEven: 5016 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5017 b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5018 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_EVEN; 5019 break; 5020 case SpvExecutionModeSpacingFractionalOdd: 5021 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5022 b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5023 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_ODD; 5024 break; 5025 case SpvExecutionModeVertexOrderCw: 5026 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5027 b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5028 b->shader->info.tess.ccw = false; 5029 break; 5030 case SpvExecutionModeVertexOrderCcw: 5031 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5032 b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5033 b->shader->info.tess.ccw = true; 5034 break; 5035 case SpvExecutionModePointMode: 5036 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5037 b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5038 b->shader->info.tess.point_mode = true; 5039 break; 5040 5041 case SpvExecutionModePixelCenterInteger: 5042 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5043 b->shader->info.fs.pixel_center_integer = true; 5044 break; 5045 5046 case SpvExecutionModeXfb: 5047 b->shader->info.has_transform_feedback_varyings = true; 5048 break; 5049 5050 case SpvExecutionModeVecTypeHint: 5051 break; /* OpenCL */ 5052 5053 case SpvExecutionModeContractionOff: 5054 if (b->shader->info.stage != MESA_SHADER_KERNEL) 5055 vtn_warn("ExecutionMode only allowed for CL-style kernels: %s", 5056 spirv_executionmode_to_string(mode->exec_mode)); 5057 else 5058 b->exact = true; 5059 break; 5060 5061 case SpvExecutionModeStencilRefReplacingEXT: 5062 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5063 break; 5064 5065 case SpvExecutionModeDerivativeGroupQuadsNV: 5066 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE); 5067 b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_QUADS; 5068 break; 5069 5070 case SpvExecutionModeDerivativeGroupLinearNV: 5071 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE); 5072 b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_LINEAR; 5073 break; 5074 5075 case SpvExecutionModePixelInterlockOrderedEXT: 5076 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5077 b->shader->info.fs.pixel_interlock_ordered = true; 5078 break; 5079 5080 case SpvExecutionModePixelInterlockUnorderedEXT: 5081 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5082 b->shader->info.fs.pixel_interlock_unordered = true; 5083 break; 5084 5085 case SpvExecutionModeSampleInterlockOrderedEXT: 5086 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5087 b->shader->info.fs.sample_interlock_ordered = true; 5088 break; 5089 5090 case SpvExecutionModeSampleInterlockUnorderedEXT: 5091 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5092 b->shader->info.fs.sample_interlock_unordered = true; 5093 break; 5094 5095 case SpvExecutionModeDenormPreserve: 5096 case SpvExecutionModeDenormFlushToZero: 5097 case SpvExecutionModeSignedZeroInfNanPreserve: 5098 case SpvExecutionModeRoundingModeRTE: 5099 case SpvExecutionModeRoundingModeRTZ: { 5100 unsigned execution_mode = 0; 5101 switch (mode->exec_mode) { 5102 case SpvExecutionModeDenormPreserve: 5103 switch (mode->operands[0]) { 5104 case 16:
execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP16; break; 5105 case 32: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP32; break; 5106 case 64: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP64; break; 5107 default: vtn_fail("Floating point type not supported"); 5108 } 5109 break; 5110 case SpvExecutionModeDenormFlushToZero: 5111 switch (mode->operands[0]) { 5112 case 16: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16; break; 5113 case 32: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32; break; 5114 case 64: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64; break; 5115 default: vtn_fail("Floating point type not supported"); 5116 } 5117 break; 5118 case SpvExecutionModeSignedZeroInfNanPreserve: 5119 switch (mode->operands[0]) { 5120 case 16: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16; break; 5121 case 32: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32; break; 5122 case 64: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64; break; 5123 default: vtn_fail("Floating point type not supported"); 5124 } 5125 break; 5126 case SpvExecutionModeRoundingModeRTE: 5127 switch (mode->operands[0]) { 5128 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16; break; 5129 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32; break; 5130 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64; break; 5131 default: vtn_fail("Floating point type not supported"); 5132 } 5133 break; 5134 case SpvExecutionModeRoundingModeRTZ: 5135 switch (mode->operands[0]) { 5136 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16; break; 5137 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32; break; 5138 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64; break; 5139 default: vtn_fail("Floating point type not supported"); 5140 } 5141 break; 5142 default: 5143 break; 5144 } 5145 5146 b->shader->info.float_controls_execution_mode |= execution_mode; 5147 5148 for (unsigned bit_size = 16; bit_size <= 64; bit_size *= 2) { 5149 vtn_fail_if(nir_is_denorm_flush_to_zero(b->shader->info.float_controls_execution_mode, bit_size) && 5150 nir_is_denorm_preserve(b->shader->info.float_controls_execution_mode, bit_size), 5151 "Cannot flush to zero and preserve denorms for the same bit size."); 5152 vtn_fail_if(nir_is_rounding_mode_rtne(b->shader->info.float_controls_execution_mode, bit_size) && 5153 nir_is_rounding_mode_rtz(b->shader->info.float_controls_execution_mode, bit_size), 5154 "Cannot set rounding mode to RTNE and RTZ for the same bit size."); 5155 } 5156 break; 5157 } 5158 5159 case SpvExecutionModeLocalSizeId: 5160 case SpvExecutionModeLocalSizeHintId: 5161 /* Handled later by vtn_handle_execution_mode_id(). */ 5162 break; 5163 5164 case SpvExecutionModeSubgroupSize: 5165 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL); 5166 b->shader->info.cs.subgroup_size = mode->operands[0]; 5167 break; 5168 5169 case SpvExecutionModeSubgroupUniformControlFlowKHR: 5170 /* There's no corresponding SPIR-V capability, so check here. 
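 * (The SPV_KHR_subgroup_uniform_control_flow extension defines only this
 * execution mode, so there is no capability bit we could key off of.)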
   case SpvExecutionModeLocalSizeId:
   case SpvExecutionModeLocalSizeHintId:
      /* Handled later by vtn_handle_execution_mode_id(). */
      break;

   case SpvExecutionModeSubgroupSize:
      vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
      b->shader->info.cs.subgroup_size = mode->operands[0];
      break;

   case SpvExecutionModeSubgroupUniformControlFlowKHR:
      /* There's no corresponding SPIR-V capability, so check here. */
      vtn_fail_if(!b->options->caps.subgroup_uniform_control_flow,
                  "SpvExecutionModeSubgroupUniformControlFlowKHR not supported.");
      break;

   default:
      vtn_fail("Unhandled execution mode: %s (%u)",
               spirv_executionmode_to_string(mode->exec_mode),
               mode->exec_mode);
   }
}

static void
vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_point,
                             const struct vtn_decoration *mode, UNUSED void *data)
{
   vtn_assert(b->entry_point == entry_point);

   switch (mode->exec_mode) {
   case SpvExecutionModeLocalSizeId:
      if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
         b->shader->info.workgroup_size[0] = vtn_constant_uint(b, mode->operands[0]);
         b->shader->info.workgroup_size[1] = vtn_constant_uint(b, mode->operands[1]);
         b->shader->info.workgroup_size[2] = vtn_constant_uint(b, mode->operands[2]);
      } else {
         vtn_fail("Execution mode LocalSizeId not supported in stage %s",
                  _mesa_shader_stage_to_string(b->shader->info.stage));
      }
      break;

   case SpvExecutionModeLocalSizeHintId:
      vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
      b->shader->info.cs.workgroup_size_hint[0] = vtn_constant_uint(b, mode->operands[0]);
      b->shader->info.cs.workgroup_size_hint[1] = vtn_constant_uint(b, mode->operands[1]);
      b->shader->info.cs.workgroup_size_hint[2] = vtn_constant_uint(b, mode->operands[2]);
      break;

   default:
      /* Nothing to do.  Literal execution modes already handled by
       * vtn_handle_execution_mode(). */
      break;
   }
}

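/* Unlike plain OpExecutionMode, whose operands are literals,
 * OpExecutionModeId takes result IDs, which is why the handler above must
 * run only after constants have been parsed.  A sketch of the SPIR-V it
 * accepts (IDs hypothetical):
 *
 *    %x = OpSpecConstant %uint 64
 *    %y = OpConstant %uint 1
 *    %z = OpConstant %uint 1
 *         OpExecutionModeId %main LocalSizeId %x %y %z
 *
 * vtn_constant_uint() then resolves each ID to its post-specialization
 * value before it is stored in info.workgroup_size.
 */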
static bool
vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
                                        const uint32_t *w, unsigned count)
{
   vtn_set_instruction_result_type(b, opcode, w, count);

   switch (opcode) {
   case SpvOpSource:
   case SpvOpSourceContinued:
   case SpvOpSourceExtension:
   case SpvOpExtension:
   case SpvOpCapability:
   case SpvOpExtInstImport:
   case SpvOpMemoryModel:
   case SpvOpEntryPoint:
   case SpvOpExecutionMode:
   case SpvOpString:
   case SpvOpName:
   case SpvOpMemberName:
   case SpvOpDecorationGroup:
   case SpvOpDecorate:
   case SpvOpDecorateId:
   case SpvOpMemberDecorate:
   case SpvOpGroupDecorate:
   case SpvOpGroupMemberDecorate:
   case SpvOpDecorateString:
   case SpvOpMemberDecorateString:
      vtn_fail("Invalid opcode types and variables section");
      break;

   case SpvOpTypeVoid:
   case SpvOpTypeBool:
   case SpvOpTypeInt:
   case SpvOpTypeFloat:
   case SpvOpTypeVector:
   case SpvOpTypeMatrix:
   case SpvOpTypeImage:
   case SpvOpTypeSampler:
   case SpvOpTypeSampledImage:
   case SpvOpTypeArray:
   case SpvOpTypeRuntimeArray:
   case SpvOpTypeStruct:
   case SpvOpTypeOpaque:
   case SpvOpTypePointer:
   case SpvOpTypeForwardPointer:
   case SpvOpTypeFunction:
   case SpvOpTypeEvent:
   case SpvOpTypeDeviceEvent:
   case SpvOpTypeReserveId:
   case SpvOpTypeQueue:
   case SpvOpTypePipe:
   case SpvOpTypeAccelerationStructureKHR:
      vtn_handle_type(b, opcode, w, count);
      break;

   case SpvOpConstantTrue:
   case SpvOpConstantFalse:
   case SpvOpConstant:
   case SpvOpConstantComposite:
   case SpvOpConstantNull:
   case SpvOpSpecConstantTrue:
   case SpvOpSpecConstantFalse:
   case SpvOpSpecConstant:
   case SpvOpSpecConstantComposite:
   case SpvOpSpecConstantOp:
      vtn_handle_constant(b, opcode, w, count);
      break;

   case SpvOpUndef:
   case SpvOpVariable:
   case SpvOpConstantSampler:
      vtn_handle_variables(b, opcode, w, count);
      break;

   case SpvOpExtInst: {
      struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
      /* NonSemantic extended instructions are acceptable in the preamble;
       * anything else indicates the end of the preamble.
       */
      return val->ext_handler == vtn_handle_non_semantic_instruction;
   }

   default:
      return false; /* End of preamble */
   }

   return true;
}

static struct vtn_ssa_value *
vtn_nir_select(struct vtn_builder *b, struct vtn_ssa_value *src0,
               struct vtn_ssa_value *src1, struct vtn_ssa_value *src2)
{
   struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value);
   dest->type = src1->type;

   if (glsl_type_is_vector_or_scalar(src1->type)) {
      dest->def = nir_bcsel(&b->nb, src0->def, src1->def, src2->def);
   } else {
      unsigned elems = glsl_get_length(src1->type);

      dest->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
      for (unsigned i = 0; i < elems; i++) {
         dest->elems[i] = vtn_nir_select(b, src0,
                                         src1->elems[i], src2->elems[i]);
      }
   }

   return dest;
}

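/* vtn_nir_select() recurses structurally: the scalar boolean condition is
 * reused at every leaf while the two composite sources are walked in
 * lock-step.  For example, selecting between two values of a hypothetical
 * struct { vec4 a; float b; } emits two nir_bcsel instructions, one per
 * member, rather than a single select on the aggregate.
 */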
static void
vtn_handle_select(struct vtn_builder *b, SpvOp opcode,
                  const uint32_t *w, unsigned count)
{
   /* Handle OpSelect up-front here because it needs to be able to handle
    * pointers and not just regular vectors and scalars.
    */
   struct vtn_value *res_val = vtn_untyped_value(b, w[2]);
   struct vtn_value *cond_val = vtn_untyped_value(b, w[3]);
   struct vtn_value *obj1_val = vtn_untyped_value(b, w[4]);
   struct vtn_value *obj2_val = vtn_untyped_value(b, w[5]);

   vtn_fail_if(obj1_val->type != res_val->type ||
               obj2_val->type != res_val->type,
               "Object types must match the result type in OpSelect");

   vtn_fail_if((cond_val->type->base_type != vtn_base_type_scalar &&
                cond_val->type->base_type != vtn_base_type_vector) ||
               !glsl_type_is_boolean(cond_val->type->type),
               "OpSelect must have either a vector of booleans or "
               "a boolean as Condition type");

   vtn_fail_if(cond_val->type->base_type == vtn_base_type_vector &&
               (res_val->type->base_type != vtn_base_type_vector ||
                res_val->type->length != cond_val->type->length),
               "When Condition type in OpSelect is a vector, the Result "
               "type must be a vector of the same length");

   switch (res_val->type->base_type) {
   case vtn_base_type_scalar:
   case vtn_base_type_vector:
   case vtn_base_type_matrix:
   case vtn_base_type_array:
   case vtn_base_type_struct:
      /* OK. */
      break;
   case vtn_base_type_pointer:
      /* We need to have actual storage for pointer types. */
      vtn_fail_if(res_val->type->type == NULL,
                  "Invalid pointer result type for OpSelect");
      break;
   default:
      vtn_fail("Result type of OpSelect must be a scalar, composite, or pointer");
   }

   vtn_push_ssa_value(b, w[2],
                      vtn_nir_select(b, vtn_ssa_value(b, w[3]),
                                     vtn_ssa_value(b, w[4]),
                                     vtn_ssa_value(b, w[5])));
}

static void
vtn_handle_ptr(struct vtn_builder *b, SpvOp opcode,
               const uint32_t *w, unsigned count)
{
   struct vtn_type *type1 = vtn_get_value_type(b, w[3]);
   struct vtn_type *type2 = vtn_get_value_type(b, w[4]);
   vtn_fail_if(type1->base_type != vtn_base_type_pointer ||
               type2->base_type != vtn_base_type_pointer,
               "%s operands must have pointer types",
               spirv_op_to_string(opcode));
   vtn_fail_if(type1->storage_class != type2->storage_class,
               "%s operands must have the same storage class",
               spirv_op_to_string(opcode));

   struct vtn_type *vtn_type = vtn_get_type(b, w[1]);
   const struct glsl_type *type = vtn_type->type;

   nir_address_format addr_format = vtn_mode_to_address_format(
      b, vtn_storage_class_to_mode(b, type1->storage_class, NULL, NULL));

   nir_ssa_def *def;

   switch (opcode) {
   case SpvOpPtrDiff: {
      /* OpPtrDiff returns the difference in number of elements (not byte offset). */
      unsigned elem_size, elem_align;
      glsl_get_natural_size_align_bytes(type1->deref->type,
                                        &elem_size, &elem_align);

      def = nir_build_addr_isub(&b->nb,
                                vtn_get_nir_ssa(b, w[3]),
                                vtn_get_nir_ssa(b, w[4]),
                                addr_format);
      def = nir_idiv(&b->nb, def, nir_imm_intN_t(&b->nb, elem_size, def->bit_size));
      def = nir_i2i(&b->nb, def, glsl_get_bit_size(type));
      break;
   }

   case SpvOpPtrEqual:
   case SpvOpPtrNotEqual: {
      def = nir_build_addr_ieq(&b->nb,
                               vtn_get_nir_ssa(b, w[3]),
                               vtn_get_nir_ssa(b, w[4]),
                               addr_format);
      if (opcode == SpvOpPtrNotEqual)
         def = nir_inot(&b->nb, def);
      break;
   }

   default:
      unreachable("Invalid ptr operation");
   }

   vtn_push_nir_ssa(b, w[2], def);
}

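/* A worked example of the OpPtrDiff arithmetic above: for two pointers
 * into a float array (elem_size == 4) whose raw addresses differ by 24
 * bytes, nir_build_addr_isub() yields 24 and the nir_idiv() by elem_size
 * produces the element count 6, which is then converted to the bit size
 * of the result type.
 */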
static void
vtn_handle_ray_intrinsic(struct vtn_builder *b, SpvOp opcode,
                         const uint32_t *w, unsigned count)
{
   nir_intrinsic_instr *intrin;

   switch (opcode) {
   case SpvOpTraceNV:
   case SpvOpTraceRayKHR: {
      intrin = nir_intrinsic_instr_create(b->nb.shader,
                                          nir_intrinsic_trace_ray);

      /* The sources are in the same order in the NIR intrinsic */
      for (unsigned i = 0; i < 10; i++)
         intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);

      nir_deref_instr *payload;
      if (opcode == SpvOpTraceNV)
         payload = vtn_get_call_payload_for_location(b, w[11]);
      else
         payload = vtn_nir_deref(b, w[11]);
      intrin->src[10] = nir_src_for_ssa(&payload->dest.ssa);
      nir_builder_instr_insert(&b->nb, &intrin->instr);
      break;
   }

   case SpvOpReportIntersectionKHR: {
      intrin = nir_intrinsic_instr_create(b->nb.shader,
                                          nir_intrinsic_report_ray_intersection);
      intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
      intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
      nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, 1, NULL);
      nir_builder_instr_insert(&b->nb, &intrin->instr);
      vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa);
      break;
   }

   case SpvOpIgnoreIntersectionNV:
      intrin = nir_intrinsic_instr_create(b->nb.shader,
                                          nir_intrinsic_ignore_ray_intersection);
      nir_builder_instr_insert(&b->nb, &intrin->instr);
      break;

   case SpvOpTerminateRayNV:
      intrin = nir_intrinsic_instr_create(b->nb.shader,
                                          nir_intrinsic_terminate_ray);
      nir_builder_instr_insert(&b->nb, &intrin->instr);
      break;

   case SpvOpExecuteCallableNV:
   case SpvOpExecuteCallableKHR: {
      intrin = nir_intrinsic_instr_create(b->nb.shader,
                                          nir_intrinsic_execute_callable);
      intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
      nir_deref_instr *payload;
      if (opcode == SpvOpExecuteCallableNV)
         payload = vtn_get_call_payload_for_location(b, w[2]);
      else
         payload = vtn_nir_deref(b, w[2]);
      intrin->src[1] = nir_src_for_ssa(&payload->dest.ssa);
      nir_builder_instr_insert(&b->nb, &intrin->instr);
      break;
   }

   default:
      vtn_fail_with_opcode("Unhandled opcode", opcode);
   }
}

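/* For reference, the ten w[1..10] operands copied straight through for
 * OpTraceRayKHR above are, in SPIR-V order: acceleration structure, ray
 * flags, cull mask, SBT offset, SBT stride, miss index, ray origin, ray
 * tmin, ray direction, and ray tmax.  w[11] is the payload, the one
 * operand that needs deref translation.
 */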
static void
vtn_handle_write_packed_primitive_indices(struct vtn_builder *b, SpvOp opcode,
                                          const uint32_t *w, unsigned count)
{
   vtn_assert(opcode == SpvOpWritePackedPrimitiveIndices4x8NV);

   /* TODO(mesh): Use or create a primitive that allows the unpacking to
    * happen in the backend.  What we have here is functional but too
    * blunt.
    */

   struct vtn_type *offset_type = vtn_get_value_type(b, w[1]);
   vtn_fail_if(offset_type->base_type != vtn_base_type_scalar ||
               offset_type->type != glsl_uint_type(),
               "Index Offset type of OpWritePackedPrimitiveIndices4x8NV "
               "must be an OpTypeInt with 32-bit Width and 0 Signedness.");

   struct vtn_type *packed_type = vtn_get_value_type(b, w[2]);
   vtn_fail_if(packed_type->base_type != vtn_base_type_scalar ||
               packed_type->type != glsl_uint_type(),
               "Packed Indices type of OpWritePackedPrimitiveIndices4x8NV "
               "must be an OpTypeInt with 32-bit Width and 0 Signedness.");

   nir_deref_instr *indices = NULL;
   nir_foreach_variable_with_modes(var, b->nb.shader, nir_var_shader_out) {
      if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) {
         indices = nir_build_deref_var(&b->nb, var);
         break;
      }
   }

   /* TODO(mesh): It may be the case that the variable is not present in the
    * entry point interface list.
    *
    * See https://github.com/KhronosGroup/SPIRV-Registry/issues/104.
    */
   vtn_fail_if(indices == NULL,
               "Missing output variable decorated with PrimitiveIndices builtin.");

   nir_ssa_def *offset = vtn_get_nir_ssa(b, w[1]);
   nir_ssa_def *packed = vtn_get_nir_ssa(b, w[2]);
   nir_ssa_def *unpacked = nir_unpack_bits(&b->nb, packed, 8);
   for (int i = 0; i < 4; i++) {
      nir_deref_instr *offset_deref =
         nir_build_deref_array(&b->nb, indices,
                               nir_iadd_imm(&b->nb, offset, i));
      nir_ssa_def *val = nir_u2u(&b->nb, nir_channel(&b->nb, unpacked, i), 32);

      nir_store_deref(&b->nb, offset_deref, val, 0x1);
   }
}

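/* Concretely, the loop above turns one packed word into four array
 * stores.  With Index Offset 8 and Packed Indices 0x03020100, the
 * nir_unpack_bits() result has byte lanes {0x00, 0x01, 0x02, 0x03}, so
 * indices[8..11] receive 0, 1, 2 and 3 respectively, each byte
 * zero-extended back to 32 bits by nir_u2u().
 */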
static bool
vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
                            const uint32_t *w, unsigned count)
{
   switch (opcode) {
   case SpvOpLabel:
      break;

   case SpvOpLoopMerge:
   case SpvOpSelectionMerge:
      /* This is handled by cfg pre-pass and walk_blocks */
      break;

   case SpvOpUndef: {
      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
      val->type = vtn_get_type(b, w[1]);
      break;
   }

   case SpvOpExtInst:
      vtn_handle_extension(b, opcode, w, count);
      break;

   case SpvOpVariable:
   case SpvOpLoad:
   case SpvOpStore:
   case SpvOpCopyMemory:
   case SpvOpCopyMemorySized:
   case SpvOpAccessChain:
   case SpvOpPtrAccessChain:
   case SpvOpInBoundsAccessChain:
   case SpvOpInBoundsPtrAccessChain:
   case SpvOpArrayLength:
   case SpvOpConvertPtrToU:
   case SpvOpConvertUToPtr:
   case SpvOpGenericCastToPtrExplicit:
   case SpvOpGenericPtrMemSemantics:
   case SpvOpSubgroupBlockReadINTEL:
   case SpvOpSubgroupBlockWriteINTEL:
   case SpvOpConvertUToAccelerationStructureKHR:
      vtn_handle_variables(b, opcode, w, count);
      break;

   case SpvOpFunctionCall:
      vtn_handle_function_call(b, opcode, w, count);
      break;

   case SpvOpSampledImage:
   case SpvOpImage:
   case SpvOpImageSparseTexelsResident:
   case SpvOpImageSampleImplicitLod:
   case SpvOpImageSparseSampleImplicitLod:
   case SpvOpImageSampleExplicitLod:
   case SpvOpImageSparseSampleExplicitLod:
   case SpvOpImageSampleDrefImplicitLod:
   case SpvOpImageSparseSampleDrefImplicitLod:
   case SpvOpImageSampleDrefExplicitLod:
   case SpvOpImageSparseSampleDrefExplicitLod:
   case SpvOpImageSampleProjImplicitLod:
   case SpvOpImageSampleProjExplicitLod:
   case SpvOpImageSampleProjDrefImplicitLod:
   case SpvOpImageSampleProjDrefExplicitLod:
   case SpvOpImageFetch:
   case SpvOpImageSparseFetch:
   case SpvOpImageGather:
   case SpvOpImageSparseGather:
   case SpvOpImageDrefGather:
   case SpvOpImageSparseDrefGather:
   case SpvOpImageQueryLod:
   case SpvOpImageQueryLevels:
      vtn_handle_texture(b, opcode, w, count);
      break;

   case SpvOpImageRead:
   case SpvOpImageSparseRead:
   case SpvOpImageWrite:
   case SpvOpImageTexelPointer:
   case SpvOpImageQueryFormat:
   case SpvOpImageQueryOrder:
      vtn_handle_image(b, opcode, w, count);
      break;

   case SpvOpImageQuerySamples:
   case SpvOpImageQuerySizeLod:
   case SpvOpImageQuerySize: {
      struct vtn_type *image_type = vtn_get_value_type(b, w[3]);
      vtn_assert(image_type->base_type == vtn_base_type_image);
      if (glsl_type_is_image(image_type->glsl_image)) {
         vtn_handle_image(b, opcode, w, count);
      } else {
         vtn_assert(glsl_type_is_sampler(image_type->glsl_image));
         vtn_handle_texture(b, opcode, w, count);
      }
      break;
   }

   case SpvOpFragmentMaskFetchAMD:
   case SpvOpFragmentFetchAMD:
      vtn_handle_texture(b, opcode, w, count);
      break;

   case SpvOpAtomicLoad:
   case SpvOpAtomicExchange:
   case SpvOpAtomicCompareExchange:
   case SpvOpAtomicCompareExchangeWeak:
   case SpvOpAtomicIIncrement:
   case SpvOpAtomicIDecrement:
   case SpvOpAtomicIAdd:
   case SpvOpAtomicISub:
   case SpvOpAtomicSMin:
   case SpvOpAtomicUMin:
   case SpvOpAtomicSMax:
   case SpvOpAtomicUMax:
   case SpvOpAtomicAnd:
   case SpvOpAtomicOr:
   case SpvOpAtomicXor:
   case SpvOpAtomicFAddEXT:
   case SpvOpAtomicFMinEXT:
   case SpvOpAtomicFMaxEXT:
   case SpvOpAtomicFlagTestAndSet: {
      struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
      if (pointer->value_type == vtn_value_type_image_pointer) {
         vtn_handle_image(b, opcode, w, count);
      } else {
         vtn_assert(pointer->value_type == vtn_value_type_pointer);
         vtn_handle_atomics(b, opcode, w, count);
      }
      break;
   }

   case SpvOpAtomicStore:
   case SpvOpAtomicFlagClear: {
      struct vtn_value *pointer = vtn_untyped_value(b, w[1]);
      if (pointer->value_type == vtn_value_type_image_pointer) {
         vtn_handle_image(b, opcode, w, count);
      } else {
         vtn_assert(pointer->value_type == vtn_value_type_pointer);
         vtn_handle_atomics(b, opcode, w, count);
      }
      break;
   }

   case SpvOpSelect:
      vtn_handle_select(b, opcode, w, count);
      break;

   case SpvOpSNegate:
   case SpvOpFNegate:
   case SpvOpNot:
   case SpvOpAny:
   case SpvOpAll:
   case SpvOpConvertFToU:
   case SpvOpConvertFToS:
   case SpvOpConvertSToF:
   case SpvOpConvertUToF:
   case SpvOpUConvert:
   case SpvOpSConvert:
   case SpvOpFConvert:
   case SpvOpQuantizeToF16:
   case SpvOpSatConvertSToU:
   case SpvOpSatConvertUToS:
   case SpvOpPtrCastToGeneric:
   case SpvOpGenericCastToPtr:
   case SpvOpIsNan:
   case SpvOpIsInf:
   case SpvOpIsFinite:
   case SpvOpIsNormal:
   case SpvOpSignBitSet:
   case SpvOpLessOrGreater:
   case SpvOpOrdered:
   case SpvOpUnordered:
   case SpvOpIAdd:
   case SpvOpFAdd:
   case SpvOpISub:
   case SpvOpFSub:
   case SpvOpIMul:
   case SpvOpFMul:
   case SpvOpUDiv:
   case SpvOpSDiv:
   case SpvOpFDiv:
   case SpvOpUMod:
   case SpvOpSRem:
   case SpvOpSMod:
   case SpvOpFRem:
   case SpvOpFMod:
   case SpvOpVectorTimesScalar:
   case SpvOpDot:
   case SpvOpIAddCarry:
   case SpvOpISubBorrow:
   case SpvOpUMulExtended:
   case SpvOpSMulExtended:
   case SpvOpShiftRightLogical:
   case SpvOpShiftRightArithmetic:
   case SpvOpShiftLeftLogical:
   case SpvOpLogicalEqual:
   case SpvOpLogicalNotEqual:
   case SpvOpLogicalOr:
   case SpvOpLogicalAnd:
   case SpvOpLogicalNot:
   case SpvOpBitwiseOr:
   case SpvOpBitwiseXor:
   case SpvOpBitwiseAnd:
   case SpvOpIEqual:
   case SpvOpFOrdEqual:
   case SpvOpFUnordEqual:
   case SpvOpINotEqual:
   case SpvOpFOrdNotEqual:
   case SpvOpFUnordNotEqual:
   case SpvOpULessThan:
   case SpvOpSLessThan:
   case SpvOpFOrdLessThan:
   case SpvOpFUnordLessThan:
   case SpvOpUGreaterThan:
   case SpvOpSGreaterThan:
   case SpvOpFOrdGreaterThan:
   case SpvOpFUnordGreaterThan:
   case SpvOpULessThanEqual:
   case SpvOpSLessThanEqual:
   case SpvOpFOrdLessThanEqual:
   case SpvOpFUnordLessThanEqual:
   case SpvOpUGreaterThanEqual:
   case SpvOpSGreaterThanEqual:
   case SpvOpFOrdGreaterThanEqual:
   case SpvOpFUnordGreaterThanEqual:
   case SpvOpDPdx:
   case SpvOpDPdy:
   case SpvOpFwidth:
   case SpvOpDPdxFine:
   case SpvOpDPdyFine:
   case SpvOpFwidthFine:
   case SpvOpDPdxCoarse:
   case SpvOpDPdyCoarse:
   case SpvOpFwidthCoarse:
   case SpvOpBitFieldInsert:
   case SpvOpBitFieldSExtract:
   case SpvOpBitFieldUExtract:
   case SpvOpBitReverse:
   case SpvOpBitCount:
   case SpvOpTranspose:
   case SpvOpOuterProduct:
   case SpvOpMatrixTimesScalar:
   case SpvOpVectorTimesMatrix:
   case SpvOpMatrixTimesVector:
   case SpvOpMatrixTimesMatrix:
   case SpvOpUCountLeadingZerosINTEL:
   case SpvOpUCountTrailingZerosINTEL:
   case SpvOpAbsISubINTEL:
   case SpvOpAbsUSubINTEL:
   case SpvOpIAddSatINTEL:
   case SpvOpUAddSatINTEL:
   case SpvOpIAverageINTEL:
   case SpvOpUAverageINTEL:
   case SpvOpIAverageRoundedINTEL:
   case SpvOpUAverageRoundedINTEL:
   case SpvOpISubSatINTEL:
   case SpvOpUSubSatINTEL:
   case SpvOpIMul32x16INTEL:
   case SpvOpUMul32x16INTEL:
      vtn_handle_alu(b, opcode, w, count);
      break;

   case SpvOpSDotKHR:
   case SpvOpUDotKHR:
   case SpvOpSUDotKHR:
   case SpvOpSDotAccSatKHR:
   case SpvOpUDotAccSatKHR:
   case SpvOpSUDotAccSatKHR:
      vtn_handle_integer_dot(b, opcode, w, count);
      break;

   case SpvOpBitcast:
      vtn_handle_bitcast(b, w, count);
      break;

   case SpvOpVectorExtractDynamic:
   case SpvOpVectorInsertDynamic:
   case SpvOpVectorShuffle:
   case SpvOpCompositeConstruct:
   case SpvOpCompositeExtract:
   case SpvOpCompositeInsert:
   case SpvOpCopyLogical:
   case SpvOpCopyObject:
      vtn_handle_composite(b, opcode, w, count);
      break;

   case SpvOpEmitVertex:
   case SpvOpEndPrimitive:
   case SpvOpEmitStreamVertex:
   case SpvOpEndStreamPrimitive:
   case SpvOpControlBarrier:
   case SpvOpMemoryBarrier:
      vtn_handle_barrier(b, opcode, w, count);
      break;

   case SpvOpGroupNonUniformElect:
   case SpvOpGroupNonUniformAll:
   case SpvOpGroupNonUniformAny:
   case SpvOpGroupNonUniformAllEqual:
   case SpvOpGroupNonUniformBroadcast:
   case SpvOpGroupNonUniformBroadcastFirst:
   case SpvOpGroupNonUniformBallot:
   case SpvOpGroupNonUniformInverseBallot:
   case SpvOpGroupNonUniformBallotBitExtract:
   case SpvOpGroupNonUniformBallotBitCount:
   case SpvOpGroupNonUniformBallotFindLSB:
   case SpvOpGroupNonUniformBallotFindMSB:
   case SpvOpGroupNonUniformShuffle:
   case SpvOpGroupNonUniformShuffleXor:
   case SpvOpGroupNonUniformShuffleUp:
   case SpvOpGroupNonUniformShuffleDown:
   case SpvOpGroupNonUniformIAdd:
   case SpvOpGroupNonUniformFAdd:
   case SpvOpGroupNonUniformIMul:
   case SpvOpGroupNonUniformFMul:
   case SpvOpGroupNonUniformSMin:
   case SpvOpGroupNonUniformUMin:
   case SpvOpGroupNonUniformFMin:
   case SpvOpGroupNonUniformSMax:
   case SpvOpGroupNonUniformUMax:
   case SpvOpGroupNonUniformFMax:
   case SpvOpGroupNonUniformBitwiseAnd:
   case SpvOpGroupNonUniformBitwiseOr:
   case SpvOpGroupNonUniformBitwiseXor:
   case SpvOpGroupNonUniformLogicalAnd:
   case SpvOpGroupNonUniformLogicalOr:
   case SpvOpGroupNonUniformLogicalXor:
   case SpvOpGroupNonUniformQuadBroadcast:
   case SpvOpGroupNonUniformQuadSwap:
   case SpvOpGroupAll:
   case SpvOpGroupAny:
   case SpvOpGroupBroadcast:
   case SpvOpGroupIAdd:
   case SpvOpGroupFAdd:
   case SpvOpGroupFMin:
   case SpvOpGroupUMin:
   case SpvOpGroupSMin:
   case SpvOpGroupFMax:
   case SpvOpGroupUMax:
   case SpvOpGroupSMax:
   case SpvOpSubgroupBallotKHR:
   case SpvOpSubgroupFirstInvocationKHR:
   case SpvOpSubgroupReadInvocationKHR:
   case SpvOpSubgroupAllKHR:
   case SpvOpSubgroupAnyKHR:
   case SpvOpSubgroupAllEqualKHR:
   case SpvOpGroupIAddNonUniformAMD:
   case SpvOpGroupFAddNonUniformAMD:
   case SpvOpGroupFMinNonUniformAMD:
   case SpvOpGroupUMinNonUniformAMD:
   case SpvOpGroupSMinNonUniformAMD:
   case SpvOpGroupFMaxNonUniformAMD:
   case SpvOpGroupUMaxNonUniformAMD:
   case SpvOpGroupSMaxNonUniformAMD:
   case SpvOpSubgroupShuffleINTEL:
   case SpvOpSubgroupShuffleDownINTEL:
   case SpvOpSubgroupShuffleUpINTEL:
   case SpvOpSubgroupShuffleXorINTEL:
      vtn_handle_subgroup(b, opcode, w, count);
      break;

   case SpvOpPtrDiff:
   case SpvOpPtrEqual:
   case SpvOpPtrNotEqual:
      vtn_handle_ptr(b, opcode, w, count);
      break;

   case SpvOpBeginInvocationInterlockEXT:
      nir_begin_invocation_interlock(&b->nb);
      break;

   case SpvOpEndInvocationInterlockEXT:
      nir_end_invocation_interlock(&b->nb);
      break;

   case SpvOpDemoteToHelperInvocationEXT: {
      nir_demote(&b->nb);
      break;
   }

   case SpvOpIsHelperInvocationEXT: {
      vtn_push_nir_ssa(b, w[2], nir_is_helper_invocation(&b->nb, 1));
      break;
   }

   case SpvOpReadClockKHR: {
      SpvScope scope = vtn_constant_uint(b, w[3]);
      nir_scope nir_scope;

      switch (scope) {
      case SpvScopeDevice:
         nir_scope = NIR_SCOPE_DEVICE;
         break;
      case SpvScopeSubgroup:
         nir_scope = NIR_SCOPE_SUBGROUP;
         break;
      default:
         vtn_fail("invalid read clock scope");
      }

      /* Operation supports two result types: uvec2 and uint64_t.  The NIR
       * intrinsic gives uvec2, so pack the result for the other case.
       */
      nir_ssa_def *result = nir_shader_clock(&b->nb, nir_scope);

      struct vtn_type *type = vtn_get_type(b, w[1]);
      const struct glsl_type *dest_type = type->type;

      if (glsl_type_is_vector(dest_type)) {
         assert(dest_type == glsl_vector_type(GLSL_TYPE_UINT, 2));
      } else {
         assert(glsl_type_is_scalar(dest_type));
         assert(glsl_get_base_type(dest_type) == GLSL_TYPE_UINT64);
         result = nir_pack_64_2x32(&b->nb, result);
      }

      vtn_push_nir_ssa(b, w[2], result);
      break;
   }

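   /* For illustration of the packing above: if a subgroup-scope clock read
    * returns the uvec2 (lo, hi), a shader that declared the result as
    * OpTypeInt 64 0 receives nir_pack_64_2x32's single 64-bit value
    * ((uint64_t)hi << 32) | lo, while a uvec2 result is passed through
    * unchanged.
    */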
   case SpvOpTraceNV:
   case SpvOpTraceRayKHR:
   case SpvOpReportIntersectionKHR:
   case SpvOpIgnoreIntersectionNV:
   case SpvOpTerminateRayNV:
   case SpvOpExecuteCallableNV:
   case SpvOpExecuteCallableKHR:
      vtn_handle_ray_intrinsic(b, opcode, w, count);
      break;

   case SpvOpLifetimeStart:
   case SpvOpLifetimeStop:
      break;

   case SpvOpGroupAsyncCopy:
   case SpvOpGroupWaitEvents:
      vtn_handle_opencl_core_instruction(b, opcode, w, count);
      break;

   case SpvOpWritePackedPrimitiveIndices4x8NV:
      vtn_handle_write_packed_primitive_indices(b, opcode, w, count);
      break;

   default:
      vtn_fail_with_opcode("Unhandled opcode", opcode);
   }

   return true;
}

struct vtn_builder*
vtn_create_builder(const uint32_t *words, size_t word_count,
                   gl_shader_stage stage, const char *entry_point_name,
                   const struct spirv_to_nir_options *options)
{
   /* Initialize the vtn_builder object */
   struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
   struct spirv_to_nir_options *dup_options =
      ralloc(b, struct spirv_to_nir_options);
   *dup_options = *options;

   b->spirv = words;
   b->spirv_word_count = word_count;
   b->file = NULL;
   b->line = -1;
   b->col = -1;
   list_inithead(&b->functions);
   b->entry_point_stage = stage;
   b->entry_point_name = entry_point_name;
   b->options = dup_options;

   /*
    * Handle the SPIR-V header (first 5 dwords).
    * Can't use vtn_assert() as the setjmp(3) target isn't initialized yet.
    */
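   /* For reference, the header words checked below are laid out as:
    *
    *    words[0]  magic number (SpvMagicNumber, 0x07230203)
    *    words[1]  version (e.g. 0x00010400 for SPIR-V 1.4)
    *    words[2]  generator: tool ID in the high 16 bits,
    *              tool version in the low 16 bits
    *    words[3]  ID bound; all result IDs are < this value
    *    words[4]  schema, reserved and required to be 0
    */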
   if (word_count <= 5)
      goto fail;

   if (words[0] != SpvMagicNumber) {
      vtn_err("words[0] was 0x%x, want 0x%x", words[0], SpvMagicNumber);
      goto fail;
   }

   b->version = words[1];
   if (b->version < 0x10000) {
      vtn_err("version was 0x%x, want >= 0x10000", b->version);
      goto fail;
   }

   b->generator_id = words[2] >> 16;
   uint16_t generator_version = words[2];

   /* In GLSLang commit 8297936dd6eb3, their handling of barrier() was fixed
    * to provide correct memory semantics on compute shader barrier()
    * commands.  Prior to that, we need to fix them up ourselves.  This
    * GLSLang fix caused them to bump to generator version 3.
    */
   b->wa_glslang_cs_barrier =
      (b->generator_id == vtn_generator_glslang_reference_front_end &&
       generator_version < 3);

   /* Identifying the LLVM-SPIRV translator:
    *
    * The LLVM-SPIRV translator currently doesn't store any generator ID [1].
    * Our use case involving the SPIRV-Tools linker also means we want to
    * check for that tool instead.  Finally, the SPIRV-Tools linker also
    * stores its generator ID in the wrong location [2].
    *
    * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/1223
    * [2] : https://github.com/KhronosGroup/SPIRV-Tools/pull/4549
    */
   const bool is_llvm_spirv_translator =
      (b->generator_id == 0 &&
       generator_version == vtn_generator_spirv_tools_linker) ||
      b->generator_id == vtn_generator_spirv_tools_linker;

   /* The LLVM-SPIRV translator generates Undef initializers for _local
    * variables [1].
    *
    * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1224
    */
   b->wa_llvm_spirv_ignore_workgroup_initializer =
      b->options->environment == NIR_SPIRV_OPENCL && is_llvm_spirv_translator;

   /* words[2] == generator magic */
   unsigned value_id_bound = words[3];
   if (words[4] != 0) {
      vtn_err("words[4] was %u, want 0", words[4]);
      goto fail;
   }

   b->value_id_bound = value_id_bound;
   b->values = rzalloc_array(b, struct vtn_value, value_id_bound);

   if (b->options->environment == NIR_SPIRV_VULKAN && b->version < 0x10400)
      b->vars_used_indirectly = _mesa_pointer_set_create(b);

   return b;
 fail:
   ralloc_free(b);
   return NULL;
}

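/* As a sketch of what the wrapper below produces (names illustrative):
 * for a kernel entry point
 *
 *    void my_kernel(global float *out, struct args byval_args);
 *
 * it emits a __wrapped_my_kernel() that declares one read-only uniform
 * input variable per parameter, copies any Function-storage (by-value)
 * parameter into a local "copy_in" variable, and then calls the real
 * entry point with loads or derefs of those inputs.
 */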
static nir_function *
vtn_emit_kernel_entry_point_wrapper(struct vtn_builder *b,
                                    nir_function *entry_point)
{
   vtn_assert(entry_point == b->entry_point->func->nir_func);
   vtn_fail_if(!entry_point->name, "entry points are required to have a name");
   const char *func_name =
      ralloc_asprintf(b->shader, "__wrapped_%s", entry_point->name);

   vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);

   nir_function *main_entry_point = nir_function_create(b->shader, func_name);
   main_entry_point->impl = nir_function_impl_create(main_entry_point);
   nir_builder_init(&b->nb, main_entry_point->impl);
   b->nb.cursor = nir_after_cf_list(&main_entry_point->impl->body);
   b->func_param_idx = 0;

   nir_call_instr *call = nir_call_instr_create(b->nb.shader, entry_point);

   for (unsigned i = 0; i < entry_point->num_params; ++i) {
      struct vtn_type *param_type = b->entry_point->func->type->params[i];

      /* consider all pointers to function memory to be parameters passed
       * by value
       */
      bool is_by_val = param_type->base_type == vtn_base_type_pointer &&
         param_type->storage_class == SpvStorageClassFunction;

      /* input variable */
      nir_variable *in_var = rzalloc(b->nb.shader, nir_variable);
      in_var->data.mode = nir_var_uniform;
      in_var->data.read_only = true;
      in_var->data.location = i;
      if (param_type->base_type == vtn_base_type_image) {
         in_var->data.access =
            spirv_to_gl_access_qualifier(b, param_type->access_qualifier);
      }

      if (is_by_val)
         in_var->type = param_type->deref->type;
      else if (param_type->base_type == vtn_base_type_image)
         in_var->type = param_type->glsl_image;
      else if (param_type->base_type == vtn_base_type_sampler)
         in_var->type = glsl_bare_sampler_type();
      else
         in_var->type = param_type->type;

      nir_shader_add_variable(b->nb.shader, in_var);

      /* we have to copy the entire variable into function memory */
      if (is_by_val) {
         nir_variable *copy_var =
            nir_local_variable_create(main_entry_point->impl, in_var->type,
                                      "copy_in");
         nir_copy_var(&b->nb, copy_var, in_var);
         call->params[i] =
            nir_src_for_ssa(&nir_build_deref_var(&b->nb, copy_var)->dest.ssa);
      } else if (param_type->base_type == vtn_base_type_image ||
                 param_type->base_type == vtn_base_type_sampler) {
         /* Don't load the var, just pass a deref of it */
         call->params[i] = nir_src_for_ssa(&nir_build_deref_var(&b->nb, in_var)->dest.ssa);
      } else {
         call->params[i] = nir_src_for_ssa(nir_load_var(&b->nb, in_var));
      }
   }

   nir_builder_instr_insert(&b->nb, &call->instr);

   return main_entry_point;
}

static bool
can_remove(nir_variable *var, void *data)
{
   const struct set *vars_used_indirectly = data;
   return !_mesa_set_search(vars_used_indirectly, var);
}

nir_shader *
spirv_to_nir(const uint32_t *words, size_t word_count,
             struct nir_spirv_specialization *spec, unsigned num_spec,
             gl_shader_stage stage, const char *entry_point_name,
             const struct spirv_to_nir_options *options,
             const nir_shader_compiler_options *nir_options)
{
   const uint32_t *word_end = words + word_count;

   struct vtn_builder *b = vtn_create_builder(words, word_count,
                                              stage, entry_point_name,
                                              options);

   if (b == NULL)
      return NULL;

   /* See also _vtn_fail() */
   if (vtn_setjmp(b->fail_jump)) {
      ralloc_free(b);
      return NULL;
   }

   /* Skip the SPIR-V header, handled at vtn_create_builder */
   words += 5;

   b->shader = nir_shader_create(b, stage, nir_options, NULL);
   b->shader->info.float_controls_execution_mode = options->float_controls_execution_mode;

   /* Handle all the preamble instructions */
   words = vtn_foreach_instruction(b, words, word_end,
                                   vtn_handle_preamble_instruction);

   /* DirectXShaderCompiler and glslang/shaderc both create OpKill from HLSL's
    * discard/clip, which uses demote semantics.  DirectXShaderCompiler will
    * use demote if the extension is enabled, so we disable this workaround
    * in that case.
    *
    * Related glslang issue: https://github.com/KhronosGroup/glslang/issues/2416
    */
   bool glslang = b->generator_id == vtn_generator_glslang_reference_front_end ||
                  b->generator_id == vtn_generator_shaderc_over_glslang;
   bool dxsc = b->generator_id == vtn_generator_spiregg;
   b->convert_discard_to_demote = ((dxsc && !b->uses_demote_to_helper_invocation) ||
                                   (glslang && b->source_lang == SpvSourceLanguageHLSL)) &&
                                  options->caps.demote_to_helper_invocation;

   if (!options->create_library && b->entry_point == NULL) {
      vtn_fail("Entry point not found for %s shader \"%s\"",
               _mesa_shader_stage_to_string(stage), entry_point_name);
      ralloc_free(b);
      return NULL;
   }

   /* Ensure a sane address mode is being used for function temps */
   assert(nir_address_format_bit_size(b->options->temp_addr_format) == nir_get_ptr_bitsize(b->shader));
   assert(nir_address_format_num_components(b->options->temp_addr_format) == 1);

   /* Set shader info defaults */
   if (stage == MESA_SHADER_GEOMETRY)
      b->shader->info.gs.invocations = 1;

   /* Parse execution modes. */
   if (!options->create_library)
      vtn_foreach_execution_mode(b, b->entry_point,
                                 vtn_handle_execution_mode, NULL);

   b->specializations = spec;
   b->num_specializations = num_spec;

   /* Handle all variable, type, and constant instructions */
   words = vtn_foreach_instruction(b, words, word_end,
                                   vtn_handle_variable_or_type_instruction);

   /* Parse execution modes that depend on IDs.  Must happen after constants
    * have been parsed.
    */
   if (!options->create_library)
      vtn_foreach_execution_mode(b, b->entry_point,
                                 vtn_handle_execution_mode_id, NULL);

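   /* The check below covers the third way a compute workgroup size can be
    * specified: a uvec3 constant decorated with the WorkgroupSize builtin,
    * e.g. (IDs hypothetical):
    *
    *    OpDecorate %wgs BuiltIn WorkgroupSize
    *    %wgs = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1
    *
    * Per the SPIR-V spec this decorated constant takes precedence over any
    * LocalSize execution mode, so it is applied last here.
    */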
   if (b->workgroup_size_builtin) {
      vtn_assert(gl_shader_stage_uses_workgroup(stage));
      vtn_assert(b->workgroup_size_builtin->type->type ==
                 glsl_vector_type(GLSL_TYPE_UINT, 3));

      nir_const_value *const_size =
         b->workgroup_size_builtin->constant->values;

      b->shader->info.workgroup_size[0] = const_size[0].u32;
      b->shader->info.workgroup_size[1] = const_size[1].u32;
      b->shader->info.workgroup_size[2] = const_size[2].u32;
   }

   /* Set types on all vtn_values */
   vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);

   vtn_build_cfg(b, words, word_end);

   if (!options->create_library) {
      assert(b->entry_point->value_type == vtn_value_type_function);
      b->entry_point->func->referenced = true;
   }

   bool progress;
   do {
      progress = false;
      vtn_foreach_cf_node(node, &b->functions) {
         struct vtn_function *func = vtn_cf_node_as_function(node);
         if ((options->create_library || func->referenced) && !func->emitted) {
            b->const_table = _mesa_pointer_hash_table_create(b);

            vtn_function_emit(b, func, vtn_handle_body_instruction);
            progress = true;
         }
      }
   } while (progress);

   if (!options->create_library) {
      vtn_assert(b->entry_point->value_type == vtn_value_type_function);
      nir_function *entry_point = b->entry_point->func->nir_func;
      vtn_assert(entry_point);

      /* post process entry_points with input params */
      if (entry_point->num_params && b->shader->info.stage == MESA_SHADER_KERNEL)
         entry_point = vtn_emit_kernel_entry_point_wrapper(b, entry_point);

      entry_point->is_entrypoint = true;
   }

   /* structurize the CFG */
   nir_lower_goto_ifs(b->shader);

   /* A SPIR-V module can have multiple shader stages and also multiple
    * shaders of the same stage.  Global variables are declared per-module.
    *
    * Starting in SPIR-V 1.4 the list of global variables is part of
    * OpEntryPoint, so only valid ones will be created.  Previous versions
    * only have Input and Output variables listed, so remove dead variables
    * to clean up the remaining ones.
    */
   if (!options->create_library && b->version < 0x10400) {
      const nir_remove_dead_variables_options dead_opts = {
         .can_remove_var = can_remove,
         .can_remove_var_data = b->vars_used_indirectly,
      };
      nir_remove_dead_variables(b->shader, ~(nir_var_function_temp |
                                             nir_var_shader_out |
                                             nir_var_shader_in |
                                             nir_var_system_value),
                                b->vars_used_indirectly ? &dead_opts : NULL);
   }

   nir_foreach_variable_in_shader(var, b->shader) {
      switch (var->data.mode) {
      case nir_var_mem_ubo:
         b->shader->info.num_ubos++;
         break;
      case nir_var_mem_ssbo:
         b->shader->info.num_ssbos++;
         break;
      case nir_var_mem_push_const:
         vtn_assert(b->shader->num_uniforms == 0);
         b->shader->num_uniforms =
            glsl_get_explicit_size(glsl_without_array(var->type), false);
         break;
      }
   }

   /* We sometimes generate bogus derefs that, while never used, give the
    * validator a bit of heartburn.  Run dead code to get rid of them.
    */
   nir_opt_dce(b->shader);

   /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
    * a Block, all of them will be and Blocks are explicitly laid out.
    */
   nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
      if (glsl_type_is_interface(var->type)) {
         assert(b->options->caps.workgroup_memory_explicit_layout);
         b->shader->info.shared_memory_explicit_layout = true;
         break;
      }
   }
   if (b->shader->info.shared_memory_explicit_layout) {
      unsigned size = 0;
      nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
         assert(glsl_type_is_interface(var->type));
         const bool align_to_stride = false;
         size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
      }
      b->shader->info.shared_size = size;
   }

   /* Unparent the shader from the vtn_builder before we delete the builder */
   ralloc_steal(NULL, b->shader);

   nir_shader *shader = b->shader;
   ralloc_free(b);

   return shader;
}