1/* 2 * Copyright © 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Connor Abbott (cwabbott0@gmail.com) 25 * 26 */ 27 28#include "float64_glsl.h" 29#include "glsl_to_nir.h" 30#include "ir_visitor.h" 31#include "ir_hierarchical_visitor.h" 32#include "ir.h" 33#include "ir_optimization.h" 34#include "program.h" 35#include "compiler/nir/nir_control_flow.h" 36#include "compiler/nir/nir_builder.h" 37#include "main/errors.h" 38#include "main/imports.h" 39#include "main/mtypes.h" 40#include "main/shaderobj.h" 41#include "util/u_math.h" 42 43/* 44 * pass to lower GLSL IR to NIR 45 * 46 * This will lower variable dereferences to loads/stores of corresponding 47 * variables in NIR - the variables will be converted to registers in a later 48 * pass. 49 */ 50 51namespace { 52 53class nir_visitor : public ir_visitor 54{ 55public: 56 nir_visitor(gl_context *ctx, nir_shader *shader); 57 ~nir_visitor(); 58 59 virtual void visit(ir_variable *); 60 virtual void visit(ir_function *); 61 virtual void visit(ir_function_signature *); 62 virtual void visit(ir_loop *); 63 virtual void visit(ir_if *); 64 virtual void visit(ir_discard *); 65 virtual void visit(ir_loop_jump *); 66 virtual void visit(ir_return *); 67 virtual void visit(ir_call *); 68 virtual void visit(ir_assignment *); 69 virtual void visit(ir_emit_vertex *); 70 virtual void visit(ir_end_primitive *); 71 virtual void visit(ir_expression *); 72 virtual void visit(ir_swizzle *); 73 virtual void visit(ir_texture *); 74 virtual void visit(ir_constant *); 75 virtual void visit(ir_dereference_variable *); 76 virtual void visit(ir_dereference_record *); 77 virtual void visit(ir_dereference_array *); 78 virtual void visit(ir_barrier *); 79 80 void create_function(ir_function_signature *ir); 81 82private: 83 void add_instr(nir_instr *instr, unsigned num_components, unsigned bit_size); 84 nir_ssa_def *evaluate_rvalue(ir_rvalue *ir); 85 86 nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs); 87 nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1); 88 nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1, 89 nir_ssa_def *src2); 90 nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1, 91 nir_ssa_def *src2, nir_ssa_def *src3); 92 93 bool supports_ints; 94 bool supports_std430; 95 96 nir_shader *shader; 97 nir_function_impl *impl; 98 nir_builder b; 99 nir_ssa_def *result; /* result of the expression tree last visited */ 100 101 nir_deref_instr *evaluate_deref(ir_instruction *ir); 102 103 nir_constant *constant_copy(ir_constant *ir, void *mem_ctx); 104 105 /* most recent deref instruction created */ 106 nir_deref_instr *deref; 107 108 /* whether the IR we're operating on is per-function or global */ 109 bool is_global; 110 111 ir_function_signature *sig; 112 113 /* map of ir_variable -> nir_variable */ 114 struct hash_table *var_table; 115 116 /* map of ir_function_signature -> nir_function_overload */ 117 struct hash_table *overload_table; 118}; 119 120/* 121 * This visitor runs before the main visitor, calling create_function() for 122 * each function so that the main visitor can resolve forward references in 123 * calls. 124 */ 125 126class nir_function_visitor : public ir_hierarchical_visitor 127{ 128public: 129 nir_function_visitor(nir_visitor *v) : visitor(v) 130 { 131 } 132 virtual ir_visitor_status visit_enter(ir_function *); 133 134private: 135 nir_visitor *visitor; 136}; 137 138/* glsl_to_nir can only handle converting certain function paramaters 139 * to NIR. This visitor checks for parameters it can't currently handle. 140 */ 141class ir_function_param_visitor : public ir_hierarchical_visitor 142{ 143public: 144 ir_function_param_visitor() 145 : unsupported(false) 146 { 147 } 148 149 virtual ir_visitor_status visit_enter(ir_function_signature *ir) 150 { 151 152 if (ir->is_intrinsic()) 153 return visit_continue; 154 155 foreach_in_list(ir_variable, param, &ir->parameters) { 156 if (!param->type->is_vector() || !param->type->is_scalar()) { 157 unsupported = true; 158 return visit_stop; 159 } 160 161 if (param->data.mode == ir_var_function_inout) { 162 unsupported = true; 163 return visit_stop; 164 } 165 } 166 167 return visit_continue; 168 } 169 170 bool unsupported; 171}; 172 173} /* end of anonymous namespace */ 174 175 176static bool 177has_unsupported_function_param(exec_list *ir) 178{ 179 ir_function_param_visitor visitor; 180 visit_list_elements(&visitor, ir); 181 return visitor.unsupported; 182} 183 184nir_shader * 185glsl_to_nir(struct gl_context *ctx, 186 const struct gl_shader_program *shader_prog, 187 gl_shader_stage stage, 188 const nir_shader_compiler_options *options) 189{ 190 struct gl_linked_shader *sh = shader_prog->_LinkedShaders[stage]; 191 192 const struct gl_shader_compiler_options *gl_options = 193 &ctx->Const.ShaderCompilerOptions[stage]; 194 195 /* glsl_to_nir can only handle converting certain function paramaters 196 * to NIR. If we find something we can't handle then we get the GLSL IR 197 * opts to remove it before we continue on. 198 * 199 * TODO: add missing glsl ir to nir support and remove this loop. 200 */ 201 while (has_unsupported_function_param(sh->ir)) { 202 do_common_optimization(sh->ir, true, true, gl_options, 203 ctx->Const.NativeIntegers); 204 } 205 206 nir_shader *shader = nir_shader_create(NULL, stage, options, 207 &sh->Program->info); 208 209 nir_visitor v1(ctx, shader); 210 nir_function_visitor v2(&v1); 211 v2.run(sh->ir); 212 visit_exec_list(sh->ir, &v1); 213 214 nir_validate_shader(shader, "after glsl to nir, before function inline"); 215 216 /* We have to lower away local constant initializers right before we 217 * inline functions. That way they get properly initialized at the top 218 * of the function and not at the top of its caller. 219 */ 220 nir_lower_constant_initializers(shader, (nir_variable_mode)~0); 221 nir_lower_returns(shader); 222 nir_inline_functions(shader); 223 nir_opt_deref(shader); 224 225 nir_validate_shader(shader, "after function inlining and return lowering"); 226 227 /* Now that we have inlined everything remove all of the functions except 228 * main(). 229 */ 230 foreach_list_typed_safe(nir_function, function, node, &(shader)->functions){ 231 if (strcmp("main", function->name) != 0) { 232 exec_node_remove(&function->node); 233 } 234 } 235 236 /* Remap the locations to slots so those requiring two slots will occupy 237 * two locations. For instance, if we have in the IR code a dvec3 attr0 in 238 * location 0 and vec4 attr1 in location 1, in NIR attr0 will use 239 * locations/slots 0 and 1, and attr1 will use location/slot 2 */ 240 if (shader->info.stage == MESA_SHADER_VERTEX) 241 nir_remap_dual_slot_attributes(shader, &sh->Program->DualSlotInputs); 242 243 shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); 244 if (shader_prog->Label) 245 shader->info.label = ralloc_strdup(shader, shader_prog->Label); 246 247 /* Check for transform feedback varyings specified via the API */ 248 shader->info.has_transform_feedback_varyings = 249 shader_prog->TransformFeedback.NumVarying > 0; 250 251 /* Check for transform feedback varyings specified in the Shader */ 252 if (shader_prog->last_vert_prog) 253 shader->info.has_transform_feedback_varyings |= 254 shader_prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0; 255 256 if (shader->info.stage == MESA_SHADER_FRAGMENT) { 257 shader->info.fs.pixel_center_integer = sh->Program->info.fs.pixel_center_integer; 258 shader->info.fs.origin_upper_left = sh->Program->info.fs.origin_upper_left; 259 } 260 261 return shader; 262} 263 264nir_visitor::nir_visitor(gl_context *ctx, nir_shader *shader) 265{ 266 this->supports_ints = shader->options->native_integers; 267 this->supports_std430 = ctx->Const.UseSTD430AsDefaultPacking; 268 this->shader = shader; 269 this->is_global = true; 270 this->var_table = _mesa_pointer_hash_table_create(NULL); 271 this->overload_table = _mesa_pointer_hash_table_create(NULL); 272 this->result = NULL; 273 this->impl = NULL; 274 this->deref = NULL; 275 memset(&this->b, 0, sizeof(this->b)); 276} 277 278nir_visitor::~nir_visitor() 279{ 280 _mesa_hash_table_destroy(this->var_table, NULL); 281 _mesa_hash_table_destroy(this->overload_table, NULL); 282} 283 284nir_deref_instr * 285nir_visitor::evaluate_deref(ir_instruction *ir) 286{ 287 ir->accept(this); 288 return this->deref; 289} 290 291nir_constant * 292nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx) 293{ 294 if (ir == NULL) 295 return NULL; 296 297 nir_constant *ret = rzalloc(mem_ctx, nir_constant); 298 299 const unsigned rows = ir->type->vector_elements; 300 const unsigned cols = ir->type->matrix_columns; 301 unsigned i; 302 303 ret->num_elements = 0; 304 switch (ir->type->base_type) { 305 case GLSL_TYPE_UINT: 306 /* Only float base types can be matrices. */ 307 assert(cols == 1); 308 309 for (unsigned r = 0; r < rows; r++) 310 if (supports_ints) 311 ret->values[0][r].u32 = ir->value.u[r]; 312 else 313 ret->values[0][r].f32 = ir->value.u[r]; 314 315 break; 316 317 case GLSL_TYPE_INT: 318 /* Only float base types can be matrices. */ 319 assert(cols == 1); 320 321 for (unsigned r = 0; r < rows; r++) 322 if (supports_ints) 323 ret->values[0][r].i32 = ir->value.i[r]; 324 else 325 ret->values[0][r].f32 = ir->value.i[r]; 326 327 break; 328 329 case GLSL_TYPE_FLOAT: 330 for (unsigned c = 0; c < cols; c++) { 331 for (unsigned r = 0; r < rows; r++) 332 ret->values[c][r].f32 = ir->value.f[c * rows + r]; 333 } 334 break; 335 336 case GLSL_TYPE_DOUBLE: 337 for (unsigned c = 0; c < cols; c++) { 338 for (unsigned r = 0; r < rows; r++) 339 ret->values[c][r].f64 = ir->value.d[c * rows + r]; 340 } 341 break; 342 343 case GLSL_TYPE_UINT64: 344 /* Only float base types can be matrices. */ 345 assert(cols == 1); 346 347 for (unsigned r = 0; r < rows; r++) 348 ret->values[0][r].u64 = ir->value.u64[r]; 349 break; 350 351 case GLSL_TYPE_INT64: 352 /* Only float base types can be matrices. */ 353 assert(cols == 1); 354 355 for (unsigned r = 0; r < rows; r++) 356 ret->values[0][r].i64 = ir->value.i64[r]; 357 break; 358 359 case GLSL_TYPE_BOOL: 360 /* Only float base types can be matrices. */ 361 assert(cols == 1); 362 363 for (unsigned r = 0; r < rows; r++) 364 ret->values[0][r].b = ir->value.b[r]; 365 366 break; 367 368 case GLSL_TYPE_STRUCT: 369 case GLSL_TYPE_ARRAY: 370 ret->elements = ralloc_array(mem_ctx, nir_constant *, 371 ir->type->length); 372 ret->num_elements = ir->type->length; 373 374 for (i = 0; i < ir->type->length; i++) 375 ret->elements[i] = constant_copy(ir->const_elements[i], mem_ctx); 376 break; 377 378 default: 379 unreachable("not reached"); 380 } 381 382 return ret; 383} 384 385static const glsl_type * 386wrap_type_in_array(const glsl_type *elem_type, const glsl_type *array_type) 387{ 388 if (!array_type->is_array()) 389 return elem_type; 390 391 elem_type = wrap_type_in_array(elem_type, array_type->fields.array); 392 393 return glsl_type::get_array_instance(elem_type, array_type->length); 394} 395 396void 397nir_visitor::visit(ir_variable *ir) 398{ 399 /* TODO: In future we should switch to using the NIR lowering pass but for 400 * now just ignore these variables as GLSL IR should have lowered them. 401 * Anything remaining are just dead vars that weren't cleaned up. 402 */ 403 if (ir->data.mode == ir_var_shader_shared) 404 return; 405 406 /* FINISHME: inout parameters */ 407 assert(ir->data.mode != ir_var_function_inout); 408 409 if (ir->data.mode == ir_var_function_out) 410 return; 411 412 nir_variable *var = rzalloc(shader, nir_variable); 413 var->type = ir->type; 414 var->name = ralloc_strdup(var, ir->name); 415 416 var->data.always_active_io = ir->data.always_active_io; 417 var->data.read_only = ir->data.read_only; 418 var->data.centroid = ir->data.centroid; 419 var->data.sample = ir->data.sample; 420 var->data.patch = ir->data.patch; 421 var->data.invariant = ir->data.invariant; 422 var->data.location = ir->data.location; 423 var->data.stream = ir->data.stream; 424 var->data.compact = false; 425 426 switch(ir->data.mode) { 427 case ir_var_auto: 428 case ir_var_temporary: 429 if (is_global) 430 var->data.mode = nir_var_shader_temp; 431 else 432 var->data.mode = nir_var_function_temp; 433 break; 434 435 case ir_var_function_in: 436 case ir_var_const_in: 437 var->data.mode = nir_var_function_temp; 438 break; 439 440 case ir_var_shader_in: 441 if (shader->info.stage == MESA_SHADER_FRAGMENT && 442 ir->data.location == VARYING_SLOT_FACE) { 443 /* For whatever reason, GLSL IR makes gl_FrontFacing an input */ 444 var->data.location = SYSTEM_VALUE_FRONT_FACE; 445 var->data.mode = nir_var_system_value; 446 } else if (shader->info.stage == MESA_SHADER_GEOMETRY && 447 ir->data.location == VARYING_SLOT_PRIMITIVE_ID) { 448 /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */ 449 var->data.location = SYSTEM_VALUE_PRIMITIVE_ID; 450 var->data.mode = nir_var_system_value; 451 } else { 452 var->data.mode = nir_var_shader_in; 453 454 if (shader->info.stage == MESA_SHADER_TESS_EVAL && 455 (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER || 456 ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) { 457 var->data.compact = ir->type->without_array()->is_scalar(); 458 } 459 460 if (shader->info.stage > MESA_SHADER_VERTEX && 461 ir->data.location >= VARYING_SLOT_CLIP_DIST0 && 462 ir->data.location <= VARYING_SLOT_CULL_DIST1) { 463 var->data.compact = ir->type->without_array()->is_scalar(); 464 } 465 } 466 break; 467 468 case ir_var_shader_out: 469 var->data.mode = nir_var_shader_out; 470 if (shader->info.stage == MESA_SHADER_TESS_CTRL && 471 (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER || 472 ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) { 473 var->data.compact = ir->type->without_array()->is_scalar(); 474 } 475 476 if (shader->info.stage <= MESA_SHADER_GEOMETRY && 477 ir->data.location >= VARYING_SLOT_CLIP_DIST0 && 478 ir->data.location <= VARYING_SLOT_CULL_DIST1) { 479 var->data.compact = ir->type->without_array()->is_scalar(); 480 } 481 break; 482 483 case ir_var_uniform: 484 if (ir->get_interface_type()) 485 var->data.mode = nir_var_mem_ubo; 486 else 487 var->data.mode = nir_var_uniform; 488 break; 489 490 case ir_var_shader_storage: 491 var->data.mode = nir_var_mem_ssbo; 492 break; 493 494 case ir_var_system_value: 495 var->data.mode = nir_var_system_value; 496 break; 497 498 default: 499 unreachable("not reached"); 500 } 501 502 unsigned image_access = 0; 503 if (ir->data.memory_read_only) 504 image_access |= ACCESS_NON_WRITEABLE; 505 if (ir->data.memory_write_only) 506 image_access |= ACCESS_NON_READABLE; 507 if (ir->data.memory_coherent) 508 image_access |= ACCESS_COHERENT; 509 if (ir->data.memory_volatile) 510 image_access |= ACCESS_VOLATILE; 511 if (ir->data.memory_restrict) 512 image_access |= ACCESS_RESTRICT; 513 514 /* For UBO and SSBO variables, we need explicit types */ 515 if (var->data.mode & (nir_var_mem_ubo | nir_var_mem_ssbo)) { 516 const glsl_type *explicit_ifc_type = 517 ir->get_interface_type()->get_explicit_interface_type(supports_std430); 518 519 if (ir->type->without_array()->is_interface()) { 520 /* If the type contains the interface, wrap the explicit type in the 521 * right number of arrays. 522 */ 523 var->type = wrap_type_in_array(explicit_ifc_type, ir->type); 524 } else { 525 /* Otherwise, this variable is one entry in the interface */ 526 UNUSED bool found = false; 527 for (unsigned i = 0; i < explicit_ifc_type->length; i++) { 528 const glsl_struct_field *field = 529 &explicit_ifc_type->fields.structure[i]; 530 if (strcmp(ir->name, field->name) != 0) 531 continue; 532 533 var->type = field->type; 534 if (field->memory_read_only) 535 image_access |= ACCESS_NON_WRITEABLE; 536 if (field->memory_write_only) 537 image_access |= ACCESS_NON_READABLE; 538 if (field->memory_coherent) 539 image_access |= ACCESS_COHERENT; 540 if (field->memory_volatile) 541 image_access |= ACCESS_VOLATILE; 542 if (field->memory_restrict) 543 image_access |= ACCESS_RESTRICT; 544 545 found = true; 546 break; 547 } 548 assert(found); 549 } 550 } 551 552 var->data.interpolation = ir->data.interpolation; 553 var->data.location_frac = ir->data.location_frac; 554 555 switch (ir->data.depth_layout) { 556 case ir_depth_layout_none: 557 var->data.depth_layout = nir_depth_layout_none; 558 break; 559 case ir_depth_layout_any: 560 var->data.depth_layout = nir_depth_layout_any; 561 break; 562 case ir_depth_layout_greater: 563 var->data.depth_layout = nir_depth_layout_greater; 564 break; 565 case ir_depth_layout_less: 566 var->data.depth_layout = nir_depth_layout_less; 567 break; 568 case ir_depth_layout_unchanged: 569 var->data.depth_layout = nir_depth_layout_unchanged; 570 break; 571 default: 572 unreachable("not reached"); 573 } 574 575 var->data.index = ir->data.index; 576 var->data.descriptor_set = 0; 577 var->data.binding = ir->data.binding; 578 var->data.explicit_binding = ir->data.explicit_binding; 579 var->data.bindless = ir->data.bindless; 580 var->data.offset = ir->data.offset; 581 582 var->data.image.access = (gl_access_qualifier)image_access; 583 var->data.image.format = ir->data.image_format; 584 585 var->data.fb_fetch_output = ir->data.fb_fetch_output; 586 var->data.explicit_xfb_buffer = ir->data.explicit_xfb_buffer; 587 var->data.explicit_xfb_stride = ir->data.explicit_xfb_stride; 588 var->data.xfb_buffer = ir->data.xfb_buffer; 589 var->data.xfb_stride = ir->data.xfb_stride; 590 591 var->num_state_slots = ir->get_num_state_slots(); 592 if (var->num_state_slots > 0) { 593 var->state_slots = rzalloc_array(var, nir_state_slot, 594 var->num_state_slots); 595 596 ir_state_slot *state_slots = ir->get_state_slots(); 597 for (unsigned i = 0; i < var->num_state_slots; i++) { 598 for (unsigned j = 0; j < 5; j++) 599 var->state_slots[i].tokens[j] = state_slots[i].tokens[j]; 600 var->state_slots[i].swizzle = state_slots[i].swizzle; 601 } 602 } else { 603 var->state_slots = NULL; 604 } 605 606 var->constant_initializer = constant_copy(ir->constant_initializer, var); 607 608 var->interface_type = ir->get_interface_type(); 609 610 if (var->data.mode == nir_var_function_temp) 611 nir_function_impl_add_variable(impl, var); 612 else 613 nir_shader_add_variable(shader, var); 614 615 _mesa_hash_table_insert(var_table, ir, var); 616} 617 618ir_visitor_status 619nir_function_visitor::visit_enter(ir_function *ir) 620{ 621 foreach_in_list(ir_function_signature, sig, &ir->signatures) { 622 visitor->create_function(sig); 623 } 624 return visit_continue_with_parent; 625} 626 627void 628nir_visitor::create_function(ir_function_signature *ir) 629{ 630 if (ir->is_intrinsic()) 631 return; 632 633 nir_function *func = nir_function_create(shader, ir->function_name()); 634 if (strcmp(ir->function_name(), "main") == 0) 635 func->is_entrypoint = true; 636 637 func->num_params = ir->parameters.length() + 638 (ir->return_type != glsl_type::void_type); 639 func->params = ralloc_array(shader, nir_parameter, func->num_params); 640 641 unsigned np = 0; 642 643 if (ir->return_type != glsl_type::void_type) { 644 /* The return value is a variable deref (basically an out parameter) */ 645 func->params[np].num_components = 1; 646 func->params[np].bit_size = 32; 647 np++; 648 } 649 650 foreach_in_list(ir_variable, param, &ir->parameters) { 651 /* FINISHME: pass arrays, structs, etc by reference? */ 652 assert(param->type->is_vector() || param->type->is_scalar()); 653 654 if (param->data.mode == ir_var_function_in) { 655 func->params[np].num_components = param->type->vector_elements; 656 func->params[np].bit_size = glsl_get_bit_size(param->type); 657 } else { 658 func->params[np].num_components = 1; 659 func->params[np].bit_size = 32; 660 } 661 np++; 662 } 663 assert(np == func->num_params); 664 665 _mesa_hash_table_insert(this->overload_table, ir, func); 666} 667 668void 669nir_visitor::visit(ir_function *ir) 670{ 671 foreach_in_list(ir_function_signature, sig, &ir->signatures) 672 sig->accept(this); 673} 674 675void 676nir_visitor::visit(ir_function_signature *ir) 677{ 678 if (ir->is_intrinsic()) 679 return; 680 681 this->sig = ir; 682 683 struct hash_entry *entry = 684 _mesa_hash_table_search(this->overload_table, ir); 685 686 assert(entry); 687 nir_function *func = (nir_function *) entry->data; 688 689 if (ir->is_defined) { 690 nir_function_impl *impl = nir_function_impl_create(func); 691 this->impl = impl; 692 693 this->is_global = false; 694 695 nir_builder_init(&b, impl); 696 b.cursor = nir_after_cf_list(&impl->body); 697 698 unsigned i = (ir->return_type != glsl_type::void_type) ? 1 : 0; 699 700 foreach_in_list(ir_variable, param, &ir->parameters) { 701 nir_variable *var = 702 nir_local_variable_create(impl, param->type, param->name); 703 704 if (param->data.mode == ir_var_function_in) { 705 nir_store_var(&b, var, nir_load_param(&b, i), ~0); 706 } 707 708 _mesa_hash_table_insert(var_table, param, var); 709 i++; 710 } 711 712 visit_exec_list(&ir->body, this); 713 714 this->is_global = true; 715 } else { 716 func->impl = NULL; 717 } 718} 719 720void 721nir_visitor::visit(ir_loop *ir) 722{ 723 nir_push_loop(&b); 724 visit_exec_list(&ir->body_instructions, this); 725 nir_pop_loop(&b, NULL); 726} 727 728void 729nir_visitor::visit(ir_if *ir) 730{ 731 nir_push_if(&b, evaluate_rvalue(ir->condition)); 732 visit_exec_list(&ir->then_instructions, this); 733 nir_push_else(&b, NULL); 734 visit_exec_list(&ir->else_instructions, this); 735 nir_pop_if(&b, NULL); 736} 737 738void 739nir_visitor::visit(ir_discard *ir) 740{ 741 /* 742 * discards aren't treated as control flow, because before we lower them 743 * they can appear anywhere in the shader and the stuff after them may still 744 * be executed (yay, crazy GLSL rules!). However, after lowering, all the 745 * discards will be immediately followed by a return. 746 */ 747 748 nir_intrinsic_instr *discard; 749 if (ir->condition) { 750 discard = nir_intrinsic_instr_create(this->shader, 751 nir_intrinsic_discard_if); 752 discard->src[0] = 753 nir_src_for_ssa(evaluate_rvalue(ir->condition)); 754 } else { 755 discard = nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard); 756 } 757 758 nir_builder_instr_insert(&b, &discard->instr); 759} 760 761void 762nir_visitor::visit(ir_emit_vertex *ir) 763{ 764 nir_intrinsic_instr *instr = 765 nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex); 766 nir_intrinsic_set_stream_id(instr, ir->stream_id()); 767 nir_builder_instr_insert(&b, &instr->instr); 768} 769 770void 771nir_visitor::visit(ir_end_primitive *ir) 772{ 773 nir_intrinsic_instr *instr = 774 nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive); 775 nir_intrinsic_set_stream_id(instr, ir->stream_id()); 776 nir_builder_instr_insert(&b, &instr->instr); 777} 778 779void 780nir_visitor::visit(ir_loop_jump *ir) 781{ 782 nir_jump_type type; 783 switch (ir->mode) { 784 case ir_loop_jump::jump_break: 785 type = nir_jump_break; 786 break; 787 case ir_loop_jump::jump_continue: 788 type = nir_jump_continue; 789 break; 790 default: 791 unreachable("not reached"); 792 } 793 794 nir_jump_instr *instr = nir_jump_instr_create(this->shader, type); 795 nir_builder_instr_insert(&b, &instr->instr); 796} 797 798void 799nir_visitor::visit(ir_return *ir) 800{ 801 if (ir->value != NULL) { 802 nir_deref_instr *ret_deref = 803 nir_build_deref_cast(&b, nir_load_param(&b, 0), 804 nir_var_function_temp, ir->value->type, 0); 805 806 nir_ssa_def *val = evaluate_rvalue(ir->value); 807 nir_store_deref(&b, ret_deref, val, ~0); 808 } 809 810 nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return); 811 nir_builder_instr_insert(&b, &instr->instr); 812} 813 814static void 815intrinsic_set_std430_align(nir_intrinsic_instr *intrin, const glsl_type *type) 816{ 817 unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type); 818 unsigned pow2_components = util_next_power_of_two(type->vector_elements); 819 nir_intrinsic_set_align(intrin, (bit_size / 8) * pow2_components, 0); 820} 821 822void 823nir_visitor::visit(ir_call *ir) 824{ 825 if (ir->callee->is_intrinsic()) { 826 nir_intrinsic_op op; 827 828 switch (ir->callee->intrinsic_id) { 829 case ir_intrinsic_generic_atomic_add: 830 op = ir->return_deref->type->is_integer_32_64() 831 ? nir_intrinsic_deref_atomic_add : nir_intrinsic_deref_atomic_fadd; 832 break; 833 case ir_intrinsic_generic_atomic_and: 834 op = nir_intrinsic_deref_atomic_and; 835 break; 836 case ir_intrinsic_generic_atomic_or: 837 op = nir_intrinsic_deref_atomic_or; 838 break; 839 case ir_intrinsic_generic_atomic_xor: 840 op = nir_intrinsic_deref_atomic_xor; 841 break; 842 case ir_intrinsic_generic_atomic_min: 843 assert(ir->return_deref); 844 if (ir->return_deref->type == glsl_type::int_type) 845 op = nir_intrinsic_deref_atomic_imin; 846 else if (ir->return_deref->type == glsl_type::uint_type) 847 op = nir_intrinsic_deref_atomic_umin; 848 else if (ir->return_deref->type == glsl_type::float_type) 849 op = nir_intrinsic_deref_atomic_fmin; 850 else 851 unreachable("Invalid type"); 852 break; 853 case ir_intrinsic_generic_atomic_max: 854 assert(ir->return_deref); 855 if (ir->return_deref->type == glsl_type::int_type) 856 op = nir_intrinsic_deref_atomic_imax; 857 else if (ir->return_deref->type == glsl_type::uint_type) 858 op = nir_intrinsic_deref_atomic_umax; 859 else if (ir->return_deref->type == glsl_type::float_type) 860 op = nir_intrinsic_deref_atomic_fmax; 861 else 862 unreachable("Invalid type"); 863 break; 864 case ir_intrinsic_generic_atomic_exchange: 865 op = nir_intrinsic_deref_atomic_exchange; 866 break; 867 case ir_intrinsic_generic_atomic_comp_swap: 868 op = ir->return_deref->type->is_integer_32_64() 869 ? nir_intrinsic_deref_atomic_comp_swap 870 : nir_intrinsic_deref_atomic_fcomp_swap; 871 break; 872 case ir_intrinsic_atomic_counter_read: 873 op = nir_intrinsic_atomic_counter_read_deref; 874 break; 875 case ir_intrinsic_atomic_counter_increment: 876 op = nir_intrinsic_atomic_counter_inc_deref; 877 break; 878 case ir_intrinsic_atomic_counter_predecrement: 879 op = nir_intrinsic_atomic_counter_pre_dec_deref; 880 break; 881 case ir_intrinsic_atomic_counter_add: 882 op = nir_intrinsic_atomic_counter_add_deref; 883 break; 884 case ir_intrinsic_atomic_counter_and: 885 op = nir_intrinsic_atomic_counter_and_deref; 886 break; 887 case ir_intrinsic_atomic_counter_or: 888 op = nir_intrinsic_atomic_counter_or_deref; 889 break; 890 case ir_intrinsic_atomic_counter_xor: 891 op = nir_intrinsic_atomic_counter_xor_deref; 892 break; 893 case ir_intrinsic_atomic_counter_min: 894 op = nir_intrinsic_atomic_counter_min_deref; 895 break; 896 case ir_intrinsic_atomic_counter_max: 897 op = nir_intrinsic_atomic_counter_max_deref; 898 break; 899 case ir_intrinsic_atomic_counter_exchange: 900 op = nir_intrinsic_atomic_counter_exchange_deref; 901 break; 902 case ir_intrinsic_atomic_counter_comp_swap: 903 op = nir_intrinsic_atomic_counter_comp_swap_deref; 904 break; 905 case ir_intrinsic_image_load: 906 op = nir_intrinsic_image_deref_load; 907 break; 908 case ir_intrinsic_image_store: 909 op = nir_intrinsic_image_deref_store; 910 break; 911 case ir_intrinsic_image_atomic_add: 912 op = ir->return_deref->type->is_integer_32_64() 913 ? nir_intrinsic_image_deref_atomic_add 914 : nir_intrinsic_image_deref_atomic_fadd; 915 break; 916 case ir_intrinsic_image_atomic_min: 917 op = nir_intrinsic_image_deref_atomic_min; 918 break; 919 case ir_intrinsic_image_atomic_max: 920 op = nir_intrinsic_image_deref_atomic_max; 921 break; 922 case ir_intrinsic_image_atomic_and: 923 op = nir_intrinsic_image_deref_atomic_and; 924 break; 925 case ir_intrinsic_image_atomic_or: 926 op = nir_intrinsic_image_deref_atomic_or; 927 break; 928 case ir_intrinsic_image_atomic_xor: 929 op = nir_intrinsic_image_deref_atomic_xor; 930 break; 931 case ir_intrinsic_image_atomic_exchange: 932 op = nir_intrinsic_image_deref_atomic_exchange; 933 break; 934 case ir_intrinsic_image_atomic_comp_swap: 935 op = nir_intrinsic_image_deref_atomic_comp_swap; 936 break; 937 case ir_intrinsic_memory_barrier: 938 op = nir_intrinsic_memory_barrier; 939 break; 940 case ir_intrinsic_image_size: 941 op = nir_intrinsic_image_deref_size; 942 break; 943 case ir_intrinsic_image_samples: 944 op = nir_intrinsic_image_deref_samples; 945 break; 946 case ir_intrinsic_ssbo_store: 947 op = nir_intrinsic_store_ssbo; 948 break; 949 case ir_intrinsic_ssbo_load: 950 op = nir_intrinsic_load_ssbo; 951 break; 952 case ir_intrinsic_ssbo_atomic_add: 953 op = ir->return_deref->type->is_integer_32_64() 954 ? nir_intrinsic_ssbo_atomic_add : nir_intrinsic_ssbo_atomic_fadd; 955 break; 956 case ir_intrinsic_ssbo_atomic_and: 957 op = nir_intrinsic_ssbo_atomic_and; 958 break; 959 case ir_intrinsic_ssbo_atomic_or: 960 op = nir_intrinsic_ssbo_atomic_or; 961 break; 962 case ir_intrinsic_ssbo_atomic_xor: 963 op = nir_intrinsic_ssbo_atomic_xor; 964 break; 965 case ir_intrinsic_ssbo_atomic_min: 966 assert(ir->return_deref); 967 if (ir->return_deref->type == glsl_type::int_type) 968 op = nir_intrinsic_ssbo_atomic_imin; 969 else if (ir->return_deref->type == glsl_type::uint_type) 970 op = nir_intrinsic_ssbo_atomic_umin; 971 else if (ir->return_deref->type == glsl_type::float_type) 972 op = nir_intrinsic_ssbo_atomic_fmin; 973 else 974 unreachable("Invalid type"); 975 break; 976 case ir_intrinsic_ssbo_atomic_max: 977 assert(ir->return_deref); 978 if (ir->return_deref->type == glsl_type::int_type) 979 op = nir_intrinsic_ssbo_atomic_imax; 980 else if (ir->return_deref->type == glsl_type::uint_type) 981 op = nir_intrinsic_ssbo_atomic_umax; 982 else if (ir->return_deref->type == glsl_type::float_type) 983 op = nir_intrinsic_ssbo_atomic_fmax; 984 else 985 unreachable("Invalid type"); 986 break; 987 case ir_intrinsic_ssbo_atomic_exchange: 988 op = nir_intrinsic_ssbo_atomic_exchange; 989 break; 990 case ir_intrinsic_ssbo_atomic_comp_swap: 991 op = ir->return_deref->type->is_integer_32_64() 992 ? nir_intrinsic_ssbo_atomic_comp_swap 993 : nir_intrinsic_ssbo_atomic_fcomp_swap; 994 break; 995 case ir_intrinsic_shader_clock: 996 op = nir_intrinsic_shader_clock; 997 break; 998 case ir_intrinsic_begin_invocation_interlock: 999 op = nir_intrinsic_begin_invocation_interlock; 1000 break; 1001 case ir_intrinsic_end_invocation_interlock: 1002 op = nir_intrinsic_end_invocation_interlock; 1003 break; 1004 case ir_intrinsic_group_memory_barrier: 1005 op = nir_intrinsic_group_memory_barrier; 1006 break; 1007 case ir_intrinsic_memory_barrier_atomic_counter: 1008 op = nir_intrinsic_memory_barrier_atomic_counter; 1009 break; 1010 case ir_intrinsic_memory_barrier_buffer: 1011 op = nir_intrinsic_memory_barrier_buffer; 1012 break; 1013 case ir_intrinsic_memory_barrier_image: 1014 op = nir_intrinsic_memory_barrier_image; 1015 break; 1016 case ir_intrinsic_memory_barrier_shared: 1017 op = nir_intrinsic_memory_barrier_shared; 1018 break; 1019 case ir_intrinsic_shared_load: 1020 op = nir_intrinsic_load_shared; 1021 break; 1022 case ir_intrinsic_shared_store: 1023 op = nir_intrinsic_store_shared; 1024 break; 1025 case ir_intrinsic_shared_atomic_add: 1026 op = ir->return_deref->type->is_integer_32_64() 1027 ? nir_intrinsic_shared_atomic_add 1028 : nir_intrinsic_shared_atomic_fadd; 1029 break; 1030 case ir_intrinsic_shared_atomic_and: 1031 op = nir_intrinsic_shared_atomic_and; 1032 break; 1033 case ir_intrinsic_shared_atomic_or: 1034 op = nir_intrinsic_shared_atomic_or; 1035 break; 1036 case ir_intrinsic_shared_atomic_xor: 1037 op = nir_intrinsic_shared_atomic_xor; 1038 break; 1039 case ir_intrinsic_shared_atomic_min: 1040 assert(ir->return_deref); 1041 if (ir->return_deref->type == glsl_type::int_type) 1042 op = nir_intrinsic_shared_atomic_imin; 1043 else if (ir->return_deref->type == glsl_type::uint_type) 1044 op = nir_intrinsic_shared_atomic_umin; 1045 else if (ir->return_deref->type == glsl_type::float_type) 1046 op = nir_intrinsic_shared_atomic_fmin; 1047 else 1048 unreachable("Invalid type"); 1049 break; 1050 case ir_intrinsic_shared_atomic_max: 1051 assert(ir->return_deref); 1052 if (ir->return_deref->type == glsl_type::int_type) 1053 op = nir_intrinsic_shared_atomic_imax; 1054 else if (ir->return_deref->type == glsl_type::uint_type) 1055 op = nir_intrinsic_shared_atomic_umax; 1056 else if (ir->return_deref->type == glsl_type::float_type) 1057 op = nir_intrinsic_shared_atomic_fmax; 1058 else 1059 unreachable("Invalid type"); 1060 break; 1061 case ir_intrinsic_shared_atomic_exchange: 1062 op = nir_intrinsic_shared_atomic_exchange; 1063 break; 1064 case ir_intrinsic_shared_atomic_comp_swap: 1065 op = ir->return_deref->type->is_integer_32_64() 1066 ? nir_intrinsic_shared_atomic_comp_swap 1067 : nir_intrinsic_shared_atomic_fcomp_swap; 1068 break; 1069 case ir_intrinsic_vote_any: 1070 op = nir_intrinsic_vote_any; 1071 break; 1072 case ir_intrinsic_vote_all: 1073 op = nir_intrinsic_vote_all; 1074 break; 1075 case ir_intrinsic_vote_eq: 1076 op = nir_intrinsic_vote_ieq; 1077 break; 1078 case ir_intrinsic_ballot: 1079 op = nir_intrinsic_ballot; 1080 break; 1081 case ir_intrinsic_read_invocation: 1082 op = nir_intrinsic_read_invocation; 1083 break; 1084 case ir_intrinsic_read_first_invocation: 1085 op = nir_intrinsic_read_first_invocation; 1086 break; 1087 default: 1088 unreachable("not reached"); 1089 } 1090 1091 nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); 1092 nir_ssa_def *ret = &instr->dest.ssa; 1093 1094 switch (op) { 1095 case nir_intrinsic_deref_atomic_add: 1096 case nir_intrinsic_deref_atomic_imin: 1097 case nir_intrinsic_deref_atomic_umin: 1098 case nir_intrinsic_deref_atomic_imax: 1099 case nir_intrinsic_deref_atomic_umax: 1100 case nir_intrinsic_deref_atomic_and: 1101 case nir_intrinsic_deref_atomic_or: 1102 case nir_intrinsic_deref_atomic_xor: 1103 case nir_intrinsic_deref_atomic_exchange: 1104 case nir_intrinsic_deref_atomic_comp_swap: 1105 case nir_intrinsic_deref_atomic_fadd: 1106 case nir_intrinsic_deref_atomic_fmin: 1107 case nir_intrinsic_deref_atomic_fmax: 1108 case nir_intrinsic_deref_atomic_fcomp_swap: { 1109 int param_count = ir->actual_parameters.length(); 1110 assert(param_count == 2 || param_count == 3); 1111 1112 /* Deref */ 1113 exec_node *param = ir->actual_parameters.get_head(); 1114 ir_rvalue *rvalue = (ir_rvalue *) param; 1115 ir_dereference *deref = rvalue->as_dereference(); 1116 ir_swizzle *swizzle = NULL; 1117 if (!deref) { 1118 /* We may have a swizzle to pick off a single vec4 component */ 1119 swizzle = rvalue->as_swizzle(); 1120 assert(swizzle && swizzle->type->vector_elements == 1); 1121 deref = swizzle->val->as_dereference(); 1122 assert(deref); 1123 } 1124 nir_deref_instr *nir_deref = evaluate_deref(deref); 1125 if (swizzle) { 1126 nir_deref = nir_build_deref_array_imm(&b, nir_deref, 1127 swizzle->mask.x); 1128 } 1129 instr->src[0] = nir_src_for_ssa(&nir_deref->dest.ssa); 1130 1131 /* data1 parameter (this is always present) */ 1132 param = param->get_next(); 1133 ir_instruction *inst = (ir_instruction *) param; 1134 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1135 1136 /* data2 parameter (only with atomic_comp_swap) */ 1137 if (param_count == 3) { 1138 assert(op == nir_intrinsic_deref_atomic_comp_swap || 1139 op == nir_intrinsic_deref_atomic_fcomp_swap); 1140 param = param->get_next(); 1141 inst = (ir_instruction *) param; 1142 instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1143 } 1144 1145 /* Atomic result */ 1146 assert(ir->return_deref); 1147 nir_ssa_dest_init(&instr->instr, &instr->dest, 1148 ir->return_deref->type->vector_elements, 32, NULL); 1149 nir_builder_instr_insert(&b, &instr->instr); 1150 break; 1151 } 1152 case nir_intrinsic_atomic_counter_read_deref: 1153 case nir_intrinsic_atomic_counter_inc_deref: 1154 case nir_intrinsic_atomic_counter_pre_dec_deref: 1155 case nir_intrinsic_atomic_counter_add_deref: 1156 case nir_intrinsic_atomic_counter_min_deref: 1157 case nir_intrinsic_atomic_counter_max_deref: 1158 case nir_intrinsic_atomic_counter_and_deref: 1159 case nir_intrinsic_atomic_counter_or_deref: 1160 case nir_intrinsic_atomic_counter_xor_deref: 1161 case nir_intrinsic_atomic_counter_exchange_deref: 1162 case nir_intrinsic_atomic_counter_comp_swap_deref: { 1163 /* Set the counter variable dereference. */ 1164 exec_node *param = ir->actual_parameters.get_head(); 1165 ir_dereference *counter = (ir_dereference *)param; 1166 1167 instr->src[0] = nir_src_for_ssa(&evaluate_deref(counter)->dest.ssa); 1168 param = param->get_next(); 1169 1170 /* Set the intrinsic destination. */ 1171 if (ir->return_deref) { 1172 nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); 1173 } 1174 1175 /* Set the intrinsic parameters. */ 1176 if (!param->is_tail_sentinel()) { 1177 instr->src[1] = 1178 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 1179 param = param->get_next(); 1180 } 1181 1182 if (!param->is_tail_sentinel()) { 1183 instr->src[2] = 1184 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 1185 param = param->get_next(); 1186 } 1187 1188 nir_builder_instr_insert(&b, &instr->instr); 1189 break; 1190 } 1191 case nir_intrinsic_image_deref_load: 1192 case nir_intrinsic_image_deref_store: 1193 case nir_intrinsic_image_deref_atomic_add: 1194 case nir_intrinsic_image_deref_atomic_min: 1195 case nir_intrinsic_image_deref_atomic_max: 1196 case nir_intrinsic_image_deref_atomic_and: 1197 case nir_intrinsic_image_deref_atomic_or: 1198 case nir_intrinsic_image_deref_atomic_xor: 1199 case nir_intrinsic_image_deref_atomic_exchange: 1200 case nir_intrinsic_image_deref_atomic_comp_swap: 1201 case nir_intrinsic_image_deref_atomic_fadd: 1202 case nir_intrinsic_image_deref_samples: 1203 case nir_intrinsic_image_deref_size: { 1204 nir_ssa_undef_instr *instr_undef = 1205 nir_ssa_undef_instr_create(shader, 1, 32); 1206 nir_builder_instr_insert(&b, &instr_undef->instr); 1207 1208 /* Set the image variable dereference. */ 1209 exec_node *param = ir->actual_parameters.get_head(); 1210 ir_dereference *image = (ir_dereference *)param; 1211 nir_deref_instr *deref = evaluate_deref(image); 1212 const glsl_type *type = deref->type; 1213 1214 instr->src[0] = nir_src_for_ssa(&deref->dest.ssa); 1215 param = param->get_next(); 1216 1217 /* Set the intrinsic destination. */ 1218 if (ir->return_deref) { 1219 unsigned num_components = ir->return_deref->type->vector_elements; 1220 nir_ssa_dest_init(&instr->instr, &instr->dest, 1221 num_components, 32, NULL); 1222 } 1223 1224 if (op == nir_intrinsic_image_deref_size) { 1225 instr->num_components = instr->dest.ssa.num_components; 1226 } else if (op == nir_intrinsic_image_deref_load || 1227 op == nir_intrinsic_image_deref_store) { 1228 instr->num_components = 4; 1229 } 1230 1231 if (op == nir_intrinsic_image_deref_size || 1232 op == nir_intrinsic_image_deref_samples) { 1233 nir_builder_instr_insert(&b, &instr->instr); 1234 break; 1235 } 1236 1237 /* Set the address argument, extending the coordinate vector to four 1238 * components. 1239 */ 1240 nir_ssa_def *src_addr = 1241 evaluate_rvalue((ir_dereference *)param); 1242 nir_ssa_def *srcs[4]; 1243 1244 for (int i = 0; i < 4; i++) { 1245 if (i < type->coordinate_components()) 1246 srcs[i] = nir_channel(&b, src_addr, i); 1247 else 1248 srcs[i] = &instr_undef->def; 1249 } 1250 1251 instr->src[1] = nir_src_for_ssa(nir_vec(&b, srcs, 4)); 1252 param = param->get_next(); 1253 1254 /* Set the sample argument, which is undefined for single-sample 1255 * images. 1256 */ 1257 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { 1258 instr->src[2] = 1259 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 1260 param = param->get_next(); 1261 } else { 1262 instr->src[2] = nir_src_for_ssa(&instr_undef->def); 1263 } 1264 1265 /* Set the intrinsic parameters. */ 1266 if (!param->is_tail_sentinel()) { 1267 instr->src[3] = 1268 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 1269 param = param->get_next(); 1270 } 1271 1272 if (!param->is_tail_sentinel()) { 1273 instr->src[4] = 1274 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 1275 param = param->get_next(); 1276 } 1277 nir_builder_instr_insert(&b, &instr->instr); 1278 break; 1279 } 1280 case nir_intrinsic_memory_barrier: 1281 case nir_intrinsic_group_memory_barrier: 1282 case nir_intrinsic_memory_barrier_atomic_counter: 1283 case nir_intrinsic_memory_barrier_buffer: 1284 case nir_intrinsic_memory_barrier_image: 1285 case nir_intrinsic_memory_barrier_shared: 1286 nir_builder_instr_insert(&b, &instr->instr); 1287 break; 1288 case nir_intrinsic_shader_clock: 1289 nir_ssa_dest_init(&instr->instr, &instr->dest, 2, 32, NULL); 1290 instr->num_components = 2; 1291 nir_builder_instr_insert(&b, &instr->instr); 1292 break; 1293 case nir_intrinsic_begin_invocation_interlock: 1294 nir_builder_instr_insert(&b, &instr->instr); 1295 break; 1296 case nir_intrinsic_end_invocation_interlock: 1297 nir_builder_instr_insert(&b, &instr->instr); 1298 break; 1299 case nir_intrinsic_store_ssbo: { 1300 exec_node *param = ir->actual_parameters.get_head(); 1301 ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); 1302 1303 param = param->get_next(); 1304 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 1305 1306 param = param->get_next(); 1307 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 1308 1309 param = param->get_next(); 1310 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 1311 assert(write_mask); 1312 1313 nir_ssa_def *nir_val = evaluate_rvalue(val); 1314 if (val->type->is_boolean()) 1315 nir_val = nir_b2i32(&b, nir_val); 1316 1317 instr->src[0] = nir_src_for_ssa(nir_val); 1318 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); 1319 instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); 1320 intrinsic_set_std430_align(instr, val->type); 1321 nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]); 1322 instr->num_components = val->type->vector_elements; 1323 1324 nir_builder_instr_insert(&b, &instr->instr); 1325 break; 1326 } 1327 case nir_intrinsic_load_ssbo: { 1328 exec_node *param = ir->actual_parameters.get_head(); 1329 ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); 1330 1331 param = param->get_next(); 1332 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 1333 1334 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block)); 1335 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); 1336 1337 const glsl_type *type = ir->return_deref->var->type; 1338 instr->num_components = type->vector_elements; 1339 intrinsic_set_std430_align(instr, type); 1340 1341 /* Setup destination register */ 1342 unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type); 1343 nir_ssa_dest_init(&instr->instr, &instr->dest, 1344 type->vector_elements, bit_size, NULL); 1345 1346 /* Insert the created nir instruction now since in the case of boolean 1347 * result we will need to emit another instruction after it 1348 */ 1349 nir_builder_instr_insert(&b, &instr->instr); 1350 1351 /* 1352 * In SSBO/UBO's, a true boolean value is any non-zero value, but we 1353 * consider a true boolean to be ~0. Fix this up with a != 0 1354 * comparison. 1355 */ 1356 if (type->is_boolean()) 1357 ret = nir_i2b(&b, &instr->dest.ssa); 1358 break; 1359 } 1360 case nir_intrinsic_ssbo_atomic_add: 1361 case nir_intrinsic_ssbo_atomic_imin: 1362 case nir_intrinsic_ssbo_atomic_umin: 1363 case nir_intrinsic_ssbo_atomic_imax: 1364 case nir_intrinsic_ssbo_atomic_umax: 1365 case nir_intrinsic_ssbo_atomic_and: 1366 case nir_intrinsic_ssbo_atomic_or: 1367 case nir_intrinsic_ssbo_atomic_xor: 1368 case nir_intrinsic_ssbo_atomic_exchange: 1369 case nir_intrinsic_ssbo_atomic_comp_swap: 1370 case nir_intrinsic_ssbo_atomic_fadd: 1371 case nir_intrinsic_ssbo_atomic_fmin: 1372 case nir_intrinsic_ssbo_atomic_fmax: 1373 case nir_intrinsic_ssbo_atomic_fcomp_swap: { 1374 int param_count = ir->actual_parameters.length(); 1375 assert(param_count == 3 || param_count == 4); 1376 1377 /* Block index */ 1378 exec_node *param = ir->actual_parameters.get_head(); 1379 ir_instruction *inst = (ir_instruction *) param; 1380 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1381 1382 /* Offset */ 1383 param = param->get_next(); 1384 inst = (ir_instruction *) param; 1385 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1386 1387 /* data1 parameter (this is always present) */ 1388 param = param->get_next(); 1389 inst = (ir_instruction *) param; 1390 instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1391 1392 /* data2 parameter (only with atomic_comp_swap) */ 1393 if (param_count == 4) { 1394 assert(op == nir_intrinsic_ssbo_atomic_comp_swap || 1395 op == nir_intrinsic_ssbo_atomic_fcomp_swap); 1396 param = param->get_next(); 1397 inst = (ir_instruction *) param; 1398 instr->src[3] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1399 } 1400 1401 /* Atomic result */ 1402 assert(ir->return_deref); 1403 nir_ssa_dest_init(&instr->instr, &instr->dest, 1404 ir->return_deref->type->vector_elements, 32, NULL); 1405 nir_builder_instr_insert(&b, &instr->instr); 1406 break; 1407 } 1408 case nir_intrinsic_load_shared: { 1409 exec_node *param = ir->actual_parameters.get_head(); 1410 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 1411 1412 nir_intrinsic_set_base(instr, 0); 1413 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); 1414 1415 const glsl_type *type = ir->return_deref->var->type; 1416 instr->num_components = type->vector_elements; 1417 intrinsic_set_std430_align(instr, type); 1418 1419 /* Setup destination register */ 1420 unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type); 1421 nir_ssa_dest_init(&instr->instr, &instr->dest, 1422 type->vector_elements, bit_size, NULL); 1423 1424 nir_builder_instr_insert(&b, &instr->instr); 1425 1426 /* The value in shared memory is a 32-bit value */ 1427 if (type->is_boolean()) 1428 ret = nir_i2b(&b, &instr->dest.ssa); 1429 break; 1430 } 1431 case nir_intrinsic_store_shared: { 1432 exec_node *param = ir->actual_parameters.get_head(); 1433 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 1434 1435 param = param->get_next(); 1436 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 1437 1438 param = param->get_next(); 1439 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 1440 assert(write_mask); 1441 1442 nir_intrinsic_set_base(instr, 0); 1443 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); 1444 1445 nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]); 1446 1447 nir_ssa_def *nir_val = evaluate_rvalue(val); 1448 /* The value in shared memory is a 32-bit value */ 1449 if (val->type->is_boolean()) 1450 nir_val = nir_b2i32(&b, nir_val); 1451 1452 instr->src[0] = nir_src_for_ssa(nir_val); 1453 instr->num_components = val->type->vector_elements; 1454 intrinsic_set_std430_align(instr, val->type); 1455 1456 nir_builder_instr_insert(&b, &instr->instr); 1457 break; 1458 } 1459 case nir_intrinsic_shared_atomic_add: 1460 case nir_intrinsic_shared_atomic_imin: 1461 case nir_intrinsic_shared_atomic_umin: 1462 case nir_intrinsic_shared_atomic_imax: 1463 case nir_intrinsic_shared_atomic_umax: 1464 case nir_intrinsic_shared_atomic_and: 1465 case nir_intrinsic_shared_atomic_or: 1466 case nir_intrinsic_shared_atomic_xor: 1467 case nir_intrinsic_shared_atomic_exchange: 1468 case nir_intrinsic_shared_atomic_comp_swap: 1469 case nir_intrinsic_shared_atomic_fadd: 1470 case nir_intrinsic_shared_atomic_fmin: 1471 case nir_intrinsic_shared_atomic_fmax: 1472 case nir_intrinsic_shared_atomic_fcomp_swap: { 1473 int param_count = ir->actual_parameters.length(); 1474 assert(param_count == 2 || param_count == 3); 1475 1476 /* Offset */ 1477 exec_node *param = ir->actual_parameters.get_head(); 1478 ir_instruction *inst = (ir_instruction *) param; 1479 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1480 1481 /* data1 parameter (this is always present) */ 1482 param = param->get_next(); 1483 inst = (ir_instruction *) param; 1484 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1485 1486 /* data2 parameter (only with atomic_comp_swap) */ 1487 if (param_count == 3) { 1488 assert(op == nir_intrinsic_shared_atomic_comp_swap || 1489 op == nir_intrinsic_shared_atomic_fcomp_swap); 1490 param = param->get_next(); 1491 inst = (ir_instruction *) param; 1492 instr->src[2] = 1493 nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1494 } 1495 1496 /* Atomic result */ 1497 assert(ir->return_deref); 1498 unsigned bit_size = glsl_get_bit_size(ir->return_deref->type); 1499 nir_ssa_dest_init(&instr->instr, &instr->dest, 1500 ir->return_deref->type->vector_elements, 1501 bit_size, NULL); 1502 nir_builder_instr_insert(&b, &instr->instr); 1503 break; 1504 } 1505 case nir_intrinsic_vote_any: 1506 case nir_intrinsic_vote_all: 1507 case nir_intrinsic_vote_ieq: { 1508 nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL); 1509 instr->num_components = 1; 1510 1511 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1512 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1513 1514 nir_builder_instr_insert(&b, &instr->instr); 1515 break; 1516 } 1517 1518 case nir_intrinsic_ballot: { 1519 nir_ssa_dest_init(&instr->instr, &instr->dest, 1520 ir->return_deref->type->vector_elements, 64, NULL); 1521 instr->num_components = ir->return_deref->type->vector_elements; 1522 1523 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1524 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1525 1526 nir_builder_instr_insert(&b, &instr->instr); 1527 break; 1528 } 1529 case nir_intrinsic_read_invocation: { 1530 nir_ssa_dest_init(&instr->instr, &instr->dest, 1531 ir->return_deref->type->vector_elements, 32, NULL); 1532 instr->num_components = ir->return_deref->type->vector_elements; 1533 1534 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1535 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1536 1537 ir_rvalue *invocation = (ir_rvalue *) ir->actual_parameters.get_head()->next; 1538 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(invocation)); 1539 1540 nir_builder_instr_insert(&b, &instr->instr); 1541 break; 1542 } 1543 case nir_intrinsic_read_first_invocation: { 1544 nir_ssa_dest_init(&instr->instr, &instr->dest, 1545 ir->return_deref->type->vector_elements, 32, NULL); 1546 instr->num_components = ir->return_deref->type->vector_elements; 1547 1548 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1549 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1550 1551 nir_builder_instr_insert(&b, &instr->instr); 1552 break; 1553 } 1554 default: 1555 unreachable("not reached"); 1556 } 1557 1558 if (ir->return_deref) 1559 nir_store_deref(&b, evaluate_deref(ir->return_deref), ret, ~0); 1560 1561 return; 1562 } 1563 1564 struct hash_entry *entry = 1565 _mesa_hash_table_search(this->overload_table, ir->callee); 1566 assert(entry); 1567 nir_function *callee = (nir_function *) entry->data; 1568 1569 nir_call_instr *call = nir_call_instr_create(this->shader, callee); 1570 1571 unsigned i = 0; 1572 nir_deref_instr *ret_deref = NULL; 1573 if (ir->return_deref) { 1574 nir_variable *ret_tmp = 1575 nir_local_variable_create(this->impl, ir->return_deref->type, 1576 "return_tmp"); 1577 ret_deref = nir_build_deref_var(&b, ret_tmp); 1578 call->params[i++] = nir_src_for_ssa(&ret_deref->dest.ssa); 1579 } 1580 1581 foreach_two_lists(formal_node, &ir->callee->parameters, 1582 actual_node, &ir->actual_parameters) { 1583 ir_rvalue *param_rvalue = (ir_rvalue *) actual_node; 1584 ir_variable *sig_param = (ir_variable *) formal_node; 1585 1586 if (sig_param->data.mode == ir_var_function_out) { 1587 nir_deref_instr *out_deref = evaluate_deref(param_rvalue); 1588 call->params[i] = nir_src_for_ssa(&out_deref->dest.ssa); 1589 } else if (sig_param->data.mode == ir_var_function_in) { 1590 nir_ssa_def *val = evaluate_rvalue(param_rvalue); 1591 nir_src src = nir_src_for_ssa(val); 1592 1593 nir_src_copy(&call->params[i], &src, call); 1594 } else if (sig_param->data.mode == ir_var_function_inout) { 1595 unreachable("unimplemented: inout parameters"); 1596 } 1597 1598 i++; 1599 } 1600 1601 nir_builder_instr_insert(&b, &call->instr); 1602 1603 if (ir->return_deref) 1604 nir_store_deref(&b, evaluate_deref(ir->return_deref), nir_load_deref(&b, ret_deref), ~0); 1605} 1606 1607void 1608nir_visitor::visit(ir_assignment *ir) 1609{ 1610 unsigned num_components = ir->lhs->type->vector_elements; 1611 1612 b.exact = ir->lhs->variable_referenced()->data.invariant || 1613 ir->lhs->variable_referenced()->data.precise; 1614 1615 if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) && 1616 (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) { 1617 if (ir->condition) { 1618 nir_push_if(&b, evaluate_rvalue(ir->condition)); 1619 nir_copy_deref(&b, evaluate_deref(ir->lhs), evaluate_deref(ir->rhs)); 1620 nir_pop_if(&b, NULL); 1621 } else { 1622 nir_copy_deref(&b, evaluate_deref(ir->lhs), evaluate_deref(ir->rhs)); 1623 } 1624 return; 1625 } 1626 1627 assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector()); 1628 1629 ir->lhs->accept(this); 1630 nir_deref_instr *lhs_deref = this->deref; 1631 nir_ssa_def *src = evaluate_rvalue(ir->rhs); 1632 1633 if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) { 1634 /* GLSL IR will give us the input to the write-masked assignment in a 1635 * single packed vector. So, for example, if the writemask is xzw, then 1636 * we have to swizzle x -> x, y -> z, and z -> w and get the y component 1637 * from the load. 1638 */ 1639 unsigned swiz[4]; 1640 unsigned component = 0; 1641 for (unsigned i = 0; i < 4; i++) { 1642 swiz[i] = ir->write_mask & (1 << i) ? component++ : 0; 1643 } 1644 src = nir_swizzle(&b, src, swiz, num_components, false); 1645 } 1646 1647 if (ir->condition) { 1648 nir_push_if(&b, evaluate_rvalue(ir->condition)); 1649 nir_store_deref(&b, lhs_deref, src, ir->write_mask); 1650 nir_pop_if(&b, NULL); 1651 } else { 1652 nir_store_deref(&b, lhs_deref, src, ir->write_mask); 1653 } 1654} 1655 1656/* 1657 * Given an instruction, returns a pointer to its destination or NULL if there 1658 * is no destination. 1659 * 1660 * Note that this only handles instructions we generate at this level. 1661 */ 1662static nir_dest * 1663get_instr_dest(nir_instr *instr) 1664{ 1665 nir_alu_instr *alu_instr; 1666 nir_intrinsic_instr *intrinsic_instr; 1667 nir_tex_instr *tex_instr; 1668 1669 switch (instr->type) { 1670 case nir_instr_type_alu: 1671 alu_instr = nir_instr_as_alu(instr); 1672 return &alu_instr->dest.dest; 1673 1674 case nir_instr_type_intrinsic: 1675 intrinsic_instr = nir_instr_as_intrinsic(instr); 1676 if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest) 1677 return &intrinsic_instr->dest; 1678 else 1679 return NULL; 1680 1681 case nir_instr_type_tex: 1682 tex_instr = nir_instr_as_tex(instr); 1683 return &tex_instr->dest; 1684 1685 default: 1686 unreachable("not reached"); 1687 } 1688 1689 return NULL; 1690} 1691 1692void 1693nir_visitor::add_instr(nir_instr *instr, unsigned num_components, 1694 unsigned bit_size) 1695{ 1696 nir_dest *dest = get_instr_dest(instr); 1697 1698 if (dest) 1699 nir_ssa_dest_init(instr, dest, num_components, bit_size, NULL); 1700 1701 nir_builder_instr_insert(&b, instr); 1702 1703 if (dest) { 1704 assert(dest->is_ssa); 1705 this->result = &dest->ssa; 1706 } 1707} 1708 1709nir_ssa_def * 1710nir_visitor::evaluate_rvalue(ir_rvalue* ir) 1711{ 1712 ir->accept(this); 1713 if (ir->as_dereference() || ir->as_constant()) { 1714 /* 1715 * A dereference is being used on the right hand side, which means we 1716 * must emit a variable load. 1717 */ 1718 1719 this->result = nir_load_deref(&b, this->deref); 1720 } 1721 1722 return this->result; 1723} 1724 1725static bool 1726type_is_float(glsl_base_type type) 1727{ 1728 return type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_DOUBLE || 1729 type == GLSL_TYPE_FLOAT16; 1730} 1731 1732static bool 1733type_is_signed(glsl_base_type type) 1734{ 1735 return type == GLSL_TYPE_INT || type == GLSL_TYPE_INT64 || 1736 type == GLSL_TYPE_INT16; 1737} 1738 1739static bool 1740type_is_int(glsl_base_type type) 1741{ 1742 return type == GLSL_TYPE_UINT || type == GLSL_TYPE_INT || 1743 type == GLSL_TYPE_UINT8 || type == GLSL_TYPE_INT8 || 1744 type == GLSL_TYPE_UINT16 || type == GLSL_TYPE_INT16 || 1745 type == GLSL_TYPE_UINT64 || type == GLSL_TYPE_INT64; 1746} 1747 1748void 1749nir_visitor::visit(ir_expression *ir) 1750{ 1751 /* Some special cases */ 1752 switch (ir->operation) { 1753 case ir_binop_ubo_load: { 1754 nir_intrinsic_instr *load = 1755 nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo); 1756 unsigned bit_size = ir->type->is_boolean() ? 32 : 1757 glsl_get_bit_size(ir->type); 1758 load->num_components = ir->type->vector_elements; 1759 load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); 1760 load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); 1761 intrinsic_set_std430_align(load, ir->type); 1762 add_instr(&load->instr, ir->type->vector_elements, bit_size); 1763 1764 /* 1765 * In UBO's, a true boolean value is any non-zero value, but we consider 1766 * a true boolean to be ~0. Fix this up with a != 0 comparison. 1767 */ 1768 1769 if (ir->type->is_boolean()) 1770 this->result = nir_i2b(&b, &load->dest.ssa); 1771 1772 return; 1773 } 1774 1775 case ir_unop_interpolate_at_centroid: 1776 case ir_binop_interpolate_at_offset: 1777 case ir_binop_interpolate_at_sample: { 1778 ir_dereference *deref = ir->operands[0]->as_dereference(); 1779 ir_swizzle *swizzle = NULL; 1780 if (!deref) { 1781 /* the api does not allow a swizzle here, but the varying packing code 1782 * may have pushed one into here. 1783 */ 1784 swizzle = ir->operands[0]->as_swizzle(); 1785 assert(swizzle); 1786 deref = swizzle->val->as_dereference(); 1787 assert(deref); 1788 } 1789 1790 deref->accept(this); 1791 1792 nir_intrinsic_op op; 1793 if (this->deref->mode == nir_var_shader_in) { 1794 switch (ir->operation) { 1795 case ir_unop_interpolate_at_centroid: 1796 op = nir_intrinsic_interp_deref_at_centroid; 1797 break; 1798 case ir_binop_interpolate_at_offset: 1799 op = nir_intrinsic_interp_deref_at_offset; 1800 break; 1801 case ir_binop_interpolate_at_sample: 1802 op = nir_intrinsic_interp_deref_at_sample; 1803 break; 1804 default: 1805 unreachable("Invalid interpolation intrinsic"); 1806 } 1807 } else { 1808 /* This case can happen if the vertex shader does not write the 1809 * given varying. In this case, the linker will lower it to a 1810 * global variable. Since interpolating a variable makes no 1811 * sense, we'll just turn it into a load which will probably 1812 * eventually end up as an SSA definition. 1813 */ 1814 assert(this->deref->mode == nir_var_shader_temp); 1815 op = nir_intrinsic_load_deref; 1816 } 1817 1818 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op); 1819 intrin->num_components = deref->type->vector_elements; 1820 intrin->src[0] = nir_src_for_ssa(&this->deref->dest.ssa); 1821 1822 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_offset || 1823 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample) 1824 intrin->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); 1825 1826 unsigned bit_size = glsl_get_bit_size(deref->type); 1827 add_instr(&intrin->instr, deref->type->vector_elements, bit_size); 1828 1829 if (swizzle) { 1830 unsigned swiz[4] = { 1831 swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w 1832 }; 1833 1834 result = nir_swizzle(&b, result, swiz, 1835 swizzle->type->vector_elements, false); 1836 } 1837 1838 return; 1839 } 1840 1841 case ir_unop_ssbo_unsized_array_length: { 1842 nir_intrinsic_instr *intrin = 1843 nir_intrinsic_instr_create(b.shader, 1844 nir_intrinsic_deref_buffer_array_length); 1845 1846 ir_dereference *deref = ir->operands[0]->as_dereference(); 1847 intrin->src[0] = nir_src_for_ssa(&evaluate_deref(deref)->dest.ssa); 1848 1849 add_instr(&intrin->instr, 1, 32); 1850 return; 1851 } 1852 1853 default: 1854 break; 1855 } 1856 1857 nir_ssa_def *srcs[4]; 1858 for (unsigned i = 0; i < ir->num_operands; i++) 1859 srcs[i] = evaluate_rvalue(ir->operands[i]); 1860 1861 glsl_base_type types[4]; 1862 for (unsigned i = 0; i < ir->num_operands; i++) 1863 if (supports_ints || !type_is_int(ir->operands[i]->type->base_type)) 1864 types[i] = ir->operands[i]->type->base_type; 1865 else 1866 types[i] = GLSL_TYPE_FLOAT; 1867 1868 glsl_base_type out_type; 1869 if (supports_ints || !type_is_int(ir->type->base_type)) 1870 out_type = ir->type->base_type; 1871 else 1872 out_type = GLSL_TYPE_FLOAT; 1873 1874 switch (ir->operation) { 1875 case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break; 1876 case ir_unop_logic_not: 1877 result = nir_inot(&b, srcs[0]); 1878 break; 1879 case ir_unop_neg: 1880 result = type_is_float(types[0]) ? nir_fneg(&b, srcs[0]) 1881 : nir_ineg(&b, srcs[0]); 1882 break; 1883 case ir_unop_abs: 1884 result = type_is_float(types[0]) ? nir_fabs(&b, srcs[0]) 1885 : nir_iabs(&b, srcs[0]); 1886 break; 1887 case ir_unop_saturate: 1888 assert(type_is_float(types[0])); 1889 result = nir_fsat(&b, srcs[0]); 1890 break; 1891 case ir_unop_sign: 1892 result = type_is_float(types[0]) ? nir_fsign(&b, srcs[0]) 1893 : nir_isign(&b, srcs[0]); 1894 break; 1895 case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break; 1896 case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break; 1897 case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break; 1898 case ir_unop_exp: unreachable("ir_unop_exp should have been lowered"); 1899 case ir_unop_log: unreachable("ir_unop_log should have been lowered"); 1900 case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break; 1901 case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break; 1902 case ir_unop_i2f: 1903 result = supports_ints ? nir_i2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]); 1904 break; 1905 case ir_unop_u2f: 1906 result = supports_ints ? nir_u2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]); 1907 break; 1908 case ir_unop_b2f: 1909 result = nir_b2f32(&b, srcs[0]); 1910 break; 1911 case ir_unop_f2i: 1912 result = supports_ints ? nir_f2i32(&b, srcs[0]) : nir_ftrunc(&b, srcs[0]); 1913 break; 1914 case ir_unop_f2u: 1915 result = supports_ints ? nir_f2u32(&b, srcs[0]) : nir_ftrunc(&b, srcs[0]); 1916 break; 1917 case ir_unop_f2b: 1918 case ir_unop_i2b: 1919 case ir_unop_b2i: 1920 case ir_unop_b2i64: 1921 case ir_unop_d2f: 1922 case ir_unop_f2d: 1923 case ir_unop_d2i: 1924 case ir_unop_d2u: 1925 case ir_unop_d2b: 1926 case ir_unop_i2d: 1927 case ir_unop_u2d: 1928 case ir_unop_i642i: 1929 case ir_unop_i642u: 1930 case ir_unop_i642f: 1931 case ir_unop_i642b: 1932 case ir_unop_i642d: 1933 case ir_unop_u642i: 1934 case ir_unop_u642u: 1935 case ir_unop_u642f: 1936 case ir_unop_u642d: 1937 case ir_unop_i2i64: 1938 case ir_unop_u2i64: 1939 case ir_unop_f2i64: 1940 case ir_unop_d2i64: 1941 case ir_unop_i2u64: 1942 case ir_unop_u2u64: 1943 case ir_unop_f2u64: 1944 case ir_unop_d2u64: 1945 case ir_unop_i2u: 1946 case ir_unop_u2i: 1947 case ir_unop_i642u64: 1948 case ir_unop_u642i64: { 1949 nir_alu_type src_type = nir_get_nir_type_for_glsl_base_type(types[0]); 1950 nir_alu_type dst_type = nir_get_nir_type_for_glsl_base_type(out_type); 1951 result = nir_build_alu(&b, nir_type_conversion_op(src_type, dst_type, 1952 nir_rounding_mode_undef), 1953 srcs[0], NULL, NULL, NULL); 1954 /* b2i and b2f don't have fixed bit-size versions so the builder will 1955 * just assume 32 and we have to fix it up here. 1956 */ 1957 result->bit_size = nir_alu_type_get_type_size(dst_type); 1958 break; 1959 } 1960 1961 case ir_unop_bitcast_i2f: 1962 case ir_unop_bitcast_f2i: 1963 case ir_unop_bitcast_u2f: 1964 case ir_unop_bitcast_f2u: 1965 case ir_unop_bitcast_i642d: 1966 case ir_unop_bitcast_d2i64: 1967 case ir_unop_bitcast_u642d: 1968 case ir_unop_bitcast_d2u64: 1969 case ir_unop_subroutine_to_int: 1970 /* no-op */ 1971 result = nir_imov(&b, srcs[0]); 1972 break; 1973 case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break; 1974 case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break; 1975 case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break; 1976 case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break; 1977 case ir_unop_frexp_exp: result = nir_frexp_exp(&b, srcs[0]); break; 1978 case ir_unop_frexp_sig: result = nir_frexp_sig(&b, srcs[0]); break; 1979 case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break; 1980 case ir_unop_sin: result = nir_fsin(&b, srcs[0]); break; 1981 case ir_unop_cos: result = nir_fcos(&b, srcs[0]); break; 1982 case ir_unop_dFdx: result = nir_fddx(&b, srcs[0]); break; 1983 case ir_unop_dFdy: result = nir_fddy(&b, srcs[0]); break; 1984 case ir_unop_dFdx_fine: result = nir_fddx_fine(&b, srcs[0]); break; 1985 case ir_unop_dFdy_fine: result = nir_fddy_fine(&b, srcs[0]); break; 1986 case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break; 1987 case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break; 1988 case ir_unop_pack_snorm_2x16: 1989 result = nir_pack_snorm_2x16(&b, srcs[0]); 1990 break; 1991 case ir_unop_pack_snorm_4x8: 1992 result = nir_pack_snorm_4x8(&b, srcs[0]); 1993 break; 1994 case ir_unop_pack_unorm_2x16: 1995 result = nir_pack_unorm_2x16(&b, srcs[0]); 1996 break; 1997 case ir_unop_pack_unorm_4x8: 1998 result = nir_pack_unorm_4x8(&b, srcs[0]); 1999 break; 2000 case ir_unop_pack_half_2x16: 2001 result = nir_pack_half_2x16(&b, srcs[0]); 2002 break; 2003 case ir_unop_unpack_snorm_2x16: 2004 result = nir_unpack_snorm_2x16(&b, srcs[0]); 2005 break; 2006 case ir_unop_unpack_snorm_4x8: 2007 result = nir_unpack_snorm_4x8(&b, srcs[0]); 2008 break; 2009 case ir_unop_unpack_unorm_2x16: 2010 result = nir_unpack_unorm_2x16(&b, srcs[0]); 2011 break; 2012 case ir_unop_unpack_unorm_4x8: 2013 result = nir_unpack_unorm_4x8(&b, srcs[0]); 2014 break; 2015 case ir_unop_unpack_half_2x16: 2016 result = nir_unpack_half_2x16(&b, srcs[0]); 2017 break; 2018 case ir_unop_pack_sampler_2x32: 2019 case ir_unop_pack_image_2x32: 2020 case ir_unop_pack_double_2x32: 2021 case ir_unop_pack_int_2x32: 2022 case ir_unop_pack_uint_2x32: 2023 result = nir_pack_64_2x32(&b, srcs[0]); 2024 break; 2025 case ir_unop_unpack_sampler_2x32: 2026 case ir_unop_unpack_image_2x32: 2027 case ir_unop_unpack_double_2x32: 2028 case ir_unop_unpack_int_2x32: 2029 case ir_unop_unpack_uint_2x32: 2030 result = nir_unpack_64_2x32(&b, srcs[0]); 2031 break; 2032 case ir_unop_bitfield_reverse: 2033 result = nir_bitfield_reverse(&b, srcs[0]); 2034 break; 2035 case ir_unop_bit_count: 2036 result = nir_bit_count(&b, srcs[0]); 2037 break; 2038 case ir_unop_find_msb: 2039 switch (types[0]) { 2040 case GLSL_TYPE_UINT: 2041 result = nir_ufind_msb(&b, srcs[0]); 2042 break; 2043 case GLSL_TYPE_INT: 2044 result = nir_ifind_msb(&b, srcs[0]); 2045 break; 2046 default: 2047 unreachable("Invalid type for findMSB()"); 2048 } 2049 break; 2050 case ir_unop_find_lsb: 2051 result = nir_find_lsb(&b, srcs[0]); 2052 break; 2053 2054 case ir_unop_noise: 2055 switch (ir->type->vector_elements) { 2056 case 1: 2057 switch (ir->operands[0]->type->vector_elements) { 2058 case 1: result = nir_fnoise1_1(&b, srcs[0]); break; 2059 case 2: result = nir_fnoise1_2(&b, srcs[0]); break; 2060 case 3: result = nir_fnoise1_3(&b, srcs[0]); break; 2061 case 4: result = nir_fnoise1_4(&b, srcs[0]); break; 2062 default: unreachable("not reached"); 2063 } 2064 break; 2065 case 2: 2066 switch (ir->operands[0]->type->vector_elements) { 2067 case 1: result = nir_fnoise2_1(&b, srcs[0]); break; 2068 case 2: result = nir_fnoise2_2(&b, srcs[0]); break; 2069 case 3: result = nir_fnoise2_3(&b, srcs[0]); break; 2070 case 4: result = nir_fnoise2_4(&b, srcs[0]); break; 2071 default: unreachable("not reached"); 2072 } 2073 break; 2074 case 3: 2075 switch (ir->operands[0]->type->vector_elements) { 2076 case 1: result = nir_fnoise3_1(&b, srcs[0]); break; 2077 case 2: result = nir_fnoise3_2(&b, srcs[0]); break; 2078 case 3: result = nir_fnoise3_3(&b, srcs[0]); break; 2079 case 4: result = nir_fnoise3_4(&b, srcs[0]); break; 2080 default: unreachable("not reached"); 2081 } 2082 break; 2083 case 4: 2084 switch (ir->operands[0]->type->vector_elements) { 2085 case 1: result = nir_fnoise4_1(&b, srcs[0]); break; 2086 case 2: result = nir_fnoise4_2(&b, srcs[0]); break; 2087 case 3: result = nir_fnoise4_3(&b, srcs[0]); break; 2088 case 4: result = nir_fnoise4_4(&b, srcs[0]); break; 2089 default: unreachable("not reached"); 2090 } 2091 break; 2092 default: 2093 unreachable("not reached"); 2094 } 2095 break; 2096 case ir_unop_get_buffer_size: { 2097 nir_intrinsic_instr *load = nir_intrinsic_instr_create( 2098 this->shader, 2099 nir_intrinsic_get_buffer_size); 2100 load->num_components = ir->type->vector_elements; 2101 load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); 2102 unsigned bit_size = glsl_get_bit_size(ir->type); 2103 add_instr(&load->instr, ir->type->vector_elements, bit_size); 2104 return; 2105 } 2106 2107 case ir_binop_add: 2108 result = type_is_float(out_type) ? nir_fadd(&b, srcs[0], srcs[1]) 2109 : nir_iadd(&b, srcs[0], srcs[1]); 2110 break; 2111 case ir_binop_sub: 2112 result = type_is_float(out_type) ? nir_fsub(&b, srcs[0], srcs[1]) 2113 : nir_isub(&b, srcs[0], srcs[1]); 2114 break; 2115 case ir_binop_mul: 2116 if (type_is_float(out_type)) 2117 result = nir_fmul(&b, srcs[0], srcs[1]); 2118 else if (out_type == GLSL_TYPE_INT64 && 2119 (ir->operands[0]->type->base_type == GLSL_TYPE_INT || 2120 ir->operands[1]->type->base_type == GLSL_TYPE_INT)) 2121 result = nir_imul_2x32_64(&b, srcs[0], srcs[1]); 2122 else if (out_type == GLSL_TYPE_UINT64 && 2123 (ir->operands[0]->type->base_type == GLSL_TYPE_UINT || 2124 ir->operands[1]->type->base_type == GLSL_TYPE_UINT)) 2125 result = nir_umul_2x32_64(&b, srcs[0], srcs[1]); 2126 else 2127 result = nir_imul(&b, srcs[0], srcs[1]); 2128 break; 2129 case ir_binop_div: 2130 if (type_is_float(out_type)) 2131 result = nir_fdiv(&b, srcs[0], srcs[1]); 2132 else if (type_is_signed(out_type)) 2133 result = nir_idiv(&b, srcs[0], srcs[1]); 2134 else 2135 result = nir_udiv(&b, srcs[0], srcs[1]); 2136 break; 2137 case ir_binop_mod: 2138 result = type_is_float(out_type) ? nir_fmod(&b, srcs[0], srcs[1]) 2139 : nir_umod(&b, srcs[0], srcs[1]); 2140 break; 2141 case ir_binop_min: 2142 if (type_is_float(out_type)) 2143 result = nir_fmin(&b, srcs[0], srcs[1]); 2144 else if (type_is_signed(out_type)) 2145 result = nir_imin(&b, srcs[0], srcs[1]); 2146 else 2147 result = nir_umin(&b, srcs[0], srcs[1]); 2148 break; 2149 case ir_binop_max: 2150 if (type_is_float(out_type)) 2151 result = nir_fmax(&b, srcs[0], srcs[1]); 2152 else if (type_is_signed(out_type)) 2153 result = nir_imax(&b, srcs[0], srcs[1]); 2154 else 2155 result = nir_umax(&b, srcs[0], srcs[1]); 2156 break; 2157 case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break; 2158 case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break; 2159 case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break; 2160 case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break; 2161 case ir_binop_logic_and: 2162 result = nir_iand(&b, srcs[0], srcs[1]); 2163 break; 2164 case ir_binop_logic_or: 2165 result = nir_ior(&b, srcs[0], srcs[1]); 2166 break; 2167 case ir_binop_logic_xor: 2168 result = nir_ixor(&b, srcs[0], srcs[1]); 2169 break; 2170 case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break; 2171 case ir_binop_rshift: 2172 result = (type_is_signed(out_type)) ? nir_ishr(&b, srcs[0], srcs[1]) 2173 : nir_ushr(&b, srcs[0], srcs[1]); 2174 break; 2175 case ir_binop_imul_high: 2176 result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1]) 2177 : nir_umul_high(&b, srcs[0], srcs[1]); 2178 break; 2179 case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break; 2180 case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break; 2181 case ir_binop_less: 2182 if (type_is_float(types[0])) 2183 result = nir_flt(&b, srcs[0], srcs[1]); 2184 else if (type_is_signed(types[0])) 2185 result = nir_ilt(&b, srcs[0], srcs[1]); 2186 else 2187 result = nir_ult(&b, srcs[0], srcs[1]); 2188 break; 2189 case ir_binop_gequal: 2190 if (type_is_float(types[0])) 2191 result = nir_fge(&b, srcs[0], srcs[1]); 2192 else if (type_is_signed(types[0])) 2193 result = nir_ige(&b, srcs[0], srcs[1]); 2194 else 2195 result = nir_uge(&b, srcs[0], srcs[1]); 2196 break; 2197 case ir_binop_equal: 2198 if (type_is_float(types[0])) 2199 result = nir_feq(&b, srcs[0], srcs[1]); 2200 else 2201 result = nir_ieq(&b, srcs[0], srcs[1]); 2202 break; 2203 case ir_binop_nequal: 2204 if (type_is_float(types[0])) 2205 result = nir_fne(&b, srcs[0], srcs[1]); 2206 else 2207 result = nir_ine(&b, srcs[0], srcs[1]); 2208 break; 2209 case ir_binop_all_equal: 2210 if (type_is_float(types[0])) { 2211 switch (ir->operands[0]->type->vector_elements) { 2212 case 1: result = nir_feq(&b, srcs[0], srcs[1]); break; 2213 case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break; 2214 case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break; 2215 case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break; 2216 default: 2217 unreachable("not reached"); 2218 } 2219 } else { 2220 switch (ir->operands[0]->type->vector_elements) { 2221 case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break; 2222 case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break; 2223 case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break; 2224 case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break; 2225 default: 2226 unreachable("not reached"); 2227 } 2228 } 2229 break; 2230 case ir_binop_any_nequal: 2231 if (type_is_float(types[0])) { 2232 switch (ir->operands[0]->type->vector_elements) { 2233 case 1: result = nir_fne(&b, srcs[0], srcs[1]); break; 2234 case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break; 2235 case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break; 2236 case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break; 2237 default: 2238 unreachable("not reached"); 2239 } 2240 } else { 2241 switch (ir->operands[0]->type->vector_elements) { 2242 case 1: result = nir_ine(&b, srcs[0], srcs[1]); break; 2243 case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break; 2244 case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break; 2245 case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break; 2246 default: 2247 unreachable("not reached"); 2248 } 2249 } 2250 break; 2251 case ir_binop_dot: 2252 switch (ir->operands[0]->type->vector_elements) { 2253 case 2: result = nir_fdot2(&b, srcs[0], srcs[1]); break; 2254 case 3: result = nir_fdot3(&b, srcs[0], srcs[1]); break; 2255 case 4: result = nir_fdot4(&b, srcs[0], srcs[1]); break; 2256 default: 2257 unreachable("not reached"); 2258 } 2259 break; 2260 case ir_binop_vector_extract: { 2261 result = nir_channel(&b, srcs[0], 0); 2262 for (unsigned i = 1; i < ir->operands[0]->type->vector_elements; i++) { 2263 nir_ssa_def *swizzled = nir_channel(&b, srcs[0], i); 2264 result = nir_bcsel(&b, nir_ieq(&b, srcs[1], nir_imm_int(&b, i)), 2265 swizzled, result); 2266 } 2267 break; 2268 } 2269 2270 case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break; 2271 case ir_triop_fma: 2272 result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]); 2273 break; 2274 case ir_triop_lrp: 2275 result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]); 2276 break; 2277 case ir_triop_csel: 2278 result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]); 2279 break; 2280 case ir_triop_bitfield_extract: 2281 result = (out_type == GLSL_TYPE_INT) ? 2282 nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) : 2283 nir_ubitfield_extract(&b, srcs[0], srcs[1], srcs[2]); 2284 break; 2285 case ir_quadop_bitfield_insert: 2286 result = nir_bitfield_insert(&b, srcs[0], srcs[1], srcs[2], srcs[3]); 2287 break; 2288 case ir_quadop_vector: 2289 result = nir_vec(&b, srcs, ir->type->vector_elements); 2290 break; 2291 2292 default: 2293 unreachable("not reached"); 2294 } 2295} 2296 2297void 2298nir_visitor::visit(ir_swizzle *ir) 2299{ 2300 unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w }; 2301 result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle, 2302 ir->type->vector_elements, false); 2303} 2304 2305void 2306nir_visitor::visit(ir_texture *ir) 2307{ 2308 unsigned num_srcs; 2309 nir_texop op; 2310 switch (ir->op) { 2311 case ir_tex: 2312 op = nir_texop_tex; 2313 num_srcs = 1; /* coordinate */ 2314 break; 2315 2316 case ir_txb: 2317 case ir_txl: 2318 op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl; 2319 num_srcs = 2; /* coordinate, bias/lod */ 2320 break; 2321 2322 case ir_txd: 2323 op = nir_texop_txd; /* coordinate, dPdx, dPdy */ 2324 num_srcs = 3; 2325 break; 2326 2327 case ir_txf: 2328 op = nir_texop_txf; 2329 if (ir->lod_info.lod != NULL) 2330 num_srcs = 2; /* coordinate, lod */ 2331 else 2332 num_srcs = 1; /* coordinate */ 2333 break; 2334 2335 case ir_txf_ms: 2336 op = nir_texop_txf_ms; 2337 num_srcs = 2; /* coordinate, sample_index */ 2338 break; 2339 2340 case ir_txs: 2341 op = nir_texop_txs; 2342 if (ir->lod_info.lod != NULL) 2343 num_srcs = 1; /* lod */ 2344 else 2345 num_srcs = 0; 2346 break; 2347 2348 case ir_lod: 2349 op = nir_texop_lod; 2350 num_srcs = 1; /* coordinate */ 2351 break; 2352 2353 case ir_tg4: 2354 op = nir_texop_tg4; 2355 num_srcs = 1; /* coordinate */ 2356 break; 2357 2358 case ir_query_levels: 2359 op = nir_texop_query_levels; 2360 num_srcs = 0; 2361 break; 2362 2363 case ir_texture_samples: 2364 op = nir_texop_texture_samples; 2365 num_srcs = 0; 2366 break; 2367 2368 case ir_samples_identical: 2369 op = nir_texop_samples_identical; 2370 num_srcs = 1; /* coordinate */ 2371 break; 2372 2373 default: 2374 unreachable("not reached"); 2375 } 2376 2377 if (ir->projector != NULL) 2378 num_srcs++; 2379 if (ir->shadow_comparator != NULL) 2380 num_srcs++; 2381 /* offsets are constants we store inside nir_tex_intrs.offsets */ 2382 if (ir->offset != NULL && !ir->offset->type->is_array()) 2383 num_srcs++; 2384 2385 /* Add one for the texture deref */ 2386 num_srcs += 2; 2387 2388 nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); 2389 2390 instr->op = op; 2391 instr->sampler_dim = 2392 (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality; 2393 instr->is_array = ir->sampler->type->sampler_array; 2394 instr->is_shadow = ir->sampler->type->sampler_shadow; 2395 if (instr->is_shadow) 2396 instr->is_new_style_shadow = (ir->type->vector_elements == 1); 2397 switch (ir->type->base_type) { 2398 case GLSL_TYPE_FLOAT: 2399 instr->dest_type = nir_type_float; 2400 break; 2401 case GLSL_TYPE_INT: 2402 instr->dest_type = nir_type_int; 2403 break; 2404 case GLSL_TYPE_BOOL: 2405 case GLSL_TYPE_UINT: 2406 instr->dest_type = nir_type_uint; 2407 break; 2408 default: 2409 unreachable("not reached"); 2410 } 2411 2412 nir_deref_instr *sampler_deref = evaluate_deref(ir->sampler); 2413 2414 /* check for bindless handles */ 2415 if (sampler_deref->mode != nir_var_uniform || 2416 nir_deref_instr_get_variable(sampler_deref)->data.bindless) { 2417 nir_ssa_def *load = nir_load_deref(&b, sampler_deref); 2418 instr->src[0].src = nir_src_for_ssa(load); 2419 instr->src[0].src_type = nir_tex_src_texture_handle; 2420 instr->src[1].src = nir_src_for_ssa(load); 2421 instr->src[1].src_type = nir_tex_src_sampler_handle; 2422 } else { 2423 instr->src[0].src = nir_src_for_ssa(&sampler_deref->dest.ssa); 2424 instr->src[0].src_type = nir_tex_src_texture_deref; 2425 instr->src[1].src = nir_src_for_ssa(&sampler_deref->dest.ssa); 2426 instr->src[1].src_type = nir_tex_src_sampler_deref; 2427 } 2428 2429 unsigned src_number = 2; 2430 2431 if (ir->coordinate != NULL) { 2432 instr->coord_components = ir->coordinate->type->vector_elements; 2433 instr->src[src_number].src = 2434 nir_src_for_ssa(evaluate_rvalue(ir->coordinate)); 2435 instr->src[src_number].src_type = nir_tex_src_coord; 2436 src_number++; 2437 } 2438 2439 if (ir->projector != NULL) { 2440 instr->src[src_number].src = 2441 nir_src_for_ssa(evaluate_rvalue(ir->projector)); 2442 instr->src[src_number].src_type = nir_tex_src_projector; 2443 src_number++; 2444 } 2445 2446 if (ir->shadow_comparator != NULL) { 2447 instr->src[src_number].src = 2448 nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparator)); 2449 instr->src[src_number].src_type = nir_tex_src_comparator; 2450 src_number++; 2451 } 2452 2453 if (ir->offset != NULL) { 2454 if (ir->offset->type->is_array()) { 2455 for (int i = 0; i < ir->offset->type->array_size(); i++) { 2456 const ir_constant *c = 2457 ir->offset->as_constant()->get_array_element(i); 2458 2459 for (unsigned j = 0; j < 2; ++j) { 2460 int val = c->get_int_component(j); 2461 assert(val <= 31 && val >= -32); 2462 instr->tg4_offsets[i][j] = val; 2463 } 2464 } 2465 } else { 2466 assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar()); 2467 2468 instr->src[src_number].src = 2469 nir_src_for_ssa(evaluate_rvalue(ir->offset)); 2470 instr->src[src_number].src_type = nir_tex_src_offset; 2471 src_number++; 2472 } 2473 } 2474 2475 switch (ir->op) { 2476 case ir_txb: 2477 instr->src[src_number].src = 2478 nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias)); 2479 instr->src[src_number].src_type = nir_tex_src_bias; 2480 src_number++; 2481 break; 2482 2483 case ir_txl: 2484 case ir_txf: 2485 case ir_txs: 2486 if (ir->lod_info.lod != NULL) { 2487 instr->src[src_number].src = 2488 nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod)); 2489 instr->src[src_number].src_type = nir_tex_src_lod; 2490 src_number++; 2491 } 2492 break; 2493 2494 case ir_txd: 2495 instr->src[src_number].src = 2496 nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx)); 2497 instr->src[src_number].src_type = nir_tex_src_ddx; 2498 src_number++; 2499 instr->src[src_number].src = 2500 nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy)); 2501 instr->src[src_number].src_type = nir_tex_src_ddy; 2502 src_number++; 2503 break; 2504 2505 case ir_txf_ms: 2506 instr->src[src_number].src = 2507 nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index)); 2508 instr->src[src_number].src_type = nir_tex_src_ms_index; 2509 src_number++; 2510 break; 2511 2512 case ir_tg4: 2513 instr->component = ir->lod_info.component->as_constant()->value.u[0]; 2514 break; 2515 2516 default: 2517 break; 2518 } 2519 2520 assert(src_number == num_srcs); 2521 2522 unsigned bit_size = glsl_get_bit_size(ir->type); 2523 add_instr(&instr->instr, nir_tex_instr_dest_size(instr), bit_size); 2524} 2525 2526void 2527nir_visitor::visit(ir_constant *ir) 2528{ 2529 /* 2530 * We don't know if this variable is an array or struct that gets 2531 * dereferenced, so do the safe thing an make it a variable with a 2532 * constant initializer and return a dereference. 2533 */ 2534 2535 nir_variable *var = 2536 nir_local_variable_create(this->impl, ir->type, "const_temp"); 2537 var->data.read_only = true; 2538 var->constant_initializer = constant_copy(ir, var); 2539 2540 this->deref = nir_build_deref_var(&b, var); 2541} 2542 2543void 2544nir_visitor::visit(ir_dereference_variable *ir) 2545{ 2546 if (ir->variable_referenced()->data.mode == ir_var_function_out) { 2547 unsigned i = (sig->return_type != glsl_type::void_type) ? 1 : 0; 2548 2549 foreach_in_list(ir_variable, param, &sig->parameters) { 2550 if (param == ir->variable_referenced()) { 2551 break; 2552 } 2553 i++; 2554 } 2555 2556 this->deref = nir_build_deref_cast(&b, nir_load_param(&b, i), 2557 nir_var_function_temp, ir->type, 0); 2558 return; 2559 } 2560 2561 assert(ir->variable_referenced()->data.mode != ir_var_function_inout); 2562 2563 struct hash_entry *entry = 2564 _mesa_hash_table_search(this->var_table, ir->var); 2565 assert(entry); 2566 nir_variable *var = (nir_variable *) entry->data; 2567 2568 this->deref = nir_build_deref_var(&b, var); 2569} 2570 2571void 2572nir_visitor::visit(ir_dereference_record *ir) 2573{ 2574 ir->record->accept(this); 2575 2576 int field_index = ir->field_idx; 2577 assert(field_index >= 0); 2578 2579 this->deref = nir_build_deref_struct(&b, this->deref, field_index); 2580} 2581 2582void 2583nir_visitor::visit(ir_dereference_array *ir) 2584{ 2585 nir_ssa_def *index = evaluate_rvalue(ir->array_index); 2586 2587 ir->array->accept(this); 2588 2589 this->deref = nir_build_deref_array(&b, this->deref, index); 2590} 2591 2592void 2593nir_visitor::visit(ir_barrier *) 2594{ 2595 nir_intrinsic_instr *instr = 2596 nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier); 2597 nir_builder_instr_insert(&b, &instr->instr); 2598} 2599 2600nir_shader * 2601glsl_float64_funcs_to_nir(struct gl_context *ctx, 2602 const nir_shader_compiler_options *options) 2603{ 2604 /* We pretend it's a vertex shader. Ultimately, the stage shouldn't 2605 * matter because we're not optimizing anything here. 2606 */ 2607 struct gl_shader *sh = _mesa_new_shader(-1, MESA_SHADER_VERTEX); 2608 sh->Source = float64_source; 2609 sh->CompileStatus = COMPILE_FAILURE; 2610 _mesa_glsl_compile_shader(ctx, sh, false, false, true); 2611 2612 if (!sh->CompileStatus) { 2613 if (sh->InfoLog) { 2614 _mesa_problem(ctx, 2615 "fp64 software impl compile failed:\n%s\nsource:\n%s\n", 2616 sh->InfoLog, float64_source); 2617 } 2618 return NULL; 2619 } 2620 2621 nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_VERTEX, options, NULL); 2622 2623 nir_visitor v1(ctx, nir); 2624 nir_function_visitor v2(&v1); 2625 v2.run(sh->ir); 2626 visit_exec_list(sh->ir, &v1); 2627 2628 /* _mesa_delete_shader will try to free sh->Source but it's static const */ 2629 sh->Source = NULL; 2630 _mesa_delete_shader(ctx, sh); 2631 2632 nir_validate_shader(nir, "float64_funcs_to_nir"); 2633 2634 NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_function_temp); 2635 NIR_PASS_V(nir, nir_lower_returns); 2636 NIR_PASS_V(nir, nir_inline_functions); 2637 NIR_PASS_V(nir, nir_opt_deref); 2638 2639 return nir; 2640} 2641