lower_ubo_reference.cpp revision 7ec681f3
1/* 2 * Copyright © 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file lower_ubo_reference.cpp 26 * 27 * IR lower pass to replace dereferences of variables in a uniform 28 * buffer object with usage of ir_binop_ubo_load expressions, each of 29 * which can read data up to the size of a vec4. 30 * 31 * This relieves drivers of the responsibility to deal with tricky UBO 32 * layout issues like std140 structures and row_major matrices on 33 * their own. 34 */ 35 36#include "lower_buffer_access.h" 37#include "ir_builder.h" 38#include "main/macros.h" 39#include "glsl_parser_extras.h" 40#include "main/mtypes.h" 41 42using namespace ir_builder; 43 44namespace { 45class lower_ubo_reference_visitor : 46 public lower_buffer_access::lower_buffer_access { 47public: 48 lower_ubo_reference_visitor(struct gl_linked_shader *shader, 49 bool clamp_block_indices, 50 bool use_std430_as_default) 51 : buffer_access_type(ubo_load_access), 52 shader(shader), clamp_block_indices(clamp_block_indices), 53 struct_field(NULL), variable(NULL), uniform_block(NULL), 54 progress(false) 55 { 56 this->use_std430_as_default = use_std430_as_default; 57 } 58 59 void handle_rvalue(ir_rvalue **rvalue); 60 ir_visitor_status visit_enter(ir_assignment *ir); 61 62 void setup_for_load_or_store(void *mem_ctx, 63 ir_variable *var, 64 ir_rvalue *deref, 65 ir_rvalue **offset, 66 unsigned *const_offset, 67 bool *row_major, 68 const glsl_type **matrix_type, 69 enum glsl_interface_packing packing); 70 uint32_t ssbo_access_params(); 71 ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, 72 ir_rvalue *offset); 73 ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, 74 ir_rvalue *offset); 75 76 bool check_for_buffer_array_copy(ir_assignment *ir); 77 bool check_for_buffer_struct_copy(ir_assignment *ir); 78 void check_for_ssbo_store(ir_assignment *ir); 79 void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var, 80 ir_variable *write_var, unsigned write_mask); 81 ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, 82 unsigned write_mask); 83 84 enum { 85 ubo_load_access, 86 ssbo_load_access, 87 ssbo_store_access, 88 ssbo_unsized_array_length_access, 89 ssbo_atomic_access, 90 } buffer_access_type; 91 92 void insert_buffer_access(void *mem_ctx, ir_dereference *deref, 93 const glsl_type *type, ir_rvalue *offset, 94 unsigned mask, int channel); 95 96 ir_visitor_status visit_enter(class ir_expression *); 97 ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr); 98 void check_ssbo_unsized_array_length_expression(class ir_expression *); 99 void check_ssbo_unsized_array_length_assignment(ir_assignment *ir); 100 101 ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, 102 ir_dereference *, 103 ir_variable *); 104 ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx); 105 106 unsigned calculate_unsized_array_stride(ir_dereference *deref, 107 enum glsl_interface_packing packing); 108 109 ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir); 110 ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir); 111 ir_visitor_status visit_enter(ir_call *ir); 112 ir_visitor_status visit_enter(ir_texture *ir); 113 114 struct gl_linked_shader *shader; 115 bool clamp_block_indices; 116 const struct glsl_struct_field *struct_field; 117 ir_variable *variable; 118 ir_rvalue *uniform_block; 119 bool progress; 120}; 121 122/** 123 * Determine the name of the interface block field 124 * 125 * This is the name of the specific member as it would appear in the 126 * \c gl_uniform_buffer_variable::Name field in the shader's 127 * \c UniformBlocks array. 128 */ 129static const char * 130interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, 131 ir_rvalue **nonconst_block_index) 132{ 133 *nonconst_block_index = NULL; 134 char *name_copy = NULL; 135 size_t base_length = 0; 136 137 /* Loop back through the IR until we find the uniform block */ 138 ir_rvalue *ir = d; 139 while (ir != NULL) { 140 switch (ir->ir_type) { 141 case ir_type_dereference_variable: { 142 /* Exit loop */ 143 ir = NULL; 144 break; 145 } 146 147 case ir_type_dereference_record: { 148 ir_dereference_record *r = (ir_dereference_record *) ir; 149 ir = r->record->as_dereference(); 150 151 /* If we got here it means any previous array subscripts belong to 152 * block members and not the block itself so skip over them in the 153 * next pass. 154 */ 155 d = ir; 156 break; 157 } 158 159 case ir_type_dereference_array: { 160 ir_dereference_array *a = (ir_dereference_array *) ir; 161 ir = a->array->as_dereference(); 162 break; 163 } 164 165 case ir_type_swizzle: { 166 ir_swizzle *s = (ir_swizzle *) ir; 167 ir = s->val->as_dereference(); 168 /* Skip swizzle in the next pass */ 169 d = ir; 170 break; 171 } 172 173 default: 174 assert(!"Should not get here."); 175 break; 176 } 177 } 178 179 while (d != NULL) { 180 switch (d->ir_type) { 181 case ir_type_dereference_variable: { 182 ir_dereference_variable *v = (ir_dereference_variable *) d; 183 if (name_copy != NULL && 184 v->var->is_interface_instance() && 185 v->var->type->is_array()) { 186 return name_copy; 187 } else { 188 *nonconst_block_index = NULL; 189 return base_name; 190 } 191 192 break; 193 } 194 195 case ir_type_dereference_array: { 196 ir_dereference_array *a = (ir_dereference_array *) d; 197 size_t new_length; 198 199 if (name_copy == NULL) { 200 name_copy = ralloc_strdup(mem_ctx, base_name); 201 base_length = strlen(name_copy); 202 } 203 204 /* For arrays of arrays we start at the innermost array and work our 205 * way out so we need to insert the subscript at the base of the 206 * name string rather than just attaching it to the end. 207 */ 208 new_length = base_length; 209 ir_constant *const_index = a->array_index->as_constant(); 210 char *end = ralloc_strdup(NULL, &name_copy[new_length]); 211 if (!const_index) { 212 ir_rvalue *array_index = a->array_index; 213 if (array_index->type != glsl_type::uint_type) 214 array_index = i2u(array_index); 215 216 if (a->array->type->is_array() && 217 a->array->type->fields.array->is_array()) { 218 ir_constant *base_size = new(mem_ctx) 219 ir_constant(a->array->type->fields.array->arrays_of_arrays_size()); 220 array_index = mul(array_index, base_size); 221 } 222 223 if (*nonconst_block_index) { 224 *nonconst_block_index = add(*nonconst_block_index, array_index); 225 } else { 226 *nonconst_block_index = array_index; 227 } 228 229 ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s", 230 end); 231 } else { 232 ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s", 233 const_index->get_uint_component(0), 234 end); 235 } 236 ralloc_free(end); 237 238 d = a->array->as_dereference(); 239 240 break; 241 } 242 243 default: 244 assert(!"Should not get here."); 245 break; 246 } 247 } 248 249 assert(!"Should not get here."); 250 return NULL; 251} 252 253static ir_rvalue * 254clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type) 255{ 256 assert(type->is_array()); 257 258 const unsigned array_size = type->arrays_of_arrays_size(); 259 260 ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1); 261 max_index->type = index->type; 262 263 ir_constant *zero = new(mem_ctx) ir_constant(0); 264 zero->type = index->type; 265 266 if (index->type->base_type == GLSL_TYPE_INT) 267 index = max2(index, zero); 268 index = min2(index, max_index); 269 270 return index; 271} 272 273void 274lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, 275 ir_variable *var, 276 ir_rvalue *deref, 277 ir_rvalue **offset, 278 unsigned *const_offset, 279 bool *row_major, 280 const glsl_type **matrix_type, 281 enum glsl_interface_packing packing) 282{ 283 /* Determine the name of the interface block */ 284 ir_rvalue *nonconst_block_index; 285 const char *const field_name = 286 interface_field_name(mem_ctx, (char *) var->get_interface_type()->name, 287 deref, &nonconst_block_index); 288 289 if (nonconst_block_index && clamp_block_indices) { 290 nonconst_block_index = 291 clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type); 292 } 293 294 /* Locate the block by interface name */ 295 unsigned num_blocks; 296 struct gl_uniform_block **blocks; 297 if (this->buffer_access_type != ubo_load_access) { 298 num_blocks = shader->Program->info.num_ssbos; 299 blocks = shader->Program->sh.ShaderStorageBlocks; 300 } else { 301 num_blocks = shader->Program->info.num_ubos; 302 blocks = shader->Program->sh.UniformBlocks; 303 } 304 this->uniform_block = NULL; 305 for (unsigned i = 0; i < num_blocks; i++) { 306 if (strcmp(field_name, blocks[i]->Name) == 0) { 307 308 ir_constant *index = new(mem_ctx) ir_constant(i); 309 310 if (nonconst_block_index) { 311 this->uniform_block = add(nonconst_block_index, index); 312 } else { 313 this->uniform_block = index; 314 } 315 316 if (var->is_interface_instance()) { 317 *const_offset = 0; 318 } else { 319 *const_offset = blocks[i]->Uniforms[var->data.location].Offset; 320 } 321 322 break; 323 } 324 } 325 326 assert(this->uniform_block); 327 328 this->struct_field = NULL; 329 setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major, 330 matrix_type, &this->struct_field, packing); 331} 332 333void 334lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) 335{ 336 if (!*rvalue) 337 return; 338 339 ir_dereference *deref = (*rvalue)->as_dereference(); 340 if (!deref) 341 return; 342 343 ir_variable *var = deref->variable_referenced(); 344 if (!var || !var->is_in_buffer_block()) 345 return; 346 347 void *mem_ctx = ralloc_parent(shader->ir); 348 349 ir_rvalue *offset = NULL; 350 unsigned const_offset; 351 bool row_major; 352 const glsl_type *matrix_type; 353 354 enum glsl_interface_packing packing = 355 var->get_interface_type()-> 356 get_internal_ifc_packing(use_std430_as_default); 357 358 this->buffer_access_type = 359 var->is_in_shader_storage_block() ? 360 ssbo_load_access : ubo_load_access; 361 this->variable = var; 362 363 /* Compute the offset to the start if the dereference as well as other 364 * information we need to configure the write 365 */ 366 setup_for_load_or_store(mem_ctx, var, deref, 367 &offset, &const_offset, 368 &row_major, &matrix_type, 369 packing); 370 assert(offset); 371 372 /* Now that we've calculated the offset to the start of the 373 * dereference, walk over the type and emit loads into a temporary. 374 */ 375 const glsl_type *type = (*rvalue)->type; 376 ir_variable *load_var = new(mem_ctx) ir_variable(type, 377 "ubo_load_temp", 378 ir_var_temporary); 379 base_ir->insert_before(load_var); 380 381 ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, 382 "ubo_load_temp_offset", 383 ir_var_temporary); 384 base_ir->insert_before(load_offset); 385 base_ir->insert_before(assign(load_offset, offset)); 386 387 deref = new(mem_ctx) ir_dereference_variable(load_var); 388 emit_access(mem_ctx, false, deref, load_offset, const_offset, 389 row_major, matrix_type, packing, 0); 390 *rvalue = deref; 391 392 progress = true; 393} 394 395ir_expression * 396lower_ubo_reference_visitor::ubo_load(void *mem_ctx, 397 const glsl_type *type, 398 ir_rvalue *offset) 399{ 400 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); 401 return new(mem_ctx) 402 ir_expression(ir_binop_ubo_load, 403 type, 404 block_ref, 405 offset); 406 407} 408 409static bool 410shader_storage_buffer_object(const _mesa_glsl_parse_state *state) 411{ 412 return state->has_shader_storage_buffer_objects(); 413} 414 415uint32_t 416lower_ubo_reference_visitor::ssbo_access_params() 417{ 418 assert(variable); 419 420 if (variable->is_interface_instance()) { 421 assert(struct_field); 422 423 return ((struct_field->memory_coherent ? ACCESS_COHERENT : 0) | 424 (struct_field->memory_restrict ? ACCESS_RESTRICT : 0) | 425 (struct_field->memory_volatile ? ACCESS_VOLATILE : 0)); 426 } else { 427 return ((variable->data.memory_coherent ? ACCESS_COHERENT : 0) | 428 (variable->data.memory_restrict ? ACCESS_RESTRICT : 0) | 429 (variable->data.memory_volatile ? ACCESS_VOLATILE : 0)); 430 } 431} 432 433ir_call * 434lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, 435 ir_rvalue *deref, 436 ir_rvalue *offset, 437 unsigned write_mask) 438{ 439 exec_list sig_params; 440 441 ir_variable *block_ref = new(mem_ctx) 442 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); 443 sig_params.push_tail(block_ref); 444 445 ir_variable *offset_ref = new(mem_ctx) 446 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); 447 sig_params.push_tail(offset_ref); 448 449 ir_variable *val_ref = new(mem_ctx) 450 ir_variable(deref->type, "value" , ir_var_function_in); 451 sig_params.push_tail(val_ref); 452 453 ir_variable *writemask_ref = new(mem_ctx) 454 ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); 455 sig_params.push_tail(writemask_ref); 456 457 ir_variable *access_ref = new(mem_ctx) 458 ir_variable(glsl_type::uint_type, "access" , ir_var_function_in); 459 sig_params.push_tail(access_ref); 460 461 ir_function_signature *sig = new(mem_ctx) 462 ir_function_signature(glsl_type::void_type, shader_storage_buffer_object); 463 assert(sig); 464 sig->replace_parameters(&sig_params); 465 sig->intrinsic_id = ir_intrinsic_ssbo_store; 466 467 ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo"); 468 f->add_signature(sig); 469 470 exec_list call_params; 471 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); 472 call_params.push_tail(offset->clone(mem_ctx, NULL)); 473 call_params.push_tail(deref->clone(mem_ctx, NULL)); 474 call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); 475 call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params())); 476 return new(mem_ctx) ir_call(sig, NULL, &call_params); 477} 478 479ir_call * 480lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, 481 const struct glsl_type *type, 482 ir_rvalue *offset) 483{ 484 exec_list sig_params; 485 486 ir_variable *block_ref = new(mem_ctx) 487 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); 488 sig_params.push_tail(block_ref); 489 490 ir_variable *offset_ref = new(mem_ctx) 491 ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); 492 sig_params.push_tail(offset_ref); 493 494 ir_variable *access_ref = new(mem_ctx) 495 ir_variable(glsl_type::uint_type, "access" , ir_var_function_in); 496 sig_params.push_tail(access_ref); 497 498 ir_function_signature *sig = 499 new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object); 500 assert(sig); 501 sig->replace_parameters(&sig_params); 502 sig->intrinsic_id = ir_intrinsic_ssbo_load; 503 504 ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo"); 505 f->add_signature(sig); 506 507 ir_variable *result = new(mem_ctx) 508 ir_variable(type, "ssbo_load_result", ir_var_temporary); 509 base_ir->insert_before(result); 510 ir_dereference_variable *deref_result = new(mem_ctx) 511 ir_dereference_variable(result); 512 513 exec_list call_params; 514 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); 515 call_params.push_tail(offset->clone(mem_ctx, NULL)); 516 call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params())); 517 518 return new(mem_ctx) ir_call(sig, deref_result, &call_params); 519} 520 521void 522lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx, 523 ir_dereference *deref, 524 const glsl_type *type, 525 ir_rvalue *offset, 526 unsigned mask, 527 int channel) 528{ 529 switch (this->buffer_access_type) { 530 case ubo_load_access: 531 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), 532 ubo_load(mem_ctx, type, offset), 533 mask)); 534 break; 535 case ssbo_load_access: { 536 ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset); 537 base_ir->insert_before(load_ssbo); 538 ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); 539 ir_assignment *assignment = 540 assign(deref->clone(mem_ctx, NULL), value, mask); 541 base_ir->insert_before(assignment); 542 break; 543 } 544 case ssbo_store_access: 545 if (channel >= 0) { 546 base_ir->insert_after(ssbo_store(mem_ctx, 547 swizzle(deref, channel, 1), 548 offset, 1)); 549 } else { 550 base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask)); 551 } 552 break; 553 default: 554 unreachable("invalid buffer_access_type in insert_buffer_access"); 555 } 556} 557 558void 559lower_ubo_reference_visitor::write_to_memory(void *mem_ctx, 560 ir_dereference *deref, 561 ir_variable *var, 562 ir_variable *write_var, 563 unsigned write_mask) 564{ 565 ir_rvalue *offset = NULL; 566 unsigned const_offset; 567 bool row_major; 568 const glsl_type *matrix_type; 569 570 enum glsl_interface_packing packing = 571 var->get_interface_type()-> 572 get_internal_ifc_packing(use_std430_as_default); 573 574 this->buffer_access_type = ssbo_store_access; 575 this->variable = var; 576 577 /* Compute the offset to the start if the dereference as well as other 578 * information we need to configure the write 579 */ 580 setup_for_load_or_store(mem_ctx, var, deref, 581 &offset, &const_offset, 582 &row_major, &matrix_type, 583 packing); 584 assert(offset); 585 586 /* Now emit writes from the temporary to memory */ 587 ir_variable *write_offset = 588 new(mem_ctx) ir_variable(glsl_type::uint_type, 589 "ssbo_store_temp_offset", 590 ir_var_temporary); 591 592 base_ir->insert_before(write_offset); 593 base_ir->insert_before(assign(write_offset, offset)); 594 595 deref = new(mem_ctx) ir_dereference_variable(write_var); 596 emit_access(mem_ctx, true, deref, write_offset, const_offset, 597 row_major, matrix_type, packing, write_mask); 598} 599 600ir_visitor_status 601lower_ubo_reference_visitor::visit_enter(ir_expression *ir) 602{ 603 check_ssbo_unsized_array_length_expression(ir); 604 return rvalue_visit(ir); 605} 606 607ir_expression * 608lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr) 609{ 610 if (expr->operation != 611 ir_expression_operation(ir_unop_ssbo_unsized_array_length)) 612 return NULL; 613 614 ir_rvalue *rvalue = expr->operands[0]->as_rvalue(); 615 if (!rvalue || 616 !rvalue->type->is_array() || !rvalue->type->is_unsized_array()) 617 return NULL; 618 619 ir_dereference *deref = expr->operands[0]->as_dereference(); 620 if (!deref) 621 return NULL; 622 623 ir_variable *var = expr->operands[0]->variable_referenced(); 624 if (!var || !var->is_in_shader_storage_block()) 625 return NULL; 626 return process_ssbo_unsized_array_length(&rvalue, deref, var); 627} 628 629void 630lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir) 631{ 632 if (ir->operation == 633 ir_expression_operation(ir_unop_ssbo_unsized_array_length)) { 634 /* Don't replace this unop if it is found alone. It is going to be 635 * removed by the optimization passes or replaced if it is part of 636 * an ir_assignment or another ir_expression. 637 */ 638 return; 639 } 640 641 for (unsigned i = 0; i < ir->num_operands; i++) { 642 if (ir->operands[i]->ir_type != ir_type_expression) 643 continue; 644 ir_expression *expr = (ir_expression *) ir->operands[i]; 645 ir_expression *temp = calculate_ssbo_unsized_array_length(expr); 646 if (!temp) 647 continue; 648 649 delete expr; 650 ir->operands[i] = temp; 651 } 652} 653 654void 655lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir) 656{ 657 if (!ir->rhs || ir->rhs->ir_type != ir_type_expression) 658 return; 659 660 ir_expression *expr = (ir_expression *) ir->rhs; 661 ir_expression *temp = calculate_ssbo_unsized_array_length(expr); 662 if (!temp) 663 return; 664 665 delete expr; 666 ir->rhs = temp; 667 return; 668} 669 670ir_expression * 671lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx) 672{ 673 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); 674 return new(mem_ctx) ir_expression(ir_unop_get_buffer_size, 675 glsl_type::int_type, 676 block_ref); 677} 678 679unsigned 680lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref, 681 enum glsl_interface_packing packing) 682{ 683 unsigned array_stride = 0; 684 685 switch (deref->ir_type) { 686 case ir_type_dereference_variable: 687 { 688 ir_dereference_variable *deref_var = (ir_dereference_variable *)deref; 689 const struct glsl_type *unsized_array_type = NULL; 690 /* An unsized array can be sized by other lowering passes, so pick 691 * the first field of the array which has the data type of the unsized 692 * array. 693 */ 694 unsized_array_type = deref_var->var->type->fields.array; 695 696 /* Whether or not the field is row-major (because it might be a 697 * bvec2 or something) does not affect the array itself. We need 698 * to know whether an array element in its entirety is row-major. 699 */ 700 const bool array_row_major = 701 is_dereferenced_thing_row_major(deref_var); 702 703 if (packing == GLSL_INTERFACE_PACKING_STD430) { 704 array_stride = unsized_array_type->std430_array_stride(array_row_major); 705 } else { 706 array_stride = unsized_array_type->std140_size(array_row_major); 707 array_stride = glsl_align(array_stride, 16); 708 } 709 break; 710 } 711 case ir_type_dereference_record: 712 { 713 ir_dereference_record *deref_record = (ir_dereference_record *) deref; 714 ir_dereference *interface_deref = 715 deref_record->record->as_dereference(); 716 assert(interface_deref != NULL); 717 const struct glsl_type *interface_type = interface_deref->type; 718 unsigned record_length = interface_type->length; 719 /* Unsized array is always the last element of the interface */ 720 const struct glsl_type *unsized_array_type = 721 interface_type->fields.structure[record_length - 1].type->fields.array; 722 723 const bool array_row_major = 724 is_dereferenced_thing_row_major(deref_record); 725 726 if (packing == GLSL_INTERFACE_PACKING_STD430) { 727 array_stride = unsized_array_type->std430_array_stride(array_row_major); 728 } else { 729 array_stride = unsized_array_type->std140_size(array_row_major); 730 array_stride = glsl_align(array_stride, 16); 731 } 732 break; 733 } 734 default: 735 unreachable("Unsupported dereference type"); 736 } 737 return array_stride; 738} 739 740ir_expression * 741lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue, 742 ir_dereference *deref, 743 ir_variable *var) 744{ 745 void *mem_ctx = ralloc_parent(*rvalue); 746 747 ir_rvalue *base_offset = NULL; 748 unsigned const_offset; 749 bool row_major; 750 const glsl_type *matrix_type; 751 752 enum glsl_interface_packing packing = 753 var->get_interface_type()-> 754 get_internal_ifc_packing(use_std430_as_default); 755 int unsized_array_stride = 756 calculate_unsized_array_stride(deref, packing); 757 758 this->buffer_access_type = ssbo_unsized_array_length_access; 759 this->variable = var; 760 761 /* Compute the offset to the start if the dereference as well as other 762 * information we need to calculate the length. 763 */ 764 setup_for_load_or_store(mem_ctx, var, deref, 765 &base_offset, &const_offset, 766 &row_major, &matrix_type, 767 packing); 768 /* array.length() = 769 * max((buffer_object_size - offset_of_array) / stride_of_array, 0) 770 */ 771 ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx); 772 773 ir_expression *offset_of_array = new(mem_ctx) 774 ir_expression(ir_binop_add, base_offset, 775 new(mem_ctx) ir_constant(const_offset)); 776 ir_expression *offset_of_array_int = new(mem_ctx) 777 ir_expression(ir_unop_u2i, offset_of_array); 778 779 ir_expression *sub = new(mem_ctx) 780 ir_expression(ir_binop_sub, buffer_size, offset_of_array_int); 781 ir_expression *div = new(mem_ctx) 782 ir_expression(ir_binop_div, sub, 783 new(mem_ctx) ir_constant(unsized_array_stride)); 784 ir_expression *max = new(mem_ctx) 785 ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0)); 786 787 return max; 788} 789 790void 791lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir) 792{ 793 if (!ir || !ir->lhs) 794 return; 795 796 ir_rvalue *rvalue = ir->lhs->as_rvalue(); 797 if (!rvalue) 798 return; 799 800 ir_dereference *deref = ir->lhs->as_dereference(); 801 if (!deref) 802 return; 803 804 ir_variable *var = ir->lhs->variable_referenced(); 805 if (!var || !var->is_in_shader_storage_block()) 806 return; 807 808 /* We have a write to a buffer variable, so declare a temporary and rewrite 809 * the assignment so that the temporary is the LHS. 810 */ 811 void *mem_ctx = ralloc_parent(shader->ir); 812 813 const glsl_type *type = rvalue->type; 814 ir_variable *write_var = new(mem_ctx) ir_variable(type, 815 "ssbo_store_temp", 816 ir_var_temporary); 817 base_ir->insert_before(write_var); 818 ir->lhs = new(mem_ctx) ir_dereference_variable(write_var); 819 820 /* Now we have to write the value assigned to the temporary back to memory */ 821 write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask); 822 progress = true; 823} 824 825static bool 826is_buffer_backed_variable(ir_variable *var) 827{ 828 return var->is_in_buffer_block() || 829 var->data.mode == ir_var_shader_shared; 830} 831 832bool 833lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir) 834{ 835 if (!ir || !ir->lhs || !ir->rhs) 836 return false; 837 838 /* LHS and RHS must be arrays 839 * FIXME: arrays of arrays? 840 */ 841 if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array()) 842 return false; 843 844 /* RHS must be a buffer-backed variable. This is what can cause the problem 845 * since it would lead to a series of loads that need to live until we 846 * see the writes to the LHS. 847 */ 848 ir_variable *rhs_var = ir->rhs->variable_referenced(); 849 if (!rhs_var || !is_buffer_backed_variable(rhs_var)) 850 return false; 851 852 /* Split the array copy into individual element copies to reduce 853 * register pressure 854 */ 855 ir_dereference *rhs_deref = ir->rhs->as_dereference(); 856 if (!rhs_deref) 857 return false; 858 859 ir_dereference *lhs_deref = ir->lhs->as_dereference(); 860 if (!lhs_deref) 861 return false; 862 863 assert(lhs_deref->type->length == rhs_deref->type->length); 864 void *mem_ctx = ralloc_parent(shader->ir); 865 866 for (unsigned i = 0; i < lhs_deref->type->length; i++) { 867 ir_dereference *lhs_i = 868 new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL), 869 new(mem_ctx) ir_constant(i)); 870 871 ir_dereference *rhs_i = 872 new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL), 873 new(mem_ctx) ir_constant(i)); 874 ir->insert_after(assign(lhs_i, rhs_i)); 875 } 876 877 ir->remove(); 878 progress = true; 879 return true; 880} 881 882bool 883lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir) 884{ 885 if (!ir || !ir->lhs || !ir->rhs) 886 return false; 887 888 /* LHS and RHS must be records */ 889 if (!ir->lhs->type->is_struct() || !ir->rhs->type->is_struct()) 890 return false; 891 892 /* RHS must be a buffer-backed variable. This is what can cause the problem 893 * since it would lead to a series of loads that need to live until we 894 * see the writes to the LHS. 895 */ 896 ir_variable *rhs_var = ir->rhs->variable_referenced(); 897 if (!rhs_var || !is_buffer_backed_variable(rhs_var)) 898 return false; 899 900 /* Split the struct copy into individual element copies to reduce 901 * register pressure 902 */ 903 ir_dereference *rhs_deref = ir->rhs->as_dereference(); 904 if (!rhs_deref) 905 return false; 906 907 ir_dereference *lhs_deref = ir->lhs->as_dereference(); 908 if (!lhs_deref) 909 return false; 910 911 assert(lhs_deref->type == rhs_deref->type); 912 void *mem_ctx = ralloc_parent(shader->ir); 913 914 for (unsigned i = 0; i < lhs_deref->type->length; i++) { 915 const char *field_name = lhs_deref->type->fields.structure[i].name; 916 ir_dereference *lhs_field = 917 new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL), 918 field_name); 919 ir_dereference *rhs_field = 920 new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL), 921 field_name); 922 ir->insert_after(assign(lhs_field, rhs_field)); 923 } 924 925 ir->remove(); 926 progress = true; 927 return true; 928} 929 930ir_visitor_status 931lower_ubo_reference_visitor::visit_enter(ir_assignment *ir) 932{ 933 /* Array and struct copies could involve large amounts of load/store 934 * operations. To improve register pressure we want to special-case 935 * these and split them into individual element copies. 936 * This way we avoid emitting all the loads for the RHS first and 937 * all the writes for the LHS second and register usage is more 938 * efficient. 939 */ 940 if (check_for_buffer_array_copy(ir)) 941 return visit_continue_with_parent; 942 943 if (check_for_buffer_struct_copy(ir)) 944 return visit_continue_with_parent; 945 946 check_ssbo_unsized_array_length_assignment(ir); 947 check_for_ssbo_store(ir); 948 return rvalue_visit(ir); 949} 950 951/* Lowers the intrinsic call to a new internal intrinsic that swaps the 952 * access to the buffer variable in the first parameter by an offset 953 * and block index. This involves creating the new internal intrinsic 954 * (i.e. the new function signature). 955 */ 956ir_call * 957lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) 958{ 959 /* SSBO atomics usually have 2 parameters, the buffer variable and an 960 * integer argument. The exception is CompSwap, that has an additional 961 * integer parameter. 962 */ 963 int param_count = ir->actual_parameters.length(); 964 assert(param_count == 2 || param_count == 3); 965 966 /* First argument must be a scalar integer buffer variable */ 967 exec_node *param = ir->actual_parameters.get_head(); 968 ir_instruction *inst = (ir_instruction *) param; 969 assert(inst->ir_type == ir_type_dereference_variable || 970 inst->ir_type == ir_type_dereference_array || 971 inst->ir_type == ir_type_dereference_record || 972 inst->ir_type == ir_type_swizzle); 973 974 ir_rvalue *deref = (ir_rvalue *) inst; 975 assert(deref->type->is_scalar() && 976 (deref->type->is_integer_32_64() || deref->type->is_float())); 977 978 ir_variable *var = deref->variable_referenced(); 979 assert(var); 980 981 /* Compute the offset to the start if the dereference and the 982 * block index 983 */ 984 void *mem_ctx = ralloc_parent(shader->ir); 985 986 ir_rvalue *offset = NULL; 987 unsigned const_offset; 988 bool row_major; 989 const glsl_type *matrix_type; 990 991 enum glsl_interface_packing packing = 992 var->get_interface_type()-> 993 get_internal_ifc_packing(use_std430_as_default); 994 995 this->buffer_access_type = ssbo_atomic_access; 996 this->variable = var; 997 998 setup_for_load_or_store(mem_ctx, var, deref, 999 &offset, &const_offset, 1000 &row_major, &matrix_type, 1001 packing); 1002 assert(offset); 1003 assert(!row_major); 1004 assert(matrix_type == NULL); 1005 1006 ir_rvalue *deref_offset = 1007 add(offset, new(mem_ctx) ir_constant(const_offset)); 1008 ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL); 1009 1010 /* Create the new internal function signature that will take a block 1011 * index and offset instead of a buffer variable 1012 */ 1013 exec_list sig_params; 1014 ir_variable *sig_param = new(mem_ctx) 1015 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); 1016 sig_params.push_tail(sig_param); 1017 1018 sig_param = new(mem_ctx) 1019 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); 1020 sig_params.push_tail(sig_param); 1021 1022 const glsl_type *type = deref->type->get_scalar_type(); 1023 sig_param = new(mem_ctx) 1024 ir_variable(type, "data1", ir_var_function_in); 1025 sig_params.push_tail(sig_param); 1026 1027 if (param_count == 3) { 1028 sig_param = new(mem_ctx) 1029 ir_variable(type, "data2", ir_var_function_in); 1030 sig_params.push_tail(sig_param); 1031 } 1032 1033 ir_function_signature *sig = 1034 new(mem_ctx) ir_function_signature(deref->type, 1035 shader_storage_buffer_object); 1036 assert(sig); 1037 sig->replace_parameters(&sig_params); 1038 1039 assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load); 1040 assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap); 1041 sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo); 1042 1043 char func_name[64]; 1044 sprintf(func_name, "%s_ssbo", ir->callee_name()); 1045 ir_function *f = new(mem_ctx) ir_function(func_name); 1046 f->add_signature(sig); 1047 1048 /* Now, create the call to the internal intrinsic */ 1049 exec_list call_params; 1050 call_params.push_tail(block_index); 1051 call_params.push_tail(deref_offset); 1052 param = ir->actual_parameters.get_head()->get_next(); 1053 ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); 1054 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); 1055 if (param_count == 3) { 1056 param = param->get_next(); 1057 param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); 1058 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); 1059 } 1060 ir_dereference_variable *return_deref = 1061 ir->return_deref->clone(mem_ctx, NULL); 1062 return new(mem_ctx) ir_call(sig, return_deref, &call_params); 1063} 1064 1065ir_call * 1066lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir) 1067{ 1068 exec_list& params = ir->actual_parameters; 1069 1070 if (params.length() < 2 || params.length() > 3) 1071 return ir; 1072 1073 ir_rvalue *rvalue = 1074 ((ir_instruction *) params.get_head())->as_rvalue(); 1075 if (!rvalue) 1076 return ir; 1077 1078 ir_variable *var = rvalue->variable_referenced(); 1079 if (!var || !var->is_in_shader_storage_block()) 1080 return ir; 1081 1082 const enum ir_intrinsic_id id = ir->callee->intrinsic_id; 1083 if (id == ir_intrinsic_generic_atomic_add || 1084 id == ir_intrinsic_generic_atomic_min || 1085 id == ir_intrinsic_generic_atomic_max || 1086 id == ir_intrinsic_generic_atomic_and || 1087 id == ir_intrinsic_generic_atomic_or || 1088 id == ir_intrinsic_generic_atomic_xor || 1089 id == ir_intrinsic_generic_atomic_exchange || 1090 id == ir_intrinsic_generic_atomic_comp_swap) { 1091 return lower_ssbo_atomic_intrinsic(ir); 1092 } 1093 1094 return ir; 1095} 1096 1097 1098ir_visitor_status 1099lower_ubo_reference_visitor::visit_enter(ir_call *ir) 1100{ 1101 ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir); 1102 if (new_ir != ir) { 1103 progress = true; 1104 base_ir->replace_with(new_ir); 1105 return visit_continue_with_parent; 1106 } 1107 1108 return rvalue_visit(ir); 1109} 1110 1111 1112ir_visitor_status 1113lower_ubo_reference_visitor::visit_enter(ir_texture *ir) 1114{ 1115 ir_dereference *sampler = ir->sampler; 1116 1117 if (sampler->ir_type == ir_type_dereference_record) { 1118 handle_rvalue((ir_rvalue **)&ir->sampler); 1119 return visit_continue_with_parent; 1120 } 1121 1122 return rvalue_visit(ir); 1123} 1124 1125 1126} /* unnamed namespace */ 1127 1128void 1129lower_ubo_reference(struct gl_linked_shader *shader, 1130 bool clamp_block_indices, bool use_std430_as_default) 1131{ 1132 lower_ubo_reference_visitor v(shader, clamp_block_indices, 1133 use_std430_as_default); 1134 1135 /* Loop over the instructions lowering references, because we take 1136 * a deref of a UBO array using a UBO dereference as the index will 1137 * produce a collection of instructions all of which have cloned 1138 * UBO dereferences for that array index. 1139 */ 1140 do { 1141 v.progress = false; 1142 visit_list_elements(&v, shader->ir); 1143 } while (v.progress); 1144} 1145