lower_ubo_reference.cpp revision b8e80941
1/* 2 * Copyright © 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file lower_ubo_reference.cpp 26 * 27 * IR lower pass to replace dereferences of variables in a uniform 28 * buffer object with usage of ir_binop_ubo_load expressions, each of 29 * which can read data up to the size of a vec4. 30 * 31 * This relieves drivers of the responsibility to deal with tricky UBO 32 * layout issues like std140 structures and row_major matrices on 33 * their own. 34 */ 35 36#include "lower_buffer_access.h" 37#include "ir_builder.h" 38#include "main/macros.h" 39#include "glsl_parser_extras.h" 40#include "main/mtypes.h" 41 42using namespace ir_builder; 43 44namespace { 45class lower_ubo_reference_visitor : 46 public lower_buffer_access::lower_buffer_access { 47public: 48 lower_ubo_reference_visitor(struct gl_linked_shader *shader, 49 bool clamp_block_indices, 50 bool use_std430_as_default) 51 : shader(shader), clamp_block_indices(clamp_block_indices), 52 struct_field(NULL), variable(NULL) 53 { 54 this->use_std430_as_default = use_std430_as_default; 55 } 56 57 void handle_rvalue(ir_rvalue **rvalue); 58 ir_visitor_status visit_enter(ir_assignment *ir); 59 60 void setup_for_load_or_store(void *mem_ctx, 61 ir_variable *var, 62 ir_rvalue *deref, 63 ir_rvalue **offset, 64 unsigned *const_offset, 65 bool *row_major, 66 const glsl_type **matrix_type, 67 enum glsl_interface_packing packing); 68 uint32_t ssbo_access_params(); 69 ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, 70 ir_rvalue *offset); 71 ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, 72 ir_rvalue *offset); 73 74 bool check_for_buffer_array_copy(ir_assignment *ir); 75 bool check_for_buffer_struct_copy(ir_assignment *ir); 76 void check_for_ssbo_store(ir_assignment *ir); 77 void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var, 78 ir_variable *write_var, unsigned write_mask); 79 ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, 80 unsigned write_mask); 81 82 enum { 83 ubo_load_access, 84 ssbo_load_access, 85 ssbo_store_access, 86 ssbo_unsized_array_length_access, 87 ssbo_atomic_access, 88 } buffer_access_type; 89 90 void insert_buffer_access(void *mem_ctx, ir_dereference *deref, 91 const glsl_type *type, ir_rvalue *offset, 92 unsigned mask, int channel); 93 94 ir_visitor_status visit_enter(class ir_expression *); 95 ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr); 96 void check_ssbo_unsized_array_length_expression(class ir_expression *); 97 void check_ssbo_unsized_array_length_assignment(ir_assignment *ir); 98 99 ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, 100 ir_dereference *, 101 ir_variable *); 102 ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx); 103 104 unsigned calculate_unsized_array_stride(ir_dereference *deref, 105 enum glsl_interface_packing packing); 106 107 ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir); 108 ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir); 109 ir_visitor_status visit_enter(ir_call *ir); 110 ir_visitor_status visit_enter(ir_texture *ir); 111 112 struct gl_linked_shader *shader; 113 bool clamp_block_indices; 114 const struct glsl_struct_field *struct_field; 115 ir_variable *variable; 116 ir_rvalue *uniform_block; 117 bool progress; 118}; 119 120/** 121 * Determine the name of the interface block field 122 * 123 * This is the name of the specific member as it would appear in the 124 * \c gl_uniform_buffer_variable::Name field in the shader's 125 * \c UniformBlocks array. 126 */ 127static const char * 128interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, 129 ir_rvalue **nonconst_block_index) 130{ 131 *nonconst_block_index = NULL; 132 char *name_copy = NULL; 133 size_t base_length = 0; 134 135 /* Loop back through the IR until we find the uniform block */ 136 ir_rvalue *ir = d; 137 while (ir != NULL) { 138 switch (ir->ir_type) { 139 case ir_type_dereference_variable: { 140 /* Exit loop */ 141 ir = NULL; 142 break; 143 } 144 145 case ir_type_dereference_record: { 146 ir_dereference_record *r = (ir_dereference_record *) ir; 147 ir = r->record->as_dereference(); 148 149 /* If we got here it means any previous array subscripts belong to 150 * block members and not the block itself so skip over them in the 151 * next pass. 152 */ 153 d = ir; 154 break; 155 } 156 157 case ir_type_dereference_array: { 158 ir_dereference_array *a = (ir_dereference_array *) ir; 159 ir = a->array->as_dereference(); 160 break; 161 } 162 163 case ir_type_swizzle: { 164 ir_swizzle *s = (ir_swizzle *) ir; 165 ir = s->val->as_dereference(); 166 /* Skip swizzle in the next pass */ 167 d = ir; 168 break; 169 } 170 171 default: 172 assert(!"Should not get here."); 173 break; 174 } 175 } 176 177 while (d != NULL) { 178 switch (d->ir_type) { 179 case ir_type_dereference_variable: { 180 ir_dereference_variable *v = (ir_dereference_variable *) d; 181 if (name_copy != NULL && 182 v->var->is_interface_instance() && 183 v->var->type->is_array()) { 184 return name_copy; 185 } else { 186 *nonconst_block_index = NULL; 187 return base_name; 188 } 189 190 break; 191 } 192 193 case ir_type_dereference_array: { 194 ir_dereference_array *a = (ir_dereference_array *) d; 195 size_t new_length; 196 197 if (name_copy == NULL) { 198 name_copy = ralloc_strdup(mem_ctx, base_name); 199 base_length = strlen(name_copy); 200 } 201 202 /* For arrays of arrays we start at the innermost array and work our 203 * way out so we need to insert the subscript at the base of the 204 * name string rather than just attaching it to the end. 205 */ 206 new_length = base_length; 207 ir_constant *const_index = a->array_index->as_constant(); 208 char *end = ralloc_strdup(NULL, &name_copy[new_length]); 209 if (!const_index) { 210 ir_rvalue *array_index = a->array_index; 211 if (array_index->type != glsl_type::uint_type) 212 array_index = i2u(array_index); 213 214 if (a->array->type->is_array() && 215 a->array->type->fields.array->is_array()) { 216 ir_constant *base_size = new(mem_ctx) 217 ir_constant(a->array->type->fields.array->arrays_of_arrays_size()); 218 array_index = mul(array_index, base_size); 219 } 220 221 if (*nonconst_block_index) { 222 *nonconst_block_index = add(*nonconst_block_index, array_index); 223 } else { 224 *nonconst_block_index = array_index; 225 } 226 227 ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s", 228 end); 229 } else { 230 ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s", 231 const_index->get_uint_component(0), 232 end); 233 } 234 ralloc_free(end); 235 236 d = a->array->as_dereference(); 237 238 break; 239 } 240 241 default: 242 assert(!"Should not get here."); 243 break; 244 } 245 } 246 247 assert(!"Should not get here."); 248 return NULL; 249} 250 251static ir_rvalue * 252clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type) 253{ 254 assert(type->is_array()); 255 256 const unsigned array_size = type->arrays_of_arrays_size(); 257 258 ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1); 259 max_index->type = index->type; 260 261 ir_constant *zero = new(mem_ctx) ir_constant(0); 262 zero->type = index->type; 263 264 if (index->type->base_type == GLSL_TYPE_INT) 265 index = max2(index, zero); 266 index = min2(index, max_index); 267 268 return index; 269} 270 271void 272lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, 273 ir_variable *var, 274 ir_rvalue *deref, 275 ir_rvalue **offset, 276 unsigned *const_offset, 277 bool *row_major, 278 const glsl_type **matrix_type, 279 enum glsl_interface_packing packing) 280{ 281 /* Determine the name of the interface block */ 282 ir_rvalue *nonconst_block_index; 283 const char *const field_name = 284 interface_field_name(mem_ctx, (char *) var->get_interface_type()->name, 285 deref, &nonconst_block_index); 286 287 if (nonconst_block_index && clamp_block_indices) { 288 nonconst_block_index = 289 clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type); 290 } 291 292 /* Locate the block by interface name */ 293 unsigned num_blocks; 294 struct gl_uniform_block **blocks; 295 if (this->buffer_access_type != ubo_load_access) { 296 num_blocks = shader->Program->info.num_ssbos; 297 blocks = shader->Program->sh.ShaderStorageBlocks; 298 } else { 299 num_blocks = shader->Program->info.num_ubos; 300 blocks = shader->Program->sh.UniformBlocks; 301 } 302 this->uniform_block = NULL; 303 for (unsigned i = 0; i < num_blocks; i++) { 304 if (strcmp(field_name, blocks[i]->Name) == 0) { 305 306 ir_constant *index = new(mem_ctx) ir_constant(i); 307 308 if (nonconst_block_index) { 309 this->uniform_block = add(nonconst_block_index, index); 310 } else { 311 this->uniform_block = index; 312 } 313 314 if (var->is_interface_instance()) { 315 *const_offset = 0; 316 } else { 317 *const_offset = blocks[i]->Uniforms[var->data.location].Offset; 318 } 319 320 break; 321 } 322 } 323 324 assert(this->uniform_block); 325 326 this->struct_field = NULL; 327 setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major, 328 matrix_type, &this->struct_field, packing); 329} 330 331void 332lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) 333{ 334 if (!*rvalue) 335 return; 336 337 ir_dereference *deref = (*rvalue)->as_dereference(); 338 if (!deref) 339 return; 340 341 ir_variable *var = deref->variable_referenced(); 342 if (!var || !var->is_in_buffer_block()) 343 return; 344 345 void *mem_ctx = ralloc_parent(shader->ir); 346 347 ir_rvalue *offset = NULL; 348 unsigned const_offset; 349 bool row_major; 350 const glsl_type *matrix_type; 351 352 enum glsl_interface_packing packing = 353 var->get_interface_type()-> 354 get_internal_ifc_packing(use_std430_as_default); 355 356 this->buffer_access_type = 357 var->is_in_shader_storage_block() ? 358 ssbo_load_access : ubo_load_access; 359 this->variable = var; 360 361 /* Compute the offset to the start if the dereference as well as other 362 * information we need to configure the write 363 */ 364 setup_for_load_or_store(mem_ctx, var, deref, 365 &offset, &const_offset, 366 &row_major, &matrix_type, 367 packing); 368 assert(offset); 369 370 /* Now that we've calculated the offset to the start of the 371 * dereference, walk over the type and emit loads into a temporary. 372 */ 373 const glsl_type *type = (*rvalue)->type; 374 ir_variable *load_var = new(mem_ctx) ir_variable(type, 375 "ubo_load_temp", 376 ir_var_temporary); 377 base_ir->insert_before(load_var); 378 379 ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, 380 "ubo_load_temp_offset", 381 ir_var_temporary); 382 base_ir->insert_before(load_offset); 383 base_ir->insert_before(assign(load_offset, offset)); 384 385 deref = new(mem_ctx) ir_dereference_variable(load_var); 386 emit_access(mem_ctx, false, deref, load_offset, const_offset, 387 row_major, matrix_type, packing, 0); 388 *rvalue = deref; 389 390 progress = true; 391} 392 393ir_expression * 394lower_ubo_reference_visitor::ubo_load(void *mem_ctx, 395 const glsl_type *type, 396 ir_rvalue *offset) 397{ 398 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); 399 return new(mem_ctx) 400 ir_expression(ir_binop_ubo_load, 401 type, 402 block_ref, 403 offset); 404 405} 406 407static bool 408shader_storage_buffer_object(const _mesa_glsl_parse_state *state) 409{ 410 return state->has_shader_storage_buffer_objects(); 411} 412 413uint32_t 414lower_ubo_reference_visitor::ssbo_access_params() 415{ 416 assert(variable); 417 418 if (variable->is_interface_instance()) { 419 assert(struct_field); 420 421 return ((struct_field->memory_coherent ? ACCESS_COHERENT : 0) | 422 (struct_field->memory_restrict ? ACCESS_RESTRICT : 0) | 423 (struct_field->memory_volatile ? ACCESS_VOLATILE : 0)); 424 } else { 425 return ((variable->data.memory_coherent ? ACCESS_COHERENT : 0) | 426 (variable->data.memory_restrict ? ACCESS_RESTRICT : 0) | 427 (variable->data.memory_volatile ? ACCESS_VOLATILE : 0)); 428 } 429} 430 431ir_call * 432lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, 433 ir_rvalue *deref, 434 ir_rvalue *offset, 435 unsigned write_mask) 436{ 437 exec_list sig_params; 438 439 ir_variable *block_ref = new(mem_ctx) 440 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); 441 sig_params.push_tail(block_ref); 442 443 ir_variable *offset_ref = new(mem_ctx) 444 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); 445 sig_params.push_tail(offset_ref); 446 447 ir_variable *val_ref = new(mem_ctx) 448 ir_variable(deref->type, "value" , ir_var_function_in); 449 sig_params.push_tail(val_ref); 450 451 ir_variable *writemask_ref = new(mem_ctx) 452 ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); 453 sig_params.push_tail(writemask_ref); 454 455 ir_variable *access_ref = new(mem_ctx) 456 ir_variable(glsl_type::uint_type, "access" , ir_var_function_in); 457 sig_params.push_tail(access_ref); 458 459 ir_function_signature *sig = new(mem_ctx) 460 ir_function_signature(glsl_type::void_type, shader_storage_buffer_object); 461 assert(sig); 462 sig->replace_parameters(&sig_params); 463 sig->intrinsic_id = ir_intrinsic_ssbo_store; 464 465 ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo"); 466 f->add_signature(sig); 467 468 exec_list call_params; 469 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); 470 call_params.push_tail(offset->clone(mem_ctx, NULL)); 471 call_params.push_tail(deref->clone(mem_ctx, NULL)); 472 call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); 473 call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params())); 474 return new(mem_ctx) ir_call(sig, NULL, &call_params); 475} 476 477ir_call * 478lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, 479 const struct glsl_type *type, 480 ir_rvalue *offset) 481{ 482 exec_list sig_params; 483 484 ir_variable *block_ref = new(mem_ctx) 485 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); 486 sig_params.push_tail(block_ref); 487 488 ir_variable *offset_ref = new(mem_ctx) 489 ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); 490 sig_params.push_tail(offset_ref); 491 492 ir_variable *access_ref = new(mem_ctx) 493 ir_variable(glsl_type::uint_type, "access" , ir_var_function_in); 494 sig_params.push_tail(access_ref); 495 496 ir_function_signature *sig = 497 new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object); 498 assert(sig); 499 sig->replace_parameters(&sig_params); 500 sig->intrinsic_id = ir_intrinsic_ssbo_load; 501 502 ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo"); 503 f->add_signature(sig); 504 505 ir_variable *result = new(mem_ctx) 506 ir_variable(type, "ssbo_load_result", ir_var_temporary); 507 base_ir->insert_before(result); 508 ir_dereference_variable *deref_result = new(mem_ctx) 509 ir_dereference_variable(result); 510 511 exec_list call_params; 512 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); 513 call_params.push_tail(offset->clone(mem_ctx, NULL)); 514 call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params())); 515 516 return new(mem_ctx) ir_call(sig, deref_result, &call_params); 517} 518 519void 520lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx, 521 ir_dereference *deref, 522 const glsl_type *type, 523 ir_rvalue *offset, 524 unsigned mask, 525 int channel) 526{ 527 switch (this->buffer_access_type) { 528 case ubo_load_access: 529 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), 530 ubo_load(mem_ctx, type, offset), 531 mask)); 532 break; 533 case ssbo_load_access: { 534 ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset); 535 base_ir->insert_before(load_ssbo); 536 ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); 537 ir_assignment *assignment = 538 assign(deref->clone(mem_ctx, NULL), value, mask); 539 base_ir->insert_before(assignment); 540 break; 541 } 542 case ssbo_store_access: 543 if (channel >= 0) { 544 base_ir->insert_after(ssbo_store(mem_ctx, 545 swizzle(deref, channel, 1), 546 offset, 1)); 547 } else { 548 base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask)); 549 } 550 break; 551 default: 552 unreachable("invalid buffer_access_type in insert_buffer_access"); 553 } 554} 555 556void 557lower_ubo_reference_visitor::write_to_memory(void *mem_ctx, 558 ir_dereference *deref, 559 ir_variable *var, 560 ir_variable *write_var, 561 unsigned write_mask) 562{ 563 ir_rvalue *offset = NULL; 564 unsigned const_offset; 565 bool row_major; 566 const glsl_type *matrix_type; 567 568 enum glsl_interface_packing packing = 569 var->get_interface_type()-> 570 get_internal_ifc_packing(use_std430_as_default); 571 572 this->buffer_access_type = ssbo_store_access; 573 this->variable = var; 574 575 /* Compute the offset to the start if the dereference as well as other 576 * information we need to configure the write 577 */ 578 setup_for_load_or_store(mem_ctx, var, deref, 579 &offset, &const_offset, 580 &row_major, &matrix_type, 581 packing); 582 assert(offset); 583 584 /* Now emit writes from the temporary to memory */ 585 ir_variable *write_offset = 586 new(mem_ctx) ir_variable(glsl_type::uint_type, 587 "ssbo_store_temp_offset", 588 ir_var_temporary); 589 590 base_ir->insert_before(write_offset); 591 base_ir->insert_before(assign(write_offset, offset)); 592 593 deref = new(mem_ctx) ir_dereference_variable(write_var); 594 emit_access(mem_ctx, true, deref, write_offset, const_offset, 595 row_major, matrix_type, packing, write_mask); 596} 597 598ir_visitor_status 599lower_ubo_reference_visitor::visit_enter(ir_expression *ir) 600{ 601 check_ssbo_unsized_array_length_expression(ir); 602 return rvalue_visit(ir); 603} 604 605ir_expression * 606lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr) 607{ 608 if (expr->operation != 609 ir_expression_operation(ir_unop_ssbo_unsized_array_length)) 610 return NULL; 611 612 ir_rvalue *rvalue = expr->operands[0]->as_rvalue(); 613 if (!rvalue || 614 !rvalue->type->is_array() || !rvalue->type->is_unsized_array()) 615 return NULL; 616 617 ir_dereference *deref = expr->operands[0]->as_dereference(); 618 if (!deref) 619 return NULL; 620 621 ir_variable *var = expr->operands[0]->variable_referenced(); 622 if (!var || !var->is_in_shader_storage_block()) 623 return NULL; 624 return process_ssbo_unsized_array_length(&rvalue, deref, var); 625} 626 627void 628lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir) 629{ 630 if (ir->operation == 631 ir_expression_operation(ir_unop_ssbo_unsized_array_length)) { 632 /* Don't replace this unop if it is found alone. It is going to be 633 * removed by the optimization passes or replaced if it is part of 634 * an ir_assignment or another ir_expression. 635 */ 636 return; 637 } 638 639 for (unsigned i = 0; i < ir->num_operands; i++) { 640 if (ir->operands[i]->ir_type != ir_type_expression) 641 continue; 642 ir_expression *expr = (ir_expression *) ir->operands[i]; 643 ir_expression *temp = calculate_ssbo_unsized_array_length(expr); 644 if (!temp) 645 continue; 646 647 delete expr; 648 ir->operands[i] = temp; 649 } 650} 651 652void 653lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir) 654{ 655 if (!ir->rhs || ir->rhs->ir_type != ir_type_expression) 656 return; 657 658 ir_expression *expr = (ir_expression *) ir->rhs; 659 ir_expression *temp = calculate_ssbo_unsized_array_length(expr); 660 if (!temp) 661 return; 662 663 delete expr; 664 ir->rhs = temp; 665 return; 666} 667 668ir_expression * 669lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx) 670{ 671 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); 672 return new(mem_ctx) ir_expression(ir_unop_get_buffer_size, 673 glsl_type::int_type, 674 block_ref); 675} 676 677unsigned 678lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref, 679 enum glsl_interface_packing packing) 680{ 681 unsigned array_stride = 0; 682 683 switch (deref->ir_type) { 684 case ir_type_dereference_variable: 685 { 686 ir_dereference_variable *deref_var = (ir_dereference_variable *)deref; 687 const struct glsl_type *unsized_array_type = NULL; 688 /* An unsized array can be sized by other lowering passes, so pick 689 * the first field of the array which has the data type of the unsized 690 * array. 691 */ 692 unsized_array_type = deref_var->var->type->fields.array; 693 694 /* Whether or not the field is row-major (because it might be a 695 * bvec2 or something) does not affect the array itself. We need 696 * to know whether an array element in its entirety is row-major. 697 */ 698 const bool array_row_major = 699 is_dereferenced_thing_row_major(deref_var); 700 701 if (packing == GLSL_INTERFACE_PACKING_STD430) { 702 array_stride = unsized_array_type->std430_array_stride(array_row_major); 703 } else { 704 array_stride = unsized_array_type->std140_size(array_row_major); 705 array_stride = glsl_align(array_stride, 16); 706 } 707 break; 708 } 709 case ir_type_dereference_record: 710 { 711 ir_dereference_record *deref_record = (ir_dereference_record *) deref; 712 ir_dereference *interface_deref = 713 deref_record->record->as_dereference(); 714 assert(interface_deref != NULL); 715 const struct glsl_type *interface_type = interface_deref->type; 716 unsigned record_length = interface_type->length; 717 /* Unsized array is always the last element of the interface */ 718 const struct glsl_type *unsized_array_type = 719 interface_type->fields.structure[record_length - 1].type->fields.array; 720 721 const bool array_row_major = 722 is_dereferenced_thing_row_major(deref_record); 723 724 if (packing == GLSL_INTERFACE_PACKING_STD430) { 725 array_stride = unsized_array_type->std430_array_stride(array_row_major); 726 } else { 727 array_stride = unsized_array_type->std140_size(array_row_major); 728 array_stride = glsl_align(array_stride, 16); 729 } 730 break; 731 } 732 default: 733 unreachable("Unsupported dereference type"); 734 } 735 return array_stride; 736} 737 738ir_expression * 739lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue, 740 ir_dereference *deref, 741 ir_variable *var) 742{ 743 void *mem_ctx = ralloc_parent(*rvalue); 744 745 ir_rvalue *base_offset = NULL; 746 unsigned const_offset; 747 bool row_major; 748 const glsl_type *matrix_type; 749 750 enum glsl_interface_packing packing = 751 var->get_interface_type()-> 752 get_internal_ifc_packing(use_std430_as_default); 753 int unsized_array_stride = 754 calculate_unsized_array_stride(deref, packing); 755 756 this->buffer_access_type = ssbo_unsized_array_length_access; 757 this->variable = var; 758 759 /* Compute the offset to the start if the dereference as well as other 760 * information we need to calculate the length. 761 */ 762 setup_for_load_or_store(mem_ctx, var, deref, 763 &base_offset, &const_offset, 764 &row_major, &matrix_type, 765 packing); 766 /* array.length() = 767 * max((buffer_object_size - offset_of_array) / stride_of_array, 0) 768 */ 769 ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx); 770 771 ir_expression *offset_of_array = new(mem_ctx) 772 ir_expression(ir_binop_add, base_offset, 773 new(mem_ctx) ir_constant(const_offset)); 774 ir_expression *offset_of_array_int = new(mem_ctx) 775 ir_expression(ir_unop_u2i, offset_of_array); 776 777 ir_expression *sub = new(mem_ctx) 778 ir_expression(ir_binop_sub, buffer_size, offset_of_array_int); 779 ir_expression *div = new(mem_ctx) 780 ir_expression(ir_binop_div, sub, 781 new(mem_ctx) ir_constant(unsized_array_stride)); 782 ir_expression *max = new(mem_ctx) 783 ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0)); 784 785 return max; 786} 787 788void 789lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir) 790{ 791 if (!ir || !ir->lhs) 792 return; 793 794 ir_rvalue *rvalue = ir->lhs->as_rvalue(); 795 if (!rvalue) 796 return; 797 798 ir_dereference *deref = ir->lhs->as_dereference(); 799 if (!deref) 800 return; 801 802 ir_variable *var = ir->lhs->variable_referenced(); 803 if (!var || !var->is_in_shader_storage_block()) 804 return; 805 806 /* We have a write to a buffer variable, so declare a temporary and rewrite 807 * the assignment so that the temporary is the LHS. 808 */ 809 void *mem_ctx = ralloc_parent(shader->ir); 810 811 const glsl_type *type = rvalue->type; 812 ir_variable *write_var = new(mem_ctx) ir_variable(type, 813 "ssbo_store_temp", 814 ir_var_temporary); 815 base_ir->insert_before(write_var); 816 ir->lhs = new(mem_ctx) ir_dereference_variable(write_var); 817 818 /* Now we have to write the value assigned to the temporary back to memory */ 819 write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask); 820 progress = true; 821} 822 823static bool 824is_buffer_backed_variable(ir_variable *var) 825{ 826 return var->is_in_buffer_block() || 827 var->data.mode == ir_var_shader_shared; 828} 829 830bool 831lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir) 832{ 833 if (!ir || !ir->lhs || !ir->rhs) 834 return false; 835 836 /* LHS and RHS must be arrays 837 * FIXME: arrays of arrays? 838 */ 839 if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array()) 840 return false; 841 842 /* RHS must be a buffer-backed variable. This is what can cause the problem 843 * since it would lead to a series of loads that need to live until we 844 * see the writes to the LHS. 845 */ 846 ir_variable *rhs_var = ir->rhs->variable_referenced(); 847 if (!rhs_var || !is_buffer_backed_variable(rhs_var)) 848 return false; 849 850 /* Split the array copy into individual element copies to reduce 851 * register pressure 852 */ 853 ir_dereference *rhs_deref = ir->rhs->as_dereference(); 854 if (!rhs_deref) 855 return false; 856 857 ir_dereference *lhs_deref = ir->lhs->as_dereference(); 858 if (!lhs_deref) 859 return false; 860 861 assert(lhs_deref->type->length == rhs_deref->type->length); 862 void *mem_ctx = ralloc_parent(shader->ir); 863 864 for (unsigned i = 0; i < lhs_deref->type->length; i++) { 865 ir_dereference *lhs_i = 866 new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL), 867 new(mem_ctx) ir_constant(i)); 868 869 ir_dereference *rhs_i = 870 new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL), 871 new(mem_ctx) ir_constant(i)); 872 ir->insert_after(assign(lhs_i, rhs_i)); 873 } 874 875 ir->remove(); 876 progress = true; 877 return true; 878} 879 880bool 881lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir) 882{ 883 if (!ir || !ir->lhs || !ir->rhs) 884 return false; 885 886 /* LHS and RHS must be records */ 887 if (!ir->lhs->type->is_struct() || !ir->rhs->type->is_struct()) 888 return false; 889 890 /* RHS must be a buffer-backed variable. This is what can cause the problem 891 * since it would lead to a series of loads that need to live until we 892 * see the writes to the LHS. 893 */ 894 ir_variable *rhs_var = ir->rhs->variable_referenced(); 895 if (!rhs_var || !is_buffer_backed_variable(rhs_var)) 896 return false; 897 898 /* Split the struct copy into individual element copies to reduce 899 * register pressure 900 */ 901 ir_dereference *rhs_deref = ir->rhs->as_dereference(); 902 if (!rhs_deref) 903 return false; 904 905 ir_dereference *lhs_deref = ir->lhs->as_dereference(); 906 if (!lhs_deref) 907 return false; 908 909 assert(lhs_deref->type == rhs_deref->type); 910 void *mem_ctx = ralloc_parent(shader->ir); 911 912 for (unsigned i = 0; i < lhs_deref->type->length; i++) { 913 const char *field_name = lhs_deref->type->fields.structure[i].name; 914 ir_dereference *lhs_field = 915 new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL), 916 field_name); 917 ir_dereference *rhs_field = 918 new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL), 919 field_name); 920 ir->insert_after(assign(lhs_field, rhs_field)); 921 } 922 923 ir->remove(); 924 progress = true; 925 return true; 926} 927 928ir_visitor_status 929lower_ubo_reference_visitor::visit_enter(ir_assignment *ir) 930{ 931 /* Array and struct copies could involve large amounts of load/store 932 * operations. To improve register pressure we want to special-case 933 * these and split them into individual element copies. 934 * This way we avoid emitting all the loads for the RHS first and 935 * all the writes for the LHS second and register usage is more 936 * efficient. 937 */ 938 if (check_for_buffer_array_copy(ir)) 939 return visit_continue_with_parent; 940 941 if (check_for_buffer_struct_copy(ir)) 942 return visit_continue_with_parent; 943 944 check_ssbo_unsized_array_length_assignment(ir); 945 check_for_ssbo_store(ir); 946 return rvalue_visit(ir); 947} 948 949/* Lowers the intrinsic call to a new internal intrinsic that swaps the 950 * access to the buffer variable in the first parameter by an offset 951 * and block index. This involves creating the new internal intrinsic 952 * (i.e. the new function signature). 953 */ 954ir_call * 955lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) 956{ 957 /* SSBO atomics usually have 2 parameters, the buffer variable and an 958 * integer argument. The exception is CompSwap, that has an additional 959 * integer parameter. 960 */ 961 int param_count = ir->actual_parameters.length(); 962 assert(param_count == 2 || param_count == 3); 963 964 /* First argument must be a scalar integer buffer variable */ 965 exec_node *param = ir->actual_parameters.get_head(); 966 ir_instruction *inst = (ir_instruction *) param; 967 assert(inst->ir_type == ir_type_dereference_variable || 968 inst->ir_type == ir_type_dereference_array || 969 inst->ir_type == ir_type_dereference_record || 970 inst->ir_type == ir_type_swizzle); 971 972 ir_rvalue *deref = (ir_rvalue *) inst; 973 assert(deref->type->is_scalar() && 974 (deref->type->is_integer() || deref->type->is_float())); 975 976 ir_variable *var = deref->variable_referenced(); 977 assert(var); 978 979 /* Compute the offset to the start if the dereference and the 980 * block index 981 */ 982 void *mem_ctx = ralloc_parent(shader->ir); 983 984 ir_rvalue *offset = NULL; 985 unsigned const_offset; 986 bool row_major; 987 const glsl_type *matrix_type; 988 989 enum glsl_interface_packing packing = 990 var->get_interface_type()-> 991 get_internal_ifc_packing(use_std430_as_default); 992 993 this->buffer_access_type = ssbo_atomic_access; 994 this->variable = var; 995 996 setup_for_load_or_store(mem_ctx, var, deref, 997 &offset, &const_offset, 998 &row_major, &matrix_type, 999 packing); 1000 assert(offset); 1001 assert(!row_major); 1002 assert(matrix_type == NULL); 1003 1004 ir_rvalue *deref_offset = 1005 add(offset, new(mem_ctx) ir_constant(const_offset)); 1006 ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL); 1007 1008 /* Create the new internal function signature that will take a block 1009 * index and offset instead of a buffer variable 1010 */ 1011 exec_list sig_params; 1012 ir_variable *sig_param = new(mem_ctx) 1013 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); 1014 sig_params.push_tail(sig_param); 1015 1016 sig_param = new(mem_ctx) 1017 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); 1018 sig_params.push_tail(sig_param); 1019 1020 const glsl_type *type = deref->type->get_scalar_type(); 1021 sig_param = new(mem_ctx) 1022 ir_variable(type, "data1", ir_var_function_in); 1023 sig_params.push_tail(sig_param); 1024 1025 if (param_count == 3) { 1026 sig_param = new(mem_ctx) 1027 ir_variable(type, "data2", ir_var_function_in); 1028 sig_params.push_tail(sig_param); 1029 } 1030 1031 ir_function_signature *sig = 1032 new(mem_ctx) ir_function_signature(deref->type, 1033 shader_storage_buffer_object); 1034 assert(sig); 1035 sig->replace_parameters(&sig_params); 1036 1037 assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load); 1038 assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap); 1039 sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo); 1040 1041 char func_name[64]; 1042 sprintf(func_name, "%s_ssbo", ir->callee_name()); 1043 ir_function *f = new(mem_ctx) ir_function(func_name); 1044 f->add_signature(sig); 1045 1046 /* Now, create the call to the internal intrinsic */ 1047 exec_list call_params; 1048 call_params.push_tail(block_index); 1049 call_params.push_tail(deref_offset); 1050 param = ir->actual_parameters.get_head()->get_next(); 1051 ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); 1052 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); 1053 if (param_count == 3) { 1054 param = param->get_next(); 1055 param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); 1056 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); 1057 } 1058 ir_dereference_variable *return_deref = 1059 ir->return_deref->clone(mem_ctx, NULL); 1060 return new(mem_ctx) ir_call(sig, return_deref, &call_params); 1061} 1062 1063ir_call * 1064lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir) 1065{ 1066 exec_list& params = ir->actual_parameters; 1067 1068 if (params.length() < 2 || params.length() > 3) 1069 return ir; 1070 1071 ir_rvalue *rvalue = 1072 ((ir_instruction *) params.get_head())->as_rvalue(); 1073 if (!rvalue) 1074 return ir; 1075 1076 ir_variable *var = rvalue->variable_referenced(); 1077 if (!var || !var->is_in_shader_storage_block()) 1078 return ir; 1079 1080 const enum ir_intrinsic_id id = ir->callee->intrinsic_id; 1081 if (id == ir_intrinsic_generic_atomic_add || 1082 id == ir_intrinsic_generic_atomic_min || 1083 id == ir_intrinsic_generic_atomic_max || 1084 id == ir_intrinsic_generic_atomic_and || 1085 id == ir_intrinsic_generic_atomic_or || 1086 id == ir_intrinsic_generic_atomic_xor || 1087 id == ir_intrinsic_generic_atomic_exchange || 1088 id == ir_intrinsic_generic_atomic_comp_swap) { 1089 return lower_ssbo_atomic_intrinsic(ir); 1090 } 1091 1092 return ir; 1093} 1094 1095 1096ir_visitor_status 1097lower_ubo_reference_visitor::visit_enter(ir_call *ir) 1098{ 1099 ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir); 1100 if (new_ir != ir) { 1101 progress = true; 1102 base_ir->replace_with(new_ir); 1103 return visit_continue_with_parent; 1104 } 1105 1106 return rvalue_visit(ir); 1107} 1108 1109 1110ir_visitor_status 1111lower_ubo_reference_visitor::visit_enter(ir_texture *ir) 1112{ 1113 ir_dereference *sampler = ir->sampler; 1114 1115 if (sampler->ir_type == ir_type_dereference_record) { 1116 handle_rvalue((ir_rvalue **)&ir->sampler); 1117 return visit_continue_with_parent; 1118 } 1119 1120 return rvalue_visit(ir); 1121} 1122 1123 1124} /* unnamed namespace */ 1125 1126void 1127lower_ubo_reference(struct gl_linked_shader *shader, 1128 bool clamp_block_indices, bool use_std430_as_default) 1129{ 1130 lower_ubo_reference_visitor v(shader, clamp_block_indices, 1131 use_std430_as_default); 1132 1133 /* Loop over the instructions lowering references, because we take 1134 * a deref of a UBO array using a UBO dereference as the index will 1135 * produce a collection of instructions all of which have cloned 1136 * UBO dereferences for that array index. 1137 */ 1138 do { 1139 v.progress = false; 1140 visit_list_elements(&v, shader->ir); 1141 } while (v.progress); 1142} 1143