1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2012 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21b8e80941Smrg * DEALINGS IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/** 25b8e80941Smrg * \file lower_ubo_reference.cpp 26b8e80941Smrg * 27b8e80941Smrg * IR lower pass to replace dereferences of variables in a uniform 28b8e80941Smrg * buffer object with usage of ir_binop_ubo_load expressions, each of 29b8e80941Smrg * which can read data up to the size of a vec4. 30b8e80941Smrg * 31b8e80941Smrg * This relieves drivers of the responsibility to deal with tricky UBO 32b8e80941Smrg * layout issues like std140 structures and row_major matrices on 33b8e80941Smrg * their own. 
34b8e80941Smrg */ 35b8e80941Smrg 36b8e80941Smrg#include "lower_buffer_access.h" 37b8e80941Smrg#include "ir_builder.h" 38b8e80941Smrg#include "main/macros.h" 39b8e80941Smrg#include "glsl_parser_extras.h" 40b8e80941Smrg#include "main/mtypes.h" 41b8e80941Smrg 42b8e80941Smrgusing namespace ir_builder; 43b8e80941Smrg 44b8e80941Smrgnamespace { 45b8e80941Smrgclass lower_ubo_reference_visitor : 46b8e80941Smrg public lower_buffer_access::lower_buffer_access { 47b8e80941Smrgpublic: 48b8e80941Smrg lower_ubo_reference_visitor(struct gl_linked_shader *shader, 49b8e80941Smrg bool clamp_block_indices, 50b8e80941Smrg bool use_std430_as_default) 51b8e80941Smrg : shader(shader), clamp_block_indices(clamp_block_indices), 52b8e80941Smrg struct_field(NULL), variable(NULL) 53b8e80941Smrg { 54b8e80941Smrg this->use_std430_as_default = use_std430_as_default; 55b8e80941Smrg } 56b8e80941Smrg 57b8e80941Smrg void handle_rvalue(ir_rvalue **rvalue); 58b8e80941Smrg ir_visitor_status visit_enter(ir_assignment *ir); 59b8e80941Smrg 60b8e80941Smrg void setup_for_load_or_store(void *mem_ctx, 61b8e80941Smrg ir_variable *var, 62b8e80941Smrg ir_rvalue *deref, 63b8e80941Smrg ir_rvalue **offset, 64b8e80941Smrg unsigned *const_offset, 65b8e80941Smrg bool *row_major, 66b8e80941Smrg const glsl_type **matrix_type, 67b8e80941Smrg enum glsl_interface_packing packing); 68b8e80941Smrg uint32_t ssbo_access_params(); 69b8e80941Smrg ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, 70b8e80941Smrg ir_rvalue *offset); 71b8e80941Smrg ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, 72b8e80941Smrg ir_rvalue *offset); 73b8e80941Smrg 74b8e80941Smrg bool check_for_buffer_array_copy(ir_assignment *ir); 75b8e80941Smrg bool check_for_buffer_struct_copy(ir_assignment *ir); 76b8e80941Smrg void check_for_ssbo_store(ir_assignment *ir); 77b8e80941Smrg void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var, 78b8e80941Smrg ir_variable *write_var, unsigned write_mask); 
79b8e80941Smrg ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, 80b8e80941Smrg unsigned write_mask); 81b8e80941Smrg 82b8e80941Smrg enum { 83b8e80941Smrg ubo_load_access, 84b8e80941Smrg ssbo_load_access, 85b8e80941Smrg ssbo_store_access, 86b8e80941Smrg ssbo_unsized_array_length_access, 87b8e80941Smrg ssbo_atomic_access, 88b8e80941Smrg } buffer_access_type; 89b8e80941Smrg 90b8e80941Smrg void insert_buffer_access(void *mem_ctx, ir_dereference *deref, 91b8e80941Smrg const glsl_type *type, ir_rvalue *offset, 92b8e80941Smrg unsigned mask, int channel); 93b8e80941Smrg 94b8e80941Smrg ir_visitor_status visit_enter(class ir_expression *); 95b8e80941Smrg ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr); 96b8e80941Smrg void check_ssbo_unsized_array_length_expression(class ir_expression *); 97b8e80941Smrg void check_ssbo_unsized_array_length_assignment(ir_assignment *ir); 98b8e80941Smrg 99b8e80941Smrg ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, 100b8e80941Smrg ir_dereference *, 101b8e80941Smrg ir_variable *); 102b8e80941Smrg ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx); 103b8e80941Smrg 104b8e80941Smrg unsigned calculate_unsized_array_stride(ir_dereference *deref, 105b8e80941Smrg enum glsl_interface_packing packing); 106b8e80941Smrg 107b8e80941Smrg ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir); 108b8e80941Smrg ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir); 109b8e80941Smrg ir_visitor_status visit_enter(ir_call *ir); 110b8e80941Smrg ir_visitor_status visit_enter(ir_texture *ir); 111b8e80941Smrg 112b8e80941Smrg struct gl_linked_shader *shader; 113b8e80941Smrg bool clamp_block_indices; 114b8e80941Smrg const struct glsl_struct_field *struct_field; 115b8e80941Smrg ir_variable *variable; 116b8e80941Smrg ir_rvalue *uniform_block; 117b8e80941Smrg bool progress; 118b8e80941Smrg}; 119b8e80941Smrg 120b8e80941Smrg/** 121b8e80941Smrg * Determine the name of the interface block field 122b8e80941Smrg * 
123b8e80941Smrg * This is the name of the specific member as it would appear in the 124b8e80941Smrg * \c gl_uniform_buffer_variable::Name field in the shader's 125b8e80941Smrg * \c UniformBlocks array. 126b8e80941Smrg */ 127b8e80941Smrgstatic const char * 128b8e80941Smrginterface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, 129b8e80941Smrg ir_rvalue **nonconst_block_index) 130b8e80941Smrg{ 131b8e80941Smrg *nonconst_block_index = NULL; 132b8e80941Smrg char *name_copy = NULL; 133b8e80941Smrg size_t base_length = 0; 134b8e80941Smrg 135b8e80941Smrg /* Loop back through the IR until we find the uniform block */ 136b8e80941Smrg ir_rvalue *ir = d; 137b8e80941Smrg while (ir != NULL) { 138b8e80941Smrg switch (ir->ir_type) { 139b8e80941Smrg case ir_type_dereference_variable: { 140b8e80941Smrg /* Exit loop */ 141b8e80941Smrg ir = NULL; 142b8e80941Smrg break; 143b8e80941Smrg } 144b8e80941Smrg 145b8e80941Smrg case ir_type_dereference_record: { 146b8e80941Smrg ir_dereference_record *r = (ir_dereference_record *) ir; 147b8e80941Smrg ir = r->record->as_dereference(); 148b8e80941Smrg 149b8e80941Smrg /* If we got here it means any previous array subscripts belong to 150b8e80941Smrg * block members and not the block itself so skip over them in the 151b8e80941Smrg * next pass. 
152b8e80941Smrg */ 153b8e80941Smrg d = ir; 154b8e80941Smrg break; 155b8e80941Smrg } 156b8e80941Smrg 157b8e80941Smrg case ir_type_dereference_array: { 158b8e80941Smrg ir_dereference_array *a = (ir_dereference_array *) ir; 159b8e80941Smrg ir = a->array->as_dereference(); 160b8e80941Smrg break; 161b8e80941Smrg } 162b8e80941Smrg 163b8e80941Smrg case ir_type_swizzle: { 164b8e80941Smrg ir_swizzle *s = (ir_swizzle *) ir; 165b8e80941Smrg ir = s->val->as_dereference(); 166b8e80941Smrg /* Skip swizzle in the next pass */ 167b8e80941Smrg d = ir; 168b8e80941Smrg break; 169b8e80941Smrg } 170b8e80941Smrg 171b8e80941Smrg default: 172b8e80941Smrg assert(!"Should not get here."); 173b8e80941Smrg break; 174b8e80941Smrg } 175b8e80941Smrg } 176b8e80941Smrg 177b8e80941Smrg while (d != NULL) { 178b8e80941Smrg switch (d->ir_type) { 179b8e80941Smrg case ir_type_dereference_variable: { 180b8e80941Smrg ir_dereference_variable *v = (ir_dereference_variable *) d; 181b8e80941Smrg if (name_copy != NULL && 182b8e80941Smrg v->var->is_interface_instance() && 183b8e80941Smrg v->var->type->is_array()) { 184b8e80941Smrg return name_copy; 185b8e80941Smrg } else { 186b8e80941Smrg *nonconst_block_index = NULL; 187b8e80941Smrg return base_name; 188b8e80941Smrg } 189b8e80941Smrg 190b8e80941Smrg break; 191b8e80941Smrg } 192b8e80941Smrg 193b8e80941Smrg case ir_type_dereference_array: { 194b8e80941Smrg ir_dereference_array *a = (ir_dereference_array *) d; 195b8e80941Smrg size_t new_length; 196b8e80941Smrg 197b8e80941Smrg if (name_copy == NULL) { 198b8e80941Smrg name_copy = ralloc_strdup(mem_ctx, base_name); 199b8e80941Smrg base_length = strlen(name_copy); 200b8e80941Smrg } 201b8e80941Smrg 202b8e80941Smrg /* For arrays of arrays we start at the innermost array and work our 203b8e80941Smrg * way out so we need to insert the subscript at the base of the 204b8e80941Smrg * name string rather than just attaching it to the end. 
205b8e80941Smrg */ 206b8e80941Smrg new_length = base_length; 207b8e80941Smrg ir_constant *const_index = a->array_index->as_constant(); 208b8e80941Smrg char *end = ralloc_strdup(NULL, &name_copy[new_length]); 209b8e80941Smrg if (!const_index) { 210b8e80941Smrg ir_rvalue *array_index = a->array_index; 211b8e80941Smrg if (array_index->type != glsl_type::uint_type) 212b8e80941Smrg array_index = i2u(array_index); 213b8e80941Smrg 214b8e80941Smrg if (a->array->type->is_array() && 215b8e80941Smrg a->array->type->fields.array->is_array()) { 216b8e80941Smrg ir_constant *base_size = new(mem_ctx) 217b8e80941Smrg ir_constant(a->array->type->fields.array->arrays_of_arrays_size()); 218b8e80941Smrg array_index = mul(array_index, base_size); 219b8e80941Smrg } 220b8e80941Smrg 221b8e80941Smrg if (*nonconst_block_index) { 222b8e80941Smrg *nonconst_block_index = add(*nonconst_block_index, array_index); 223b8e80941Smrg } else { 224b8e80941Smrg *nonconst_block_index = array_index; 225b8e80941Smrg } 226b8e80941Smrg 227b8e80941Smrg ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s", 228b8e80941Smrg end); 229b8e80941Smrg } else { 230b8e80941Smrg ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s", 231b8e80941Smrg const_index->get_uint_component(0), 232b8e80941Smrg end); 233b8e80941Smrg } 234b8e80941Smrg ralloc_free(end); 235b8e80941Smrg 236b8e80941Smrg d = a->array->as_dereference(); 237b8e80941Smrg 238b8e80941Smrg break; 239b8e80941Smrg } 240b8e80941Smrg 241b8e80941Smrg default: 242b8e80941Smrg assert(!"Should not get here."); 243b8e80941Smrg break; 244b8e80941Smrg } 245b8e80941Smrg } 246b8e80941Smrg 247b8e80941Smrg assert(!"Should not get here."); 248b8e80941Smrg return NULL; 249b8e80941Smrg} 250b8e80941Smrg 251b8e80941Smrgstatic ir_rvalue * 252b8e80941Smrgclamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type) 253b8e80941Smrg{ 254b8e80941Smrg assert(type->is_array()); 255b8e80941Smrg 256b8e80941Smrg const unsigned array_size = 
type->arrays_of_arrays_size(); 257b8e80941Smrg 258b8e80941Smrg ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1); 259b8e80941Smrg max_index->type = index->type; 260b8e80941Smrg 261b8e80941Smrg ir_constant *zero = new(mem_ctx) ir_constant(0); 262b8e80941Smrg zero->type = index->type; 263b8e80941Smrg 264b8e80941Smrg if (index->type->base_type == GLSL_TYPE_INT) 265b8e80941Smrg index = max2(index, zero); 266b8e80941Smrg index = min2(index, max_index); 267b8e80941Smrg 268b8e80941Smrg return index; 269b8e80941Smrg} 270b8e80941Smrg 271b8e80941Smrgvoid 272b8e80941Smrglower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, 273b8e80941Smrg ir_variable *var, 274b8e80941Smrg ir_rvalue *deref, 275b8e80941Smrg ir_rvalue **offset, 276b8e80941Smrg unsigned *const_offset, 277b8e80941Smrg bool *row_major, 278b8e80941Smrg const glsl_type **matrix_type, 279b8e80941Smrg enum glsl_interface_packing packing) 280b8e80941Smrg{ 281b8e80941Smrg /* Determine the name of the interface block */ 282b8e80941Smrg ir_rvalue *nonconst_block_index; 283b8e80941Smrg const char *const field_name = 284b8e80941Smrg interface_field_name(mem_ctx, (char *) var->get_interface_type()->name, 285b8e80941Smrg deref, &nonconst_block_index); 286b8e80941Smrg 287b8e80941Smrg if (nonconst_block_index && clamp_block_indices) { 288b8e80941Smrg nonconst_block_index = 289b8e80941Smrg clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type); 290b8e80941Smrg } 291b8e80941Smrg 292b8e80941Smrg /* Locate the block by interface name */ 293b8e80941Smrg unsigned num_blocks; 294b8e80941Smrg struct gl_uniform_block **blocks; 295b8e80941Smrg if (this->buffer_access_type != ubo_load_access) { 296b8e80941Smrg num_blocks = shader->Program->info.num_ssbos; 297b8e80941Smrg blocks = shader->Program->sh.ShaderStorageBlocks; 298b8e80941Smrg } else { 299b8e80941Smrg num_blocks = shader->Program->info.num_ubos; 300b8e80941Smrg blocks = shader->Program->sh.UniformBlocks; 301b8e80941Smrg } 302b8e80941Smrg 
this->uniform_block = NULL; 303b8e80941Smrg for (unsigned i = 0; i < num_blocks; i++) { 304b8e80941Smrg if (strcmp(field_name, blocks[i]->Name) == 0) { 305b8e80941Smrg 306b8e80941Smrg ir_constant *index = new(mem_ctx) ir_constant(i); 307b8e80941Smrg 308b8e80941Smrg if (nonconst_block_index) { 309b8e80941Smrg this->uniform_block = add(nonconst_block_index, index); 310b8e80941Smrg } else { 311b8e80941Smrg this->uniform_block = index; 312b8e80941Smrg } 313b8e80941Smrg 314b8e80941Smrg if (var->is_interface_instance()) { 315b8e80941Smrg *const_offset = 0; 316b8e80941Smrg } else { 317b8e80941Smrg *const_offset = blocks[i]->Uniforms[var->data.location].Offset; 318b8e80941Smrg } 319b8e80941Smrg 320b8e80941Smrg break; 321b8e80941Smrg } 322b8e80941Smrg } 323b8e80941Smrg 324b8e80941Smrg assert(this->uniform_block); 325b8e80941Smrg 326b8e80941Smrg this->struct_field = NULL; 327b8e80941Smrg setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major, 328b8e80941Smrg matrix_type, &this->struct_field, packing); 329b8e80941Smrg} 330b8e80941Smrg 331b8e80941Smrgvoid 332b8e80941Smrglower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) 333b8e80941Smrg{ 334b8e80941Smrg if (!*rvalue) 335b8e80941Smrg return; 336b8e80941Smrg 337b8e80941Smrg ir_dereference *deref = (*rvalue)->as_dereference(); 338b8e80941Smrg if (!deref) 339b8e80941Smrg return; 340b8e80941Smrg 341b8e80941Smrg ir_variable *var = deref->variable_referenced(); 342b8e80941Smrg if (!var || !var->is_in_buffer_block()) 343b8e80941Smrg return; 344b8e80941Smrg 345b8e80941Smrg void *mem_ctx = ralloc_parent(shader->ir); 346b8e80941Smrg 347b8e80941Smrg ir_rvalue *offset = NULL; 348b8e80941Smrg unsigned const_offset; 349b8e80941Smrg bool row_major; 350b8e80941Smrg const glsl_type *matrix_type; 351b8e80941Smrg 352b8e80941Smrg enum glsl_interface_packing packing = 353b8e80941Smrg var->get_interface_type()-> 354b8e80941Smrg get_internal_ifc_packing(use_std430_as_default); 355b8e80941Smrg 356b8e80941Smrg 
this->buffer_access_type = 357b8e80941Smrg var->is_in_shader_storage_block() ? 358b8e80941Smrg ssbo_load_access : ubo_load_access; 359b8e80941Smrg this->variable = var; 360b8e80941Smrg 361b8e80941Smrg /* Compute the offset to the start if the dereference as well as other 362b8e80941Smrg * information we need to configure the write 363b8e80941Smrg */ 364b8e80941Smrg setup_for_load_or_store(mem_ctx, var, deref, 365b8e80941Smrg &offset, &const_offset, 366b8e80941Smrg &row_major, &matrix_type, 367b8e80941Smrg packing); 368b8e80941Smrg assert(offset); 369b8e80941Smrg 370b8e80941Smrg /* Now that we've calculated the offset to the start of the 371b8e80941Smrg * dereference, walk over the type and emit loads into a temporary. 372b8e80941Smrg */ 373b8e80941Smrg const glsl_type *type = (*rvalue)->type; 374b8e80941Smrg ir_variable *load_var = new(mem_ctx) ir_variable(type, 375b8e80941Smrg "ubo_load_temp", 376b8e80941Smrg ir_var_temporary); 377b8e80941Smrg base_ir->insert_before(load_var); 378b8e80941Smrg 379b8e80941Smrg ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, 380b8e80941Smrg "ubo_load_temp_offset", 381b8e80941Smrg ir_var_temporary); 382b8e80941Smrg base_ir->insert_before(load_offset); 383b8e80941Smrg base_ir->insert_before(assign(load_offset, offset)); 384b8e80941Smrg 385b8e80941Smrg deref = new(mem_ctx) ir_dereference_variable(load_var); 386b8e80941Smrg emit_access(mem_ctx, false, deref, load_offset, const_offset, 387b8e80941Smrg row_major, matrix_type, packing, 0); 388b8e80941Smrg *rvalue = deref; 389b8e80941Smrg 390b8e80941Smrg progress = true; 391b8e80941Smrg} 392b8e80941Smrg 393b8e80941Smrgir_expression * 394b8e80941Smrglower_ubo_reference_visitor::ubo_load(void *mem_ctx, 395b8e80941Smrg const glsl_type *type, 396b8e80941Smrg ir_rvalue *offset) 397b8e80941Smrg{ 398b8e80941Smrg ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); 399b8e80941Smrg return new(mem_ctx) 400b8e80941Smrg ir_expression(ir_binop_ubo_load, 
401b8e80941Smrg type, 402b8e80941Smrg block_ref, 403b8e80941Smrg offset); 404b8e80941Smrg 405b8e80941Smrg} 406b8e80941Smrg 407b8e80941Smrgstatic bool 408b8e80941Smrgshader_storage_buffer_object(const _mesa_glsl_parse_state *state) 409b8e80941Smrg{ 410b8e80941Smrg return state->has_shader_storage_buffer_objects(); 411b8e80941Smrg} 412b8e80941Smrg 413b8e80941Smrguint32_t 414b8e80941Smrglower_ubo_reference_visitor::ssbo_access_params() 415b8e80941Smrg{ 416b8e80941Smrg assert(variable); 417b8e80941Smrg 418b8e80941Smrg if (variable->is_interface_instance()) { 419b8e80941Smrg assert(struct_field); 420b8e80941Smrg 421b8e80941Smrg return ((struct_field->memory_coherent ? ACCESS_COHERENT : 0) | 422b8e80941Smrg (struct_field->memory_restrict ? ACCESS_RESTRICT : 0) | 423b8e80941Smrg (struct_field->memory_volatile ? ACCESS_VOLATILE : 0)); 424b8e80941Smrg } else { 425b8e80941Smrg return ((variable->data.memory_coherent ? ACCESS_COHERENT : 0) | 426b8e80941Smrg (variable->data.memory_restrict ? ACCESS_RESTRICT : 0) | 427b8e80941Smrg (variable->data.memory_volatile ? 
ACCESS_VOLATILE : 0)); 428b8e80941Smrg } 429b8e80941Smrg} 430b8e80941Smrg 431b8e80941Smrgir_call * 432b8e80941Smrglower_ubo_reference_visitor::ssbo_store(void *mem_ctx, 433b8e80941Smrg ir_rvalue *deref, 434b8e80941Smrg ir_rvalue *offset, 435b8e80941Smrg unsigned write_mask) 436b8e80941Smrg{ 437b8e80941Smrg exec_list sig_params; 438b8e80941Smrg 439b8e80941Smrg ir_variable *block_ref = new(mem_ctx) 440b8e80941Smrg ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); 441b8e80941Smrg sig_params.push_tail(block_ref); 442b8e80941Smrg 443b8e80941Smrg ir_variable *offset_ref = new(mem_ctx) 444b8e80941Smrg ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); 445b8e80941Smrg sig_params.push_tail(offset_ref); 446b8e80941Smrg 447b8e80941Smrg ir_variable *val_ref = new(mem_ctx) 448b8e80941Smrg ir_variable(deref->type, "value" , ir_var_function_in); 449b8e80941Smrg sig_params.push_tail(val_ref); 450b8e80941Smrg 451b8e80941Smrg ir_variable *writemask_ref = new(mem_ctx) 452b8e80941Smrg ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); 453b8e80941Smrg sig_params.push_tail(writemask_ref); 454b8e80941Smrg 455b8e80941Smrg ir_variable *access_ref = new(mem_ctx) 456b8e80941Smrg ir_variable(glsl_type::uint_type, "access" , ir_var_function_in); 457b8e80941Smrg sig_params.push_tail(access_ref); 458b8e80941Smrg 459b8e80941Smrg ir_function_signature *sig = new(mem_ctx) 460b8e80941Smrg ir_function_signature(glsl_type::void_type, shader_storage_buffer_object); 461b8e80941Smrg assert(sig); 462b8e80941Smrg sig->replace_parameters(&sig_params); 463b8e80941Smrg sig->intrinsic_id = ir_intrinsic_ssbo_store; 464b8e80941Smrg 465b8e80941Smrg ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo"); 466b8e80941Smrg f->add_signature(sig); 467b8e80941Smrg 468b8e80941Smrg exec_list call_params; 469b8e80941Smrg call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); 470b8e80941Smrg call_params.push_tail(offset->clone(mem_ctx, NULL)); 
471b8e80941Smrg call_params.push_tail(deref->clone(mem_ctx, NULL)); 472b8e80941Smrg call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); 473b8e80941Smrg call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params())); 474b8e80941Smrg return new(mem_ctx) ir_call(sig, NULL, &call_params); 475b8e80941Smrg} 476b8e80941Smrg 477b8e80941Smrgir_call * 478b8e80941Smrglower_ubo_reference_visitor::ssbo_load(void *mem_ctx, 479b8e80941Smrg const struct glsl_type *type, 480b8e80941Smrg ir_rvalue *offset) 481b8e80941Smrg{ 482b8e80941Smrg exec_list sig_params; 483b8e80941Smrg 484b8e80941Smrg ir_variable *block_ref = new(mem_ctx) 485b8e80941Smrg ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); 486b8e80941Smrg sig_params.push_tail(block_ref); 487b8e80941Smrg 488b8e80941Smrg ir_variable *offset_ref = new(mem_ctx) 489b8e80941Smrg ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); 490b8e80941Smrg sig_params.push_tail(offset_ref); 491b8e80941Smrg 492b8e80941Smrg ir_variable *access_ref = new(mem_ctx) 493b8e80941Smrg ir_variable(glsl_type::uint_type, "access" , ir_var_function_in); 494b8e80941Smrg sig_params.push_tail(access_ref); 495b8e80941Smrg 496b8e80941Smrg ir_function_signature *sig = 497b8e80941Smrg new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object); 498b8e80941Smrg assert(sig); 499b8e80941Smrg sig->replace_parameters(&sig_params); 500b8e80941Smrg sig->intrinsic_id = ir_intrinsic_ssbo_load; 501b8e80941Smrg 502b8e80941Smrg ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo"); 503b8e80941Smrg f->add_signature(sig); 504b8e80941Smrg 505b8e80941Smrg ir_variable *result = new(mem_ctx) 506b8e80941Smrg ir_variable(type, "ssbo_load_result", ir_var_temporary); 507b8e80941Smrg base_ir->insert_before(result); 508b8e80941Smrg ir_dereference_variable *deref_result = new(mem_ctx) 509b8e80941Smrg ir_dereference_variable(result); 510b8e80941Smrg 511b8e80941Smrg exec_list call_params; 512b8e80941Smrg 
call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); 513b8e80941Smrg call_params.push_tail(offset->clone(mem_ctx, NULL)); 514b8e80941Smrg call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params())); 515b8e80941Smrg 516b8e80941Smrg return new(mem_ctx) ir_call(sig, deref_result, &call_params); 517b8e80941Smrg} 518b8e80941Smrg 519b8e80941Smrgvoid 520b8e80941Smrglower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx, 521b8e80941Smrg ir_dereference *deref, 522b8e80941Smrg const glsl_type *type, 523b8e80941Smrg ir_rvalue *offset, 524b8e80941Smrg unsigned mask, 525b8e80941Smrg int channel) 526b8e80941Smrg{ 527b8e80941Smrg switch (this->buffer_access_type) { 528b8e80941Smrg case ubo_load_access: 529b8e80941Smrg base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), 530b8e80941Smrg ubo_load(mem_ctx, type, offset), 531b8e80941Smrg mask)); 532b8e80941Smrg break; 533b8e80941Smrg case ssbo_load_access: { 534b8e80941Smrg ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset); 535b8e80941Smrg base_ir->insert_before(load_ssbo); 536b8e80941Smrg ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); 537b8e80941Smrg ir_assignment *assignment = 538b8e80941Smrg assign(deref->clone(mem_ctx, NULL), value, mask); 539b8e80941Smrg base_ir->insert_before(assignment); 540b8e80941Smrg break; 541b8e80941Smrg } 542b8e80941Smrg case ssbo_store_access: 543b8e80941Smrg if (channel >= 0) { 544b8e80941Smrg base_ir->insert_after(ssbo_store(mem_ctx, 545b8e80941Smrg swizzle(deref, channel, 1), 546b8e80941Smrg offset, 1)); 547b8e80941Smrg } else { 548b8e80941Smrg base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask)); 549b8e80941Smrg } 550b8e80941Smrg break; 551b8e80941Smrg default: 552b8e80941Smrg unreachable("invalid buffer_access_type in insert_buffer_access"); 553b8e80941Smrg } 554b8e80941Smrg} 555b8e80941Smrg 556b8e80941Smrgvoid 557b8e80941Smrglower_ubo_reference_visitor::write_to_memory(void *mem_ctx, 558b8e80941Smrg ir_dereference 
*deref, 559b8e80941Smrg ir_variable *var, 560b8e80941Smrg ir_variable *write_var, 561b8e80941Smrg unsigned write_mask) 562b8e80941Smrg{ 563b8e80941Smrg ir_rvalue *offset = NULL; 564b8e80941Smrg unsigned const_offset; 565b8e80941Smrg bool row_major; 566b8e80941Smrg const glsl_type *matrix_type; 567b8e80941Smrg 568b8e80941Smrg enum glsl_interface_packing packing = 569b8e80941Smrg var->get_interface_type()-> 570b8e80941Smrg get_internal_ifc_packing(use_std430_as_default); 571b8e80941Smrg 572b8e80941Smrg this->buffer_access_type = ssbo_store_access; 573b8e80941Smrg this->variable = var; 574b8e80941Smrg 575b8e80941Smrg /* Compute the offset to the start if the dereference as well as other 576b8e80941Smrg * information we need to configure the write 577b8e80941Smrg */ 578b8e80941Smrg setup_for_load_or_store(mem_ctx, var, deref, 579b8e80941Smrg &offset, &const_offset, 580b8e80941Smrg &row_major, &matrix_type, 581b8e80941Smrg packing); 582b8e80941Smrg assert(offset); 583b8e80941Smrg 584b8e80941Smrg /* Now emit writes from the temporary to memory */ 585b8e80941Smrg ir_variable *write_offset = 586b8e80941Smrg new(mem_ctx) ir_variable(glsl_type::uint_type, 587b8e80941Smrg "ssbo_store_temp_offset", 588b8e80941Smrg ir_var_temporary); 589b8e80941Smrg 590b8e80941Smrg base_ir->insert_before(write_offset); 591b8e80941Smrg base_ir->insert_before(assign(write_offset, offset)); 592b8e80941Smrg 593b8e80941Smrg deref = new(mem_ctx) ir_dereference_variable(write_var); 594b8e80941Smrg emit_access(mem_ctx, true, deref, write_offset, const_offset, 595b8e80941Smrg row_major, matrix_type, packing, write_mask); 596b8e80941Smrg} 597b8e80941Smrg 598b8e80941Smrgir_visitor_status 599b8e80941Smrglower_ubo_reference_visitor::visit_enter(ir_expression *ir) 600b8e80941Smrg{ 601b8e80941Smrg check_ssbo_unsized_array_length_expression(ir); 602b8e80941Smrg return rvalue_visit(ir); 603b8e80941Smrg} 604b8e80941Smrg 605b8e80941Smrgir_expression * 
606b8e80941Smrglower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr) 607b8e80941Smrg{ 608b8e80941Smrg if (expr->operation != 609b8e80941Smrg ir_expression_operation(ir_unop_ssbo_unsized_array_length)) 610b8e80941Smrg return NULL; 611b8e80941Smrg 612b8e80941Smrg ir_rvalue *rvalue = expr->operands[0]->as_rvalue(); 613b8e80941Smrg if (!rvalue || 614b8e80941Smrg !rvalue->type->is_array() || !rvalue->type->is_unsized_array()) 615b8e80941Smrg return NULL; 616b8e80941Smrg 617b8e80941Smrg ir_dereference *deref = expr->operands[0]->as_dereference(); 618b8e80941Smrg if (!deref) 619b8e80941Smrg return NULL; 620b8e80941Smrg 621b8e80941Smrg ir_variable *var = expr->operands[0]->variable_referenced(); 622b8e80941Smrg if (!var || !var->is_in_shader_storage_block()) 623b8e80941Smrg return NULL; 624b8e80941Smrg return process_ssbo_unsized_array_length(&rvalue, deref, var); 625b8e80941Smrg} 626b8e80941Smrg 627b8e80941Smrgvoid 628b8e80941Smrglower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir) 629b8e80941Smrg{ 630b8e80941Smrg if (ir->operation == 631b8e80941Smrg ir_expression_operation(ir_unop_ssbo_unsized_array_length)) { 632b8e80941Smrg /* Don't replace this unop if it is found alone. It is going to be 633b8e80941Smrg * removed by the optimization passes or replaced if it is part of 634b8e80941Smrg * an ir_assignment or another ir_expression. 
635b8e80941Smrg */ 636b8e80941Smrg return; 637b8e80941Smrg } 638b8e80941Smrg 639b8e80941Smrg for (unsigned i = 0; i < ir->num_operands; i++) { 640b8e80941Smrg if (ir->operands[i]->ir_type != ir_type_expression) 641b8e80941Smrg continue; 642b8e80941Smrg ir_expression *expr = (ir_expression *) ir->operands[i]; 643b8e80941Smrg ir_expression *temp = calculate_ssbo_unsized_array_length(expr); 644b8e80941Smrg if (!temp) 645b8e80941Smrg continue; 646b8e80941Smrg 647b8e80941Smrg delete expr; 648b8e80941Smrg ir->operands[i] = temp; 649b8e80941Smrg } 650b8e80941Smrg} 651b8e80941Smrg 652b8e80941Smrgvoid 653b8e80941Smrglower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir) 654b8e80941Smrg{ 655b8e80941Smrg if (!ir->rhs || ir->rhs->ir_type != ir_type_expression) 656b8e80941Smrg return; 657b8e80941Smrg 658b8e80941Smrg ir_expression *expr = (ir_expression *) ir->rhs; 659b8e80941Smrg ir_expression *temp = calculate_ssbo_unsized_array_length(expr); 660b8e80941Smrg if (!temp) 661b8e80941Smrg return; 662b8e80941Smrg 663b8e80941Smrg delete expr; 664b8e80941Smrg ir->rhs = temp; 665b8e80941Smrg return; 666b8e80941Smrg} 667b8e80941Smrg 668b8e80941Smrgir_expression * 669b8e80941Smrglower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx) 670b8e80941Smrg{ 671b8e80941Smrg ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); 672b8e80941Smrg return new(mem_ctx) ir_expression(ir_unop_get_buffer_size, 673b8e80941Smrg glsl_type::int_type, 674b8e80941Smrg block_ref); 675b8e80941Smrg} 676b8e80941Smrg 677b8e80941Smrgunsigned 678b8e80941Smrglower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref, 679b8e80941Smrg enum glsl_interface_packing packing) 680b8e80941Smrg{ 681b8e80941Smrg unsigned array_stride = 0; 682b8e80941Smrg 683b8e80941Smrg switch (deref->ir_type) { 684b8e80941Smrg case ir_type_dereference_variable: 685b8e80941Smrg { 686b8e80941Smrg ir_dereference_variable *deref_var = (ir_dereference_variable 
*)deref; 687b8e80941Smrg const struct glsl_type *unsized_array_type = NULL; 688b8e80941Smrg /* An unsized array can be sized by other lowering passes, so pick 689b8e80941Smrg * the first field of the array which has the data type of the unsized 690b8e80941Smrg * array. 691b8e80941Smrg */ 692b8e80941Smrg unsized_array_type = deref_var->var->type->fields.array; 693b8e80941Smrg 694b8e80941Smrg /* Whether or not the field is row-major (because it might be a 695b8e80941Smrg * bvec2 or something) does not affect the array itself. We need 696b8e80941Smrg * to know whether an array element in its entirety is row-major. 697b8e80941Smrg */ 698b8e80941Smrg const bool array_row_major = 699b8e80941Smrg is_dereferenced_thing_row_major(deref_var); 700b8e80941Smrg 701b8e80941Smrg if (packing == GLSL_INTERFACE_PACKING_STD430) { 702b8e80941Smrg array_stride = unsized_array_type->std430_array_stride(array_row_major); 703b8e80941Smrg } else { 704b8e80941Smrg array_stride = unsized_array_type->std140_size(array_row_major); 705b8e80941Smrg array_stride = glsl_align(array_stride, 16); 706b8e80941Smrg } 707b8e80941Smrg break; 708b8e80941Smrg } 709b8e80941Smrg case ir_type_dereference_record: 710b8e80941Smrg { 711b8e80941Smrg ir_dereference_record *deref_record = (ir_dereference_record *) deref; 712b8e80941Smrg ir_dereference *interface_deref = 713b8e80941Smrg deref_record->record->as_dereference(); 714b8e80941Smrg assert(interface_deref != NULL); 715b8e80941Smrg const struct glsl_type *interface_type = interface_deref->type; 716b8e80941Smrg unsigned record_length = interface_type->length; 717b8e80941Smrg /* Unsized array is always the last element of the interface */ 718b8e80941Smrg const struct glsl_type *unsized_array_type = 719b8e80941Smrg interface_type->fields.structure[record_length - 1].type->fields.array; 720b8e80941Smrg 721b8e80941Smrg const bool array_row_major = 722b8e80941Smrg is_dereferenced_thing_row_major(deref_record); 723b8e80941Smrg 724b8e80941Smrg if (packing == 
GLSL_INTERFACE_PACKING_STD430) { 725b8e80941Smrg array_stride = unsized_array_type->std430_array_stride(array_row_major); 726b8e80941Smrg } else { 727b8e80941Smrg array_stride = unsized_array_type->std140_size(array_row_major); 728b8e80941Smrg array_stride = glsl_align(array_stride, 16); 729b8e80941Smrg } 730b8e80941Smrg break; 731b8e80941Smrg } 732b8e80941Smrg default: 733b8e80941Smrg unreachable("Unsupported dereference type"); 734b8e80941Smrg } 735b8e80941Smrg return array_stride; 736b8e80941Smrg}

/**
 * Build the IR expression that yields the length of an unsized SSBO array.
 *
 * The expression has the form
 *    max((buffer_size - offset_of_array) / stride_of_array, 0)
 *
 * \param rvalue  Only used to find the ralloc context for new IR nodes.
 * \param deref   Dereference of the unsized array.
 * \param var     Variable the dereference refers to; its interface type
 *                selects the packing used for stride and offset.
 *
 * As a side effect this sets \c buffer_access_type and \c variable on the
 * visitor before calling setup_for_load_or_store().
 */
ir_expression *
lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
                                                               ir_dereference *deref,
                                                               ir_variable *var)
{
   void *mem_ctx = ralloc_parent(*rvalue);

   ir_rvalue *base_offset = NULL;
   unsigned const_offset;
   bool row_major;
   const glsl_type *matrix_type;

   enum glsl_interface_packing packing =
      var->get_interface_type()->
         get_internal_ifc_packing(use_std430_as_default);
   int unsized_array_stride =
      calculate_unsized_array_stride(deref, packing);

   this->buffer_access_type = ssbo_unsized_array_length_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to calculate the length.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &base_offset, &const_offset,
                           &row_major, &matrix_type,
                           packing);
   /* array.length() =
    *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
    */
   ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);

   ir_expression *offset_of_array = new(mem_ctx)
      ir_expression(ir_binop_add, base_offset,
                    new(mem_ctx) ir_constant(const_offset));
   /* The offset is converted with u2i before the subtraction; the stride
    * and the zero used below are int constants as well.
    */
   ir_expression *offset_of_array_int = new(mem_ctx)
      ir_expression(ir_unop_u2i, offset_of_array);

   ir_expression *sub = new(mem_ctx)
      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
   ir_expression *div = new(mem_ctx)
      ir_expression(ir_binop_div, sub,
                    new(mem_ctx) ir_constant(unsized_array_stride));
   ir_expression *max = new(mem_ctx)
      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));

   return max;
}

/**
 * Redirect an assignment whose LHS is a shader storage block variable.
 *
 * The assignment is rewritten to target a new temporary, and
 * write_to_memory() is then called for that temporary using the original
 * dereference and write mask. Does nothing when the LHS is missing, is not
 * a dereference, or does not reference a shader storage block variable.
 */
void
lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
{
   if (!ir || !ir->lhs)
      return;

   ir_rvalue *rvalue = ir->lhs->as_rvalue();
   if (!rvalue)
      return;

   ir_dereference *deref = ir->lhs->as_dereference();
   if (!deref)
      return;

   ir_variable *var = ir->lhs->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return;

   /* We have a write to a buffer variable, so declare a temporary and rewrite
    * the assignment so that the temporary is the LHS.
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   const glsl_type *type = rvalue->type;
   ir_variable *write_var = new(mem_ctx) ir_variable(type,
                                                     "ssbo_store_temp",
                                                     ir_var_temporary);
   base_ir->insert_before(write_var);
   ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);

   /* Now we have to write the value assigned to the temporary back to memory */
   write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
   progress = true;
}

/**
 * Return true if \c var lives in a buffer block or has shared storage mode
 * (ir_var_shader_shared).
 */
static bool
is_buffer_backed_variable(ir_variable *var)
{
   return var->is_in_buffer_block() ||
          var->data.mode == ir_var_shader_shared;
}

/**
 * Split a whole-array copy from a buffer-backed variable into one
 * assignment per element.
 *
 * \return true if the assignment was split (and removed from the
 *         instruction stream), false if it was left untouched.
 */
bool
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be arrays
    * FIXME: arrays of arrays?
    */
   if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the array copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->length == rhs_deref->type->length);
   void *mem_ctx = ralloc_parent(shader->ir);

   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      ir_dereference *lhs_i =
         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));

      ir_dereference *rhs_i =
         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));
      ir->insert_after(assign(lhs_i, rhs_i));
   }

   /* The element copies replace the original whole-array assignment. */
   ir->remove();
   progress = true;
   return true;
}

/**
 * Split a whole-struct copy from a buffer-backed variable into one
 * assignment per field.
 *
 * \return true if the assignment was split (and removed from the
 *         instruction stream), false if it was left untouched.
 */
bool
lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be records */
   if (!ir->lhs->type->is_struct() || !ir->rhs->type->is_struct())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the struct copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type == rhs_deref->type);
   void *mem_ctx = ralloc_parent(shader->ir);

   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      const char *field_name = lhs_deref->type->fields.structure[i].name;
      ir_dereference *lhs_field =
         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir_dereference *rhs_field =
         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir->insert_after(assign(lhs_field, rhs_field));
   }

   /* The field copies replace the original whole-struct assignment. */
   ir->remove();
   progress = true;
   return true;
}

/**
 * Assignment hook: split buffer-backed aggregate copies, otherwise lower
 * unsized-array-length uses and SSBO stores before visiting the rvalues.
 */
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
{
   /* Array and struct copies could involve large amounts of load/store
    * operations. To improve register pressure we want to special-case
    * these and split them into individual element copies.
    * This way we avoid emitting all the loads for the RHS first and
    * all the writes for the LHS second and register usage is more
    * efficient.
    */
   if (check_for_buffer_array_copy(ir))
      return visit_continue_with_parent;

   if (check_for_buffer_struct_copy(ir))
      return visit_continue_with_parent;

   check_ssbo_unsized_array_length_assignment(ir);
   check_for_ssbo_store(ir);
   return rvalue_visit(ir);
}

/* Lowers the intrinsic call to a new internal intrinsic that swaps the
 * access to the buffer variable in the first parameter by an offset
 * and block index. This involves creating the new internal intrinsic
 * (i.e. the new function signature).
 *
 * Returns a newly allocated call to the internal intrinsic; the original
 * call is not modified here.
 */
ir_call *
lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
{
   /* SSBO atomics usually have 2 parameters, the buffer variable and an
    * integer argument. The exception is CompSwap, that has an additional
    * integer parameter.
    */
   int param_count = ir->actual_parameters.length();
   assert(param_count == 2 || param_count == 3);

   /* First argument must be a scalar integer or float buffer variable */
   exec_node *param = ir->actual_parameters.get_head();
   ir_instruction *inst = (ir_instruction *) param;
   assert(inst->ir_type == ir_type_dereference_variable ||
          inst->ir_type == ir_type_dereference_array ||
          inst->ir_type == ir_type_dereference_record ||
          inst->ir_type == ir_type_swizzle);

   ir_rvalue *deref = (ir_rvalue *) inst;
   assert(deref->type->is_scalar() &&
          (deref->type->is_integer() || deref->type->is_float()));

   ir_variable *var = deref->variable_referenced();
   assert(var);

   /* Compute the offset to the start of the dereference and the
    * block index
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   const glsl_type *matrix_type;

   enum glsl_interface_packing packing =
      var->get_interface_type()->
         get_internal_ifc_packing(use_std430_as_default);

   this->buffer_access_type = ssbo_atomic_access;
   this->variable = var;

   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_type,
                           packing);
   assert(offset);
   assert(!row_major);
   assert(matrix_type == NULL);

   ir_rvalue *deref_offset =
      add(offset, new(mem_ctx) ir_constant(const_offset));
   ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);

   /* Create the new internal function signature that will take a block
    * index and offset instead of a buffer variable
    */
   exec_list sig_params;
   ir_variable *sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref", ir_var_function_in);
   sig_params.push_tail(sig_param);

   sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset", ir_var_function_in);
   sig_params.push_tail(sig_param);

   const glsl_type *type = deref->type->get_scalar_type();
   sig_param = new(mem_ctx)
      ir_variable(type, "data1", ir_var_function_in);
   sig_params.push_tail(sig_param);

   if (param_count == 3) {
      sig_param = new(mem_ctx)
         ir_variable(type, "data2", ir_var_function_in);
      sig_params.push_tail(sig_param);
   }

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(deref->type,
                                         shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);

   assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
   assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
   sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo);

   /* Bounded formatting: a callee name that does not fit is truncated
    * instead of writing past the end of func_name.
    */
   char func_name[64];
   snprintf(func_name, sizeof(func_name), "%s_ssbo", ir->callee_name());
   ir_function *f = new(mem_ctx) ir_function(func_name);
   f->add_signature(sig);

   /* Now, create the call to the internal intrinsic */
   exec_list call_params;
   call_params.push_tail(block_index);
   call_params.push_tail(deref_offset);
   param = ir->actual_parameters.get_head()->get_next();
   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   if (param_count == 3) {
      param = param->get_next();
      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   }
   ir_dereference_variable *return_deref =
      ir->return_deref->clone(mem_ctx, NULL);
   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
}

/**
 * Lower \c ir if it is a generic atomic intrinsic whose first argument
 * references a shader storage block variable.
 *
 * \return the lowered call, or \c ir itself when nothing was lowered.
 */
ir_call *
lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
{
   exec_list& params = ir->actual_parameters;

   /* Query the list length once and reuse it for both bounds. */
   const unsigned num_params = params.length();
   if (num_params < 2 || num_params > 3)
      return ir;

   ir_rvalue *rvalue =
      ((ir_instruction *) params.get_head())->as_rvalue();
   if (!rvalue)
      return ir;

   ir_variable *var = rvalue->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return ir;

   const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
   if (id == ir_intrinsic_generic_atomic_add ||
       id == ir_intrinsic_generic_atomic_min ||
       id == ir_intrinsic_generic_atomic_max ||
       id == ir_intrinsic_generic_atomic_and ||
       id == ir_intrinsic_generic_atomic_or ||
       id == ir_intrinsic_generic_atomic_xor ||
       id == ir_intrinsic_generic_atomic_exchange ||
       id == ir_intrinsic_generic_atomic_comp_swap) {
      return lower_ssbo_atomic_intrinsic(ir);
   }

   return ir;
}


/**
 * Call hook: replace SSBO atomic intrinsic calls with their lowered form;
 * any other call goes through the regular rvalue visit.
 */
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_call *ir)
{
   ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
   if (new_ir != ir) {
      progress = true;
      base_ir->replace_with(new_ir);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}


/**
 * Texture hook: a sampler that is a record dereference is passed straight
 * to handle_rvalue(); any other sampler goes through the regular rvalue
 * visit.
 */
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_texture *ir)
{
   ir_dereference *sampler = ir->sampler;

   if (sampler->ir_type == ir_type_dereference_record) {
      handle_rvalue((ir_rvalue **)&ir->sampler);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}


} /* unnamed namespace */

/**
 * Entry point of the pass: run the lowering visitor over the shader's IR
 * until a full traversal makes no further progress.
 */
void
lower_ubo_reference(struct gl_linked_shader *shader,
                    bool clamp_block_indices, bool use_std430_as_default)
{
   lower_ubo_reference_visitor v(shader, clamp_block_indices,
                                 use_std430_as_default);

   /* Loop over the instructions lowering references, because taking
    * a deref of a UBO array using a UBO dereference as the index will
    * produce a collection of instructions all of which have cloned
    * UBO dereferences for that array index.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);
}