1b8e80941Smrg/* 2b8e80941Smrg * Copyright (c) 2015 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21b8e80941Smrg * DEALINGS IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/** 25b8e80941Smrg * \file lower_shared_reference.cpp 26b8e80941Smrg * 27b8e80941Smrg * IR lower pass to replace dereferences of compute shader shared variables 28b8e80941Smrg * with intrinsic function calls. 29b8e80941Smrg * 30b8e80941Smrg * This relieves drivers of the responsibility of allocating space for the 31b8e80941Smrg * shared variables in the shared memory region. 32b8e80941Smrg */ 33b8e80941Smrg 34b8e80941Smrg#include "lower_buffer_access.h" 35b8e80941Smrg#include "ir_builder.h" 36b8e80941Smrg#include "linker.h" 37b8e80941Smrg#include "main/macros.h" 38b8e80941Smrg#include "util/list.h" 39b8e80941Smrg#include "glsl_parser_extras.h" 40b8e80941Smrg#include "main/mtypes.h" 41b8e80941Smrg 42b8e80941Smrgusing namespace ir_builder; 43b8e80941Smrg 44b8e80941Smrgnamespace { 45b8e80941Smrg 46b8e80941Smrgstruct var_offset { 47b8e80941Smrg struct list_head node; 48b8e80941Smrg const ir_variable *var; 49b8e80941Smrg unsigned offset; 50b8e80941Smrg}; 51b8e80941Smrg 52b8e80941Smrgclass lower_shared_reference_visitor : 53b8e80941Smrg public lower_buffer_access::lower_buffer_access { 54b8e80941Smrgpublic: 55b8e80941Smrg 56b8e80941Smrg lower_shared_reference_visitor(struct gl_linked_shader *shader) 57b8e80941Smrg : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u) 58b8e80941Smrg { 59b8e80941Smrg list_inithead(&var_offsets); 60b8e80941Smrg } 61b8e80941Smrg 62b8e80941Smrg ~lower_shared_reference_visitor() 63b8e80941Smrg { 64b8e80941Smrg ralloc_free(list_ctx); 65b8e80941Smrg } 66b8e80941Smrg 67b8e80941Smrg enum { 68b8e80941Smrg shared_load_access, 69b8e80941Smrg shared_store_access, 70b8e80941Smrg shared_atomic_access, 71b8e80941Smrg } buffer_access_type; 72b8e80941Smrg 73b8e80941Smrg void insert_buffer_access(void *mem_ctx, ir_dereference *deref, 74b8e80941Smrg const glsl_type *type, ir_rvalue *offset, 75b8e80941Smrg unsigned mask, int channel); 76b8e80941Smrg 77b8e80941Smrg void handle_rvalue(ir_rvalue **rvalue); 78b8e80941Smrg ir_visitor_status visit_enter(ir_assignment *ir); 79b8e80941Smrg void handle_assignment(ir_assignment *ir); 80b8e80941Smrg 81b8e80941Smrg ir_call *lower_shared_atomic_intrinsic(ir_call *ir); 82b8e80941Smrg ir_call *check_for_shared_atomic_intrinsic(ir_call *ir); 83b8e80941Smrg ir_visitor_status visit_enter(ir_call *ir); 84b8e80941Smrg 85b8e80941Smrg unsigned get_shared_offset(const ir_variable *); 86b8e80941Smrg 87b8e80941Smrg ir_call *shared_load(void *mem_ctx, const struct glsl_type *type, 88b8e80941Smrg ir_rvalue *offset); 89b8e80941Smrg ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, 90b8e80941Smrg unsigned write_mask); 91b8e80941Smrg 92b8e80941Smrg void *list_ctx; 93b8e80941Smrg struct gl_linked_shader *shader; 94b8e80941Smrg struct list_head var_offsets; 95b8e80941Smrg unsigned shared_size; 96b8e80941Smrg bool progress; 97b8e80941Smrg}; 98b8e80941Smrg 99b8e80941Smrgunsigned 100b8e80941Smrglower_shared_reference_visitor::get_shared_offset(const ir_variable *var) 101b8e80941Smrg{ 102b8e80941Smrg list_for_each_entry(var_offset, var_entry, &var_offsets, node) { 103b8e80941Smrg if (var_entry->var == var) 104b8e80941Smrg return var_entry->offset; 105b8e80941Smrg } 106b8e80941Smrg 107b8e80941Smrg struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset); 108b8e80941Smrg list_add(&new_entry->node, &var_offsets); 109b8e80941Smrg new_entry->var = var; 110b8e80941Smrg 111b8e80941Smrg unsigned var_align = var->type->std430_base_alignment(false); 112b8e80941Smrg new_entry->offset = glsl_align(shared_size, var_align); 113b8e80941Smrg 114b8e80941Smrg unsigned var_size = var->type->std430_size(false); 115b8e80941Smrg shared_size = new_entry->offset + var_size; 116b8e80941Smrg 117b8e80941Smrg return new_entry->offset; 118b8e80941Smrg} 119b8e80941Smrg 120b8e80941Smrgvoid 121b8e80941Smrglower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue) 122b8e80941Smrg{ 123b8e80941Smrg if (!*rvalue) 124b8e80941Smrg return; 125b8e80941Smrg 126b8e80941Smrg ir_dereference *deref = (*rvalue)->as_dereference(); 127b8e80941Smrg if (!deref) 128b8e80941Smrg return; 129b8e80941Smrg 130b8e80941Smrg ir_variable *var = deref->variable_referenced(); 131b8e80941Smrg if (!var || var->data.mode != ir_var_shader_shared) 132b8e80941Smrg return; 133b8e80941Smrg 134b8e80941Smrg buffer_access_type = shared_load_access; 135b8e80941Smrg 136b8e80941Smrg void *mem_ctx = ralloc_parent(shader->ir); 137b8e80941Smrg 138b8e80941Smrg ir_rvalue *offset = NULL; 139b8e80941Smrg unsigned const_offset = get_shared_offset(var); 140b8e80941Smrg bool row_major; 141b8e80941Smrg const glsl_type *matrix_type; 142b8e80941Smrg assert(var->get_interface_type() == NULL); 143b8e80941Smrg const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430; 144b8e80941Smrg 145b8e80941Smrg setup_buffer_access(mem_ctx, deref, 146b8e80941Smrg &offset, &const_offset, 147b8e80941Smrg &row_major, &matrix_type, NULL, packing); 148b8e80941Smrg 149b8e80941Smrg /* Now that we've calculated the offset to the start of the 150b8e80941Smrg * dereference, walk over the type and emit loads into a temporary. 151b8e80941Smrg */ 152b8e80941Smrg const glsl_type *type = (*rvalue)->type; 153b8e80941Smrg ir_variable *load_var = new(mem_ctx) ir_variable(type, 154b8e80941Smrg "shared_load_temp", 155b8e80941Smrg ir_var_temporary); 156b8e80941Smrg base_ir->insert_before(load_var); 157b8e80941Smrg 158b8e80941Smrg ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, 159b8e80941Smrg "shared_load_temp_offset", 160b8e80941Smrg ir_var_temporary); 161b8e80941Smrg base_ir->insert_before(load_offset); 162b8e80941Smrg base_ir->insert_before(assign(load_offset, offset)); 163b8e80941Smrg 164b8e80941Smrg deref = new(mem_ctx) ir_dereference_variable(load_var); 165b8e80941Smrg 166b8e80941Smrg emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major, 167b8e80941Smrg matrix_type, packing, 0); 168b8e80941Smrg 169b8e80941Smrg *rvalue = deref; 170b8e80941Smrg 171b8e80941Smrg progress = true; 172b8e80941Smrg} 173b8e80941Smrg 174b8e80941Smrgvoid 175b8e80941Smrglower_shared_reference_visitor::handle_assignment(ir_assignment *ir) 176b8e80941Smrg{ 177b8e80941Smrg if (!ir || !ir->lhs) 178b8e80941Smrg return; 179b8e80941Smrg 180b8e80941Smrg ir_rvalue *rvalue = ir->lhs->as_rvalue(); 181b8e80941Smrg if (!rvalue) 182b8e80941Smrg return; 183b8e80941Smrg 184b8e80941Smrg ir_dereference *deref = ir->lhs->as_dereference(); 185b8e80941Smrg if (!deref) 186b8e80941Smrg return; 187b8e80941Smrg 188b8e80941Smrg ir_variable *var = ir->lhs->variable_referenced(); 189b8e80941Smrg if (!var || var->data.mode != ir_var_shader_shared) 190b8e80941Smrg return; 191b8e80941Smrg 192b8e80941Smrg buffer_access_type = shared_store_access; 193b8e80941Smrg 194b8e80941Smrg /* We have a write to a shared variable, so declare a temporary and rewrite 195b8e80941Smrg * the assignment so that the temporary is the LHS. 196b8e80941Smrg */ 197b8e80941Smrg void *mem_ctx = ralloc_parent(shader->ir); 198b8e80941Smrg 199b8e80941Smrg const glsl_type *type = rvalue->type; 200b8e80941Smrg ir_variable *store_var = new(mem_ctx) ir_variable(type, 201b8e80941Smrg "shared_store_temp", 202b8e80941Smrg ir_var_temporary); 203b8e80941Smrg base_ir->insert_before(store_var); 204b8e80941Smrg ir->lhs = new(mem_ctx) ir_dereference_variable(store_var); 205b8e80941Smrg 206b8e80941Smrg ir_rvalue *offset = NULL; 207b8e80941Smrg unsigned const_offset = get_shared_offset(var); 208b8e80941Smrg bool row_major; 209b8e80941Smrg const glsl_type *matrix_type; 210b8e80941Smrg assert(var->get_interface_type() == NULL); 211b8e80941Smrg const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430; 212b8e80941Smrg 213b8e80941Smrg setup_buffer_access(mem_ctx, deref, 214b8e80941Smrg &offset, &const_offset, 215b8e80941Smrg &row_major, &matrix_type, NULL, packing); 216b8e80941Smrg 217b8e80941Smrg deref = new(mem_ctx) ir_dereference_variable(store_var); 218b8e80941Smrg 219b8e80941Smrg ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, 220b8e80941Smrg "shared_store_temp_offset", 221b8e80941Smrg ir_var_temporary); 222b8e80941Smrg base_ir->insert_before(store_offset); 223b8e80941Smrg base_ir->insert_before(assign(store_offset, offset)); 224b8e80941Smrg 225b8e80941Smrg /* Now we have to write the value assigned to the temporary back to memory */ 226b8e80941Smrg emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major, 227b8e80941Smrg matrix_type, packing, ir->write_mask); 228b8e80941Smrg 229b8e80941Smrg progress = true; 230b8e80941Smrg} 231b8e80941Smrg 232b8e80941Smrgir_visitor_status 233b8e80941Smrglower_shared_reference_visitor::visit_enter(ir_assignment *ir) 234b8e80941Smrg{ 235b8e80941Smrg handle_assignment(ir); 236b8e80941Smrg return rvalue_visit(ir); 237b8e80941Smrg} 238b8e80941Smrg 239b8e80941Smrgvoid 240b8e80941Smrglower_shared_reference_visitor::insert_buffer_access(void *mem_ctx, 241b8e80941Smrg ir_dereference *deref, 242b8e80941Smrg const glsl_type *type, 243b8e80941Smrg ir_rvalue *offset, 244b8e80941Smrg unsigned mask, 245b8e80941Smrg int /* channel */) 246b8e80941Smrg{ 247b8e80941Smrg if (buffer_access_type == shared_store_access) { 248b8e80941Smrg ir_call *store = shared_store(mem_ctx, deref, offset, mask); 249b8e80941Smrg base_ir->insert_after(store); 250b8e80941Smrg } else { 251b8e80941Smrg ir_call *load = shared_load(mem_ctx, type, offset); 252b8e80941Smrg base_ir->insert_before(load); 253b8e80941Smrg ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL); 254b8e80941Smrg base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), 255b8e80941Smrg value)); 256b8e80941Smrg } 257b8e80941Smrg} 258b8e80941Smrg 259b8e80941Smrgstatic bool 260b8e80941Smrgcompute_shader_enabled(const _mesa_glsl_parse_state *state) 261b8e80941Smrg{ 262b8e80941Smrg return state->stage == MESA_SHADER_COMPUTE; 263b8e80941Smrg} 264b8e80941Smrg 265b8e80941Smrgir_call * 266b8e80941Smrglower_shared_reference_visitor::shared_store(void *mem_ctx, 267b8e80941Smrg ir_rvalue *deref, 268b8e80941Smrg ir_rvalue *offset, 269b8e80941Smrg unsigned write_mask) 270b8e80941Smrg{ 271b8e80941Smrg exec_list sig_params; 272b8e80941Smrg 273b8e80941Smrg ir_variable *offset_ref = new(mem_ctx) 274b8e80941Smrg ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); 275b8e80941Smrg sig_params.push_tail(offset_ref); 276b8e80941Smrg 277b8e80941Smrg ir_variable *val_ref = new(mem_ctx) 278b8e80941Smrg ir_variable(deref->type, "value" , ir_var_function_in); 279b8e80941Smrg sig_params.push_tail(val_ref); 280b8e80941Smrg 281b8e80941Smrg ir_variable *writemask_ref = new(mem_ctx) 282b8e80941Smrg ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); 283b8e80941Smrg sig_params.push_tail(writemask_ref); 284b8e80941Smrg 285b8e80941Smrg ir_function_signature *sig = new(mem_ctx) 286b8e80941Smrg ir_function_signature(glsl_type::void_type, compute_shader_enabled); 287b8e80941Smrg assert(sig); 288b8e80941Smrg sig->replace_parameters(&sig_params); 289b8e80941Smrg sig->intrinsic_id = ir_intrinsic_shared_store; 290b8e80941Smrg 291b8e80941Smrg ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared"); 292b8e80941Smrg f->add_signature(sig); 293b8e80941Smrg 294b8e80941Smrg exec_list call_params; 295b8e80941Smrg call_params.push_tail(offset->clone(mem_ctx, NULL)); 296b8e80941Smrg call_params.push_tail(deref->clone(mem_ctx, NULL)); 297b8e80941Smrg call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); 298b8e80941Smrg return new(mem_ctx) ir_call(sig, NULL, &call_params); 299b8e80941Smrg} 300b8e80941Smrg 301b8e80941Smrgir_call * 302b8e80941Smrglower_shared_reference_visitor::shared_load(void *mem_ctx, 303b8e80941Smrg const struct glsl_type *type, 304b8e80941Smrg ir_rvalue *offset) 305b8e80941Smrg{ 306b8e80941Smrg exec_list sig_params; 307b8e80941Smrg 308b8e80941Smrg ir_variable *offset_ref = new(mem_ctx) 309b8e80941Smrg ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); 310b8e80941Smrg sig_params.push_tail(offset_ref); 311b8e80941Smrg 312b8e80941Smrg ir_function_signature *sig = 313b8e80941Smrg new(mem_ctx) ir_function_signature(type, compute_shader_enabled); 314b8e80941Smrg assert(sig); 315b8e80941Smrg sig->replace_parameters(&sig_params); 316b8e80941Smrg sig->intrinsic_id = ir_intrinsic_shared_load; 317b8e80941Smrg 318b8e80941Smrg ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared"); 319b8e80941Smrg f->add_signature(sig); 320b8e80941Smrg 321b8e80941Smrg ir_variable *result = new(mem_ctx) 322b8e80941Smrg ir_variable(type, "shared_load_result", ir_var_temporary); 323b8e80941Smrg base_ir->insert_before(result); 324b8e80941Smrg ir_dereference_variable *deref_result = new(mem_ctx) 325b8e80941Smrg ir_dereference_variable(result); 326b8e80941Smrg 327b8e80941Smrg exec_list call_params; 328b8e80941Smrg call_params.push_tail(offset->clone(mem_ctx, NULL)); 329b8e80941Smrg 330b8e80941Smrg return new(mem_ctx) ir_call(sig, deref_result, &call_params); 331b8e80941Smrg} 332b8e80941Smrg 333b8e80941Smrg/* Lowers the intrinsic call to a new internal intrinsic that swaps the access 334b8e80941Smrg * to the shared variable in the first parameter by an offset. This involves 335b8e80941Smrg * creating the new internal intrinsic (i.e. the new function signature). 336b8e80941Smrg */ 337b8e80941Smrgir_call * 338b8e80941Smrglower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) 339b8e80941Smrg{ 340b8e80941Smrg /* Shared atomics usually have 2 parameters, the shared variable and an 341b8e80941Smrg * integer argument. The exception is CompSwap, that has an additional 342b8e80941Smrg * integer parameter. 343b8e80941Smrg */ 344b8e80941Smrg int param_count = ir->actual_parameters.length(); 345b8e80941Smrg assert(param_count == 2 || param_count == 3); 346b8e80941Smrg 347b8e80941Smrg /* First argument must be a scalar integer shared variable */ 348b8e80941Smrg exec_node *param = ir->actual_parameters.get_head(); 349b8e80941Smrg ir_instruction *inst = (ir_instruction *) param; 350b8e80941Smrg assert(inst->ir_type == ir_type_dereference_variable || 351b8e80941Smrg inst->ir_type == ir_type_dereference_array || 352b8e80941Smrg inst->ir_type == ir_type_dereference_record || 353b8e80941Smrg inst->ir_type == ir_type_swizzle); 354b8e80941Smrg 355b8e80941Smrg ir_rvalue *deref = (ir_rvalue *) inst; 356b8e80941Smrg assert(deref->type->is_scalar() && 357b8e80941Smrg (deref->type->is_integer() || deref->type->is_float())); 358b8e80941Smrg 359b8e80941Smrg ir_variable *var = deref->variable_referenced(); 360b8e80941Smrg assert(var); 361b8e80941Smrg 362b8e80941Smrg /* Compute the offset to the start if the dereference 363b8e80941Smrg */ 364b8e80941Smrg void *mem_ctx = ralloc_parent(shader->ir); 365b8e80941Smrg 366b8e80941Smrg ir_rvalue *offset = NULL; 367b8e80941Smrg unsigned const_offset = get_shared_offset(var); 368b8e80941Smrg bool row_major; 369b8e80941Smrg const glsl_type *matrix_type; 370b8e80941Smrg assert(var->get_interface_type() == NULL); 371b8e80941Smrg const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430; 372b8e80941Smrg buffer_access_type = shared_atomic_access; 373b8e80941Smrg 374b8e80941Smrg setup_buffer_access(mem_ctx, deref, 375b8e80941Smrg &offset, &const_offset, 376b8e80941Smrg &row_major, &matrix_type, NULL, packing); 377b8e80941Smrg 378b8e80941Smrg assert(offset); 379b8e80941Smrg assert(!row_major); 380b8e80941Smrg assert(matrix_type == NULL); 381b8e80941Smrg 382b8e80941Smrg ir_rvalue *deref_offset = 383b8e80941Smrg add(offset, new(mem_ctx) ir_constant(const_offset)); 384b8e80941Smrg 385b8e80941Smrg /* Create the new internal function signature that will take an offset 386b8e80941Smrg * instead of a shared variable 387b8e80941Smrg */ 388b8e80941Smrg exec_list sig_params; 389b8e80941Smrg ir_variable *sig_param = new(mem_ctx) 390b8e80941Smrg ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); 391b8e80941Smrg sig_params.push_tail(sig_param); 392b8e80941Smrg 393b8e80941Smrg const glsl_type *type = deref->type->get_scalar_type(); 394b8e80941Smrg sig_param = new(mem_ctx) 395b8e80941Smrg ir_variable(type, "data1", ir_var_function_in); 396b8e80941Smrg sig_params.push_tail(sig_param); 397b8e80941Smrg 398b8e80941Smrg if (param_count == 3) { 399b8e80941Smrg sig_param = new(mem_ctx) 400b8e80941Smrg ir_variable(type, "data2", ir_var_function_in); 401b8e80941Smrg sig_params.push_tail(sig_param); 402b8e80941Smrg } 403b8e80941Smrg 404b8e80941Smrg ir_function_signature *sig = 405b8e80941Smrg new(mem_ctx) ir_function_signature(deref->type, 406b8e80941Smrg compute_shader_enabled); 407b8e80941Smrg assert(sig); 408b8e80941Smrg sig->replace_parameters(&sig_params); 409b8e80941Smrg 410b8e80941Smrg assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load); 411b8e80941Smrg assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap); 412b8e80941Smrg sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, shared); 413b8e80941Smrg 414b8e80941Smrg char func_name[64]; 415b8e80941Smrg sprintf(func_name, "%s_shared", ir->callee_name()); 416b8e80941Smrg ir_function *f = new(mem_ctx) ir_function(func_name); 417b8e80941Smrg f->add_signature(sig); 418b8e80941Smrg 419b8e80941Smrg /* Now, create the call to the internal intrinsic */ 420b8e80941Smrg exec_list call_params; 421b8e80941Smrg call_params.push_tail(deref_offset); 422b8e80941Smrg param = ir->actual_parameters.get_head()->get_next(); 423b8e80941Smrg ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); 424b8e80941Smrg call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); 425b8e80941Smrg if (param_count == 3) { 426b8e80941Smrg param = param->get_next(); 427b8e80941Smrg param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); 428b8e80941Smrg call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); 429b8e80941Smrg } 430b8e80941Smrg ir_dereference_variable *return_deref = 431b8e80941Smrg ir->return_deref->clone(mem_ctx, NULL); 432b8e80941Smrg return new(mem_ctx) ir_call(sig, return_deref, &call_params); 433b8e80941Smrg} 434b8e80941Smrg 435b8e80941Smrgir_call * 436b8e80941Smrglower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir) 437b8e80941Smrg{ 438b8e80941Smrg exec_list& params = ir->actual_parameters; 439b8e80941Smrg 440b8e80941Smrg if (params.length() < 2 || params.length() > 3) 441b8e80941Smrg return ir; 442b8e80941Smrg 443b8e80941Smrg ir_rvalue *rvalue = 444b8e80941Smrg ((ir_instruction *) params.get_head())->as_rvalue(); 445b8e80941Smrg if (!rvalue) 446b8e80941Smrg return ir; 447b8e80941Smrg 448b8e80941Smrg ir_variable *var = rvalue->variable_referenced(); 449b8e80941Smrg if (!var || var->data.mode != ir_var_shader_shared) 450b8e80941Smrg return ir; 451b8e80941Smrg 452b8e80941Smrg const enum ir_intrinsic_id id = ir->callee->intrinsic_id; 453b8e80941Smrg if (id == ir_intrinsic_generic_atomic_add || 454b8e80941Smrg id == ir_intrinsic_generic_atomic_min || 455b8e80941Smrg id == ir_intrinsic_generic_atomic_max || 456b8e80941Smrg id == ir_intrinsic_generic_atomic_and || 457b8e80941Smrg id == ir_intrinsic_generic_atomic_or || 458b8e80941Smrg id == ir_intrinsic_generic_atomic_xor || 459b8e80941Smrg id == ir_intrinsic_generic_atomic_exchange || 460b8e80941Smrg id == ir_intrinsic_generic_atomic_comp_swap) { 461b8e80941Smrg return lower_shared_atomic_intrinsic(ir); 462b8e80941Smrg } 463b8e80941Smrg 464b8e80941Smrg return ir; 465b8e80941Smrg} 466b8e80941Smrg 467b8e80941Smrgir_visitor_status 468b8e80941Smrglower_shared_reference_visitor::visit_enter(ir_call *ir) 469b8e80941Smrg{ 470b8e80941Smrg ir_call *new_ir = check_for_shared_atomic_intrinsic(ir); 471b8e80941Smrg if (new_ir != ir) { 472b8e80941Smrg progress = true; 473b8e80941Smrg base_ir->replace_with(new_ir); 474b8e80941Smrg return visit_continue_with_parent; 475b8e80941Smrg } 476b8e80941Smrg 477b8e80941Smrg return rvalue_visit(ir); 478b8e80941Smrg} 479b8e80941Smrg 480b8e80941Smrg} /* unnamed namespace */ 481b8e80941Smrg 482b8e80941Smrgvoid 483b8e80941Smrglower_shared_reference(struct gl_context *ctx, 484b8e80941Smrg struct gl_shader_program *prog, 485b8e80941Smrg struct gl_linked_shader *shader) 486b8e80941Smrg{ 487b8e80941Smrg if (shader->Stage != MESA_SHADER_COMPUTE) 488b8e80941Smrg return; 489b8e80941Smrg 490b8e80941Smrg lower_shared_reference_visitor v(shader); 491b8e80941Smrg 492b8e80941Smrg /* Loop over the instructions lowering references, because we take a deref 493b8e80941Smrg * of an shared variable array using a shared variable dereference as the 494b8e80941Smrg * index will produce a collection of instructions all of which have cloned 495b8e80941Smrg * shared variable dereferences for that array index. 496b8e80941Smrg */ 497b8e80941Smrg do { 498b8e80941Smrg v.progress = false; 499b8e80941Smrg visit_list_elements(&v, shader->ir); 500b8e80941Smrg } while (v.progress); 501b8e80941Smrg 502b8e80941Smrg prog->Comp.SharedSize = v.shared_size; 503b8e80941Smrg 504b8e80941Smrg /* Section 19.1 (Compute Shader Variables) of the OpenGL 4.5 (Core Profile) 505b8e80941Smrg * specification says: 506b8e80941Smrg * 507b8e80941Smrg * "There is a limit to the total size of all variables declared as 508b8e80941Smrg * shared in a single program object. This limit, expressed in units of 509b8e80941Smrg * basic machine units, may be queried as the value of 510b8e80941Smrg * MAX_COMPUTE_SHARED_MEMORY_SIZE." 511b8e80941Smrg */ 512b8e80941Smrg if (prog->Comp.SharedSize > ctx->Const.MaxComputeSharedMemorySize) { 513b8e80941Smrg linker_error(prog, "Too much shared memory used (%u/%u)\n", 514b8e80941Smrg prog->Comp.SharedSize, 515b8e80941Smrg ctx->Const.MaxComputeSharedMemorySize); 516b8e80941Smrg } 517b8e80941Smrg} 518