101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2014 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg * 2301e04c3fSmrg * Authors: 2401e04c3fSmrg * Connor Abbott (cwabbott0@gmail.com) 2501e04c3fSmrg * Jason Ekstrand (jason@jlekstrand.net) 2601e04c3fSmrg * 2701e04c3fSmrg */ 2801e04c3fSmrg 2901e04c3fSmrg/* 3001e04c3fSmrg * This lowering pass converts references to input/output variables with 3101e04c3fSmrg * loads/stores to actual input/output intrinsics. 
3201e04c3fSmrg */ 3301e04c3fSmrg 3401e04c3fSmrg#include "nir.h" 3501e04c3fSmrg#include "nir_builder.h" 3601e04c3fSmrg#include "nir_deref.h" 3701e04c3fSmrg 387ec681f3Smrg#include "util/u_math.h" 397ec681f3Smrg 4001e04c3fSmrgstruct lower_io_state { 4101e04c3fSmrg void *dead_ctx; 4201e04c3fSmrg nir_builder builder; 437e102996Smaya int (*type_size)(const struct glsl_type *type, bool); 4401e04c3fSmrg nir_variable_mode modes; 4501e04c3fSmrg nir_lower_io_options options; 4601e04c3fSmrg}; 4701e04c3fSmrg 487e102996Smayastatic nir_intrinsic_op 497e102996Smayassbo_atomic_for_deref(nir_intrinsic_op deref_op) 507e102996Smaya{ 517e102996Smaya switch (deref_op) { 527e102996Smaya#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O; 537e102996Smaya OP(atomic_exchange) 547e102996Smaya OP(atomic_comp_swap) 557e102996Smaya OP(atomic_add) 567e102996Smaya OP(atomic_imin) 577e102996Smaya OP(atomic_umin) 587e102996Smaya OP(atomic_imax) 597e102996Smaya OP(atomic_umax) 607e102996Smaya OP(atomic_and) 617e102996Smaya OP(atomic_or) 627e102996Smaya OP(atomic_xor) 637e102996Smaya OP(atomic_fadd) 647e102996Smaya OP(atomic_fmin) 657e102996Smaya OP(atomic_fmax) 667e102996Smaya OP(atomic_fcomp_swap) 677e102996Smaya#undef OP 687e102996Smaya default: 697e102996Smaya unreachable("Invalid SSBO atomic"); 707e102996Smaya } 717e102996Smaya} 727e102996Smaya 737e102996Smayastatic nir_intrinsic_op 747e102996Smayaglobal_atomic_for_deref(nir_intrinsic_op deref_op) 757e102996Smaya{ 767e102996Smaya switch (deref_op) { 777e102996Smaya#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O; 787e102996Smaya OP(atomic_exchange) 797e102996Smaya OP(atomic_comp_swap) 807e102996Smaya OP(atomic_add) 817e102996Smaya OP(atomic_imin) 827e102996Smaya OP(atomic_umin) 837e102996Smaya OP(atomic_imax) 847e102996Smaya OP(atomic_umax) 857e102996Smaya OP(atomic_and) 867e102996Smaya OP(atomic_or) 877e102996Smaya OP(atomic_xor) 887e102996Smaya OP(atomic_fadd) 897e102996Smaya OP(atomic_fmin) 
907e102996Smaya OP(atomic_fmax) 917e102996Smaya OP(atomic_fcomp_swap) 927e102996Smaya#undef OP 937e102996Smaya default: 947e102996Smaya unreachable("Invalid SSBO atomic"); 957e102996Smaya } 967e102996Smaya} 977e102996Smaya 987ec681f3Smrgstatic nir_intrinsic_op 997ec681f3Smrgshared_atomic_for_deref(nir_intrinsic_op deref_op) 1007ec681f3Smrg{ 1017ec681f3Smrg switch (deref_op) { 1027ec681f3Smrg#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O; 1037ec681f3Smrg OP(atomic_exchange) 1047ec681f3Smrg OP(atomic_comp_swap) 1057ec681f3Smrg OP(atomic_add) 1067ec681f3Smrg OP(atomic_imin) 1077ec681f3Smrg OP(atomic_umin) 1087ec681f3Smrg OP(atomic_imax) 1097ec681f3Smrg OP(atomic_umax) 1107ec681f3Smrg OP(atomic_and) 1117ec681f3Smrg OP(atomic_or) 1127ec681f3Smrg OP(atomic_xor) 1137ec681f3Smrg OP(atomic_fadd) 1147ec681f3Smrg OP(atomic_fmin) 1157ec681f3Smrg OP(atomic_fmax) 1167ec681f3Smrg OP(atomic_fcomp_swap) 1177ec681f3Smrg#undef OP 1187ec681f3Smrg default: 1197ec681f3Smrg unreachable("Invalid shared atomic"); 1207ec681f3Smrg } 1217ec681f3Smrg} 1227ec681f3Smrg 12301e04c3fSmrgvoid 1247ec681f3Smrgnir_assign_var_locations(nir_shader *shader, nir_variable_mode mode, 1257ec681f3Smrg unsigned *size, 1267e102996Smaya int (*type_size)(const struct glsl_type *, bool)) 12701e04c3fSmrg{ 12801e04c3fSmrg unsigned location = 0; 12901e04c3fSmrg 1307ec681f3Smrg nir_foreach_variable_with_modes(var, shader, mode) { 13101e04c3fSmrg var->data.driver_location = location; 1327e102996Smaya bool bindless_type_size = var->data.mode == nir_var_shader_in || 1337e102996Smaya var->data.mode == nir_var_shader_out || 1347e102996Smaya var->data.bindless; 1357e102996Smaya location += type_size(var->type, bindless_type_size); 13601e04c3fSmrg } 13701e04c3fSmrg 13801e04c3fSmrg *size = location; 13901e04c3fSmrg} 14001e04c3fSmrg 14101e04c3fSmrg/** 1427ec681f3Smrg * Some inputs and outputs are arrayed, meaning that there is an extra level 1437ec681f3Smrg * of array indexing to handle mismatches 
 * between the shader interface and the
 * dispatch pattern of the shader.  For instance, geometry shaders are
 * executed per-primitive while their inputs and outputs are specified
 * per-vertex so all inputs and outputs have to be additionally indexed with
 * the vertex index within the primitive.
 */
bool
nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
{
   /* Per-patch variables are never arrayed per-vertex. */
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.mode == nir_var_shader_in)
      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_MESH;

   return false;
}

/* Number of location slots a variable occupies, not counting the outer
 * per-vertex/per-primitive dimension of arrayed I/O.
 */
static unsigned get_number_of_slots(struct lower_io_state *state,
                                    const nir_variable *var)
{
   const struct glsl_type *type = var->type;

   if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   return state->type_size(type, var->data.bindless);
}

/* Walk the deref chain and build the byte/slot offset of the access.
 *
 * If \p array_index is non-NULL the variable is arrayed I/O: the outermost
 * array index is returned separately through *array_index instead of being
 * folded into the offset.  *component is updated for "compact" variables
 * (e.g. gl_ClipDistance) whose constant index selects a vec4 component.
 * \p bts is forwarded to type_size as its bindless flag.
 */
static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_ssa_def **array_index,
              int (*type_size)(const struct glsl_type *, bool),
              unsigned *component, bool bts)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
    * inputs), skip the outermost array index.  Process the rest normally.
    */
   if (array_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *array_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
      p++;
   }

   if (path.path[0]->var->data.compact) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         /* offset += index * element_size */
         unsigned size = type_size((*p)->type, bts);

         nir_ssa_def *mul =
            nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         /* Sum the sizes of all fields preceding the accessed member. */
         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}

/* Build and insert the load intrinsic replacing a load_deref of an I/O
 * variable.  \p array_index is the outer vertex/primitive index for arrayed
 * I/O (NULL otherwise).  Returns the loaded SSA value.
 */
static nir_ssa_def *
emit_load(struct lower_io_state *state,
          nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
          unsigned component, unsigned num_components, unsigned bit_size,
          nir_alu_type dest_type)
{
   nir_builder *b = &state->builder;
   const nir_shader *nir = b->shader;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   /* Pick the intrinsic based on mode, stage, and interpolation. */
   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT &&
          !var->data.per_primitive) {
         if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
            assert(array_index != NULL);
            op = nir_intrinsic_load_input_vertex;
         } else {
            assert(array_index == NULL);

            /* Interpolated inputs load a barycentric first and feed it to
             * load_interpolated_input.
             */
            nir_intrinsic_op bary_op;
            if (var->data.sample ||
                (state->options & nir_lower_io_force_sample_interpolation))
               bary_op = nir_intrinsic_load_barycentric_sample;
            else if (var->data.centroid)
               bary_op = nir_intrinsic_load_barycentric_centroid;
            else
               bary_op = nir_intrinsic_load_barycentric_pixel;

            barycentric = nir_load_barycentric(&state->builder, bary_op,
                                               var->data.interpolation);
            op = nir_intrinsic_load_interpolated_input;
         }
      } else {
         op = array_index ? nir_intrinsic_load_per_vertex_input :
                            nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = !array_index ? nir_intrinsic_load_output :
           var->data.per_primitive ? nir_intrinsic_load_per_primitive_output :
                                     nir_intrinsic_load_per_vertex_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load,
                              state->type_size(var->type, var->data.bindless));

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, var->data.access);

   nir_intrinsic_set_dest_type(load, dest_type);

   /* Uniform loads carry no IO semantics; everything else does. */
   if (load->intrinsic != nir_intrinsic_load_uniform) {
      nir_io_semantics semantics = {0};
      semantics.location = var->data.location;
      semantics.num_slots = get_number_of_slots(state, var);
      semantics.fb_fetch_output = var->data.fb_fetch_output;
      semantics.medium_precision =
         var->data.precision == GLSL_PRECISION_MEDIUM ||
         var->data.precision == GLSL_PRECISION_LOW;
      nir_intrinsic_set_io_semantics(load, semantics);
   }

   /* Source order: [vertex/primitive index or barycentric,] offset. */
   if (array_index) {
      load->src[0] = nir_src_for_ssa(array_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   nir_ssa_dest_init(&load->instr, &load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &load->instr);

   return &load->dest.ssa;
}

/* Lower a load_deref, splitting 64-bit loads into pairs of 32-bit loads and
 * widening 1-bit booleans to 32 bits when required by the options.
 */
static nir_ssa_def *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component, const struct glsl_type *type)
{
   assert(intrin->dest.is_ssa);
   if (intrin->dest.ssa.bit_size == 64 &&
       (state->options & nir_lower_io_lower_64bit_to_32)) {
      nir_builder *b = &state->builder;

      /* A slot holds a dvec2's worth of 32-bit components. */
      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      nir_ssa_def *comp64[4];
      assert(component == 0 || component == 2);
      unsigned dest_comp = 0;
      while (dest_comp < intrin->dest.ssa.num_components) {
         const unsigned num_comps =
            MIN2(intrin->dest.ssa.num_components - dest_comp,
                 (4 - component) / 2);

         /* Load 2 x 32-bit per 64-bit component, then repack. */
         nir_ssa_def *data32 =
            emit_load(state, array_index, var, offset, component,
                      num_comps * 2, 32, nir_type_uint32);
         for (unsigned i = 0; i < num_comps; i++) {
            comp64[dest_comp + i] =
               nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
         }

         /* Only the first load has a component offset */
         component = 0;
         dest_comp += num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }

      return nir_vec(b, comp64, intrin->dest.ssa.num_components);
   } else if (intrin->dest.ssa.bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      return nir_b2b1(&state->builder,
                      emit_load(state, array_index, var, offset, component,
                                intrin->dest.ssa.num_components, 32,
                                nir_type_bool32));
   } else {
      return emit_load(state, array_index, var, offset, component,
                       intrin->dest.ssa.num_components,
                       intrin->dest.ssa.bit_size,
                       nir_get_nir_type_for_glsl_type(type));
   }
}

/* Build and insert the store intrinsic replacing a store_deref of an output
 * variable.  \p array_index is the outer vertex/primitive index for arrayed
 * I/O (NULL otherwise).
 */
static void
emit_store(struct lower_io_state *state, nir_ssa_def *data,
           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component, unsigned num_components,
           nir_component_mask_t write_mask, nir_alu_type src_type)
{
   nir_builder *b = &state->builder;

   /* Only outputs can be stored through this path. */
   assert(var->data.mode == nir_var_shader_out);
   nir_intrinsic_op op =
      !array_index ? nir_intrinsic_store_output :
      var->data.per_primitive ? nir_intrinsic_store_per_primitive_output :
                                nir_intrinsic_store_per_vertex_output;

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = num_components;

   store->src[0] = nir_src_for_ssa(data);

   nir_intrinsic_set_base(store, var->data.driver_location);
   nir_intrinsic_set_component(store, component);
   nir_intrinsic_set_src_type(store, src_type);

   nir_intrinsic_set_write_mask(store, write_mask);

   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, var->data.access);

   /* Source order: data, [vertex/primitive index,] offset. */
   if (array_index)
      store->src[1] = nir_src_for_ssa(array_index);

   store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);

   /* Geometry shaders encode a 2-bit stream per component; NIR_STREAM_PACKED
    * means var->data.stream already holds the packed form.
    */
   unsigned gs_streams = 0;
   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
      if (var->data.stream & NIR_STREAM_PACKED) {
         gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
      } else {
         assert(var->data.stream < 4);
         gs_streams = 0;
         for (unsigned i = 0; i < num_components; ++i)
            gs_streams |= var->data.stream << (2 * i);
      }
   }

   nir_io_semantics semantics = {0};
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.dual_source_blend_index = var->data.index;
   semantics.gs_streams = gs_streams;
   semantics.medium_precision =
      var->data.precision == GLSL_PRECISION_MEDIUM ||
      var->data.precision == GLSL_PRECISION_LOW;
4467ec681f3Smrg semantics.per_view = var->data.per_view; 4477ec681f3Smrg nir_intrinsic_set_io_semantics(store, semantics); 4487ec681f3Smrg 4497ec681f3Smrg nir_builder_instr_insert(b, &store->instr); 45001e04c3fSmrg} 45101e04c3fSmrg 4527ec681f3Smrgstatic void 4537ec681f3Smrglower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state, 4547ec681f3Smrg nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset, 4557ec681f3Smrg unsigned component, const struct glsl_type *type) 45601e04c3fSmrg{ 4577ec681f3Smrg assert(intrin->src[1].is_ssa); 4587ec681f3Smrg if (intrin->src[1].ssa->bit_size == 64 && 4597ec681f3Smrg (state->options & nir_lower_io_lower_64bit_to_32)) { 4607ec681f3Smrg nir_builder *b = &state->builder; 46101e04c3fSmrg 4627ec681f3Smrg const unsigned slot_size = state->type_size(glsl_dvec_type(2), false); 46301e04c3fSmrg 4647ec681f3Smrg assert(component == 0 || component == 2); 4657ec681f3Smrg unsigned src_comp = 0; 4667ec681f3Smrg nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin); 4677ec681f3Smrg while (src_comp < intrin->num_components) { 4687ec681f3Smrg const unsigned num_comps = 4697ec681f3Smrg MIN2(intrin->num_components - src_comp, 4707ec681f3Smrg (4 - component) / 2); 4717ec681f3Smrg 4727ec681f3Smrg if (write_mask & BITFIELD_MASK(num_comps)) { 4737ec681f3Smrg nir_ssa_def *data = 4747ec681f3Smrg nir_channels(b, intrin->src[1].ssa, 4757ec681f3Smrg BITFIELD_RANGE(src_comp, num_comps)); 4767ec681f3Smrg nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32); 4777ec681f3Smrg 4787ec681f3Smrg nir_component_mask_t write_mask32 = 0; 4797ec681f3Smrg for (unsigned i = 0; i < num_comps; i++) { 4807ec681f3Smrg if (write_mask & BITFIELD_MASK(num_comps) & (1 << i)) 4817ec681f3Smrg write_mask32 |= 3 << (i * 2); 4827ec681f3Smrg } 48301e04c3fSmrg 4847ec681f3Smrg emit_store(state, data32, array_index, var, offset, 4857ec681f3Smrg component, data32->num_components, write_mask32, 4867ec681f3Smrg nir_type_uint32); 4877ec681f3Smrg } 
48801e04c3fSmrg 4897ec681f3Smrg /* Only the first store has a component offset */ 4907ec681f3Smrg component = 0; 4917ec681f3Smrg src_comp += num_comps; 4927ec681f3Smrg write_mask >>= num_comps; 4937ec681f3Smrg offset = nir_iadd_imm(b, offset, slot_size); 4947ec681f3Smrg } 4957ec681f3Smrg } else if (intrin->dest.ssa.bit_size == 1) { 4967ec681f3Smrg /* Booleans are 32-bit */ 4977ec681f3Smrg assert(glsl_type_is_boolean(type)); 4987ec681f3Smrg nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa); 4997ec681f3Smrg emit_store(state, b32_val, array_index, var, offset, 5007ec681f3Smrg component, intrin->num_components, 5017ec681f3Smrg nir_intrinsic_write_mask(intrin), 5027ec681f3Smrg nir_type_bool32); 5037ec681f3Smrg } else { 5047ec681f3Smrg emit_store(state, intrin->src[1].ssa, array_index, var, offset, 5057ec681f3Smrg component, intrin->num_components, 5067ec681f3Smrg nir_intrinsic_write_mask(intrin), 5077ec681f3Smrg nir_get_nir_type_for_glsl_type(type)); 50801e04c3fSmrg } 50901e04c3fSmrg} 51001e04c3fSmrg 5117ec681f3Smrgstatic nir_ssa_def * 51201e04c3fSmrglower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, 5137ec681f3Smrg nir_variable *var, nir_ssa_def *offset, unsigned component, 5147ec681f3Smrg const struct glsl_type *type) 51501e04c3fSmrg{ 5167ec681f3Smrg nir_builder *b = &state->builder; 51701e04c3fSmrg assert(var->data.mode == nir_var_shader_in); 51801e04c3fSmrg 5197ec681f3Smrg /* Ignore interpolateAt() for flat variables - flat is flat. Lower 5207ec681f3Smrg * interpolateAtVertex() for explicit variables. 
5217ec681f3Smrg */ 5227ec681f3Smrg if (var->data.interpolation == INTERP_MODE_FLAT || 5237ec681f3Smrg var->data.interpolation == INTERP_MODE_EXPLICIT) { 5247ec681f3Smrg nir_ssa_def *vertex_index = NULL; 5257ec681f3Smrg 5267ec681f3Smrg if (var->data.interpolation == INTERP_MODE_EXPLICIT) { 5277ec681f3Smrg assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex); 5287ec681f3Smrg vertex_index = intrin->src[1].ssa; 5297ec681f3Smrg } 5307ec681f3Smrg 5317ec681f3Smrg return lower_load(intrin, state, vertex_index, var, offset, component, type); 5327ec681f3Smrg } 5337ec681f3Smrg 5347ec681f3Smrg /* None of the supported APIs allow interpolation on 64-bit things */ 5357ec681f3Smrg assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32); 53601e04c3fSmrg 53701e04c3fSmrg nir_intrinsic_op bary_op; 53801e04c3fSmrg switch (intrin->intrinsic) { 53901e04c3fSmrg case nir_intrinsic_interp_deref_at_centroid: 54001e04c3fSmrg bary_op = (state->options & nir_lower_io_force_sample_interpolation) ? 
54101e04c3fSmrg nir_intrinsic_load_barycentric_sample : 54201e04c3fSmrg nir_intrinsic_load_barycentric_centroid; 54301e04c3fSmrg break; 54401e04c3fSmrg case nir_intrinsic_interp_deref_at_sample: 54501e04c3fSmrg bary_op = nir_intrinsic_load_barycentric_at_sample; 54601e04c3fSmrg break; 54701e04c3fSmrg case nir_intrinsic_interp_deref_at_offset: 54801e04c3fSmrg bary_op = nir_intrinsic_load_barycentric_at_offset; 54901e04c3fSmrg break; 55001e04c3fSmrg default: 55101e04c3fSmrg unreachable("Bogus interpolateAt() intrinsic."); 55201e04c3fSmrg } 55301e04c3fSmrg 55401e04c3fSmrg nir_intrinsic_instr *bary_setup = 55501e04c3fSmrg nir_intrinsic_instr_create(state->builder.shader, bary_op); 55601e04c3fSmrg 55701e04c3fSmrg nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL); 55801e04c3fSmrg nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation); 55901e04c3fSmrg 56001e04c3fSmrg if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample || 5617ec681f3Smrg intrin->intrinsic == nir_intrinsic_interp_deref_at_offset || 5627ec681f3Smrg intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex) 5637ec681f3Smrg nir_src_copy(&bary_setup->src[0], &intrin->src[1]); 56401e04c3fSmrg 5657ec681f3Smrg nir_builder_instr_insert(b, &bary_setup->instr); 56601e04c3fSmrg 5677ec681f3Smrg nir_io_semantics semantics = {0}; 5687ec681f3Smrg semantics.location = var->data.location; 5697ec681f3Smrg semantics.num_slots = get_number_of_slots(state, var); 5707ec681f3Smrg semantics.medium_precision = 5717ec681f3Smrg var->data.precision == GLSL_PRECISION_MEDIUM || 5727ec681f3Smrg var->data.precision == GLSL_PRECISION_LOW; 57301e04c3fSmrg 5747ec681f3Smrg assert(intrin->dest.is_ssa); 5757ec681f3Smrg nir_ssa_def *load = 5767ec681f3Smrg nir_load_interpolated_input(&state->builder, 5777ec681f3Smrg intrin->dest.ssa.num_components, 5787ec681f3Smrg intrin->dest.ssa.bit_size, 5797ec681f3Smrg &bary_setup->dest.ssa, 5807ec681f3Smrg offset, 5817ec681f3Smrg .base = var->data.driver_location, 
5827ec681f3Smrg .component = component, 5837ec681f3Smrg .io_semantics = semantics); 58401e04c3fSmrg 58501e04c3fSmrg return load; 58601e04c3fSmrg} 58701e04c3fSmrg 58801e04c3fSmrgstatic bool 58901e04c3fSmrgnir_lower_io_block(nir_block *block, 59001e04c3fSmrg struct lower_io_state *state) 59101e04c3fSmrg{ 59201e04c3fSmrg nir_builder *b = &state->builder; 59301e04c3fSmrg const nir_shader_compiler_options *options = b->shader->options; 59401e04c3fSmrg bool progress = false; 59501e04c3fSmrg 59601e04c3fSmrg nir_foreach_instr_safe(instr, block) { 59701e04c3fSmrg if (instr->type != nir_instr_type_intrinsic) 59801e04c3fSmrg continue; 59901e04c3fSmrg 60001e04c3fSmrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 60101e04c3fSmrg 60201e04c3fSmrg switch (intrin->intrinsic) { 60301e04c3fSmrg case nir_intrinsic_load_deref: 60401e04c3fSmrg case nir_intrinsic_store_deref: 60501e04c3fSmrg /* We can lower the io for this nir instrinsic */ 60601e04c3fSmrg break; 60701e04c3fSmrg case nir_intrinsic_interp_deref_at_centroid: 60801e04c3fSmrg case nir_intrinsic_interp_deref_at_sample: 60901e04c3fSmrg case nir_intrinsic_interp_deref_at_offset: 6107ec681f3Smrg case nir_intrinsic_interp_deref_at_vertex: 61101e04c3fSmrg /* We can optionally lower these to load_interpolated_input */ 6127ec681f3Smrg if (options->use_interpolated_input_intrinsics || 6137ec681f3Smrg options->lower_interpolate_at) 61401e04c3fSmrg break; 6157ec681f3Smrg FALLTHROUGH; 61601e04c3fSmrg default: 61701e04c3fSmrg /* We can't lower the io for this nir instrinsic, so skip it */ 61801e04c3fSmrg continue; 61901e04c3fSmrg } 62001e04c3fSmrg 62101e04c3fSmrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 6227ec681f3Smrg if (!nir_deref_mode_is_one_of(deref, state->modes)) 62301e04c3fSmrg continue; 62401e04c3fSmrg 6257ec681f3Smrg nir_variable *var = nir_deref_instr_get_variable(deref); 62601e04c3fSmrg 62701e04c3fSmrg b->cursor = nir_before_instr(instr); 62801e04c3fSmrg 6297ec681f3Smrg const bool is_arrayed = 
nir_is_arrayed_io(var, b->shader->info.stage); 63001e04c3fSmrg 63101e04c3fSmrg nir_ssa_def *offset; 6327ec681f3Smrg nir_ssa_def *array_index = NULL; 63301e04c3fSmrg unsigned component_offset = var->data.location_frac; 6347ec681f3Smrg bool bindless_type_size = var->data.mode == nir_var_shader_in || 6357ec681f3Smrg var->data.mode == nir_var_shader_out || 6367e102996Smaya var->data.bindless; 63701e04c3fSmrg 6387ec681f3Smrg if (nir_deref_instr_is_known_out_of_bounds(deref)) { 6397ec681f3Smrg /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says: 6407ec681f3Smrg * 6417ec681f3Smrg * In the subsections described above for array, vector, matrix and 6427ec681f3Smrg * structure accesses, any out-of-bounds access produced undefined 6437ec681f3Smrg * behavior.... 6447ec681f3Smrg * Out-of-bounds reads return undefined values, which 6457ec681f3Smrg * include values from other variables of the active program or zero. 6467ec681f3Smrg * Out-of-bounds writes may be discarded or overwrite 6477ec681f3Smrg * other variables of the active program. 6487ec681f3Smrg * 6497ec681f3Smrg * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero 6507ec681f3Smrg * for reads. 6517ec681f3Smrg * 6527ec681f3Smrg * Otherwise get_io_offset would return out-of-bound offset which may 6537ec681f3Smrg * result in out-of-bound loading/storing of inputs/outputs, 6547ec681f3Smrg * that could cause issues in drivers down the line. 6557ec681f3Smrg */ 6567ec681f3Smrg if (intrin->intrinsic != nir_intrinsic_store_deref) { 6577ec681f3Smrg nir_ssa_def *zero = 6587ec681f3Smrg nir_imm_zero(b, intrin->dest.ssa.num_components, 6597ec681f3Smrg intrin->dest.ssa.bit_size); 6607ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 6617ec681f3Smrg zero); 6627ec681f3Smrg } 6637ec681f3Smrg 6647ec681f3Smrg nir_instr_remove(&intrin->instr); 6657ec681f3Smrg progress = true; 6667ec681f3Smrg continue; 6677ec681f3Smrg } 6687ec681f3Smrg 6697ec681f3Smrg offset = get_io_offset(b, deref, is_arrayed ? 
&array_index : NULL, 6707e102996Smaya state->type_size, &component_offset, 6717e102996Smaya bindless_type_size); 67201e04c3fSmrg 6737ec681f3Smrg nir_ssa_def *replacement = NULL; 67401e04c3fSmrg 67501e04c3fSmrg switch (intrin->intrinsic) { 67601e04c3fSmrg case nir_intrinsic_load_deref: 6777ec681f3Smrg replacement = lower_load(intrin, state, array_index, var, offset, 6787ec681f3Smrg component_offset, deref->type); 67901e04c3fSmrg break; 68001e04c3fSmrg 68101e04c3fSmrg case nir_intrinsic_store_deref: 6827ec681f3Smrg lower_store(intrin, state, array_index, var, offset, 6837ec681f3Smrg component_offset, deref->type); 68401e04c3fSmrg break; 68501e04c3fSmrg 68601e04c3fSmrg case nir_intrinsic_interp_deref_at_centroid: 68701e04c3fSmrg case nir_intrinsic_interp_deref_at_sample: 68801e04c3fSmrg case nir_intrinsic_interp_deref_at_offset: 6897ec681f3Smrg case nir_intrinsic_interp_deref_at_vertex: 6907ec681f3Smrg assert(array_index == NULL); 69101e04c3fSmrg replacement = lower_interpolate_at(intrin, state, var, offset, 6927ec681f3Smrg component_offset, deref->type); 69301e04c3fSmrg break; 69401e04c3fSmrg 69501e04c3fSmrg default: 69601e04c3fSmrg continue; 69701e04c3fSmrg } 69801e04c3fSmrg 6997ec681f3Smrg if (replacement) { 7007ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 7017ec681f3Smrg replacement); 70201e04c3fSmrg } 70301e04c3fSmrg nir_instr_remove(&intrin->instr); 70401e04c3fSmrg progress = true; 70501e04c3fSmrg } 70601e04c3fSmrg 70701e04c3fSmrg return progress; 70801e04c3fSmrg} 70901e04c3fSmrg 71001e04c3fSmrgstatic bool 71101e04c3fSmrgnir_lower_io_impl(nir_function_impl *impl, 71201e04c3fSmrg nir_variable_mode modes, 7137e102996Smaya int (*type_size)(const struct glsl_type *, bool), 71401e04c3fSmrg nir_lower_io_options options) 71501e04c3fSmrg{ 71601e04c3fSmrg struct lower_io_state state; 71701e04c3fSmrg bool progress = false; 71801e04c3fSmrg 71901e04c3fSmrg nir_builder_init(&state.builder, impl); 72001e04c3fSmrg state.dead_ctx = ralloc_context(NULL); 
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   /* This pass only knows how to lower variables in these three modes;
    * anything else in `modes` is a caller bug.
    */
   ASSERTED nir_variable_mode supported_modes =
      nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
   assert(!(modes & ~supported_modes));

   /* Lower every deref-based I/O access in every block of the impl. */
   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   /* Instructions were added/removed, so no analysis metadata survives. */
   nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
 *
 * This pass is intended to be used for cross-stage shader I/O and driver-
 * managed uniforms to turn deref-based access into a simpler model using
 * locations or offsets. For fragment shader inputs, it can optionally turn
 * load_deref into an explicit interpolation using barycentrics coming from
 * one of the load_barycentric_* intrinsics. This pass requires that all
 * deref chains are complete and contain no casts.
 */
bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *, bool),
             nir_lower_io_options options)
{
   bool progress = false;

   /* Apply the lowering to every function that has an implementation;
    * `type_size` maps a GLSL type to its size in the driver's location/
    * offset space (second argument selects bindless sizing — see the
    * call site in nir_lower_io_block).
    */
   nir_foreach_function(function, shader) {
      if (function->impl) {
         progress |= nir_lower_io_impl(function->impl, modes,
                                       type_size, options);
      }
   }

   return progress;
}

/* Size in bytes of one scalar element of `type`.  Booleans are stored as
 * 32-bit values (4 bytes) regardless of their logical 1-bit size.
 */
static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

/* Add `offset` to an address expressed in `addr_format`.
 *
 * Each address format stores its offset in a different place (a scalar,
 * a vector component, or half of a packed 64-bit value), so the addition
 * is dispatched per format.  `modes` is only consulted for the 62-bit
 * generic format, where it can prove the 64-bit add is unnecessary.
 */
static nir_ssa_def *
build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
                nir_address_format addr_format,
                nir_variable_mode modes,
                nir_ssa_def *offset)
{
   assert(offset->num_components == 1);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_32bit_offset:
      /* The whole address is one scalar; plain add. */
      assert(addr->bit_size == offset->bit_size);
      assert(addr->num_components == 1);
      return nir_iadd(b, addr, offset);

   case nir_address_format_32bit_offset_as_64bit:
      /* Logically 32-bit, carried in a 64-bit value: add in 32 bits,
       * then widen back.
       */
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      /* vec4 address: offset lives in component 3. */
      assert(addr->num_components == 4);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);

   case nir_address_format_32bit_index_offset:
      /* vec2 (index, offset): offset lives in component 1. */
      assert(addr->num_components == 2);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);

   case nir_address_format_32bit_index_offset_pack64:
      /* (index, offset) packed into one 64-bit scalar: offset is the
       * low 32 bits (split_x), index the high 32 (split_y).
       */
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_pack_64_2x32_split(b,
                                    nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
                                    nir_unpack_64_2x32_split_y(b, addr));

   case nir_address_format_vec2_index_32bit_offset:
      /* vec3 (index.xy, offset): offset lives in component 2. */
      assert(addr->num_components == 3);
      assert(offset->bit_size == 32);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);

   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      assert(offset->bit_size == 64);
      if (!(modes & ~(nir_var_function_temp |
                      nir_var_shader_temp |
                      nir_var_mem_shared))) {
         /* If we're sure it's one of these modes, we can do an easy 32-bit
          * addition and don't need to bother with 64-bit math.
          */
         nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
         nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr);
         addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
         return nir_pack_64_2x32_split(b, addr32, type);
      } else {
         return nir_iadd(b, addr, offset);
      }

   case nir_address_format_logical:
      unreachable("Unsupported address format");
   }
   unreachable("Invalid address format");
}

/* Bit size a constant offset must have to be added to `addr` in
 * `addr_format`.  The two pack64 formats carry a logically 32-bit offset
 * inside a 64-bit value, hence the special case.
 */
static unsigned
addr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
{
   if (addr_format == nir_address_format_32bit_offset_as_64bit ||
       addr_format == nir_address_format_32bit_index_offset_pack64)
      return 32;
   return addr->bit_size;
}

/* Convenience wrapper: add an immediate integer offset to `addr`. */
static nir_ssa_def *
build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
                    nir_address_format addr_format,
                    nir_variable_mode modes,
                    int64_t offset)
{
   return build_addr_iadd(b, addr, addr_format, modes,
                          nir_imm_intN_t(b, offset,
                                         addr_get_offset_bit_size(addr, addr_format)));
}

/* Build the base address of variable `var` in `addr_format`, starting
 * from the variable's driver_location and, for global formats, the
 * per-mode base pointer intrinsic.
 */
static nir_ssa_def *
build_addr_for_var(nir_builder *b, nir_variable *var,
                   nir_address_format addr_format)
{
   assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
                            nir_var_shader_temp | nir_var_function_temp |
                            nir_var_mem_push_const | nir_var_mem_constant));

   const unsigned num_comps = nir_address_format_num_components(addr_format);
   const unsigned bit_size =
nir_address_format_bit_size(addr_format);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global: {
      /* Global formats: fetch the mode's base pointer and offset it by
       * the variable's driver_location.
       */
      nir_ssa_def *base_addr;
      switch (var->data.mode) {
      case nir_var_shader_temp:
         /* Last argument distinguishes shader (0) vs. function (1) temp
          * scratch regions.
          */
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
         break;

      case nir_var_function_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
         break;

      case nir_var_mem_constant:
         base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
         break;

      case nir_var_mem_shared:
         base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
         break;

      default:
         unreachable("Unsupported variable mode");
      }

      return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
                                 var->data.driver_location);
   }

   case nir_address_format_32bit_offset:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int(b, var->data.driver_location);

   case nir_address_format_32bit_offset_as_64bit:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int64(b, var->data.driver_location);

   case nir_address_format_62bit_generic:
      /* Generic pointers tag the mode in the top two bits of the 64-bit
       * address: 2 = temp (scratch), 1 = shared; the low bits hold the
       * driver_location.  Must match build_runtime_addr_mode_check below.
       */
      switch (var->data.mode) {
      case nir_var_shader_temp:
      case nir_var_function_temp:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);

      case nir_var_mem_shared:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);

      default:
         unreachable("Unsupported variable mode");
      }

   default:
      unreachable("Unsupported address format");
   }
}

/* Emit a boolean test of whether runtime address `addr` points into
 * storage of mode `mode`.  Only meaningful for the 62-bit generic format,
 * whose top two bits encode the mode (0 or 3 = global, 1 = shared,
 * 2 = temp) — the inverse of the tagging done in build_addr_for_var.
 */
static nir_ssa_def *
build_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr,
                              nir_address_format addr_format,
                              nir_variable_mode mode)
{
   /* The compile-time check failed; do a run-time check */
   switch (addr_format) {
   case nir_address_format_62bit_generic: {
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      /* Extract the 2-bit mode tag from the address top bits. */
      nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62));
      switch (mode) {
      case nir_var_function_temp:
      case nir_var_shader_temp:
         return nir_ieq_imm(b, mode_enum, 0x2);

      case nir_var_mem_shared:
         return nir_ieq_imm(b, mode_enum, 0x1);

      case nir_var_mem_global:
         /* Global claims both remaining tag values. */
         return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
                        nir_ieq_imm(b, mode_enum, 0x3));

      default:
         unreachable("Invalid mode check intrinsic");
      }
   }

   default:
      unreachable("Unsupported address mode");
   }
}

/* Extract the buffer-index part of an (index, offset) style address. */
static nir_ssa_def *
addr_to_index(nir_builder *b, nir_ssa_def *addr,
              nir_address_format addr_format)
{
   switch (addr_format) {
   case
nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 0);
   case nir_address_format_32bit_index_offset_pack64:
      /* Index is the high 32 bits of the packed 64-bit value. */
      return nir_unpack_64_2x32_split_y(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      /* Two-component index in .xy. */
      assert(addr->num_components == 3);
      return nir_channels(b, addr, 0x3);
   default: unreachable("Invalid address format");
   }
}

/* Extract the byte-offset part of an address as a 32-bit value. */
static nir_ssa_def *
addr_to_offset(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 1);
   case nir_address_format_32bit_index_offset_pack64:
      /* Offset is the low 32 bits of the packed 64-bit value. */
      return nir_unpack_64_2x32_split_x(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_channel(b, addr, 2);
   case nir_address_format_32bit_offset:
      /* The address already is the offset. */
      return addr;
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_62bit_generic:
      /* Truncate the 64-bit carrier down to the 32-bit offset. */
      return nir_u2u32(b, addr);
   default:
      unreachable("Invalid address format");
   }
}

/** Returns true if the given address format resolves to a global address */
static bool
addr_format_is_global(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   /* 62-bit generic addresses are global only when the mode says so;
    * the format alone doesn't decide.
    */
   if (addr_format == nir_address_format_62bit_generic)
      return mode == nir_var_mem_global;

   return addr_format == nir_address_format_32bit_global ||
          addr_format == nir_address_format_64bit_global ||
          addr_format == nir_address_format_64bit_global_32bit_offset ||
          addr_format == nir_address_format_64bit_bounded_global;
}

/* Returns true if the given address format resolves to a plain offset
 * (no buffer index, no global pointer).  Complements addr_format_is_global
 * for the 62-bit generic case.
 */
static bool
addr_format_is_offset(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode != nir_var_mem_global;

   return addr_format == nir_address_format_32bit_offset ||
          addr_format == nir_address_format_32bit_offset_as_64bit;
}

/* Flatten an address to a single global pointer value.  Only valid for
 * formats that actually contain one (asserted via unreachable below).
 */
static nir_ssa_def *
addr_to_global(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      return addr;

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      /* 64-bit base pointer in .xy plus 32-bit offset in .w. */
      assert(addr->num_components == 4);
      return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
                      nir_u2u64(b, nir_channel(b, addr, 3)));

   case nir_address_format_32bit_index_offset:
   case nir_address_format_32bit_index_offset_pack64:
   case nir_address_format_vec2_index_32bit_offset:
   case nir_address_format_32bit_offset:
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_logical:
      unreachable("Cannot get a 64-bit address with this address format");
   }

   unreachable("Invalid address format");
}

/* Whether loads/stores in this format require an explicit bounds check
 * (only the bounded-global format carries a size to check against).
 */
static bool
addr_format_needs_bounds_check(nir_address_format addr_format)
{
   return addr_format == nir_address_format_64bit_bounded_global;
}

/* Emit "offset + size <= bound" for a bounded-global address, where the
 * bound is component 2 and the offset component 3 of the vec4 address.
 */
static nir_ssa_def *
addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
                  nir_address_format addr_format, unsigned size)
{
   assert(addr_format == nir_address_format_64bit_bounded_global);
   assert(addr->num_components == 4);
   return nir_ige(b, nir_channel(b, addr, 2),
                  nir_iadd_imm(b, nir_channel(b, addr, 3), size));
}

/* Walk a deref chain towards its root, accumulating a conservative
 * [base, base + range) byte window that the access may touch.  Constant
 * array indices and struct members tighten the base; variable/wildcard
 * indices widen the range by the whole array extent.  On anything it
 * cannot reason about it falls back to base = 0, range = ~0 (unbounded).
 */
static void
nir_get_explicit_deref_range(nir_deref_instr *deref,
                             nir_address_format addr_format,
                             uint32_t *out_base,
                             uint32_t *out_range)
{
   uint32_t base = 0;
   uint32_t range = glsl_get_explicit_size(deref->type, false);

   while (true) {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);

      switch (deref->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_array_wildcard:
      case nir_deref_type_ptr_as_array: {
         const unsigned stride = nir_deref_instr_array_stride(deref);
         if (stride == 0)
            goto fail;

         if (!parent)
            goto fail;

         if (deref->deref_type != nir_deref_type_array_wildcard &&
             nir_src_is_const(deref->arr.index)) {
            /* Known index: shift the window by index * stride. */
            base += stride * nir_src_as_uint(deref->arr.index);
         } else {
            /* Unknown index: any element may be hit, so widen the range
             * to span the whole array.
             */
            if (glsl_get_length(parent->type) == 0)
               goto fail;
            range += stride * (glsl_get_length(parent->type) - 1);
         }
         break;
      }

      case nir_deref_type_struct: {
         if (!parent)
            goto fail;

         base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
         break;
      }

      case nir_deref_type_cast: {
         /* Casts terminate the walk; look at what produced the pointer. */
         nir_instr *parent_instr = deref->parent.ssa->parent_instr;

         switch (parent_instr->type) {
         case nir_instr_type_load_const: {
            nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);

            /* A constant address contributes its offset component
             * (format-dependent position) to the base.
             */
            switch (addr_format) {
            case nir_address_format_32bit_offset:
               base += load->value[1].u32;
               break;
            case nir_address_format_32bit_index_offset:
               base += load->value[1].u32;
               break;
            case nir_address_format_vec2_index_32bit_offset:
               base += load->value[2].u32;
               break;
            default:
               goto fail;
            }

            *out_base = base;
            *out_range = range;
            return;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
            switch (intr->intrinsic) {
            case nir_intrinsic_load_vulkan_descriptor:
               /* Assume that a load_vulkan_descriptor won't contribute to an
                * offset within the resource.
                */
               break;
            default:
               goto fail;
            }

            *out_base = base;
            *out_range = range;
            return;
         }

         default:
            goto fail;
         }
      }

      default:
         goto fail;
      }

      deref = parent;
   }

fail:
   /* Unknown: report the maximally conservative window. */
   *out_base = 0;
   *out_range = ~0;
}

/* Reduce a generic-pointer mode mask to a canonical form: single-mode
 * masks pass through; otherwise shader_temp is folded into function_temp
 * so callers only have to distinguish temp / shared / global.
 */
static nir_variable_mode
canonicalize_generic_modes(nir_variable_mode modes)
{
   assert(modes != 0);
   if (util_bitcount(modes) == 1)
      return modes;

   assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
                      nir_var_mem_shared | nir_var_mem_global)));

   /* Canonicalize by converting shader_temp to function_temp */
   if (modes & nir_var_shader_temp) {
      modes &= ~nir_var_shader_temp;
      modes |= nir_var_function_temp;
   }

   return modes;
}

/* Lower one load_deref(_block_intel) to the explicit load intrinsic for
 * the given address format and mode(s).  When several modes are possible
 * (generic pointers) this recurses per mode under runtime mode checks and
 * joins the results with phis.  Returns the loaded value.
 */
static nir_ssa_def *
build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
                       nir_ssa_def *addr, nir_address_format addr_format,
                       nir_variable_mode modes,
                       uint32_t align_mul, uint32_t align_offset,
                       unsigned num_components)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   modes = canonicalize_generic_modes(modes);

   if (util_bitcount(modes) > 1) {
      if
(addr_format_is_global(addr_format, modes)) {
         /* Mode doesn't matter for an all-global format; collapse to one
          * recursion with nir_var_mem_global.
          */
         return build_explicit_io_load(b, intrin, addr, addr_format,
                                       nir_var_mem_global,
                                       align_mul, align_offset,
                                       num_components);
      } else if (modes & nir_var_function_temp) {
         /* Peel off function_temp first under a runtime mode check, then
          * recurse on the remaining modes in the else branch.
          */
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_function_temp));
         nir_ssa_def *res1 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_function_temp,
                                   align_mul, align_offset,
                                   num_components);
         nir_push_else(b, NULL);
         nir_ssa_def *res2 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   modes & ~nir_var_function_temp,
                                   align_mul, align_offset,
                                   num_components);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      } else {
         /* Only shared and global remain: one runtime check suffices. */
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_mem_shared));
         assert(modes & nir_var_mem_shared);
         nir_ssa_def *res1 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_mem_shared,
                                   align_mul, align_offset,
                                   num_components);
         nir_push_else(b, NULL);
         assert(modes & nir_var_mem_global);
         nir_ssa_def *res2 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_mem_global,
                                   align_mul, align_offset,
                                   num_components);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      }
   }

   /* From here down exactly one mode is set. */
   assert(util_bitcount(modes) == 1);
   const nir_variable_mode mode = modes;

   /* Pick the target intrinsic from (source intrinsic, mode, format). */
   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref:
      switch (mode) {
      case nir_var_mem_ubo:
         if (addr_format == nir_address_format_64bit_global_32bit_offset)
            op = nir_intrinsic_load_global_constant_offset;
         else if (addr_format == nir_address_format_64bit_bounded_global)
            op = nir_intrinsic_load_global_constant_bounded;
         else if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global_constant;
         else
            op = nir_intrinsic_load_ubo;
         break;
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global;
         else
            op = nir_intrinsic_load_ssbo;
         break;
      case nir_var_mem_global:
         assert(addr_format_is_global(addr_format, mode));
         op = nir_intrinsic_load_global;
         break;
      case nir_var_uniform:
         /* Offset-addressed uniforms only occur for OpenCL kernels. */
         assert(addr_format_is_offset(addr_format, mode));
         assert(b->shader->info.stage == MESA_SHADER_KERNEL);
         op = nir_intrinsic_load_kernel_input;
         break;
      case nir_var_mem_shared:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_load_shared;
         break;
      case nir_var_shader_temp:
      case nir_var_function_temp:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_load_scratch;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = nir_intrinsic_load_global;
         }
         break;
      case nir_var_mem_push_const:
         assert(addr_format == nir_address_format_32bit_offset);
         op = nir_intrinsic_load_push_constant;
         break;
      case nir_var_mem_constant:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_load_constant;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = nir_intrinsic_load_global_constant;
         }
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   case nir_intrinsic_load_deref_block_intel:
      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global_block_intel;
         else
            op = nir_intrinsic_load_ssbo_block_intel;
         break;
      case nir_var_mem_global:
         op = nir_intrinsic_load_global_block_intel;
         break;
      case nir_var_mem_shared:
         op = nir_intrinsic_load_shared_block_intel;
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   default:
      unreachable("Invalid intrinsic");
   }

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);

   /* Wire up the address sources; the layout depends on the chosen op
    * and on whether the format is global, offset or (index, offset).
    */
   if (op == nir_intrinsic_load_global_constant_offset) {
      assert(addr_format == nir_address_format_64bit_global_32bit_offset);
      load->src[0] = nir_src_for_ssa(
         nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
   } else if (op == nir_intrinsic_load_global_constant_bounded) {
      assert(addr_format == nir_address_format_64bit_bounded_global);
      load->src[0] = nir_src_for_ssa(
         nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
      /* Component 2 carries the bound for the intrinsic's own check. */
      load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
   } else if (addr_format_is_global(addr_format, mode)) {
      load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else if (addr_format_is_offset(addr_format, mode)) {
      assert(addr->num_components == 1);
      load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));

   if (op == nir_intrinsic_load_constant) {
      nir_intrinsic_set_base(load, 0);
      nir_intrinsic_set_range(load, b->shader->constant_data_size);
   } else if (mode == nir_var_mem_push_const) {
      /* Push constants are required to be able to be chased back to the
       * variable so we can provide a base/range.
       */
      nir_variable *var = nir_deref_instr_get_variable(deref);
      nir_intrinsic_set_base(load, 0);
      nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
   }

   unsigned bit_size = intrin->dest.ssa.bit_size;
   if (bit_size == 1) {
      /* TODO: Make the native bool bit_size an option. */
      bit_size = 32;
   }

   if (nir_intrinsic_has_align(load))
      nir_intrinsic_set_align(load, align_mul, align_offset);

   if (nir_intrinsic_has_range_base(load)) {
      unsigned base, range;
      nir_get_explicit_deref_range(deref, addr_format, &base, &range);
      nir_intrinsic_set_range_base(load, base);
      nir_intrinsic_set_range(load, range);
   }

   assert(intrin->dest.is_ssa);
   load->num_components = num_components;
   nir_ssa_dest_init(&load->instr, &load->dest, num_components,
                     bit_size, NULL);

   assert(bit_size % 8 == 0);

   nir_ssa_def *result;
   if (addr_format_needs_bounds_check(addr_format) &&
       op != nir_intrinsic_load_global_constant_bounded) {
      /* We don't need to bounds-check global_constant_bounded because bounds
       * checking is handled by the intrinsic itself.
       *
       * The Vulkan spec for robustBufferAccess gives us quite a few options
       * as to what we can do with an OOB read.  Unfortunately, returning
       * undefined values isn't one of them so we return an actual zero.
       */
      nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);

      /* TODO: Better handle block_intel. */
      const unsigned load_size = (bit_size / 8) * load->num_components;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));

      nir_builder_instr_insert(b, &load->instr);

      nir_pop_if(b, NULL);

      result = nir_if_phi(b, &load->dest.ssa, zero);
   } else {
      nir_builder_instr_insert(b, &load->instr);
      result = &load->dest.ssa;
   }

   if (intrin->dest.ssa.bit_size == 1) {
      /* For shared, we can go ahead and use NIR's and/or the back-end's
       * standard encoding for booleans rather than forcing a 0/1 boolean.
       * This should save an instruction or two.
14237ec681f3Smrg */ 14247ec681f3Smrg if (mode == nir_var_mem_shared || 14257ec681f3Smrg mode == nir_var_shader_temp || 14267ec681f3Smrg mode == nir_var_function_temp) 14277ec681f3Smrg result = nir_b2b1(b, result); 14287ec681f3Smrg else 14297ec681f3Smrg result = nir_i2b(b, result); 14307e102996Smaya } 14317ec681f3Smrg 14327ec681f3Smrg return result; 14337e102996Smaya} 14347e102996Smaya 14357e102996Smayastatic void 14367e102996Smayabuild_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin, 14377e102996Smaya nir_ssa_def *addr, nir_address_format addr_format, 14387ec681f3Smrg nir_variable_mode modes, 14397ec681f3Smrg uint32_t align_mul, uint32_t align_offset, 14407e102996Smaya nir_ssa_def *value, nir_component_mask_t write_mask) 14417e102996Smaya{ 14427ec681f3Smrg modes = canonicalize_generic_modes(modes); 14437ec681f3Smrg 14447ec681f3Smrg if (util_bitcount(modes) > 1) { 14457ec681f3Smrg if (addr_format_is_global(addr_format, modes)) { 14467ec681f3Smrg build_explicit_io_store(b, intrin, addr, addr_format, 14477ec681f3Smrg nir_var_mem_global, 14487ec681f3Smrg align_mul, align_offset, 14497ec681f3Smrg value, write_mask); 14507ec681f3Smrg } else if (modes & nir_var_function_temp) { 14517ec681f3Smrg nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 14527ec681f3Smrg nir_var_function_temp)); 14537ec681f3Smrg build_explicit_io_store(b, intrin, addr, addr_format, 14547ec681f3Smrg nir_var_function_temp, 14557ec681f3Smrg align_mul, align_offset, 14567ec681f3Smrg value, write_mask); 14577ec681f3Smrg nir_push_else(b, NULL); 14587ec681f3Smrg build_explicit_io_store(b, intrin, addr, addr_format, 14597ec681f3Smrg modes & ~nir_var_function_temp, 14607ec681f3Smrg align_mul, align_offset, 14617ec681f3Smrg value, write_mask); 14627ec681f3Smrg nir_pop_if(b, NULL); 14637ec681f3Smrg } else { 14647ec681f3Smrg nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 14657ec681f3Smrg nir_var_mem_shared)); 14667ec681f3Smrg assert(modes & nir_var_mem_shared); 
14677ec681f3Smrg build_explicit_io_store(b, intrin, addr, addr_format, 14687ec681f3Smrg nir_var_mem_shared, 14697ec681f3Smrg align_mul, align_offset, 14707ec681f3Smrg value, write_mask); 14717ec681f3Smrg nir_push_else(b, NULL); 14727ec681f3Smrg assert(modes & nir_var_mem_global); 14737ec681f3Smrg build_explicit_io_store(b, intrin, addr, addr_format, 14747ec681f3Smrg nir_var_mem_global, 14757ec681f3Smrg align_mul, align_offset, 14767ec681f3Smrg value, write_mask); 14777ec681f3Smrg nir_pop_if(b, NULL); 14787ec681f3Smrg } 14797ec681f3Smrg return; 14807ec681f3Smrg } 14817ec681f3Smrg 14827ec681f3Smrg assert(util_bitcount(modes) == 1); 14837ec681f3Smrg const nir_variable_mode mode = modes; 14847e102996Smaya 14857e102996Smaya nir_intrinsic_op op; 14867ec681f3Smrg switch (intrin->intrinsic) { 14877ec681f3Smrg case nir_intrinsic_store_deref: 14887ec681f3Smrg assert(write_mask != 0); 14897ec681f3Smrg 14907ec681f3Smrg switch (mode) { 14917ec681f3Smrg case nir_var_mem_ssbo: 14927ec681f3Smrg if (addr_format_is_global(addr_format, mode)) 14937ec681f3Smrg op = nir_intrinsic_store_global; 14947ec681f3Smrg else 14957ec681f3Smrg op = nir_intrinsic_store_ssbo; 14967ec681f3Smrg break; 14977ec681f3Smrg case nir_var_mem_global: 14987ec681f3Smrg assert(addr_format_is_global(addr_format, mode)); 14997e102996Smaya op = nir_intrinsic_store_global; 15007ec681f3Smrg break; 15017ec681f3Smrg case nir_var_mem_shared: 15027ec681f3Smrg assert(addr_format_is_offset(addr_format, mode)); 15037ec681f3Smrg op = nir_intrinsic_store_shared; 15047ec681f3Smrg break; 15057ec681f3Smrg case nir_var_shader_temp: 15067ec681f3Smrg case nir_var_function_temp: 15077ec681f3Smrg if (addr_format_is_offset(addr_format, mode)) { 15087ec681f3Smrg op = nir_intrinsic_store_scratch; 15097ec681f3Smrg } else { 15107ec681f3Smrg assert(addr_format_is_global(addr_format, mode)); 15117ec681f3Smrg op = nir_intrinsic_store_global; 15127ec681f3Smrg } 15137ec681f3Smrg break; 15147ec681f3Smrg default: 15157ec681f3Smrg 
unreachable("Unsupported explicit IO variable mode"); 15167ec681f3Smrg } 15177e102996Smaya break; 15187ec681f3Smrg 15197ec681f3Smrg case nir_intrinsic_store_deref_block_intel: 15207ec681f3Smrg assert(write_mask == 0); 15217ec681f3Smrg 15227ec681f3Smrg switch (mode) { 15237ec681f3Smrg case nir_var_mem_ssbo: 15247ec681f3Smrg if (addr_format_is_global(addr_format, mode)) 15257ec681f3Smrg op = nir_intrinsic_store_global_block_intel; 15267ec681f3Smrg else 15277ec681f3Smrg op = nir_intrinsic_store_ssbo_block_intel; 15287ec681f3Smrg break; 15297ec681f3Smrg case nir_var_mem_global: 15307ec681f3Smrg op = nir_intrinsic_store_global_block_intel; 15317ec681f3Smrg break; 15327ec681f3Smrg case nir_var_mem_shared: 15337ec681f3Smrg op = nir_intrinsic_store_shared_block_intel; 15347ec681f3Smrg break; 15357ec681f3Smrg default: 15367ec681f3Smrg unreachable("Unsupported explicit IO variable mode"); 15377ec681f3Smrg } 15387e102996Smaya break; 15397ec681f3Smrg 15407e102996Smaya default: 15417ec681f3Smrg unreachable("Invalid intrinsic"); 15427e102996Smaya } 15437e102996Smaya 15447e102996Smaya nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); 15457e102996Smaya 15467ec681f3Smrg if (value->bit_size == 1) { 15477ec681f3Smrg /* For shared, we can go ahead and use NIR's and/or the back-end's 15487ec681f3Smrg * standard encoding for booleans rather than forcing a 0/1 boolean. 15497ec681f3Smrg * This should save an instruction or two. 15507ec681f3Smrg * 15517ec681f3Smrg * TODO: Make the native bool bit_size an option. 
15527ec681f3Smrg */ 15537ec681f3Smrg if (mode == nir_var_mem_shared || 15547ec681f3Smrg mode == nir_var_shader_temp || 15557ec681f3Smrg mode == nir_var_function_temp) 15567ec681f3Smrg value = nir_b2b32(b, value); 15577ec681f3Smrg else 15587ec681f3Smrg value = nir_b2i(b, value, 32); 15597ec681f3Smrg } 15607ec681f3Smrg 15617e102996Smaya store->src[0] = nir_src_for_ssa(value); 15627ec681f3Smrg if (addr_format_is_global(addr_format, mode)) { 15637e102996Smaya store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); 15647ec681f3Smrg } else if (addr_format_is_offset(addr_format, mode)) { 15657ec681f3Smrg assert(addr->num_components == 1); 15667ec681f3Smrg store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 15677e102996Smaya } else { 15687e102996Smaya store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); 15697e102996Smaya store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 15707e102996Smaya } 15717e102996Smaya 15727e102996Smaya nir_intrinsic_set_write_mask(store, write_mask); 15737e102996Smaya 15747ec681f3Smrg if (nir_intrinsic_has_access(store)) 15757ec681f3Smrg nir_intrinsic_set_access(store, nir_intrinsic_access(intrin)); 15767e102996Smaya 15777ec681f3Smrg nir_intrinsic_set_align(store, align_mul, align_offset); 15787e102996Smaya 15797e102996Smaya assert(value->num_components == 1 || 15807e102996Smaya value->num_components == intrin->num_components); 15817e102996Smaya store->num_components = value->num_components; 15827e102996Smaya 15837e102996Smaya assert(value->bit_size % 8 == 0); 15847e102996Smaya 15857e102996Smaya if (addr_format_needs_bounds_check(addr_format)) { 15867ec681f3Smrg /* TODO: Better handle block_intel. 
*/ 15877e102996Smaya const unsigned store_size = (value->bit_size / 8) * store->num_components; 15887e102996Smaya nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size)); 15897e102996Smaya 15907e102996Smaya nir_builder_instr_insert(b, &store->instr); 15917e102996Smaya 15927e102996Smaya nir_pop_if(b, NULL); 15937e102996Smaya } else { 15947e102996Smaya nir_builder_instr_insert(b, &store->instr); 15957e102996Smaya } 15967e102996Smaya} 15977e102996Smaya 15987e102996Smayastatic nir_ssa_def * 15997e102996Smayabuild_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin, 16007ec681f3Smrg nir_ssa_def *addr, nir_address_format addr_format, 16017ec681f3Smrg nir_variable_mode modes) 16027e102996Smaya{ 16037ec681f3Smrg modes = canonicalize_generic_modes(modes); 16047ec681f3Smrg 16057ec681f3Smrg if (util_bitcount(modes) > 1) { 16067ec681f3Smrg if (addr_format_is_global(addr_format, modes)) { 16077ec681f3Smrg return build_explicit_io_atomic(b, intrin, addr, addr_format, 16087ec681f3Smrg nir_var_mem_global); 16097ec681f3Smrg } else if (modes & nir_var_function_temp) { 16107ec681f3Smrg nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 16117ec681f3Smrg nir_var_function_temp)); 16127ec681f3Smrg nir_ssa_def *res1 = 16137ec681f3Smrg build_explicit_io_atomic(b, intrin, addr, addr_format, 16147ec681f3Smrg nir_var_function_temp); 16157ec681f3Smrg nir_push_else(b, NULL); 16167ec681f3Smrg nir_ssa_def *res2 = 16177ec681f3Smrg build_explicit_io_atomic(b, intrin, addr, addr_format, 16187ec681f3Smrg modes & ~nir_var_function_temp); 16197ec681f3Smrg nir_pop_if(b, NULL); 16207ec681f3Smrg return nir_if_phi(b, res1, res2); 16217ec681f3Smrg } else { 16227ec681f3Smrg nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 16237ec681f3Smrg nir_var_mem_shared)); 16247ec681f3Smrg assert(modes & nir_var_mem_shared); 16257ec681f3Smrg nir_ssa_def *res1 = 16267ec681f3Smrg build_explicit_io_atomic(b, intrin, addr, addr_format, 16277ec681f3Smrg 
nir_var_mem_shared); 16287ec681f3Smrg nir_push_else(b, NULL); 16297ec681f3Smrg assert(modes & nir_var_mem_global); 16307ec681f3Smrg nir_ssa_def *res2 = 16317ec681f3Smrg build_explicit_io_atomic(b, intrin, addr, addr_format, 16327ec681f3Smrg nir_var_mem_global); 16337ec681f3Smrg nir_pop_if(b, NULL); 16347ec681f3Smrg return nir_if_phi(b, res1, res2); 16357ec681f3Smrg } 16367ec681f3Smrg } 16377ec681f3Smrg 16387ec681f3Smrg assert(util_bitcount(modes) == 1); 16397ec681f3Smrg const nir_variable_mode mode = modes; 16407ec681f3Smrg 16417e102996Smaya const unsigned num_data_srcs = 16427e102996Smaya nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1; 16437e102996Smaya 16447e102996Smaya nir_intrinsic_op op; 16457e102996Smaya switch (mode) { 16467e102996Smaya case nir_var_mem_ssbo: 16477ec681f3Smrg if (addr_format_is_global(addr_format, mode)) 16487e102996Smaya op = global_atomic_for_deref(intrin->intrinsic); 16497e102996Smaya else 16507e102996Smaya op = ssbo_atomic_for_deref(intrin->intrinsic); 16517e102996Smaya break; 16527e102996Smaya case nir_var_mem_global: 16537ec681f3Smrg assert(addr_format_is_global(addr_format, mode)); 16547e102996Smaya op = global_atomic_for_deref(intrin->intrinsic); 16557e102996Smaya break; 16567ec681f3Smrg case nir_var_mem_shared: 16577ec681f3Smrg assert(addr_format_is_offset(addr_format, mode)); 16587ec681f3Smrg op = shared_atomic_for_deref(intrin->intrinsic); 16597ec681f3Smrg break; 16607e102996Smaya default: 16617e102996Smaya unreachable("Unsupported explicit IO variable mode"); 16627e102996Smaya } 16637e102996Smaya 16647e102996Smaya nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op); 16657e102996Smaya 16667e102996Smaya unsigned src = 0; 16677ec681f3Smrg if (addr_format_is_global(addr_format, mode)) { 16687e102996Smaya atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); 16697ec681f3Smrg } else if (addr_format_is_offset(addr_format, mode)) { 16707ec681f3Smrg assert(addr->num_components == 1); 
16717ec681f3Smrg atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 16727e102996Smaya } else { 16737e102996Smaya atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); 16747e102996Smaya atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 16757e102996Smaya } 16767e102996Smaya for (unsigned i = 0; i < num_data_srcs; i++) { 16777e102996Smaya atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa); 16787e102996Smaya } 16797e102996Smaya 16807e102996Smaya /* Global atomics don't have access flags because they assume that the 16817e102996Smaya * address may be non-uniform. 16827e102996Smaya */ 16837ec681f3Smrg if (nir_intrinsic_has_access(atomic)) 16847e102996Smaya nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin)); 16857e102996Smaya 16867e102996Smaya assert(intrin->dest.ssa.num_components == 1); 16877e102996Smaya nir_ssa_dest_init(&atomic->instr, &atomic->dest, 16887ec681f3Smrg 1, intrin->dest.ssa.bit_size, NULL); 16897e102996Smaya 16907e102996Smaya assert(atomic->dest.ssa.bit_size % 8 == 0); 16917e102996Smaya 16927e102996Smaya if (addr_format_needs_bounds_check(addr_format)) { 16937e102996Smaya const unsigned atomic_size = atomic->dest.ssa.bit_size / 8; 16947e102996Smaya nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size)); 16957e102996Smaya 16967e102996Smaya nir_builder_instr_insert(b, &atomic->instr); 16977e102996Smaya 16987e102996Smaya nir_pop_if(b, NULL); 16997e102996Smaya return nir_if_phi(b, &atomic->dest.ssa, 17007e102996Smaya nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size)); 17017e102996Smaya } else { 17027e102996Smaya nir_builder_instr_insert(b, &atomic->instr); 17037e102996Smaya return &atomic->dest.ssa; 17047e102996Smaya } 17057e102996Smaya} 17067e102996Smaya 17077e102996Smayanir_ssa_def * 17087e102996Smayanir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref, 17097e102996Smaya nir_ssa_def *base_addr, 17107e102996Smaya nir_address_format 
addr_format) 17117e102996Smaya{ 17127e102996Smaya assert(deref->dest.is_ssa); 17137e102996Smaya switch (deref->deref_type) { 17147e102996Smaya case nir_deref_type_var: 17157ec681f3Smrg return build_addr_for_var(b, deref->var, addr_format); 17167e102996Smaya 17177e102996Smaya case nir_deref_type_array: { 17187ec681f3Smrg unsigned stride = nir_deref_instr_array_stride(deref); 17197e102996Smaya assert(stride > 0); 17207e102996Smaya 17217e102996Smaya nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); 17227ec681f3Smrg index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format)); 17237ec681f3Smrg return build_addr_iadd(b, base_addr, addr_format, deref->modes, 17247ec681f3Smrg nir_amul_imm(b, index, stride)); 17257e102996Smaya } 17267e102996Smaya 17277e102996Smaya case nir_deref_type_ptr_as_array: { 17287e102996Smaya nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); 17297ec681f3Smrg index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format)); 17307ec681f3Smrg unsigned stride = nir_deref_instr_array_stride(deref); 17317ec681f3Smrg return build_addr_iadd(b, base_addr, addr_format, deref->modes, 17327ec681f3Smrg nir_amul_imm(b, index, stride)); 17337e102996Smaya } 17347e102996Smaya 17357e102996Smaya case nir_deref_type_array_wildcard: 17367e102996Smaya unreachable("Wildcards should be lowered by now"); 17377e102996Smaya break; 17387e102996Smaya 17397e102996Smaya case nir_deref_type_struct: { 17407e102996Smaya nir_deref_instr *parent = nir_deref_instr_parent(deref); 17417e102996Smaya int offset = glsl_get_struct_field_offset(parent->type, 17427e102996Smaya deref->strct.index); 17437e102996Smaya assert(offset >= 0); 17447ec681f3Smrg return build_addr_iadd_imm(b, base_addr, addr_format, 17457ec681f3Smrg deref->modes, offset); 17467e102996Smaya } 17477e102996Smaya 17487e102996Smaya case nir_deref_type_cast: 17497e102996Smaya /* Nothing to do here */ 17507e102996Smaya return base_addr; 17517e102996Smaya } 17527e102996Smaya 
17537e102996Smaya unreachable("Invalid NIR deref type"); 17547e102996Smaya} 17557e102996Smaya 17567e102996Smayavoid 17577e102996Smayanir_lower_explicit_io_instr(nir_builder *b, 17587e102996Smaya nir_intrinsic_instr *intrin, 17597e102996Smaya nir_ssa_def *addr, 17607e102996Smaya nir_address_format addr_format) 17617e102996Smaya{ 17627e102996Smaya b->cursor = nir_after_instr(&intrin->instr); 17637e102996Smaya 17647e102996Smaya nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 17657e102996Smaya unsigned vec_stride = glsl_get_explicit_stride(deref->type); 17667e102996Smaya unsigned scalar_size = type_scalar_size_bytes(deref->type); 17677e102996Smaya assert(vec_stride == 0 || glsl_type_is_vector(deref->type)); 17687e102996Smaya assert(vec_stride == 0 || vec_stride >= scalar_size); 17697e102996Smaya 17707ec681f3Smrg uint32_t align_mul, align_offset; 17717ec681f3Smrg if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) { 17727ec681f3Smrg /* If we don't have an alignment from the deref, assume scalar */ 17737ec681f3Smrg align_mul = scalar_size; 17747ec681f3Smrg align_offset = 0; 17757ec681f3Smrg } 17767ec681f3Smrg 17777ec681f3Smrg switch (intrin->intrinsic) { 17787ec681f3Smrg case nir_intrinsic_load_deref: { 17797e102996Smaya nir_ssa_def *value; 17807e102996Smaya if (vec_stride > scalar_size) { 17817ec681f3Smrg nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, }; 17827e102996Smaya for (unsigned i = 0; i < intrin->num_components; i++) { 17837ec681f3Smrg unsigned comp_offset = i * vec_stride; 17847e102996Smaya nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format, 17857ec681f3Smrg deref->modes, 17867ec681f3Smrg comp_offset); 17877e102996Smaya comps[i] = build_explicit_io_load(b, intrin, comp_addr, 17887ec681f3Smrg addr_format, deref->modes, 17897ec681f3Smrg align_mul, 17907ec681f3Smrg (align_offset + comp_offset) % 17917ec681f3Smrg align_mul, 17927ec681f3Smrg 1); 17937e102996Smaya } 17947e102996Smaya value = nir_vec(b, comps, 
intrin->num_components); 17957e102996Smaya } else { 17967e102996Smaya value = build_explicit_io_load(b, intrin, addr, addr_format, 17977ec681f3Smrg deref->modes, align_mul, align_offset, 17987e102996Smaya intrin->num_components); 17997e102996Smaya } 18007ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 18017ec681f3Smrg break; 18027ec681f3Smrg } 18037ec681f3Smrg 18047ec681f3Smrg case nir_intrinsic_store_deref: { 18057e102996Smaya assert(intrin->src[1].is_ssa); 18067e102996Smaya nir_ssa_def *value = intrin->src[1].ssa; 18077e102996Smaya nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin); 18087e102996Smaya if (vec_stride > scalar_size) { 18097e102996Smaya for (unsigned i = 0; i < intrin->num_components; i++) { 18107e102996Smaya if (!(write_mask & (1 << i))) 18117e102996Smaya continue; 18127e102996Smaya 18137ec681f3Smrg unsigned comp_offset = i * vec_stride; 18147e102996Smaya nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format, 18157ec681f3Smrg deref->modes, 18167ec681f3Smrg comp_offset); 18177e102996Smaya build_explicit_io_store(b, intrin, comp_addr, addr_format, 18187ec681f3Smrg deref->modes, align_mul, 18197ec681f3Smrg (align_offset + comp_offset) % align_mul, 18207e102996Smaya nir_channel(b, value, i), 1); 18217e102996Smaya } 18227e102996Smaya } else { 18237e102996Smaya build_explicit_io_store(b, intrin, addr, addr_format, 18247ec681f3Smrg deref->modes, align_mul, align_offset, 18257e102996Smaya value, write_mask); 18267e102996Smaya } 18277ec681f3Smrg break; 18287ec681f3Smrg } 18297ec681f3Smrg 18307ec681f3Smrg case nir_intrinsic_load_deref_block_intel: { 18317ec681f3Smrg nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format, 18327ec681f3Smrg deref->modes, 18337ec681f3Smrg align_mul, align_offset, 18347ec681f3Smrg intrin->num_components); 18357ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 18367ec681f3Smrg break; 18377ec681f3Smrg } 18387ec681f3Smrg 18397ec681f3Smrg case 
nir_intrinsic_store_deref_block_intel: { 18407ec681f3Smrg assert(intrin->src[1].is_ssa); 18417ec681f3Smrg nir_ssa_def *value = intrin->src[1].ssa; 18427ec681f3Smrg const nir_component_mask_t write_mask = 0; 18437ec681f3Smrg build_explicit_io_store(b, intrin, addr, addr_format, 18447ec681f3Smrg deref->modes, align_mul, align_offset, 18457ec681f3Smrg value, write_mask); 18467ec681f3Smrg break; 18477ec681f3Smrg } 18487ec681f3Smrg 18497ec681f3Smrg default: { 18507e102996Smaya nir_ssa_def *value = 18517ec681f3Smrg build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes); 18527ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 18537ec681f3Smrg break; 18547ec681f3Smrg } 18557e102996Smaya } 18567e102996Smaya 18577e102996Smaya nir_instr_remove(&intrin->instr); 18587e102996Smaya} 18597e102996Smaya 18607ec681f3Smrgbool 18617ec681f3Smrgnir_get_explicit_deref_align(nir_deref_instr *deref, 18627ec681f3Smrg bool default_to_type_align, 18637ec681f3Smrg uint32_t *align_mul, 18647ec681f3Smrg uint32_t *align_offset) 18657ec681f3Smrg{ 18667ec681f3Smrg if (deref->deref_type == nir_deref_type_var) { 18677ec681f3Smrg /* If we see a variable, align_mul is effectively infinite because we 18687ec681f3Smrg * know the offset exactly (up to the offset of the base pointer for the 18697ec681f3Smrg * given variable mode). We have to pick something so we choose 256B 18707ec681f3Smrg * as an arbitrary alignment which seems high enough for any reasonable 18717ec681f3Smrg * wide-load use-case. Back-ends should clamp alignments down if 256B 18727ec681f3Smrg * is too large for some reason. 18737ec681f3Smrg */ 18747ec681f3Smrg *align_mul = 256; 18757ec681f3Smrg *align_offset = deref->var->data.driver_location % 256; 18767ec681f3Smrg return true; 18777ec681f3Smrg } 18787ec681f3Smrg 18797ec681f3Smrg /* If we're a cast deref that has an alignment, use that. 
*/ 18807ec681f3Smrg if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) { 18817ec681f3Smrg *align_mul = deref->cast.align_mul; 18827ec681f3Smrg *align_offset = deref->cast.align_offset; 18837ec681f3Smrg return true; 18847ec681f3Smrg } 18857ec681f3Smrg 18867ec681f3Smrg /* Otherwise, we need to compute the alignment based on the parent */ 18877ec681f3Smrg nir_deref_instr *parent = nir_deref_instr_parent(deref); 18887ec681f3Smrg if (parent == NULL) { 18897ec681f3Smrg assert(deref->deref_type == nir_deref_type_cast); 18907ec681f3Smrg if (default_to_type_align) { 18917ec681f3Smrg /* If we don't have a parent, assume the type's alignment, if any. */ 18927ec681f3Smrg unsigned type_align = glsl_get_explicit_alignment(deref->type); 18937ec681f3Smrg if (type_align == 0) 18947ec681f3Smrg return false; 18957ec681f3Smrg 18967ec681f3Smrg *align_mul = type_align; 18977ec681f3Smrg *align_offset = 0; 18987ec681f3Smrg return true; 18997ec681f3Smrg } else { 19007ec681f3Smrg return false; 19017ec681f3Smrg } 19027ec681f3Smrg } 19037ec681f3Smrg 19047ec681f3Smrg uint32_t parent_mul, parent_offset; 19057ec681f3Smrg if (!nir_get_explicit_deref_align(parent, default_to_type_align, 19067ec681f3Smrg &parent_mul, &parent_offset)) 19077ec681f3Smrg return false; 19087ec681f3Smrg 19097ec681f3Smrg switch (deref->deref_type) { 19107ec681f3Smrg case nir_deref_type_var: 19117ec681f3Smrg unreachable("Handled above"); 19127ec681f3Smrg 19137ec681f3Smrg case nir_deref_type_array: 19147ec681f3Smrg case nir_deref_type_array_wildcard: 19157ec681f3Smrg case nir_deref_type_ptr_as_array: { 19167ec681f3Smrg const unsigned stride = nir_deref_instr_array_stride(deref); 19177ec681f3Smrg if (stride == 0) 19187ec681f3Smrg return false; 19197ec681f3Smrg 19207ec681f3Smrg if (deref->deref_type != nir_deref_type_array_wildcard && 19217ec681f3Smrg nir_src_is_const(deref->arr.index)) { 19227ec681f3Smrg unsigned offset = nir_src_as_uint(deref->arr.index) * stride; 19237ec681f3Smrg *align_mul = 
parent_mul; 19247ec681f3Smrg *align_offset = (parent_offset + offset) % parent_mul; 19257ec681f3Smrg } else { 19267ec681f3Smrg /* If this is a wildcard or an indirect deref, we have to go with the 19277ec681f3Smrg * power-of-two gcd. 19287ec681f3Smrg */ 19297ec681f3Smrg *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1)); 19307ec681f3Smrg *align_offset = parent_offset % *align_mul; 19317ec681f3Smrg } 19327ec681f3Smrg return true; 19337ec681f3Smrg } 19347ec681f3Smrg 19357ec681f3Smrg case nir_deref_type_struct: { 19367ec681f3Smrg const int offset = glsl_get_struct_field_offset(parent->type, 19377ec681f3Smrg deref->strct.index); 19387ec681f3Smrg if (offset < 0) 19397ec681f3Smrg return false; 19407ec681f3Smrg 19417ec681f3Smrg *align_mul = parent_mul; 19427ec681f3Smrg *align_offset = (parent_offset + offset) % parent_mul; 19437ec681f3Smrg return true; 19447ec681f3Smrg } 19457ec681f3Smrg 19467ec681f3Smrg case nir_deref_type_cast: 19477ec681f3Smrg /* We handled the explicit alignment case above. */ 19487ec681f3Smrg assert(deref->cast.align_mul == 0); 19497ec681f3Smrg *align_mul = parent_mul; 19507ec681f3Smrg *align_offset = parent_offset; 19517ec681f3Smrg return true; 19527ec681f3Smrg } 19537ec681f3Smrg 19547ec681f3Smrg unreachable("Invalid deref_instr_type"); 19557ec681f3Smrg} 19567ec681f3Smrg 19577e102996Smayastatic void 19587e102996Smayalower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref, 19597e102996Smaya nir_address_format addr_format) 19607e102996Smaya{ 19617e102996Smaya /* Just delete the deref if it's not used. We can't use 19627e102996Smaya * nir_deref_instr_remove_if_unused here because it may remove more than 19637e102996Smaya * one deref which could break our list walking since we walk the list 19647e102996Smaya * backwards. 
19657e102996Smaya */ 19667ec681f3Smrg assert(list_is_empty(&deref->dest.ssa.if_uses)); 19677ec681f3Smrg if (list_is_empty(&deref->dest.ssa.uses)) { 19687e102996Smaya nir_instr_remove(&deref->instr); 19697e102996Smaya return; 19707e102996Smaya } 19717e102996Smaya 19727e102996Smaya b->cursor = nir_after_instr(&deref->instr); 19737e102996Smaya 19747e102996Smaya nir_ssa_def *base_addr = NULL; 19757e102996Smaya if (deref->deref_type != nir_deref_type_var) { 19767e102996Smaya assert(deref->parent.is_ssa); 19777e102996Smaya base_addr = deref->parent.ssa; 19787e102996Smaya } 19797e102996Smaya 19807e102996Smaya nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr, 19817e102996Smaya addr_format); 19827ec681f3Smrg assert(addr->bit_size == deref->dest.ssa.bit_size); 19837ec681f3Smrg assert(addr->num_components == deref->dest.ssa.num_components); 19847e102996Smaya 19857e102996Smaya nir_instr_remove(&deref->instr); 19867ec681f3Smrg nir_ssa_def_rewrite_uses(&deref->dest.ssa, addr); 19877e102996Smaya} 19887e102996Smaya 19897e102996Smayastatic void 19907e102996Smayalower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin, 19917e102996Smaya nir_address_format addr_format) 19927e102996Smaya{ 19937e102996Smaya assert(intrin->src[0].is_ssa); 19947e102996Smaya nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format); 19957e102996Smaya} 19967e102996Smaya 19977e102996Smayastatic void 19987e102996Smayalower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin, 19997e102996Smaya nir_address_format addr_format) 20007e102996Smaya{ 20017e102996Smaya b->cursor = nir_after_instr(&intrin->instr); 20027e102996Smaya 20037e102996Smaya nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 20047e102996Smaya 20057e102996Smaya assert(glsl_type_is_array(deref->type)); 20067e102996Smaya assert(glsl_get_length(deref->type) == 0); 20077ec681f3Smrg assert(nir_deref_mode_is(deref, nir_var_mem_ssbo)); 20087e102996Smaya unsigned stride = 
glsl_get_explicit_stride(deref->type); 20097e102996Smaya assert(stride > 0); 20107e102996Smaya 20117e102996Smaya nir_ssa_def *addr = &deref->dest.ssa; 20127e102996Smaya nir_ssa_def *index = addr_to_index(b, addr, addr_format); 20137e102996Smaya nir_ssa_def *offset = addr_to_offset(b, addr, addr_format); 20147ec681f3Smrg unsigned access = nir_intrinsic_access(intrin); 20157e102996Smaya 20167ec681f3Smrg nir_ssa_def *arr_size = nir_get_ssbo_size(b, index, .access=access); 20177ec681f3Smrg arr_size = nir_imax(b, nir_isub(b, arr_size, offset), nir_imm_int(b, 0u)); 20187ec681f3Smrg arr_size = nir_idiv(b, arr_size, nir_imm_int(b, stride)); 20197e102996Smaya 20207ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, arr_size); 20217e102996Smaya nir_instr_remove(&intrin->instr); 20227e102996Smaya} 20237e102996Smaya 20247ec681f3Smrgstatic void 20257ec681f3Smrglower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin, 20267ec681f3Smrg nir_address_format addr_format) 20277ec681f3Smrg{ 20287ec681f3Smrg if (addr_format_is_global(addr_format, 0)) { 20297ec681f3Smrg /* If the address format is always global, then the driver can use 20307ec681f3Smrg * global addresses regardless of the mode. In that case, don't create 20317ec681f3Smrg * a check, just whack the intrinsic to addr_mode_is and delegate to the 20327ec681f3Smrg * driver lowering. 
20337ec681f3Smrg */ 20347ec681f3Smrg intrin->intrinsic = nir_intrinsic_addr_mode_is; 20357ec681f3Smrg return; 20367ec681f3Smrg } 20377ec681f3Smrg 20387ec681f3Smrg assert(intrin->src[0].is_ssa); 20397ec681f3Smrg nir_ssa_def *addr = intrin->src[0].ssa; 20407ec681f3Smrg 20417ec681f3Smrg b->cursor = nir_instr_remove(&intrin->instr); 20427ec681f3Smrg 20437ec681f3Smrg nir_ssa_def *is_mode = 20447ec681f3Smrg build_runtime_addr_mode_check(b, addr, addr_format, 20457ec681f3Smrg nir_intrinsic_memory_modes(intrin)); 20467ec681f3Smrg 20477ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, is_mode); 20487ec681f3Smrg} 20497ec681f3Smrg 20507e102996Smayastatic bool 20517e102996Smayanir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes, 20527e102996Smaya nir_address_format addr_format) 20537e102996Smaya{ 20547e102996Smaya bool progress = false; 20557e102996Smaya 20567e102996Smaya nir_builder b; 20577e102996Smaya nir_builder_init(&b, impl); 20587e102996Smaya 20597e102996Smaya /* Walk in reverse order so that we can see the full deref chain when we 20607e102996Smaya * lower the access operations. We lower them assuming that the derefs 20617e102996Smaya * will be turned into address calculations later. 
20627e102996Smaya */ 20637e102996Smaya nir_foreach_block_reverse(block, impl) { 20647e102996Smaya nir_foreach_instr_reverse_safe(instr, block) { 20657e102996Smaya switch (instr->type) { 20667e102996Smaya case nir_instr_type_deref: { 20677e102996Smaya nir_deref_instr *deref = nir_instr_as_deref(instr); 20687ec681f3Smrg if (nir_deref_mode_is_in_set(deref, modes)) { 20697e102996Smaya lower_explicit_io_deref(&b, deref, addr_format); 20707e102996Smaya progress = true; 20717e102996Smaya } 20727e102996Smaya break; 20737e102996Smaya } 20747e102996Smaya 20757e102996Smaya case nir_instr_type_intrinsic: { 20767e102996Smaya nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 20777e102996Smaya switch (intrin->intrinsic) { 20787e102996Smaya case nir_intrinsic_load_deref: 20797e102996Smaya case nir_intrinsic_store_deref: 20807ec681f3Smrg case nir_intrinsic_load_deref_block_intel: 20817ec681f3Smrg case nir_intrinsic_store_deref_block_intel: 20827e102996Smaya case nir_intrinsic_deref_atomic_add: 20837e102996Smaya case nir_intrinsic_deref_atomic_imin: 20847e102996Smaya case nir_intrinsic_deref_atomic_umin: 20857e102996Smaya case nir_intrinsic_deref_atomic_imax: 20867e102996Smaya case nir_intrinsic_deref_atomic_umax: 20877e102996Smaya case nir_intrinsic_deref_atomic_and: 20887e102996Smaya case nir_intrinsic_deref_atomic_or: 20897e102996Smaya case nir_intrinsic_deref_atomic_xor: 20907e102996Smaya case nir_intrinsic_deref_atomic_exchange: 20917e102996Smaya case nir_intrinsic_deref_atomic_comp_swap: 20927e102996Smaya case nir_intrinsic_deref_atomic_fadd: 20937e102996Smaya case nir_intrinsic_deref_atomic_fmin: 20947e102996Smaya case nir_intrinsic_deref_atomic_fmax: 20957e102996Smaya case nir_intrinsic_deref_atomic_fcomp_swap: { 20967e102996Smaya nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 20977ec681f3Smrg if (nir_deref_mode_is_in_set(deref, modes)) { 20987e102996Smaya lower_explicit_io_access(&b, intrin, addr_format); 20997e102996Smaya progress = true; 
               }
               break;
            }

            case nir_intrinsic_deref_buffer_array_length: {
               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
               if (nir_deref_mode_is_in_set(deref, modes)) {
                  lower_explicit_io_array_length(&b, intrin, addr_format);
                  progress = true;
               }
               break;
            }

            case nir_intrinsic_deref_mode_is: {
               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
               if (nir_deref_mode_is_in_set(deref, modes)) {
                  lower_explicit_io_mode_check(&b, intrin, addr_format);
                  progress = true;
               }
               break;
            }

            default:
               break;
            }
            break;
         }

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      /* No instructions were touched, so all analyses remain valid. */
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

/** Lower explicitly laid out I/O access to byte offset/address intrinsics
 *
 * This pass is intended to be used for any I/O which touches memory external
 * to the shader or which is directly visible to the client.  It requires that
 * all data types in the given modes have explicit stride/offset decorations
 * to tell it exactly how to calculate the offset/address for the given load,
 * store, or atomic operation.  If the offset/stride information does not come
 * from the client explicitly (as with shared variables in GL or Vulkan),
 * nir_lower_vars_to_explicit_types() can be used to add them.
 *
 * Unlike nir_lower_io, this pass is fully capable of handling incomplete
 * pointer chains which may contain cast derefs.  It does so by walking the
 * deref chain backwards and simply replacing each deref, one at a time, with
 * the appropriate address calculation.  The pass takes a nir_address_format
 * parameter which describes how the offset or address is to be represented
 * during calculations.  By ensuring that the address is always in a
 * consistent format, pointers can safely be conjured from thin air by the
 * driver, stored to variables, passed through phis, etc.
 *
 * The one exception to the simple algorithm described above is for handling
 * row-major matrices in which case we may look down one additional level of
 * the deref chain.
 *
 * This pass is also capable of handling OpenCL generic pointers.  If the
 * address mode is global, it will lower any ambiguous (more than one mode)
 * access to global and pass through the deref_mode_is run-time checks as
 * addr_mode_is.  This assumes the driver has somehow mapped shared and
 * scratch memory to the global address space.  For other modes such as
 * 62bit_generic, there is an enum embedded in the address and we lower
 * ambiguous access to an if-ladder and deref_mode_is to a check against the
 * embedded enum.  If nir_lower_explicit_io is called on any shader that
 * contains generic pointers, it must either be used on all of the generic
 * modes or none.
 */
bool
nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
                      nir_address_format addr_format)
{
   bool progress = false;

   /* Lower each function impl independently; progress is the union. */
   nir_foreach_function(function, shader) {
      if (function->impl &&
          nir_lower_explicit_io_impl(function->impl, modes, addr_format))
         progress = true;
   }

   return progress;
}

/* Rewrite the types of derefs in the given modes to their explicitly laid
 * out equivalents (per type_info) and keep cast-deref pointer strides in
 * sync with the new explicit size/alignment.
 */
static bool
nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
                                      nir_variable_mode modes,
                                      glsl_type_size_align_func type_info)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_deref)
            continue;

         nir_deref_instr *deref = nir_instr_as_deref(instr);
         if (!nir_deref_mode_is_in_set(deref, modes))
            continue;

         unsigned size, alignment;
         const struct glsl_type *new_type =
            glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
         if (new_type != deref->type) {
            progress = true;
            deref->type = new_type;
         }
         if (deref->deref_type == nir_deref_type_cast) {
            /* See also glsl_type::get_explicit_type_for_size_align() */
            unsigned new_stride = align(size, alignment);
            if (new_stride != deref->cast.ptr_stride) {
               deref->cast.ptr_stride = new_stride;
               progress = true;
            }
         }
      }
   }

   if (progress) {
      /* Only deref types/strides changed; CFG and SSA structure survive. */
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance |
                                  nir_metadata_live_ssa_defs |
                                  nir_metadata_loop_analysis);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

/* Assign an explicit type and a byte driver_location to every variable of
 * the given mode in `vars`, packing them sequentially (ALIGN_POT per-var)
 * starting from the mode's current size counter on the shader, and write
 * the final packed size back to that counter.
 */
static bool
lower_vars_to_explicit(nir_shader *shader,
                       struct exec_list *vars, nir_variable_mode mode,
                       glsl_type_size_align_func type_info)
{
   bool progress = false;
   unsigned offset;
   /* Start packing after whatever this mode has already allocated. */
   switch (mode) {
   case nir_var_uniform:
      assert(shader->info.stage == MESA_SHADER_KERNEL);
      offset = 0;
      break;
   case nir_var_function_temp:
   case nir_var_shader_temp:
      offset = shader->scratch_size;
      break;
   case nir_var_mem_shared:
      offset = shader->info.shared_size;
      break;
   case nir_var_mem_constant:
      offset = shader->constant_data_size;
      break;
   case nir_var_shader_call_data:
   case nir_var_ray_hit_attrib:
      offset = 0;
      break;
   default:
      unreachable("Unsupported mode");
   }
   nir_foreach_variable_in_list(var, vars) {
      if (var->data.mode != mode)
         continue;

      unsigned size, align;
      const struct glsl_type *explicit_type =
         glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);

      if (explicit_type != var->type)
         var->type = explicit_type;

      /* An empty struct legitimately reports align == 0; allow it past the
       * power-of-two assert below (it occupies no space anyway).
       */
      UNUSED bool is_empty_struct =
         glsl_type_is_struct_or_ifc(explicit_type) &&
         glsl_get_length(explicit_type) == 0;

      assert(util_is_power_of_two_nonzero(align) || is_empty_struct);
      var->data.driver_location = ALIGN_POT(offset, align);
      offset = var->data.driver_location + size;
      progress = true;
   }

   /* Publish the grown size back to the shader-level counter. */
   switch (mode) {
   case nir_var_uniform:
      assert(shader->info.stage == MESA_SHADER_KERNEL);
      shader->num_uniforms = offset;
      break;
   case nir_var_shader_temp:
   case nir_var_function_temp:
      shader->scratch_size = offset;
      break;
   case nir_var_mem_shared:
      shader->info.shared_size = offset;
      break;
   case nir_var_mem_constant:
      shader->constant_data_size = offset;
      break;
   case nir_var_shader_call_data:
   case nir_var_ray_hit_attrib:
      /* These modes have no shader-level size counter to update. */
      break;
   default:
      unreachable("Unsupported mode");
   }

   return progress;
}

/* If nir_lower_vars_to_explicit_types is called on any shader that contains
 * generic pointers, it must either be used on all of the generic modes or
 * none.
 */
bool
nir_lower_vars_to_explicit_types(nir_shader *shader,
                                 nir_variable_mode modes,
                                 glsl_type_size_align_func type_info)
{
   /* TODO: Situations which need to be handled to support more modes:
    * - row-major matrices
    * - compact shader inputs/outputs
    * - interface types
    */
   ASSERTED nir_variable_mode supported =
      nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
      nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
      nir_var_shader_call_data | nir_var_ray_hit_attrib;
   assert(!(modes & ~supported) && "unsupported");

   bool progress = false;

   if (modes & nir_var_uniform)
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);

   if (modes & nir_var_mem_shared) {
      /* An explicit shared-memory layout from the client would conflict
       * with the packing this pass computes.
       */
      assert(!shader->info.shared_memory_explicit_layout);
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
   }

   if (modes & nir_var_shader_temp)
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
   if (modes & nir_var_mem_constant)
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
   if (modes & nir_var_shader_call_data)
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
   if (modes & nir_var_ray_hit_attrib)
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);

   nir_foreach_function(function, shader) {
      if (function->impl) {
         /* Function-temp variables live in per-impl lists, not on the
          * shader, so they are handled here.
          */
         if (modes & nir_var_function_temp)
            progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);

         progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
      }
   }

   return progress;
}

/* Serialize a nir_constant into `dst` following the explicit (stride/offset
 * decorated) layout of `type`.  Recurses through arrays, matrices, and
 * structs; dst_size only bounds-checks via asserts, it is not a hard clamp.
 */
static void
write_constant(void *dst, size_t dst_size,
               const nir_constant *c, const struct glsl_type *type)
{
   if (glsl_type_is_vector_or_scalar(type)) {
      const unsigned num_components = glsl_get_vector_elements(type);
      const unsigned bit_size = glsl_get_bit_size(type);
      if (bit_size == 1) {
         /* Booleans are special-cased to be 32-bit
          *
          * TODO: Make the native bool bit_size an option.
          */
         assert(num_components * 4 <= dst_size);
         for (unsigned i = 0; i < num_components; i++) {
            /* NIR true is all-ones: negate the 0/1 bool to get 0 / ~0. */
            int32_t b32 = -(int)c->values[i].b;
            memcpy((char *)dst + i * 4, &b32, 4);
         }
      } else {
         assert(bit_size >= 8 && bit_size % 8 == 0);
         const unsigned byte_size = bit_size / 8;
         assert(num_components * byte_size <= dst_size);
         for (unsigned i = 0; i < num_components; i++) {
            /* Annoyingly, thanks to packed structs, we can't make any
             * assumptions about the alignment of dst.  To avoid any strange
             * issues with unaligned writes, we always use memcpy.
             */
            memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
         }
      }
   } else if (glsl_type_is_array_or_matrix(type)) {
      const unsigned array_len = glsl_get_length(type);
      const unsigned stride = glsl_get_explicit_stride(type);
      assert(stride > 0);
      const struct glsl_type *elem_type = glsl_get_array_element(type);
      for (unsigned i = 0; i < array_len; i++) {
         unsigned elem_offset = i * stride;
         assert(elem_offset < dst_size);
         write_constant((char *)dst + elem_offset, dst_size - elem_offset,
                        c->elements[i], elem_type);
      }
   } else {
      assert(glsl_type_is_struct_or_ifc(type));
      const unsigned num_fields = glsl_get_length(type);
      for (unsigned i = 0; i < num_fields; i++) {
         const int field_offset = glsl_get_struct_field_offset(type, i);
         assert(field_offset >= 0 && field_offset < dst_size);
         const struct glsl_type *field_type = glsl_get_struct_field(type, i);
         write_constant((char *)dst + field_offset, dst_size - field_offset,
                        c->elements[i], field_type);
      }
   }
}

void
nir_gather_explicit_io_initializers(nir_shader *shader,
                                    void *dst, size_t dst_size,
                                    nir_variable_mode mode)
{
   /* It doesn't really make sense to gather initializers for more than one
    * mode at a time.  If this ever becomes well-defined, we can drop the
    * assert then.
    */
   assert(util_bitcount(mode) == 1);

   nir_foreach_variable_with_modes(var, shader, mode) {
      assert(var->data.driver_location < dst_size);
      write_constant((char *)dst + var->data.driver_location,
                     dst_size - var->data.driver_location,
                     var->constant_initializer, var->type);
   }
}

/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   /* For these, the offset/address is the first source. */
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_load_fs_input_interp_deltas:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_xor:
      return &instr->src[0];
   /* For these, src[0] is a buffer index, vertex index, or value to store;
    * the offset is the second source.
    */
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_input_vertex:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_per_primitive_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_shared:
   case nir_intrinsic_store_global:
   case nir_intrinsic_store_scratch:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
      return &instr->src[1];
   /* For these, src[0] is the value and src[1] an index; the offset is the
    * third source.
    */
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_per_primitive_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      /* src[0] is the value being stored; the vertex index follows it. */
      return &instr->src[1];
   default:
      return NULL;
   }
}

/**
 * Return the numeric constant that identifies a NULL pointer for each address
 * format.
 */
const nir_const_value *
nir_address_format_null_value(nir_address_format addr_format)
{
   const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
      [nir_address_format_32bit_global] = {{0}},
      [nir_address_format_64bit_global] = {{0}},
      [nir_address_format_64bit_global_32bit_offset] = {{0}},
      [nir_address_format_64bit_bounded_global] = {{0}},
      [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
      [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}},
      [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}},
      [nir_address_format_32bit_offset] = {{.u32 = ~0}},
      [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}},
      [nir_address_format_62bit_generic] = {{.u64 = 0}},
      [nir_address_format_logical] = {{.u32 = ~0}},
   };

   assert(addr_format < ARRAY_SIZE(null_values));
   return null_values[addr_format];
}

nir_ssa_def *
nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
                   nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_64bit_bounded_global:
   case nir_address_format_32bit_index_offset:
   case nir_address_format_vec2_index_32bit_offset:
   case nir_address_format_32bit_offset:
   case nir_address_format_62bit_generic:
      return nir_ball_iequal(b, addr0, addr1);

   case nir_address_format_64bit_global_32bit_offset:
      /* Mask 0xb selects components 0, 1, and 3 — presumably skipping a
       * component that doesn't participate in pointer identity for this
       * format (NOTE(review): confirm against the format's vec4 layout).
       */
      return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
                                nir_channels(b, addr1, 0xb));

   case nir_address_format_32bit_offset_as_64bit:
      assert(addr0->num_components == 1 && addr1->num_components == 1);
      /* Only the low 32 bits carry the offset; compare those. */
      return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));

   case nir_address_format_32bit_index_offset_pack64:
      assert(addr0->num_components == 1 && addr1->num_components == 1);
      return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));

   case nir_address_format_logical:
      unreachable("Unsupported address format");
   }

   unreachable("Invalid address format");
}

nir_ssa_def *
nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
                    nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_32bit_offset:
   case nir_address_format_32bit_index_offset_pack64:
   case nir_address_format_62bit_generic:
      assert(addr0->num_components == 1);
      assert(addr1->num_components == 1);
      return nir_isub(b, addr0, addr1);

   case nir_address_format_32bit_offset_as_64bit:
      assert(addr0->num_components == 1);
      assert(addr1->num_components == 1);
      /* Subtract in 32 bits, then widen back to the format's 64-bit repr. */
      return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      /* Flatten both to plain global addresses before subtracting. */
      return nir_isub(b, addr_to_global(b, addr0, addr_format),
                         addr_to_global(b, addr1, addr_format));

   case nir_address_format_32bit_index_offset:
      assert(addr0->num_components == 2);
      assert(addr1->num_components == 2);
      /* Assume the same buffer index. */
      return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));

   case nir_address_format_vec2_index_32bit_offset:
      assert(addr0->num_components == 3);
      assert(addr1->num_components == 3);
      /* Assume the same buffer index. */
      return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));

   case nir_address_format_logical:
      unreachable("Unsupported address format");
   }

   unreachable("Invalid address format");
}

/* True for intrinsics that read shader inputs. */
static bool
is_input(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
          intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
          intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
}

/* True for intrinsics that read or write shader outputs. */
static bool
is_output(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_output ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
          intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
          intrin->intrinsic == nir_intrinsic_store_output ||
          intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
          intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
}

/* A 64-bit value with >= 3 components occupies two varying slots.
 * NOTE(review): the store branch checks store_output and
 * store_per_vertex_output but not store_per_primitive_output, even though
 * is_output() above includes it — confirm whether that omission is
 * intentional.
 */
static bool is_dual_slot(nir_intrinsic_instr *intrin)
{
   if (intrin->intrinsic == nir_intrinsic_store_output ||
       intrin->intrinsic == nir_intrinsic_store_per_vertex_output) {
      return nir_src_bit_size(intrin->src[0]) == 64 &&
             nir_src_num_components(intrin->src[0]) >= 3;
   }

   return nir_dest_bit_size(intrin->dest) == 64 &&
          nir_dest_num_components(intrin->dest) >= 3;
}

/**
 * This pass adds constant offsets to instr->const_index[0] for input/output
 * intrinsics, and resets the offset source to 0.  Non-constant offsets remain
 * unchanged - since we don't know what part of a compound variable is
 * accessed, we allocate storage for the entire thing. For drivers that use
 * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
 * the offset source will be 0, so that they don't have to add it in manually.
 */

static bool
add_const_offset_to_base_block(nir_block *block, nir_builder *b,
                               nir_variable_mode modes)
{
   bool progress = false;
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (((modes & nir_var_shader_in) && is_input(intrin)) ||
          ((modes & nir_var_shader_out) && is_output(intrin))) {
         nir_src *offset = nir_get_io_offset_src(intrin);

         /* TODO: Better handling of per-view variables here */
         if (nir_src_is_const(*offset) &&
             !nir_intrinsic_io_semantics(intrin).per_view) {
            unsigned off = nir_src_as_uint(*offset);

            /* Fold the constant offset into the base index... */
            nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);

            /* ...and into the slot location, keeping semantics in sync. */
            nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
            sem.location += off;
            /* non-indirect indexing should reduce num_slots */
            sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
            nir_intrinsic_set_io_semantics(intrin, sem);

            /* The offset has been absorbed; replace the source with 0. */
            b->cursor = nir_before_instr(&intrin->instr);
            nir_instr_rewrite_src(&intrin->instr, offset,
                                  nir_src_for_ssa(nir_imm_int(b, 0)));
            progress = true;
         }
      }
   }

   return progress;
}

bool
nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
{
   bool progress = false;

   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_builder b;
         nir_builder_init(&b, f->impl);
         nir_foreach_block(block, f->impl) {
            progress |= add_const_offset_to_base_block(block, &b, modes);
         }
      }
   }

   return progress;
}