/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * into the corresponding input/output intrinsics.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

struct lower_io_state {
   void *dead_ctx;
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type, bool);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

static nir_intrinsic_op
ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid SSBO atomic");
   }
}

static nir_intrinsic_op
global_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid global atomic");
   }
}

void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                         int (*type_size)(const struct glsl_type *, bool))
{
   unsigned location = 0;

   nir_foreach_variable(var, var_list) {
      /*
       * UBOs and SSBOs have their own address spaces, so don't count them
       * towards the number of global uniforms.
       */
      if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo)
         continue;

      var->data.driver_location = location;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;
      location += type_size(var->type, bindless_type_size);
   }

   *size = location;
}
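
/* Typical usage of nir_assign_var_locations() (an illustrative sketch; the
 * callback below is hypothetical and not part of this file):
 *
 *    static int
 *    count_vec4_slots(const struct glsl_type *type, bool bindless)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    unsigned num_uniforms;
 *    nir_assign_var_locations(&shader->uniforms, &num_uniforms,
 *                             count_vec4_slots);
 */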

/**
 * Return true if the given variable is a per-vertex input/output array,
 * such as geometry shader inputs.
 */
bool
nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.mode == nir_var_shader_in)
      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL;

   return false;
}
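
/* For example (illustrative GLSL): a geometry shader input such as
 *
 *    in vec4 color[3];   // one vec4 per input vertex of a triangle
 *
 * is per-vertex: the outermost array index selects the vertex, so
 * get_io_offset() below returns it separately through vertex_index instead
 * of folding it into the offset.
 */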

static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_ssa_def **vertex_index,
              int (*type_size)(const struct glsl_type *, bool),
              unsigned *component, bool bts)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
    * outermost array index separate.  Process the rest normally.
    */
   if (vertex_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
      p++;
   }

   if (path.path[0]->var->data.compact) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         unsigned size = type_size((*p)->type, bts);

         nir_ssa_def *mul =
            nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}
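
/* Worked example (illustrative): for "struct { vec4 a; float b; } s[4]" and
 * a deref of s[i].b, the loop above emits roughly
 *
 *    offset = 0 + i * type_size(struct_type) + type_size(vec4)
 *
 * where the constant struct-field part is folded in with nir_iadd_imm().
 */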

static nir_intrinsic_instr *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component)
{
   const nir_shader *nir = state->builder.shader;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT) {
         assert(vertex_index == NULL);

         nir_intrinsic_op bary_op;
         if (var->data.sample ||
             (state->options & nir_lower_io_force_sample_interpolation))
            bary_op = nir_intrinsic_load_barycentric_sample;
         else if (var->data.centroid)
            bary_op = nir_intrinsic_load_barycentric_centroid;
         else
            bary_op = nir_intrinsic_load_barycentric_pixel;

         barycentric = nir_load_barycentric(&state->builder, bary_op,
                                            var->data.interpolation);
         op = nir_intrinsic_load_interpolated_input;
      } else {
         op = vertex_index ? nir_intrinsic_load_per_vertex_input :
                             nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = vertex_index ? nir_intrinsic_load_per_vertex_output :
                          nir_intrinsic_load_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   case nir_var_mem_shared:
      op = nir_intrinsic_load_shared;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load,
                              state->type_size(var->type, var->data.bindless));

   if (vertex_index) {
      load->src[0] = nir_src_for_ssa(vertex_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   return load;
}
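
/* Sketch of the lowering performed by lower_load() (illustrative NIR; the
 * exact printed syntax is abridged):
 *
 *    vec4 ssa_2 = intrinsic load_deref (ssa_1) ()
 *
 * becomes, for a plain (non-interpolated) input:
 *
 *    vec4 ssa_2 = intrinsic load_input (ssa_0) (base, component)
 *
 * where ssa_0 is the offset computed by get_io_offset().
 */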

static nir_intrinsic_instr *
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
            unsigned component)
{
   nir_variable_mode mode = var->data.mode;

   nir_intrinsic_op op;
   if (mode == nir_var_mem_shared) {
      op = nir_intrinsic_store_shared;
   } else {
      assert(mode == nir_var_shader_out);
      op = vertex_index ? nir_intrinsic_store_per_vertex_output :
                          nir_intrinsic_store_output;
   }

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = intrin->num_components;

   nir_src_copy(&store->src[0], &intrin->src[1], store);

   nir_intrinsic_set_base(store, var->data.driver_location);

   if (mode == nir_var_shader_out)
      nir_intrinsic_set_component(store, component);

   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));

   if (vertex_index)
      store->src[1] = nir_src_for_ssa(vertex_index);

   store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);

   return store;
}

static nir_intrinsic_instr *
lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
             nir_variable *var, nir_ssa_def *offset)
{
   assert(var->data.mode == nir_var_mem_shared);

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
#define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid atomic");
   }

   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(state->builder.shader, op);

   nir_intrinsic_set_base(atomic, var->data.driver_location);

   atomic->src[0] = nir_src_for_ssa(offset);
   assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs ==
          nir_intrinsic_infos[op].num_srcs);
   for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) {
      nir_src_copy(&atomic->src[i], &intrin->src[i], atomic);
   }

   return atomic;
}
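
/* For example (illustrative NIR): on a shared variable,
 *
 *    intrinsic deref_atomic_add (ssa_deref, ssa_val) ()
 *
 * becomes
 *
 *    intrinsic shared_atomic_add (ssa_offset, ssa_val) (base)
 *
 * i.e. the deref source is replaced by the computed offset and the
 * variable's driver_location becomes the base.
 */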

static nir_intrinsic_instr *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_ssa_def *offset, unsigned component)
{
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. */
   if (var->data.interpolation == INTERP_MODE_FLAT)
      return lower_load(intrin, state, NULL, var, offset, component);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset)
      nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);

   nir_builder_instr_insert(&state->builder, &bary_setup->instr);

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader,
                                 nir_intrinsic_load_interpolated_input);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   nir_intrinsic_set_component(load, component);

   load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
   load->src[1] = nir_src_for_ssa(offset);

   return load;
}
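
/* Sketch (illustrative NIR): interpolateAtOffset(input, off) lowers to a
 * barycentric setup plus an interpolated load:
 *
 *    vec2 ssa_bary = intrinsic load_barycentric_at_offset (ssa_off) (interp_mode)
 *    vec4 ssa_val  = intrinsic load_interpolated_input (ssa_bary, ssa_offset)
 *                                                      (base, component)
 */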

static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
      case nir_intrinsic_deref_atomic_add:
      case nir_intrinsic_deref_atomic_imin:
      case nir_intrinsic_deref_atomic_umin:
      case nir_intrinsic_deref_atomic_imax:
      case nir_intrinsic_deref_atomic_umax:
      case nir_intrinsic_deref_atomic_and:
      case nir_intrinsic_deref_atomic_or:
      case nir_intrinsic_deref_atomic_xor:
      case nir_intrinsic_deref_atomic_exchange:
      case nir_intrinsic_deref_atomic_comp_swap:
      case nir_intrinsic_deref_atomic_fadd:
      case nir_intrinsic_deref_atomic_fmin:
      case nir_intrinsic_deref_atomic_fmax:
      case nir_intrinsic_deref_atomic_fcomp_swap:
         /* We can lower the IO for this NIR intrinsic */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics)
            break;
         /* fall through */
      default:
         /* We can't lower the IO for this NIR intrinsic, so skip it */
         continue;
      }

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

      nir_variable *var = nir_deref_instr_get_variable(deref);
      nir_variable_mode mode = var->data.mode;

      if ((state->modes & mode) == 0)
         continue;

      if (mode != nir_var_shader_in &&
          mode != nir_var_shader_out &&
          mode != nir_var_mem_shared &&
          mode != nir_var_uniform)
         continue;

      b->cursor = nir_before_instr(instr);

      const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);

      nir_ssa_def *offset;
      nir_ssa_def *vertex_index = NULL;
      unsigned component_offset = var->data.location_frac;
      bool bindless_type_size = mode == nir_var_shader_in ||
                                mode == nir_var_shader_out ||
                                var->data.bindless;

      offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
                             state->type_size, &component_offset,
                             bindless_type_size);

      nir_intrinsic_instr *replacement;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
         replacement = lower_load(intrin, state, vertex_index, var, offset,
                                  component_offset);
         break;

      case nir_intrinsic_store_deref:
         replacement = lower_store(intrin, state, vertex_index, var, offset,
                                   component_offset);
         break;

      case nir_intrinsic_deref_atomic_add:
      case nir_intrinsic_deref_atomic_imin:
      case nir_intrinsic_deref_atomic_umin:
      case nir_intrinsic_deref_atomic_imax:
      case nir_intrinsic_deref_atomic_umax:
      case nir_intrinsic_deref_atomic_and:
      case nir_intrinsic_deref_atomic_or:
      case nir_intrinsic_deref_atomic_xor:
      case nir_intrinsic_deref_atomic_exchange:
      case nir_intrinsic_deref_atomic_comp_swap:
      case nir_intrinsic_deref_atomic_fadd:
      case nir_intrinsic_deref_atomic_fmin:
      case nir_intrinsic_deref_atomic_fmax:
      case nir_intrinsic_deref_atomic_fcomp_swap:
         assert(vertex_index == NULL);
         replacement = lower_atomic(intrin, state, var, offset);
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
         assert(vertex_index == NULL);
         replacement = lower_interpolate_at(intrin, state, var, offset,
                                            component_offset);
         break;

      default:
         continue;
      }

      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&replacement->instr, &replacement->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&replacement->dest.ssa));
         } else {
            nir_dest_copy(&replacement->dest, &intrin->dest, &intrin->instr);
         }
      }

      nir_instr_insert_before(&intrin->instr, &replacement->instr);
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *, bool),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   nir_builder_init(&state.builder, impl);
   state.dead_ctx = ralloc_context(NULL);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return progress;
}

bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *, bool),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl) {
         progress |= nir_lower_io_impl(function->impl, modes,
                                       type_size, options);
      }
   }

   return progress;
}
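
/* Typical usage of nir_lower_io() (illustrative; type_size_vec4 is a
 * hypothetical driver-provided callback, not part of this file):
 *
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 type_size_vec4, (nir_lower_io_options)0);
 */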

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

static nir_ssa_def *
build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
                nir_address_format addr_format, nir_ssa_def *offset)
{
   assert(offset->num_components == 1);
   assert(addr->bit_size == offset->bit_size);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
      assert(addr->num_components == 1);
      return nir_iadd(b, addr, offset);

   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      return nir_vec4(b, nir_channel(b, addr, 0),
                         nir_channel(b, addr, 1),
                         nir_channel(b, addr, 2),
                         nir_iadd(b, nir_channel(b, addr, 3), offset));

   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_vec2(b, nir_channel(b, addr, 0),
                         nir_iadd(b, nir_channel(b, addr, 1), offset));
   }
   unreachable("Invalid address format");
}

static nir_ssa_def *
build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
                    nir_address_format addr_format, int64_t offset)
{
   return build_addr_iadd(b, addr, addr_format,
                             nir_imm_intN_t(b, offset, addr->bit_size));
}

static nir_ssa_def *
addr_to_index(nir_builder *b, nir_ssa_def *addr,
              nir_address_format addr_format)
{
   assert(addr_format == nir_address_format_32bit_index_offset);
   assert(addr->num_components == 2);
   return nir_channel(b, addr, 0);
}

static nir_ssa_def *
addr_to_offset(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   assert(addr_format == nir_address_format_32bit_index_offset);
   assert(addr->num_components == 2);
   return nir_channel(b, addr, 1);
}

/** Returns true if the given address format resolves to a global address */
static bool
addr_format_is_global(nir_address_format addr_format)
{
   return addr_format == nir_address_format_32bit_global ||
          addr_format == nir_address_format_64bit_global ||
          addr_format == nir_address_format_64bit_bounded_global;
}
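
/* Summary of the address layouts handled here, as implied by the code in
 * this file (descriptive, not normative):
 *
 *    32bit_global:         vec1, a 32-bit global address
 *    64bit_global:         vec1, a 64-bit global address
 *    64bit_bounded_global: vec4 (base_lo, base_hi, bound, offset)
 *    32bit_index_offset:   vec2 (buffer_index, byte_offset)
 */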

static nir_ssa_def *
addr_to_global(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
      assert(addr->num_components == 1);
      return addr;

   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
                         nir_u2u64(b, nir_channel(b, addr, 3)));

   case nir_address_format_32bit_index_offset:
      unreachable("Cannot get a 64-bit address with this address format");
   }

   unreachable("Invalid address format");
}

static bool
addr_format_needs_bounds_check(nir_address_format addr_format)
{
   return addr_format == nir_address_format_64bit_bounded_global;
}

static nir_ssa_def *
addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
                  nir_address_format addr_format, unsigned size)
{
   assert(addr_format == nir_address_format_64bit_bounded_global);
   assert(addr->num_components == 4);
   return nir_ige(b, nir_channel(b, addr, 2),
                     nir_iadd_imm(b, nir_channel(b, addr, 3), size));
}

static nir_ssa_def *
build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
                       nir_ssa_def *addr, nir_address_format addr_format,
                       unsigned num_components)
{
   nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_mem_ubo:
      op = nir_intrinsic_load_ubo;
      break;
   case nir_var_mem_ssbo:
      if (addr_format_is_global(addr_format))
         op = nir_intrinsic_load_global;
      else
         op = nir_intrinsic_load_ssbo;
      break;
   case nir_var_mem_global:
      assert(addr_format_is_global(addr_format));
      op = nir_intrinsic_load_global;
      break;
   case nir_var_shader_in:
      assert(addr_format_is_global(addr_format));
      op = nir_intrinsic_load_kernel_input;
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);

   if (addr_format_is_global(addr_format)) {
      load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else {
      load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   if (mode != nir_var_mem_ubo && mode != nir_var_shader_in)
      nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));

   /* TODO: We should try and provide a better alignment.  For OpenCL, we
    * need to plumb the alignment through from SPIR-V when we have one.
    */
   nir_intrinsic_set_align(load, intrin->dest.ssa.bit_size / 8, 0);

   assert(intrin->dest.is_ssa);
   load->num_components = num_components;
   nir_ssa_dest_init(&load->instr, &load->dest, num_components,
                     intrin->dest.ssa.bit_size, intrin->dest.ssa.name);

   assert(load->dest.ssa.bit_size % 8 == 0);

   if (addr_format_needs_bounds_check(addr_format)) {
      /* The Vulkan spec for robustBufferAccess gives us quite a few options
       * as to what we can do with an OOB read.  Unfortunately, returning
       * undefined values isn't one of them so we return an actual zero.
       */
      nir_ssa_def *zero = nir_imm_zero(b, load->num_components,
                                          load->dest.ssa.bit_size);

      const unsigned load_size =
         (load->dest.ssa.bit_size / 8) * load->num_components;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));

      nir_builder_instr_insert(b, &load->instr);

      nir_pop_if(b, NULL);

      return nir_if_phi(b, &load->dest.ssa, zero);
   } else {
      nir_builder_instr_insert(b, &load->instr);
      return &load->dest.ssa;
   }
}
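
/* The robustBufferAccess path above builds control flow roughly equivalent
 * to (illustrative):
 *
 *    if (addr.z >= addr.w + load_size)
 *       result = <the load>;
 *    else
 *       result = 0;
 *
 * with the merge expressed as a phi by nir_if_phi().
 */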

static void
build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
                        nir_ssa_def *addr, nir_address_format addr_format,
                        nir_ssa_def *value, nir_component_mask_t write_mask)
{
   nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_mem_ssbo:
      if (addr_format_is_global(addr_format))
         op = nir_intrinsic_store_global;
      else
         op = nir_intrinsic_store_ssbo;
      break;
   case nir_var_mem_global:
      assert(addr_format_is_global(addr_format));
      op = nir_intrinsic_store_global;
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);

   store->src[0] = nir_src_for_ssa(value);
   if (addr_format_is_global(addr_format)) {
      store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else {
      store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   nir_intrinsic_set_write_mask(store, write_mask);

   nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));

   /* TODO: We should try and provide a better alignment.  For OpenCL, we
    * need to plumb the alignment through from SPIR-V when we have one.
    */
   nir_intrinsic_set_align(store, value->bit_size / 8, 0);

   assert(value->num_components == 1 ||
          value->num_components == intrin->num_components);
   store->num_components = value->num_components;

   assert(value->bit_size % 8 == 0);

   if (addr_format_needs_bounds_check(addr_format)) {
      const unsigned store_size = (value->bit_size / 8) * store->num_components;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));

      nir_builder_instr_insert(b, &store->instr);

      nir_pop_if(b, NULL);
   } else {
      nir_builder_instr_insert(b, &store->instr);
   }
}

static nir_ssa_def *
build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
                         nir_ssa_def *addr, nir_address_format addr_format)
{
   nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
   const unsigned num_data_srcs =
      nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_mem_ssbo:
      if (addr_format_is_global(addr_format))
         op = global_atomic_for_deref(intrin->intrinsic);
      else
         op = ssbo_atomic_for_deref(intrin->intrinsic);
      break;
   case nir_var_mem_global:
      assert(addr_format_is_global(addr_format));
      op = global_atomic_for_deref(intrin->intrinsic);
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);

   unsigned src = 0;
   if (addr_format_is_global(addr_format)) {
      atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else {
      atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }
   for (unsigned i = 0; i < num_data_srcs; i++) {
      atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
   }

   /* Global atomics don't have access flags because they assume that the
    * address may be non-uniform.
    */
   if (!addr_format_is_global(addr_format))
      nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));

   assert(intrin->dest.ssa.num_components == 1);
   nir_ssa_dest_init(&atomic->instr, &atomic->dest,
                     1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name);

   assert(atomic->dest.ssa.bit_size % 8 == 0);

   if (addr_format_needs_bounds_check(addr_format)) {
      const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));

      nir_builder_instr_insert(b, &atomic->instr);

      nir_pop_if(b, NULL);
      return nir_if_phi(b, &atomic->dest.ssa,
                           nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
   } else {
      nir_builder_instr_insert(b, &atomic->instr);
      return &atomic->dest.ssa;
   }
}
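
/* Example (illustrative): for an SSBO deref chain var -> [i] -> .field,
 * nir_explicit_io_address_from_deref() below is invoked once per deref and
 * accumulates
 *
 *    addr = base + i * array_stride + field_offset
 *
 * using the explicit strides and offsets stored in the GLSL types.
 */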

nir_ssa_def *
nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
                                   nir_ssa_def *base_addr,
                                   nir_address_format addr_format)
{
   assert(deref->dest.is_ssa);
   switch (deref->deref_type) {
   case nir_deref_type_var:
      assert(deref->mode == nir_var_shader_in);
      return nir_imm_intN_t(b, deref->var->data.driver_location,
                            deref->dest.ssa.bit_size);

   case nir_deref_type_array: {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);

      unsigned stride = glsl_get_explicit_stride(parent->type);
      if ((glsl_type_is_matrix(parent->type) &&
           glsl_matrix_type_is_row_major(parent->type)) ||
          (glsl_type_is_vector(parent->type) && stride == 0))
         stride = type_scalar_size_bytes(parent->type);

      assert(stride > 0);

      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      index = nir_i2i(b, index, base_addr->bit_size);
      return build_addr_iadd(b, base_addr, addr_format,
                                nir_imul_imm(b, index, stride));
   }

   case nir_deref_type_ptr_as_array: {
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      index = nir_i2i(b, index, base_addr->bit_size);
      unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
      return build_addr_iadd(b, base_addr, addr_format,
                                nir_imul_imm(b, index, stride));
   }

   case nir_deref_type_array_wildcard:
      unreachable("Wildcards should be lowered by now");
      break;

   case nir_deref_type_struct: {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      int offset = glsl_get_struct_field_offset(parent->type,
                                                deref->strct.index);
      assert(offset >= 0);
      return build_addr_iadd_imm(b, base_addr, addr_format, offset);
   }

   case nir_deref_type_cast:
      /* Nothing to do here */
      return base_addr;
   }

   unreachable("Invalid NIR deref type");
}

void
nir_lower_explicit_io_instr(nir_builder *b,
                            nir_intrinsic_instr *intrin,
                            nir_ssa_def *addr,
                            nir_address_format addr_format)
{
   b->cursor = nir_after_instr(&intrin->instr);

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   unsigned vec_stride = glsl_get_explicit_stride(deref->type);
   unsigned scalar_size = type_scalar_size_bytes(deref->type);
   assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
   assert(vec_stride == 0 || vec_stride >= scalar_size);

   if (intrin->intrinsic == nir_intrinsic_load_deref) {
      nir_ssa_def *value;
      if (vec_stride > scalar_size) {
         nir_ssa_def *comps[4] = { NULL, };
         for (unsigned i = 0; i < intrin->num_components; i++) {
            nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
                                                         vec_stride * i);
            comps[i] = build_explicit_io_load(b, intrin, comp_addr,
                                              addr_format, 1);
         }
         value = nir_vec(b, comps, intrin->num_components);
      } else {
         value = build_explicit_io_load(b, intrin, addr, addr_format,
                                        intrin->num_components);
      }
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
   } else if (intrin->intrinsic == nir_intrinsic_store_deref) {
      assert(intrin->src[1].is_ssa);
      nir_ssa_def *value = intrin->src[1].ssa;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      if (vec_stride > scalar_size) {
         for (unsigned i = 0; i < intrin->num_components; i++) {
            if (!(write_mask & (1 << i)))
               continue;

            nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
                                                         vec_stride * i);
            build_explicit_io_store(b, intrin, comp_addr, addr_format,
                                    nir_channel(b, value, i), 1);
         }
      } else {
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 value, write_mask);
      }
   } else {
      nir_ssa_def *value =
         build_explicit_io_atomic(b, intrin, addr, addr_format);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
   }

   nir_instr_remove(&intrin->instr);
}
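
/* When a vector type carries an explicit stride larger than its scalar size
 * (as can arise from matrix layouts), the code above splits the access into
 * one scalar load/store per component, at addresses of the form
 *
 *    comp_addr = addr + i * vec_stride   // illustrative
 */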

static void
lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
                        nir_address_format addr_format)
{
   /* Just delete the deref if it's not used.  We can't use
    * nir_deref_instr_remove_if_unused here because it may remove more than
    * one deref which could break our list walking since we walk the list
    * backwards.
    */
   assert(list_empty(&deref->dest.ssa.if_uses));
   if (list_empty(&deref->dest.ssa.uses)) {
      nir_instr_remove(&deref->instr);
      return;
   }

   b->cursor = nir_after_instr(&deref->instr);

   nir_ssa_def *base_addr = NULL;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->parent.is_ssa);
      base_addr = deref->parent.ssa;
   }

   nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
                                                          addr_format);

   nir_instr_remove(&deref->instr);
   nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr));
}

static void
lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
                         nir_address_format addr_format)
{
   assert(intrin->src[0].is_ssa);
   nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
}

static void
lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
                               nir_address_format addr_format)
{
   b->cursor = nir_after_instr(&intrin->instr);

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   assert(glsl_type_is_array(deref->type));
   assert(glsl_get_length(deref->type) == 0);
   unsigned stride = glsl_get_explicit_stride(deref->type);
   assert(stride > 0);

   assert(addr_format == nir_address_format_32bit_index_offset);
   nir_ssa_def *addr = &deref->dest.ssa;
   nir_ssa_def *index = addr_to_index(b, addr, addr_format);
   nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);

   nir_intrinsic_instr *bsize =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_get_buffer_size);
   bsize->src[0] = nir_src_for_ssa(index);
   nir_ssa_dest_init(&bsize->instr, &bsize->dest, 1, 32, NULL);
   nir_builder_instr_insert(b, &bsize->instr);

   nir_ssa_def *arr_size =
      nir_idiv(b, nir_isub(b, &bsize->dest.ssa, offset),
                  nir_imm_int(b, stride));

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(arr_size));
   nir_instr_remove(&intrin->instr);
}
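
/* For an unsized SSBO array, the computation above is effectively
 *
 *    arr_size = (get_buffer_size(index) - offset) / stride
 *
 * which matches how .length() is defined for runtime arrays.
 */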

static bool
nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
                           nir_address_format addr_format)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   /* Walk in reverse order so that we can see the full deref chain when we
    * lower the access operations.  We lower them assuming that the derefs
    * will be turned into address calculations later.
    */
   nir_foreach_block_reverse(block, impl) {
      nir_foreach_instr_reverse_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->mode & modes) {
               lower_explicit_io_deref(&b, deref, addr_format);
               progress = true;
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_deref:
            case nir_intrinsic_store_deref:
            case nir_intrinsic_deref_atomic_add:
            case nir_intrinsic_deref_atomic_imin:
            case nir_intrinsic_deref_atomic_umin:
            case nir_intrinsic_deref_atomic_imax:
            case nir_intrinsic_deref_atomic_umax:
            case nir_intrinsic_deref_atomic_and:
            case nir_intrinsic_deref_atomic_or:
            case nir_intrinsic_deref_atomic_xor:
            case nir_intrinsic_deref_atomic_exchange:
            case nir_intrinsic_deref_atomic_comp_swap:
            case nir_intrinsic_deref_atomic_fadd:
            case nir_intrinsic_deref_atomic_fmin:
            case nir_intrinsic_deref_atomic_fmax:
            case nir_intrinsic_deref_atomic_fcomp_swap: {
               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
               if (deref->mode & modes) {
                  lower_explicit_io_access(&b, intrin, addr_format);
                  progress = true;
               }
               break;
            }

            case nir_intrinsic_deref_buffer_array_length: {
               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
               if (deref->mode & modes) {
                  lower_explicit_io_array_length(&b, intrin, addr_format);
                  progress = true;
               }
               break;
            }

            default:
               break;
            }
            break;
         }

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}

bool
nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
                      nir_address_format addr_format)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl &&
          nir_lower_explicit_io_impl(function->impl, modes, addr_format))
         progress = true;
   }

   return progress;
}
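
/* Typical usage of nir_lower_explicit_io() (illustrative): a descriptor-based
 * backend might lower UBO/SSBO access to the vec2 index+offset form:
 *
 *    nir_lower_explicit_io(shader, nir_var_mem_ubo | nir_var_mem_ssbo,
 *                          nir_address_format_32bit_index_offset);
 */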

/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_scratch:
      return &instr->src[0];
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_shared:
   case nir_intrinsic_store_global:
   case nir_intrinsic_store_scratch:
      return &instr->src[1];
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[1];
   default:
      return NULL;
   }
}