17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2021 Valve Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg * Authors: 247ec681f3Smrg * Timur Kristóf 257ec681f3Smrg * 267ec681f3Smrg */ 277ec681f3Smrg 287ec681f3Smrg#include "nir.h" 297ec681f3Smrg#include "nir_builder.h" 307ec681f3Smrg 317ec681f3Smrgtypedef struct 327ec681f3Smrg{ 337ec681f3Smrg struct hash_table *range_ht; 347ec681f3Smrg} opt_offsets_state; 357ec681f3Smrg 367ec681f3Smrgstatic nir_ssa_def * 377ec681f3Smrgtry_extract_const_addition(nir_builder *b, nir_instr *instr, opt_offsets_state *state, unsigned *out_const) 387ec681f3Smrg{ 397ec681f3Smrg if (instr->type != nir_instr_type_alu) 407ec681f3Smrg return NULL; 417ec681f3Smrg 427ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 437ec681f3Smrg if (alu->op != nir_op_iadd || 447ec681f3Smrg !nir_alu_src_is_trivial_ssa(alu, 0) || 457ec681f3Smrg !nir_alu_src_is_trivial_ssa(alu, 1)) 467ec681f3Smrg return NULL; 477ec681f3Smrg 487ec681f3Smrg if (!alu->no_unsigned_wrap) { 497ec681f3Smrg if (!state->range_ht) { 507ec681f3Smrg /* Cache for nir_unsigned_upper_bound */ 517ec681f3Smrg state->range_ht = _mesa_pointer_hash_table_create(NULL); 527ec681f3Smrg } 537ec681f3Smrg 547ec681f3Smrg /* Check if there can really be an unsigned wrap. */ 557ec681f3Smrg nir_ssa_scalar src0 = {alu->src[0].src.ssa, 0}; 567ec681f3Smrg nir_ssa_scalar src1 = {alu->src[1].src.ssa, 0}; 577ec681f3Smrg uint32_t ub0 = nir_unsigned_upper_bound(b->shader, state->range_ht, src0, NULL); 587ec681f3Smrg uint32_t ub1 = nir_unsigned_upper_bound(b->shader, state->range_ht, src1, NULL); 597ec681f3Smrg 607ec681f3Smrg if ((UINT32_MAX - ub0) < ub1) 617ec681f3Smrg return NULL; 627ec681f3Smrg 637ec681f3Smrg /* We proved that unsigned wrap won't be possible, so we can set the flag too. */ 647ec681f3Smrg alu->no_unsigned_wrap = true; 657ec681f3Smrg } 667ec681f3Smrg 677ec681f3Smrg for (unsigned i = 0; i < 2; ++i) { 687ec681f3Smrg if (nir_src_is_const(alu->src[i].src)) { 697ec681f3Smrg *out_const += nir_src_as_uint(alu->src[i].src); 707ec681f3Smrg return alu->src[1 - i].src.ssa; 717ec681f3Smrg } 727ec681f3Smrg 737ec681f3Smrg nir_ssa_def *replace_src = try_extract_const_addition(b, alu->src[0].src.ssa->parent_instr, state, out_const); 747ec681f3Smrg if (replace_src) { 757ec681f3Smrg b->cursor = nir_before_instr(&alu->instr); 767ec681f3Smrg return nir_iadd(b, replace_src, alu->src[1 - i].src.ssa); 777ec681f3Smrg } 787ec681f3Smrg } 797ec681f3Smrg 807ec681f3Smrg return NULL; 817ec681f3Smrg} 827ec681f3Smrg 837ec681f3Smrgstatic bool 847ec681f3Smrgtry_fold_load_store(nir_builder *b, 857ec681f3Smrg nir_intrinsic_instr *intrin, 867ec681f3Smrg opt_offsets_state *state, 877ec681f3Smrg unsigned offset_src_idx) 887ec681f3Smrg{ 897ec681f3Smrg /* Assume that BASE is the constant offset of a load/store. 907ec681f3Smrg * Try to constant-fold additions to the offset source 917ec681f3Smrg * into the actual const offset of the instruction. 927ec681f3Smrg */ 937ec681f3Smrg 947ec681f3Smrg unsigned off_const = nir_intrinsic_base(intrin); 957ec681f3Smrg nir_src *off_src = &intrin->src[offset_src_idx]; 967ec681f3Smrg nir_ssa_def *replace_src = NULL; 977ec681f3Smrg 987ec681f3Smrg if (!off_src->is_ssa || off_src->ssa->bit_size != 32) 997ec681f3Smrg return false; 1007ec681f3Smrg 1017ec681f3Smrg if (!nir_src_is_const(*off_src)) { 1027ec681f3Smrg nir_ssa_def *r = off_src->ssa; 1037ec681f3Smrg while ((r = try_extract_const_addition(b, r->parent_instr, state, &off_const))) 1047ec681f3Smrg replace_src = r; 1057ec681f3Smrg } else if (nir_src_as_uint(*off_src)) { 1067ec681f3Smrg off_const += nir_src_as_uint(*off_src); 1077ec681f3Smrg b->cursor = nir_before_instr(&intrin->instr); 1087ec681f3Smrg replace_src = nir_imm_zero(b, off_src->ssa->num_components, off_src->ssa->bit_size); 1097ec681f3Smrg } 1107ec681f3Smrg 1117ec681f3Smrg if (!replace_src) 1127ec681f3Smrg return false; 1137ec681f3Smrg 1147ec681f3Smrg nir_instr_rewrite_src(&intrin->instr, &intrin->src[offset_src_idx], nir_src_for_ssa(replace_src)); 1157ec681f3Smrg nir_intrinsic_set_base(intrin, off_const); 1167ec681f3Smrg return true; 1177ec681f3Smrg} 1187ec681f3Smrg 1197ec681f3Smrgstatic bool 1207ec681f3Smrgprocess_instr(nir_builder *b, nir_instr *instr, void *s) 1217ec681f3Smrg{ 1227ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 1237ec681f3Smrg return false; 1247ec681f3Smrg 1257ec681f3Smrg opt_offsets_state *state = (opt_offsets_state *) s; 1267ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 1277ec681f3Smrg 1287ec681f3Smrg switch (intrin->intrinsic) { 1297ec681f3Smrg case nir_intrinsic_load_shared: 1307ec681f3Smrg return try_fold_load_store(b, intrin, state, 0); 1317ec681f3Smrg case nir_intrinsic_store_shared: 1327ec681f3Smrg return try_fold_load_store(b, intrin, state, 1); 1337ec681f3Smrg case nir_intrinsic_load_buffer_amd: 1347ec681f3Smrg return try_fold_load_store(b, intrin, state, 1); 1357ec681f3Smrg case nir_intrinsic_store_buffer_amd: 1367ec681f3Smrg return try_fold_load_store(b, intrin, state, 2); 1377ec681f3Smrg default: 1387ec681f3Smrg return false; 1397ec681f3Smrg } 1407ec681f3Smrg 1417ec681f3Smrg unreachable("Can't reach here."); 1427ec681f3Smrg} 1437ec681f3Smrg 1447ec681f3Smrgbool 1457ec681f3Smrgnir_opt_offsets(nir_shader *shader) 1467ec681f3Smrg{ 1477ec681f3Smrg opt_offsets_state state; 1487ec681f3Smrg state.range_ht = NULL; 1497ec681f3Smrg 1507ec681f3Smrg bool p = nir_shader_instructions_pass(shader, process_instr, 1517ec681f3Smrg nir_metadata_block_index | 1527ec681f3Smrg nir_metadata_dominance, 1537ec681f3Smrg &state); 1547ec681f3Smrg 1557ec681f3Smrg if (state.range_ht) 1567ec681f3Smrg _mesa_hash_table_destroy(state.range_ht, NULL); 1577ec681f3Smrg 1587ec681f3Smrg 1597ec681f3Smrg return p; 1607ec681f3Smrg} 161