17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2019 Collabora, Ltd. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg * Authors (Collabora): 247ec681f3Smrg * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 257ec681f3Smrg */ 267ec681f3Smrg 277ec681f3Smrg#include "compiler.h" 287ec681f3Smrg 297ec681f3Smrg/* Midgard's generic load/store instructions, particularly to implement SSBOs 307ec681f3Smrg * and globals, have support for address arithmetic natively. In particularly, 317ec681f3Smrg * they take two indirect arguments A, B and two immediates #s, #c, calculating 327ec681f3Smrg * the address: 337ec681f3Smrg * 347ec681f3Smrg * A + (zext?(B) << #s) + #c 357ec681f3Smrg * 367ec681f3Smrg * This allows for fast indexing into arrays. This file tries to pattern match the offset in NIR with this form to reduce pressure on the ALU pipe. 377ec681f3Smrg */ 387ec681f3Smrg 397ec681f3Smrgstruct mir_address { 407ec681f3Smrg nir_ssa_scalar A; 417ec681f3Smrg nir_ssa_scalar B; 427ec681f3Smrg 437ec681f3Smrg midgard_index_address_format type; 447ec681f3Smrg unsigned shift; 457ec681f3Smrg unsigned bias; 467ec681f3Smrg}; 477ec681f3Smrg 487ec681f3Smrgstatic bool 497ec681f3Smrgmir_args_ssa(nir_ssa_scalar s, unsigned count) 507ec681f3Smrg{ 517ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); 527ec681f3Smrg 537ec681f3Smrg if (count > nir_op_infos[alu->op].num_inputs) 547ec681f3Smrg return false; 557ec681f3Smrg 567ec681f3Smrg for (unsigned i = 0; i < count; ++i) { 577ec681f3Smrg if (!alu->src[i].src.is_ssa) 587ec681f3Smrg return false; 597ec681f3Smrg } 607ec681f3Smrg 617ec681f3Smrg return true; 627ec681f3Smrg} 637ec681f3Smrg 647ec681f3Smrg/* Matches a constant in either slot and moves it to the bias */ 657ec681f3Smrg 667ec681f3Smrgstatic void 677ec681f3Smrgmir_match_constant(struct mir_address *address) 687ec681f3Smrg{ 697ec681f3Smrg if (address->A.def && nir_ssa_scalar_is_const(address->A)) { 707ec681f3Smrg address->bias += nir_ssa_scalar_as_uint(address->A); 717ec681f3Smrg address->A.def = NULL; 727ec681f3Smrg } 737ec681f3Smrg 747ec681f3Smrg if (address->B.def && nir_ssa_scalar_is_const(address->B)) { 757ec681f3Smrg address->bias += nir_ssa_scalar_as_uint(address->B); 767ec681f3Smrg address->B.def = NULL; 777ec681f3Smrg } 787ec681f3Smrg} 797ec681f3Smrg 807ec681f3Smrg/* Matches an iadd when there is a free slot or constant */ 817ec681f3Smrg 827ec681f3Smrg/* The offset field is a 18-bit signed integer */ 837ec681f3Smrg#define MAX_POSITIVE_OFFSET ((1 << 17) - 1) 847ec681f3Smrg 857ec681f3Smrgstatic void 867ec681f3Smrgmir_match_iadd(struct mir_address *address, bool first_free) 877ec681f3Smrg{ 887ec681f3Smrg if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) 897ec681f3Smrg return; 907ec681f3Smrg 917ec681f3Smrg if (!mir_args_ssa(address->B, 2)) 927ec681f3Smrg return; 937ec681f3Smrg 947ec681f3Smrg nir_op op = nir_ssa_scalar_alu_op(address->B); 957ec681f3Smrg 967ec681f3Smrg if (op != nir_op_iadd) return; 977ec681f3Smrg 987ec681f3Smrg nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0); 997ec681f3Smrg nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1); 1007ec681f3Smrg 1017ec681f3Smrg if (nir_ssa_scalar_is_const(op1) && 1027ec681f3Smrg nir_ssa_scalar_as_uint(op1) <= MAX_POSITIVE_OFFSET) { 1037ec681f3Smrg address->bias += nir_ssa_scalar_as_uint(op1); 1047ec681f3Smrg address->B = op2; 1057ec681f3Smrg } else if (nir_ssa_scalar_is_const(op2) && 1067ec681f3Smrg nir_ssa_scalar_as_uint(op2) <= MAX_POSITIVE_OFFSET) { 1077ec681f3Smrg address->bias += nir_ssa_scalar_as_uint(op2); 1087ec681f3Smrg address->B = op1; 1097ec681f3Smrg } else if (!nir_ssa_scalar_is_const(op1) && 1107ec681f3Smrg !nir_ssa_scalar_is_const(op2) && 1117ec681f3Smrg first_free && !address->A.def) { 1127ec681f3Smrg address->A = op1; 1137ec681f3Smrg address->B = op2; 1147ec681f3Smrg } 1157ec681f3Smrg} 1167ec681f3Smrg 1177ec681f3Smrg/* Matches u2u64 and sets type */ 1187ec681f3Smrg 1197ec681f3Smrgstatic void 1207ec681f3Smrgmir_match_u2u64(struct mir_address *address) 1217ec681f3Smrg{ 1227ec681f3Smrg if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) 1237ec681f3Smrg return; 1247ec681f3Smrg 1257ec681f3Smrg if (!mir_args_ssa(address->B, 1)) 1267ec681f3Smrg return; 1277ec681f3Smrg 1287ec681f3Smrg nir_op op = nir_ssa_scalar_alu_op(address->B); 1297ec681f3Smrg if (op != nir_op_u2u64) return; 1307ec681f3Smrg nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); 1317ec681f3Smrg 1327ec681f3Smrg address->B = arg; 1337ec681f3Smrg address->type = midgard_index_address_u32; 1347ec681f3Smrg} 1357ec681f3Smrg 1367ec681f3Smrg/* Matches i2i64 and sets type */ 1377ec681f3Smrg 1387ec681f3Smrgstatic void 1397ec681f3Smrgmir_match_i2i64(struct mir_address *address) 1407ec681f3Smrg{ 1417ec681f3Smrg if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) 1427ec681f3Smrg return; 1437ec681f3Smrg 1447ec681f3Smrg if (!mir_args_ssa(address->B, 1)) 1457ec681f3Smrg return; 1467ec681f3Smrg 1477ec681f3Smrg nir_op op = nir_ssa_scalar_alu_op(address->B); 1487ec681f3Smrg if (op != nir_op_i2i64) return; 1497ec681f3Smrg nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); 1507ec681f3Smrg 1517ec681f3Smrg address->B = arg; 1527ec681f3Smrg address->type = midgard_index_address_s32; 1537ec681f3Smrg} 1547ec681f3Smrg 1557ec681f3Smrg/* Matches ishl to shift */ 1567ec681f3Smrg 1577ec681f3Smrgstatic void 1587ec681f3Smrgmir_match_ishl(struct mir_address *address) 1597ec681f3Smrg{ 1607ec681f3Smrg if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) 1617ec681f3Smrg return; 1627ec681f3Smrg 1637ec681f3Smrg if (!mir_args_ssa(address->B, 2)) 1647ec681f3Smrg return; 1657ec681f3Smrg 1667ec681f3Smrg nir_op op = nir_ssa_scalar_alu_op(address->B); 1677ec681f3Smrg if (op != nir_op_ishl) return; 1687ec681f3Smrg nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0); 1697ec681f3Smrg nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1); 1707ec681f3Smrg 1717ec681f3Smrg if (!nir_ssa_scalar_is_const(op2)) return; 1727ec681f3Smrg 1737ec681f3Smrg unsigned shift = nir_ssa_scalar_as_uint(op2); 1747ec681f3Smrg if (shift > 0x7) return; 1757ec681f3Smrg 1767ec681f3Smrg address->B = op1; 1777ec681f3Smrg address->shift = shift; 1787ec681f3Smrg} 1797ec681f3Smrg 1807ec681f3Smrg/* Strings through mov which can happen from NIR vectorization */ 1817ec681f3Smrg 1827ec681f3Smrgstatic void 1837ec681f3Smrgmir_match_mov(struct mir_address *address) 1847ec681f3Smrg{ 1857ec681f3Smrg if (address->A.def && nir_ssa_scalar_is_alu(address->A)) { 1867ec681f3Smrg nir_op op = nir_ssa_scalar_alu_op(address->A); 1877ec681f3Smrg 1887ec681f3Smrg if (op == nir_op_mov && mir_args_ssa(address->A, 1)) 1897ec681f3Smrg address->A = nir_ssa_scalar_chase_alu_src(address->A, 0); 1907ec681f3Smrg } 1917ec681f3Smrg 1927ec681f3Smrg if (address->B.def && nir_ssa_scalar_is_alu(address->B)) { 1937ec681f3Smrg nir_op op = nir_ssa_scalar_alu_op(address->B); 1947ec681f3Smrg 1957ec681f3Smrg if (op == nir_op_mov && mir_args_ssa(address->B, 1)) 1967ec681f3Smrg address->B = nir_ssa_scalar_chase_alu_src(address->B, 0); 1977ec681f3Smrg } 1987ec681f3Smrg} 1997ec681f3Smrg 2007ec681f3Smrg/* Tries to pattern match into mir_address */ 2017ec681f3Smrg 2027ec681f3Smrgstatic struct mir_address 2037ec681f3Smrgmir_match_offset(nir_ssa_def *offset, bool first_free, bool extend) 2047ec681f3Smrg{ 2057ec681f3Smrg struct mir_address address = { 2067ec681f3Smrg .B = { .def = offset }, 2077ec681f3Smrg .type = extend ? midgard_index_address_u64 : midgard_index_address_u32, 2087ec681f3Smrg }; 2097ec681f3Smrg 2107ec681f3Smrg mir_match_mov(&address); 2117ec681f3Smrg mir_match_constant(&address); 2127ec681f3Smrg mir_match_mov(&address); 2137ec681f3Smrg mir_match_iadd(&address, first_free); 2147ec681f3Smrg mir_match_mov(&address); 2157ec681f3Smrg 2167ec681f3Smrg if (extend) { 2177ec681f3Smrg mir_match_u2u64(&address); 2187ec681f3Smrg mir_match_i2i64(&address); 2197ec681f3Smrg mir_match_mov(&address); 2207ec681f3Smrg } 2217ec681f3Smrg 2227ec681f3Smrg mir_match_ishl(&address); 2237ec681f3Smrg 2247ec681f3Smrg return address; 2257ec681f3Smrg} 2267ec681f3Smrg 2277ec681f3Smrgvoid 2287ec681f3Smrgmir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg) 2297ec681f3Smrg{ 2307ec681f3Smrg for(unsigned i = 0; i < 16; ++i) { 2317ec681f3Smrg ins->swizzle[1][i] = 0; 2327ec681f3Smrg ins->swizzle[2][i] = 0; 2337ec681f3Smrg } 2347ec681f3Smrg 2357ec681f3Smrg /* Sign extend instead of zero extend in case the address is something 2367ec681f3Smrg * like `base + offset + 20`, where offset could be negative. */ 2377ec681f3Smrg bool force_sext = (nir_src_bit_size(*offset) < 64); 2387ec681f3Smrg 2397ec681f3Smrg if (!offset->is_ssa) { 2407ec681f3Smrg ins->load_store.bitsize_toggle = true; 2417ec681f3Smrg ins->load_store.arg_comp = seg & 0x3; 2427ec681f3Smrg ins->load_store.arg_reg = (seg >> 2) & 0x7; 2437ec681f3Smrg ins->src[2] = nir_src_index(ctx, offset); 2447ec681f3Smrg ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset); 2457ec681f3Smrg 2467ec681f3Smrg if (force_sext) 2477ec681f3Smrg ins->load_store.index_format = midgard_index_address_s32; 2487ec681f3Smrg else 2497ec681f3Smrg ins->load_store.index_format = midgard_index_address_u64; 2507ec681f3Smrg 2517ec681f3Smrg return; 2527ec681f3Smrg } 2537ec681f3Smrg 2547ec681f3Smrg bool first_free = (seg == LDST_GLOBAL); 2557ec681f3Smrg 2567ec681f3Smrg struct mir_address match = mir_match_offset(offset->ssa, first_free, true); 2577ec681f3Smrg 2587ec681f3Smrg if (match.A.def) { 2597ec681f3Smrg ins->src[1] = nir_ssa_index(match.A.def); 2607ec681f3Smrg ins->swizzle[1][0] = match.A.comp; 2617ec681f3Smrg ins->src_types[1] = nir_type_uint | match.A.def->bit_size; 2627ec681f3Smrg } else { 2637ec681f3Smrg ins->load_store.bitsize_toggle = true; 2647ec681f3Smrg ins->load_store.arg_comp = seg & 0x3; 2657ec681f3Smrg ins->load_store.arg_reg = (seg >> 2) & 0x7; 2667ec681f3Smrg } 2677ec681f3Smrg 2687ec681f3Smrg if (match.B.def) { 2697ec681f3Smrg ins->src[2] = nir_ssa_index(match.B.def); 2707ec681f3Smrg ins->swizzle[2][0] = match.B.comp; 2717ec681f3Smrg ins->src_types[2] = nir_type_uint | match.B.def->bit_size; 2727ec681f3Smrg } else 2737ec681f3Smrg ins->load_store.index_reg = REGISTER_LDST_ZERO; 2747ec681f3Smrg 2757ec681f3Smrg if (force_sext) 2767ec681f3Smrg match.type = midgard_index_address_s32; 2777ec681f3Smrg 2787ec681f3Smrg ins->load_store.index_format = match.type; 2797ec681f3Smrg 2807ec681f3Smrg assert(match.shift <= 7); 2817ec681f3Smrg ins->load_store.index_shift = match.shift; 2827ec681f3Smrg 2837ec681f3Smrg ins->constants.u32[0] = match.bias; 2847ec681f3Smrg} 2857ec681f3Smrg 2867ec681f3Smrg 2877ec681f3Smrgvoid 2887ec681f3Smrgmir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias) 2897ec681f3Smrg{ 2907ec681f3Smrg assert(src->is_ssa); 2917ec681f3Smrg struct mir_address match = mir_match_offset(src->ssa, false, false); 2927ec681f3Smrg 2937ec681f3Smrg if (match.B.def) { 2947ec681f3Smrg ins->src[2] = nir_ssa_index(match.B.def); 2957ec681f3Smrg 2967ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(ins->swizzle[2]); ++i) 2977ec681f3Smrg ins->swizzle[2][i] = match.B.comp; 2987ec681f3Smrg } 2997ec681f3Smrg 3007ec681f3Smrg ins->load_store.index_shift = match.shift; 3017ec681f3Smrg ins->constants.u32[0] = match.bias + bias; 3027ec681f3Smrg} 303