17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2019 Collabora, Ltd.
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg *
237ec681f3Smrg * Authors (Collabora):
247ec681f3Smrg *    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
257ec681f3Smrg */
267ec681f3Smrg
277ec681f3Smrg#include "compiler.h"
287ec681f3Smrg
/* Midgard's generic load/store instructions, particularly to implement SSBOs
 * and globals, have support for address arithmetic natively. In particular,
 * they take two indirect arguments A, B and two immediates #s, #c, calculating
 * the address:
 *
 *      A + (zext?(B) << #s) + #c
 *
 * This allows for fast indexing into arrays. This file tries to pattern match
 * the offset in NIR with this form to reduce pressure on the ALU pipe.
 */
387ec681f3Smrg
397ec681f3Smrgstruct mir_address {
407ec681f3Smrg        nir_ssa_scalar A;
417ec681f3Smrg        nir_ssa_scalar B;
427ec681f3Smrg
437ec681f3Smrg        midgard_index_address_format type;
447ec681f3Smrg        unsigned shift;
457ec681f3Smrg        unsigned bias;
467ec681f3Smrg};
477ec681f3Smrg
487ec681f3Smrgstatic bool
497ec681f3Smrgmir_args_ssa(nir_ssa_scalar s, unsigned count)
507ec681f3Smrg{
517ec681f3Smrg        nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
527ec681f3Smrg
537ec681f3Smrg        if (count > nir_op_infos[alu->op].num_inputs)
547ec681f3Smrg                return false;
557ec681f3Smrg
567ec681f3Smrg        for (unsigned i = 0; i < count; ++i) {
577ec681f3Smrg                if (!alu->src[i].src.is_ssa)
587ec681f3Smrg                        return false;
597ec681f3Smrg        }
607ec681f3Smrg
617ec681f3Smrg        return true;
627ec681f3Smrg}
637ec681f3Smrg
647ec681f3Smrg/* Matches a constant in either slot and moves it to the bias */
657ec681f3Smrg
667ec681f3Smrgstatic void
677ec681f3Smrgmir_match_constant(struct mir_address *address)
687ec681f3Smrg{
697ec681f3Smrg        if (address->A.def && nir_ssa_scalar_is_const(address->A)) {
707ec681f3Smrg                address->bias += nir_ssa_scalar_as_uint(address->A);
717ec681f3Smrg                address->A.def = NULL;
727ec681f3Smrg        }
737ec681f3Smrg
747ec681f3Smrg        if (address->B.def && nir_ssa_scalar_is_const(address->B)) {
757ec681f3Smrg                address->bias += nir_ssa_scalar_as_uint(address->B);
767ec681f3Smrg                address->B.def = NULL;
777ec681f3Smrg        }
787ec681f3Smrg}
797ec681f3Smrg
807ec681f3Smrg/* Matches an iadd when there is a free slot or constant */
817ec681f3Smrg
827ec681f3Smrg/* The offset field is a 18-bit signed integer */
837ec681f3Smrg#define MAX_POSITIVE_OFFSET ((1 << 17) - 1)
847ec681f3Smrg
857ec681f3Smrgstatic void
867ec681f3Smrgmir_match_iadd(struct mir_address *address, bool first_free)
877ec681f3Smrg{
887ec681f3Smrg        if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))
897ec681f3Smrg                return;
907ec681f3Smrg
917ec681f3Smrg        if (!mir_args_ssa(address->B, 2))
927ec681f3Smrg                return;
937ec681f3Smrg
947ec681f3Smrg        nir_op op = nir_ssa_scalar_alu_op(address->B);
957ec681f3Smrg
967ec681f3Smrg        if (op != nir_op_iadd) return;
977ec681f3Smrg
987ec681f3Smrg        nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0);
997ec681f3Smrg        nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1);
1007ec681f3Smrg
1017ec681f3Smrg        if (nir_ssa_scalar_is_const(op1) &&
1027ec681f3Smrg            nir_ssa_scalar_as_uint(op1) <= MAX_POSITIVE_OFFSET) {
1037ec681f3Smrg                address->bias += nir_ssa_scalar_as_uint(op1);
1047ec681f3Smrg                address->B = op2;
1057ec681f3Smrg        } else if (nir_ssa_scalar_is_const(op2) &&
1067ec681f3Smrg                   nir_ssa_scalar_as_uint(op2) <= MAX_POSITIVE_OFFSET) {
1077ec681f3Smrg                address->bias += nir_ssa_scalar_as_uint(op2);
1087ec681f3Smrg                address->B = op1;
1097ec681f3Smrg        } else if (!nir_ssa_scalar_is_const(op1) &&
1107ec681f3Smrg                   !nir_ssa_scalar_is_const(op2) &&
1117ec681f3Smrg                   first_free && !address->A.def) {
1127ec681f3Smrg                address->A = op1;
1137ec681f3Smrg                address->B = op2;
1147ec681f3Smrg        }
1157ec681f3Smrg}
1167ec681f3Smrg
1177ec681f3Smrg/* Matches u2u64 and sets type */
1187ec681f3Smrg
1197ec681f3Smrgstatic void
1207ec681f3Smrgmir_match_u2u64(struct mir_address *address)
1217ec681f3Smrg{
1227ec681f3Smrg        if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))
1237ec681f3Smrg                return;
1247ec681f3Smrg
1257ec681f3Smrg        if (!mir_args_ssa(address->B, 1))
1267ec681f3Smrg                return;
1277ec681f3Smrg
1287ec681f3Smrg        nir_op op = nir_ssa_scalar_alu_op(address->B);
1297ec681f3Smrg        if (op != nir_op_u2u64) return;
1307ec681f3Smrg        nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);
1317ec681f3Smrg
1327ec681f3Smrg        address->B = arg;
1337ec681f3Smrg        address->type = midgard_index_address_u32;
1347ec681f3Smrg}
1357ec681f3Smrg
1367ec681f3Smrg/* Matches i2i64 and sets type */
1377ec681f3Smrg
1387ec681f3Smrgstatic void
1397ec681f3Smrgmir_match_i2i64(struct mir_address *address)
1407ec681f3Smrg{
1417ec681f3Smrg        if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))
1427ec681f3Smrg                return;
1437ec681f3Smrg
1447ec681f3Smrg        if (!mir_args_ssa(address->B, 1))
1457ec681f3Smrg                return;
1467ec681f3Smrg
1477ec681f3Smrg        nir_op op = nir_ssa_scalar_alu_op(address->B);
1487ec681f3Smrg        if (op != nir_op_i2i64) return;
1497ec681f3Smrg        nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);
1507ec681f3Smrg
1517ec681f3Smrg        address->B = arg;
1527ec681f3Smrg        address->type = midgard_index_address_s32;
1537ec681f3Smrg}
1547ec681f3Smrg
1557ec681f3Smrg/* Matches ishl to shift */
1567ec681f3Smrg
1577ec681f3Smrgstatic void
1587ec681f3Smrgmir_match_ishl(struct mir_address *address)
1597ec681f3Smrg{
1607ec681f3Smrg        if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))
1617ec681f3Smrg                return;
1627ec681f3Smrg
1637ec681f3Smrg        if (!mir_args_ssa(address->B, 2))
1647ec681f3Smrg                return;
1657ec681f3Smrg
1667ec681f3Smrg        nir_op op = nir_ssa_scalar_alu_op(address->B);
1677ec681f3Smrg        if (op != nir_op_ishl) return;
1687ec681f3Smrg        nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0);
1697ec681f3Smrg        nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1);
1707ec681f3Smrg
1717ec681f3Smrg        if (!nir_ssa_scalar_is_const(op2)) return;
1727ec681f3Smrg
1737ec681f3Smrg        unsigned shift = nir_ssa_scalar_as_uint(op2);
1747ec681f3Smrg        if (shift > 0x7) return;
1757ec681f3Smrg
1767ec681f3Smrg        address->B = op1;
1777ec681f3Smrg        address->shift = shift;
1787ec681f3Smrg}
1797ec681f3Smrg
1807ec681f3Smrg/* Strings through mov which can happen from NIR vectorization */
1817ec681f3Smrg
1827ec681f3Smrgstatic void
1837ec681f3Smrgmir_match_mov(struct mir_address *address)
1847ec681f3Smrg{
1857ec681f3Smrg        if (address->A.def && nir_ssa_scalar_is_alu(address->A)) {
1867ec681f3Smrg                nir_op op = nir_ssa_scalar_alu_op(address->A);
1877ec681f3Smrg
1887ec681f3Smrg                if (op == nir_op_mov && mir_args_ssa(address->A, 1))
1897ec681f3Smrg                        address->A = nir_ssa_scalar_chase_alu_src(address->A, 0);
1907ec681f3Smrg        }
1917ec681f3Smrg
1927ec681f3Smrg        if (address->B.def && nir_ssa_scalar_is_alu(address->B)) {
1937ec681f3Smrg                nir_op op = nir_ssa_scalar_alu_op(address->B);
1947ec681f3Smrg
1957ec681f3Smrg                if (op == nir_op_mov && mir_args_ssa(address->B, 1))
1967ec681f3Smrg                        address->B = nir_ssa_scalar_chase_alu_src(address->B, 0);
1977ec681f3Smrg        }
1987ec681f3Smrg}
1997ec681f3Smrg
2007ec681f3Smrg/* Tries to pattern match into mir_address */
2017ec681f3Smrg
2027ec681f3Smrgstatic struct mir_address
2037ec681f3Smrgmir_match_offset(nir_ssa_def *offset, bool first_free, bool extend)
2047ec681f3Smrg{
2057ec681f3Smrg        struct mir_address address = {
2067ec681f3Smrg                .B = { .def = offset },
2077ec681f3Smrg                .type = extend ? midgard_index_address_u64 : midgard_index_address_u32,
2087ec681f3Smrg        };
2097ec681f3Smrg
2107ec681f3Smrg        mir_match_mov(&address);
2117ec681f3Smrg        mir_match_constant(&address);
2127ec681f3Smrg        mir_match_mov(&address);
2137ec681f3Smrg        mir_match_iadd(&address, first_free);
2147ec681f3Smrg        mir_match_mov(&address);
2157ec681f3Smrg
2167ec681f3Smrg        if (extend) {
2177ec681f3Smrg                mir_match_u2u64(&address);
2187ec681f3Smrg                mir_match_i2i64(&address);
2197ec681f3Smrg                mir_match_mov(&address);
2207ec681f3Smrg        }
2217ec681f3Smrg
2227ec681f3Smrg        mir_match_ishl(&address);
2237ec681f3Smrg
2247ec681f3Smrg        return address;
2257ec681f3Smrg}
2267ec681f3Smrg
2277ec681f3Smrgvoid
2287ec681f3Smrgmir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg)
2297ec681f3Smrg{
2307ec681f3Smrg        for(unsigned i = 0; i < 16; ++i) {
2317ec681f3Smrg                ins->swizzle[1][i] = 0;
2327ec681f3Smrg                ins->swizzle[2][i] = 0;
2337ec681f3Smrg        }
2347ec681f3Smrg
2357ec681f3Smrg        /* Sign extend instead of zero extend in case the address is something
2367ec681f3Smrg         * like `base + offset + 20`, where offset could be negative. */
2377ec681f3Smrg        bool force_sext = (nir_src_bit_size(*offset) < 64);
2387ec681f3Smrg
2397ec681f3Smrg        if (!offset->is_ssa) {
2407ec681f3Smrg                ins->load_store.bitsize_toggle = true;
2417ec681f3Smrg                ins->load_store.arg_comp = seg & 0x3;
2427ec681f3Smrg                ins->load_store.arg_reg = (seg >> 2) & 0x7;
2437ec681f3Smrg                ins->src[2] = nir_src_index(ctx, offset);
2447ec681f3Smrg                ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset);
2457ec681f3Smrg
2467ec681f3Smrg                if (force_sext)
2477ec681f3Smrg                        ins->load_store.index_format = midgard_index_address_s32;
2487ec681f3Smrg                else
2497ec681f3Smrg                        ins->load_store.index_format = midgard_index_address_u64;
2507ec681f3Smrg
2517ec681f3Smrg                return;
2527ec681f3Smrg        }
2537ec681f3Smrg
2547ec681f3Smrg        bool first_free = (seg == LDST_GLOBAL);
2557ec681f3Smrg
2567ec681f3Smrg        struct mir_address match = mir_match_offset(offset->ssa, first_free, true);
2577ec681f3Smrg
2587ec681f3Smrg        if (match.A.def) {
2597ec681f3Smrg                ins->src[1] = nir_ssa_index(match.A.def);
2607ec681f3Smrg                ins->swizzle[1][0] = match.A.comp;
2617ec681f3Smrg                ins->src_types[1] = nir_type_uint | match.A.def->bit_size;
2627ec681f3Smrg        } else {
2637ec681f3Smrg                ins->load_store.bitsize_toggle = true;
2647ec681f3Smrg                ins->load_store.arg_comp = seg & 0x3;
2657ec681f3Smrg                ins->load_store.arg_reg = (seg >> 2) & 0x7;
2667ec681f3Smrg        }
2677ec681f3Smrg
2687ec681f3Smrg        if (match.B.def) {
2697ec681f3Smrg                ins->src[2] = nir_ssa_index(match.B.def);
2707ec681f3Smrg                ins->swizzle[2][0] = match.B.comp;
2717ec681f3Smrg                ins->src_types[2] = nir_type_uint | match.B.def->bit_size;
2727ec681f3Smrg        } else
2737ec681f3Smrg                ins->load_store.index_reg = REGISTER_LDST_ZERO;
2747ec681f3Smrg
2757ec681f3Smrg        if (force_sext)
2767ec681f3Smrg                match.type = midgard_index_address_s32;
2777ec681f3Smrg
2787ec681f3Smrg        ins->load_store.index_format = match.type;
2797ec681f3Smrg
2807ec681f3Smrg        assert(match.shift <= 7);
2817ec681f3Smrg        ins->load_store.index_shift = match.shift;
2827ec681f3Smrg
2837ec681f3Smrg        ins->constants.u32[0] = match.bias;
2847ec681f3Smrg}
2857ec681f3Smrg
2867ec681f3Smrg
2877ec681f3Smrgvoid
2887ec681f3Smrgmir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias)
2897ec681f3Smrg{
2907ec681f3Smrg        assert(src->is_ssa);
2917ec681f3Smrg        struct mir_address match = mir_match_offset(src->ssa, false, false);
2927ec681f3Smrg
2937ec681f3Smrg        if (match.B.def) {
2947ec681f3Smrg                ins->src[2] = nir_ssa_index(match.B.def);
2957ec681f3Smrg
2967ec681f3Smrg                for (unsigned i = 0; i < ARRAY_SIZE(ins->swizzle[2]); ++i)
2977ec681f3Smrg                        ins->swizzle[2][i] = match.B.comp;
2987ec681f3Smrg        }
2997ec681f3Smrg
3007ec681f3Smrg        ins->load_store.index_shift = match.shift;
3017ec681f3Smrg        ins->constants.u32[0] = match.bias + bias;
3027ec681f3Smrg}
303