17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2019 Collabora, Ltd.
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
/* Midgard has some accelerated support for perspective projection on the
 * load/store pipes. So the first perspective projection pass looks for
 * lowered/open-coded perspective projection of the form "fmul (A.xyz,
 * frcp(A.w))" or "fmul (A.xy, frcp(A.z))" and rewrites it with a native
 * perspective division opcode (on the load/store pipe). Caveats apply: the
 * frcp should be used only once to make this optimization worthwhile. And the
 * source of the frcp ought to be a varying to make it worthwhile...
317ec681f3Smrg *
 * The second pass in this file is a step #2 of sorts: fusing that load/store
 * projection into a varying load instruction (they can be done together
 * implicitly). This depends on the combination pass. Again caveat: the varying
 * should only be used once to make this worthwhile.
367ec681f3Smrg */
377ec681f3Smrg
387ec681f3Smrg#include "compiler.h"
397ec681f3Smrg
407ec681f3Smrgstatic bool
417ec681f3Smrgis_swizzle_0(unsigned *swizzle)
427ec681f3Smrg{
437ec681f3Smrg        for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c)
447ec681f3Smrg                if (swizzle[c])
457ec681f3Smrg                        return false;
467ec681f3Smrg
477ec681f3Smrg        return true;
487ec681f3Smrg}
497ec681f3Smrg
507ec681f3Smrgbool
517ec681f3Smrgmidgard_opt_combine_projection(compiler_context *ctx, midgard_block *block)
527ec681f3Smrg{
537ec681f3Smrg        bool progress = false;
547ec681f3Smrg
557ec681f3Smrg        mir_foreach_instr_in_block_safe(block, ins) {
567ec681f3Smrg                /* First search for fmul */
577ec681f3Smrg                if (ins->type != TAG_ALU_4) continue;
587ec681f3Smrg                if (ins->op != midgard_alu_op_fmul) continue;
597ec681f3Smrg
607ec681f3Smrg                /* TODO: Flip */
617ec681f3Smrg
627ec681f3Smrg                /* Check the swizzles */
637ec681f3Smrg
647ec681f3Smrg                if (!mir_is_simple_swizzle(ins->swizzle[0], ins->mask)) continue;
657ec681f3Smrg                if (!is_swizzle_0(ins->swizzle[1])) continue;
667ec681f3Smrg
677ec681f3Smrg                /* Awesome, we're the right form. Now check where src2 is from */
687ec681f3Smrg                unsigned frcp = ins->src[1];
697ec681f3Smrg                unsigned to = ins->dest;
707ec681f3Smrg
717ec681f3Smrg                if (frcp & PAN_IS_REG) continue;
727ec681f3Smrg                if (to & PAN_IS_REG) continue;
737ec681f3Smrg
747ec681f3Smrg                bool frcp_found = false;
757ec681f3Smrg                unsigned frcp_component = 0;
767ec681f3Smrg                unsigned frcp_from = 0;
777ec681f3Smrg
787ec681f3Smrg                mir_foreach_instr_in_block_safe(block, sub) {
797ec681f3Smrg                        if (sub->dest != frcp) continue;
807ec681f3Smrg
817ec681f3Smrg                        frcp_component = sub->swizzle[0][0];
827ec681f3Smrg                        frcp_from = sub->src[0];
837ec681f3Smrg
847ec681f3Smrg                        frcp_found =
857ec681f3Smrg                                (sub->type == TAG_ALU_4) &&
867ec681f3Smrg                                (sub->op == midgard_alu_op_frcp);
877ec681f3Smrg                        break;
887ec681f3Smrg                }
897ec681f3Smrg
907ec681f3Smrg                if (!frcp_found) continue;
917ec681f3Smrg                if (frcp_from != ins->src[0]) continue;
927ec681f3Smrg                if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z) continue;
937ec681f3Smrg                if (!mir_single_use(ctx, frcp)) continue;
947ec681f3Smrg
957ec681f3Smrg                /* Heuristic: check if the frcp is from a single-use varying */
967ec681f3Smrg
977ec681f3Smrg                bool ok = false;
987ec681f3Smrg
997ec681f3Smrg                /* One for frcp and one for fmul */
1007ec681f3Smrg                if (mir_use_count(ctx, frcp_from) > 2) continue;
1017ec681f3Smrg
1027ec681f3Smrg                mir_foreach_instr_in_block_safe(block, v) {
1037ec681f3Smrg                        if (v->dest != frcp_from) continue;
1047ec681f3Smrg                        if (v->type != TAG_LOAD_STORE_4) break;
1057ec681f3Smrg                        if (!OP_IS_LOAD_VARY_F(v->op)) break;
1067ec681f3Smrg
1077ec681f3Smrg                        ok = true;
1087ec681f3Smrg                        break;
1097ec681f3Smrg                }
1107ec681f3Smrg
1117ec681f3Smrg                if (!ok)
1127ec681f3Smrg                        continue;
1137ec681f3Smrg
1147ec681f3Smrg                /* Nice, we got the form spot on. Let's convert! */
1157ec681f3Smrg
1167ec681f3Smrg                midgard_instruction accel = {
1177ec681f3Smrg                        .type = TAG_LOAD_STORE_4,
1187ec681f3Smrg                        .mask = ins->mask,
1197ec681f3Smrg                        .dest = to,
1207ec681f3Smrg                        .dest_type = nir_type_float32,
1217ec681f3Smrg                        .src = { frcp_from, ~0, ~0, ~0 },
1227ec681f3Smrg                        .src_types = { nir_type_float32 },
1237ec681f3Smrg                        .swizzle = SWIZZLE_IDENTITY_4,
1247ec681f3Smrg                        .op = frcp_component == COMPONENT_W ?
1257ec681f3Smrg                                midgard_op_ldst_perspective_div_w :
1267ec681f3Smrg                                midgard_op_ldst_perspective_div_z,
1277ec681f3Smrg                        .load_store = {
1287ec681f3Smrg                                .bitsize_toggle = true,
1297ec681f3Smrg                        }
1307ec681f3Smrg                };
1317ec681f3Smrg
1327ec681f3Smrg                mir_insert_instruction_before(ctx, ins, accel);
1337ec681f3Smrg                mir_remove_instruction(ins);
1347ec681f3Smrg
1357ec681f3Smrg                progress |= true;
1367ec681f3Smrg        }
1377ec681f3Smrg
1387ec681f3Smrg        return progress;
1397ec681f3Smrg}
1407ec681f3Smrg
1417ec681f3Smrgbool
1427ec681f3Smrgmidgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
1437ec681f3Smrg{
1447ec681f3Smrg        bool progress = false;
1457ec681f3Smrg
1467ec681f3Smrg        mir_foreach_instr_in_block_safe(block, ins) {
1477ec681f3Smrg                /* Search for a projection */
1487ec681f3Smrg                if (ins->type != TAG_LOAD_STORE_4) continue;
1497ec681f3Smrg                if (!OP_IS_PROJECTION(ins->op)) continue;
1507ec681f3Smrg
1517ec681f3Smrg                unsigned vary = ins->src[0];
1527ec681f3Smrg                unsigned to = ins->dest;
1537ec681f3Smrg
1547ec681f3Smrg                if (vary & PAN_IS_REG) continue;
1557ec681f3Smrg                if (to & PAN_IS_REG) continue;
1567ec681f3Smrg                if (!mir_single_use(ctx, vary)) continue;
1577ec681f3Smrg
1587ec681f3Smrg                /* Check for a varying source. If we find it, we rewrite */
1597ec681f3Smrg
1607ec681f3Smrg                bool rewritten = false;
1617ec681f3Smrg
1627ec681f3Smrg                mir_foreach_instr_in_block_safe(block, v) {
1637ec681f3Smrg                        if (v->dest != vary) continue;
1647ec681f3Smrg                        if (v->type != TAG_LOAD_STORE_4) break;
1657ec681f3Smrg                        if (!OP_IS_LOAD_VARY_F(v->op)) break;
1667ec681f3Smrg
1677ec681f3Smrg                        /* We found it, so rewrite it to project. Grab the
1687ec681f3Smrg                         * modifier */
1697ec681f3Smrg
1707ec681f3Smrg                        midgard_varying_params p =
1717ec681f3Smrg                                midgard_unpack_varying_params(v->load_store);
1727ec681f3Smrg
1737ec681f3Smrg                        if (p.modifier != midgard_varying_mod_none)
1747ec681f3Smrg                                break;
1757ec681f3Smrg
1767ec681f3Smrg                        bool projects_w =
1777ec681f3Smrg                                ins->op == midgard_op_ldst_perspective_div_w;
1787ec681f3Smrg
1797ec681f3Smrg                        p.modifier = projects_w ?
1807ec681f3Smrg                                midgard_varying_mod_perspective_w :
1817ec681f3Smrg                                midgard_varying_mod_perspective_z;
1827ec681f3Smrg
1837ec681f3Smrg                        midgard_pack_varying_params(&v->load_store, p);
1847ec681f3Smrg
1857ec681f3Smrg                        /* Use the new destination */
1867ec681f3Smrg                        v->dest = to;
1877ec681f3Smrg
1887ec681f3Smrg                        rewritten = true;
1897ec681f3Smrg                        break;
1907ec681f3Smrg                }
1917ec681f3Smrg
1927ec681f3Smrg                if (rewritten)
1937ec681f3Smrg                        mir_remove_instruction(ins);
1947ec681f3Smrg
1957ec681f3Smrg                progress |= rewritten;
1967ec681f3Smrg        }
1977ec681f3Smrg
1987ec681f3Smrg        return progress;
1997ec681f3Smrg}
200