17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2018 Alyssa Rosenzweig
37ec681f3Smrg * Copyright (C) 2019 Collabora, Ltd.
47ec681f3Smrg *
57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
67ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
77ec681f3Smrg * to deal in the Software without restriction, including without limitation
87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
107ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
117ec681f3Smrg *
127ec681f3Smrg * The above copyright notice and this permission notice (including the next
137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
147ec681f3Smrg * Software.
157ec681f3Smrg *
167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
217ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
227ec681f3Smrg * SOFTWARE.
237ec681f3Smrg */
247ec681f3Smrg
257ec681f3Smrg#include "compiler.h"
267ec681f3Smrg#include "util/u_memory.h"
277ec681f3Smrg#include "midgard_ops.h"
287ec681f3Smrg
297ec681f3Smrg/* SIMD-aware dead code elimination. Perform liveness analysis step-by-step,
307ec681f3Smrg * removing dead components. If an instruction ends up with a zero mask, the
317ec681f3Smrg * instruction in total is dead and should be removed. */
327ec681f3Smrg
337ec681f3Smrgstatic bool
347ec681f3Smrgcan_cull_mask(compiler_context *ctx, midgard_instruction *ins)
357ec681f3Smrg{
367ec681f3Smrg        if (ins->dest >= ctx->temp_count)
377ec681f3Smrg                return false;
387ec681f3Smrg
397ec681f3Smrg        if (ins->dest == ctx->blend_src1)
407ec681f3Smrg                return false;
417ec681f3Smrg
427ec681f3Smrg        if (ins->type == TAG_LOAD_STORE_4)
437ec681f3Smrg                if (load_store_opcode_props[ins->op].props & LDST_SPECIAL_MASK)
447ec681f3Smrg                        return false;
457ec681f3Smrg
467ec681f3Smrg        return true;
477ec681f3Smrg}
487ec681f3Smrg
497ec681f3Smrgstatic bool
507ec681f3Smrgcan_dce(midgard_instruction *ins)
517ec681f3Smrg{
527ec681f3Smrg        if (ins->mask)
537ec681f3Smrg                return false;
547ec681f3Smrg
557ec681f3Smrg        if (ins->compact_branch)
567ec681f3Smrg                return false;
577ec681f3Smrg
587ec681f3Smrg        if (ins->type == TAG_LOAD_STORE_4)
597ec681f3Smrg                if (load_store_opcode_props[ins->op].props & LDST_SIDE_FX)
607ec681f3Smrg                        return false;
617ec681f3Smrg
627ec681f3Smrg        if (ins->type == TAG_TEXTURE_4)
637ec681f3Smrg                if (ins->op == midgard_tex_op_barrier)
647ec681f3Smrg                        return false;
657ec681f3Smrg
667ec681f3Smrg        return true;
677ec681f3Smrg}
687ec681f3Smrg
697ec681f3Smrgstatic bool
707ec681f3Smrgmidgard_opt_dead_code_eliminate_block(compiler_context *ctx, midgard_block *block)
717ec681f3Smrg{
727ec681f3Smrg        bool progress = false;
737ec681f3Smrg
747ec681f3Smrg        uint16_t *live = mem_dup(block->base.live_out, ctx->temp_count * sizeof(uint16_t));
757ec681f3Smrg
767ec681f3Smrg        mir_foreach_instr_in_block_rev(block, ins) {
777ec681f3Smrg                if (can_cull_mask(ctx, ins)) {
787ec681f3Smrg                        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
797ec681f3Smrg                        unsigned round_size = type_size;
807ec681f3Smrg                        unsigned oldmask = ins->mask;
817ec681f3Smrg
827ec681f3Smrg                        /* Make sure we're packable */
837ec681f3Smrg                        if (type_size == 16 && ins->type == TAG_LOAD_STORE_4)
847ec681f3Smrg                                round_size = 32;
857ec681f3Smrg
867ec681f3Smrg                        unsigned rounded = mir_round_bytemask_up(live[ins->dest], round_size);
877ec681f3Smrg                        unsigned cmask = mir_from_bytemask(rounded, type_size);
887ec681f3Smrg
897ec681f3Smrg                        ins->mask &= cmask;
907ec681f3Smrg                        progress |= (ins->mask != oldmask);
917ec681f3Smrg                }
927ec681f3Smrg
937ec681f3Smrg                mir_liveness_ins_update(live, ins, ctx->temp_count);
947ec681f3Smrg        }
957ec681f3Smrg
967ec681f3Smrg        mir_foreach_instr_in_block_safe(block, ins) {
977ec681f3Smrg                if (can_dce(ins)) {
987ec681f3Smrg                        mir_remove_instruction(ins);
997ec681f3Smrg                        progress = true;
1007ec681f3Smrg                }
1017ec681f3Smrg        }
1027ec681f3Smrg
1037ec681f3Smrg        free(live);
1047ec681f3Smrg
1057ec681f3Smrg        return progress;
1067ec681f3Smrg}
1077ec681f3Smrg
1087ec681f3Smrgbool
1097ec681f3Smrgmidgard_opt_dead_code_eliminate(compiler_context *ctx)
1107ec681f3Smrg{
1117ec681f3Smrg        /* We track liveness. In fact, it's ok if we assume more things are
1127ec681f3Smrg         * live than they actually are, that just reduces the effectiveness of
1137ec681f3Smrg         * this iterations lightly. And DCE has the effect of strictly reducing
1147ec681f3Smrg         * liveness, so we can run DCE across all blocks while only computing
1157ec681f3Smrg         * liveness at the beginning. */
1167ec681f3Smrg
1177ec681f3Smrg        mir_invalidate_liveness(ctx);
1187ec681f3Smrg        mir_compute_liveness(ctx);
1197ec681f3Smrg
1207ec681f3Smrg        bool progress = false;
1217ec681f3Smrg
1227ec681f3Smrg        mir_foreach_block(ctx, block) {
1237ec681f3Smrg                progress |= midgard_opt_dead_code_eliminate_block(ctx, (midgard_block *) block);
1247ec681f3Smrg        }
1257ec681f3Smrg
1267ec681f3Smrg        return progress;
1277ec681f3Smrg}
1287ec681f3Smrg
1297ec681f3Smrg/* Removes dead moves, that is, moves with a destination overwritten before
1307ec681f3Smrg * being read. Normally handled implicitly as part of DCE, but this has to run
1317ec681f3Smrg * after the out-of-SSA pass */
1327ec681f3Smrg
1337ec681f3Smrgbool
1347ec681f3Smrgmidgard_opt_dead_move_eliminate(compiler_context *ctx, midgard_block *block)
1357ec681f3Smrg{
1367ec681f3Smrg        bool progress = false;
1377ec681f3Smrg
1387ec681f3Smrg        mir_foreach_instr_in_block_safe(block, ins) {
1397ec681f3Smrg                if (ins->type != TAG_ALU_4) continue;
1407ec681f3Smrg                if (ins->compact_branch) continue;
1417ec681f3Smrg                if (!OP_IS_MOVE(ins->op)) continue;
1427ec681f3Smrg
1437ec681f3Smrg                /* Check if it's overwritten in this block before being read */
1447ec681f3Smrg                bool overwritten = false;
1457ec681f3Smrg
1467ec681f3Smrg                mir_foreach_instr_in_block_from(block, q, mir_next_op(ins)) {
1477ec681f3Smrg                        /* Check if used */
1487ec681f3Smrg                        if (mir_has_arg(q, ins->dest))
1497ec681f3Smrg                                break;
1507ec681f3Smrg
1517ec681f3Smrg                        /* Check if overwritten */
1527ec681f3Smrg                        if (q->dest == ins->dest) {
1537ec681f3Smrg                                /* Special case to vec4; component tracking is
1547ec681f3Smrg                                 * harder */
1557ec681f3Smrg
1567ec681f3Smrg                                overwritten = (q->mask == 0xF);
1577ec681f3Smrg                                break;
1587ec681f3Smrg                        }
1597ec681f3Smrg                }
1607ec681f3Smrg
1617ec681f3Smrg                if (overwritten) {
1627ec681f3Smrg                        mir_remove_instruction(ins);
1637ec681f3Smrg                        progress = true;
1647ec681f3Smrg                }
1657ec681f3Smrg        }
1667ec681f3Smrg
1677ec681f3Smrg        return progress;
1687ec681f3Smrg}
169