1/*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2019 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "compiler.h"
26#include "util/u_memory.h"
27#include "midgard_ops.h"
28
29/* SIMD-aware dead code elimination. Perform liveness analysis step-by-step,
30 * removing dead components. If an instruction ends up with a zero mask, the
31 * instruction in total is dead and should be removed. */
32
33static bool
34can_cull_mask(compiler_context *ctx, midgard_instruction *ins)
35{
36        if (ins->dest >= ctx->temp_count)
37                return false;
38
39        if (ins->dest == ctx->blend_src1)
40                return false;
41
42        if (ins->type == TAG_LOAD_STORE_4)
43                if (load_store_opcode_props[ins->op].props & LDST_SPECIAL_MASK)
44                        return false;
45
46        return true;
47}
48
49static bool
50can_dce(midgard_instruction *ins)
51{
52        if (ins->mask)
53                return false;
54
55        if (ins->compact_branch)
56                return false;
57
58        if (ins->type == TAG_LOAD_STORE_4)
59                if (load_store_opcode_props[ins->op].props & LDST_SIDE_FX)
60                        return false;
61
62        if (ins->type == TAG_TEXTURE_4)
63                if (ins->op == midgard_tex_op_barrier)
64                        return false;
65
66        return true;
67}
68
69static bool
70midgard_opt_dead_code_eliminate_block(compiler_context *ctx, midgard_block *block)
71{
72        bool progress = false;
73
74        uint16_t *live = mem_dup(block->base.live_out, ctx->temp_count * sizeof(uint16_t));
75
76        mir_foreach_instr_in_block_rev(block, ins) {
77                if (can_cull_mask(ctx, ins)) {
78                        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
79                        unsigned round_size = type_size;
80                        unsigned oldmask = ins->mask;
81
82                        /* Make sure we're packable */
83                        if (type_size == 16 && ins->type == TAG_LOAD_STORE_4)
84                                round_size = 32;
85
86                        unsigned rounded = mir_round_bytemask_up(live[ins->dest], round_size);
87                        unsigned cmask = mir_from_bytemask(rounded, type_size);
88
89                        ins->mask &= cmask;
90                        progress |= (ins->mask != oldmask);
91                }
92
93                mir_liveness_ins_update(live, ins, ctx->temp_count);
94        }
95
96        mir_foreach_instr_in_block_safe(block, ins) {
97                if (can_dce(ins)) {
98                        mir_remove_instruction(ins);
99                        progress = true;
100                }
101        }
102
103        free(live);
104
105        return progress;
106}
107
108bool
109midgard_opt_dead_code_eliminate(compiler_context *ctx)
110{
111        /* We track liveness. In fact, it's ok if we assume more things are
112         * live than they actually are, that just reduces the effectiveness of
113         * this iterations lightly. And DCE has the effect of strictly reducing
114         * liveness, so we can run DCE across all blocks while only computing
115         * liveness at the beginning. */
116
117        mir_invalidate_liveness(ctx);
118        mir_compute_liveness(ctx);
119
120        bool progress = false;
121
122        mir_foreach_block(ctx, block) {
123                progress |= midgard_opt_dead_code_eliminate_block(ctx, (midgard_block *) block);
124        }
125
126        return progress;
127}
128
129/* Removes dead moves, that is, moves with a destination overwritten before
130 * being read. Normally handled implicitly as part of DCE, but this has to run
131 * after the out-of-SSA pass */
132
133bool
134midgard_opt_dead_move_eliminate(compiler_context *ctx, midgard_block *block)
135{
136        bool progress = false;
137
138        mir_foreach_instr_in_block_safe(block, ins) {
139                if (ins->type != TAG_ALU_4) continue;
140                if (ins->compact_branch) continue;
141                if (!OP_IS_MOVE(ins->op)) continue;
142
143                /* Check if it's overwritten in this block before being read */
144                bool overwritten = false;
145
146                mir_foreach_instr_in_block_from(block, q, mir_next_op(ins)) {
147                        /* Check if used */
148                        if (mir_has_arg(q, ins->dest))
149                                break;
150
151                        /* Check if overwritten */
152                        if (q->dest == ins->dest) {
153                                /* Special case to vec4; component tracking is
154                                 * harder */
155
156                                overwritten = (q->mask == 0xF);
157                                break;
158                        }
159                }
160
161                if (overwritten) {
162                        mir_remove_instruction(ins);
163                        progress = true;
164                }
165        }
166
167        return progress;
168}
169