1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/**
25 * @file v3d_opt_dead_code.c
26 *
27 * This is a simple dead code eliminator for SSA values in VIR.
28 *
29 * It walks all the instructions finding what temps are used, then walks again
30 * to remove instructions writing unused temps.
31 *
32 * This is an inefficient implementation if you have long chains of
33 * instructions where the entire chain is dead, but we expect those to have
34 * been eliminated at the NIR level, and here we're just cleaning up small
35 * problems produced by NIR->VIR.
36 */
37
38#include "v3d_compiler.h"
39
/* Switch for this pass's stderr tracing.  Nothing in the visible code ever
 * assigns it, so tracing stays off unless this initializer is edited.
 */
static bool debug = false;
41
42static void
43dce(struct v3d_compile *c, struct qinst *inst)
44{
45        if (debug) {
46                fprintf(stderr, "Removing: ");
47                vir_dump_inst(c, inst);
48                fprintf(stderr, "\n");
49        }
50        assert(!v3d_qpu_writes_flags(&inst->qpu));
51        vir_remove_instruction(c, inst);
52}
53
54static bool
55has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst)
56{
57        for (int i = 0; i < vir_get_nsrc(inst); i++) {
58                if (inst->src[i].file == QFILE_VPM)
59                        return true;
60        }
61
62        return false;
63}
64
65static bool
66can_write_to_null(struct v3d_compile *c, struct qinst *inst)
67{
68        /* The SFU instructions must write to a physical register. */
69        if (c->devinfo->ver >= 41 && v3d_qpu_uses_sfu(&inst->qpu))
70                return false;
71
72        return true;
73}
74
75static void
76vir_dce_flags(struct v3d_compile *c, struct qinst *inst)
77{
78        if (debug) {
79                fprintf(stderr,
80                        "Removing flags write from: ");
81                vir_dump_inst(c, inst);
82                fprintf(stderr, "\n");
83        }
84
85        assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);
86
87        inst->qpu.flags.apf = V3D_QPU_PF_NONE;
88        inst->qpu.flags.mpf = V3D_QPU_PF_NONE;
89        inst->qpu.flags.auf = V3D_QPU_UF_NONE;
90        inst->qpu.flags.muf = V3D_QPU_UF_NONE;
91}
92
93static bool
94check_last_ldunifa(struct v3d_compile *c,
95                   struct qinst *inst,
96                   struct qblock *block)
97{
98        if (!inst->qpu.sig.ldunifa && !inst->qpu.sig.ldunifarf)
99                return false;
100
101        list_for_each_entry_from(struct qinst, scan_inst, inst->link.next,
102                                 &block->instructions, link) {
103                /* If we find a new write to unifa, then this was the last
104                 * ldunifa in its sequence and is safe to remove.
105                 */
106                if (scan_inst->dst.file == QFILE_MAGIC &&
107                    scan_inst->dst.index == V3D_QPU_WADDR_UNIFA) {
108                        return true;
109                }
110
111                /* If we find another ldunifa in the same sequence then we
112                 * can't remove it.
113                 */
114                if (scan_inst->qpu.sig.ldunifa || scan_inst->qpu.sig.ldunifarf)
115                        return false;
116        }
117
118        return true;
119}
120
121static bool
122check_first_ldunifa(struct v3d_compile *c,
123                    struct qinst *inst,
124                    struct qblock *block,
125                    struct qinst **unifa)
126{
127        if (!inst->qpu.sig.ldunifa && !inst->qpu.sig.ldunifarf)
128                return false;
129
130        list_for_each_entry_from_rev(struct qinst, scan_inst, inst->link.prev,
131                                     &block->instructions, link) {
132                /* If we find a write to unifa, then this was the first
133                 * ldunifa in its sequence and is safe to remove.
134                 */
135                if (scan_inst->dst.file == QFILE_MAGIC &&
136                    scan_inst->dst.index == V3D_QPU_WADDR_UNIFA) {
137                        *unifa = scan_inst;
138                        return true;
139                }
140
141                /* If we find another ldunifa in the same sequence then we
142                 * can't remove it.
143                 */
144                if (scan_inst->qpu.sig.ldunifa || scan_inst->qpu.sig.ldunifarf)
145                        return false;
146        }
147
148        unreachable("could not find starting unifa for ldunifa sequence");
149}
150
151static bool
152increment_unifa_address(struct v3d_compile *c, struct qblock *block, struct qinst *unifa)
153{
154        struct qblock *current_block = c->cur_block;
155        if (unifa->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
156            unifa->qpu.alu.mul.op == V3D_QPU_M_MOV) {
157                c->cursor = vir_after_inst(unifa);
158                c->cur_block = block;
159                struct qreg unifa_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
160                vir_ADD_dest(c, unifa_reg, unifa->src[0], vir_uniform_ui(c, 4u));
161                vir_remove_instruction(c, unifa);
162                c->cur_block = current_block;
163                return true;
164        }
165
166        if (unifa->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
167            unifa->qpu.alu.add.op == V3D_QPU_A_ADD) {
168                c->cursor = vir_after_inst(unifa);
169                c->cur_block = block;
170                struct qreg unifa_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
171                struct qreg tmp =
172                        vir_ADD(c, unifa->src[1], vir_uniform_ui(c, 4u));
173                vir_ADD_dest(c, unifa_reg, unifa->src[0], tmp);
174                vir_remove_instruction(c, unifa);
175                c->cur_block = current_block;
176                return true;
177        }
178
179        return false;
180}
181
182bool
183vir_opt_dead_code(struct v3d_compile *c)
184{
185        bool progress = false;
186        bool *used = calloc(c->num_temps, sizeof(bool));
187
188        /* Defuse the "are you removing the cursor?" assertion in the core.
189         * You'll need to set up a new cursor for any new instructions after
190         * doing DCE (which we would expect, anyway).
191         */
192        c->cursor.link = NULL;
193
194        vir_for_each_inst_inorder(inst, c) {
195                for (int i = 0; i < vir_get_nsrc(inst); i++) {
196                        if (inst->src[i].file == QFILE_TEMP)
197                                used[inst->src[i].index] = true;
198                }
199        }
200
201        vir_for_each_block(block, c) {
202                struct qinst *last_flags_write = NULL;
203
204                vir_for_each_inst_safe(inst, block) {
205                        /* If this instruction reads the flags, we can't
206                         * remove the flags generation for it.
207                         */
208                        if (v3d_qpu_reads_flags(&inst->qpu))
209                                last_flags_write = NULL;
210
211                        if (inst->dst.file != QFILE_NULL &&
212                            !(inst->dst.file == QFILE_TEMP &&
213                              !used[inst->dst.index])) {
214                                continue;
215                        }
216
217                        const bool is_ldunifa = inst->qpu.sig.ldunifa ||
218                                                inst->qpu.sig.ldunifarf;
219
220                        if (vir_has_side_effects(c, inst) && !is_ldunifa)
221                                continue;
222
223                        bool is_first_ldunifa = false;
224                        bool is_last_ldunifa = false;
225                        struct qinst *unifa = NULL;
226                        if (is_ldunifa) {
227                                is_last_ldunifa =
228                                        check_last_ldunifa(c, inst, block);
229
230                                is_first_ldunifa =
231                                        check_first_ldunifa(c, inst, block, &unifa);
232                        }
233
234                        if (v3d_qpu_writes_flags(&inst->qpu)) {
235                                /* If we obscure a previous flags write,
236                                 * drop it.
237                                 */
238                                if (last_flags_write &&
239                                    (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
240                                     inst->qpu.flags.mpf != V3D_QPU_PF_NONE)) {
241                                        vir_dce_flags(c, last_flags_write);
242                                        progress = true;
243                                }
244
245                                last_flags_write = inst;
246                        }
247
248                        if (v3d_qpu_writes_flags(&inst->qpu) ||
249                            has_nonremovable_reads(c, inst) ||
250                            (is_ldunifa && !is_first_ldunifa && !is_last_ldunifa)) {
251                                /* If we can't remove the instruction, but we
252                                 * don't need its destination value, just
253                                 * remove the destination.  The register
254                                 * allocator would trivially color it and it
255                                 * wouldn't cause any register pressure, but
256                                 * it's nicer to read the VIR code without
257                                 * unused destination regs.
258                                 */
259                                if (inst->dst.file == QFILE_TEMP &&
260                                    can_write_to_null(c, inst)) {
261                                        if (debug) {
262                                                fprintf(stderr,
263                                                        "Removing dst from: ");
264                                                vir_dump_inst(c, inst);
265                                                fprintf(stderr, "\n");
266                                        }
267                                        c->defs[inst->dst.index] = NULL;
268                                        inst->dst.file = QFILE_NULL;
269                                        progress = true;
270                                }
271                                continue;
272                        }
273
274                        /* If we are removing the first ldunifa in a sequence
275                         * we need to update the unifa address.
276                         */
277                        if (is_first_ldunifa) {
278                                assert(unifa);
279                                if (!increment_unifa_address(c, block, unifa))
280                                        continue;
281                        }
282
283                        assert(inst != last_flags_write);
284                        dce(c, inst);
285                        progress = true;
286                        continue;
287                }
288        }
289
290        free(used);
291
292        return progress;
293}
294