101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2012 Intel Corporation
301e04c3fSmrg * Copyright © 2016 Broadcom
401e04c3fSmrg *
501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
601e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
701e04c3fSmrg * to deal in the Software without restriction, including without limitation
801e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
901e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
1001e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1101e04c3fSmrg *
1201e04c3fSmrg * The above copyright notice and this permission notice (including the next
1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1401e04c3fSmrg * Software.
1501e04c3fSmrg *
1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1901e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2001e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2101e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2201e04c3fSmrg * IN THE SOFTWARE.
2301e04c3fSmrg */
2401e04c3fSmrg
2501e04c3fSmrg#define MAX_INSTRUCTION (1 << 30)
2601e04c3fSmrg
2701e04c3fSmrg#include "util/ralloc.h"
2801e04c3fSmrg#include "util/register_allocate.h"
2901e04c3fSmrg#include "v3d_compiler.h"
3001e04c3fSmrg
317ec681f3Smrg/* Keeps track of conditional / partial writes in a block */
3201e04c3fSmrgstruct partial_update_state {
337ec681f3Smrg        /* Instruction doing a conditional or partial write */
347ec681f3Smrg        struct qinst *inst;
357ec681f3Smrg        /* Instruction that set the flags for the conditional write */
367ec681f3Smrg        struct qinst *flags_inst;
3701e04c3fSmrg};
3801e04c3fSmrg
3901e04c3fSmrgstatic int
4001e04c3fSmrgvir_reg_to_var(struct qreg reg)
4101e04c3fSmrg{
4201e04c3fSmrg        if (reg.file == QFILE_TEMP)
4301e04c3fSmrg                return reg.index;
4401e04c3fSmrg
4501e04c3fSmrg        return -1;
4601e04c3fSmrg}
4701e04c3fSmrg
4801e04c3fSmrgstatic void
4901e04c3fSmrgvir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
507ec681f3Smrg              struct partial_update_state *partial_update_ht, struct qinst *inst,
517ec681f3Smrg              struct qreg src, struct qinst *flags_inst)
5201e04c3fSmrg{
5301e04c3fSmrg        int var = vir_reg_to_var(src);
5401e04c3fSmrg        if (var == -1)
5501e04c3fSmrg                return;
5601e04c3fSmrg
5701e04c3fSmrg        c->temp_start[var] = MIN2(c->temp_start[var], ip);
5801e04c3fSmrg        c->temp_end[var] = MAX2(c->temp_end[var], ip);
5901e04c3fSmrg
6001e04c3fSmrg        /* The use[] bitset marks when the block makes
6101e04c3fSmrg         * use of a variable without having completely
6201e04c3fSmrg         * defined that variable within the block.
6301e04c3fSmrg         */
647ec681f3Smrg        if (!BITSET_TEST(block->def, var)) {
657ec681f3Smrg                /* If this use of var is conditional and the condition
667ec681f3Smrg                 * and flags match those of a previous instruction
677ec681f3Smrg                 * in the same block partially defining var then we
687ec681f3Smrg                 * consider var completely defined within the block.
697ec681f3Smrg                 */
707ec681f3Smrg                if (BITSET_TEST(block->defout, var)) {
717ec681f3Smrg                        struct partial_update_state *state =
727ec681f3Smrg                                &partial_update_ht[var];
737ec681f3Smrg                        if (state->inst) {
747ec681f3Smrg                                if (vir_get_cond(inst) == vir_get_cond(state->inst) &&
757ec681f3Smrg                                    flags_inst == state->flags_inst) {
767ec681f3Smrg                                        return;
777ec681f3Smrg                                }
787ec681f3Smrg                        }
797ec681f3Smrg                }
8001e04c3fSmrg
817ec681f3Smrg                BITSET_SET(block->use, var);
827ec681f3Smrg        }
8301e04c3fSmrg}
8401e04c3fSmrg
857ec681f3Smrg/* The def[] bitset marks when an initialization in a
867ec681f3Smrg * block completely screens off previous updates of
877ec681f3Smrg * that variable.
887ec681f3Smrg */
8901e04c3fSmrgstatic void
9001e04c3fSmrgvir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
917ec681f3Smrg              struct partial_update_state *partial_update, struct qinst *inst,
927ec681f3Smrg              struct qinst *flags_inst)
9301e04c3fSmrg{
9401e04c3fSmrg        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
9501e04c3fSmrg                return;
9601e04c3fSmrg
9701e04c3fSmrg        int var = vir_reg_to_var(inst->dst);
9801e04c3fSmrg        if (var == -1)
9901e04c3fSmrg                return;
10001e04c3fSmrg
10101e04c3fSmrg        c->temp_start[var] = MIN2(c->temp_start[var], ip);
10201e04c3fSmrg        c->temp_end[var] = MAX2(c->temp_end[var], ip);
10301e04c3fSmrg
104ed98bd31Smaya        /* Mark the block as having a (partial) def of the var. */
105ed98bd31Smaya        BITSET_SET(block->defout, var);
106ed98bd31Smaya
107ed98bd31Smaya        /* If we've already tracked this as a def that screens off previous
108ed98bd31Smaya         * uses, or already used it within the block, there's nothing to do.
10901e04c3fSmrg         */
11001e04c3fSmrg        if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
11101e04c3fSmrg                return;
11201e04c3fSmrg
113ed98bd31Smaya        /* Easy, common case: unconditional full register update.*/
114ed98bd31Smaya        if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
115ed98bd31Smaya             inst->qpu.flags.mc == V3D_QPU_COND_NONE) &&
11601e04c3fSmrg            inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
11701e04c3fSmrg            inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
11801e04c3fSmrg                BITSET_SET(block->def, var);
11901e04c3fSmrg                return;
12001e04c3fSmrg        }
12101e04c3fSmrg
1227ec681f3Smrg        /* Keep track of conditional writes.
12301e04c3fSmrg         *
1247ec681f3Smrg         * Notice that the dst's live range for a conditional or partial writes
1257ec681f3Smrg         * will get extended up the control flow to the top of the program until
1267ec681f3Smrg         * we find a full write, making register allocation more difficult, so
1277ec681f3Smrg         * we should try our best to keep track of these and figure out if a
1287ec681f3Smrg         * combination of them actually writes the entire register so we can
1297ec681f3Smrg         * stop that process early and reduce liveness.
13001e04c3fSmrg         *
1317ec681f3Smrg         * FIXME: Track partial updates via pack/unpack.
13201e04c3fSmrg         */
1337ec681f3Smrg        struct partial_update_state *state = &partial_update[var];
1347ec681f3Smrg        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
1357ec681f3Smrg            inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
1367ec681f3Smrg                state->inst = inst;
1377ec681f3Smrg                state->flags_inst = flags_inst;
13801e04c3fSmrg        }
13901e04c3fSmrg}
14001e04c3fSmrg
14101e04c3fSmrg/* Sets up the def/use arrays for when variables are used-before-defined or
14201e04c3fSmrg * defined-before-used in the block.
14301e04c3fSmrg *
14401e04c3fSmrg * Also initializes the temp_start/temp_end to cover just the instruction IPs
14501e04c3fSmrg * where the variable is used, which will be extended later in
14601e04c3fSmrg * vir_compute_start_end().
14701e04c3fSmrg */
14801e04c3fSmrgstatic void
14901e04c3fSmrgvir_setup_def_use(struct v3d_compile *c)
15001e04c3fSmrg{
1517ec681f3Smrg        struct partial_update_state *partial_update =
1527ec681f3Smrg                rzalloc_array(c, struct partial_update_state, c->num_temps);
15301e04c3fSmrg        int ip = 0;
15401e04c3fSmrg
15501e04c3fSmrg        vir_for_each_block(block, c) {
15601e04c3fSmrg                block->start_ip = ip;
15701e04c3fSmrg
1587ec681f3Smrg                memset(partial_update, 0,
1597ec681f3Smrg                       sizeof(struct partial_update_state) * c->num_temps);
1607ec681f3Smrg
1617ec681f3Smrg                struct qinst *flags_inst = NULL;
16201e04c3fSmrg
16301e04c3fSmrg                vir_for_each_inst(inst, block) {
1647ec681f3Smrg                        for (int i = 0; i < vir_get_nsrc(inst); i++) {
1657ec681f3Smrg                                vir_setup_use(c, block, ip, partial_update,
1667ec681f3Smrg                                              inst, inst->src[i], flags_inst);
1677ec681f3Smrg                        }
1687ec681f3Smrg
1697ec681f3Smrg                        vir_setup_def(c, block, ip, partial_update,
1707ec681f3Smrg                                      inst, flags_inst);
17101e04c3fSmrg
1727ec681f3Smrg                        if (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
1737ec681f3Smrg                            inst->qpu.flags.mpf != V3D_QPU_PF_NONE) {
1747ec681f3Smrg                               flags_inst = inst;
1757ec681f3Smrg                        }
17601e04c3fSmrg
1777ec681f3Smrg                        if (inst->qpu.flags.auf != V3D_QPU_UF_NONE ||
1787ec681f3Smrg                            inst->qpu.flags.muf != V3D_QPU_UF_NONE) {
1797ec681f3Smrg                                flags_inst = NULL;
1807ec681f3Smrg                        }
18101e04c3fSmrg
18201e04c3fSmrg                        /* Payload registers: r0/1/2 contain W, centroid W,
18301e04c3fSmrg                         * and Z at program start.  Register allocation will
18401e04c3fSmrg                         * force their nodes to R0/1/2.
18501e04c3fSmrg                         */
18601e04c3fSmrg                        if (inst->src[0].file == QFILE_REG) {
18701e04c3fSmrg                                switch (inst->src[0].index) {
18801e04c3fSmrg                                case 0:
18901e04c3fSmrg                                case 1:
19001e04c3fSmrg                                case 2:
19101e04c3fSmrg                                        c->temp_start[inst->dst.index] = 0;
19201e04c3fSmrg                                        break;
19301e04c3fSmrg                                }
19401e04c3fSmrg                        }
19501e04c3fSmrg
19601e04c3fSmrg                        ip++;
19701e04c3fSmrg                }
19801e04c3fSmrg                block->end_ip = ip;
19901e04c3fSmrg        }
20001e04c3fSmrg
2017ec681f3Smrg        ralloc_free(partial_update);
20201e04c3fSmrg}
20301e04c3fSmrg
20401e04c3fSmrgstatic bool
20501e04c3fSmrgvir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
20601e04c3fSmrg{
20701e04c3fSmrg        bool cont = false;
20801e04c3fSmrg
20901e04c3fSmrg        vir_for_each_block_rev(block, c) {
21001e04c3fSmrg                /* Update live_out: Any successor using the variable
21101e04c3fSmrg                 * on entrance needs us to have the variable live on
21201e04c3fSmrg                 * exit.
21301e04c3fSmrg                 */
21401e04c3fSmrg                vir_for_each_successor(succ, block) {
21501e04c3fSmrg                        for (int i = 0; i < bitset_words; i++) {
21601e04c3fSmrg                                BITSET_WORD new_live_out = (succ->live_in[i] &
21701e04c3fSmrg                                                            ~block->live_out[i]);
21801e04c3fSmrg                                if (new_live_out) {
21901e04c3fSmrg                                        block->live_out[i] |= new_live_out;
22001e04c3fSmrg                                        cont = true;
22101e04c3fSmrg                                }
22201e04c3fSmrg                        }
22301e04c3fSmrg                }
22401e04c3fSmrg
22501e04c3fSmrg                /* Update live_in */
22601e04c3fSmrg                for (int i = 0; i < bitset_words; i++) {
22701e04c3fSmrg                        BITSET_WORD new_live_in = (block->use[i] |
22801e04c3fSmrg                                                   (block->live_out[i] &
22901e04c3fSmrg                                                    ~block->def[i]));
23001e04c3fSmrg                        if (new_live_in & ~block->live_in[i]) {
23101e04c3fSmrg                                block->live_in[i] |= new_live_in;
23201e04c3fSmrg                                cont = true;
23301e04c3fSmrg                        }
23401e04c3fSmrg                }
23501e04c3fSmrg        }
23601e04c3fSmrg
23701e04c3fSmrg        return cont;
23801e04c3fSmrg}
23901e04c3fSmrg
240ed98bd31Smayastatic bool
241ed98bd31Smayavir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words)
242ed98bd31Smaya{
243ed98bd31Smaya        bool cont = false;
244ed98bd31Smaya
245ed98bd31Smaya        vir_for_each_block_rev(block, c) {
246ed98bd31Smaya                /* Propagate defin/defout down the successors to produce the
247ed98bd31Smaya                 * union of blocks with a reachable (partial) definition of
248ed98bd31Smaya                 * the var.
249ed98bd31Smaya                 *
250ed98bd31Smaya                 * This keeps a conditional first write to a reg from
251ed98bd31Smaya                 * extending its lifetime back to the start of the program.
252ed98bd31Smaya                 */
253ed98bd31Smaya                vir_for_each_successor(succ, block) {
254ed98bd31Smaya                        for (int i = 0; i < bitset_words; i++) {
255ed98bd31Smaya                                BITSET_WORD new_def = (block->defout[i] &
256ed98bd31Smaya                                                       ~succ->defin[i]);
257ed98bd31Smaya                                succ->defin[i] |= new_def;
258ed98bd31Smaya                                succ->defout[i] |= new_def;
259ed98bd31Smaya                                cont |= new_def;
260ed98bd31Smaya                        }
261ed98bd31Smaya                }
262ed98bd31Smaya        }
263ed98bd31Smaya
264ed98bd31Smaya        return cont;
265ed98bd31Smaya}
266ed98bd31Smaya
26701e04c3fSmrg/**
26801e04c3fSmrg * Extend the start/end ranges for each variable to account for the
26901e04c3fSmrg * new information calculated from control flow.
27001e04c3fSmrg */
27101e04c3fSmrgstatic void
27201e04c3fSmrgvir_compute_start_end(struct v3d_compile *c, int num_vars)
27301e04c3fSmrg{
27401e04c3fSmrg        vir_for_each_block(block, c) {
27501e04c3fSmrg                for (int i = 0; i < num_vars; i++) {
276ed98bd31Smaya                        if (BITSET_TEST(block->live_in, i) &&
277ed98bd31Smaya                            BITSET_TEST(block->defin, i)) {
27801e04c3fSmrg                                c->temp_start[i] = MIN2(c->temp_start[i],
27901e04c3fSmrg                                                        block->start_ip);
28001e04c3fSmrg                                c->temp_end[i] = MAX2(c->temp_end[i],
28101e04c3fSmrg                                                      block->start_ip);
28201e04c3fSmrg                        }
28301e04c3fSmrg
284ed98bd31Smaya                        if (BITSET_TEST(block->live_out, i) &&
285ed98bd31Smaya                            BITSET_TEST(block->defout, i)) {
28601e04c3fSmrg                                c->temp_start[i] = MIN2(c->temp_start[i],
28701e04c3fSmrg                                                        block->end_ip);
28801e04c3fSmrg                                c->temp_end[i] = MAX2(c->temp_end[i],
28901e04c3fSmrg                                                      block->end_ip);
29001e04c3fSmrg                        }
29101e04c3fSmrg                }
29201e04c3fSmrg        }
29301e04c3fSmrg}
29401e04c3fSmrg
29501e04c3fSmrgvoid
29601e04c3fSmrgvir_calculate_live_intervals(struct v3d_compile *c)
29701e04c3fSmrg{
29801e04c3fSmrg        int bitset_words = BITSET_WORDS(c->num_temps);
29901e04c3fSmrg
30001e04c3fSmrg        /* We may be called more than once if we've rearranged the program to
30101e04c3fSmrg         * try to get register allocation to succeed.
30201e04c3fSmrg         */
30301e04c3fSmrg        if (c->temp_start) {
30401e04c3fSmrg                ralloc_free(c->temp_start);
30501e04c3fSmrg                ralloc_free(c->temp_end);
30601e04c3fSmrg
30701e04c3fSmrg                vir_for_each_block(block, c) {
30801e04c3fSmrg                        ralloc_free(block->def);
30901e04c3fSmrg                        ralloc_free(block->use);
31001e04c3fSmrg                        ralloc_free(block->live_in);
31101e04c3fSmrg                        ralloc_free(block->live_out);
31201e04c3fSmrg                }
31301e04c3fSmrg        }
31401e04c3fSmrg
31501e04c3fSmrg        c->temp_start = rzalloc_array(c, int, c->num_temps);
31601e04c3fSmrg        c->temp_end = rzalloc_array(c, int, c->num_temps);
31701e04c3fSmrg
31801e04c3fSmrg        for (int i = 0; i < c->num_temps; i++) {
31901e04c3fSmrg                c->temp_start[i] = MAX_INSTRUCTION;
32001e04c3fSmrg                c->temp_end[i] = -1;
32101e04c3fSmrg        }
32201e04c3fSmrg
32301e04c3fSmrg        vir_for_each_block(block, c) {
32401e04c3fSmrg                block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
325ed98bd31Smaya                block->defin = rzalloc_array(c, BITSET_WORD, bitset_words);
326ed98bd31Smaya                block->defout = rzalloc_array(c, BITSET_WORD, bitset_words);
32701e04c3fSmrg                block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
32801e04c3fSmrg                block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
32901e04c3fSmrg                block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
33001e04c3fSmrg        }
33101e04c3fSmrg
33201e04c3fSmrg        vir_setup_def_use(c);
33301e04c3fSmrg
33401e04c3fSmrg        while (vir_live_variables_dataflow(c, bitset_words))
33501e04c3fSmrg                ;
33601e04c3fSmrg
337ed98bd31Smaya        while (vir_live_variables_defin_defout_dataflow(c, bitset_words))
338ed98bd31Smaya                ;
339ed98bd31Smaya
34001e04c3fSmrg        vir_compute_start_end(c, c->num_temps);
34101e04c3fSmrg
34201e04c3fSmrg        c->live_intervals_valid = true;
34301e04c3fSmrg}
344