1/*
2 * Copyright © 2012 Intel Corporation
3 * Copyright © 2016 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25#define MAX_INSTRUCTION (1 << 30)
26
27#include "util/ralloc.h"
28#include "util/register_allocate.h"
29#include "v3d_compiler.h"
30
31struct partial_update_state {
32        struct qinst *insts[4];
33        uint8_t channels;
34};
35
36static uint32_t
37int_hash(const void *key)
38{
39        return _mesa_hash_data(key, sizeof(int));
40}
41
42static bool
43int_compare(const void *key1, const void *key2)
44{
45        return *(const int *)key1 == *(const int *)key2;
46}
47
48static int
49vir_reg_to_var(struct qreg reg)
50{
51        if (reg.file == QFILE_TEMP)
52                return reg.index;
53
54        return -1;
55}
56
57static void
58vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
59              struct qreg src)
60{
61        int var = vir_reg_to_var(src);
62        if (var == -1)
63                return;
64
65        c->temp_start[var] = MIN2(c->temp_start[var], ip);
66        c->temp_end[var] = MAX2(c->temp_end[var], ip);
67
68        /* The use[] bitset marks when the block makes
69         * use of a variable without having completely
70         * defined that variable within the block.
71         */
72        if (!BITSET_TEST(block->def, var))
73                BITSET_SET(block->use, var);
74}
75
76static struct partial_update_state *
77get_partial_update_state(struct hash_table *partial_update_ht,
78                         struct qinst *inst)
79{
80        struct hash_entry *entry =
81                _mesa_hash_table_search(partial_update_ht,
82                                        &inst->dst.index);
83        if (entry)
84                return entry->data;
85
86        struct partial_update_state *state =
87                rzalloc(partial_update_ht, struct partial_update_state);
88
89        _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
90
91        return state;
92}
93
94static void
95vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
96              struct hash_table *partial_update_ht, struct qinst *inst)
97{
98        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
99                return;
100
101        /* The def[] bitset marks when an initialization in a
102         * block completely screens off previous updates of
103         * that variable.
104         */
105        int var = vir_reg_to_var(inst->dst);
106        if (var == -1)
107                return;
108
109        c->temp_start[var] = MIN2(c->temp_start[var], ip);
110        c->temp_end[var] = MAX2(c->temp_end[var], ip);
111
112        /* Mark the block as having a (partial) def of the var. */
113        BITSET_SET(block->defout, var);
114
115        /* If we've already tracked this as a def that screens off previous
116         * uses, or already used it within the block, there's nothing to do.
117         */
118        if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
119                return;
120
121        /* Easy, common case: unconditional full register update.*/
122        if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
123             inst->qpu.flags.mc == V3D_QPU_COND_NONE) &&
124            inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
125            inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
126                BITSET_SET(block->def, var);
127                return;
128        }
129
130        /* Finally, look at the condition code and packing and mark it as a
131         * def.  We need to make sure that we understand sequences
132         * instructions like:
133         *
134         *     mov.zs t0, t1
135         *     mov.zc t0, t2
136         *
137         * or:
138         *
139         *     mmov t0.8a, t1
140         *     mmov t0.8b, t2
141         *     mmov t0.8c, t3
142         *     mmov t0.8d, t4
143         *
144         * as defining the temp within the block, because otherwise dst's live
145         * range will get extended up the control flow to the top of the
146         * program.
147         */
148        struct partial_update_state *state =
149                get_partial_update_state(partial_update_ht, inst);
150        uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */
151
152        if (inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
153            inst->qpu.flags.mc == V3D_QPU_COND_NONE) {
154                state->channels |= mask;
155        } else {
156                for (int i = 0; i < 4; i++) {
157                        if (!(mask & (1 << i)))
158                                continue;
159
160                        /* XXXif (state->insts[i] &&
161                            state->insts[i]->cond ==
162                            qpu_cond_complement(inst->cond))
163                                state->channels |= 1 << i;
164                        else
165                        */
166                                state->insts[i] = inst;
167                }
168        }
169
170        if (state->channels == 0xf)
171                BITSET_SET(block->def, var);
172}
173
174static void
175sf_state_clear(struct hash_table *partial_update_ht)
176{
177        hash_table_foreach(partial_update_ht, entry) {
178                struct partial_update_state *state = entry->data;
179
180                for (int i = 0; i < 4; i++) {
181                        if (state->insts[i] &&
182                            (state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE ||
183                             state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE))
184                                state->insts[i] = NULL;
185                }
186        }
187}
188
189/* Sets up the def/use arrays for when variables are used-before-defined or
190 * defined-before-used in the block.
191 *
192 * Also initializes the temp_start/temp_end to cover just the instruction IPs
193 * where the variable is used, which will be extended later in
194 * vir_compute_start_end().
195 */
196static void
197vir_setup_def_use(struct v3d_compile *c)
198{
199        struct hash_table *partial_update_ht =
200                _mesa_hash_table_create(c, int_hash, int_compare);
201        int ip = 0;
202
203        vir_for_each_block(block, c) {
204                block->start_ip = ip;
205
206                _mesa_hash_table_clear(partial_update_ht, NULL);
207
208                vir_for_each_inst(inst, block) {
209                        for (int i = 0; i < vir_get_nsrc(inst); i++)
210                                vir_setup_use(c, block, ip, inst->src[i]);
211
212                        vir_setup_def(c, block, ip, partial_update_ht, inst);
213
214                        if (false /* XXX inst->uf */)
215                                sf_state_clear(partial_update_ht);
216
217                        /* Payload registers: r0/1/2 contain W, centroid W,
218                         * and Z at program start.  Register allocation will
219                         * force their nodes to R0/1/2.
220                         */
221                        if (inst->src[0].file == QFILE_REG) {
222                                switch (inst->src[0].index) {
223                                case 0:
224                                case 1:
225                                case 2:
226                                        c->temp_start[inst->dst.index] = 0;
227                                        break;
228                                }
229                        }
230
231                        ip++;
232                }
233                block->end_ip = ip;
234        }
235
236        _mesa_hash_table_destroy(partial_update_ht, NULL);
237}
238
239static bool
240vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
241{
242        bool cont = false;
243
244        vir_for_each_block_rev(block, c) {
245                /* Update live_out: Any successor using the variable
246                 * on entrance needs us to have the variable live on
247                 * exit.
248                 */
249                vir_for_each_successor(succ, block) {
250                        for (int i = 0; i < bitset_words; i++) {
251                                BITSET_WORD new_live_out = (succ->live_in[i] &
252                                                            ~block->live_out[i]);
253                                if (new_live_out) {
254                                        block->live_out[i] |= new_live_out;
255                                        cont = true;
256                                }
257                        }
258                }
259
260                /* Update live_in */
261                for (int i = 0; i < bitset_words; i++) {
262                        BITSET_WORD new_live_in = (block->use[i] |
263                                                   (block->live_out[i] &
264                                                    ~block->def[i]));
265                        if (new_live_in & ~block->live_in[i]) {
266                                block->live_in[i] |= new_live_in;
267                                cont = true;
268                        }
269                }
270        }
271
272        return cont;
273}
274
275static bool
276vir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words)
277{
278        bool cont = false;
279
280        vir_for_each_block_rev(block, c) {
281                /* Propagate defin/defout down the successors to produce the
282                 * union of blocks with a reachable (partial) definition of
283                 * the var.
284                 *
285                 * This keeps a conditional first write to a reg from
286                 * extending its lifetime back to the start of the program.
287                 */
288                vir_for_each_successor(succ, block) {
289                        for (int i = 0; i < bitset_words; i++) {
290                                BITSET_WORD new_def = (block->defout[i] &
291                                                       ~succ->defin[i]);
292                                succ->defin[i] |= new_def;
293                                succ->defout[i] |= new_def;
294                                cont |= new_def;
295                        }
296                }
297        }
298
299        return cont;
300}
301
302/**
303 * Extend the start/end ranges for each variable to account for the
304 * new information calculated from control flow.
305 */
306static void
307vir_compute_start_end(struct v3d_compile *c, int num_vars)
308{
309        vir_for_each_block(block, c) {
310                for (int i = 0; i < num_vars; i++) {
311                        if (BITSET_TEST(block->live_in, i) &&
312                            BITSET_TEST(block->defin, i)) {
313                                c->temp_start[i] = MIN2(c->temp_start[i],
314                                                        block->start_ip);
315                                c->temp_end[i] = MAX2(c->temp_end[i],
316                                                      block->start_ip);
317                        }
318
319                        if (BITSET_TEST(block->live_out, i) &&
320                            BITSET_TEST(block->defout, i)) {
321                                c->temp_start[i] = MIN2(c->temp_start[i],
322                                                        block->end_ip);
323                                c->temp_end[i] = MAX2(c->temp_end[i],
324                                                      block->end_ip);
325                        }
326                }
327        }
328}
329
330void
331vir_calculate_live_intervals(struct v3d_compile *c)
332{
333        int bitset_words = BITSET_WORDS(c->num_temps);
334
335        /* We may be called more than once if we've rearranged the program to
336         * try to get register allocation to succeed.
337         */
338        if (c->temp_start) {
339                ralloc_free(c->temp_start);
340                ralloc_free(c->temp_end);
341
342                vir_for_each_block(block, c) {
343                        ralloc_free(block->def);
344                        ralloc_free(block->use);
345                        ralloc_free(block->live_in);
346                        ralloc_free(block->live_out);
347                }
348        }
349
350        c->temp_start = rzalloc_array(c, int, c->num_temps);
351        c->temp_end = rzalloc_array(c, int, c->num_temps);
352
353        for (int i = 0; i < c->num_temps; i++) {
354                c->temp_start[i] = MAX_INSTRUCTION;
355                c->temp_end[i] = -1;
356        }
357
358        vir_for_each_block(block, c) {
359                block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
360                block->defin = rzalloc_array(c, BITSET_WORD, bitset_words);
361                block->defout = rzalloc_array(c, BITSET_WORD, bitset_words);
362                block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
363                block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
364                block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
365        }
366
367        vir_setup_def_use(c);
368
369        while (vir_live_variables_dataflow(c, bitset_words))
370                ;
371
372        while (vir_live_variables_defin_defout_dataflow(c, bitset_words))
373                ;
374
375        vir_compute_start_end(c, c->num_temps);
376
377        c->live_intervals_valid = true;
378}
379