1/*
2 * Copyright © 2012 Intel Corporation
3 * Copyright © 2016 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25#define MAX_INSTRUCTION (1 << 30)
26
27#include "util/ralloc.h"
28#include "util/register_allocate.h"
29#include "vc4_context.h"
30#include "vc4_qir.h"
31
32struct partial_update_state {
33        struct qinst *insts[4];
34        uint8_t channels;
35};
36
37static int
38qir_reg_to_var(struct qreg reg)
39{
40        if (reg.file == QFILE_TEMP)
41                return reg.index;
42
43        return -1;
44}
45
46static void
47qir_setup_use(struct vc4_compile *c, struct qblock *block, int ip,
48              struct qreg src)
49{
50        int var = qir_reg_to_var(src);
51        if (var == -1)
52                return;
53
54        c->temp_start[var] = MIN2(c->temp_start[var], ip);
55        c->temp_end[var] = MAX2(c->temp_end[var], ip);
56
57        /* The use[] bitset marks when the block makes
58         * use of a variable without having completely
59         * defined that variable within the block.
60         */
61        if (!BITSET_TEST(block->def, var))
62                BITSET_SET(block->use, var);
63}
64
65static struct partial_update_state *
66get_partial_update_state(struct hash_table *partial_update_ht,
67                         struct qinst *inst)
68{
69        struct hash_entry *entry =
70                _mesa_hash_table_search(partial_update_ht,
71                                        &inst->dst.index);
72        if (entry)
73                return entry->data;
74
75        struct partial_update_state *state =
76                rzalloc(partial_update_ht, struct partial_update_state);
77
78        _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
79
80        return state;
81}
82
83static void
84qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip,
85              struct hash_table *partial_update_ht, struct qinst *inst)
86{
87        /* The def[] bitset marks when an initialization in a
88         * block completely screens off previous updates of
89         * that variable.
90         */
91        int var = qir_reg_to_var(inst->dst);
92        if (var == -1)
93                return;
94
95        c->temp_start[var] = MIN2(c->temp_start[var], ip);
96        c->temp_end[var] = MAX2(c->temp_end[var], ip);
97
98        /* If we've already tracked this as a def, or already used it within
99         * the block, there's nothing to do.
100         */
101        if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
102                return;
103
104        /* Easy, common case: unconditional full register update.
105         *
106         * We treat conditioning on the exec mask as the same as not being
107         * conditional.  This makes sure that if the register gets set on
108         * either side of an if, it is treated as being screened off before
109         * the if.  Otherwise, if there was no intervening def, its live
110         * interval doesn't extend back to the start of he program, and if too
111         * many registers did that we'd fail to register allocate.
112         */
113        if ((inst->cond == QPU_COND_ALWAYS ||
114             inst->cond_is_exec_mask) && !inst->dst.pack) {
115                BITSET_SET(block->def, var);
116                return;
117        }
118
119        /* Finally, look at the condition code and packing and mark it as a
120         * def.  We need to make sure that we understand sequences
121         * instructions like:
122         *
123         *     mov.zs t0, t1
124         *     mov.zc t0, t2
125         *
126         * or:
127         *
128         *     mmov t0.8a, t1
129         *     mmov t0.8b, t2
130         *     mmov t0.8c, t3
131         *     mmov t0.8d, t4
132         *
133         * as defining the temp within the block, because otherwise dst's live
134         * range will get extended up the control flow to the top of the
135         * program.
136         */
137        struct partial_update_state *state =
138                get_partial_update_state(partial_update_ht, inst);
139        uint8_t mask = qir_channels_written(inst);
140
141        if (inst->cond == QPU_COND_ALWAYS) {
142                state->channels |= mask;
143        } else {
144                for (int i = 0; i < 4; i++) {
145                        if (!(mask & (1 << i)))
146                                continue;
147
148                        if (state->insts[i] &&
149                            state->insts[i]->cond ==
150                            qpu_cond_complement(inst->cond))
151                                state->channels |= 1 << i;
152                        else
153                                state->insts[i] = inst;
154                }
155        }
156
157        if (state->channels == 0xf)
158                BITSET_SET(block->def, var);
159}
160
161static void
162sf_state_clear(struct hash_table *partial_update_ht)
163{
164        hash_table_foreach(partial_update_ht, entry) {
165                struct partial_update_state *state = entry->data;
166
167                for (int i = 0; i < 4; i++) {
168                        if (state->insts[i] && state->insts[i]->cond)
169                                state->insts[i] = NULL;
170                }
171        }
172}
173
174/* Sets up the def/use arrays for when variables are used-before-defined or
175 * defined-before-used in the block.
176 *
177 * Also initializes the temp_start/temp_end to cover just the instruction IPs
178 * where the variable is used, which will be extended later in
179 * qir_compute_start_end().
180 */
181static void
182qir_setup_def_use(struct vc4_compile *c)
183{
184        struct hash_table *partial_update_ht =
185                _mesa_hash_table_create(c, _mesa_hash_int, _mesa_key_int_equal);
186        int ip = 0;
187
188        qir_for_each_block(block, c) {
189                block->start_ip = ip;
190
191                _mesa_hash_table_clear(partial_update_ht, NULL);
192
193                qir_for_each_inst(inst, block) {
194                        for (int i = 0; i < qir_get_nsrc(inst); i++)
195                                qir_setup_use(c, block, ip, inst->src[i]);
196
197                        qir_setup_def(c, block, ip, partial_update_ht, inst);
198
199                        if (inst->sf)
200                                sf_state_clear(partial_update_ht);
201
202                        switch (inst->op) {
203                        case QOP_FRAG_Z:
204                        case QOP_FRAG_W:
205                                /* The payload registers have values
206                                 * implicitly loaded at the start of the
207                                 * program.
208                                 */
209                                if (inst->dst.file == QFILE_TEMP)
210                                        c->temp_start[inst->dst.index] = 0;
211                                break;
212                        default:
213                                break;
214                        }
215                        ip++;
216                }
217                block->end_ip = ip;
218        }
219
220        _mesa_hash_table_destroy(partial_update_ht, NULL);
221}
222
223static bool
224qir_live_variables_dataflow(struct vc4_compile *c, int bitset_words)
225{
226        bool cont = false;
227
228        qir_for_each_block_rev(block, c) {
229                /* Update live_out: Any successor using the variable
230                 * on entrance needs us to have the variable live on
231                 * exit.
232                 */
233                qir_for_each_successor(succ, block) {
234                        for (int i = 0; i < bitset_words; i++) {
235                                BITSET_WORD new_live_out = (succ->live_in[i] &
236                                                            ~block->live_out[i]);
237                                if (new_live_out) {
238                                        block->live_out[i] |= new_live_out;
239                                        cont = true;
240                                }
241                        }
242                }
243
244                /* Update live_in */
245                for (int i = 0; i < bitset_words; i++) {
246                        BITSET_WORD new_live_in = (block->use[i] |
247                                                   (block->live_out[i] &
248                                                    ~block->def[i]));
249                        if (new_live_in & ~block->live_in[i]) {
250                                block->live_in[i] |= new_live_in;
251                                cont = true;
252                        }
253                }
254        }
255
256        return cont;
257}
258
259/**
260 * Extend the start/end ranges for each variable to account for the
261 * new information calculated from control flow.
262 */
263static void
264qir_compute_start_end(struct vc4_compile *c, int num_vars)
265{
266        qir_for_each_block(block, c) {
267                for (int i = 0; i < num_vars; i++) {
268                        if (BITSET_TEST(block->live_in, i)) {
269                                c->temp_start[i] = MIN2(c->temp_start[i],
270                                                        block->start_ip);
271                                c->temp_end[i] = MAX2(c->temp_end[i],
272                                                      block->start_ip);
273                        }
274
275                        if (BITSET_TEST(block->live_out, i)) {
276                                c->temp_start[i] = MIN2(c->temp_start[i],
277                                                        block->end_ip);
278                                c->temp_end[i] = MAX2(c->temp_end[i],
279                                                      block->end_ip);
280                        }
281                }
282        }
283}
284
285void
286qir_calculate_live_intervals(struct vc4_compile *c)
287{
288        int bitset_words = BITSET_WORDS(c->num_temps);
289
290        /* If we called this function more than once, then we should be
291         * freeing the previous arrays.
292         */
293        assert(!c->temp_start);
294
295        c->temp_start = rzalloc_array(c, int, c->num_temps);
296        c->temp_end = rzalloc_array(c, int, c->num_temps);
297
298        for (int i = 0; i < c->num_temps; i++) {
299                c->temp_start[i] = MAX_INSTRUCTION;
300                c->temp_end[i] = -1;
301        }
302
303        qir_for_each_block(block, c) {
304                block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
305                block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
306                block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
307                block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
308        }
309
310        qir_setup_def_use(c);
311
312        while (qir_live_variables_dataflow(c, bitset_words))
313                ;
314
315        qir_compute_start_end(c, c->num_temps);
316
317        if (vc4_debug & VC4_DEBUG_SHADERDB) {
318                int last_ip = 0;
319                for (int i = 0; i < c->num_temps; i++)
320                        last_ip = MAX2(last_ip, c->temp_end[i]);
321
322                int reg_pressure = 0;
323                int max_reg_pressure = 0;
324                for (int i = 0; i < last_ip; i++) {
325                        for (int j = 0; j < c->num_temps; j++) {
326                                if (c->temp_start[j] == i)
327                                        reg_pressure++;
328                                if (c->temp_end[j] == i)
329                                        reg_pressure--;
330                        }
331                        max_reg_pressure = MAX2(max_reg_pressure, reg_pressure);
332                }
333
334                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d max temps\n",
335                        qir_get_stage_name(c->stage),
336                        c->program_id, c->variant_id,
337                        max_reg_pressure);
338        }
339}
340