1/*
2 * Copyright © 2012 Intel Corporation
3 * Copyright © 2016 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25#define MAX_INSTRUCTION (1 << 30)
26
27#include "util/ralloc.h"
28#include "util/register_allocate.h"
29#include "vc4_context.h"
30#include "vc4_qir.h"
31
32struct partial_update_state {
33        struct qinst *insts[4];
34        uint8_t channels;
35};
36
37static uint32_t
38int_hash(const void *key)
39{
40        return _mesa_hash_data(key, sizeof(int));
41}
42
43static bool
44int_compare(const void *key1, const void *key2)
45{
46        return *(const int *)key1 == *(const int *)key2;
47}
48
49static int
50qir_reg_to_var(struct qreg reg)
51{
52        if (reg.file == QFILE_TEMP)
53                return reg.index;
54
55        return -1;
56}
57
58static void
59qir_setup_use(struct vc4_compile *c, struct qblock *block, int ip,
60              struct qreg src)
61{
62        int var = qir_reg_to_var(src);
63        if (var == -1)
64                return;
65
66        c->temp_start[var] = MIN2(c->temp_start[var], ip);
67        c->temp_end[var] = MAX2(c->temp_end[var], ip);
68
69        /* The use[] bitset marks when the block makes
70         * use of a variable without having completely
71         * defined that variable within the block.
72         */
73        if (!BITSET_TEST(block->def, var))
74                BITSET_SET(block->use, var);
75}
76
77static struct partial_update_state *
78get_partial_update_state(struct hash_table *partial_update_ht,
79                         struct qinst *inst)
80{
81        struct hash_entry *entry =
82                _mesa_hash_table_search(partial_update_ht,
83                                        &inst->dst.index);
84        if (entry)
85                return entry->data;
86
87        struct partial_update_state *state =
88                rzalloc(partial_update_ht, struct partial_update_state);
89
90        _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
91
92        return state;
93}
94
95static void
96qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip,
97              struct hash_table *partial_update_ht, struct qinst *inst)
98{
99        /* The def[] bitset marks when an initialization in a
100         * block completely screens off previous updates of
101         * that variable.
102         */
103        int var = qir_reg_to_var(inst->dst);
104        if (var == -1)
105                return;
106
107        c->temp_start[var] = MIN2(c->temp_start[var], ip);
108        c->temp_end[var] = MAX2(c->temp_end[var], ip);
109
110        /* If we've already tracked this as a def, or already used it within
111         * the block, there's nothing to do.
112         */
113        if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
114                return;
115
116        /* Easy, common case: unconditional full register update.
117         *
118         * We treat conditioning on the exec mask as the same as not being
119         * conditional.  This makes sure that if the register gets set on
120         * either side of an if, it is treated as being screened off before
121         * the if.  Otherwise, if there was no intervening def, its live
122         * interval doesn't extend back to the start of he program, and if too
123         * many registers did that we'd fail to register allocate.
124         */
125        if ((inst->cond == QPU_COND_ALWAYS ||
126             inst->cond_is_exec_mask) && !inst->dst.pack) {
127                BITSET_SET(block->def, var);
128                return;
129        }
130
131        /* Finally, look at the condition code and packing and mark it as a
132         * def.  We need to make sure that we understand sequences
133         * instructions like:
134         *
135         *     mov.zs t0, t1
136         *     mov.zc t0, t2
137         *
138         * or:
139         *
140         *     mmov t0.8a, t1
141         *     mmov t0.8b, t2
142         *     mmov t0.8c, t3
143         *     mmov t0.8d, t4
144         *
145         * as defining the temp within the block, because otherwise dst's live
146         * range will get extended up the control flow to the top of the
147         * program.
148         */
149        struct partial_update_state *state =
150                get_partial_update_state(partial_update_ht, inst);
151        uint8_t mask = qir_channels_written(inst);
152
153        if (inst->cond == QPU_COND_ALWAYS) {
154                state->channels |= mask;
155        } else {
156                for (int i = 0; i < 4; i++) {
157                        if (!(mask & (1 << i)))
158                                continue;
159
160                        if (state->insts[i] &&
161                            state->insts[i]->cond ==
162                            qpu_cond_complement(inst->cond))
163                                state->channels |= 1 << i;
164                        else
165                                state->insts[i] = inst;
166                }
167        }
168
169        if (state->channels == 0xf)
170                BITSET_SET(block->def, var);
171}
172
173static void
174sf_state_clear(struct hash_table *partial_update_ht)
175{
176        hash_table_foreach(partial_update_ht, entry) {
177                struct partial_update_state *state = entry->data;
178
179                for (int i = 0; i < 4; i++) {
180                        if (state->insts[i] && state->insts[i]->cond)
181                                state->insts[i] = NULL;
182                }
183        }
184}
185
186/* Sets up the def/use arrays for when variables are used-before-defined or
187 * defined-before-used in the block.
188 *
189 * Also initializes the temp_start/temp_end to cover just the instruction IPs
190 * where the variable is used, which will be extended later in
191 * qir_compute_start_end().
192 */
193static void
194qir_setup_def_use(struct vc4_compile *c)
195{
196        struct hash_table *partial_update_ht =
197                _mesa_hash_table_create(c, int_hash, int_compare);
198        int ip = 0;
199
200        qir_for_each_block(block, c) {
201                block->start_ip = ip;
202
203                _mesa_hash_table_clear(partial_update_ht, NULL);
204
205                qir_for_each_inst(inst, block) {
206                        for (int i = 0; i < qir_get_nsrc(inst); i++)
207                                qir_setup_use(c, block, ip, inst->src[i]);
208
209                        qir_setup_def(c, block, ip, partial_update_ht, inst);
210
211                        if (inst->sf)
212                                sf_state_clear(partial_update_ht);
213
214                        switch (inst->op) {
215                        case QOP_FRAG_Z:
216                        case QOP_FRAG_W:
217                                /* The payload registers have values
218                                 * implicitly loaded at the start of the
219                                 * program.
220                                 */
221                                if (inst->dst.file == QFILE_TEMP)
222                                        c->temp_start[inst->dst.index] = 0;
223                                break;
224                        default:
225                                break;
226                        }
227                        ip++;
228                }
229                block->end_ip = ip;
230        }
231
232        _mesa_hash_table_destroy(partial_update_ht, NULL);
233}
234
235static bool
236qir_live_variables_dataflow(struct vc4_compile *c, int bitset_words)
237{
238        bool cont = false;
239
240        qir_for_each_block_rev(block, c) {
241                /* Update live_out: Any successor using the variable
242                 * on entrance needs us to have the variable live on
243                 * exit.
244                 */
245                qir_for_each_successor(succ, block) {
246                        for (int i = 0; i < bitset_words; i++) {
247                                BITSET_WORD new_live_out = (succ->live_in[i] &
248                                                            ~block->live_out[i]);
249                                if (new_live_out) {
250                                        block->live_out[i] |= new_live_out;
251                                        cont = true;
252                                }
253                        }
254                }
255
256                /* Update live_in */
257                for (int i = 0; i < bitset_words; i++) {
258                        BITSET_WORD new_live_in = (block->use[i] |
259                                                   (block->live_out[i] &
260                                                    ~block->def[i]));
261                        if (new_live_in & ~block->live_in[i]) {
262                                block->live_in[i] |= new_live_in;
263                                cont = true;
264                        }
265                }
266        }
267
268        return cont;
269}
270
271/**
272 * Extend the start/end ranges for each variable to account for the
273 * new information calculated from control flow.
274 */
275static void
276qir_compute_start_end(struct vc4_compile *c, int num_vars)
277{
278        qir_for_each_block(block, c) {
279                for (int i = 0; i < num_vars; i++) {
280                        if (BITSET_TEST(block->live_in, i)) {
281                                c->temp_start[i] = MIN2(c->temp_start[i],
282                                                        block->start_ip);
283                                c->temp_end[i] = MAX2(c->temp_end[i],
284                                                      block->start_ip);
285                        }
286
287                        if (BITSET_TEST(block->live_out, i)) {
288                                c->temp_start[i] = MIN2(c->temp_start[i],
289                                                        block->end_ip);
290                                c->temp_end[i] = MAX2(c->temp_end[i],
291                                                      block->end_ip);
292                        }
293                }
294        }
295}
296
297void
298qir_calculate_live_intervals(struct vc4_compile *c)
299{
300        int bitset_words = BITSET_WORDS(c->num_temps);
301
302        /* If we called this function more than once, then we should be
303         * freeing the previous arrays.
304         */
305        assert(!c->temp_start);
306
307        c->temp_start = rzalloc_array(c, int, c->num_temps);
308        c->temp_end = rzalloc_array(c, int, c->num_temps);
309
310        for (int i = 0; i < c->num_temps; i++) {
311                c->temp_start[i] = MAX_INSTRUCTION;
312                c->temp_end[i] = -1;
313        }
314
315        qir_for_each_block(block, c) {
316                block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
317                block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
318                block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
319                block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
320        }
321
322        qir_setup_def_use(c);
323
324        while (qir_live_variables_dataflow(c, bitset_words))
325                ;
326
327        qir_compute_start_end(c, c->num_temps);
328
329        if (vc4_debug & VC4_DEBUG_SHADERDB) {
330                int last_ip = 0;
331                for (int i = 0; i < c->num_temps; i++)
332                        last_ip = MAX2(last_ip, c->temp_end[i]);
333
334                int reg_pressure = 0;
335                int max_reg_pressure = 0;
336                for (int i = 0; i < last_ip; i++) {
337                        for (int j = 0; j < c->num_temps; j++) {
338                                if (c->temp_start[j] == i)
339                                        reg_pressure++;
340                                if (c->temp_end[j] == i)
341                                        reg_pressure--;
342                        }
343                        max_reg_pressure = MAX2(max_reg_pressure, reg_pressure);
344                }
345
346                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d max temps\n",
347                        qir_get_stage_name(c->stage),
348                        c->program_id, c->variant_id,
349                        max_reg_pressure);
350        }
351}
352