1/*
2 * Copyright (C) 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 *    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27#include "compiler.h"
28
29/* Midgard texture/derivative operations have a pair of bits controlling the
30 * behaviour of helper invocations:
31 *
32 *  - Should a helper invocation terminate after executing this instruction?
33 *  - Should a helper invocation actually execute this instruction?
34 *
35 * The terminate bit should be set on the last instruction requiring helper
36 * invocations. Without control flow, that's literally the last instruction;
37 * with control flow, there may be multiple such instructions (with ifs) or no
38 * such instruction (with loops).
39 *
40 * The execute bit should be set if the value of this instruction is required
41 * by a future instruction requiring helper invocations. Consider:
42 *
43 *      0 = texture ...
44 *      1 = fmul 0, #10
45 *      2 = dfdx 1
46 *      store 2
47 *
48 * Since the derivative calculation 2 requires helper invocations, the value 1
49 * must be calculated by helper invocations, and since it depends on 0, 0 must
50 * be calculated by helpers. Hence the texture op has the execute bit set, and
51 * the derivative op has the terminate bit set.
52 *
53 * Calculating the terminate bit occurs by forward dataflow analysis to
54 * determine which blocks require helper invocations. A block requires
55 * invocations in if any of its instructions use helper invocations, or if it
56 * depends on a block that requires invocation. With that analysis, the
57 * terminate bit is set on the last instruction using invocations within any
58 * block that does *not* require invocations out.
59 *
60 * Likewise, calculating the execute bit requires backward dataflow analysis
61 * with union as the join operation and the generating set being the union of
62 * sources of instructions writing executed values.
63 */
64
65/* Does a block use helpers directly */
66static bool
67mir_block_uses_helpers(gl_shader_stage stage, midgard_block *block)
68{
69        mir_foreach_instr_in_block(block, ins) {
70                if (ins->type != TAG_TEXTURE_4) continue;
71                if (mir_op_computes_derivatives(stage, ins->op))
72                        return true;
73        }
74
75        return false;
76}
77
78static bool
79mir_block_terminates_helpers(midgard_block *block)
80{
81        /* Can't terminate if there are no helpers */
82        if (!block->helpers_in)
83                return false;
84
85        /* Can't terminate if a successor needs helpers */
86        pan_foreach_successor((&block->base), succ) {
87                if (((midgard_block *) succ)->helpers_in)
88                        return false;
89        }
90
91        /* Otherwise we terminate */
92        return true;
93}
94
95void
96mir_analyze_helper_terminate(compiler_context *ctx)
97{
98        /* Set blocks as directly requiring helpers, and if they do add them to
99         * the worklist to propagate to their predecessors */
100
101        struct set *worklist = _mesa_set_create(NULL,
102                        _mesa_hash_pointer,
103                        _mesa_key_pointer_equal);
104
105        struct set *visited = _mesa_set_create(NULL,
106                        _mesa_hash_pointer,
107                        _mesa_key_pointer_equal);
108
109        mir_foreach_block(ctx, _block) {
110                midgard_block *block = (midgard_block *) _block;
111                block->helpers_in |= mir_block_uses_helpers(ctx->stage, block);
112
113                if (block->helpers_in)
114                        _mesa_set_add(worklist, _block);
115        }
116
117        /* Next, propagate back. Since there are a finite number of blocks, the
118         * worklist (a subset of all the blocks) is finite. Since a block can
119         * only be added to the worklist if it is not on the visited list and
120         * the visited list - also a subset of the blocks - grows every
121         * iteration, the algorithm must terminate. */
122
123        struct set_entry *cur;
124
125        while((cur = _mesa_set_next_entry(worklist, NULL)) != NULL) {
126                /* Pop off a block requiring helpers */
127                pan_block *blk = (struct pan_block *) cur->key;
128                _mesa_set_remove(worklist, cur);
129
130                /* Its predecessors also require helpers */
131                pan_foreach_predecessor(blk, pred) {
132                        if (!_mesa_set_search(visited, pred)) {
133                                ((midgard_block *) pred)->helpers_in = true;
134                                _mesa_set_add(worklist, pred);
135                        }
136                }
137
138                _mesa_set_add(visited, blk);
139        }
140
141        _mesa_set_destroy(visited, NULL);
142        _mesa_set_destroy(worklist, NULL);
143
144        /* Finally, set helper_terminate on the last derivative-calculating
145         * instruction in a block that terminates helpers */
146        mir_foreach_block(ctx, _block) {
147                midgard_block *block = (midgard_block *) _block;
148
149                if (!mir_block_terminates_helpers(block))
150                        continue;
151
152                mir_foreach_instr_in_block_rev(block, ins) {
153                        if (ins->type != TAG_TEXTURE_4) continue;
154                        if (!mir_op_computes_derivatives(ctx->stage, ins->op)) continue;
155
156                        ins->helper_terminate = true;
157                        break;
158                }
159        }
160}
161
162static bool
163mir_helper_block_update(BITSET_WORD *deps, pan_block *_block, unsigned temp_count)
164{
165        bool progress = false;
166        midgard_block *block = (midgard_block *) _block;
167
168        mir_foreach_instr_in_block_rev(block, ins) {
169                /* Ensure we write to a helper dependency */
170                if (ins->dest >= temp_count || !BITSET_TEST(deps, ins->dest))
171                        continue;
172
173                /* Then add all of our dependencies */
174                mir_foreach_src(ins, s) {
175                        if (ins->src[s] >= temp_count)
176                                continue;
177
178                        /* Progress if the dependency set changes */
179                        progress |= !BITSET_TEST(deps, ins->src[s]);
180                        BITSET_SET(deps, ins->src[s]);
181                }
182        }
183
184        return progress;
185}
186
187void
188mir_analyze_helper_requirements(compiler_context *ctx)
189{
190        mir_compute_temp_count(ctx);
191        unsigned temp_count = ctx->temp_count;
192        BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), BITSET_WORDS(temp_count));
193
194        /* Initialize with the sources of instructions consuming
195         * derivatives */
196
197        mir_foreach_instr_global(ctx, ins) {
198                if (ins->type != TAG_TEXTURE_4) continue;
199                if (ins->dest >= ctx->temp_count) continue;
200                if (!mir_op_computes_derivatives(ctx->stage, ins->op)) continue;
201
202                mir_foreach_src(ins, s) {
203                        if (ins->src[s] < temp_count)
204                                BITSET_SET(deps, ins->src[s]);
205                }
206        }
207
208        /* Propagate that up */
209
210        struct set *work_list = _mesa_set_create(NULL,
211                        _mesa_hash_pointer,
212                        _mesa_key_pointer_equal);
213
214        struct set *visited = _mesa_set_create(NULL,
215                        _mesa_hash_pointer,
216                        _mesa_key_pointer_equal);
217
218        struct set_entry *cur = _mesa_set_add(work_list, pan_exit_block(&ctx->blocks));
219
220        do {
221                pan_block *blk = (struct pan_block *) cur->key;
222                _mesa_set_remove(work_list, cur);
223
224                bool progress = mir_helper_block_update(deps, blk, temp_count);
225
226                if (progress || !_mesa_set_search(visited, blk)) {
227                        pan_foreach_predecessor(blk, pred)
228                                _mesa_set_add(work_list, pred);
229                }
230
231                _mesa_set_add(visited, blk);
232        } while((cur = _mesa_set_next_entry(work_list, NULL)) != NULL);
233
234        _mesa_set_destroy(visited, NULL);
235        _mesa_set_destroy(work_list, NULL);
236
237        /* Set the execute bits */
238
239        mir_foreach_instr_global(ctx, ins) {
240                if (ins->type != TAG_TEXTURE_4) continue;
241                if (ins->dest >= ctx->temp_count) continue;
242
243                ins->helper_execute = BITSET_TEST(deps, ins->dest);
244        }
245
246        free(deps);
247}
248