1/*
2 * Copyright (C) 2020 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#ifndef __PAN_IR_H
25#define __PAN_IR_H
26
27#include <stdint.h>
28#include "compiler/nir/nir.h"
29#include "util/u_dynarray.h"
30#include "util/hash_table.h"
31
32/* Indices for named (non-XFB) varyings that are present. These are packed
33 * tightly so they correspond to a bitfield present (P) indexed by (1 <<
34 * PAN_VARY_*). This has the nice property that you can lookup the buffer index
35 * of a given special field given a shift S by:
36 *
37 *      idx = popcount(P & ((1 << S) - 1))
38 *
 * That is... look at all of the varyings that come earlier and count them; that
 * count is the new index, since indices are zero-based. Likewise, the total
 * number of special buffers required is simply popcount(P).
42 */
43
enum pan_special_varying {
        PAN_VARY_GENERAL   = 0,
        PAN_VARY_POSITION  = 1,
        PAN_VARY_PSIZ      = 2,
        PAN_VARY_PNTCOORD  = 3,
        PAN_VARY_FACE      = 4,
        PAN_VARY_FRAGCOORD = 5,

        /* Count of the entries above; must remain the final enumerator */
        PAN_VARY_MAX,
};
55
/* Maximum number of attribute descriptors required for varyings. This covers
 * up to MAX_VARYING source-level varyings plus one descriptor for each
 * non-GENERAL special varying (hence the minus one). */
#define PAN_MAX_VARYINGS (MAX_VARYING + (PAN_VARY_MAX - 1))
60
61/* Define the general compiler entry point */
62
/* Size of the sysvals[] array in struct panfrost_sysvals */
#define MAX_SYSVAL_COUNT 32

/* Sysval IDs are two-dimensional: the low 16 bits hold the class
 * (PAN_SYSVAL_*) and the bits above hold a class-specific parameter. With a
 * parameter of zero the packed value equals the class itself, so
 * nonparametric sysvals can be compared directly against their class. */

#define PAN_SYSVAL(type, no) (PAN_SYSVAL_##type | ((no) << 16))
#define PAN_SYSVAL_TYPE(sysval) ((sysval) & ((1 << 16) - 1))
#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16)
71
/* Sysval classes. Numbering starts at one for easy indexing of hash tables
 * internal to the compiler. Note that the value 6 is not assigned. */

enum {
        PAN_SYSVAL_VIEWPORT_SCALE          = 1,
        PAN_SYSVAL_VIEWPORT_OFFSET         = 2,
        PAN_SYSVAL_TEXTURE_SIZE            = 3,
        PAN_SYSVAL_SSBO                    = 4,
        PAN_SYSVAL_NUM_WORK_GROUPS         = 5,
        PAN_SYSVAL_SAMPLER                 = 7,
        PAN_SYSVAL_LOCAL_GROUP_SIZE        = 8,
        PAN_SYSVAL_WORK_DIM                = 9,
        PAN_SYSVAL_IMAGE_SIZE              = 10,
        PAN_SYSVAL_SAMPLE_POSITIONS        = 11,
        PAN_SYSVAL_MULTISAMPLED            = 12,
        PAN_SYSVAL_RT_CONVERSION           = 13,
        PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14,
        PAN_SYSVAL_DRAWID                  = 15,
        PAN_SYSVAL_BLEND_CONSTANTS         = 16,
};
92
/* Pack and unpack a "TXS" sysval parameter (presumably the ID half of a
 * PAN_SYSVAL_TEXTURE_SIZE sysval -- confirm against the users). Layout:
 *
 *      bits 0-6: texture index
 *      bits 7-8: dim
 *      bit  9:   is_array
 *
 * The packer does not mask its inputs, so callers must pass texidx < 128 and
 * dim < 4 or the fields will overlap. */

#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array)          \
        ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))

#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id)        ((id) & 0x7f)
#define PAN_SYSVAL_ID_TO_TXS_DIM(id)            (((id) >> 7) & 0x3)

/* Fully parenthesized so the expansion is a single primary expression no
 * matter where it is used. Evaluates to exactly 0 or 1. */
#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id)       (!!((id) & (1 << 9)))
99
/* Attribute slots reserved for the vertex builtins. The numbering is somewhat
 * arbitrary, but staying consistent with the blob makes it easier to compare
 * traces. */

enum {
        PAN_VERTEX_ID = 16,
        PAN_INSTANCE_ID = 17,

        /* One past the last builtin slot (18) */
        PAN_MAX_ATTRIBUTE
};
108
109struct panfrost_sysvals {
110        /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
111        unsigned sysvals[MAX_SYSVAL_COUNT];
112        unsigned sysval_count;
113};
114
/* Upper bound on the number of pushed words. Technically Midgard could go up
 * to 92 in a pathological case but we don't take advantage of that. Likewise
 * Bifrost's FAU encoding can address 128 words but actual implementations
 * (G72, G76) are capped at 64. */

#define PAN_MAX_PUSH 64

/* Architectural invariants (Midgard and Bifrost): a UBO must be <= 2^16 bytes,
 * so an offset to a word must be < 2^16. There are fewer than 2^8 UBOs, so
 * both fields fit comfortably in 16 bits. */

struct panfrost_ubo_word {
        /* Which UBO the word comes from */
        uint16_t ubo;

        /* Offset of the word within that UBO (presumably in bytes -- confirm) */
        uint16_t offset;
};

struct panfrost_ubo_push {
        /* Number of entries of words[] in use */
        unsigned count;

        /* The pushed words, up to PAN_MAX_PUSH of them */
        struct panfrost_ubo_word words[PAN_MAX_PUSH];
};
133
/* Helper for searching a struct panfrost_ubo_push for the word at (ubo, offs).
 * Note this is O(N) in the number of pushed constants, so do not run it in
 * the draw call hot path.
 *
 * NOTE(review): the return value is presumably the index of the matching
 * entry in push->words; what happens when nothing matches is not visible from
 * this header -- check the definition. */

unsigned pan_lookup_pushed_ubo(struct panfrost_ubo_push *push,
                               unsigned ubo, unsigned offs);
139
140struct hash_table_u64 *
141panfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx);
142
143unsigned
144pan_lookup_sysval(struct hash_table_u64 *sysval_to_id,
145                  struct panfrost_sysvals *sysvals,
146                  int sysval);
147
148int
149panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);
150
151struct panfrost_compile_inputs {
152        unsigned gpu_id;
153        bool is_blend, is_blit;
154        struct {
155                unsigned rt;
156                unsigned nr_samples;
157                uint64_t bifrost_blend_desc;
158        } blend;
159        unsigned sysval_ubo;
160        bool shaderdb;
161        bool no_ubo_to_push;
162
163        enum pipe_format rt_formats[8];
164        uint8_t raw_fmt_mask;
165        unsigned nr_cbufs;
166
167        union {
168                struct {
169                        bool static_rt_conv;
170                        uint32_t rt_conv[8];
171                } bifrost;
172        };
173};
174
175struct pan_shader_varying {
176        gl_varying_slot location;
177        enum pipe_format format;
178};
179
180struct bifrost_shader_blend_info {
181        nir_alu_type type;
182        uint32_t return_offset;
183
184        /* mali_bifrost_register_file_format corresponding to nir_alu_type */
185        unsigned format;
186};
187
188struct bifrost_shader_info {
189        struct bifrost_shader_blend_info blend[8];
190        nir_alu_type blend_src1_type;
191        bool wait_6, wait_7;
192
193        /* Packed, preloaded message descriptors */
194        uint16_t messages[2];
195};
196
197struct midgard_shader_info {
198        unsigned first_tag;
199};
200
201struct pan_shader_info {
202        gl_shader_stage stage;
203        unsigned work_reg_count;
204        unsigned tls_size;
205        unsigned wls_size;
206
207        union {
208                struct {
209                        bool reads_frag_coord;
210                        bool reads_point_coord;
211                        bool reads_face;
212                        bool helper_invocations;
213                        bool can_discard;
214                        bool writes_depth;
215                        bool writes_stencil;
216                        bool writes_coverage;
217                        bool sidefx;
218                        bool reads_sample_id;
219                        bool reads_sample_pos;
220                        bool reads_sample_mask_in;
221                        bool reads_helper_invocation;
222                        bool sample_shading;
223                        bool early_fragment_tests;
224                        bool can_early_z, can_fpk;
225                        BITSET_WORD outputs_read;
226                        BITSET_WORD outputs_written;
227                } fs;
228
229                struct {
230                        bool writes_point_size;
231                } vs;
232        };
233
234        bool separable;
235        bool contains_barrier;
236        bool writes_global;
237        uint64_t outputs_written;
238
239        unsigned sampler_count;
240        unsigned texture_count;
241        unsigned ubo_count;
242        unsigned attribute_count;
243
244        struct {
245                unsigned input_count;
246                struct pan_shader_varying input[PAN_MAX_VARYINGS];
247                unsigned output_count;
248                struct pan_shader_varying output[PAN_MAX_VARYINGS];
249        } varyings;
250
251        struct panfrost_sysvals sysvals;
252
253        /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
254         * Uniforms (Bifrost) */
255        struct panfrost_ubo_push push;
256
257        uint32_t ubo_mask;
258
259        union {
260                struct bifrost_shader_info bifrost;
261                struct midgard_shader_info midgard;
262        };
263};
264
265typedef struct pan_block {
266        /* Link to next block. Must be first for mir_get_block */
267        struct list_head link;
268
269        /* List of instructions emitted for the current block */
270        struct list_head instructions;
271
272        /* Index of the block in source order */
273        unsigned name;
274
275        /* Control flow graph */
276        struct pan_block *successors[2];
277        struct set *predecessors;
278        bool unconditional_jumps;
279
280        /* In liveness analysis, these are live masks (per-component) for
281         * indices for the block. Scalar compilers have the luxury of using
282         * simple bit fields, but for us, liveness is a vector idea. */
283        uint16_t *live_in;
284        uint16_t *live_out;
285} pan_block;
286
287struct pan_instruction {
288        struct list_head link;
289};
290
/* Walk the instructions of `block` (a pointer to a block with an
 * `instructions` list) in reverse order, binding each one to `v` as a
 * struct pan_instruction pointer. `block` is parenthesized so that any
 * pointer expression may be passed, not just a plain identifier. */
#define pan_foreach_instr_in_block_rev(block, v) \
        list_for_each_entry_rev(struct pan_instruction, v, &(block)->instructions, link)
293
/* Iterate over the successors of `blk` (a pan_block pointer expression),
 * binding each to `v`. Iteration stops at the first NULL successor or after
 * both slots have been visited; `v` is NULL once the loop finishes normally.
 *
 * The macro declares `v` and a cursor named `_succ_<v>` in the enclosing
 * scope, so it must be used where declarations are allowed, and `blk` is
 * evaluated more than once.
 *
 * The cursor is only dereferenced while it points inside successors[]: when
 * it steps past the last slot, `v` is set to NULL instead of reading one
 * element beyond the array. */
#define pan_foreach_successor(blk, v) \
        pan_block *v; \
        pan_block **_succ_##v; \
        for (_succ_##v = (pan_block **) &(blk)->successors[0], \
                v = *_succ_##v; \
                v != NULL; \
                v = (++_succ_##v < (pan_block **) &(blk)->successors[2]) ? \
                        *_succ_##v : NULL)
301
/* Iterate over the predecessor set of `blk` (a block pointer expression),
 * binding each entry's key to `v` as a struct pan_block pointer. Entries are
 * fetched with _mesa_set_next_entry until it returns NULL, at which point `v`
 * is NULL as well.
 *
 * The macro declares `v` and `_entry_<v>` in the enclosing scope, so it must
 * be used where declarations are allowed, and `blk` is evaluated more than
 * once. `blk` is parenthesized so that any pointer expression may be passed. */
#define pan_foreach_predecessor(blk, v) \
        struct set_entry *_entry_##v; \
        struct pan_block *v; \
        for (_entry_##v = _mesa_set_next_entry((blk)->predecessors, NULL), \
                v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL);  \
                _entry_##v != NULL; \
                _entry_##v = _mesa_set_next_entry((blk)->predecessors, _entry_##v), \
                v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL))
310
311static inline pan_block *
312pan_exit_block(struct list_head *blocks)
313{
314        pan_block *last = list_last_entry(blocks, pan_block, link);
315        assert(!last->successors[0] && !last->successors[1]);
316        return last;
317}
318
319typedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max);
320
321void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
322void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
323bool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max);
324
325void pan_compute_liveness(struct list_head *blocks,
326                unsigned temp_count,
327                pan_liveness_update callback);
328
329void pan_free_liveness(struct list_head *blocks);
330
331uint16_t
332pan_to_bytemask(unsigned bytes, unsigned mask);
333
334void pan_block_add_successor(pan_block *block, pan_block *successor);
335
336/* IR indexing */
337#define PAN_IS_REG (1)
338
339static inline unsigned
340pan_ssa_index(nir_ssa_def *ssa)
341{
342        /* Off-by-one ensures BIR_NO_ARG is skipped */
343        return ((ssa->index + 1) << 1) | 0;
344}
345
346static inline unsigned
347pan_src_index(nir_src *src)
348{
349        if (src->is_ssa)
350                return pan_ssa_index(src->ssa);
351        else {
352                assert(!src->reg.indirect);
353                return (src->reg.reg->index << 1) | PAN_IS_REG;
354        }
355}
356
357static inline unsigned
358pan_dest_index(nir_dest *dst)
359{
360        if (dst->is_ssa)
361                return pan_ssa_index(&dst->ssa);
362        else {
363                assert(!dst->reg.indirect);
364                return (dst->reg.reg->index << 1) | PAN_IS_REG;
365        }
366}
367
368/* IR printing helpers */
369void pan_print_alu_type(nir_alu_type t, FILE *fp);
370
371/* Until it can be upstreamed.. */
372bool pan_has_source_mod(nir_alu_src *src, nir_op op);
373bool pan_has_dest_mod(nir_dest **dest, nir_op op);
374
375/* NIR passes to do some backend-specific lowering */
376
/* Writeout flags. Each is a distinct bit, so they can be OR'd into a mask.
 * (C/Z/S presumably stand for colour, depth and stencil -- confirm.) */
#define PAN_WRITEOUT_C (1 << 0)
#define PAN_WRITEOUT_Z (1 << 1)
#define PAN_WRITEOUT_S (1 << 2)
380
381bool pan_nir_reorder_writeout(nir_shader *nir);
382bool pan_nir_lower_zs_store(nir_shader *nir);
383
384bool pan_nir_lower_64bit_intrin(nir_shader *shader);
385
386bool pan_lower_helper_invocation(nir_shader *shader);
387bool pan_lower_sample_pos(nir_shader *shader);
388
389#endif
390