1/*
2 * Copyright (C) 2021 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#ifndef _IR3_RA_H
25#define _IR3_RA_H
26
27#include "util/rb_tree.h"
28#include "ir3.h"
29#include "ir3_compiler.h"
30
31#ifdef DEBUG
32#define RA_DEBUG (ir3_shader_debug & IR3_DBG_RAMSGS)
33#else
34#define RA_DEBUG 0
35#endif
36#define d(fmt, ...)                                                            \
37   do {                                                                        \
38      if (RA_DEBUG) {                                                          \
39         mesa_logi("RA: " fmt, ##__VA_ARGS__);                                 \
40      }                                                                        \
41   } while (0)
42
43#define di(instr, fmt, ...)                                                    \
44   do {                                                                        \
45      if (RA_DEBUG) {                                                          \
46         struct log_stream *stream = mesa_log_streami();                       \
47         mesa_log_stream_printf(stream, "RA: " fmt ": ", ##__VA_ARGS__);       \
48         ir3_print_instr_stream(stream, instr);                                \
49         mesa_log_stream_destroy(stream);                                      \
50      }                                                                        \
51   } while (0)
52
53typedef uint16_t physreg_t;
54
55static inline unsigned
56ra_physreg_to_num(physreg_t physreg, unsigned flags)
57{
58   if (!(flags & IR3_REG_HALF))
59      physreg /= 2;
60   if (flags & IR3_REG_SHARED)
61      physreg += 48 * 4;
62   return physreg;
63}
64
65static inline physreg_t
66ra_num_to_physreg(unsigned num, unsigned flags)
67{
68   if (flags & IR3_REG_SHARED)
69      num -= 48 * 4;
70   if (!(flags & IR3_REG_HALF))
71      num *= 2;
72   return num;
73}
74
75static inline unsigned
76ra_reg_get_num(const struct ir3_register *reg)
77{
78   return (reg->flags & IR3_REG_ARRAY) ? reg->array.base : reg->num;
79}
80
81static inline physreg_t
82ra_reg_get_physreg(const struct ir3_register *reg)
83{
84   return ra_num_to_physreg(ra_reg_get_num(reg), reg->flags);
85}
86
87static inline bool
88def_is_gpr(const struct ir3_register *reg)
89{
90   return reg_num(reg) != REG_A0 && reg_num(reg) != REG_P0;
91}
92
93/* Note: don't count undef as a source.
94 */
95static inline bool
96ra_reg_is_src(const struct ir3_register *reg)
97{
98   return (reg->flags & IR3_REG_SSA) && reg->def && def_is_gpr(reg->def);
99}
100
101static inline bool
102ra_reg_is_dst(const struct ir3_register *reg)
103{
104   return (reg->flags & IR3_REG_SSA) && def_is_gpr(reg) &&
105          ((reg->flags & IR3_REG_ARRAY) || reg->wrmask);
106}
107
108/* Iterators for sources and destinations which:
109 * - Don't include fake sources (irrelevant for RA)
110 * - Don't include non-SSA sources (immediates and constants, also irrelevant)
111 */
112
113#define ra_foreach_src_n(__srcreg, __n, __instr)                               \
114   foreach_src_n(__srcreg, __n, __instr)                                       \
115      if (ra_reg_is_src(__srcreg))
116
117#define ra_foreach_src(__srcreg, __instr)                                      \
118   ra_foreach_src_n(__srcreg, __i, __instr)
119
120#define ra_foreach_src_rev(__srcreg, __instr)                                  \
121   for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \
122      for (int __cnt = (__instr)->srcs_count, __i = __cnt - 1; __i >= 0;       \
123           __i--)                                                              \
124         if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i])))
125
126#define ra_foreach_dst_n(__dstreg, __n, __instr)                               \
127   foreach_dst_n(__dstreg, __n, instr)                                         \
128      if (ra_reg_is_dst(__dstreg))
129
130#define ra_foreach_dst(__dstreg, __instr)                                      \
131   ra_foreach_dst_n(__dstreg, __i, __instr)
132
133#define RA_HALF_SIZE     (4 * 48)
134#define RA_FULL_SIZE     (4 * 48 * 2)
135#define RA_SHARED_SIZE   (2 * 4 * 8)
136#define RA_MAX_FILE_SIZE RA_FULL_SIZE
137
138struct ir3_liveness {
139   unsigned block_count;
140   unsigned interval_offset;
141   DECLARE_ARRAY(struct ir3_register *, definitions);
142   DECLARE_ARRAY(BITSET_WORD *, live_out);
143   DECLARE_ARRAY(BITSET_WORD *, live_in);
144};
145
146struct ir3_liveness *ir3_calc_liveness(void *mem_ctx, struct ir3 *ir);
147
148bool ir3_def_live_after(struct ir3_liveness *live, struct ir3_register *def,
149                        struct ir3_instruction *instr);
150
151void ir3_create_parallel_copies(struct ir3 *ir);
152
153void ir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir);
154
155void ir3_force_merge(struct ir3_register *a, struct ir3_register *b,
156                     int b_offset);
157
158struct ir3_pressure {
159   unsigned full, half, shared;
160};
161
162void ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
163                       struct ir3_pressure *max_pressure);
164
165bool ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
166               struct ir3_liveness **live,
167               const struct ir3_pressure *limit_pressure);
168
169bool ir3_lower_spill(struct ir3 *ir);
170
171void ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
172                     unsigned half_size, unsigned block_count);
173
174void ir3_lower_copies(struct ir3_shader_variant *v);
175
176/* Register interval datastructure
177 *
178 * ir3_reg_ctx is used to track which registers are live. The tricky part is
179 * that some registers may overlap each other, when registers with overlapping
180 * live ranges get coalesced. For example, splits will overlap with their
181 * parent vector and sometimes collect sources will also overlap with the
182 * collect'ed vector. ir3_merge_regs guarantees for us that none of the
183 * registers in a merge set that are live at any given point partially
184 * overlap, which means that we can organize them into a forest. While each
185 * register has a per-merge-set offset, ir3_merge_regs also computes a
186 * "global" offset which allows us to throw away the original merge sets and
187 * think of registers as just intervals in a forest of live intervals. When a
188 * register becomes live, we insert it into the forest, and when it dies we
189 * remove it from the forest (and then its children get moved up a level). We
190 * use red-black trees to keep track of each level of the forest, so insertion
191 * and deletion should be fast operations. ir3_reg_ctx handles all the
192 * internal bookkeeping for this, so that it can be shared between RA,
193 * spilling, and register pressure tracking.
194 */
195
196struct ir3_reg_interval {
197   struct rb_node node;
198
199   struct rb_tree children;
200
201   struct ir3_reg_interval *parent;
202
203   struct ir3_register *reg;
204
205   bool inserted;
206};
207
208struct ir3_reg_ctx {
209   /* The tree of top-level intervals in the forest. */
210   struct rb_tree intervals;
211
212   /* Users of ir3_reg_ctx need to keep around additional state that is
213    * modified when top-level intervals are added or removed. For register
214    * pressure tracking, this is just the register pressure, but for RA we
215    * need to keep track of the physreg of each top-level interval. These
216    * callbacks provide a place to let users deriving from ir3_reg_ctx update
217    * their state when top-level intervals are inserted/removed.
218    */
219
220   /* Called when an interval is added and it turns out to be at the top
221    * level.
222    */
223   void (*interval_add)(struct ir3_reg_ctx *ctx,
224                        struct ir3_reg_interval *interval);
225
226   /* Called when an interval is deleted from the top level. */
227   void (*interval_delete)(struct ir3_reg_ctx *ctx,
228                           struct ir3_reg_interval *interval);
229
230   /* Called when an interval is deleted and its child becomes top-level.
231    */
232   void (*interval_readd)(struct ir3_reg_ctx *ctx,
233                          struct ir3_reg_interval *parent,
234                          struct ir3_reg_interval *child);
235};
236
237static inline struct ir3_reg_interval *
238ir3_rb_node_to_interval(struct rb_node *node)
239{
240   return rb_node_data(struct ir3_reg_interval, node, node);
241}
242
243static inline const struct ir3_reg_interval *
244ir3_rb_node_to_interval_const(const struct rb_node *node)
245{
246   return rb_node_data(struct ir3_reg_interval, node, node);
247}
248
249static inline struct ir3_reg_interval *
250ir3_reg_interval_next(struct ir3_reg_interval *interval)
251{
252   struct rb_node *next = rb_node_next(&interval->node);
253   return next ? ir3_rb_node_to_interval(next) : NULL;
254}
255
256static inline struct ir3_reg_interval *
257ir3_reg_interval_next_or_null(struct ir3_reg_interval *interval)
258{
259   return interval ? ir3_reg_interval_next(interval) : NULL;
260}
261
262static inline void
263ir3_reg_interval_init(struct ir3_reg_interval *interval,
264                      struct ir3_register *reg)
265{
266   rb_tree_init(&interval->children);
267   interval->reg = reg;
268   interval->parent = NULL;
269   interval->inserted = false;
270}
271
272void ir3_reg_interval_dump(struct log_stream *stream,
273                           struct ir3_reg_interval *interval);
274
275void ir3_reg_interval_insert(struct ir3_reg_ctx *ctx,
276                             struct ir3_reg_interval *interval);
277
278void ir3_reg_interval_remove(struct ir3_reg_ctx *ctx,
279                             struct ir3_reg_interval *interval);
280
281void ir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx,
282                                 struct ir3_reg_interval *interval);
283
284#endif
285