1/* 2 * Copyright (C) 2021 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#ifndef _IR3_RA_H 25#define _IR3_RA_H 26 27#include "util/rb_tree.h" 28#include "ir3.h" 29#include "ir3_compiler.h" 30 31#ifdef DEBUG 32#define RA_DEBUG (ir3_shader_debug & IR3_DBG_RAMSGS) 33#else 34#define RA_DEBUG 0 35#endif 36#define d(fmt, ...) \ 37 do { \ 38 if (RA_DEBUG) { \ 39 mesa_logi("RA: " fmt, ##__VA_ARGS__); \ 40 } \ 41 } while (0) 42 43#define di(instr, fmt, ...) \ 44 do { \ 45 if (RA_DEBUG) { \ 46 struct log_stream *stream = mesa_log_streami(); \ 47 mesa_log_stream_printf(stream, "RA: " fmt ": ", ##__VA_ARGS__); \ 48 ir3_print_instr_stream(stream, instr); \ 49 mesa_log_stream_destroy(stream); \ 50 } \ 51 } while (0) 52 53typedef uint16_t physreg_t; 54 55static inline unsigned 56ra_physreg_to_num(physreg_t physreg, unsigned flags) 57{ 58 if (!(flags & IR3_REG_HALF)) 59 physreg /= 2; 60 if (flags & IR3_REG_SHARED) 61 physreg += 48 * 4; 62 return physreg; 63} 64 65static inline physreg_t 66ra_num_to_physreg(unsigned num, unsigned flags) 67{ 68 if (flags & IR3_REG_SHARED) 69 num -= 48 * 4; 70 if (!(flags & IR3_REG_HALF)) 71 num *= 2; 72 return num; 73} 74 75static inline unsigned 76ra_reg_get_num(const struct ir3_register *reg) 77{ 78 return (reg->flags & IR3_REG_ARRAY) ? reg->array.base : reg->num; 79} 80 81static inline physreg_t 82ra_reg_get_physreg(const struct ir3_register *reg) 83{ 84 return ra_num_to_physreg(ra_reg_get_num(reg), reg->flags); 85} 86 87static inline bool 88def_is_gpr(const struct ir3_register *reg) 89{ 90 return reg_num(reg) != REG_A0 && reg_num(reg) != REG_P0; 91} 92 93/* Note: don't count undef as a source. 94 */ 95static inline bool 96ra_reg_is_src(const struct ir3_register *reg) 97{ 98 return (reg->flags & IR3_REG_SSA) && reg->def && def_is_gpr(reg->def); 99} 100 101static inline bool 102ra_reg_is_dst(const struct ir3_register *reg) 103{ 104 return (reg->flags & IR3_REG_SSA) && def_is_gpr(reg) && 105 ((reg->flags & IR3_REG_ARRAY) || reg->wrmask); 106} 107 108/* Iterators for sources and destinations which: 109 * - Don't include fake sources (irrelevant for RA) 110 * - Don't include non-SSA sources (immediates and constants, also irrelevant) 111 */ 112 113#define ra_foreach_src_n(__srcreg, __n, __instr) \ 114 foreach_src_n(__srcreg, __n, __instr) \ 115 if (ra_reg_is_src(__srcreg)) 116 117#define ra_foreach_src(__srcreg, __instr) \ 118 ra_foreach_src_n(__srcreg, __i, __instr) 119 120#define ra_foreach_src_rev(__srcreg, __instr) \ 121 for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \ 122 for (int __cnt = (__instr)->srcs_count, __i = __cnt - 1; __i >= 0; \ 123 __i--) \ 124 if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i]))) 125 126#define ra_foreach_dst_n(__dstreg, __n, __instr) \ 127 foreach_dst_n(__dstreg, __n, instr) \ 128 if (ra_reg_is_dst(__dstreg)) 129 130#define ra_foreach_dst(__dstreg, __instr) \ 131 ra_foreach_dst_n(__dstreg, __i, __instr) 132 133#define RA_HALF_SIZE (4 * 48) 134#define RA_FULL_SIZE (4 * 48 * 2) 135#define RA_SHARED_SIZE (2 * 4 * 8) 136#define RA_MAX_FILE_SIZE RA_FULL_SIZE 137 138struct ir3_liveness { 139 unsigned block_count; 140 unsigned interval_offset; 141 DECLARE_ARRAY(struct ir3_register *, definitions); 142 DECLARE_ARRAY(BITSET_WORD *, live_out); 143 DECLARE_ARRAY(BITSET_WORD *, live_in); 144}; 145 146struct ir3_liveness *ir3_calc_liveness(void *mem_ctx, struct ir3 *ir); 147 148bool ir3_def_live_after(struct ir3_liveness *live, struct ir3_register *def, 149 struct ir3_instruction *instr); 150 151void ir3_create_parallel_copies(struct ir3 *ir); 152 153void ir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir); 154 155void ir3_force_merge(struct ir3_register *a, struct ir3_register *b, 156 int b_offset); 157 158struct ir3_pressure { 159 unsigned full, half, shared; 160}; 161 162void ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live, 163 struct ir3_pressure *max_pressure); 164 165bool ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v, 166 struct ir3_liveness **live, 167 const struct ir3_pressure *limit_pressure); 168 169bool ir3_lower_spill(struct ir3 *ir); 170 171void ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size, 172 unsigned half_size, unsigned block_count); 173 174void ir3_lower_copies(struct ir3_shader_variant *v); 175 176/* Register interval datastructure 177 * 178 * ir3_reg_ctx is used to track which registers are live. The tricky part is 179 * that some registers may overlap each other, when registers with overlapping 180 * live ranges get coalesced. For example, splits will overlap with their 181 * parent vector and sometimes collect sources will also overlap with the 182 * collect'ed vector. ir3_merge_regs guarantees for us that none of the 183 * registers in a merge set that are live at any given point partially 184 * overlap, which means that we can organize them into a forest. While each 185 * register has a per-merge-set offset, ir3_merge_regs also computes a 186 * "global" offset which allows us to throw away the original merge sets and 187 * think of registers as just intervals in a forest of live intervals. When a 188 * register becomes live, we insert it into the forest, and when it dies we 189 * remove it from the forest (and then its children get moved up a level). We 190 * use red-black trees to keep track of each level of the forest, so insertion 191 * and deletion should be fast operations. ir3_reg_ctx handles all the 192 * internal bookkeeping for this, so that it can be shared between RA, 193 * spilling, and register pressure tracking. 194 */ 195 196struct ir3_reg_interval { 197 struct rb_node node; 198 199 struct rb_tree children; 200 201 struct ir3_reg_interval *parent; 202 203 struct ir3_register *reg; 204 205 bool inserted; 206}; 207 208struct ir3_reg_ctx { 209 /* The tree of top-level intervals in the forest. */ 210 struct rb_tree intervals; 211 212 /* Users of ir3_reg_ctx need to keep around additional state that is 213 * modified when top-level intervals are added or removed. For register 214 * pressure tracking, this is just the register pressure, but for RA we 215 * need to keep track of the physreg of each top-level interval. These 216 * callbacks provide a place to let users deriving from ir3_reg_ctx update 217 * their state when top-level intervals are inserted/removed. 218 */ 219 220 /* Called when an interval is added and it turns out to be at the top 221 * level. 222 */ 223 void (*interval_add)(struct ir3_reg_ctx *ctx, 224 struct ir3_reg_interval *interval); 225 226 /* Called when an interval is deleted from the top level. */ 227 void (*interval_delete)(struct ir3_reg_ctx *ctx, 228 struct ir3_reg_interval *interval); 229 230 /* Called when an interval is deleted and its child becomes top-level. 231 */ 232 void (*interval_readd)(struct ir3_reg_ctx *ctx, 233 struct ir3_reg_interval *parent, 234 struct ir3_reg_interval *child); 235}; 236 237static inline struct ir3_reg_interval * 238ir3_rb_node_to_interval(struct rb_node *node) 239{ 240 return rb_node_data(struct ir3_reg_interval, node, node); 241} 242 243static inline const struct ir3_reg_interval * 244ir3_rb_node_to_interval_const(const struct rb_node *node) 245{ 246 return rb_node_data(struct ir3_reg_interval, node, node); 247} 248 249static inline struct ir3_reg_interval * 250ir3_reg_interval_next(struct ir3_reg_interval *interval) 251{ 252 struct rb_node *next = rb_node_next(&interval->node); 253 return next ? ir3_rb_node_to_interval(next) : NULL; 254} 255 256static inline struct ir3_reg_interval * 257ir3_reg_interval_next_or_null(struct ir3_reg_interval *interval) 258{ 259 return interval ? ir3_reg_interval_next(interval) : NULL; 260} 261 262static inline void 263ir3_reg_interval_init(struct ir3_reg_interval *interval, 264 struct ir3_register *reg) 265{ 266 rb_tree_init(&interval->children); 267 interval->reg = reg; 268 interval->parent = NULL; 269 interval->inserted = false; 270} 271 272void ir3_reg_interval_dump(struct log_stream *stream, 273 struct ir3_reg_interval *interval); 274 275void ir3_reg_interval_insert(struct ir3_reg_ctx *ctx, 276 struct ir3_reg_interval *interval); 277 278void ir3_reg_interval_remove(struct ir3_reg_ctx *ctx, 279 struct ir3_reg_interval *interval); 280 281void ir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx, 282 struct ir3_reg_interval *interval); 283 284#endif 285