1/* 2 * Copyright © 2012 Intel Corporation 3 * Copyright © 2016 Broadcom 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25#define MAX_INSTRUCTION (1 << 30) 26 27#include "util/ralloc.h" 28#include "util/register_allocate.h" 29#include "vc4_context.h" 30#include "vc4_qir.h" 31 32struct partial_update_state { 33 struct qinst *insts[4]; 34 uint8_t channels; 35}; 36 37static int 38qir_reg_to_var(struct qreg reg) 39{ 40 if (reg.file == QFILE_TEMP) 41 return reg.index; 42 43 return -1; 44} 45 46static void 47qir_setup_use(struct vc4_compile *c, struct qblock *block, int ip, 48 struct qreg src) 49{ 50 int var = qir_reg_to_var(src); 51 if (var == -1) 52 return; 53 54 c->temp_start[var] = MIN2(c->temp_start[var], ip); 55 c->temp_end[var] = MAX2(c->temp_end[var], ip); 56 57 /* The use[] bitset marks when the block makes 58 * use of a variable without having completely 59 * defined that variable within the block. 60 */ 61 if (!BITSET_TEST(block->def, var)) 62 BITSET_SET(block->use, var); 63} 64 65static struct partial_update_state * 66get_partial_update_state(struct hash_table *partial_update_ht, 67 struct qinst *inst) 68{ 69 struct hash_entry *entry = 70 _mesa_hash_table_search(partial_update_ht, 71 &inst->dst.index); 72 if (entry) 73 return entry->data; 74 75 struct partial_update_state *state = 76 rzalloc(partial_update_ht, struct partial_update_state); 77 78 _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state); 79 80 return state; 81} 82 83static void 84qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip, 85 struct hash_table *partial_update_ht, struct qinst *inst) 86{ 87 /* The def[] bitset marks when an initialization in a 88 * block completely screens off previous updates of 89 * that variable. 90 */ 91 int var = qir_reg_to_var(inst->dst); 92 if (var == -1) 93 return; 94 95 c->temp_start[var] = MIN2(c->temp_start[var], ip); 96 c->temp_end[var] = MAX2(c->temp_end[var], ip); 97 98 /* If we've already tracked this as a def, or already used it within 99 * the block, there's nothing to do. 100 */ 101 if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var)) 102 return; 103 104 /* Easy, common case: unconditional full register update. 105 * 106 * We treat conditioning on the exec mask as the same as not being 107 * conditional. This makes sure that if the register gets set on 108 * either side of an if, it is treated as being screened off before 109 * the if. Otherwise, if there was no intervening def, its live 110 * interval doesn't extend back to the start of he program, and if too 111 * many registers did that we'd fail to register allocate. 112 */ 113 if ((inst->cond == QPU_COND_ALWAYS || 114 inst->cond_is_exec_mask) && !inst->dst.pack) { 115 BITSET_SET(block->def, var); 116 return; 117 } 118 119 /* Finally, look at the condition code and packing and mark it as a 120 * def. We need to make sure that we understand sequences 121 * instructions like: 122 * 123 * mov.zs t0, t1 124 * mov.zc t0, t2 125 * 126 * or: 127 * 128 * mmov t0.8a, t1 129 * mmov t0.8b, t2 130 * mmov t0.8c, t3 131 * mmov t0.8d, t4 132 * 133 * as defining the temp within the block, because otherwise dst's live 134 * range will get extended up the control flow to the top of the 135 * program. 136 */ 137 struct partial_update_state *state = 138 get_partial_update_state(partial_update_ht, inst); 139 uint8_t mask = qir_channels_written(inst); 140 141 if (inst->cond == QPU_COND_ALWAYS) { 142 state->channels |= mask; 143 } else { 144 for (int i = 0; i < 4; i++) { 145 if (!(mask & (1 << i))) 146 continue; 147 148 if (state->insts[i] && 149 state->insts[i]->cond == 150 qpu_cond_complement(inst->cond)) 151 state->channels |= 1 << i; 152 else 153 state->insts[i] = inst; 154 } 155 } 156 157 if (state->channels == 0xf) 158 BITSET_SET(block->def, var); 159} 160 161static void 162sf_state_clear(struct hash_table *partial_update_ht) 163{ 164 hash_table_foreach(partial_update_ht, entry) { 165 struct partial_update_state *state = entry->data; 166 167 for (int i = 0; i < 4; i++) { 168 if (state->insts[i] && state->insts[i]->cond) 169 state->insts[i] = NULL; 170 } 171 } 172} 173 174/* Sets up the def/use arrays for when variables are used-before-defined or 175 * defined-before-used in the block. 176 * 177 * Also initializes the temp_start/temp_end to cover just the instruction IPs 178 * where the variable is used, which will be extended later in 179 * qir_compute_start_end(). 180 */ 181static void 182qir_setup_def_use(struct vc4_compile *c) 183{ 184 struct hash_table *partial_update_ht = 185 _mesa_hash_table_create(c, _mesa_hash_int, _mesa_key_int_equal); 186 int ip = 0; 187 188 qir_for_each_block(block, c) { 189 block->start_ip = ip; 190 191 _mesa_hash_table_clear(partial_update_ht, NULL); 192 193 qir_for_each_inst(inst, block) { 194 for (int i = 0; i < qir_get_nsrc(inst); i++) 195 qir_setup_use(c, block, ip, inst->src[i]); 196 197 qir_setup_def(c, block, ip, partial_update_ht, inst); 198 199 if (inst->sf) 200 sf_state_clear(partial_update_ht); 201 202 switch (inst->op) { 203 case QOP_FRAG_Z: 204 case QOP_FRAG_W: 205 /* The payload registers have values 206 * implicitly loaded at the start of the 207 * program. 208 */ 209 if (inst->dst.file == QFILE_TEMP) 210 c->temp_start[inst->dst.index] = 0; 211 break; 212 default: 213 break; 214 } 215 ip++; 216 } 217 block->end_ip = ip; 218 } 219 220 _mesa_hash_table_destroy(partial_update_ht, NULL); 221} 222 223static bool 224qir_live_variables_dataflow(struct vc4_compile *c, int bitset_words) 225{ 226 bool cont = false; 227 228 qir_for_each_block_rev(block, c) { 229 /* Update live_out: Any successor using the variable 230 * on entrance needs us to have the variable live on 231 * exit. 232 */ 233 qir_for_each_successor(succ, block) { 234 for (int i = 0; i < bitset_words; i++) { 235 BITSET_WORD new_live_out = (succ->live_in[i] & 236 ~block->live_out[i]); 237 if (new_live_out) { 238 block->live_out[i] |= new_live_out; 239 cont = true; 240 } 241 } 242 } 243 244 /* Update live_in */ 245 for (int i = 0; i < bitset_words; i++) { 246 BITSET_WORD new_live_in = (block->use[i] | 247 (block->live_out[i] & 248 ~block->def[i])); 249 if (new_live_in & ~block->live_in[i]) { 250 block->live_in[i] |= new_live_in; 251 cont = true; 252 } 253 } 254 } 255 256 return cont; 257} 258 259/** 260 * Extend the start/end ranges for each variable to account for the 261 * new information calculated from control flow. 262 */ 263static void 264qir_compute_start_end(struct vc4_compile *c, int num_vars) 265{ 266 qir_for_each_block(block, c) { 267 for (int i = 0; i < num_vars; i++) { 268 if (BITSET_TEST(block->live_in, i)) { 269 c->temp_start[i] = MIN2(c->temp_start[i], 270 block->start_ip); 271 c->temp_end[i] = MAX2(c->temp_end[i], 272 block->start_ip); 273 } 274 275 if (BITSET_TEST(block->live_out, i)) { 276 c->temp_start[i] = MIN2(c->temp_start[i], 277 block->end_ip); 278 c->temp_end[i] = MAX2(c->temp_end[i], 279 block->end_ip); 280 } 281 } 282 } 283} 284 285void 286qir_calculate_live_intervals(struct vc4_compile *c) 287{ 288 int bitset_words = BITSET_WORDS(c->num_temps); 289 290 /* If we called this function more than once, then we should be 291 * freeing the previous arrays. 292 */ 293 assert(!c->temp_start); 294 295 c->temp_start = rzalloc_array(c, int, c->num_temps); 296 c->temp_end = rzalloc_array(c, int, c->num_temps); 297 298 for (int i = 0; i < c->num_temps; i++) { 299 c->temp_start[i] = MAX_INSTRUCTION; 300 c->temp_end[i] = -1; 301 } 302 303 qir_for_each_block(block, c) { 304 block->def = rzalloc_array(c, BITSET_WORD, bitset_words); 305 block->use = rzalloc_array(c, BITSET_WORD, bitset_words); 306 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words); 307 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words); 308 } 309 310 qir_setup_def_use(c); 311 312 while (qir_live_variables_dataflow(c, bitset_words)) 313 ; 314 315 qir_compute_start_end(c, c->num_temps); 316 317 if (vc4_debug & VC4_DEBUG_SHADERDB) { 318 int last_ip = 0; 319 for (int i = 0; i < c->num_temps; i++) 320 last_ip = MAX2(last_ip, c->temp_end[i]); 321 322 int reg_pressure = 0; 323 int max_reg_pressure = 0; 324 for (int i = 0; i < last_ip; i++) { 325 for (int j = 0; j < c->num_temps; j++) { 326 if (c->temp_start[j] == i) 327 reg_pressure++; 328 if (c->temp_end[j] == i) 329 reg_pressure--; 330 } 331 max_reg_pressure = MAX2(max_reg_pressure, reg_pressure); 332 } 333 334 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d max temps\n", 335 qir_get_stage_name(c->stage), 336 c->program_id, c->variant_id, 337 max_reg_pressure); 338 } 339} 340