1/* 2 * Copyright © 2012 Intel Corporation 3 * Copyright © 2016 Broadcom 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25#define MAX_INSTRUCTION (1 << 30) 26 27#include "util/ralloc.h" 28#include "util/register_allocate.h" 29#include "vc4_context.h" 30#include "vc4_qir.h" 31 32struct partial_update_state { 33 struct qinst *insts[4]; 34 uint8_t channels; 35}; 36 37static uint32_t 38int_hash(const void *key) 39{ 40 return _mesa_hash_data(key, sizeof(int)); 41} 42 43static bool 44int_compare(const void *key1, const void *key2) 45{ 46 return *(const int *)key1 == *(const int *)key2; 47} 48 49static int 50qir_reg_to_var(struct qreg reg) 51{ 52 if (reg.file == QFILE_TEMP) 53 return reg.index; 54 55 return -1; 56} 57 58static void 59qir_setup_use(struct vc4_compile *c, struct qblock *block, int ip, 60 struct qreg src) 61{ 62 int var = qir_reg_to_var(src); 63 if (var == -1) 64 return; 65 66 c->temp_start[var] = MIN2(c->temp_start[var], ip); 67 c->temp_end[var] = MAX2(c->temp_end[var], ip); 68 69 /* The use[] bitset marks when the block makes 70 * use of a variable without having completely 71 * defined that variable within the block. 72 */ 73 if (!BITSET_TEST(block->def, var)) 74 BITSET_SET(block->use, var); 75} 76 77static struct partial_update_state * 78get_partial_update_state(struct hash_table *partial_update_ht, 79 struct qinst *inst) 80{ 81 struct hash_entry *entry = 82 _mesa_hash_table_search(partial_update_ht, 83 &inst->dst.index); 84 if (entry) 85 return entry->data; 86 87 struct partial_update_state *state = 88 rzalloc(partial_update_ht, struct partial_update_state); 89 90 _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state); 91 92 return state; 93} 94 95static void 96qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip, 97 struct hash_table *partial_update_ht, struct qinst *inst) 98{ 99 /* The def[] bitset marks when an initialization in a 100 * block completely screens off previous updates of 101 * that variable. 102 */ 103 int var = qir_reg_to_var(inst->dst); 104 if (var == -1) 105 return; 106 107 c->temp_start[var] = MIN2(c->temp_start[var], ip); 108 c->temp_end[var] = MAX2(c->temp_end[var], ip); 109 110 /* If we've already tracked this as a def, or already used it within 111 * the block, there's nothing to do. 112 */ 113 if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var)) 114 return; 115 116 /* Easy, common case: unconditional full register update. 117 * 118 * We treat conditioning on the exec mask as the same as not being 119 * conditional. This makes sure that if the register gets set on 120 * either side of an if, it is treated as being screened off before 121 * the if. Otherwise, if there was no intervening def, its live 122 * interval doesn't extend back to the start of he program, and if too 123 * many registers did that we'd fail to register allocate. 124 */ 125 if ((inst->cond == QPU_COND_ALWAYS || 126 inst->cond_is_exec_mask) && !inst->dst.pack) { 127 BITSET_SET(block->def, var); 128 return; 129 } 130 131 /* Finally, look at the condition code and packing and mark it as a 132 * def. We need to make sure that we understand sequences 133 * instructions like: 134 * 135 * mov.zs t0, t1 136 * mov.zc t0, t2 137 * 138 * or: 139 * 140 * mmov t0.8a, t1 141 * mmov t0.8b, t2 142 * mmov t0.8c, t3 143 * mmov t0.8d, t4 144 * 145 * as defining the temp within the block, because otherwise dst's live 146 * range will get extended up the control flow to the top of the 147 * program. 148 */ 149 struct partial_update_state *state = 150 get_partial_update_state(partial_update_ht, inst); 151 uint8_t mask = qir_channels_written(inst); 152 153 if (inst->cond == QPU_COND_ALWAYS) { 154 state->channels |= mask; 155 } else { 156 for (int i = 0; i < 4; i++) { 157 if (!(mask & (1 << i))) 158 continue; 159 160 if (state->insts[i] && 161 state->insts[i]->cond == 162 qpu_cond_complement(inst->cond)) 163 state->channels |= 1 << i; 164 else 165 state->insts[i] = inst; 166 } 167 } 168 169 if (state->channels == 0xf) 170 BITSET_SET(block->def, var); 171} 172 173static void 174sf_state_clear(struct hash_table *partial_update_ht) 175{ 176 hash_table_foreach(partial_update_ht, entry) { 177 struct partial_update_state *state = entry->data; 178 179 for (int i = 0; i < 4; i++) { 180 if (state->insts[i] && state->insts[i]->cond) 181 state->insts[i] = NULL; 182 } 183 } 184} 185 186/* Sets up the def/use arrays for when variables are used-before-defined or 187 * defined-before-used in the block. 188 * 189 * Also initializes the temp_start/temp_end to cover just the instruction IPs 190 * where the variable is used, which will be extended later in 191 * qir_compute_start_end(). 192 */ 193static void 194qir_setup_def_use(struct vc4_compile *c) 195{ 196 struct hash_table *partial_update_ht = 197 _mesa_hash_table_create(c, int_hash, int_compare); 198 int ip = 0; 199 200 qir_for_each_block(block, c) { 201 block->start_ip = ip; 202 203 _mesa_hash_table_clear(partial_update_ht, NULL); 204 205 qir_for_each_inst(inst, block) { 206 for (int i = 0; i < qir_get_nsrc(inst); i++) 207 qir_setup_use(c, block, ip, inst->src[i]); 208 209 qir_setup_def(c, block, ip, partial_update_ht, inst); 210 211 if (inst->sf) 212 sf_state_clear(partial_update_ht); 213 214 switch (inst->op) { 215 case QOP_FRAG_Z: 216 case QOP_FRAG_W: 217 /* The payload registers have values 218 * implicitly loaded at the start of the 219 * program. 220 */ 221 if (inst->dst.file == QFILE_TEMP) 222 c->temp_start[inst->dst.index] = 0; 223 break; 224 default: 225 break; 226 } 227 ip++; 228 } 229 block->end_ip = ip; 230 } 231 232 _mesa_hash_table_destroy(partial_update_ht, NULL); 233} 234 235static bool 236qir_live_variables_dataflow(struct vc4_compile *c, int bitset_words) 237{ 238 bool cont = false; 239 240 qir_for_each_block_rev(block, c) { 241 /* Update live_out: Any successor using the variable 242 * on entrance needs us to have the variable live on 243 * exit. 244 */ 245 qir_for_each_successor(succ, block) { 246 for (int i = 0; i < bitset_words; i++) { 247 BITSET_WORD new_live_out = (succ->live_in[i] & 248 ~block->live_out[i]); 249 if (new_live_out) { 250 block->live_out[i] |= new_live_out; 251 cont = true; 252 } 253 } 254 } 255 256 /* Update live_in */ 257 for (int i = 0; i < bitset_words; i++) { 258 BITSET_WORD new_live_in = (block->use[i] | 259 (block->live_out[i] & 260 ~block->def[i])); 261 if (new_live_in & ~block->live_in[i]) { 262 block->live_in[i] |= new_live_in; 263 cont = true; 264 } 265 } 266 } 267 268 return cont; 269} 270 271/** 272 * Extend the start/end ranges for each variable to account for the 273 * new information calculated from control flow. 274 */ 275static void 276qir_compute_start_end(struct vc4_compile *c, int num_vars) 277{ 278 qir_for_each_block(block, c) { 279 for (int i = 0; i < num_vars; i++) { 280 if (BITSET_TEST(block->live_in, i)) { 281 c->temp_start[i] = MIN2(c->temp_start[i], 282 block->start_ip); 283 c->temp_end[i] = MAX2(c->temp_end[i], 284 block->start_ip); 285 } 286 287 if (BITSET_TEST(block->live_out, i)) { 288 c->temp_start[i] = MIN2(c->temp_start[i], 289 block->end_ip); 290 c->temp_end[i] = MAX2(c->temp_end[i], 291 block->end_ip); 292 } 293 } 294 } 295} 296 297void 298qir_calculate_live_intervals(struct vc4_compile *c) 299{ 300 int bitset_words = BITSET_WORDS(c->num_temps); 301 302 /* If we called this function more than once, then we should be 303 * freeing the previous arrays. 304 */ 305 assert(!c->temp_start); 306 307 c->temp_start = rzalloc_array(c, int, c->num_temps); 308 c->temp_end = rzalloc_array(c, int, c->num_temps); 309 310 for (int i = 0; i < c->num_temps; i++) { 311 c->temp_start[i] = MAX_INSTRUCTION; 312 c->temp_end[i] = -1; 313 } 314 315 qir_for_each_block(block, c) { 316 block->def = rzalloc_array(c, BITSET_WORD, bitset_words); 317 block->use = rzalloc_array(c, BITSET_WORD, bitset_words); 318 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words); 319 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words); 320 } 321 322 qir_setup_def_use(c); 323 324 while (qir_live_variables_dataflow(c, bitset_words)) 325 ; 326 327 qir_compute_start_end(c, c->num_temps); 328 329 if (vc4_debug & VC4_DEBUG_SHADERDB) { 330 int last_ip = 0; 331 for (int i = 0; i < c->num_temps; i++) 332 last_ip = MAX2(last_ip, c->temp_end[i]); 333 334 int reg_pressure = 0; 335 int max_reg_pressure = 0; 336 for (int i = 0; i < last_ip; i++) { 337 for (int j = 0; j < c->num_temps; j++) { 338 if (c->temp_start[j] == i) 339 reg_pressure++; 340 if (c->temp_end[j] == i) 341 reg_pressure--; 342 } 343 max_reg_pressure = MAX2(max_reg_pressure, reg_pressure); 344 } 345 346 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d max temps\n", 347 qir_get_stage_name(c->stage), 348 c->program_id, c->variant_id, 349 max_reg_pressure); 350 } 351} 352