1/* 2 * Copyright © 2012 Intel Corporation 3 * Copyright © 2016 Broadcom 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25#define MAX_INSTRUCTION (1 << 30) 26 27#include "util/ralloc.h" 28#include "util/register_allocate.h" 29#include "v3d_compiler.h" 30 31struct partial_update_state { 32 struct qinst *insts[4]; 33 uint8_t channels; 34}; 35 36static uint32_t 37int_hash(const void *key) 38{ 39 return _mesa_hash_data(key, sizeof(int)); 40} 41 42static bool 43int_compare(const void *key1, const void *key2) 44{ 45 return *(const int *)key1 == *(const int *)key2; 46} 47 48static int 49vir_reg_to_var(struct qreg reg) 50{ 51 if (reg.file == QFILE_TEMP) 52 return reg.index; 53 54 return -1; 55} 56 57static void 58vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip, 59 struct qreg src) 60{ 61 int var = vir_reg_to_var(src); 62 if (var == -1) 63 return; 64 65 c->temp_start[var] = MIN2(c->temp_start[var], ip); 66 c->temp_end[var] = MAX2(c->temp_end[var], ip); 67 68 /* The use[] bitset marks when the block makes 69 * use of a variable without having completely 70 * defined that variable within the block. 71 */ 72 if (!BITSET_TEST(block->def, var)) 73 BITSET_SET(block->use, var); 74} 75 76static struct partial_update_state * 77get_partial_update_state(struct hash_table *partial_update_ht, 78 struct qinst *inst) 79{ 80 struct hash_entry *entry = 81 _mesa_hash_table_search(partial_update_ht, 82 &inst->dst.index); 83 if (entry) 84 return entry->data; 85 86 struct partial_update_state *state = 87 rzalloc(partial_update_ht, struct partial_update_state); 88 89 _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state); 90 91 return state; 92} 93 94static void 95vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip, 96 struct hash_table *partial_update_ht, struct qinst *inst) 97{ 98 if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU) 99 return; 100 101 /* The def[] bitset marks when an initialization in a 102 * block completely screens off previous updates of 103 * that variable. 104 */ 105 int var = vir_reg_to_var(inst->dst); 106 if (var == -1) 107 return; 108 109 c->temp_start[var] = MIN2(c->temp_start[var], ip); 110 c->temp_end[var] = MAX2(c->temp_end[var], ip); 111 112 /* Mark the block as having a (partial) def of the var. */ 113 BITSET_SET(block->defout, var); 114 115 /* If we've already tracked this as a def that screens off previous 116 * uses, or already used it within the block, there's nothing to do. 117 */ 118 if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var)) 119 return; 120 121 /* Easy, common case: unconditional full register update.*/ 122 if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE && 123 inst->qpu.flags.mc == V3D_QPU_COND_NONE) && 124 inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE && 125 inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) { 126 BITSET_SET(block->def, var); 127 return; 128 } 129 130 /* Finally, look at the condition code and packing and mark it as a 131 * def. We need to make sure that we understand sequences 132 * instructions like: 133 * 134 * mov.zs t0, t1 135 * mov.zc t0, t2 136 * 137 * or: 138 * 139 * mmov t0.8a, t1 140 * mmov t0.8b, t2 141 * mmov t0.8c, t3 142 * mmov t0.8d, t4 143 * 144 * as defining the temp within the block, because otherwise dst's live 145 * range will get extended up the control flow to the top of the 146 * program. 147 */ 148 struct partial_update_state *state = 149 get_partial_update_state(partial_update_ht, inst); 150 uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */ 151 152 if (inst->qpu.flags.ac == V3D_QPU_COND_NONE && 153 inst->qpu.flags.mc == V3D_QPU_COND_NONE) { 154 state->channels |= mask; 155 } else { 156 for (int i = 0; i < 4; i++) { 157 if (!(mask & (1 << i))) 158 continue; 159 160 /* XXXif (state->insts[i] && 161 state->insts[i]->cond == 162 qpu_cond_complement(inst->cond)) 163 state->channels |= 1 << i; 164 else 165 */ 166 state->insts[i] = inst; 167 } 168 } 169 170 if (state->channels == 0xf) 171 BITSET_SET(block->def, var); 172} 173 174static void 175sf_state_clear(struct hash_table *partial_update_ht) 176{ 177 hash_table_foreach(partial_update_ht, entry) { 178 struct partial_update_state *state = entry->data; 179 180 for (int i = 0; i < 4; i++) { 181 if (state->insts[i] && 182 (state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE || 183 state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE)) 184 state->insts[i] = NULL; 185 } 186 } 187} 188 189/* Sets up the def/use arrays for when variables are used-before-defined or 190 * defined-before-used in the block. 191 * 192 * Also initializes the temp_start/temp_end to cover just the instruction IPs 193 * where the variable is used, which will be extended later in 194 * vir_compute_start_end(). 195 */ 196static void 197vir_setup_def_use(struct v3d_compile *c) 198{ 199 struct hash_table *partial_update_ht = 200 _mesa_hash_table_create(c, int_hash, int_compare); 201 int ip = 0; 202 203 vir_for_each_block(block, c) { 204 block->start_ip = ip; 205 206 _mesa_hash_table_clear(partial_update_ht, NULL); 207 208 vir_for_each_inst(inst, block) { 209 for (int i = 0; i < vir_get_nsrc(inst); i++) 210 vir_setup_use(c, block, ip, inst->src[i]); 211 212 vir_setup_def(c, block, ip, partial_update_ht, inst); 213 214 if (false /* XXX inst->uf */) 215 sf_state_clear(partial_update_ht); 216 217 /* Payload registers: r0/1/2 contain W, centroid W, 218 * and Z at program start. Register allocation will 219 * force their nodes to R0/1/2. 220 */ 221 if (inst->src[0].file == QFILE_REG) { 222 switch (inst->src[0].index) { 223 case 0: 224 case 1: 225 case 2: 226 c->temp_start[inst->dst.index] = 0; 227 break; 228 } 229 } 230 231 ip++; 232 } 233 block->end_ip = ip; 234 } 235 236 _mesa_hash_table_destroy(partial_update_ht, NULL); 237} 238 239static bool 240vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words) 241{ 242 bool cont = false; 243 244 vir_for_each_block_rev(block, c) { 245 /* Update live_out: Any successor using the variable 246 * on entrance needs us to have the variable live on 247 * exit. 248 */ 249 vir_for_each_successor(succ, block) { 250 for (int i = 0; i < bitset_words; i++) { 251 BITSET_WORD new_live_out = (succ->live_in[i] & 252 ~block->live_out[i]); 253 if (new_live_out) { 254 block->live_out[i] |= new_live_out; 255 cont = true; 256 } 257 } 258 } 259 260 /* Update live_in */ 261 for (int i = 0; i < bitset_words; i++) { 262 BITSET_WORD new_live_in = (block->use[i] | 263 (block->live_out[i] & 264 ~block->def[i])); 265 if (new_live_in & ~block->live_in[i]) { 266 block->live_in[i] |= new_live_in; 267 cont = true; 268 } 269 } 270 } 271 272 return cont; 273} 274 275static bool 276vir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words) 277{ 278 bool cont = false; 279 280 vir_for_each_block_rev(block, c) { 281 /* Propagate defin/defout down the successors to produce the 282 * union of blocks with a reachable (partial) definition of 283 * the var. 284 * 285 * This keeps a conditional first write to a reg from 286 * extending its lifetime back to the start of the program. 287 */ 288 vir_for_each_successor(succ, block) { 289 for (int i = 0; i < bitset_words; i++) { 290 BITSET_WORD new_def = (block->defout[i] & 291 ~succ->defin[i]); 292 succ->defin[i] |= new_def; 293 succ->defout[i] |= new_def; 294 cont |= new_def; 295 } 296 } 297 } 298 299 return cont; 300} 301 302/** 303 * Extend the start/end ranges for each variable to account for the 304 * new information calculated from control flow. 305 */ 306static void 307vir_compute_start_end(struct v3d_compile *c, int num_vars) 308{ 309 vir_for_each_block(block, c) { 310 for (int i = 0; i < num_vars; i++) { 311 if (BITSET_TEST(block->live_in, i) && 312 BITSET_TEST(block->defin, i)) { 313 c->temp_start[i] = MIN2(c->temp_start[i], 314 block->start_ip); 315 c->temp_end[i] = MAX2(c->temp_end[i], 316 block->start_ip); 317 } 318 319 if (BITSET_TEST(block->live_out, i) && 320 BITSET_TEST(block->defout, i)) { 321 c->temp_start[i] = MIN2(c->temp_start[i], 322 block->end_ip); 323 c->temp_end[i] = MAX2(c->temp_end[i], 324 block->end_ip); 325 } 326 } 327 } 328} 329 330void 331vir_calculate_live_intervals(struct v3d_compile *c) 332{ 333 int bitset_words = BITSET_WORDS(c->num_temps); 334 335 /* We may be called more than once if we've rearranged the program to 336 * try to get register allocation to succeed. 337 */ 338 if (c->temp_start) { 339 ralloc_free(c->temp_start); 340 ralloc_free(c->temp_end); 341 342 vir_for_each_block(block, c) { 343 ralloc_free(block->def); 344 ralloc_free(block->use); 345 ralloc_free(block->live_in); 346 ralloc_free(block->live_out); 347 } 348 } 349 350 c->temp_start = rzalloc_array(c, int, c->num_temps); 351 c->temp_end = rzalloc_array(c, int, c->num_temps); 352 353 for (int i = 0; i < c->num_temps; i++) { 354 c->temp_start[i] = MAX_INSTRUCTION; 355 c->temp_end[i] = -1; 356 } 357 358 vir_for_each_block(block, c) { 359 block->def = rzalloc_array(c, BITSET_WORD, bitset_words); 360 block->defin = rzalloc_array(c, BITSET_WORD, bitset_words); 361 block->defout = rzalloc_array(c, BITSET_WORD, bitset_words); 362 block->use = rzalloc_array(c, BITSET_WORD, bitset_words); 363 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words); 364 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words); 365 } 366 367 vir_setup_def_use(c); 368 369 while (vir_live_variables_dataflow(c, bitset_words)) 370 ; 371 372 while (vir_live_variables_defin_defout_dataflow(c, bitset_words)) 373 ; 374 375 vir_compute_start_end(c, c->num_temps); 376 377 c->live_intervals_valid = true; 378} 379