101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2012 Intel Corporation 301e04c3fSmrg * Copyright © 2016 Broadcom 401e04c3fSmrg * 501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 601e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 701e04c3fSmrg * to deal in the Software without restriction, including without limitation 801e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 901e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 1001e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1101e04c3fSmrg * 1201e04c3fSmrg * The above copyright notice and this permission notice (including the next 1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1401e04c3fSmrg * Software. 1501e04c3fSmrg * 1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1901e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2001e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2101e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2201e04c3fSmrg * IN THE SOFTWARE. 2301e04c3fSmrg */ 2401e04c3fSmrg 2501e04c3fSmrg#define MAX_INSTRUCTION (1 << 30) 2601e04c3fSmrg 2701e04c3fSmrg#include "util/ralloc.h" 2801e04c3fSmrg#include "util/register_allocate.h" 2901e04c3fSmrg#include "v3d_compiler.h" 3001e04c3fSmrg 317ec681f3Smrg/* Keeps track of conditional / partial writes in a block */ 3201e04c3fSmrgstruct partial_update_state { 337ec681f3Smrg /* Instruction doing a conditional or partial write */ 347ec681f3Smrg struct qinst *inst; 357ec681f3Smrg /* Instruction that set the flags for the conditional write */ 367ec681f3Smrg struct qinst *flags_inst; 3701e04c3fSmrg}; 3801e04c3fSmrg 3901e04c3fSmrgstatic int 4001e04c3fSmrgvir_reg_to_var(struct qreg reg) 4101e04c3fSmrg{ 4201e04c3fSmrg if (reg.file == QFILE_TEMP) 4301e04c3fSmrg return reg.index; 4401e04c3fSmrg 4501e04c3fSmrg return -1; 4601e04c3fSmrg} 4701e04c3fSmrg 4801e04c3fSmrgstatic void 4901e04c3fSmrgvir_setup_use(struct v3d_compile *c, struct qblock *block, int ip, 507ec681f3Smrg struct partial_update_state *partial_update_ht, struct qinst *inst, 517ec681f3Smrg struct qreg src, struct qinst *flags_inst) 5201e04c3fSmrg{ 5301e04c3fSmrg int var = vir_reg_to_var(src); 5401e04c3fSmrg if (var == -1) 5501e04c3fSmrg return; 5601e04c3fSmrg 5701e04c3fSmrg c->temp_start[var] = MIN2(c->temp_start[var], ip); 5801e04c3fSmrg c->temp_end[var] = MAX2(c->temp_end[var], ip); 5901e04c3fSmrg 6001e04c3fSmrg /* The use[] bitset marks when the block makes 6101e04c3fSmrg * use of a variable without having completely 6201e04c3fSmrg * defined that variable within the block. 6301e04c3fSmrg */ 647ec681f3Smrg if (!BITSET_TEST(block->def, var)) { 657ec681f3Smrg /* If this use of var is conditional and the condition 667ec681f3Smrg * and flags match those of a previous instruction 677ec681f3Smrg * in the same block partially defining var then we 687ec681f3Smrg * consider var completely defined within the block. 697ec681f3Smrg */ 707ec681f3Smrg if (BITSET_TEST(block->defout, var)) { 717ec681f3Smrg struct partial_update_state *state = 727ec681f3Smrg &partial_update_ht[var]; 737ec681f3Smrg if (state->inst) { 747ec681f3Smrg if (vir_get_cond(inst) == vir_get_cond(state->inst) && 757ec681f3Smrg flags_inst == state->flags_inst) { 767ec681f3Smrg return; 777ec681f3Smrg } 787ec681f3Smrg } 797ec681f3Smrg } 8001e04c3fSmrg 817ec681f3Smrg BITSET_SET(block->use, var); 827ec681f3Smrg } 8301e04c3fSmrg} 8401e04c3fSmrg 857ec681f3Smrg/* The def[] bitset marks when an initialization in a 867ec681f3Smrg * block completely screens off previous updates of 877ec681f3Smrg * that variable. 887ec681f3Smrg */ 8901e04c3fSmrgstatic void 9001e04c3fSmrgvir_setup_def(struct v3d_compile *c, struct qblock *block, int ip, 917ec681f3Smrg struct partial_update_state *partial_update, struct qinst *inst, 927ec681f3Smrg struct qinst *flags_inst) 9301e04c3fSmrg{ 9401e04c3fSmrg if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU) 9501e04c3fSmrg return; 9601e04c3fSmrg 9701e04c3fSmrg int var = vir_reg_to_var(inst->dst); 9801e04c3fSmrg if (var == -1) 9901e04c3fSmrg return; 10001e04c3fSmrg 10101e04c3fSmrg c->temp_start[var] = MIN2(c->temp_start[var], ip); 10201e04c3fSmrg c->temp_end[var] = MAX2(c->temp_end[var], ip); 10301e04c3fSmrg 104ed98bd31Smaya /* Mark the block as having a (partial) def of the var. */ 105ed98bd31Smaya BITSET_SET(block->defout, var); 106ed98bd31Smaya 107ed98bd31Smaya /* If we've already tracked this as a def that screens off previous 108ed98bd31Smaya * uses, or already used it within the block, there's nothing to do. 10901e04c3fSmrg */ 11001e04c3fSmrg if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var)) 11101e04c3fSmrg return; 11201e04c3fSmrg 113ed98bd31Smaya /* Easy, common case: unconditional full register update.*/ 114ed98bd31Smaya if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE && 115ed98bd31Smaya inst->qpu.flags.mc == V3D_QPU_COND_NONE) && 11601e04c3fSmrg inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE && 11701e04c3fSmrg inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) { 11801e04c3fSmrg BITSET_SET(block->def, var); 11901e04c3fSmrg return; 12001e04c3fSmrg } 12101e04c3fSmrg 1227ec681f3Smrg /* Keep track of conditional writes. 12301e04c3fSmrg * 1247ec681f3Smrg * Notice that the dst's live range for a conditional or partial writes 1257ec681f3Smrg * will get extended up the control flow to the top of the program until 1267ec681f3Smrg * we find a full write, making register allocation more difficult, so 1277ec681f3Smrg * we should try our best to keep track of these and figure out if a 1287ec681f3Smrg * combination of them actually writes the entire register so we can 1297ec681f3Smrg * stop that process early and reduce liveness. 13001e04c3fSmrg * 1317ec681f3Smrg * FIXME: Track partial updates via pack/unpack. 13201e04c3fSmrg */ 1337ec681f3Smrg struct partial_update_state *state = &partial_update[var]; 1347ec681f3Smrg if (inst->qpu.flags.ac != V3D_QPU_COND_NONE || 1357ec681f3Smrg inst->qpu.flags.mc != V3D_QPU_COND_NONE) { 1367ec681f3Smrg state->inst = inst; 1377ec681f3Smrg state->flags_inst = flags_inst; 13801e04c3fSmrg } 13901e04c3fSmrg} 14001e04c3fSmrg 14101e04c3fSmrg/* Sets up the def/use arrays for when variables are used-before-defined or 14201e04c3fSmrg * defined-before-used in the block. 14301e04c3fSmrg * 14401e04c3fSmrg * Also initializes the temp_start/temp_end to cover just the instruction IPs 14501e04c3fSmrg * where the variable is used, which will be extended later in 14601e04c3fSmrg * vir_compute_start_end(). 14701e04c3fSmrg */ 14801e04c3fSmrgstatic void 14901e04c3fSmrgvir_setup_def_use(struct v3d_compile *c) 15001e04c3fSmrg{ 1517ec681f3Smrg struct partial_update_state *partial_update = 1527ec681f3Smrg rzalloc_array(c, struct partial_update_state, c->num_temps); 15301e04c3fSmrg int ip = 0; 15401e04c3fSmrg 15501e04c3fSmrg vir_for_each_block(block, c) { 15601e04c3fSmrg block->start_ip = ip; 15701e04c3fSmrg 1587ec681f3Smrg memset(partial_update, 0, 1597ec681f3Smrg sizeof(struct partial_update_state) * c->num_temps); 1607ec681f3Smrg 1617ec681f3Smrg struct qinst *flags_inst = NULL; 16201e04c3fSmrg 16301e04c3fSmrg vir_for_each_inst(inst, block) { 1647ec681f3Smrg for (int i = 0; i < vir_get_nsrc(inst); i++) { 1657ec681f3Smrg vir_setup_use(c, block, ip, partial_update, 1667ec681f3Smrg inst, inst->src[i], flags_inst); 1677ec681f3Smrg } 1687ec681f3Smrg 1697ec681f3Smrg vir_setup_def(c, block, ip, partial_update, 1707ec681f3Smrg inst, flags_inst); 17101e04c3fSmrg 1727ec681f3Smrg if (inst->qpu.flags.apf != V3D_QPU_PF_NONE || 1737ec681f3Smrg inst->qpu.flags.mpf != V3D_QPU_PF_NONE) { 1747ec681f3Smrg flags_inst = inst; 1757ec681f3Smrg } 17601e04c3fSmrg 1777ec681f3Smrg if (inst->qpu.flags.auf != V3D_QPU_UF_NONE || 1787ec681f3Smrg inst->qpu.flags.muf != V3D_QPU_UF_NONE) { 1797ec681f3Smrg flags_inst = NULL; 1807ec681f3Smrg } 18101e04c3fSmrg 18201e04c3fSmrg /* Payload registers: r0/1/2 contain W, centroid W, 18301e04c3fSmrg * and Z at program start. Register allocation will 18401e04c3fSmrg * force their nodes to R0/1/2. 18501e04c3fSmrg */ 18601e04c3fSmrg if (inst->src[0].file == QFILE_REG) { 18701e04c3fSmrg switch (inst->src[0].index) { 18801e04c3fSmrg case 0: 18901e04c3fSmrg case 1: 19001e04c3fSmrg case 2: 19101e04c3fSmrg c->temp_start[inst->dst.index] = 0; 19201e04c3fSmrg break; 19301e04c3fSmrg } 19401e04c3fSmrg } 19501e04c3fSmrg 19601e04c3fSmrg ip++; 19701e04c3fSmrg } 19801e04c3fSmrg block->end_ip = ip; 19901e04c3fSmrg } 20001e04c3fSmrg 2017ec681f3Smrg ralloc_free(partial_update); 20201e04c3fSmrg} 20301e04c3fSmrg 20401e04c3fSmrgstatic bool 20501e04c3fSmrgvir_live_variables_dataflow(struct v3d_compile *c, int bitset_words) 20601e04c3fSmrg{ 20701e04c3fSmrg bool cont = false; 20801e04c3fSmrg 20901e04c3fSmrg vir_for_each_block_rev(block, c) { 21001e04c3fSmrg /* Update live_out: Any successor using the variable 21101e04c3fSmrg * on entrance needs us to have the variable live on 21201e04c3fSmrg * exit. 21301e04c3fSmrg */ 21401e04c3fSmrg vir_for_each_successor(succ, block) { 21501e04c3fSmrg for (int i = 0; i < bitset_words; i++) { 21601e04c3fSmrg BITSET_WORD new_live_out = (succ->live_in[i] & 21701e04c3fSmrg ~block->live_out[i]); 21801e04c3fSmrg if (new_live_out) { 21901e04c3fSmrg block->live_out[i] |= new_live_out; 22001e04c3fSmrg cont = true; 22101e04c3fSmrg } 22201e04c3fSmrg } 22301e04c3fSmrg } 22401e04c3fSmrg 22501e04c3fSmrg /* Update live_in */ 22601e04c3fSmrg for (int i = 0; i < bitset_words; i++) { 22701e04c3fSmrg BITSET_WORD new_live_in = (block->use[i] | 22801e04c3fSmrg (block->live_out[i] & 22901e04c3fSmrg ~block->def[i])); 23001e04c3fSmrg if (new_live_in & ~block->live_in[i]) { 23101e04c3fSmrg block->live_in[i] |= new_live_in; 23201e04c3fSmrg cont = true; 23301e04c3fSmrg } 23401e04c3fSmrg } 23501e04c3fSmrg } 23601e04c3fSmrg 23701e04c3fSmrg return cont; 23801e04c3fSmrg} 23901e04c3fSmrg 240ed98bd31Smayastatic bool 241ed98bd31Smayavir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words) 242ed98bd31Smaya{ 243ed98bd31Smaya bool cont = false; 244ed98bd31Smaya 245ed98bd31Smaya vir_for_each_block_rev(block, c) { 246ed98bd31Smaya /* Propagate defin/defout down the successors to produce the 247ed98bd31Smaya * union of blocks with a reachable (partial) definition of 248ed98bd31Smaya * the var. 249ed98bd31Smaya * 250ed98bd31Smaya * This keeps a conditional first write to a reg from 251ed98bd31Smaya * extending its lifetime back to the start of the program. 252ed98bd31Smaya */ 253ed98bd31Smaya vir_for_each_successor(succ, block) { 254ed98bd31Smaya for (int i = 0; i < bitset_words; i++) { 255ed98bd31Smaya BITSET_WORD new_def = (block->defout[i] & 256ed98bd31Smaya ~succ->defin[i]); 257ed98bd31Smaya succ->defin[i] |= new_def; 258ed98bd31Smaya succ->defout[i] |= new_def; 259ed98bd31Smaya cont |= new_def; 260ed98bd31Smaya } 261ed98bd31Smaya } 262ed98bd31Smaya } 263ed98bd31Smaya 264ed98bd31Smaya return cont; 265ed98bd31Smaya} 266ed98bd31Smaya 26701e04c3fSmrg/** 26801e04c3fSmrg * Extend the start/end ranges for each variable to account for the 26901e04c3fSmrg * new information calculated from control flow. 27001e04c3fSmrg */ 27101e04c3fSmrgstatic void 27201e04c3fSmrgvir_compute_start_end(struct v3d_compile *c, int num_vars) 27301e04c3fSmrg{ 27401e04c3fSmrg vir_for_each_block(block, c) { 27501e04c3fSmrg for (int i = 0; i < num_vars; i++) { 276ed98bd31Smaya if (BITSET_TEST(block->live_in, i) && 277ed98bd31Smaya BITSET_TEST(block->defin, i)) { 27801e04c3fSmrg c->temp_start[i] = MIN2(c->temp_start[i], 27901e04c3fSmrg block->start_ip); 28001e04c3fSmrg c->temp_end[i] = MAX2(c->temp_end[i], 28101e04c3fSmrg block->start_ip); 28201e04c3fSmrg } 28301e04c3fSmrg 284ed98bd31Smaya if (BITSET_TEST(block->live_out, i) && 285ed98bd31Smaya BITSET_TEST(block->defout, i)) { 28601e04c3fSmrg c->temp_start[i] = MIN2(c->temp_start[i], 28701e04c3fSmrg block->end_ip); 28801e04c3fSmrg c->temp_end[i] = MAX2(c->temp_end[i], 28901e04c3fSmrg block->end_ip); 29001e04c3fSmrg } 29101e04c3fSmrg } 29201e04c3fSmrg } 29301e04c3fSmrg} 29401e04c3fSmrg 29501e04c3fSmrgvoid 29601e04c3fSmrgvir_calculate_live_intervals(struct v3d_compile *c) 29701e04c3fSmrg{ 29801e04c3fSmrg int bitset_words = BITSET_WORDS(c->num_temps); 29901e04c3fSmrg 30001e04c3fSmrg /* We may be called more than once if we've rearranged the program to 30101e04c3fSmrg * try to get register allocation to succeed. 30201e04c3fSmrg */ 30301e04c3fSmrg if (c->temp_start) { 30401e04c3fSmrg ralloc_free(c->temp_start); 30501e04c3fSmrg ralloc_free(c->temp_end); 30601e04c3fSmrg 30701e04c3fSmrg vir_for_each_block(block, c) { 30801e04c3fSmrg ralloc_free(block->def); 30901e04c3fSmrg ralloc_free(block->use); 31001e04c3fSmrg ralloc_free(block->live_in); 31101e04c3fSmrg ralloc_free(block->live_out); 31201e04c3fSmrg } 31301e04c3fSmrg } 31401e04c3fSmrg 31501e04c3fSmrg c->temp_start = rzalloc_array(c, int, c->num_temps); 31601e04c3fSmrg c->temp_end = rzalloc_array(c, int, c->num_temps); 31701e04c3fSmrg 31801e04c3fSmrg for (int i = 0; i < c->num_temps; i++) { 31901e04c3fSmrg c->temp_start[i] = MAX_INSTRUCTION; 32001e04c3fSmrg c->temp_end[i] = -1; 32101e04c3fSmrg } 32201e04c3fSmrg 32301e04c3fSmrg vir_for_each_block(block, c) { 32401e04c3fSmrg block->def = rzalloc_array(c, BITSET_WORD, bitset_words); 325ed98bd31Smaya block->defin = rzalloc_array(c, BITSET_WORD, bitset_words); 326ed98bd31Smaya block->defout = rzalloc_array(c, BITSET_WORD, bitset_words); 32701e04c3fSmrg block->use = rzalloc_array(c, BITSET_WORD, bitset_words); 32801e04c3fSmrg block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words); 32901e04c3fSmrg block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words); 33001e04c3fSmrg } 33101e04c3fSmrg 33201e04c3fSmrg vir_setup_def_use(c); 33301e04c3fSmrg 33401e04c3fSmrg while (vir_live_variables_dataflow(c, bitset_words)) 33501e04c3fSmrg ; 33601e04c3fSmrg 337ed98bd31Smaya while (vir_live_variables_defin_defout_dataflow(c, bitset_words)) 338ed98bd31Smaya ; 339ed98bd31Smaya 34001e04c3fSmrg vir_compute_start_end(c, c->num_temps); 34101e04c3fSmrg 34201e04c3fSmrg c->live_intervals_valid = true; 34301e04c3fSmrg} 344