17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2018 Valve Corporation 37ec681f3Smrg * Copyright © 2018 Google 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 107ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 217ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 227ec681f3Smrg * IN THE SOFTWARE. 237ec681f3Smrg * 247ec681f3Smrg */ 257ec681f3Smrg 267ec681f3Smrg#include "aco_builder.h" 277ec681f3Smrg#include "aco_ir.h" 287ec681f3Smrg 297ec681f3Smrg#include "common/sid.h" 307ec681f3Smrg 317ec681f3Smrg#include <algorithm> 327ec681f3Smrg#include <cstring> 337ec681f3Smrg#include <map> 347ec681f3Smrg#include <set> 357ec681f3Smrg#include <stack> 367ec681f3Smrg#include <unordered_map> 377ec681f3Smrg#include <unordered_set> 387ec681f3Smrg#include <vector> 397ec681f3Smrg 407ec681f3Smrgnamespace std { 417ec681f3Smrgtemplate <> struct hash<aco::Temp> { 427ec681f3Smrg size_t operator()(aco::Temp temp) const noexcept 437ec681f3Smrg { 447ec681f3Smrg uint32_t v; 457ec681f3Smrg std::memcpy(&v, &temp, sizeof(temp)); 467ec681f3Smrg return std::hash<uint32_t>{}(v); 477ec681f3Smrg } 487ec681f3Smrg}; 497ec681f3Smrg} // namespace std 507ec681f3Smrg 517ec681f3Smrg/* 527ec681f3Smrg * Implements the spilling algorithm on SSA-form from 537ec681f3Smrg * "Register Spilling and Live-Range Splitting for SSA-Form Programs" 547ec681f3Smrg * by Matthias Braun and Sebastian Hack. 557ec681f3Smrg */ 567ec681f3Smrg 577ec681f3Smrgnamespace aco { 587ec681f3Smrg 597ec681f3Smrgnamespace { 607ec681f3Smrg 617ec681f3Smrgstruct remat_info { 627ec681f3Smrg Instruction* instr; 637ec681f3Smrg}; 647ec681f3Smrg 657ec681f3Smrgstruct spill_ctx { 667ec681f3Smrg RegisterDemand target_pressure; 677ec681f3Smrg Program* program; 687ec681f3Smrg std::vector<std::vector<RegisterDemand>> register_demand; 697ec681f3Smrg std::vector<std::map<Temp, Temp>> renames; 707ec681f3Smrg std::vector<std::unordered_map<Temp, uint32_t>> spills_entry; 717ec681f3Smrg std::vector<std::unordered_map<Temp, uint32_t>> spills_exit; 727ec681f3Smrg 737ec681f3Smrg std::vector<bool> processed; 747ec681f3Smrg std::stack<Block*, std::vector<Block*>> loop_header; 757ec681f3Smrg std::vector<std::unordered_map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_start; 767ec681f3Smrg std::vector<std::unordered_map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_end; 777ec681f3Smrg std::vector<std::vector<std::pair<Temp, uint32_t>>> local_next_use_distance; /* Working buffer */ 787ec681f3Smrg std::vector<std::pair<RegClass, std::unordered_set<uint32_t>>> interferences; 797ec681f3Smrg std::vector<std::vector<uint32_t>> affinities; 807ec681f3Smrg std::vector<bool> is_reloaded; 817ec681f3Smrg std::unordered_map<Temp, remat_info> remat; 827ec681f3Smrg std::set<Instruction*> unused_remats; 837ec681f3Smrg unsigned wave_size; 847ec681f3Smrg 857ec681f3Smrg spill_ctx(const RegisterDemand target_pressure_, Program* program_, 867ec681f3Smrg std::vector<std::vector<RegisterDemand>> register_demand_) 877ec681f3Smrg : target_pressure(target_pressure_), program(program_), 887ec681f3Smrg register_demand(std::move(register_demand_)), renames(program->blocks.size()), 897ec681f3Smrg spills_entry(program->blocks.size()), spills_exit(program->blocks.size()), 907ec681f3Smrg processed(program->blocks.size(), false), wave_size(program->wave_size) 917ec681f3Smrg {} 927ec681f3Smrg 937ec681f3Smrg void add_affinity(uint32_t first, uint32_t second) 947ec681f3Smrg { 957ec681f3Smrg unsigned found_first = affinities.size(); 967ec681f3Smrg unsigned found_second = affinities.size(); 977ec681f3Smrg for (unsigned i = 0; i < affinities.size(); i++) { 987ec681f3Smrg std::vector<uint32_t>& vec = affinities[i]; 997ec681f3Smrg for (uint32_t entry : vec) { 1007ec681f3Smrg if (entry == first) 1017ec681f3Smrg found_first = i; 1027ec681f3Smrg else if (entry == second) 1037ec681f3Smrg found_second = i; 1047ec681f3Smrg } 1057ec681f3Smrg } 1067ec681f3Smrg if (found_first == affinities.size() && found_second == affinities.size()) { 1077ec681f3Smrg affinities.emplace_back(std::vector<uint32_t>({first, second})); 1087ec681f3Smrg } else if (found_first < affinities.size() && found_second == affinities.size()) { 1097ec681f3Smrg affinities[found_first].push_back(second); 1107ec681f3Smrg } else if (found_second < affinities.size() && found_first == affinities.size()) { 1117ec681f3Smrg affinities[found_second].push_back(first); 1127ec681f3Smrg } else if (found_first != found_second) { 1137ec681f3Smrg /* merge second into first */ 1147ec681f3Smrg affinities[found_first].insert(affinities[found_first].end(), 1157ec681f3Smrg affinities[found_second].begin(), 1167ec681f3Smrg affinities[found_second].end()); 1177ec681f3Smrg affinities.erase(std::next(affinities.begin(), found_second)); 1187ec681f3Smrg } else { 1197ec681f3Smrg assert(found_first == found_second); 1207ec681f3Smrg } 1217ec681f3Smrg } 1227ec681f3Smrg 1237ec681f3Smrg void add_interference(uint32_t first, uint32_t second) 1247ec681f3Smrg { 1257ec681f3Smrg if (interferences[first].first.type() != interferences[second].first.type()) 1267ec681f3Smrg return; 1277ec681f3Smrg 1287ec681f3Smrg bool inserted = interferences[first].second.insert(second).second; 1297ec681f3Smrg if (inserted) 1307ec681f3Smrg interferences[second].second.insert(first); 1317ec681f3Smrg } 1327ec681f3Smrg 1337ec681f3Smrg uint32_t allocate_spill_id(RegClass rc) 1347ec681f3Smrg { 1357ec681f3Smrg interferences.emplace_back(rc, std::unordered_set<uint32_t>()); 1367ec681f3Smrg is_reloaded.push_back(false); 1377ec681f3Smrg return next_spill_id++; 1387ec681f3Smrg } 1397ec681f3Smrg 1407ec681f3Smrg uint32_t next_spill_id = 0; 1417ec681f3Smrg}; 1427ec681f3Smrg 1437ec681f3Smrgint32_t 1447ec681f3Smrgget_dominator(int idx_a, int idx_b, Program* program, bool is_linear) 1457ec681f3Smrg{ 1467ec681f3Smrg 1477ec681f3Smrg if (idx_a == -1) 1487ec681f3Smrg return idx_b; 1497ec681f3Smrg if (idx_b == -1) 1507ec681f3Smrg return idx_a; 1517ec681f3Smrg if (is_linear) { 1527ec681f3Smrg while (idx_a != idx_b) { 1537ec681f3Smrg if (idx_a > idx_b) 1547ec681f3Smrg idx_a = program->blocks[idx_a].linear_idom; 1557ec681f3Smrg else 1567ec681f3Smrg idx_b = program->blocks[idx_b].linear_idom; 1577ec681f3Smrg } 1587ec681f3Smrg } else { 1597ec681f3Smrg while (idx_a != idx_b) { 1607ec681f3Smrg if (idx_a > idx_b) 1617ec681f3Smrg idx_a = program->blocks[idx_a].logical_idom; 1627ec681f3Smrg else 1637ec681f3Smrg idx_b = program->blocks[idx_b].logical_idom; 1647ec681f3Smrg } 1657ec681f3Smrg } 1667ec681f3Smrg assert(idx_a != -1); 1677ec681f3Smrg return idx_a; 1687ec681f3Smrg} 1697ec681f3Smrg 1707ec681f3Smrgvoid 1717ec681f3Smrgnext_uses_per_block(spill_ctx& ctx, unsigned block_idx, uint32_t& worklist) 1727ec681f3Smrg{ 1737ec681f3Smrg Block* block = &ctx.program->blocks[block_idx]; 1747ec681f3Smrg ctx.next_use_distances_start[block_idx] = ctx.next_use_distances_end[block_idx]; 1757ec681f3Smrg auto& next_use_distances_start = ctx.next_use_distances_start[block_idx]; 1767ec681f3Smrg 1777ec681f3Smrg /* to compute the next use distance at the beginning of the block, we have to add the block's 1787ec681f3Smrg * size */ 1797ec681f3Smrg for (std::unordered_map<Temp, std::pair<uint32_t, uint32_t>>::iterator it = 1807ec681f3Smrg next_use_distances_start.begin(); 1817ec681f3Smrg it != next_use_distances_start.end(); ++it) 1827ec681f3Smrg it->second.second = it->second.second + block->instructions.size(); 1837ec681f3Smrg 1847ec681f3Smrg int idx = block->instructions.size() - 1; 1857ec681f3Smrg while (idx >= 0) { 1867ec681f3Smrg aco_ptr<Instruction>& instr = block->instructions[idx]; 1877ec681f3Smrg 1887ec681f3Smrg if (instr->opcode == aco_opcode::p_linear_phi || instr->opcode == aco_opcode::p_phi) 1897ec681f3Smrg break; 1907ec681f3Smrg 1917ec681f3Smrg for (const Definition& def : instr->definitions) { 1927ec681f3Smrg if (def.isTemp()) 1937ec681f3Smrg next_use_distances_start.erase(def.getTemp()); 1947ec681f3Smrg } 1957ec681f3Smrg 1967ec681f3Smrg for (const Operand& op : instr->operands) { 1977ec681f3Smrg /* omit exec mask */ 1987ec681f3Smrg if (op.isFixed() && op.physReg() == exec) 1997ec681f3Smrg continue; 2007ec681f3Smrg if (op.regClass().type() == RegType::vgpr && op.regClass().is_linear()) 2017ec681f3Smrg continue; 2027ec681f3Smrg if (op.isTemp()) 2037ec681f3Smrg next_use_distances_start[op.getTemp()] = {block_idx, idx}; 2047ec681f3Smrg } 2057ec681f3Smrg idx--; 2067ec681f3Smrg } 2077ec681f3Smrg 2087ec681f3Smrg assert(block_idx != 0 || next_use_distances_start.empty()); 2097ec681f3Smrg std::unordered_set<Temp> phi_defs; 2107ec681f3Smrg while (idx >= 0) { 2117ec681f3Smrg aco_ptr<Instruction>& instr = block->instructions[idx]; 2127ec681f3Smrg assert(instr->opcode == aco_opcode::p_linear_phi || instr->opcode == aco_opcode::p_phi); 2137ec681f3Smrg 2147ec681f3Smrg std::pair<uint32_t, uint32_t> distance{block_idx, 0}; 2157ec681f3Smrg 2167ec681f3Smrg auto it = instr->definitions[0].isTemp() ? next_use_distances_start.find(instr->definitions[0].getTemp()) 2177ec681f3Smrg : next_use_distances_start.end(); 2187ec681f3Smrg if (it != next_use_distances_start.end() && 2197ec681f3Smrg phi_defs.insert(instr->definitions[0].getTemp()).second) { 2207ec681f3Smrg distance = it->second; 2217ec681f3Smrg } 2227ec681f3Smrg 2237ec681f3Smrg for (unsigned i = 0; i < instr->operands.size(); i++) { 2247ec681f3Smrg unsigned pred_idx = 2257ec681f3Smrg instr->opcode == aco_opcode::p_phi ? block->logical_preds[i] : block->linear_preds[i]; 2267ec681f3Smrg if (instr->operands[i].isTemp()) { 2277ec681f3Smrg auto insert_result = ctx.next_use_distances_end[pred_idx].insert( 2287ec681f3Smrg std::make_pair(instr->operands[i].getTemp(), distance)); 2297ec681f3Smrg const bool inserted = insert_result.second; 2307ec681f3Smrg std::pair<uint32_t, uint32_t>& entry_distance = insert_result.first->second; 2317ec681f3Smrg if (inserted || entry_distance != distance) 2327ec681f3Smrg worklist = std::max(worklist, pred_idx + 1); 2337ec681f3Smrg entry_distance = distance; 2347ec681f3Smrg } 2357ec681f3Smrg } 2367ec681f3Smrg idx--; 2377ec681f3Smrg } 2387ec681f3Smrg 2397ec681f3Smrg /* all remaining live vars must be live-out at the predecessors */ 2407ec681f3Smrg for (std::pair<const Temp, std::pair<uint32_t, uint32_t>>& pair : next_use_distances_start) { 2417ec681f3Smrg Temp temp = pair.first; 2427ec681f3Smrg if (phi_defs.count(temp)) { 2437ec681f3Smrg continue; 2447ec681f3Smrg } 2457ec681f3Smrg uint32_t distance = pair.second.second; 2467ec681f3Smrg uint32_t dom = pair.second.first; 2477ec681f3Smrg std::vector<unsigned>& preds = temp.is_linear() ? block->linear_preds : block->logical_preds; 2487ec681f3Smrg for (unsigned pred_idx : preds) { 2497ec681f3Smrg if (ctx.program->blocks[pred_idx].loop_nest_depth > block->loop_nest_depth) 2507ec681f3Smrg distance += 0xFFFF; 2517ec681f3Smrg auto insert_result = ctx.next_use_distances_end[pred_idx].insert( 2527ec681f3Smrg std::make_pair(temp, std::pair<uint32_t, uint32_t>{})); 2537ec681f3Smrg const bool inserted = insert_result.second; 2547ec681f3Smrg std::pair<uint32_t, uint32_t>& entry_distance = insert_result.first->second; 2557ec681f3Smrg 2567ec681f3Smrg if (!inserted) { 2577ec681f3Smrg dom = get_dominator(dom, entry_distance.first, ctx.program, temp.is_linear()); 2587ec681f3Smrg distance = std::min(entry_distance.second, distance); 2597ec681f3Smrg } 2607ec681f3Smrg if (entry_distance != std::pair<uint32_t, uint32_t>{dom, distance}) { 2617ec681f3Smrg worklist = std::max(worklist, pred_idx + 1); 2627ec681f3Smrg entry_distance = {dom, distance}; 2637ec681f3Smrg } 2647ec681f3Smrg } 2657ec681f3Smrg } 2667ec681f3Smrg} 2677ec681f3Smrg 2687ec681f3Smrgvoid 2697ec681f3Smrgcompute_global_next_uses(spill_ctx& ctx) 2707ec681f3Smrg{ 2717ec681f3Smrg ctx.next_use_distances_start.resize(ctx.program->blocks.size()); 2727ec681f3Smrg ctx.next_use_distances_end.resize(ctx.program->blocks.size()); 2737ec681f3Smrg 2747ec681f3Smrg uint32_t worklist = ctx.program->blocks.size(); 2757ec681f3Smrg while (worklist) { 2767ec681f3Smrg unsigned block_idx = --worklist; 2777ec681f3Smrg next_uses_per_block(ctx, block_idx, worklist); 2787ec681f3Smrg } 2797ec681f3Smrg} 2807ec681f3Smrg 2817ec681f3Smrgbool 2827ec681f3Smrgshould_rematerialize(aco_ptr<Instruction>& instr) 2837ec681f3Smrg{ 2847ec681f3Smrg /* TODO: rematerialization is only supported for VOP1, SOP1 and PSEUDO */ 2857ec681f3Smrg if (instr->format != Format::VOP1 && instr->format != Format::SOP1 && 2867ec681f3Smrg instr->format != Format::PSEUDO && instr->format != Format::SOPK) 2877ec681f3Smrg return false; 2887ec681f3Smrg /* TODO: pseudo-instruction rematerialization is only supported for 2897ec681f3Smrg * p_create_vector/p_parallelcopy */ 2907ec681f3Smrg if (instr->isPseudo() && instr->opcode != aco_opcode::p_create_vector && 2917ec681f3Smrg instr->opcode != aco_opcode::p_parallelcopy) 2927ec681f3Smrg return false; 2937ec681f3Smrg if (instr->isSOPK() && instr->opcode != aco_opcode::s_movk_i32) 2947ec681f3Smrg return false; 2957ec681f3Smrg 2967ec681f3Smrg for (const Operand& op : instr->operands) { 2977ec681f3Smrg /* TODO: rematerialization using temporaries isn't yet supported */ 2987ec681f3Smrg if (!op.isConstant()) 2997ec681f3Smrg return false; 3007ec681f3Smrg } 3017ec681f3Smrg 3027ec681f3Smrg /* TODO: rematerialization with multiple definitions isn't yet supported */ 3037ec681f3Smrg if (instr->definitions.size() > 1) 3047ec681f3Smrg return false; 3057ec681f3Smrg 3067ec681f3Smrg return true; 3077ec681f3Smrg} 3087ec681f3Smrg 3097ec681f3Smrgaco_ptr<Instruction> 3107ec681f3Smrgdo_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id) 3117ec681f3Smrg{ 3127ec681f3Smrg std::unordered_map<Temp, remat_info>::iterator remat = ctx.remat.find(tmp); 3137ec681f3Smrg if (remat != ctx.remat.end()) { 3147ec681f3Smrg Instruction* instr = remat->second.instr; 3157ec681f3Smrg assert((instr->isVOP1() || instr->isSOP1() || instr->isPseudo() || instr->isSOPK()) && 3167ec681f3Smrg "unsupported"); 3177ec681f3Smrg assert((instr->format != Format::PSEUDO || instr->opcode == aco_opcode::p_create_vector || 3187ec681f3Smrg instr->opcode == aco_opcode::p_parallelcopy) && 3197ec681f3Smrg "unsupported"); 3207ec681f3Smrg assert(instr->definitions.size() == 1 && "unsupported"); 3217ec681f3Smrg 3227ec681f3Smrg aco_ptr<Instruction> res; 3237ec681f3Smrg if (instr->isVOP1()) { 3247ec681f3Smrg res.reset(create_instruction<VOP1_instruction>( 3257ec681f3Smrg instr->opcode, instr->format, instr->operands.size(), instr->definitions.size())); 3267ec681f3Smrg } else if (instr->isSOP1()) { 3277ec681f3Smrg res.reset(create_instruction<SOP1_instruction>( 3287ec681f3Smrg instr->opcode, instr->format, instr->operands.size(), instr->definitions.size())); 3297ec681f3Smrg } else if (instr->isPseudo()) { 3307ec681f3Smrg res.reset(create_instruction<Pseudo_instruction>( 3317ec681f3Smrg instr->opcode, instr->format, instr->operands.size(), instr->definitions.size())); 3327ec681f3Smrg } else if (instr->isSOPK()) { 3337ec681f3Smrg res.reset(create_instruction<SOPK_instruction>( 3347ec681f3Smrg instr->opcode, instr->format, instr->operands.size(), instr->definitions.size())); 3357ec681f3Smrg res->sopk().imm = instr->sopk().imm; 3367ec681f3Smrg } 3377ec681f3Smrg for (unsigned i = 0; i < instr->operands.size(); i++) { 3387ec681f3Smrg res->operands[i] = instr->operands[i]; 3397ec681f3Smrg if (instr->operands[i].isTemp()) { 3407ec681f3Smrg assert(false && "unsupported"); 3417ec681f3Smrg if (ctx.remat.count(instr->operands[i].getTemp())) 3427ec681f3Smrg ctx.unused_remats.erase(ctx.remat[instr->operands[i].getTemp()].instr); 3437ec681f3Smrg } 3447ec681f3Smrg } 3457ec681f3Smrg res->definitions[0] = Definition(new_name); 3467ec681f3Smrg return res; 3477ec681f3Smrg } else { 3487ec681f3Smrg aco_ptr<Pseudo_instruction> reload{ 3497ec681f3Smrg create_instruction<Pseudo_instruction>(aco_opcode::p_reload, Format::PSEUDO, 1, 1)}; 3507ec681f3Smrg reload->operands[0] = Operand::c32(spill_id); 3517ec681f3Smrg reload->definitions[0] = Definition(new_name); 3527ec681f3Smrg ctx.is_reloaded[spill_id] = true; 3537ec681f3Smrg return reload; 3547ec681f3Smrg } 3557ec681f3Smrg} 3567ec681f3Smrg 3577ec681f3Smrgvoid 3587ec681f3Smrgget_rematerialize_info(spill_ctx& ctx) 3597ec681f3Smrg{ 3607ec681f3Smrg for (Block& block : ctx.program->blocks) { 3617ec681f3Smrg bool logical = false; 3627ec681f3Smrg for (aco_ptr<Instruction>& instr : block.instructions) { 3637ec681f3Smrg if (instr->opcode == aco_opcode::p_logical_start) 3647ec681f3Smrg logical = true; 3657ec681f3Smrg else if (instr->opcode == aco_opcode::p_logical_end) 3667ec681f3Smrg logical = false; 3677ec681f3Smrg if (logical && should_rematerialize(instr)) { 3687ec681f3Smrg for (const Definition& def : instr->definitions) { 3697ec681f3Smrg if (def.isTemp()) { 3707ec681f3Smrg ctx.remat[def.getTemp()] = remat_info{instr.get()}; 3717ec681f3Smrg ctx.unused_remats.insert(instr.get()); 3727ec681f3Smrg } 3737ec681f3Smrg } 3747ec681f3Smrg } 3757ec681f3Smrg } 3767ec681f3Smrg } 3777ec681f3Smrg} 3787ec681f3Smrg 3797ec681f3Smrgvoid 3807ec681f3Smrgupdate_local_next_uses(spill_ctx& ctx, Block* block, 3817ec681f3Smrg std::vector<std::vector<std::pair<Temp, uint32_t>>>& local_next_uses) 3827ec681f3Smrg{ 3837ec681f3Smrg if (local_next_uses.size() < block->instructions.size()) { 3847ec681f3Smrg /* Allocate more next-use-maps. Note that by never reducing the vector size, we enable 3857ec681f3Smrg * future calls to this function to re-use already allocated map memory. */ 3867ec681f3Smrg local_next_uses.resize(block->instructions.size()); 3877ec681f3Smrg } 3887ec681f3Smrg 3897ec681f3Smrg local_next_uses[block->instructions.size() - 1].clear(); 3907ec681f3Smrg for (std::pair<const Temp, std::pair<uint32_t, uint32_t>>& pair : 3917ec681f3Smrg ctx.next_use_distances_end[block->index]) { 3927ec681f3Smrg local_next_uses[block->instructions.size() - 1].push_back(std::make_pair<Temp, uint32_t>( 3937ec681f3Smrg (Temp)pair.first, pair.second.second + block->instructions.size())); 3947ec681f3Smrg } 3957ec681f3Smrg 3967ec681f3Smrg for (int idx = block->instructions.size() - 1; idx >= 0; idx--) { 3977ec681f3Smrg aco_ptr<Instruction>& instr = block->instructions[idx]; 3987ec681f3Smrg if (!instr) 3997ec681f3Smrg break; 4007ec681f3Smrg if (instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi) 4017ec681f3Smrg break; 4027ec681f3Smrg 4037ec681f3Smrg if (idx != (int)block->instructions.size() - 1) { 4047ec681f3Smrg local_next_uses[idx] = local_next_uses[idx + 1]; 4057ec681f3Smrg } 4067ec681f3Smrg 4077ec681f3Smrg for (const Operand& op : instr->operands) { 4087ec681f3Smrg if (op.isFixed() && op.physReg() == exec) 4097ec681f3Smrg continue; 4107ec681f3Smrg if (op.regClass().type() == RegType::vgpr && op.regClass().is_linear()) 4117ec681f3Smrg continue; 4127ec681f3Smrg if (op.isTemp()) { 4137ec681f3Smrg auto it = std::find_if(local_next_uses[idx].begin(), local_next_uses[idx].end(), 4147ec681f3Smrg [op](auto& pair) { return pair.first == op.getTemp(); }); 4157ec681f3Smrg if (it == local_next_uses[idx].end()) { 4167ec681f3Smrg local_next_uses[idx].push_back(std::make_pair<Temp, uint32_t>(op.getTemp(), idx)); 4177ec681f3Smrg } else { 4187ec681f3Smrg it->second = idx; 4197ec681f3Smrg } 4207ec681f3Smrg } 4217ec681f3Smrg } 4227ec681f3Smrg for (const Definition& def : instr->definitions) { 4237ec681f3Smrg if (def.isTemp()) { 4247ec681f3Smrg auto it = std::find_if(local_next_uses[idx].begin(), local_next_uses[idx].end(), 4257ec681f3Smrg [def](auto& pair) { return pair.first == def.getTemp(); }); 4267ec681f3Smrg if (it != local_next_uses[idx].end()) { 4277ec681f3Smrg local_next_uses[idx].erase(it); 4287ec681f3Smrg } 4297ec681f3Smrg } 4307ec681f3Smrg } 4317ec681f3Smrg } 4327ec681f3Smrg} 4337ec681f3Smrg 4347ec681f3SmrgRegisterDemand 4357ec681f3Smrgget_demand_before(spill_ctx& ctx, unsigned block_idx, unsigned idx) 4367ec681f3Smrg{ 4377ec681f3Smrg if (idx == 0) { 4387ec681f3Smrg RegisterDemand demand = ctx.register_demand[block_idx][idx]; 4397ec681f3Smrg aco_ptr<Instruction>& instr = ctx.program->blocks[block_idx].instructions[idx]; 4407ec681f3Smrg aco_ptr<Instruction> instr_before(nullptr); 4417ec681f3Smrg return get_demand_before(demand, instr, instr_before); 4427ec681f3Smrg } else { 4437ec681f3Smrg return ctx.register_demand[block_idx][idx - 1]; 4447ec681f3Smrg } 4457ec681f3Smrg} 4467ec681f3Smrg 4477ec681f3SmrgRegisterDemand 4487ec681f3Smrgget_live_in_demand(spill_ctx& ctx, unsigned block_idx) 4497ec681f3Smrg{ 4507ec681f3Smrg unsigned idx = 0; 4517ec681f3Smrg RegisterDemand reg_pressure = RegisterDemand(); 4527ec681f3Smrg Block& block = ctx.program->blocks[block_idx]; 4537ec681f3Smrg for (aco_ptr<Instruction>& phi : block.instructions) { 4547ec681f3Smrg if (!is_phi(phi)) 4557ec681f3Smrg break; 4567ec681f3Smrg idx++; 4577ec681f3Smrg 4587ec681f3Smrg /* Killed phi definitions increase pressure in the predecessor but not 4597ec681f3Smrg * the block they're in. Since the loops below are both to control 4607ec681f3Smrg * pressure of the start of this block and the ends of it's 4617ec681f3Smrg * predecessors, we need to count killed unspilled phi definitions here. */ 4627ec681f3Smrg if (phi->definitions[0].isTemp() && phi->definitions[0].isKill() && 4637ec681f3Smrg !ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp())) 4647ec681f3Smrg reg_pressure += phi->definitions[0].getTemp(); 4657ec681f3Smrg } 4667ec681f3Smrg 4677ec681f3Smrg reg_pressure += get_demand_before(ctx, block_idx, idx); 4687ec681f3Smrg 4697ec681f3Smrg /* Consider register pressure from linear predecessors. This can affect 4707ec681f3Smrg * reg_pressure if the branch instructions define sgprs. */ 4717ec681f3Smrg for (unsigned pred : block.linear_preds) 4727ec681f3Smrg reg_pressure.sgpr = 4737ec681f3Smrg std::max<int16_t>(reg_pressure.sgpr, ctx.register_demand[pred].back().sgpr); 4747ec681f3Smrg 4757ec681f3Smrg return reg_pressure; 4767ec681f3Smrg} 4777ec681f3Smrg 4787ec681f3SmrgRegisterDemand 4797ec681f3Smrginit_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) 4807ec681f3Smrg{ 4817ec681f3Smrg RegisterDemand spilled_registers; 4827ec681f3Smrg 4837ec681f3Smrg /* first block, nothing was spilled before */ 4847ec681f3Smrg if (block_idx == 0) 4857ec681f3Smrg return {0, 0}; 4867ec681f3Smrg 4877ec681f3Smrg /* next use distances at the beginning of the current block */ 4887ec681f3Smrg const auto& next_use_distances = ctx.next_use_distances_start[block_idx]; 4897ec681f3Smrg 4907ec681f3Smrg /* loop header block */ 4917ec681f3Smrg if (block->loop_nest_depth > ctx.program->blocks[block_idx - 1].loop_nest_depth) { 4927ec681f3Smrg assert(block->linear_preds[0] == block_idx - 1); 4937ec681f3Smrg assert(block->logical_preds[0] == block_idx - 1); 4947ec681f3Smrg 4957ec681f3Smrg /* create new loop_info */ 4967ec681f3Smrg ctx.loop_header.emplace(block); 4977ec681f3Smrg 4987ec681f3Smrg /* check how many live-through variables should be spilled */ 4997ec681f3Smrg RegisterDemand reg_pressure = get_live_in_demand(ctx, block_idx); 5007ec681f3Smrg RegisterDemand loop_demand = reg_pressure; 5017ec681f3Smrg unsigned i = block_idx; 5027ec681f3Smrg while (ctx.program->blocks[i].loop_nest_depth >= block->loop_nest_depth) { 5037ec681f3Smrg assert(ctx.program->blocks.size() > i); 5047ec681f3Smrg loop_demand.update(ctx.program->blocks[i++].register_demand); 5057ec681f3Smrg } 5067ec681f3Smrg unsigned loop_end = i; 5077ec681f3Smrg 5087ec681f3Smrg for (auto spilled : ctx.spills_exit[block_idx - 1]) { 5097ec681f3Smrg auto it = next_use_distances.find(spilled.first); 5107ec681f3Smrg 5117ec681f3Smrg /* variable is not live at loop entry: probably a phi operand */ 5127ec681f3Smrg if (it == next_use_distances.end()) 5137ec681f3Smrg continue; 5147ec681f3Smrg 5157ec681f3Smrg /* keep constants and live-through variables spilled */ 5167ec681f3Smrg if (it->second.first >= loop_end || ctx.remat.count(spilled.first)) { 5177ec681f3Smrg ctx.spills_entry[block_idx][spilled.first] = spilled.second; 5187ec681f3Smrg spilled_registers += spilled.first; 5197ec681f3Smrg loop_demand -= spilled.first; 5207ec681f3Smrg } 5217ec681f3Smrg } 5227ec681f3Smrg 5237ec681f3Smrg /* select live-through variables and constants */ 5247ec681f3Smrg RegType type = RegType::vgpr; 5257ec681f3Smrg while (loop_demand.exceeds(ctx.target_pressure)) { 5267ec681f3Smrg /* if VGPR demand is low enough, select SGPRs */ 5277ec681f3Smrg if (type == RegType::vgpr && loop_demand.vgpr <= ctx.target_pressure.vgpr) 5287ec681f3Smrg type = RegType::sgpr; 5297ec681f3Smrg /* if SGPR demand is low enough, break */ 5307ec681f3Smrg if (type == RegType::sgpr && loop_demand.sgpr <= ctx.target_pressure.sgpr) 5317ec681f3Smrg break; 5327ec681f3Smrg 5337ec681f3Smrg unsigned distance = 0; 5347ec681f3Smrg Temp to_spill; 5357ec681f3Smrg for (const std::pair<const Temp, std::pair<uint32_t, uint32_t>>& pair : 5367ec681f3Smrg next_use_distances) { 5377ec681f3Smrg if (pair.first.type() == type && 5387ec681f3Smrg (pair.second.first >= loop_end || 5397ec681f3Smrg (ctx.remat.count(pair.first) && type == RegType::sgpr)) && 5407ec681f3Smrg pair.second.second > distance && !ctx.spills_entry[block_idx].count(pair.first)) { 5417ec681f3Smrg to_spill = pair.first; 5427ec681f3Smrg distance = pair.second.second; 5437ec681f3Smrg } 5447ec681f3Smrg } 5457ec681f3Smrg 5467ec681f3Smrg /* select SGPRs or break */ 5477ec681f3Smrg if (distance == 0) { 5487ec681f3Smrg if (type == RegType::sgpr) 5497ec681f3Smrg break; 5507ec681f3Smrg type = RegType::sgpr; 5517ec681f3Smrg continue; 5527ec681f3Smrg } 5537ec681f3Smrg 5547ec681f3Smrg uint32_t spill_id; 5557ec681f3Smrg if (!ctx.spills_exit[block_idx - 1].count(to_spill)) { 5567ec681f3Smrg spill_id = ctx.allocate_spill_id(to_spill.regClass()); 5577ec681f3Smrg } else { 5587ec681f3Smrg spill_id = ctx.spills_exit[block_idx - 1][to_spill]; 5597ec681f3Smrg } 5607ec681f3Smrg 5617ec681f3Smrg ctx.spills_entry[block_idx][to_spill] = spill_id; 5627ec681f3Smrg spilled_registers += to_spill; 5637ec681f3Smrg loop_demand -= to_spill; 5647ec681f3Smrg } 5657ec681f3Smrg 5667ec681f3Smrg /* shortcut */ 5677ec681f3Smrg if (!loop_demand.exceeds(ctx.target_pressure)) 5687ec681f3Smrg return spilled_registers; 5697ec681f3Smrg 5707ec681f3Smrg /* if reg pressure is too high at beginning of loop, add variables with furthest use */ 5717ec681f3Smrg reg_pressure -= spilled_registers; 5727ec681f3Smrg 5737ec681f3Smrg while (reg_pressure.exceeds(ctx.target_pressure)) { 5747ec681f3Smrg unsigned distance = 0; 5757ec681f3Smrg Temp to_spill; 5767ec681f3Smrg type = reg_pressure.vgpr > ctx.target_pressure.vgpr ? RegType::vgpr : RegType::sgpr; 5777ec681f3Smrg 5787ec681f3Smrg for (const std::pair<const Temp, std::pair<uint32_t, uint32_t>>& pair : 5797ec681f3Smrg next_use_distances) { 5807ec681f3Smrg if (pair.first.type() == type && pair.second.second > distance && 5817ec681f3Smrg !ctx.spills_entry[block_idx].count(pair.first)) { 5827ec681f3Smrg to_spill = pair.first; 5837ec681f3Smrg distance = pair.second.second; 5847ec681f3Smrg } 5857ec681f3Smrg } 5867ec681f3Smrg assert(distance != 0); 5877ec681f3Smrg ctx.spills_entry[block_idx][to_spill] = ctx.allocate_spill_id(to_spill.regClass()); 5887ec681f3Smrg spilled_registers += to_spill; 5897ec681f3Smrg reg_pressure -= to_spill; 5907ec681f3Smrg } 5917ec681f3Smrg 5927ec681f3Smrg return spilled_registers; 5937ec681f3Smrg } 5947ec681f3Smrg 5957ec681f3Smrg /* branch block */ 5967ec681f3Smrg if (block->linear_preds.size() == 1 && !(block->kind & block_kind_loop_exit)) { 5977ec681f3Smrg /* keep variables spilled if they are alive and not used in the current block */ 5987ec681f3Smrg unsigned pred_idx = block->linear_preds[0]; 5997ec681f3Smrg for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) { 6007ec681f3Smrg if (pair.first.type() != RegType::sgpr) { 6017ec681f3Smrg continue; 6027ec681f3Smrg } 6037ec681f3Smrg auto next_use_distance_it = next_use_distances.find(pair.first); 6047ec681f3Smrg if (next_use_distance_it != next_use_distances.end() && 6057ec681f3Smrg next_use_distance_it->second.first != block_idx) { 6067ec681f3Smrg ctx.spills_entry[block_idx].insert(pair); 6077ec681f3Smrg spilled_registers.sgpr += pair.first.size(); 6087ec681f3Smrg } 6097ec681f3Smrg } 6107ec681f3Smrg if (block->logical_preds.size() == 1) { 6117ec681f3Smrg pred_idx = block->logical_preds[0]; 6127ec681f3Smrg for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) { 6137ec681f3Smrg if (pair.first.type() != RegType::vgpr) { 6147ec681f3Smrg continue; 6157ec681f3Smrg } 6167ec681f3Smrg auto next_use_distance_it = next_use_distances.find(pair.first); 6177ec681f3Smrg if (next_use_distance_it != next_use_distances.end() && 6187ec681f3Smrg next_use_distance_it->second.first != block_idx) { 6197ec681f3Smrg ctx.spills_entry[block_idx].insert(pair); 6207ec681f3Smrg spilled_registers.vgpr += pair.first.size(); 6217ec681f3Smrg } 6227ec681f3Smrg } 6237ec681f3Smrg } 6247ec681f3Smrg 6257ec681f3Smrg /* if register demand is still too high, we just keep all spilled live vars 6267ec681f3Smrg * and process the block */ 6277ec681f3Smrg if (block->register_demand.sgpr - spilled_registers.sgpr > ctx.target_pressure.sgpr) { 6287ec681f3Smrg pred_idx = block->linear_preds[0]; 6297ec681f3Smrg for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) { 6307ec681f3Smrg if (pair.first.type() == RegType::sgpr && next_use_distances.count(pair.first) && 6317ec681f3Smrg ctx.spills_entry[block_idx].insert(pair).second) { 6327ec681f3Smrg spilled_registers.sgpr += pair.first.size(); 6337ec681f3Smrg } 6347ec681f3Smrg } 6357ec681f3Smrg } 6367ec681f3Smrg if (block->register_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr && 6377ec681f3Smrg block->logical_preds.size() == 1) { 6387ec681f3Smrg pred_idx = block->logical_preds[0]; 6397ec681f3Smrg for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) { 6407ec681f3Smrg if (pair.first.type() == RegType::vgpr && next_use_distances.count(pair.first) && 6417ec681f3Smrg ctx.spills_entry[block_idx].insert(pair).second) { 6427ec681f3Smrg spilled_registers.vgpr += pair.first.size(); 6437ec681f3Smrg } 6447ec681f3Smrg } 6457ec681f3Smrg } 6467ec681f3Smrg 6477ec681f3Smrg return spilled_registers; 6487ec681f3Smrg } 6497ec681f3Smrg 6507ec681f3Smrg /* else: merge block */ 6517ec681f3Smrg std::set<Temp> partial_spills; 6527ec681f3Smrg 6537ec681f3Smrg /* keep variables spilled on all incoming paths */ 6547ec681f3Smrg for (const std::pair<const Temp, std::pair<uint32_t, uint32_t>>& pair : next_use_distances) { 6557ec681f3Smrg std::vector<unsigned>& preds = 6567ec681f3Smrg pair.first.is_linear() ? block->linear_preds : block->logical_preds; 6577ec681f3Smrg /* If it can be rematerialized, keep the variable spilled if all predecessors do not reload 6587ec681f3Smrg * it. Otherwise, if any predecessor reloads it, ensure it's reloaded on all other 6597ec681f3Smrg * predecessors. The idea is that it's better in practice to rematerialize redundantly than to 6607ec681f3Smrg * create lots of phis. */ 6617ec681f3Smrg /* TODO: test this idea with more than Dawn of War III shaders (the current pipeline-db 6627ec681f3Smrg * doesn't seem to exercise this path much) */ 6637ec681f3Smrg bool remat = ctx.remat.count(pair.first); 6647ec681f3Smrg bool spill = !remat; 6657ec681f3Smrg uint32_t spill_id = 0; 6667ec681f3Smrg for (unsigned pred_idx : preds) { 6677ec681f3Smrg /* variable is not even live at the predecessor: probably from a phi */ 6687ec681f3Smrg if (!ctx.next_use_distances_end[pred_idx].count(pair.first)) { 6697ec681f3Smrg spill = false; 6707ec681f3Smrg break; 6717ec681f3Smrg } 6727ec681f3Smrg if (!ctx.spills_exit[pred_idx].count(pair.first)) { 6737ec681f3Smrg if (!remat) 6747ec681f3Smrg spill = false; 6757ec681f3Smrg } else { 6767ec681f3Smrg partial_spills.insert(pair.first); 6777ec681f3Smrg /* it might be that on one incoming path, the variable has a different spill_id, but 6787ec681f3Smrg * add_couple_code() will take care of that. */ 6797ec681f3Smrg spill_id = ctx.spills_exit[pred_idx][pair.first]; 6807ec681f3Smrg if (remat) 6817ec681f3Smrg spill = true; 6827ec681f3Smrg } 6837ec681f3Smrg } 6847ec681f3Smrg if (spill) { 6857ec681f3Smrg ctx.spills_entry[block_idx][pair.first] = spill_id; 6867ec681f3Smrg partial_spills.erase(pair.first); 6877ec681f3Smrg spilled_registers += pair.first; 6887ec681f3Smrg } 6897ec681f3Smrg } 6907ec681f3Smrg 6917ec681f3Smrg /* same for phis */ 6927ec681f3Smrg for (aco_ptr<Instruction>& phi : block->instructions) { 6937ec681f3Smrg if (!is_phi(phi)) 6947ec681f3Smrg break; 6957ec681f3Smrg if (!phi->definitions[0].isTemp()) 6967ec681f3Smrg continue; 6977ec681f3Smrg 6987ec681f3Smrg std::vector<unsigned>& preds = 6997ec681f3Smrg phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds; 7007ec681f3Smrg bool spill = true; 7017ec681f3Smrg 7027ec681f3Smrg for (unsigned i = 0; i < phi->operands.size(); i++) { 7037ec681f3Smrg /* non-temp operands can increase the register pressure */ 7047ec681f3Smrg if (!phi->operands[i].isTemp()) { 7057ec681f3Smrg partial_spills.insert(phi->definitions[0].getTemp()); 7067ec681f3Smrg continue; 7077ec681f3Smrg } 7087ec681f3Smrg 7097ec681f3Smrg if (!ctx.spills_exit[preds[i]].count(phi->operands[i].getTemp())) 7107ec681f3Smrg spill = false; 7117ec681f3Smrg else 7127ec681f3Smrg partial_spills.insert(phi->definitions[0].getTemp()); 7137ec681f3Smrg } 7147ec681f3Smrg if (spill) { 7157ec681f3Smrg ctx.spills_entry[block_idx][phi->definitions[0].getTemp()] = 7167ec681f3Smrg ctx.allocate_spill_id(phi->definitions[0].regClass()); 7177ec681f3Smrg partial_spills.erase(phi->definitions[0].getTemp()); 7187ec681f3Smrg spilled_registers += phi->definitions[0].getTemp(); 7197ec681f3Smrg } 7207ec681f3Smrg } 7217ec681f3Smrg 7227ec681f3Smrg /* if reg pressure at first instruction is still too high, add partially spilled variables */ 7237ec681f3Smrg RegisterDemand reg_pressure = get_live_in_demand(ctx, block_idx); 7247ec681f3Smrg reg_pressure -= spilled_registers; 7257ec681f3Smrg 7267ec681f3Smrg while (reg_pressure.exceeds(ctx.target_pressure)) { 7277ec681f3Smrg assert(!partial_spills.empty()); 7287ec681f3Smrg std::set<Temp>::iterator it = partial_spills.begin(); 7297ec681f3Smrg Temp to_spill = Temp(); 7307ec681f3Smrg unsigned distance = 0; 7317ec681f3Smrg RegType type = reg_pressure.vgpr > ctx.target_pressure.vgpr ? RegType::vgpr : RegType::sgpr; 7327ec681f3Smrg 7337ec681f3Smrg while (it != partial_spills.end()) { 7347ec681f3Smrg assert(!ctx.spills_entry[block_idx].count(*it)); 7357ec681f3Smrg 7367ec681f3Smrg if (it->type() == type && next_use_distances.at(*it).second > distance) { 7377ec681f3Smrg distance = next_use_distances.at(*it).second; 7387ec681f3Smrg to_spill = *it; 7397ec681f3Smrg } 7407ec681f3Smrg ++it; 7417ec681f3Smrg } 7427ec681f3Smrg assert(distance != 0); 7437ec681f3Smrg 7447ec681f3Smrg ctx.spills_entry[block_idx][to_spill] = ctx.allocate_spill_id(to_spill.regClass()); 7457ec681f3Smrg partial_spills.erase(to_spill); 7467ec681f3Smrg spilled_registers += to_spill; 7477ec681f3Smrg reg_pressure -= to_spill; 7487ec681f3Smrg } 7497ec681f3Smrg 7507ec681f3Smrg return spilled_registers; 7517ec681f3Smrg} 7527ec681f3Smrg 7537ec681f3Smrgvoid 7547ec681f3Smrgadd_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) 7557ec681f3Smrg{ 7567ec681f3Smrg /* no coupling code necessary */ 7577ec681f3Smrg if (block->linear_preds.size() == 0) 7587ec681f3Smrg return; 7597ec681f3Smrg 7607ec681f3Smrg std::vector<aco_ptr<Instruction>> instructions; 7617ec681f3Smrg /* branch block: TODO take other branch into consideration */ 7627ec681f3Smrg if (block->linear_preds.size() == 1 && 7637ec681f3Smrg !(block->kind & (block_kind_loop_exit | block_kind_loop_header))) { 7647ec681f3Smrg assert(ctx.processed[block->linear_preds[0]]); 7657ec681f3Smrg assert(ctx.register_demand[block_idx].size() == block->instructions.size()); 7667ec681f3Smrg std::vector<RegisterDemand> reg_demand; 7677ec681f3Smrg unsigned insert_idx = 0; 7687ec681f3Smrg RegisterDemand demand_before = get_demand_before(ctx, block_idx, 0); 7697ec681f3Smrg 7707ec681f3Smrg for (std::pair<const Temp, std::pair<uint32_t, uint32_t>>& live : 7717ec681f3Smrg ctx.next_use_distances_start[block_idx]) { 7727ec681f3Smrg const unsigned pred_idx = block->linear_preds[0]; 7737ec681f3Smrg 7747ec681f3Smrg if (!live.first.is_linear()) 7757ec681f3Smrg continue; 7767ec681f3Smrg /* still spilled */ 7777ec681f3Smrg if (ctx.spills_entry[block_idx].count(live.first)) 7787ec681f3Smrg continue; 7797ec681f3Smrg 7807ec681f3Smrg /* in register at end of predecessor */ 7817ec681f3Smrg auto spills_exit_it = ctx.spills_exit[pred_idx].find(live.first); 7827ec681f3Smrg if (spills_exit_it == ctx.spills_exit[pred_idx].end()) { 7837ec681f3Smrg std::map<Temp, Temp>::iterator it = ctx.renames[pred_idx].find(live.first); 7847ec681f3Smrg if (it != ctx.renames[pred_idx].end()) 7857ec681f3Smrg ctx.renames[block_idx].insert(*it); 7867ec681f3Smrg continue; 7877ec681f3Smrg } 7887ec681f3Smrg 7897ec681f3Smrg /* variable is spilled at predecessor and live at current block: create reload instruction */ 7907ec681f3Smrg Temp new_name = ctx.program->allocateTmp(live.first.regClass()); 7917ec681f3Smrg aco_ptr<Instruction> reload = do_reload(ctx, live.first, new_name, spills_exit_it->second); 7927ec681f3Smrg instructions.emplace_back(std::move(reload)); 7937ec681f3Smrg reg_demand.push_back(demand_before); 7947ec681f3Smrg ctx.renames[block_idx][live.first] = new_name; 7957ec681f3Smrg } 7967ec681f3Smrg 7977ec681f3Smrg if (block->logical_preds.size() == 1) { 7987ec681f3Smrg do { 7997ec681f3Smrg assert(insert_idx < block->instructions.size()); 8007ec681f3Smrg instructions.emplace_back(std::move(block->instructions[insert_idx])); 8017ec681f3Smrg reg_demand.push_back(ctx.register_demand[block_idx][insert_idx]); 8027ec681f3Smrg insert_idx++; 8037ec681f3Smrg } while (instructions.back()->opcode != aco_opcode::p_logical_start); 8047ec681f3Smrg 8057ec681f3Smrg unsigned pred_idx = block->logical_preds[0]; 8067ec681f3Smrg for (std::pair<const Temp, std::pair<uint32_t, uint32_t>>& live : 8077ec681f3Smrg ctx.next_use_distances_start[block_idx]) { 8087ec681f3Smrg if (live.first.is_linear()) 8097ec681f3Smrg continue; 8107ec681f3Smrg /* still spilled */ 8117ec681f3Smrg if (ctx.spills_entry[block_idx].count(live.first)) 8127ec681f3Smrg continue; 8137ec681f3Smrg 8147ec681f3Smrg /* in register at end of predecessor */ 8157ec681f3Smrg auto spills_exit_it = ctx.spills_exit[pred_idx].find(live.first); 8167ec681f3Smrg if (spills_exit_it == ctx.spills_exit[pred_idx].end()) { 8177ec681f3Smrg std::map<Temp, Temp>::iterator it = ctx.renames[pred_idx].find(live.first); 8187ec681f3Smrg if (it != ctx.renames[pred_idx].end()) 8197ec681f3Smrg ctx.renames[block_idx].insert(*it); 8207ec681f3Smrg continue; 8217ec681f3Smrg } 8227ec681f3Smrg 8237ec681f3Smrg /* variable is spilled at predecessor and live at current block: 8247ec681f3Smrg * create reload instruction */ 8257ec681f3Smrg Temp new_name = ctx.program->allocateTmp(live.first.regClass()); 8267ec681f3Smrg aco_ptr<Instruction> reload = 8277ec681f3Smrg do_reload(ctx, live.first, new_name, spills_exit_it->second); 8287ec681f3Smrg instructions.emplace_back(std::move(reload)); 8297ec681f3Smrg reg_demand.emplace_back(reg_demand.back()); 8307ec681f3Smrg ctx.renames[block_idx][live.first] = new_name; 8317ec681f3Smrg } 8327ec681f3Smrg } 8337ec681f3Smrg 8347ec681f3Smrg /* combine new reload instructions with original block */ 8357ec681f3Smrg if (!instructions.empty()) { 8367ec681f3Smrg reg_demand.insert(reg_demand.end(), 8377ec681f3Smrg std::next(ctx.register_demand[block->index].begin(), insert_idx), 8387ec681f3Smrg ctx.register_demand[block->index].end()); 8397ec681f3Smrg ctx.register_demand[block_idx] = std::move(reg_demand); 8407ec681f3Smrg instructions.insert(instructions.end(), 8417ec681f3Smrg std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>( 8427ec681f3Smrg std::next(block->instructions.begin(), insert_idx)), 8437ec681f3Smrg std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>( 8447ec681f3Smrg block->instructions.end())); 8457ec681f3Smrg block->instructions = std::move(instructions); 8467ec681f3Smrg } 8477ec681f3Smrg return; 8487ec681f3Smrg } 8497ec681f3Smrg 8507ec681f3Smrg /* loop header and merge blocks: check if all (linear) predecessors have been processed */ 8517ec681f3Smrg for (ASSERTED unsigned pred : block->linear_preds) 8527ec681f3Smrg assert(ctx.processed[pred]); 8537ec681f3Smrg 8547ec681f3Smrg /* iterate the phi nodes for which operands to spill at the predecessor */ 8557ec681f3Smrg for (aco_ptr<Instruction>& phi : block->instructions) { 8567ec681f3Smrg if (!is_phi(phi)) 8577ec681f3Smrg break; 8587ec681f3Smrg 8597ec681f3Smrg /* if the phi is not spilled, add to instructions */ 8607ec681f3Smrg if (!phi->definitions[0].isTemp() || 8617ec681f3Smrg !ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp())) { 8627ec681f3Smrg instructions.emplace_back(std::move(phi)); 8637ec681f3Smrg continue; 8647ec681f3Smrg } 8657ec681f3Smrg 8667ec681f3Smrg std::vector<unsigned>& preds = 8677ec681f3Smrg phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds; 8687ec681f3Smrg uint32_t def_spill_id = ctx.spills_entry[block_idx][phi->definitions[0].getTemp()]; 8697ec681f3Smrg 8707ec681f3Smrg for (unsigned i = 0; i < phi->operands.size(); i++) { 8717ec681f3Smrg if (phi->operands[i].isUndefined()) 8727ec681f3Smrg continue; 8737ec681f3Smrg 8747ec681f3Smrg unsigned pred_idx = preds[i]; 8757ec681f3Smrg Operand spill_op = phi->operands[i]; 8767ec681f3Smrg 8777ec681f3Smrg if (spill_op.isTemp()) { 8787ec681f3Smrg assert(phi->operands[i].isKill()); 8797ec681f3Smrg Temp var = phi->operands[i].getTemp(); 8807ec681f3Smrg 8817ec681f3Smrg std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var); 8827ec681f3Smrg /* prevent the definining instruction from being DCE'd if it could be rematerialized */ 8837ec681f3Smrg if (rename_it == ctx.renames[preds[i]].end() && ctx.remat.count(var)) 8847ec681f3Smrg ctx.unused_remats.erase(ctx.remat[var].instr); 8857ec681f3Smrg 8867ec681f3Smrg /* check if variable is already spilled at predecessor */ 8877ec681f3Smrg auto spilled = ctx.spills_exit[pred_idx].find(var); 8887ec681f3Smrg if (spilled != ctx.spills_exit[pred_idx].end()) { 8897ec681f3Smrg if (spilled->second != def_spill_id) 8907ec681f3Smrg ctx.add_affinity(def_spill_id, spilled->second); 8917ec681f3Smrg continue; 8927ec681f3Smrg } 8937ec681f3Smrg 8947ec681f3Smrg /* rename if necessary */ 8957ec681f3Smrg if (rename_it != ctx.renames[pred_idx].end()) { 8967ec681f3Smrg spill_op.setTemp(rename_it->second); 8977ec681f3Smrg ctx.renames[pred_idx].erase(rename_it); 8987ec681f3Smrg } 8997ec681f3Smrg } 9007ec681f3Smrg 9017ec681f3Smrg uint32_t spill_id = ctx.allocate_spill_id(phi->definitions[0].regClass()); 9027ec681f3Smrg 9037ec681f3Smrg /* add interferences and affinity */ 9047ec681f3Smrg for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) 9057ec681f3Smrg ctx.add_interference(spill_id, pair.second); 9067ec681f3Smrg ctx.add_affinity(def_spill_id, spill_id); 9077ec681f3Smrg 9087ec681f3Smrg aco_ptr<Pseudo_instruction> spill{ 9097ec681f3Smrg create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)}; 9107ec681f3Smrg spill->operands[0] = spill_op; 9117ec681f3Smrg spill->operands[1] = Operand::c32(spill_id); 9127ec681f3Smrg Block& pred = ctx.program->blocks[pred_idx]; 9137ec681f3Smrg unsigned idx = pred.instructions.size(); 9147ec681f3Smrg do { 9157ec681f3Smrg assert(idx != 0); 9167ec681f3Smrg idx--; 9177ec681f3Smrg } while (phi->opcode == aco_opcode::p_phi && 9187ec681f3Smrg pred.instructions[idx]->opcode != aco_opcode::p_logical_end); 9197ec681f3Smrg std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx); 9207ec681f3Smrg pred.instructions.insert(it, std::move(spill)); 9217ec681f3Smrg if (spill_op.isTemp()) 9227ec681f3Smrg ctx.spills_exit[pred_idx][spill_op.getTemp()] = spill_id; 9237ec681f3Smrg } 9247ec681f3Smrg 9257ec681f3Smrg /* remove phi from instructions */ 9267ec681f3Smrg phi.reset(); 9277ec681f3Smrg } 9287ec681f3Smrg 9297ec681f3Smrg /* iterate all (other) spilled variables for which to spill at the predecessor */ 9307ec681f3Smrg // TODO: would be better to have them sorted: first vgprs and first with longest distance 9317ec681f3Smrg for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block_idx]) { 9327ec681f3Smrg std::vector<unsigned> preds = 9337ec681f3Smrg pair.first.is_linear() ? block->linear_preds : block->logical_preds; 9347ec681f3Smrg 9357ec681f3Smrg for (unsigned pred_idx : preds) { 9367ec681f3Smrg /* variable is already spilled at predecessor */ 9377ec681f3Smrg auto spilled = ctx.spills_exit[pred_idx].find(pair.first); 9387ec681f3Smrg if (spilled != ctx.spills_exit[pred_idx].end()) { 9397ec681f3Smrg if (spilled->second != pair.second) 9407ec681f3Smrg ctx.add_affinity(pair.second, spilled->second); 9417ec681f3Smrg continue; 9427ec681f3Smrg } 9437ec681f3Smrg 9447ec681f3Smrg /* variable is dead at predecessor, it must be from a phi: this works because of CSSA form */ 9457ec681f3Smrg if (!ctx.next_use_distances_end[pred_idx].count(pair.first)) 9467ec681f3Smrg continue; 9477ec681f3Smrg 9487ec681f3Smrg /* add interferences between spilled variable and predecessors exit spills */ 9497ec681f3Smrg for (std::pair<Temp, uint32_t> exit_spill : ctx.spills_exit[pred_idx]) { 9507ec681f3Smrg if (exit_spill.first == pair.first) 9517ec681f3Smrg continue; 9527ec681f3Smrg ctx.add_interference(exit_spill.second, pair.second); 9537ec681f3Smrg } 9547ec681f3Smrg 9557ec681f3Smrg /* variable is in register at predecessor and has to be spilled */ 9567ec681f3Smrg /* rename if necessary */ 9577ec681f3Smrg Temp var = pair.first; 9587ec681f3Smrg std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var); 9597ec681f3Smrg if (rename_it != ctx.renames[pred_idx].end()) { 9607ec681f3Smrg var = rename_it->second; 9617ec681f3Smrg ctx.renames[pred_idx].erase(rename_it); 9627ec681f3Smrg } 9637ec681f3Smrg 9647ec681f3Smrg aco_ptr<Pseudo_instruction> spill{ 9657ec681f3Smrg create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)}; 9667ec681f3Smrg spill->operands[0] = Operand(var); 9677ec681f3Smrg spill->operands[1] = Operand::c32(pair.second); 9687ec681f3Smrg Block& pred = ctx.program->blocks[pred_idx]; 9697ec681f3Smrg unsigned idx = pred.instructions.size(); 9707ec681f3Smrg do { 9717ec681f3Smrg assert(idx != 0); 9727ec681f3Smrg idx--; 9737ec681f3Smrg } while (pair.first.type() == RegType::vgpr && 9747ec681f3Smrg pred.instructions[idx]->opcode != aco_opcode::p_logical_end); 9757ec681f3Smrg std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx); 9767ec681f3Smrg pred.instructions.insert(it, std::move(spill)); 9777ec681f3Smrg ctx.spills_exit[pred.index][pair.first] = pair.second; 9787ec681f3Smrg } 9797ec681f3Smrg } 9807ec681f3Smrg 9817ec681f3Smrg /* iterate phis for which operands to reload */ 9827ec681f3Smrg for (aco_ptr<Instruction>& phi : instructions) { 9837ec681f3Smrg assert(phi->opcode == aco_opcode::p_phi || phi->opcode == aco_opcode::p_linear_phi); 9847ec681f3Smrg assert(!phi->definitions[0].isTemp() || 9857ec681f3Smrg !ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp())); 9867ec681f3Smrg 9877ec681f3Smrg std::vector<unsigned>& preds = 9887ec681f3Smrg phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds; 9897ec681f3Smrg for (unsigned i = 0; i < phi->operands.size(); i++) { 9907ec681f3Smrg if (!phi->operands[i].isTemp()) 9917ec681f3Smrg continue; 9927ec681f3Smrg unsigned pred_idx = preds[i]; 9937ec681f3Smrg 9947ec681f3Smrg /* if the operand was reloaded, rename */ 9957ec681f3Smrg if (!ctx.spills_exit[pred_idx].count(phi->operands[i].getTemp())) { 9967ec681f3Smrg std::map<Temp, Temp>::iterator it = 9977ec681f3Smrg ctx.renames[pred_idx].find(phi->operands[i].getTemp()); 9987ec681f3Smrg if (it != ctx.renames[pred_idx].end()) { 9997ec681f3Smrg phi->operands[i].setTemp(it->second); 10007ec681f3Smrg /* prevent the definining instruction from being DCE'd if it could be rematerialized */ 10017ec681f3Smrg } else { 10027ec681f3Smrg auto remat_it = ctx.remat.find(phi->operands[i].getTemp()); 10037ec681f3Smrg if (remat_it != ctx.remat.end()) { 10047ec681f3Smrg ctx.unused_remats.erase(remat_it->second.instr); 10057ec681f3Smrg } 10067ec681f3Smrg } 10077ec681f3Smrg continue; 10087ec681f3Smrg } 10097ec681f3Smrg 10107ec681f3Smrg Temp tmp = phi->operands[i].getTemp(); 10117ec681f3Smrg 10127ec681f3Smrg /* reload phi operand at end of predecessor block */ 10137ec681f3Smrg Temp new_name = ctx.program->allocateTmp(tmp.regClass()); 10147ec681f3Smrg Block& pred = ctx.program->blocks[pred_idx]; 10157ec681f3Smrg unsigned idx = pred.instructions.size(); 10167ec681f3Smrg do { 10177ec681f3Smrg assert(idx != 0); 10187ec681f3Smrg idx--; 10197ec681f3Smrg } while (phi->opcode == aco_opcode::p_phi && 10207ec681f3Smrg pred.instructions[idx]->opcode != aco_opcode::p_logical_end); 10217ec681f3Smrg std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx); 10227ec681f3Smrg aco_ptr<Instruction> reload = 10237ec681f3Smrg do_reload(ctx, tmp, new_name, ctx.spills_exit[pred_idx][tmp]); 10247ec681f3Smrg 10257ec681f3Smrg /* reload spilled exec mask directly to exec */ 10267ec681f3Smrg if (!phi->definitions[0].isTemp()) { 10277ec681f3Smrg assert(phi->definitions[0].isFixed() && phi->definitions[0].physReg() == exec); 10287ec681f3Smrg reload->definitions[0] = phi->definitions[0]; 10297ec681f3Smrg phi->operands[i] = Operand(exec, ctx.program->lane_mask); 10307ec681f3Smrg } else { 10317ec681f3Smrg ctx.spills_exit[pred_idx].erase(tmp); 10327ec681f3Smrg ctx.renames[pred_idx][tmp] = new_name; 10337ec681f3Smrg phi->operands[i].setTemp(new_name); 10347ec681f3Smrg } 10357ec681f3Smrg 10367ec681f3Smrg pred.instructions.insert(it, std::move(reload)); 10377ec681f3Smrg } 10387ec681f3Smrg } 10397ec681f3Smrg 10407ec681f3Smrg /* iterate live variables for which to reload */ 10417ec681f3Smrg // TODO: reload at current block if variable is spilled on all predecessors 10427ec681f3Smrg for (std::pair<const Temp, std::pair<uint32_t, uint32_t>>& pair : 10437ec681f3Smrg ctx.next_use_distances_start[block_idx]) { 10447ec681f3Smrg /* skip spilled variables */ 10457ec681f3Smrg if (ctx.spills_entry[block_idx].count(pair.first)) 10467ec681f3Smrg continue; 10477ec681f3Smrg std::vector<unsigned> preds = 10487ec681f3Smrg pair.first.is_linear() ? block->linear_preds : block->logical_preds; 10497ec681f3Smrg 10507ec681f3Smrg /* variable is dead at predecessor, it must be from a phi */ 10517ec681f3Smrg bool is_dead = false; 10527ec681f3Smrg for (unsigned pred_idx : preds) { 10537ec681f3Smrg if (!ctx.next_use_distances_end[pred_idx].count(pair.first)) 10547ec681f3Smrg is_dead = true; 10557ec681f3Smrg } 10567ec681f3Smrg if (is_dead) 10577ec681f3Smrg continue; 10587ec681f3Smrg for (unsigned pred_idx : preds) { 10597ec681f3Smrg /* the variable is not spilled at the predecessor */ 10607ec681f3Smrg if (!ctx.spills_exit[pred_idx].count(pair.first)) 10617ec681f3Smrg continue; 10627ec681f3Smrg 10637ec681f3Smrg /* variable is spilled at predecessor and has to be reloaded */ 10647ec681f3Smrg Temp new_name = ctx.program->allocateTmp(pair.first.regClass()); 10657ec681f3Smrg Block& pred = ctx.program->blocks[pred_idx]; 10667ec681f3Smrg unsigned idx = pred.instructions.size(); 10677ec681f3Smrg do { 10687ec681f3Smrg assert(idx != 0); 10697ec681f3Smrg idx--; 10707ec681f3Smrg } while (pair.first.type() == RegType::vgpr && 10717ec681f3Smrg pred.instructions[idx]->opcode != aco_opcode::p_logical_end); 10727ec681f3Smrg std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx); 10737ec681f3Smrg 10747ec681f3Smrg aco_ptr<Instruction> reload = 10757ec681f3Smrg do_reload(ctx, pair.first, new_name, ctx.spills_exit[pred.index][pair.first]); 10767ec681f3Smrg pred.instructions.insert(it, std::move(reload)); 10777ec681f3Smrg 10787ec681f3Smrg ctx.spills_exit[pred.index].erase(pair.first); 10797ec681f3Smrg ctx.renames[pred.index][pair.first] = new_name; 10807ec681f3Smrg } 10817ec681f3Smrg 10827ec681f3Smrg /* check if we have to create a new phi for this variable */ 10837ec681f3Smrg Temp rename = Temp(); 10847ec681f3Smrg bool is_same = true; 10857ec681f3Smrg for (unsigned pred_idx : preds) { 10867ec681f3Smrg if (!ctx.renames[pred_idx].count(pair.first)) { 10877ec681f3Smrg if (rename == Temp()) 10887ec681f3Smrg rename = pair.first; 10897ec681f3Smrg else 10907ec681f3Smrg is_same = rename == pair.first; 10917ec681f3Smrg } else { 10927ec681f3Smrg if (rename == Temp()) 10937ec681f3Smrg rename = ctx.renames[pred_idx][pair.first]; 10947ec681f3Smrg else 10957ec681f3Smrg is_same = rename == ctx.renames[pred_idx][pair.first]; 10967ec681f3Smrg } 10977ec681f3Smrg 10987ec681f3Smrg if (!is_same) 10997ec681f3Smrg break; 11007ec681f3Smrg } 11017ec681f3Smrg 11027ec681f3Smrg if (!is_same) { 11037ec681f3Smrg /* the variable was renamed differently in the predecessors: we have to create a phi */ 11047ec681f3Smrg aco_opcode opcode = pair.first.is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi; 11057ec681f3Smrg aco_ptr<Pseudo_instruction> phi{ 11067ec681f3Smrg create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, preds.size(), 1)}; 11077ec681f3Smrg rename = ctx.program->allocateTmp(pair.first.regClass()); 11087ec681f3Smrg for (unsigned i = 0; i < phi->operands.size(); i++) { 11097ec681f3Smrg Temp tmp; 11107ec681f3Smrg if (ctx.renames[preds[i]].count(pair.first)) { 11117ec681f3Smrg tmp = ctx.renames[preds[i]][pair.first]; 11127ec681f3Smrg } else if (preds[i] >= block_idx) { 11137ec681f3Smrg tmp = rename; 11147ec681f3Smrg } else { 11157ec681f3Smrg tmp = pair.first; 11167ec681f3Smrg /* prevent the definining instruction from being DCE'd if it could be rematerialized */ 11177ec681f3Smrg if (ctx.remat.count(tmp)) 11187ec681f3Smrg ctx.unused_remats.erase(ctx.remat[tmp].instr); 11197ec681f3Smrg } 11207ec681f3Smrg phi->operands[i] = Operand(tmp); 11217ec681f3Smrg } 11227ec681f3Smrg phi->definitions[0] = Definition(rename); 11237ec681f3Smrg instructions.emplace_back(std::move(phi)); 11247ec681f3Smrg } 11257ec681f3Smrg 11267ec681f3Smrg /* the variable was renamed: add new name to renames */ 11277ec681f3Smrg if (!(rename == Temp() || rename == pair.first)) 11287ec681f3Smrg ctx.renames[block_idx][pair.first] = rename; 11297ec681f3Smrg } 11307ec681f3Smrg 11317ec681f3Smrg /* combine phis with instructions */ 11327ec681f3Smrg unsigned idx = 0; 11337ec681f3Smrg while (!block->instructions[idx]) { 11347ec681f3Smrg idx++; 11357ec681f3Smrg } 11367ec681f3Smrg 11377ec681f3Smrg if (!ctx.processed[block_idx]) { 11387ec681f3Smrg assert(!(block->kind & block_kind_loop_header)); 11397ec681f3Smrg RegisterDemand demand_before = get_demand_before(ctx, block_idx, idx); 11407ec681f3Smrg ctx.register_demand[block->index].erase(ctx.register_demand[block->index].begin(), 11417ec681f3Smrg ctx.register_demand[block->index].begin() + idx); 11427ec681f3Smrg ctx.register_demand[block->index].insert(ctx.register_demand[block->index].begin(), 11437ec681f3Smrg instructions.size(), demand_before); 11447ec681f3Smrg } 11457ec681f3Smrg 11467ec681f3Smrg std::vector<aco_ptr<Instruction>>::iterator start = std::next(block->instructions.begin(), idx); 11477ec681f3Smrg instructions.insert( 11487ec681f3Smrg instructions.end(), std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>(start), 11497ec681f3Smrg std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>(block->instructions.end())); 11507ec681f3Smrg block->instructions = std::move(instructions); 11517ec681f3Smrg} 11527ec681f3Smrg 11537ec681f3Smrgvoid 11547ec681f3Smrgprocess_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand spilled_registers) 11557ec681f3Smrg{ 11567ec681f3Smrg assert(!ctx.processed[block_idx]); 11577ec681f3Smrg 11587ec681f3Smrg std::vector<aco_ptr<Instruction>> instructions; 11597ec681f3Smrg unsigned idx = 0; 11607ec681f3Smrg 11617ec681f3Smrg /* phis are handled separetely */ 11627ec681f3Smrg while (block->instructions[idx]->opcode == aco_opcode::p_phi || 11637ec681f3Smrg block->instructions[idx]->opcode == aco_opcode::p_linear_phi) { 11647ec681f3Smrg instructions.emplace_back(std::move(block->instructions[idx++])); 11657ec681f3Smrg } 11667ec681f3Smrg 11677ec681f3Smrg if (block->register_demand.exceeds(ctx.target_pressure)) { 11687ec681f3Smrg update_local_next_uses(ctx, block, ctx.local_next_use_distance); 11697ec681f3Smrg } else { 11707ec681f3Smrg /* We won't use local_next_use_distance, so no initialization needed */ 11717ec681f3Smrg } 11727ec681f3Smrg 11737ec681f3Smrg auto& current_spills = ctx.spills_exit[block_idx]; 11747ec681f3Smrg 11757ec681f3Smrg while (idx < block->instructions.size()) { 11767ec681f3Smrg aco_ptr<Instruction>& instr = block->instructions[idx]; 11777ec681f3Smrg 11787ec681f3Smrg std::map<Temp, std::pair<Temp, uint32_t>> reloads; 11797ec681f3Smrg 11807ec681f3Smrg /* rename and reload operands */ 11817ec681f3Smrg for (Operand& op : instr->operands) { 11827ec681f3Smrg if (!op.isTemp()) 11837ec681f3Smrg continue; 11847ec681f3Smrg if (!current_spills.count(op.getTemp())) { 11857ec681f3Smrg /* the Operand is in register: check if it was renamed */ 11867ec681f3Smrg auto rename_it = ctx.renames[block_idx].find(op.getTemp()); 11877ec681f3Smrg if (rename_it != ctx.renames[block_idx].end()) { 11887ec681f3Smrg op.setTemp(rename_it->second); 11897ec681f3Smrg } else { 11907ec681f3Smrg /* prevent its definining instruction from being DCE'd if it could be rematerialized */ 11917ec681f3Smrg auto remat_it = ctx.remat.find(op.getTemp()); 11927ec681f3Smrg if (remat_it != ctx.remat.end()) { 11937ec681f3Smrg ctx.unused_remats.erase(remat_it->second.instr); 11947ec681f3Smrg } 11957ec681f3Smrg } 11967ec681f3Smrg continue; 11977ec681f3Smrg } 11987ec681f3Smrg /* the Operand is spilled: add it to reloads */ 11997ec681f3Smrg Temp new_tmp = ctx.program->allocateTmp(op.regClass()); 12007ec681f3Smrg ctx.renames[block_idx][op.getTemp()] = new_tmp; 12017ec681f3Smrg reloads[new_tmp] = std::make_pair(op.getTemp(), current_spills[op.getTemp()]); 12027ec681f3Smrg current_spills.erase(op.getTemp()); 12037ec681f3Smrg op.setTemp(new_tmp); 12047ec681f3Smrg spilled_registers -= new_tmp; 12057ec681f3Smrg } 12067ec681f3Smrg 12077ec681f3Smrg /* check if register demand is low enough before and after the current instruction */ 12087ec681f3Smrg if (block->register_demand.exceeds(ctx.target_pressure)) { 12097ec681f3Smrg 12107ec681f3Smrg RegisterDemand new_demand = ctx.register_demand[block_idx][idx]; 12117ec681f3Smrg new_demand.update(get_demand_before(ctx, block_idx, idx)); 12127ec681f3Smrg 12137ec681f3Smrg assert(!ctx.local_next_use_distance.empty()); 12147ec681f3Smrg 12157ec681f3Smrg /* if reg pressure is too high, spill variable with furthest next use */ 12167ec681f3Smrg while ((new_demand - spilled_registers).exceeds(ctx.target_pressure)) { 12177ec681f3Smrg unsigned distance = 0; 12187ec681f3Smrg Temp to_spill; 12197ec681f3Smrg bool do_rematerialize = false; 12207ec681f3Smrg RegType type = RegType::sgpr; 12217ec681f3Smrg if (new_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr) 12227ec681f3Smrg type = RegType::vgpr; 12237ec681f3Smrg 12247ec681f3Smrg for (std::pair<Temp, uint32_t> pair : ctx.local_next_use_distance[idx]) { 12257ec681f3Smrg if (pair.first.type() != type) 12267ec681f3Smrg continue; 12277ec681f3Smrg bool can_rematerialize = ctx.remat.count(pair.first); 12287ec681f3Smrg if (((pair.second > distance && can_rematerialize == do_rematerialize) || 12297ec681f3Smrg (can_rematerialize && !do_rematerialize && pair.second > idx)) && 12307ec681f3Smrg !current_spills.count(pair.first)) { 12317ec681f3Smrg to_spill = pair.first; 12327ec681f3Smrg distance = pair.second; 12337ec681f3Smrg do_rematerialize = can_rematerialize; 12347ec681f3Smrg } 12357ec681f3Smrg } 12367ec681f3Smrg 12377ec681f3Smrg assert(distance != 0 && distance > idx); 12387ec681f3Smrg uint32_t spill_id = ctx.allocate_spill_id(to_spill.regClass()); 12397ec681f3Smrg 12407ec681f3Smrg /* add interferences with currently spilled variables */ 12417ec681f3Smrg for (std::pair<Temp, uint32_t> pair : current_spills) 12427ec681f3Smrg ctx.add_interference(spill_id, pair.second); 12437ec681f3Smrg for (std::pair<const Temp, std::pair<Temp, uint32_t>>& pair : reloads) 12447ec681f3Smrg ctx.add_interference(spill_id, pair.second.second); 12457ec681f3Smrg 12467ec681f3Smrg current_spills[to_spill] = spill_id; 12477ec681f3Smrg spilled_registers += to_spill; 12487ec681f3Smrg 12497ec681f3Smrg /* rename if necessary */ 12507ec681f3Smrg if (ctx.renames[block_idx].count(to_spill)) { 12517ec681f3Smrg to_spill = ctx.renames[block_idx][to_spill]; 12527ec681f3Smrg } 12537ec681f3Smrg 12547ec681f3Smrg /* add spill to new instructions */ 12557ec681f3Smrg aco_ptr<Pseudo_instruction> spill{ 12567ec681f3Smrg create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)}; 12577ec681f3Smrg spill->operands[0] = Operand(to_spill); 12587ec681f3Smrg spill->operands[1] = Operand::c32(spill_id); 12597ec681f3Smrg instructions.emplace_back(std::move(spill)); 12607ec681f3Smrg } 12617ec681f3Smrg } 12627ec681f3Smrg 12637ec681f3Smrg /* add reloads and instruction to new instructions */ 12647ec681f3Smrg for (std::pair<const Temp, std::pair<Temp, uint32_t>>& pair : reloads) { 12657ec681f3Smrg aco_ptr<Instruction> reload = 12667ec681f3Smrg do_reload(ctx, pair.second.first, pair.first, pair.second.second); 12677ec681f3Smrg instructions.emplace_back(std::move(reload)); 12687ec681f3Smrg } 12697ec681f3Smrg instructions.emplace_back(std::move(instr)); 12707ec681f3Smrg idx++; 12717ec681f3Smrg } 12727ec681f3Smrg 12737ec681f3Smrg block->instructions = std::move(instructions); 12747ec681f3Smrg} 12757ec681f3Smrg 12767ec681f3Smrgvoid 12777ec681f3Smrgspill_block(spill_ctx& ctx, unsigned block_idx) 12787ec681f3Smrg{ 12797ec681f3Smrg Block* block = &ctx.program->blocks[block_idx]; 12807ec681f3Smrg 12817ec681f3Smrg /* determine set of variables which are spilled at the beginning of the block */ 12827ec681f3Smrg RegisterDemand spilled_registers = init_live_in_vars(ctx, block, block_idx); 12837ec681f3Smrg 12847ec681f3Smrg /* add interferences for spilled variables */ 12857ec681f3Smrg for (auto it = ctx.spills_entry[block_idx].begin(); it != ctx.spills_entry[block_idx].end(); 12867ec681f3Smrg ++it) { 12877ec681f3Smrg for (auto it2 = std::next(it); it2 != ctx.spills_entry[block_idx].end(); ++it2) 12887ec681f3Smrg ctx.add_interference(it->second, it2->second); 12897ec681f3Smrg } 12907ec681f3Smrg 12917ec681f3Smrg bool is_loop_header = block->loop_nest_depth && ctx.loop_header.top()->index == block_idx; 12927ec681f3Smrg if (!is_loop_header) { 12937ec681f3Smrg /* add spill/reload code on incoming control flow edges */ 12947ec681f3Smrg add_coupling_code(ctx, block, block_idx); 12957ec681f3Smrg } 12967ec681f3Smrg 12977ec681f3Smrg const auto& current_spills = ctx.spills_entry[block_idx]; 12987ec681f3Smrg 12997ec681f3Smrg /* check conditions to process this block */ 13007ec681f3Smrg bool process = (block->register_demand - spilled_registers).exceeds(ctx.target_pressure) || 13017ec681f3Smrg !ctx.renames[block_idx].empty() || ctx.unused_remats.size(); 13027ec681f3Smrg 13037ec681f3Smrg for (auto it = current_spills.begin(); !process && it != current_spills.end(); ++it) { 13047ec681f3Smrg if (ctx.next_use_distances_start[block_idx].at(it->first).first == block_idx) 13057ec681f3Smrg process = true; 13067ec681f3Smrg } 13077ec681f3Smrg 13087ec681f3Smrg assert(ctx.spills_exit[block_idx].empty()); 13097ec681f3Smrg ctx.spills_exit[block_idx] = current_spills; 13107ec681f3Smrg if (process) { 13117ec681f3Smrg process_block(ctx, block_idx, block, spilled_registers); 13127ec681f3Smrg } 13137ec681f3Smrg 13147ec681f3Smrg ctx.processed[block_idx] = true; 13157ec681f3Smrg 13167ec681f3Smrg /* check if the next block leaves the current loop */ 13177ec681f3Smrg if (block->loop_nest_depth == 0 || 13187ec681f3Smrg ctx.program->blocks[block_idx + 1].loop_nest_depth >= block->loop_nest_depth) 13197ec681f3Smrg return; 13207ec681f3Smrg 13217ec681f3Smrg Block* loop_header = ctx.loop_header.top(); 13227ec681f3Smrg 13237ec681f3Smrg /* preserve original renames at end of loop header block */ 13247ec681f3Smrg std::map<Temp, Temp> renames = std::move(ctx.renames[loop_header->index]); 13257ec681f3Smrg 13267ec681f3Smrg /* add coupling code to all loop header predecessors */ 13277ec681f3Smrg add_coupling_code(ctx, loop_header, loop_header->index); 13287ec681f3Smrg 13297ec681f3Smrg /* propagate new renames through loop: i.e. repair the SSA */ 13307ec681f3Smrg renames.swap(ctx.renames[loop_header->index]); 13317ec681f3Smrg for (std::pair<Temp, Temp> rename : renames) { 13327ec681f3Smrg for (unsigned idx = loop_header->index; idx <= block_idx; idx++) { 13337ec681f3Smrg Block& current = ctx.program->blocks[idx]; 13347ec681f3Smrg std::vector<aco_ptr<Instruction>>::iterator instr_it = current.instructions.begin(); 13357ec681f3Smrg 13367ec681f3Smrg /* first rename phis */ 13377ec681f3Smrg while (instr_it != current.instructions.end()) { 13387ec681f3Smrg aco_ptr<Instruction>& phi = *instr_it; 13397ec681f3Smrg if (phi->opcode != aco_opcode::p_phi && phi->opcode != aco_opcode::p_linear_phi) 13407ec681f3Smrg break; 13417ec681f3Smrg /* no need to rename the loop header phis once again. this happened in 13427ec681f3Smrg * add_coupling_code() */ 13437ec681f3Smrg if (idx == loop_header->index) { 13447ec681f3Smrg instr_it++; 13457ec681f3Smrg continue; 13467ec681f3Smrg } 13477ec681f3Smrg 13487ec681f3Smrg for (Operand& op : phi->operands) { 13497ec681f3Smrg if (!op.isTemp()) 13507ec681f3Smrg continue; 13517ec681f3Smrg if (op.getTemp() == rename.first) 13527ec681f3Smrg op.setTemp(rename.second); 13537ec681f3Smrg } 13547ec681f3Smrg instr_it++; 13557ec681f3Smrg } 13567ec681f3Smrg 13577ec681f3Smrg /* variable is not live at beginning of this block */ 13587ec681f3Smrg if (ctx.next_use_distances_start[idx].count(rename.first) == 0) 13597ec681f3Smrg continue; 13607ec681f3Smrg 13617ec681f3Smrg /* if the variable is live at the block's exit, add rename */ 13627ec681f3Smrg if (ctx.next_use_distances_end[idx].count(rename.first) != 0) 13637ec681f3Smrg ctx.renames[idx].insert(rename); 13647ec681f3Smrg 13657ec681f3Smrg /* rename all uses in this block */ 13667ec681f3Smrg bool renamed = false; 13677ec681f3Smrg while (!renamed && instr_it != current.instructions.end()) { 13687ec681f3Smrg aco_ptr<Instruction>& instr = *instr_it; 13697ec681f3Smrg for (Operand& op : instr->operands) { 13707ec681f3Smrg if (!op.isTemp()) 13717ec681f3Smrg continue; 13727ec681f3Smrg if (op.getTemp() == rename.first) { 13737ec681f3Smrg op.setTemp(rename.second); 13747ec681f3Smrg /* we can stop with this block as soon as the variable is spilled */ 13757ec681f3Smrg if (instr->opcode == aco_opcode::p_spill) 13767ec681f3Smrg renamed = true; 13777ec681f3Smrg } 13787ec681f3Smrg } 13797ec681f3Smrg instr_it++; 13807ec681f3Smrg } 13817ec681f3Smrg } 13827ec681f3Smrg } 13837ec681f3Smrg 13847ec681f3Smrg /* remove loop header info from stack */ 13857ec681f3Smrg ctx.loop_header.pop(); 13867ec681f3Smrg} 13877ec681f3Smrg 13887ec681f3SmrgTemp 13897ec681f3Smrgload_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, 13907ec681f3Smrg std::vector<aco_ptr<Instruction>>& instructions, unsigned offset, 13917ec681f3Smrg bool is_top_level) 13927ec681f3Smrg{ 13937ec681f3Smrg Builder bld(ctx.program); 13947ec681f3Smrg if (is_top_level) { 13957ec681f3Smrg bld.reset(&instructions); 13967ec681f3Smrg } else { 13977ec681f3Smrg /* find p_logical_end */ 13987ec681f3Smrg unsigned idx = instructions.size() - 1; 13997ec681f3Smrg while (instructions[idx]->opcode != aco_opcode::p_logical_end) 14007ec681f3Smrg idx--; 14017ec681f3Smrg bld.reset(&instructions, std::next(instructions.begin(), idx)); 14027ec681f3Smrg } 14037ec681f3Smrg 14047ec681f3Smrg Temp private_segment_buffer = ctx.program->private_segment_buffer; 14057ec681f3Smrg if (ctx.program->stage != compute_cs) 14067ec681f3Smrg private_segment_buffer = 14077ec681f3Smrg bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero()); 14087ec681f3Smrg 14097ec681f3Smrg if (offset) 14107ec681f3Smrg scratch_offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), 14117ec681f3Smrg scratch_offset, Operand::c32(offset)); 14127ec681f3Smrg 14137ec681f3Smrg uint32_t rsrc_conf = 14147ec681f3Smrg S_008F0C_ADD_TID_ENABLE(1) | S_008F0C_INDEX_STRIDE(ctx.program->wave_size == 64 ? 3 : 2); 14157ec681f3Smrg 14167ec681f3Smrg if (ctx.program->chip_class >= GFX10) { 14177ec681f3Smrg rsrc_conf |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 14187ec681f3Smrg S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); 14197ec681f3Smrg } else if (ctx.program->chip_class <= GFX7) { 14207ec681f3Smrg /* dfmt modifies stride on GFX8/GFX9 when ADD_TID_EN=1 */ 14217ec681f3Smrg rsrc_conf |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 14227ec681f3Smrg S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 14237ec681f3Smrg } 14247ec681f3Smrg /* older generations need element size = 4 bytes. element size removed in GFX9 */ 14257ec681f3Smrg if (ctx.program->chip_class <= GFX8) 14267ec681f3Smrg rsrc_conf |= S_008F0C_ELEMENT_SIZE(1); 14277ec681f3Smrg 14287ec681f3Smrg return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer, 14297ec681f3Smrg Operand::c32(-1u), Operand::c32(rsrc_conf)); 14307ec681f3Smrg} 14317ec681f3Smrg 14327ec681f3Smrgvoid 14337ec681f3Smrgadd_interferences(spill_ctx& ctx, std::vector<bool>& is_assigned, std::vector<uint32_t>& slots, 14347ec681f3Smrg std::vector<bool>& slots_used, unsigned id) 14357ec681f3Smrg{ 14367ec681f3Smrg for (unsigned other : ctx.interferences[id].second) { 14377ec681f3Smrg if (!is_assigned[other]) 14387ec681f3Smrg continue; 14397ec681f3Smrg 14407ec681f3Smrg RegClass other_rc = ctx.interferences[other].first; 14417ec681f3Smrg unsigned slot = slots[other]; 14427ec681f3Smrg std::fill(slots_used.begin() + slot, slots_used.begin() + slot + other_rc.size(), true); 14437ec681f3Smrg } 14447ec681f3Smrg} 14457ec681f3Smrg 14467ec681f3Smrgunsigned 14477ec681f3Smrgfind_available_slot(std::vector<bool>& used, unsigned wave_size, unsigned size, bool is_sgpr, 14487ec681f3Smrg unsigned* num_slots) 14497ec681f3Smrg{ 14507ec681f3Smrg unsigned wave_size_minus_one = wave_size - 1; 14517ec681f3Smrg unsigned slot = 0; 14527ec681f3Smrg 14537ec681f3Smrg while (true) { 14547ec681f3Smrg bool available = true; 14557ec681f3Smrg for (unsigned i = 0; i < size; i++) { 14567ec681f3Smrg if (slot + i < used.size() && used[slot + i]) { 14577ec681f3Smrg available = false; 14587ec681f3Smrg break; 14597ec681f3Smrg } 14607ec681f3Smrg } 14617ec681f3Smrg if (!available) { 14627ec681f3Smrg slot++; 14637ec681f3Smrg continue; 14647ec681f3Smrg } 14657ec681f3Smrg 14667ec681f3Smrg if (is_sgpr && ((slot & wave_size_minus_one) > wave_size - size)) { 14677ec681f3Smrg slot = align(slot, wave_size); 14687ec681f3Smrg continue; 14697ec681f3Smrg } 14707ec681f3Smrg 14717ec681f3Smrg std::fill(used.begin(), used.end(), false); 14727ec681f3Smrg 14737ec681f3Smrg if (slot + size > used.size()) 14747ec681f3Smrg used.resize(slot + size); 14757ec681f3Smrg 14767ec681f3Smrg return slot; 14777ec681f3Smrg } 14787ec681f3Smrg} 14797ec681f3Smrg 14807ec681f3Smrgvoid 14817ec681f3Smrgassign_spill_slots_helper(spill_ctx& ctx, RegType type, std::vector<bool>& is_assigned, 14827ec681f3Smrg std::vector<uint32_t>& slots, unsigned* num_slots) 14837ec681f3Smrg{ 14847ec681f3Smrg std::vector<bool> slots_used(*num_slots); 14857ec681f3Smrg 14867ec681f3Smrg /* assign slots for ids with affinities first */ 14877ec681f3Smrg for (std::vector<uint32_t>& vec : ctx.affinities) { 14887ec681f3Smrg if (ctx.interferences[vec[0]].first.type() != type) 14897ec681f3Smrg continue; 14907ec681f3Smrg 14917ec681f3Smrg for (unsigned id : vec) { 14927ec681f3Smrg if (!ctx.is_reloaded[id]) 14937ec681f3Smrg continue; 14947ec681f3Smrg 14957ec681f3Smrg add_interferences(ctx, is_assigned, slots, slots_used, id); 14967ec681f3Smrg } 14977ec681f3Smrg 14987ec681f3Smrg unsigned slot = 14997ec681f3Smrg find_available_slot(slots_used, ctx.wave_size, ctx.interferences[vec[0]].first.size(), 15007ec681f3Smrg type == RegType::sgpr, num_slots); 15017ec681f3Smrg 15027ec681f3Smrg for (unsigned id : vec) { 15037ec681f3Smrg assert(!is_assigned[id]); 15047ec681f3Smrg 15057ec681f3Smrg if (ctx.is_reloaded[id]) { 15067ec681f3Smrg slots[id] = slot; 15077ec681f3Smrg is_assigned[id] = true; 15087ec681f3Smrg } 15097ec681f3Smrg } 15107ec681f3Smrg } 15117ec681f3Smrg 15127ec681f3Smrg /* assign slots for ids without affinities */ 15137ec681f3Smrg for (unsigned id = 0; id < ctx.interferences.size(); id++) { 15147ec681f3Smrg if (is_assigned[id] || !ctx.is_reloaded[id] || ctx.interferences[id].first.type() != type) 15157ec681f3Smrg continue; 15167ec681f3Smrg 15177ec681f3Smrg add_interferences(ctx, is_assigned, slots, slots_used, id); 15187ec681f3Smrg 15197ec681f3Smrg unsigned slot = 15207ec681f3Smrg find_available_slot(slots_used, ctx.wave_size, ctx.interferences[id].first.size(), 15217ec681f3Smrg type == RegType::sgpr, num_slots); 15227ec681f3Smrg 15237ec681f3Smrg slots[id] = slot; 15247ec681f3Smrg is_assigned[id] = true; 15257ec681f3Smrg } 15267ec681f3Smrg 15277ec681f3Smrg *num_slots = slots_used.size(); 15287ec681f3Smrg} 15297ec681f3Smrg 15307ec681f3Smrgvoid 15317ec681f3Smrgassign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) 15327ec681f3Smrg{ 15337ec681f3Smrg std::vector<uint32_t> slots(ctx.interferences.size()); 15347ec681f3Smrg std::vector<bool> is_assigned(ctx.interferences.size()); 15357ec681f3Smrg 15367ec681f3Smrg /* first, handle affinities: just merge all interferences into both spill ids */ 15377ec681f3Smrg for (std::vector<uint32_t>& vec : ctx.affinities) { 15387ec681f3Smrg for (unsigned i = 0; i < vec.size(); i++) { 15397ec681f3Smrg for (unsigned j = i + 1; j < vec.size(); j++) { 15407ec681f3Smrg assert(vec[i] != vec[j]); 15417ec681f3Smrg bool reloaded = ctx.is_reloaded[vec[i]] || ctx.is_reloaded[vec[j]]; 15427ec681f3Smrg ctx.is_reloaded[vec[i]] = reloaded; 15437ec681f3Smrg ctx.is_reloaded[vec[j]] = reloaded; 15447ec681f3Smrg } 15457ec681f3Smrg } 15467ec681f3Smrg } 15477ec681f3Smrg for (ASSERTED uint32_t i = 0; i < ctx.interferences.size(); i++) 15487ec681f3Smrg for (ASSERTED uint32_t id : ctx.interferences[i].second) 15497ec681f3Smrg assert(i != id); 15507ec681f3Smrg 15517ec681f3Smrg /* for each spill slot, assign as many spill ids as possible */ 15527ec681f3Smrg unsigned sgpr_spill_slots = 0, vgpr_spill_slots = 0; 15537ec681f3Smrg assign_spill_slots_helper(ctx, RegType::sgpr, is_assigned, slots, &sgpr_spill_slots); 15547ec681f3Smrg assign_spill_slots_helper(ctx, RegType::vgpr, is_assigned, slots, &vgpr_spill_slots); 15557ec681f3Smrg 15567ec681f3Smrg for (unsigned id = 0; id < is_assigned.size(); id++) 15577ec681f3Smrg assert(is_assigned[id] || !ctx.is_reloaded[id]); 15587ec681f3Smrg 15597ec681f3Smrg for (std::vector<uint32_t>& vec : ctx.affinities) { 15607ec681f3Smrg for (unsigned i = 0; i < vec.size(); i++) { 15617ec681f3Smrg for (unsigned j = i + 1; j < vec.size(); j++) { 15627ec681f3Smrg assert(is_assigned[vec[i]] == is_assigned[vec[j]]); 15637ec681f3Smrg if (!is_assigned[vec[i]]) 15647ec681f3Smrg continue; 15657ec681f3Smrg assert(ctx.is_reloaded[vec[i]] == ctx.is_reloaded[vec[j]]); 15667ec681f3Smrg assert(ctx.interferences[vec[i]].first.type() == 15677ec681f3Smrg ctx.interferences[vec[j]].first.type()); 15687ec681f3Smrg assert(slots[vec[i]] == slots[vec[j]]); 15697ec681f3Smrg } 15707ec681f3Smrg } 15717ec681f3Smrg } 15727ec681f3Smrg 15737ec681f3Smrg /* hope, we didn't mess up */ 15747ec681f3Smrg std::vector<Temp> vgpr_spill_temps((sgpr_spill_slots + ctx.wave_size - 1) / ctx.wave_size); 15757ec681f3Smrg assert(vgpr_spill_temps.size() <= spills_to_vgpr); 15767ec681f3Smrg 15777ec681f3Smrg /* replace pseudo instructions with actual hardware instructions */ 15787ec681f3Smrg Temp scratch_offset = ctx.program->scratch_offset, scratch_rsrc = Temp(); 15797ec681f3Smrg unsigned last_top_level_block_idx = 0; 15807ec681f3Smrg std::vector<bool> reload_in_loop(vgpr_spill_temps.size()); 15817ec681f3Smrg for (Block& block : ctx.program->blocks) { 15827ec681f3Smrg 15837ec681f3Smrg /* after loops, we insert a user if there was a reload inside the loop */ 15847ec681f3Smrg if (block.loop_nest_depth == 0) { 15857ec681f3Smrg int end_vgprs = 0; 15867ec681f3Smrg for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) { 15877ec681f3Smrg if (reload_in_loop[i]) 15887ec681f3Smrg end_vgprs++; 15897ec681f3Smrg } 15907ec681f3Smrg 15917ec681f3Smrg if (end_vgprs > 0) { 15927ec681f3Smrg aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>( 15937ec681f3Smrg aco_opcode::p_end_linear_vgpr, Format::PSEUDO, end_vgprs, 0)}; 15947ec681f3Smrg int k = 0; 15957ec681f3Smrg for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) { 15967ec681f3Smrg if (reload_in_loop[i]) 15977ec681f3Smrg destr->operands[k++] = Operand(vgpr_spill_temps[i]); 15987ec681f3Smrg reload_in_loop[i] = false; 15997ec681f3Smrg } 16007ec681f3Smrg /* find insertion point */ 16017ec681f3Smrg std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin(); 16027ec681f3Smrg while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi) 16037ec681f3Smrg ++it; 16047ec681f3Smrg block.instructions.insert(it, std::move(destr)); 16057ec681f3Smrg } 16067ec681f3Smrg } 16077ec681f3Smrg 16087ec681f3Smrg if (block.kind & block_kind_top_level && !block.linear_preds.empty()) { 16097ec681f3Smrg last_top_level_block_idx = block.index; 16107ec681f3Smrg 16117ec681f3Smrg /* check if any spilled variables use a created linear vgpr, otherwise destroy them */ 16127ec681f3Smrg for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) { 16137ec681f3Smrg if (vgpr_spill_temps[i] == Temp()) 16147ec681f3Smrg continue; 16157ec681f3Smrg 16167ec681f3Smrg bool can_destroy = true; 16177ec681f3Smrg for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block.index]) { 16187ec681f3Smrg 16197ec681f3Smrg if (ctx.interferences[pair.second].first.type() == RegType::sgpr && 16207ec681f3Smrg slots[pair.second] / ctx.wave_size == i) { 16217ec681f3Smrg can_destroy = false; 16227ec681f3Smrg break; 16237ec681f3Smrg } 16247ec681f3Smrg } 16257ec681f3Smrg if (can_destroy) 16267ec681f3Smrg vgpr_spill_temps[i] = Temp(); 16277ec681f3Smrg } 16287ec681f3Smrg } 16297ec681f3Smrg 16307ec681f3Smrg std::vector<aco_ptr<Instruction>>::iterator it; 16317ec681f3Smrg std::vector<aco_ptr<Instruction>> instructions; 16327ec681f3Smrg instructions.reserve(block.instructions.size()); 16337ec681f3Smrg Builder bld(ctx.program, &instructions); 16347ec681f3Smrg for (it = block.instructions.begin(); it != block.instructions.end(); ++it) { 16357ec681f3Smrg 16367ec681f3Smrg if ((*it)->opcode == aco_opcode::p_spill) { 16377ec681f3Smrg uint32_t spill_id = (*it)->operands[1].constantValue(); 16387ec681f3Smrg 16397ec681f3Smrg if (!ctx.is_reloaded[spill_id]) { 16407ec681f3Smrg /* never reloaded, so don't spill */ 16417ec681f3Smrg } else if (!is_assigned[spill_id]) { 16427ec681f3Smrg unreachable("No spill slot assigned for spill id"); 16437ec681f3Smrg } else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) { 16447ec681f3Smrg /* spill vgpr */ 16457ec681f3Smrg ctx.program->config->spilled_vgprs += (*it)->operands[0].size(); 16467ec681f3Smrg uint32_t spill_slot = slots[spill_id]; 16477ec681f3Smrg bool add_offset_to_sgpr = 16487ec681f3Smrg ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + 16497ec681f3Smrg vgpr_spill_slots * 4 > 16507ec681f3Smrg 4096; 16517ec681f3Smrg unsigned base_offset = 16527ec681f3Smrg add_offset_to_sgpr 16537ec681f3Smrg ? 0 16547ec681f3Smrg : ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size; 16557ec681f3Smrg 16567ec681f3Smrg /* check if the scratch resource descriptor already exists */ 16577ec681f3Smrg if (scratch_rsrc == Temp()) { 16587ec681f3Smrg unsigned offset = 16597ec681f3Smrg add_offset_to_sgpr ? ctx.program->config->scratch_bytes_per_wave : 0; 16607ec681f3Smrg scratch_rsrc = load_scratch_resource( 16617ec681f3Smrg ctx, scratch_offset, 16627ec681f3Smrg last_top_level_block_idx == block.index 16637ec681f3Smrg ? instructions 16647ec681f3Smrg : ctx.program->blocks[last_top_level_block_idx].instructions, 16657ec681f3Smrg offset, last_top_level_block_idx == block.index); 16667ec681f3Smrg } 16677ec681f3Smrg 16687ec681f3Smrg unsigned offset = base_offset + spill_slot * 4; 16697ec681f3Smrg aco_opcode opcode = aco_opcode::buffer_store_dword; 16707ec681f3Smrg assert((*it)->operands[0].isTemp()); 16717ec681f3Smrg Temp temp = (*it)->operands[0].getTemp(); 16727ec681f3Smrg assert(temp.type() == RegType::vgpr && !temp.is_linear()); 16737ec681f3Smrg if (temp.size() > 1) { 16747ec681f3Smrg Instruction* split{create_instruction<Pseudo_instruction>( 16757ec681f3Smrg aco_opcode::p_split_vector, Format::PSEUDO, 1, temp.size())}; 16767ec681f3Smrg split->operands[0] = Operand(temp); 16777ec681f3Smrg for (unsigned i = 0; i < temp.size(); i++) 16787ec681f3Smrg split->definitions[i] = bld.def(v1); 16797ec681f3Smrg bld.insert(split); 16807ec681f3Smrg for (unsigned i = 0; i < temp.size(); i++) { 16817ec681f3Smrg Instruction* instr = 16827ec681f3Smrg bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, 16837ec681f3Smrg split->definitions[i].getTemp(), offset + i * 4, false, true); 16847ec681f3Smrg instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); 16857ec681f3Smrg } 16867ec681f3Smrg } else { 16877ec681f3Smrg Instruction* instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, 16887ec681f3Smrg temp, offset, false, true); 16897ec681f3Smrg instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); 16907ec681f3Smrg } 16917ec681f3Smrg } else { 16927ec681f3Smrg ctx.program->config->spilled_sgprs += (*it)->operands[0].size(); 16937ec681f3Smrg 16947ec681f3Smrg uint32_t spill_slot = slots[spill_id]; 16957ec681f3Smrg 16967ec681f3Smrg /* check if the linear vgpr already exists */ 16977ec681f3Smrg if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) { 16987ec681f3Smrg Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear()); 16997ec681f3Smrg vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr; 17007ec681f3Smrg aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>( 17017ec681f3Smrg aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)}; 17027ec681f3Smrg create->definitions[0] = Definition(linear_vgpr); 17037ec681f3Smrg /* find the right place to insert this definition */ 17047ec681f3Smrg if (last_top_level_block_idx == block.index) { 17057ec681f3Smrg /* insert right before the current instruction */ 17067ec681f3Smrg instructions.emplace_back(std::move(create)); 17077ec681f3Smrg } else { 17087ec681f3Smrg assert(last_top_level_block_idx < block.index); 17097ec681f3Smrg /* insert before the branch at last top level block */ 17107ec681f3Smrg std::vector<aco_ptr<Instruction>>& block_instrs = 17117ec681f3Smrg ctx.program->blocks[last_top_level_block_idx].instructions; 17127ec681f3Smrg block_instrs.insert(std::prev(block_instrs.end()), std::move(create)); 17137ec681f3Smrg } 17147ec681f3Smrg } 17157ec681f3Smrg 17167ec681f3Smrg /* spill sgpr: just add the vgpr temp to operands */ 17177ec681f3Smrg Pseudo_instruction* spill = 17187ec681f3Smrg create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 3, 0); 17197ec681f3Smrg spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]); 17207ec681f3Smrg spill->operands[1] = Operand::c32(spill_slot % ctx.wave_size); 17217ec681f3Smrg spill->operands[2] = (*it)->operands[0]; 17227ec681f3Smrg instructions.emplace_back(aco_ptr<Instruction>(spill)); 17237ec681f3Smrg } 17247ec681f3Smrg 17257ec681f3Smrg } else if ((*it)->opcode == aco_opcode::p_reload) { 17267ec681f3Smrg uint32_t spill_id = (*it)->operands[0].constantValue(); 17277ec681f3Smrg assert(ctx.is_reloaded[spill_id]); 17287ec681f3Smrg 17297ec681f3Smrg if (!is_assigned[spill_id]) { 17307ec681f3Smrg unreachable("No spill slot assigned for spill id"); 17317ec681f3Smrg } else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) { 17327ec681f3Smrg /* reload vgpr */ 17337ec681f3Smrg uint32_t spill_slot = slots[spill_id]; 17347ec681f3Smrg bool add_offset_to_sgpr = 17357ec681f3Smrg ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + 17367ec681f3Smrg vgpr_spill_slots * 4 > 17377ec681f3Smrg 4096; 17387ec681f3Smrg unsigned base_offset = 17397ec681f3Smrg add_offset_to_sgpr 17407ec681f3Smrg ? 0 17417ec681f3Smrg : ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size; 17427ec681f3Smrg 17437ec681f3Smrg /* check if the scratch resource descriptor already exists */ 17447ec681f3Smrg if (scratch_rsrc == Temp()) { 17457ec681f3Smrg unsigned offset = 17467ec681f3Smrg add_offset_to_sgpr ? ctx.program->config->scratch_bytes_per_wave : 0; 17477ec681f3Smrg scratch_rsrc = load_scratch_resource( 17487ec681f3Smrg ctx, scratch_offset, 17497ec681f3Smrg last_top_level_block_idx == block.index 17507ec681f3Smrg ? instructions 17517ec681f3Smrg : ctx.program->blocks[last_top_level_block_idx].instructions, 17527ec681f3Smrg offset, last_top_level_block_idx == block.index); 17537ec681f3Smrg } 17547ec681f3Smrg 17557ec681f3Smrg unsigned offset = base_offset + spill_slot * 4; 17567ec681f3Smrg aco_opcode opcode = aco_opcode::buffer_load_dword; 17577ec681f3Smrg Definition def = (*it)->definitions[0]; 17587ec681f3Smrg if (def.size() > 1) { 17597ec681f3Smrg Instruction* vec{create_instruction<Pseudo_instruction>( 17607ec681f3Smrg aco_opcode::p_create_vector, Format::PSEUDO, def.size(), 1)}; 17617ec681f3Smrg vec->definitions[0] = def; 17627ec681f3Smrg for (unsigned i = 0; i < def.size(); i++) { 17637ec681f3Smrg Temp tmp = bld.tmp(v1); 17647ec681f3Smrg vec->operands[i] = Operand(tmp); 17657ec681f3Smrg Instruction* instr = 17667ec681f3Smrg bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(v1), 17677ec681f3Smrg scratch_offset, offset + i * 4, false, true); 17687ec681f3Smrg instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); 17697ec681f3Smrg } 17707ec681f3Smrg bld.insert(vec); 17717ec681f3Smrg } else { 17727ec681f3Smrg Instruction* instr = bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), 17737ec681f3Smrg scratch_offset, offset, false, true); 17747ec681f3Smrg instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); 17757ec681f3Smrg } 17767ec681f3Smrg } else { 17777ec681f3Smrg uint32_t spill_slot = slots[spill_id]; 17787ec681f3Smrg reload_in_loop[spill_slot / ctx.wave_size] = block.loop_nest_depth > 0; 17797ec681f3Smrg 17807ec681f3Smrg /* check if the linear vgpr already exists */ 17817ec681f3Smrg if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) { 17827ec681f3Smrg Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear()); 17837ec681f3Smrg vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr; 17847ec681f3Smrg aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>( 17857ec681f3Smrg aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)}; 17867ec681f3Smrg create->definitions[0] = Definition(linear_vgpr); 17877ec681f3Smrg /* find the right place to insert this definition */ 17887ec681f3Smrg if (last_top_level_block_idx == block.index) { 17897ec681f3Smrg /* insert right before the current instruction */ 17907ec681f3Smrg instructions.emplace_back(std::move(create)); 17917ec681f3Smrg } else { 17927ec681f3Smrg assert(last_top_level_block_idx < block.index); 17937ec681f3Smrg /* insert before the branch at last top level block */ 17947ec681f3Smrg std::vector<aco_ptr<Instruction>>& block_instrs = 17957ec681f3Smrg ctx.program->blocks[last_top_level_block_idx].instructions; 17967ec681f3Smrg block_instrs.insert(std::prev(block_instrs.end()), std::move(create)); 17977ec681f3Smrg } 17987ec681f3Smrg } 17997ec681f3Smrg 18007ec681f3Smrg /* reload sgpr: just add the vgpr temp to operands */ 18017ec681f3Smrg Pseudo_instruction* reload = create_instruction<Pseudo_instruction>( 18027ec681f3Smrg aco_opcode::p_reload, Format::PSEUDO, 2, 1); 18037ec681f3Smrg reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]); 18047ec681f3Smrg reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size); 18057ec681f3Smrg reload->definitions[0] = (*it)->definitions[0]; 18067ec681f3Smrg instructions.emplace_back(aco_ptr<Instruction>(reload)); 18077ec681f3Smrg } 18087ec681f3Smrg } else if (!ctx.unused_remats.count(it->get())) { 18097ec681f3Smrg instructions.emplace_back(std::move(*it)); 18107ec681f3Smrg } 18117ec681f3Smrg } 18127ec681f3Smrg block.instructions = std::move(instructions); 18137ec681f3Smrg } 18147ec681f3Smrg 18157ec681f3Smrg /* update required scratch memory */ 18167ec681f3Smrg ctx.program->config->scratch_bytes_per_wave += 18177ec681f3Smrg align(vgpr_spill_slots * 4 * ctx.program->wave_size, 1024); 18187ec681f3Smrg 18197ec681f3Smrg /* SSA elimination inserts copies for logical phis right before p_logical_end 18207ec681f3Smrg * So if a linear vgpr is used between that p_logical_end and the branch, 18217ec681f3Smrg * we need to ensure logical phis don't choose a definition which aliases 18227ec681f3Smrg * the linear vgpr. 18237ec681f3Smrg * TODO: Moving the spills and reloads to before p_logical_end might produce 18247ec681f3Smrg * slightly better code. */ 18257ec681f3Smrg for (Block& block : ctx.program->blocks) { 18267ec681f3Smrg /* loops exits are already handled */ 18277ec681f3Smrg if (block.logical_preds.size() <= 1) 18287ec681f3Smrg continue; 18297ec681f3Smrg 18307ec681f3Smrg bool has_logical_phis = false; 18317ec681f3Smrg for (aco_ptr<Instruction>& instr : block.instructions) { 18327ec681f3Smrg if (instr->opcode == aco_opcode::p_phi) { 18337ec681f3Smrg has_logical_phis = true; 18347ec681f3Smrg break; 18357ec681f3Smrg } else if (instr->opcode != aco_opcode::p_linear_phi) { 18367ec681f3Smrg break; 18377ec681f3Smrg } 18387ec681f3Smrg } 18397ec681f3Smrg if (!has_logical_phis) 18407ec681f3Smrg continue; 18417ec681f3Smrg 18427ec681f3Smrg std::set<Temp> vgprs; 18437ec681f3Smrg for (unsigned pred_idx : block.logical_preds) { 18447ec681f3Smrg Block& pred = ctx.program->blocks[pred_idx]; 18457ec681f3Smrg for (int i = pred.instructions.size() - 1; i >= 0; i--) { 18467ec681f3Smrg aco_ptr<Instruction>& pred_instr = pred.instructions[i]; 18477ec681f3Smrg if (pred_instr->opcode == aco_opcode::p_logical_end) { 18487ec681f3Smrg break; 18497ec681f3Smrg } else if (pred_instr->opcode == aco_opcode::p_spill || 18507ec681f3Smrg pred_instr->opcode == aco_opcode::p_reload) { 18517ec681f3Smrg vgprs.insert(pred_instr->operands[0].getTemp()); 18527ec681f3Smrg } 18537ec681f3Smrg } 18547ec681f3Smrg } 18557ec681f3Smrg if (!vgprs.size()) 18567ec681f3Smrg continue; 18577ec681f3Smrg 18587ec681f3Smrg aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>( 18597ec681f3Smrg aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vgprs.size(), 0)}; 18607ec681f3Smrg int k = 0; 18617ec681f3Smrg for (Temp tmp : vgprs) { 18627ec681f3Smrg destr->operands[k++] = Operand(tmp); 18637ec681f3Smrg } 18647ec681f3Smrg /* find insertion point */ 18657ec681f3Smrg std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin(); 18667ec681f3Smrg while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi) 18677ec681f3Smrg ++it; 18687ec681f3Smrg block.instructions.insert(it, std::move(destr)); 18697ec681f3Smrg } 18707ec681f3Smrg} 18717ec681f3Smrg 18727ec681f3Smrg} /* end namespace */ 18737ec681f3Smrg 18747ec681f3Smrgvoid 18757ec681f3Smrgspill(Program* program, live& live_vars) 18767ec681f3Smrg{ 18777ec681f3Smrg program->config->spilled_vgprs = 0; 18787ec681f3Smrg program->config->spilled_sgprs = 0; 18797ec681f3Smrg 18807ec681f3Smrg program->progress = CompilationProgress::after_spilling; 18817ec681f3Smrg 18827ec681f3Smrg /* no spilling when register pressure is low enough */ 18837ec681f3Smrg if (program->num_waves > 0) 18847ec681f3Smrg return; 18857ec681f3Smrg 18867ec681f3Smrg /* lower to CSSA before spilling to ensure correctness w.r.t. phis */ 18877ec681f3Smrg lower_to_cssa(program, live_vars); 18887ec681f3Smrg 18897ec681f3Smrg /* calculate target register demand */ 18907ec681f3Smrg const RegisterDemand demand = program->max_reg_demand; /* current max */ 18917ec681f3Smrg const uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves); 18927ec681f3Smrg const uint16_t vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves); 18937ec681f3Smrg uint16_t extra_vgprs = 0; 18947ec681f3Smrg uint16_t extra_sgprs = 0; 18957ec681f3Smrg 18967ec681f3Smrg /* calculate extra VGPRs required for spilling SGPRs */ 18977ec681f3Smrg if (demand.sgpr > sgpr_limit) { 18987ec681f3Smrg unsigned sgpr_spills = demand.sgpr - sgpr_limit; 18997ec681f3Smrg extra_vgprs = DIV_ROUND_UP(sgpr_spills, program->wave_size) + 1; 19007ec681f3Smrg } 19017ec681f3Smrg /* add extra SGPRs required for spilling VGPRs */ 19027ec681f3Smrg if (demand.vgpr + extra_vgprs > vgpr_limit) { 19037ec681f3Smrg extra_sgprs = 5; /* scratch_resource (s4) + scratch_offset (s1) */ 19047ec681f3Smrg if (demand.sgpr + extra_sgprs > sgpr_limit) { 19057ec681f3Smrg /* re-calculate in case something has changed */ 19067ec681f3Smrg unsigned sgpr_spills = demand.sgpr + extra_sgprs - sgpr_limit; 19077ec681f3Smrg extra_vgprs = DIV_ROUND_UP(sgpr_spills, program->wave_size) + 1; 19087ec681f3Smrg } 19097ec681f3Smrg } 19107ec681f3Smrg /* the spiller has to target the following register demand */ 19117ec681f3Smrg const RegisterDemand target(vgpr_limit - extra_vgprs, sgpr_limit - extra_sgprs); 19127ec681f3Smrg 19137ec681f3Smrg /* initialize ctx */ 19147ec681f3Smrg spill_ctx ctx(target, program, live_vars.register_demand); 19157ec681f3Smrg compute_global_next_uses(ctx); 19167ec681f3Smrg get_rematerialize_info(ctx); 19177ec681f3Smrg 19187ec681f3Smrg /* create spills and reloads */ 19197ec681f3Smrg for (unsigned i = 0; i < program->blocks.size(); i++) 19207ec681f3Smrg spill_block(ctx, i); 19217ec681f3Smrg 19227ec681f3Smrg /* assign spill slots and DCE rematerialized code */ 19237ec681f3Smrg assign_spill_slots(ctx, extra_vgprs); 19247ec681f3Smrg 19257ec681f3Smrg /* update live variable information */ 19267ec681f3Smrg live_vars = live_var_analysis(program); 19277ec681f3Smrg 19287ec681f3Smrg assert(program->num_waves > 0); 19297ec681f3Smrg} 19307ec681f3Smrg 19317ec681f3Smrg} // namespace aco 1932