17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2018 Valve Corporation 37ec681f3Smrg * Copyright © 2018 Google 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 107ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 217ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 227ec681f3Smrg * IN THE SOFTWARE. 237ec681f3Smrg * 247ec681f3Smrg */ 257ec681f3Smrg 267ec681f3Smrg#include "aco_builder.h" 277ec681f3Smrg#include "aco_ir.h" 287ec681f3Smrg 297ec681f3Smrg#include <vector> 307ec681f3Smrg 317ec681f3Smrg/* 327ec681f3Smrg * Insert p_linear_start instructions right before RA to correctly allocate 337ec681f3Smrg * temporaries for reductions that have to disrespect EXEC by executing in 347ec681f3Smrg * WWM. 357ec681f3Smrg */ 367ec681f3Smrg 377ec681f3Smrgnamespace aco { 387ec681f3Smrg 397ec681f3Smrgvoid 407ec681f3Smrgsetup_reduce_temp(Program* program) 417ec681f3Smrg{ 427ec681f3Smrg unsigned last_top_level_block_idx = 0; 437ec681f3Smrg unsigned maxSize = 0; 447ec681f3Smrg 457ec681f3Smrg std::vector<bool> hasReductions(program->blocks.size()); 467ec681f3Smrg for (Block& block : program->blocks) { 477ec681f3Smrg for (aco_ptr<Instruction>& instr : block.instructions) { 487ec681f3Smrg if (instr->format != Format::PSEUDO_REDUCTION) 497ec681f3Smrg continue; 507ec681f3Smrg 517ec681f3Smrg maxSize = MAX2(maxSize, instr->operands[0].size()); 527ec681f3Smrg hasReductions[block.index] = true; 537ec681f3Smrg } 547ec681f3Smrg } 557ec681f3Smrg 567ec681f3Smrg if (maxSize == 0) 577ec681f3Smrg return; 587ec681f3Smrg 597ec681f3Smrg assert(maxSize == 1 || maxSize == 2); 607ec681f3Smrg Temp reduceTmp(0, RegClass(RegType::vgpr, maxSize).as_linear()); 617ec681f3Smrg Temp vtmp(0, RegClass(RegType::vgpr, maxSize).as_linear()); 627ec681f3Smrg int inserted_at = -1; 637ec681f3Smrg int vtmp_inserted_at = -1; 647ec681f3Smrg bool reduceTmp_in_loop = false; 657ec681f3Smrg bool vtmp_in_loop = false; 667ec681f3Smrg 677ec681f3Smrg for (Block& block : program->blocks) { 687ec681f3Smrg 697ec681f3Smrg /* insert p_end_linear_vgpr after the outermost loop */ 707ec681f3Smrg if (reduceTmp_in_loop && block.loop_nest_depth == 0) { 717ec681f3Smrg assert(inserted_at == (int)last_top_level_block_idx); 727ec681f3Smrg 737ec681f3Smrg aco_ptr<Instruction> end{create_instruction<Instruction>( 747ec681f3Smrg aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vtmp_in_loop ? 2 : 1, 0)}; 757ec681f3Smrg end->operands[0] = Operand(reduceTmp); 767ec681f3Smrg if (vtmp_in_loop) 777ec681f3Smrg end->operands[1] = Operand(vtmp); 787ec681f3Smrg /* insert after the phis of the loop exit block */ 797ec681f3Smrg std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin(); 807ec681f3Smrg while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi) 817ec681f3Smrg ++it; 827ec681f3Smrg block.instructions.insert(it, std::move(end)); 837ec681f3Smrg reduceTmp_in_loop = false; 847ec681f3Smrg } 857ec681f3Smrg 867ec681f3Smrg if (block.kind & block_kind_top_level) 877ec681f3Smrg last_top_level_block_idx = block.index; 887ec681f3Smrg 897ec681f3Smrg if (!hasReductions[block.index]) 907ec681f3Smrg continue; 917ec681f3Smrg 927ec681f3Smrg std::vector<aco_ptr<Instruction>>::iterator it; 937ec681f3Smrg for (it = block.instructions.begin(); it != block.instructions.end(); ++it) { 947ec681f3Smrg Instruction* instr = (*it).get(); 957ec681f3Smrg if (instr->format != Format::PSEUDO_REDUCTION) 967ec681f3Smrg continue; 977ec681f3Smrg 987ec681f3Smrg ReduceOp op = instr->reduction().reduce_op; 997ec681f3Smrg reduceTmp_in_loop |= block.loop_nest_depth > 0; 1007ec681f3Smrg 1017ec681f3Smrg if ((int)last_top_level_block_idx != inserted_at) { 1027ec681f3Smrg reduceTmp = program->allocateTmp(reduceTmp.regClass()); 1037ec681f3Smrg aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>( 1047ec681f3Smrg aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)}; 1057ec681f3Smrg create->definitions[0] = Definition(reduceTmp); 1067ec681f3Smrg /* find the right place to insert this definition */ 1077ec681f3Smrg if (last_top_level_block_idx == block.index) { 1087ec681f3Smrg /* insert right before the current instruction */ 1097ec681f3Smrg it = block.instructions.insert(it, std::move(create)); 1107ec681f3Smrg it++; 1117ec681f3Smrg /* inserted_at is intentionally not updated here, so later blocks 1127ec681f3Smrg * would insert at the end instead of using this one. */ 1137ec681f3Smrg } else { 1147ec681f3Smrg assert(last_top_level_block_idx < block.index); 1157ec681f3Smrg /* insert before the branch at last top level block */ 1167ec681f3Smrg std::vector<aco_ptr<Instruction>>& instructions = 1177ec681f3Smrg program->blocks[last_top_level_block_idx].instructions; 1187ec681f3Smrg instructions.insert(std::next(instructions.begin(), instructions.size() - 1), 1197ec681f3Smrg std::move(create)); 1207ec681f3Smrg inserted_at = last_top_level_block_idx; 1217ec681f3Smrg } 1227ec681f3Smrg } 1237ec681f3Smrg 1247ec681f3Smrg /* same as before, except for the vector temporary instead of the reduce temporary */ 1257ec681f3Smrg unsigned cluster_size = instr->reduction().cluster_size; 1267ec681f3Smrg bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 || op == fmin64 || 1277ec681f3Smrg op == fmax64 || op == umin64 || op == umax64 || op == imin64 || 1287ec681f3Smrg op == imax64 || op == imul64; 1297ec681f3Smrg bool gfx10_need_vtmp = op == imul8 || op == imax8 || op == imin8 || op == umin8 || 1307ec681f3Smrg op == imul16 || op == imax16 || op == imin16 || op == umin16 || 1317ec681f3Smrg op == iadd64; 1327ec681f3Smrg 1337ec681f3Smrg if (program->chip_class >= GFX10 && cluster_size == 64) 1347ec681f3Smrg need_vtmp = true; 1357ec681f3Smrg if (program->chip_class >= GFX10 && gfx10_need_vtmp) 1367ec681f3Smrg need_vtmp = true; 1377ec681f3Smrg if (program->chip_class <= GFX7) 1387ec681f3Smrg need_vtmp = true; 1397ec681f3Smrg 1407ec681f3Smrg need_vtmp |= cluster_size == 32; 1417ec681f3Smrg 1427ec681f3Smrg vtmp_in_loop |= need_vtmp && block.loop_nest_depth > 0; 1437ec681f3Smrg if (need_vtmp && (int)last_top_level_block_idx != vtmp_inserted_at) { 1447ec681f3Smrg vtmp = program->allocateTmp(vtmp.regClass()); 1457ec681f3Smrg aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>( 1467ec681f3Smrg aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)}; 1477ec681f3Smrg create->definitions[0] = Definition(vtmp); 1487ec681f3Smrg if (last_top_level_block_idx == block.index) { 1497ec681f3Smrg it = block.instructions.insert(it, std::move(create)); 1507ec681f3Smrg it++; 1517ec681f3Smrg } else { 1527ec681f3Smrg assert(last_top_level_block_idx < block.index); 1537ec681f3Smrg std::vector<aco_ptr<Instruction>>& instructions = 1547ec681f3Smrg program->blocks[last_top_level_block_idx].instructions; 1557ec681f3Smrg instructions.insert(std::next(instructions.begin(), instructions.size() - 1), 1567ec681f3Smrg std::move(create)); 1577ec681f3Smrg vtmp_inserted_at = last_top_level_block_idx; 1587ec681f3Smrg } 1597ec681f3Smrg } 1607ec681f3Smrg 1617ec681f3Smrg instr->operands[1] = Operand(reduceTmp); 1627ec681f3Smrg if (need_vtmp) 1637ec681f3Smrg instr->operands[2] = Operand(vtmp); 1647ec681f3Smrg } 1657ec681f3Smrg } 1667ec681f3Smrg} 1677ec681f3Smrg 1687ec681f3Smrg}; // namespace aco 169