17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2021 Valve Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include "ir3.h" 257ec681f3Smrg 267ec681f3Smrg/* Lower several macro-instructions needed for shader subgroup support that 277ec681f3Smrg * must be turned into if statements. We do this after RA and post-RA 287ec681f3Smrg * scheduling to give the scheduler a chance to rearrange them, because RA 297ec681f3Smrg * may need to insert OPC_META_READ_FIRST to handle splitting live ranges, and 307ec681f3Smrg * also because some (e.g. BALLOT and READ_FIRST) must produce a shared 317ec681f3Smrg * register that cannot be spilled to a normal register until after the if, 327ec681f3Smrg * which makes implementing spilling more complicated if they are already 337ec681f3Smrg * lowered. 347ec681f3Smrg */ 357ec681f3Smrg 367ec681f3Smrgstatic void 377ec681f3Smrgreplace_pred(struct ir3_block *block, struct ir3_block *old_pred, 387ec681f3Smrg struct ir3_block *new_pred) 397ec681f3Smrg{ 407ec681f3Smrg for (unsigned i = 0; i < block->predecessors_count; i++) { 417ec681f3Smrg if (block->predecessors[i] == old_pred) { 427ec681f3Smrg block->predecessors[i] = new_pred; 437ec681f3Smrg return; 447ec681f3Smrg } 457ec681f3Smrg } 467ec681f3Smrg} 477ec681f3Smrg 487ec681f3Smrgstatic void 497ec681f3Smrgreplace_physical_pred(struct ir3_block *block, struct ir3_block *old_pred, 507ec681f3Smrg struct ir3_block *new_pred) 517ec681f3Smrg{ 527ec681f3Smrg for (unsigned i = 0; i < block->physical_predecessors_count; i++) { 537ec681f3Smrg if (block->physical_predecessors[i] == old_pred) { 547ec681f3Smrg block->physical_predecessors[i] = new_pred; 557ec681f3Smrg return; 567ec681f3Smrg } 577ec681f3Smrg } 587ec681f3Smrg} 597ec681f3Smrg 607ec681f3Smrgstatic void 617ec681f3Smrgmov_immed(struct ir3_register *dst, struct ir3_block *block, unsigned immed) 627ec681f3Smrg{ 637ec681f3Smrg struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1); 647ec681f3Smrg struct ir3_register *mov_dst = ir3_dst_create(mov, dst->num, dst->flags); 657ec681f3Smrg mov_dst->wrmask = dst->wrmask; 667ec681f3Smrg struct ir3_register *src = ir3_src_create( 677ec681f3Smrg mov, INVALID_REG, (dst->flags & IR3_REG_HALF) | IR3_REG_IMMED); 687ec681f3Smrg src->uim_val = immed; 697ec681f3Smrg mov->cat1.dst_type = (dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; 707ec681f3Smrg mov->cat1.src_type = mov->cat1.dst_type; 717ec681f3Smrg mov->repeat = util_last_bit(mov_dst->wrmask) - 1; 727ec681f3Smrg} 737ec681f3Smrg 747ec681f3Smrgstatic struct ir3_block * 757ec681f3Smrgsplit_block(struct ir3 *ir, struct ir3_block *before_block, 767ec681f3Smrg struct ir3_instruction *instr, struct ir3_block **then) 777ec681f3Smrg{ 787ec681f3Smrg struct ir3_block *then_block = ir3_block_create(ir); 797ec681f3Smrg struct ir3_block *after_block = ir3_block_create(ir); 807ec681f3Smrg list_add(&then_block->node, &before_block->node); 817ec681f3Smrg list_add(&after_block->node, &then_block->node); 827ec681f3Smrg 837ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(before_block->successors); i++) { 847ec681f3Smrg after_block->successors[i] = before_block->successors[i]; 857ec681f3Smrg if (after_block->successors[i]) 867ec681f3Smrg replace_pred(after_block->successors[i], before_block, after_block); 877ec681f3Smrg } 887ec681f3Smrg 897ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(before_block->physical_successors); 907ec681f3Smrg i++) { 917ec681f3Smrg after_block->physical_successors[i] = 927ec681f3Smrg before_block->physical_successors[i]; 937ec681f3Smrg if (after_block->physical_successors[i]) { 947ec681f3Smrg replace_physical_pred(after_block->physical_successors[i], 957ec681f3Smrg before_block, after_block); 967ec681f3Smrg } 977ec681f3Smrg } 987ec681f3Smrg 997ec681f3Smrg before_block->successors[0] = then_block; 1007ec681f3Smrg before_block->successors[1] = after_block; 1017ec681f3Smrg before_block->physical_successors[0] = then_block; 1027ec681f3Smrg before_block->physical_successors[1] = after_block; 1037ec681f3Smrg ir3_block_add_predecessor(then_block, before_block); 1047ec681f3Smrg ir3_block_add_predecessor(after_block, before_block); 1057ec681f3Smrg ir3_block_add_physical_predecessor(then_block, before_block); 1067ec681f3Smrg ir3_block_add_physical_predecessor(after_block, before_block); 1077ec681f3Smrg 1087ec681f3Smrg then_block->successors[0] = after_block; 1097ec681f3Smrg then_block->physical_successors[0] = after_block; 1107ec681f3Smrg ir3_block_add_predecessor(after_block, then_block); 1117ec681f3Smrg ir3_block_add_physical_predecessor(after_block, then_block); 1127ec681f3Smrg 1137ec681f3Smrg foreach_instr_from_safe (rem_instr, &instr->node, 1147ec681f3Smrg &before_block->instr_list) { 1157ec681f3Smrg list_del(&rem_instr->node); 1167ec681f3Smrg list_addtail(&rem_instr->node, &after_block->instr_list); 1177ec681f3Smrg rem_instr->block = after_block; 1187ec681f3Smrg } 1197ec681f3Smrg 1207ec681f3Smrg after_block->brtype = before_block->brtype; 1217ec681f3Smrg after_block->condition = before_block->condition; 1227ec681f3Smrg 1237ec681f3Smrg *then = then_block; 1247ec681f3Smrg return after_block; 1257ec681f3Smrg} 1267ec681f3Smrg 1277ec681f3Smrgstatic bool 1287ec681f3Smrglower_block(struct ir3 *ir, struct ir3_block **block) 1297ec681f3Smrg{ 1307ec681f3Smrg bool progress = false; 1317ec681f3Smrg 1327ec681f3Smrg foreach_instr_safe (instr, &(*block)->instr_list) { 1337ec681f3Smrg switch (instr->opc) { 1347ec681f3Smrg case OPC_BALLOT_MACRO: 1357ec681f3Smrg case OPC_ANY_MACRO: 1367ec681f3Smrg case OPC_ALL_MACRO: 1377ec681f3Smrg case OPC_ELECT_MACRO: 1387ec681f3Smrg case OPC_READ_COND_MACRO: 1397ec681f3Smrg case OPC_READ_FIRST_MACRO: 1407ec681f3Smrg case OPC_SWZ_SHARED_MACRO: 1417ec681f3Smrg break; 1427ec681f3Smrg default: 1437ec681f3Smrg continue; 1447ec681f3Smrg } 1457ec681f3Smrg 1467ec681f3Smrg struct ir3_block *before_block = *block; 1477ec681f3Smrg struct ir3_block *then_block; 1487ec681f3Smrg struct ir3_block *after_block = 1497ec681f3Smrg split_block(ir, before_block, instr, &then_block); 1507ec681f3Smrg 1517ec681f3Smrg /* For ballot, the destination must be initialized to 0 before we do 1527ec681f3Smrg * the movmsk because the condition may be 0 and then the movmsk will 1537ec681f3Smrg * be skipped. Because it's a shared register we have to wrap the 1547ec681f3Smrg * initialization in a getone block. 1557ec681f3Smrg */ 1567ec681f3Smrg if (instr->opc == OPC_BALLOT_MACRO) { 1577ec681f3Smrg before_block->brtype = IR3_BRANCH_GETONE; 1587ec681f3Smrg before_block->condition = NULL; 1597ec681f3Smrg mov_immed(instr->dsts[0], then_block, 0); 1607ec681f3Smrg before_block = after_block; 1617ec681f3Smrg after_block = split_block(ir, before_block, instr, &then_block); 1627ec681f3Smrg } 1637ec681f3Smrg 1647ec681f3Smrg switch (instr->opc) { 1657ec681f3Smrg case OPC_BALLOT_MACRO: 1667ec681f3Smrg case OPC_READ_COND_MACRO: 1677ec681f3Smrg case OPC_ANY_MACRO: 1687ec681f3Smrg case OPC_ALL_MACRO: 1697ec681f3Smrg before_block->condition = instr->srcs[0]->def->instr; 1707ec681f3Smrg break; 1717ec681f3Smrg default: 1727ec681f3Smrg before_block->condition = NULL; 1737ec681f3Smrg break; 1747ec681f3Smrg } 1757ec681f3Smrg 1767ec681f3Smrg switch (instr->opc) { 1777ec681f3Smrg case OPC_BALLOT_MACRO: 1787ec681f3Smrg case OPC_READ_COND_MACRO: 1797ec681f3Smrg before_block->brtype = IR3_BRANCH_COND; 1807ec681f3Smrg break; 1817ec681f3Smrg case OPC_ANY_MACRO: 1827ec681f3Smrg before_block->brtype = IR3_BRANCH_ANY; 1837ec681f3Smrg break; 1847ec681f3Smrg case OPC_ALL_MACRO: 1857ec681f3Smrg before_block->brtype = IR3_BRANCH_ALL; 1867ec681f3Smrg break; 1877ec681f3Smrg case OPC_ELECT_MACRO: 1887ec681f3Smrg case OPC_READ_FIRST_MACRO: 1897ec681f3Smrg case OPC_SWZ_SHARED_MACRO: 1907ec681f3Smrg before_block->brtype = IR3_BRANCH_GETONE; 1917ec681f3Smrg break; 1927ec681f3Smrg default: 1937ec681f3Smrg unreachable("bad opcode"); 1947ec681f3Smrg } 1957ec681f3Smrg 1967ec681f3Smrg switch (instr->opc) { 1977ec681f3Smrg case OPC_ALL_MACRO: 1987ec681f3Smrg case OPC_ANY_MACRO: 1997ec681f3Smrg case OPC_ELECT_MACRO: 2007ec681f3Smrg mov_immed(instr->dsts[0], then_block, 1); 2017ec681f3Smrg mov_immed(instr->dsts[0], before_block, 0); 2027ec681f3Smrg break; 2037ec681f3Smrg 2047ec681f3Smrg case OPC_BALLOT_MACRO: { 2057ec681f3Smrg unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask); 2067ec681f3Smrg struct ir3_instruction *movmsk = 2077ec681f3Smrg ir3_instr_create(then_block, OPC_MOVMSK, 1, 0); 2087ec681f3Smrg ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags); 2097ec681f3Smrg movmsk->repeat = comp_count - 1; 2107ec681f3Smrg break; 2117ec681f3Smrg } 2127ec681f3Smrg 2137ec681f3Smrg case OPC_READ_COND_MACRO: 2147ec681f3Smrg case OPC_READ_FIRST_MACRO: { 2157ec681f3Smrg struct ir3_instruction *mov = 2167ec681f3Smrg ir3_instr_create(then_block, OPC_MOV, 1, 1); 2177ec681f3Smrg unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0; 2187ec681f3Smrg ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags); 2197ec681f3Smrg struct ir3_register *new_src = ir3_src_create(mov, 0, 0); 2207ec681f3Smrg *new_src = *instr->srcs[src]; 2217ec681f3Smrg mov->cat1.dst_type = TYPE_U32; 2227ec681f3Smrg mov->cat1.src_type = 2237ec681f3Smrg (new_src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; 2247ec681f3Smrg break; 2257ec681f3Smrg } 2267ec681f3Smrg 2277ec681f3Smrg case OPC_SWZ_SHARED_MACRO: { 2287ec681f3Smrg struct ir3_instruction *swz = 2297ec681f3Smrg ir3_instr_create(then_block, OPC_SWZ, 2, 2); 2307ec681f3Smrg ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags); 2317ec681f3Smrg ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags); 2327ec681f3Smrg ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags); 2337ec681f3Smrg ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags); 2347ec681f3Smrg swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32; 2357ec681f3Smrg swz->repeat = 1; 2367ec681f3Smrg break; 2377ec681f3Smrg } 2387ec681f3Smrg 2397ec681f3Smrg default: 2407ec681f3Smrg unreachable("bad opcode"); 2417ec681f3Smrg } 2427ec681f3Smrg 2437ec681f3Smrg *block = after_block; 2447ec681f3Smrg list_delinit(&instr->node); 2457ec681f3Smrg progress = true; 2467ec681f3Smrg } 2477ec681f3Smrg 2487ec681f3Smrg return progress; 2497ec681f3Smrg} 2507ec681f3Smrg 2517ec681f3Smrgbool 2527ec681f3Smrgir3_lower_subgroups(struct ir3 *ir) 2537ec681f3Smrg{ 2547ec681f3Smrg bool progress = false; 2557ec681f3Smrg 2567ec681f3Smrg foreach_block (block, &ir->block_list) 2577ec681f3Smrg progress |= lower_block(ir, &block); 2587ec681f3Smrg 2597ec681f3Smrg return progress; 2607ec681f3Smrg} 261