17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2021 Valve Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include "ir3.h"
257ec681f3Smrg
/* Lower several macro-instructions needed for shader subgroup support that
 * must be turned into if statements. We do this after RA and post-RA
 * scheduling to give the scheduler a chance to rearrange them, because RA
 * may need to insert OPC_META_READ_FIRST to handle splitting live ranges, and
 * also because some (e.g. BALLOT and READ_FIRST) must produce a shared
 * register that cannot be spilled to a normal register until after the if,
 * which makes implementing spilling more complicated if they are already
 * lowered.
 */
357ec681f3Smrg
367ec681f3Smrgstatic void
377ec681f3Smrgreplace_pred(struct ir3_block *block, struct ir3_block *old_pred,
387ec681f3Smrg             struct ir3_block *new_pred)
397ec681f3Smrg{
407ec681f3Smrg   for (unsigned i = 0; i < block->predecessors_count; i++) {
417ec681f3Smrg      if (block->predecessors[i] == old_pred) {
427ec681f3Smrg         block->predecessors[i] = new_pred;
437ec681f3Smrg         return;
447ec681f3Smrg      }
457ec681f3Smrg   }
467ec681f3Smrg}
477ec681f3Smrg
487ec681f3Smrgstatic void
497ec681f3Smrgreplace_physical_pred(struct ir3_block *block, struct ir3_block *old_pred,
507ec681f3Smrg                      struct ir3_block *new_pred)
517ec681f3Smrg{
527ec681f3Smrg   for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
537ec681f3Smrg      if (block->physical_predecessors[i] == old_pred) {
547ec681f3Smrg         block->physical_predecessors[i] = new_pred;
557ec681f3Smrg         return;
567ec681f3Smrg      }
577ec681f3Smrg   }
587ec681f3Smrg}
597ec681f3Smrg
607ec681f3Smrgstatic void
617ec681f3Smrgmov_immed(struct ir3_register *dst, struct ir3_block *block, unsigned immed)
627ec681f3Smrg{
637ec681f3Smrg   struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
647ec681f3Smrg   struct ir3_register *mov_dst = ir3_dst_create(mov, dst->num, dst->flags);
657ec681f3Smrg   mov_dst->wrmask = dst->wrmask;
667ec681f3Smrg   struct ir3_register *src = ir3_src_create(
677ec681f3Smrg      mov, INVALID_REG, (dst->flags & IR3_REG_HALF) | IR3_REG_IMMED);
687ec681f3Smrg   src->uim_val = immed;
697ec681f3Smrg   mov->cat1.dst_type = (dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
707ec681f3Smrg   mov->cat1.src_type = mov->cat1.dst_type;
717ec681f3Smrg   mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
727ec681f3Smrg}
737ec681f3Smrg
747ec681f3Smrgstatic struct ir3_block *
757ec681f3Smrgsplit_block(struct ir3 *ir, struct ir3_block *before_block,
767ec681f3Smrg            struct ir3_instruction *instr, struct ir3_block **then)
777ec681f3Smrg{
787ec681f3Smrg   struct ir3_block *then_block = ir3_block_create(ir);
797ec681f3Smrg   struct ir3_block *after_block = ir3_block_create(ir);
807ec681f3Smrg   list_add(&then_block->node, &before_block->node);
817ec681f3Smrg   list_add(&after_block->node, &then_block->node);
827ec681f3Smrg
837ec681f3Smrg   for (unsigned i = 0; i < ARRAY_SIZE(before_block->successors); i++) {
847ec681f3Smrg      after_block->successors[i] = before_block->successors[i];
857ec681f3Smrg      if (after_block->successors[i])
867ec681f3Smrg         replace_pred(after_block->successors[i], before_block, after_block);
877ec681f3Smrg   }
887ec681f3Smrg
897ec681f3Smrg   for (unsigned i = 0; i < ARRAY_SIZE(before_block->physical_successors);
907ec681f3Smrg        i++) {
917ec681f3Smrg      after_block->physical_successors[i] =
927ec681f3Smrg         before_block->physical_successors[i];
937ec681f3Smrg      if (after_block->physical_successors[i]) {
947ec681f3Smrg         replace_physical_pred(after_block->physical_successors[i],
957ec681f3Smrg                               before_block, after_block);
967ec681f3Smrg      }
977ec681f3Smrg   }
987ec681f3Smrg
997ec681f3Smrg   before_block->successors[0] = then_block;
1007ec681f3Smrg   before_block->successors[1] = after_block;
1017ec681f3Smrg   before_block->physical_successors[0] = then_block;
1027ec681f3Smrg   before_block->physical_successors[1] = after_block;
1037ec681f3Smrg   ir3_block_add_predecessor(then_block, before_block);
1047ec681f3Smrg   ir3_block_add_predecessor(after_block, before_block);
1057ec681f3Smrg   ir3_block_add_physical_predecessor(then_block, before_block);
1067ec681f3Smrg   ir3_block_add_physical_predecessor(after_block, before_block);
1077ec681f3Smrg
1087ec681f3Smrg   then_block->successors[0] = after_block;
1097ec681f3Smrg   then_block->physical_successors[0] = after_block;
1107ec681f3Smrg   ir3_block_add_predecessor(after_block, then_block);
1117ec681f3Smrg   ir3_block_add_physical_predecessor(after_block, then_block);
1127ec681f3Smrg
1137ec681f3Smrg   foreach_instr_from_safe (rem_instr, &instr->node,
1147ec681f3Smrg                            &before_block->instr_list) {
1157ec681f3Smrg      list_del(&rem_instr->node);
1167ec681f3Smrg      list_addtail(&rem_instr->node, &after_block->instr_list);
1177ec681f3Smrg      rem_instr->block = after_block;
1187ec681f3Smrg   }
1197ec681f3Smrg
1207ec681f3Smrg   after_block->brtype = before_block->brtype;
1217ec681f3Smrg   after_block->condition = before_block->condition;
1227ec681f3Smrg
1237ec681f3Smrg   *then = then_block;
1247ec681f3Smrg   return after_block;
1257ec681f3Smrg}
1267ec681f3Smrg
1277ec681f3Smrgstatic bool
1287ec681f3Smrglower_block(struct ir3 *ir, struct ir3_block **block)
1297ec681f3Smrg{
1307ec681f3Smrg   bool progress = false;
1317ec681f3Smrg
1327ec681f3Smrg   foreach_instr_safe (instr, &(*block)->instr_list) {
1337ec681f3Smrg      switch (instr->opc) {
1347ec681f3Smrg      case OPC_BALLOT_MACRO:
1357ec681f3Smrg      case OPC_ANY_MACRO:
1367ec681f3Smrg      case OPC_ALL_MACRO:
1377ec681f3Smrg      case OPC_ELECT_MACRO:
1387ec681f3Smrg      case OPC_READ_COND_MACRO:
1397ec681f3Smrg      case OPC_READ_FIRST_MACRO:
1407ec681f3Smrg      case OPC_SWZ_SHARED_MACRO:
1417ec681f3Smrg         break;
1427ec681f3Smrg      default:
1437ec681f3Smrg         continue;
1447ec681f3Smrg      }
1457ec681f3Smrg
1467ec681f3Smrg      struct ir3_block *before_block = *block;
1477ec681f3Smrg      struct ir3_block *then_block;
1487ec681f3Smrg      struct ir3_block *after_block =
1497ec681f3Smrg         split_block(ir, before_block, instr, &then_block);
1507ec681f3Smrg
1517ec681f3Smrg      /* For ballot, the destination must be initialized to 0 before we do
1527ec681f3Smrg       * the movmsk because the condition may be 0 and then the movmsk will
1537ec681f3Smrg       * be skipped. Because it's a shared register we have to wrap the
1547ec681f3Smrg       * initialization in a getone block.
1557ec681f3Smrg       */
1567ec681f3Smrg      if (instr->opc == OPC_BALLOT_MACRO) {
1577ec681f3Smrg         before_block->brtype = IR3_BRANCH_GETONE;
1587ec681f3Smrg         before_block->condition = NULL;
1597ec681f3Smrg         mov_immed(instr->dsts[0], then_block, 0);
1607ec681f3Smrg         before_block = after_block;
1617ec681f3Smrg         after_block = split_block(ir, before_block, instr, &then_block);
1627ec681f3Smrg      }
1637ec681f3Smrg
1647ec681f3Smrg      switch (instr->opc) {
1657ec681f3Smrg      case OPC_BALLOT_MACRO:
1667ec681f3Smrg      case OPC_READ_COND_MACRO:
1677ec681f3Smrg      case OPC_ANY_MACRO:
1687ec681f3Smrg      case OPC_ALL_MACRO:
1697ec681f3Smrg         before_block->condition = instr->srcs[0]->def->instr;
1707ec681f3Smrg         break;
1717ec681f3Smrg      default:
1727ec681f3Smrg         before_block->condition = NULL;
1737ec681f3Smrg         break;
1747ec681f3Smrg      }
1757ec681f3Smrg
1767ec681f3Smrg      switch (instr->opc) {
1777ec681f3Smrg      case OPC_BALLOT_MACRO:
1787ec681f3Smrg      case OPC_READ_COND_MACRO:
1797ec681f3Smrg         before_block->brtype = IR3_BRANCH_COND;
1807ec681f3Smrg         break;
1817ec681f3Smrg      case OPC_ANY_MACRO:
1827ec681f3Smrg         before_block->brtype = IR3_BRANCH_ANY;
1837ec681f3Smrg         break;
1847ec681f3Smrg      case OPC_ALL_MACRO:
1857ec681f3Smrg         before_block->brtype = IR3_BRANCH_ALL;
1867ec681f3Smrg         break;
1877ec681f3Smrg      case OPC_ELECT_MACRO:
1887ec681f3Smrg      case OPC_READ_FIRST_MACRO:
1897ec681f3Smrg      case OPC_SWZ_SHARED_MACRO:
1907ec681f3Smrg         before_block->brtype = IR3_BRANCH_GETONE;
1917ec681f3Smrg         break;
1927ec681f3Smrg      default:
1937ec681f3Smrg         unreachable("bad opcode");
1947ec681f3Smrg      }
1957ec681f3Smrg
1967ec681f3Smrg      switch (instr->opc) {
1977ec681f3Smrg      case OPC_ALL_MACRO:
1987ec681f3Smrg      case OPC_ANY_MACRO:
1997ec681f3Smrg      case OPC_ELECT_MACRO:
2007ec681f3Smrg         mov_immed(instr->dsts[0], then_block, 1);
2017ec681f3Smrg         mov_immed(instr->dsts[0], before_block, 0);
2027ec681f3Smrg         break;
2037ec681f3Smrg
2047ec681f3Smrg      case OPC_BALLOT_MACRO: {
2057ec681f3Smrg         unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask);
2067ec681f3Smrg         struct ir3_instruction *movmsk =
2077ec681f3Smrg            ir3_instr_create(then_block, OPC_MOVMSK, 1, 0);
2087ec681f3Smrg         ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags);
2097ec681f3Smrg         movmsk->repeat = comp_count - 1;
2107ec681f3Smrg         break;
2117ec681f3Smrg      }
2127ec681f3Smrg
2137ec681f3Smrg      case OPC_READ_COND_MACRO:
2147ec681f3Smrg      case OPC_READ_FIRST_MACRO: {
2157ec681f3Smrg         struct ir3_instruction *mov =
2167ec681f3Smrg            ir3_instr_create(then_block, OPC_MOV, 1, 1);
2177ec681f3Smrg         unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0;
2187ec681f3Smrg         ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags);
2197ec681f3Smrg         struct ir3_register *new_src = ir3_src_create(mov, 0, 0);
2207ec681f3Smrg         *new_src = *instr->srcs[src];
2217ec681f3Smrg         mov->cat1.dst_type = TYPE_U32;
2227ec681f3Smrg         mov->cat1.src_type =
2237ec681f3Smrg            (new_src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
2247ec681f3Smrg         break;
2257ec681f3Smrg      }
2267ec681f3Smrg
2277ec681f3Smrg      case OPC_SWZ_SHARED_MACRO: {
2287ec681f3Smrg         struct ir3_instruction *swz =
2297ec681f3Smrg            ir3_instr_create(then_block, OPC_SWZ, 2, 2);
2307ec681f3Smrg         ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags);
2317ec681f3Smrg         ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags);
2327ec681f3Smrg         ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags);
2337ec681f3Smrg         ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags);
2347ec681f3Smrg         swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32;
2357ec681f3Smrg         swz->repeat = 1;
2367ec681f3Smrg         break;
2377ec681f3Smrg      }
2387ec681f3Smrg
2397ec681f3Smrg      default:
2407ec681f3Smrg         unreachable("bad opcode");
2417ec681f3Smrg      }
2427ec681f3Smrg
2437ec681f3Smrg      *block = after_block;
2447ec681f3Smrg      list_delinit(&instr->node);
2457ec681f3Smrg      progress = true;
2467ec681f3Smrg   }
2477ec681f3Smrg
2487ec681f3Smrg   return progress;
2497ec681f3Smrg}
2507ec681f3Smrg
2517ec681f3Smrgbool
2527ec681f3Smrgir3_lower_subgroups(struct ir3 *ir)
2537ec681f3Smrg{
2547ec681f3Smrg   bool progress = false;
2557ec681f3Smrg
2567ec681f3Smrg   foreach_block (block, &ir->block_list)
2577ec681f3Smrg      progress |= lower_block(ir, &block);
2587ec681f3Smrg
2597ec681f3Smrg   return progress;
2607ec681f3Smrg}
261