/* ir3_lower_subgroups.c revision 7ec681f3 */
1/* 2 * Copyright (C) 2021 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "ir3.h" 25 26/* Lower several macro-instructions needed for shader subgroup support that 27 * must be turned into if statements. We do this after RA and post-RA 28 * scheduling to give the scheduler a chance to rearrange them, because RA 29 * may need to insert OPC_META_READ_FIRST to handle splitting live ranges, and 30 * also because some (e.g. BALLOT and READ_FIRST) must produce a shared 31 * register that cannot be spilled to a normal register until after the if, 32 * which makes implementing spilling more complicated if they are already 33 * lowered. 
34 */ 35 36static void 37replace_pred(struct ir3_block *block, struct ir3_block *old_pred, 38 struct ir3_block *new_pred) 39{ 40 for (unsigned i = 0; i < block->predecessors_count; i++) { 41 if (block->predecessors[i] == old_pred) { 42 block->predecessors[i] = new_pred; 43 return; 44 } 45 } 46} 47 48static void 49replace_physical_pred(struct ir3_block *block, struct ir3_block *old_pred, 50 struct ir3_block *new_pred) 51{ 52 for (unsigned i = 0; i < block->physical_predecessors_count; i++) { 53 if (block->physical_predecessors[i] == old_pred) { 54 block->physical_predecessors[i] = new_pred; 55 return; 56 } 57 } 58} 59 60static void 61mov_immed(struct ir3_register *dst, struct ir3_block *block, unsigned immed) 62{ 63 struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1); 64 struct ir3_register *mov_dst = ir3_dst_create(mov, dst->num, dst->flags); 65 mov_dst->wrmask = dst->wrmask; 66 struct ir3_register *src = ir3_src_create( 67 mov, INVALID_REG, (dst->flags & IR3_REG_HALF) | IR3_REG_IMMED); 68 src->uim_val = immed; 69 mov->cat1.dst_type = (dst->flags & IR3_REG_HALF) ? 
TYPE_U16 : TYPE_U32; 70 mov->cat1.src_type = mov->cat1.dst_type; 71 mov->repeat = util_last_bit(mov_dst->wrmask) - 1; 72} 73 74static struct ir3_block * 75split_block(struct ir3 *ir, struct ir3_block *before_block, 76 struct ir3_instruction *instr, struct ir3_block **then) 77{ 78 struct ir3_block *then_block = ir3_block_create(ir); 79 struct ir3_block *after_block = ir3_block_create(ir); 80 list_add(&then_block->node, &before_block->node); 81 list_add(&after_block->node, &then_block->node); 82 83 for (unsigned i = 0; i < ARRAY_SIZE(before_block->successors); i++) { 84 after_block->successors[i] = before_block->successors[i]; 85 if (after_block->successors[i]) 86 replace_pred(after_block->successors[i], before_block, after_block); 87 } 88 89 for (unsigned i = 0; i < ARRAY_SIZE(before_block->physical_successors); 90 i++) { 91 after_block->physical_successors[i] = 92 before_block->physical_successors[i]; 93 if (after_block->physical_successors[i]) { 94 replace_physical_pred(after_block->physical_successors[i], 95 before_block, after_block); 96 } 97 } 98 99 before_block->successors[0] = then_block; 100 before_block->successors[1] = after_block; 101 before_block->physical_successors[0] = then_block; 102 before_block->physical_successors[1] = after_block; 103 ir3_block_add_predecessor(then_block, before_block); 104 ir3_block_add_predecessor(after_block, before_block); 105 ir3_block_add_physical_predecessor(then_block, before_block); 106 ir3_block_add_physical_predecessor(after_block, before_block); 107 108 then_block->successors[0] = after_block; 109 then_block->physical_successors[0] = after_block; 110 ir3_block_add_predecessor(after_block, then_block); 111 ir3_block_add_physical_predecessor(after_block, then_block); 112 113 foreach_instr_from_safe (rem_instr, &instr->node, 114 &before_block->instr_list) { 115 list_del(&rem_instr->node); 116 list_addtail(&rem_instr->node, &after_block->instr_list); 117 rem_instr->block = after_block; 118 } 119 120 after_block->brtype = 
before_block->brtype; 121 after_block->condition = before_block->condition; 122 123 *then = then_block; 124 return after_block; 125} 126 127static bool 128lower_block(struct ir3 *ir, struct ir3_block **block) 129{ 130 bool progress = false; 131 132 foreach_instr_safe (instr, &(*block)->instr_list) { 133 switch (instr->opc) { 134 case OPC_BALLOT_MACRO: 135 case OPC_ANY_MACRO: 136 case OPC_ALL_MACRO: 137 case OPC_ELECT_MACRO: 138 case OPC_READ_COND_MACRO: 139 case OPC_READ_FIRST_MACRO: 140 case OPC_SWZ_SHARED_MACRO: 141 break; 142 default: 143 continue; 144 } 145 146 struct ir3_block *before_block = *block; 147 struct ir3_block *then_block; 148 struct ir3_block *after_block = 149 split_block(ir, before_block, instr, &then_block); 150 151 /* For ballot, the destination must be initialized to 0 before we do 152 * the movmsk because the condition may be 0 and then the movmsk will 153 * be skipped. Because it's a shared register we have to wrap the 154 * initialization in a getone block. 155 */ 156 if (instr->opc == OPC_BALLOT_MACRO) { 157 before_block->brtype = IR3_BRANCH_GETONE; 158 before_block->condition = NULL; 159 mov_immed(instr->dsts[0], then_block, 0); 160 before_block = after_block; 161 after_block = split_block(ir, before_block, instr, &then_block); 162 } 163 164 switch (instr->opc) { 165 case OPC_BALLOT_MACRO: 166 case OPC_READ_COND_MACRO: 167 case OPC_ANY_MACRO: 168 case OPC_ALL_MACRO: 169 before_block->condition = instr->srcs[0]->def->instr; 170 break; 171 default: 172 before_block->condition = NULL; 173 break; 174 } 175 176 switch (instr->opc) { 177 case OPC_BALLOT_MACRO: 178 case OPC_READ_COND_MACRO: 179 before_block->brtype = IR3_BRANCH_COND; 180 break; 181 case OPC_ANY_MACRO: 182 before_block->brtype = IR3_BRANCH_ANY; 183 break; 184 case OPC_ALL_MACRO: 185 before_block->brtype = IR3_BRANCH_ALL; 186 break; 187 case OPC_ELECT_MACRO: 188 case OPC_READ_FIRST_MACRO: 189 case OPC_SWZ_SHARED_MACRO: 190 before_block->brtype = IR3_BRANCH_GETONE; 191 break; 192 
default: 193 unreachable("bad opcode"); 194 } 195 196 switch (instr->opc) { 197 case OPC_ALL_MACRO: 198 case OPC_ANY_MACRO: 199 case OPC_ELECT_MACRO: 200 mov_immed(instr->dsts[0], then_block, 1); 201 mov_immed(instr->dsts[0], before_block, 0); 202 break; 203 204 case OPC_BALLOT_MACRO: { 205 unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask); 206 struct ir3_instruction *movmsk = 207 ir3_instr_create(then_block, OPC_MOVMSK, 1, 0); 208 ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags); 209 movmsk->repeat = comp_count - 1; 210 break; 211 } 212 213 case OPC_READ_COND_MACRO: 214 case OPC_READ_FIRST_MACRO: { 215 struct ir3_instruction *mov = 216 ir3_instr_create(then_block, OPC_MOV, 1, 1); 217 unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0; 218 ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags); 219 struct ir3_register *new_src = ir3_src_create(mov, 0, 0); 220 *new_src = *instr->srcs[src]; 221 mov->cat1.dst_type = TYPE_U32; 222 mov->cat1.src_type = 223 (new_src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; 224 break; 225 } 226 227 case OPC_SWZ_SHARED_MACRO: { 228 struct ir3_instruction *swz = 229 ir3_instr_create(then_block, OPC_SWZ, 2, 2); 230 ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags); 231 ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags); 232 ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags); 233 ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags); 234 swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32; 235 swz->repeat = 1; 236 break; 237 } 238 239 default: 240 unreachable("bad opcode"); 241 } 242 243 *block = after_block; 244 list_delinit(&instr->node); 245 progress = true; 246 } 247 248 return progress; 249} 250 251bool 252ir3_lower_subgroups(struct ir3 *ir) 253{ 254 bool progress = false; 255 256 foreach_block (block, &ir->block_list) 257 progress |= lower_block(ir, &block); 258 259 return progress; 260} 261