1b8e80941Smrg/* 2b8e80941Smrg * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21b8e80941Smrg * SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Rob Clark <robclark@freedesktop.org> 25b8e80941Smrg */ 26b8e80941Smrg 27b8e80941Smrg#include "ir3.h" 28b8e80941Smrg 29b8e80941Smrg/* 30b8e80941Smrg * Find/group instruction neighbors: 31b8e80941Smrg */ 32b8e80941Smrg 33b8e80941Smrg/* bleh.. we need to do the same group_n() thing for both inputs/outputs 34b8e80941Smrg * (where we have a simple instr[] array), and fanin nodes (where we have 35b8e80941Smrg * an extra indirection via reg->instr). 36b8e80941Smrg */ 37b8e80941Smrgstruct group_ops { 38b8e80941Smrg struct ir3_instruction *(*get)(void *arr, int idx); 39b8e80941Smrg void (*insert_mov)(void *arr, int idx, struct ir3_instruction *instr); 40b8e80941Smrg}; 41b8e80941Smrg 42b8e80941Smrgstatic struct ir3_instruction *arr_get(void *arr, int idx) 43b8e80941Smrg{ 44b8e80941Smrg return ((struct ir3_instruction **)arr)[idx]; 45b8e80941Smrg} 46b8e80941Smrgstatic void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr) 47b8e80941Smrg{ 48b8e80941Smrg ((struct ir3_instruction **)arr)[idx] = 49b8e80941Smrg ir3_MOV(instr->block, instr, TYPE_F32); 50b8e80941Smrg} 51b8e80941Smrgstatic void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr) 52b8e80941Smrg{ 53b8e80941Smrg /* so, we can't insert a mov in front of a meta:in.. and the downstream 54b8e80941Smrg * instruction already has a pointer to 'instr'. So we cheat a bit and 55b8e80941Smrg * morph the meta:in instruction into a mov and insert a new meta:in 56b8e80941Smrg * in front. 57b8e80941Smrg */ 58b8e80941Smrg struct ir3_instruction *in; 59b8e80941Smrg 60b8e80941Smrg debug_assert(instr->regs_count == 1); 61b8e80941Smrg 62b8e80941Smrg in = ir3_instr_create(instr->block, OPC_META_INPUT); 63b8e80941Smrg in->inout.block = instr->block; 64b8e80941Smrg ir3_reg_create(in, instr->regs[0]->num, 0); 65b8e80941Smrg 66b8e80941Smrg /* create src reg for meta:in and fixup to now be a mov: */ 67b8e80941Smrg ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in; 68b8e80941Smrg instr->opc = OPC_MOV; 69b8e80941Smrg instr->cat1.src_type = TYPE_F32; 70b8e80941Smrg instr->cat1.dst_type = TYPE_F32; 71b8e80941Smrg 72b8e80941Smrg ((struct ir3_instruction **)arr)[idx] = in; 73b8e80941Smrg} 74b8e80941Smrgstatic struct group_ops arr_ops_out = { arr_get, arr_insert_mov_out }; 75b8e80941Smrgstatic struct group_ops arr_ops_in = { arr_get, arr_insert_mov_in }; 76b8e80941Smrg 77b8e80941Smrgstatic struct ir3_instruction *instr_get(void *arr, int idx) 78b8e80941Smrg{ 79b8e80941Smrg return ssa(((struct ir3_instruction *)arr)->regs[idx+1]); 80b8e80941Smrg} 81b8e80941Smrgstatic void 82b8e80941Smrginstr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) 83b8e80941Smrg{ 84b8e80941Smrg ((struct ir3_instruction *)arr)->regs[idx+1]->instr = 85b8e80941Smrg ir3_MOV(instr->block, instr, TYPE_F32); 86b8e80941Smrg} 87b8e80941Smrgstatic struct group_ops instr_ops = { instr_get, instr_insert_mov }; 88b8e80941Smrg 89b8e80941Smrg/* verify that cur != instr, but cur is also not in instr's neighbor-list: */ 90b8e80941Smrgstatic bool 91b8e80941Smrgin_neighbor_list(struct ir3_instruction *instr, struct ir3_instruction *cur, int pos) 92b8e80941Smrg{ 93b8e80941Smrg int idx = 0; 94b8e80941Smrg 95b8e80941Smrg if (!instr) 96b8e80941Smrg return false; 97b8e80941Smrg 98b8e80941Smrg if (instr == cur) 99b8e80941Smrg return true; 100b8e80941Smrg 101b8e80941Smrg for (instr = ir3_neighbor_first(instr); instr; instr = instr->cp.right) 102b8e80941Smrg if ((idx++ != pos) && (instr == cur)) 103b8e80941Smrg return true; 104b8e80941Smrg 105b8e80941Smrg return false; 106b8e80941Smrg} 107b8e80941Smrg 108b8e80941Smrgstatic void 109b8e80941Smrggroup_n(struct group_ops *ops, void *arr, unsigned n) 110b8e80941Smrg{ 111b8e80941Smrg unsigned i, j; 112b8e80941Smrg 113b8e80941Smrg /* first pass, figure out what has conflicts and needs a mov 114b8e80941Smrg * inserted. Do this up front, before starting to setup 115b8e80941Smrg * left/right neighbor pointers. Trying to do it in a single 116b8e80941Smrg * pass could result in a situation where we can't even setup 117b8e80941Smrg * the mov's right neighbor ptr if the next instr also needs 118b8e80941Smrg * a mov. 119b8e80941Smrg */ 120b8e80941Smrgrestart: 121b8e80941Smrg for (i = 0; i < n; i++) { 122b8e80941Smrg struct ir3_instruction *instr = ops->get(arr, i); 123b8e80941Smrg if (instr) { 124b8e80941Smrg struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; 125b8e80941Smrg struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL; 126b8e80941Smrg bool conflict; 127b8e80941Smrg 128b8e80941Smrg /* check for left/right neighbor conflicts: */ 129b8e80941Smrg conflict = conflicts(instr->cp.left, left) || 130b8e80941Smrg conflicts(instr->cp.right, right); 131b8e80941Smrg 132b8e80941Smrg /* Mixing array elements and higher register classes 133b8e80941Smrg * (ie. groups) doesn't really work out in RA. See: 134b8e80941Smrg * 135b8e80941Smrg * https://trello.com/c/DqeDkeVf/156-bug-with-stk-70frag 136b8e80941Smrg */ 137b8e80941Smrg if (instr->regs[0]->flags & IR3_REG_ARRAY) 138b8e80941Smrg conflict = true; 139b8e80941Smrg 140b8e80941Smrg /* we also can't have an instr twice in the group: */ 141b8e80941Smrg for (j = i + 1; (j < n) && !conflict; j++) 142b8e80941Smrg if (in_neighbor_list(ops->get(arr, j), instr, i)) 143b8e80941Smrg conflict = true; 144b8e80941Smrg 145b8e80941Smrg if (conflict) { 146b8e80941Smrg ops->insert_mov(arr, i, instr); 147b8e80941Smrg /* inserting the mov may have caused a conflict 148b8e80941Smrg * against the previous: 149b8e80941Smrg */ 150b8e80941Smrg goto restart; 151b8e80941Smrg } 152b8e80941Smrg } 153b8e80941Smrg } 154b8e80941Smrg 155b8e80941Smrg /* second pass, now that we've inserted mov's, fixup left/right 156b8e80941Smrg * neighbors. This is guaranteed to succeed, since by definition 157b8e80941Smrg * the newly inserted mov's cannot conflict with anything. 158b8e80941Smrg */ 159b8e80941Smrg for (i = 0; i < n; i++) { 160b8e80941Smrg struct ir3_instruction *instr = ops->get(arr, i); 161b8e80941Smrg if (instr) { 162b8e80941Smrg struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; 163b8e80941Smrg struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL; 164b8e80941Smrg 165b8e80941Smrg debug_assert(!conflicts(instr->cp.left, left)); 166b8e80941Smrg if (left) { 167b8e80941Smrg instr->cp.left_cnt++; 168b8e80941Smrg instr->cp.left = left; 169b8e80941Smrg } 170b8e80941Smrg 171b8e80941Smrg debug_assert(!conflicts(instr->cp.right, right)); 172b8e80941Smrg if (right) { 173b8e80941Smrg instr->cp.right_cnt++; 174b8e80941Smrg instr->cp.right = right; 175b8e80941Smrg } 176b8e80941Smrg } 177b8e80941Smrg } 178b8e80941Smrg} 179b8e80941Smrg 180b8e80941Smrgstatic void 181b8e80941Smrginstr_find_neighbors(struct ir3_instruction *instr) 182b8e80941Smrg{ 183b8e80941Smrg struct ir3_instruction *src; 184b8e80941Smrg 185b8e80941Smrg if (ir3_instr_check_mark(instr)) 186b8e80941Smrg return; 187b8e80941Smrg 188b8e80941Smrg if (instr->opc == OPC_META_FI) 189b8e80941Smrg group_n(&instr_ops, instr, instr->regs_count - 1); 190b8e80941Smrg 191b8e80941Smrg foreach_ssa_src(src, instr) 192b8e80941Smrg instr_find_neighbors(src); 193b8e80941Smrg} 194b8e80941Smrg 195b8e80941Smrg/* a bit of sadness.. we can't have "holes" in inputs from PoV of 196b8e80941Smrg * register assignment, they still need to be grouped together. So 197b8e80941Smrg * we need to insert dummy/padding instruction for grouping, and 198b8e80941Smrg * then take it back out again before anyone notices. 199b8e80941Smrg */ 200b8e80941Smrgstatic void 201b8e80941Smrgpad_and_group_input(struct ir3_instruction **input, unsigned n) 202b8e80941Smrg{ 203b8e80941Smrg int i, mask = 0; 204b8e80941Smrg struct ir3_block *block = NULL; 205b8e80941Smrg 206b8e80941Smrg for (i = n - 1; i >= 0; i--) { 207b8e80941Smrg struct ir3_instruction *instr = input[i]; 208b8e80941Smrg if (instr) { 209b8e80941Smrg block = instr->block; 210b8e80941Smrg } else if (block) { 211b8e80941Smrg instr = ir3_NOP(block); 212b8e80941Smrg ir3_reg_create(instr, 0, IR3_REG_SSA); /* dummy dst */ 213b8e80941Smrg input[i] = instr; 214b8e80941Smrg mask |= (1 << i); 215b8e80941Smrg } 216b8e80941Smrg } 217b8e80941Smrg 218b8e80941Smrg group_n(&arr_ops_in, input, n); 219b8e80941Smrg 220b8e80941Smrg for (i = 0; i < n; i++) { 221b8e80941Smrg if (mask & (1 << i)) 222b8e80941Smrg input[i] = NULL; 223b8e80941Smrg } 224b8e80941Smrg} 225b8e80941Smrg 226b8e80941Smrgstatic void 227b8e80941Smrgfind_neighbors(struct ir3 *ir) 228b8e80941Smrg{ 229b8e80941Smrg unsigned i; 230b8e80941Smrg 231b8e80941Smrg /* shader inputs/outputs themselves must be contiguous as well: 232b8e80941Smrg * 233b8e80941Smrg * NOTE: group inputs first, since we only insert mov's 234b8e80941Smrg * *before* the conflicted instr (and that would go badly 235b8e80941Smrg * for inputs). By doing inputs first, we should never 236b8e80941Smrg * have a conflict on inputs.. pushing any conflict to 237b8e80941Smrg * resolve to the outputs, for stuff like: 238b8e80941Smrg * 239b8e80941Smrg * MOV OUT[n], IN[m].wzyx 240b8e80941Smrg * 241b8e80941Smrg * NOTE: we assume here inputs/outputs are grouped in vec4. 242b8e80941Smrg * This logic won't quite cut it if we don't align smaller 243b8e80941Smrg * on vec4 boundaries 244b8e80941Smrg */ 245b8e80941Smrg for (i = 0; i < ir->ninputs; i += 4) 246b8e80941Smrg pad_and_group_input(&ir->inputs[i], 4); 247b8e80941Smrg for (i = 0; i < ir->noutputs; i += 4) 248b8e80941Smrg group_n(&arr_ops_out, &ir->outputs[i], 4); 249b8e80941Smrg 250b8e80941Smrg for (i = 0; i < ir->noutputs; i++) { 251b8e80941Smrg if (ir->outputs[i]) { 252b8e80941Smrg struct ir3_instruction *instr = ir->outputs[i]; 253b8e80941Smrg instr_find_neighbors(instr); 254b8e80941Smrg } 255b8e80941Smrg } 256b8e80941Smrg 257b8e80941Smrg list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { 258b8e80941Smrg for (i = 0; i < block->keeps_count; i++) { 259b8e80941Smrg struct ir3_instruction *instr = block->keeps[i]; 260b8e80941Smrg instr_find_neighbors(instr); 261b8e80941Smrg } 262b8e80941Smrg 263b8e80941Smrg /* We also need to account for if-condition: */ 264b8e80941Smrg if (block->condition) 265b8e80941Smrg instr_find_neighbors(block->condition); 266b8e80941Smrg } 267b8e80941Smrg} 268b8e80941Smrg 269b8e80941Smrgvoid 270b8e80941Smrgir3_group(struct ir3 *ir) 271b8e80941Smrg{ 272b8e80941Smrg ir3_clear_mark(ir); 273b8e80941Smrg find_neighbors(ir); 274b8e80941Smrg} 275