/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22 * 23 * Authors: 24 * Jonathan Marek <jonathan@marek.ca> 25 */ 26 27#include "ir2_private.h" 28 29static bool is_mov(struct ir2_instr *instr) 30{ 31 return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv && 32 instr->src_count == 1; 33} 34 35static void src_combine(struct ir2_src *src, struct ir2_src b) 36{ 37 src->num = b.num; 38 src->type = b.type; 39 src->swizzle = swiz_merge(b.swizzle, src->swizzle); 40 if (!src->abs) /* if we have abs we don't care about previous negate */ 41 src->negate ^= b.negate; 42 src->abs |= b.abs; 43} 44 45/* cp_src: replace src regs when they refer to a mov instruction 46 * example: 47 * ALU: MAXv R7 = C7, C7 48 * ALU: MULADDv R7 = R7, R10, R0.xxxx 49 * becomes: 50 * ALU: MULADDv R7 = C7, R10, R0.xxxx 51 */ 52void cp_src(struct ir2_context *ctx) 53{ 54 struct ir2_instr *p; 55 56 ir2_foreach_instr(instr, ctx) { 57 ir2_foreach_src(src, instr) { 58 /* loop to replace recursively */ 59 do { 60 if (src->type != IR2_SRC_SSA) 61 break; 62 63 p = &ctx->instr[src->num]; 64 /* don't work across blocks to avoid possible issues */ 65 if (p->block_idx != instr->block_idx) 66 break; 67 68 if (!is_mov(p)) 69 break; 70 71 /* cant apply abs to const src, const src only for alu */ 72 if (p->src[0].type == IR2_SRC_CONST && 73 (src->abs || instr->type != IR2_ALU)) 74 break; 75 76 src_combine(src, p->src[0]); 77 } while (1); 78 } 79 } 80} 81 82/* cp_export: replace mov to export when possible 83 * in the cp_src pass we bypass any mov instructions related 84 * to the src registers, but for exports for need something different 85 * example: 86 * ALU: MAXv R3.x___ = C9.x???, C9.x??? 87 * ALU: MAXv R3._y__ = R0.?x??, C8.?x?? 88 * ALU: MAXv export0 = R3.yyyx, R3.yyyx 89 * becomes: 90 * ALU: MAXv export0.___w = C9.???x, C9.???x 91 * ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx? 
92 * 93 */ 94void cp_export(struct ir2_context *ctx) 95{ 96 struct ir2_instr *c[4], *ins[4]; 97 struct ir2_src *src; 98 struct ir2_reg *reg; 99 unsigned ncomp; 100 101 ir2_foreach_instr(instr, ctx) { 102 if (!is_export(instr)) /* TODO */ 103 continue; 104 105 if (!is_mov(instr)) 106 continue; 107 108 src = &instr->src[0]; 109 110 if (src->negate || src->abs) /* TODO handle these cases */ 111 continue; 112 113 if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST) 114 continue; 115 116 reg = get_reg_src(ctx, src); 117 ncomp = dst_ncomp(instr); 118 119 unsigned reswiz[4] = {}; 120 unsigned num_instr = 0; 121 122 /* fill array c with pointers to instrs that write each component */ 123 if (src->type == IR2_SRC_SSA) { 124 struct ir2_instr *instr = &ctx->instr[src->num]; 125 126 if (instr->type != IR2_ALU) 127 continue; 128 129 for (int i = 0; i < ncomp; i++) 130 c[i] = instr; 131 132 ins[num_instr++] = instr; 133 reswiz[0] = src->swizzle; 134 } else { 135 bool ok = true; 136 unsigned write_mask = 0; 137 138 ir2_foreach_instr(instr, ctx) { 139 if (instr->is_ssa || instr->reg != reg) 140 continue; 141 142 /* set by non-ALU */ 143 if (instr->type != IR2_ALU) { 144 ok = false; 145 break; 146 } 147 148 /* component written more than once */ 149 if (write_mask & instr->alu.write_mask) { 150 ok = false; 151 break; 152 } 153 154 write_mask |= instr->alu.write_mask; 155 156 /* src pointers for components */ 157 for (int i = 0, j = 0; i < 4; i++) { 158 unsigned k = swiz_get(src->swizzle, i); 159 if (instr->alu.write_mask & 1 << k) { 160 c[i] = instr; 161 162 /* reswiz = compressed src->swizzle */ 163 unsigned x = 0; 164 for (int i = 0; i < k; i++) 165 x += !!(instr->alu.write_mask & 1 << i); 166 167 assert(src->swizzle || x == j); 168 reswiz[num_instr] |= swiz_set(x, j++); 169 } 170 } 171 ins[num_instr++] = instr; 172 } 173 if (!ok) 174 continue; 175 } 176 177 bool redirect = true; 178 179 /* must all be in same block */ 180 for (int i = 0; i < ncomp; i++) 181 redirect &= 
(c[i]->block_idx == instr->block_idx); 182 183 /* no other instr using the value */ 184 ir2_foreach_instr(p, ctx) { 185 if (p == instr) 186 continue; 187 ir2_foreach_src(src, p) 188 redirect &= reg != get_reg_src(ctx, src); 189 } 190 191 if (!redirect) 192 continue; 193 194 /* redirect the instructions writing to the register */ 195 for (int i = 0; i < num_instr; i++) { 196 struct ir2_instr *p = ins[i]; 197 198 p->alu.export = instr->alu.export; 199 p->alu.write_mask = 0; 200 p->is_ssa = true; 201 p->ssa.ncomp = 0; 202 memset(p->ssa.comp, 0, sizeof(p->ssa.comp)); 203 204 switch (instr->alu.vector_opc) { 205 case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv: 206 case DOT2ADDv: 207 case DOT3v: 208 case DOT4v: 209 case CUBEv: 210 continue; 211 default: 212 break; 213 } 214 ir2_foreach_src(s, p) 215 swiz_merge_p(&s->swizzle, reswiz[i]); 216 } 217 218 for (int i = 0; i < ncomp; i++) { 219 c[i]->alu.write_mask |= (1 << i); 220 c[i]->ssa.ncomp++; 221 } 222 instr->type = IR2_NONE; 223 instr->need_emit = false; 224 } 225} 226