/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"

static inline struct qpu_reg
qpu_reg(int index)
{
        struct qpu_reg reg = {
                .magic = false,
                .index = index,
        };
        return reg;
}

static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)
{
        struct qpu_reg reg = {
                .magic = true,
                .index = waddr,
        };
        return reg;
}

static inline struct qpu_reg
qpu_acc(int acc)
{
        return qpu_magic(V3D_QPU_WADDR_R0 + acc);
}

struct v3d_qpu_instr
v3d_qpu_nop(void)
{
        struct v3d_qpu_instr instr = {
                .type = V3D_QPU_INSTR_TYPE_ALU,
                .alu = {
                        .add = {
                                .op = V3D_QPU_A_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                        .mul = {
                                .op = V3D_QPU_M_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                }
        };

        return instr;
}

static struct qinst *
vir_nop(void)
{
        struct qreg undef = vir_nop_reg();
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
}

static struct qinst *
new_qpu_nop_before(struct qinst *inst)
{
        struct qinst *q = vir_nop();

        list_addtail(&q->link, &inst->link);

        return q;
}

/**
 * Allocates the src register (accumulator or register file) into the RADDR
 * fields of the instruction.
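 *
 * For register-file sources, raddr A is claimed first when no other mux
 * uses it; raddr B is the fallback, and is also the path used by small
 * immediates, so a second conflicting raddr B value would be a bug (see
 * the assert below).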
 */
static void
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
        if (src.smimm) {
                assert(instr->sig.small_imm);
                *mux = V3D_QPU_MUX_B;
                return;
        }

        if (src.magic) {
                assert(src.index >= V3D_QPU_WADDR_R0 &&
                       src.index <= V3D_QPU_WADDR_R5);
                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
                return;
        }

        if (instr->alu.add.a != V3D_QPU_MUX_A &&
            instr->alu.add.b != V3D_QPU_MUX_A &&
            instr->alu.mul.a != V3D_QPU_MUX_A &&
            instr->alu.mul.b != V3D_QPU_MUX_A) {
                instr->raddr_a = src.index;
                *mux = V3D_QPU_MUX_A;
        } else {
                if (instr->raddr_a == src.index) {
                        *mux = V3D_QPU_MUX_A;
                } else {
                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
                                 instr->alu.add.b == V3D_QPU_MUX_B &&
                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
                               src.index == instr->raddr_b);

                        instr->raddr_b = src.index;
                        *mux = V3D_QPU_MUX_B;
                }
        }
}

static bool
is_no_op_mov(struct qinst *qinst)
{
        static const struct v3d_qpu_sig no_sig = {0};

        /* Make sure it's just a lone MOV. */
        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
                return false;
        }

        /* Check if it's a MOV from a register to itself. */
        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        if (qinst->qpu.alu.mul.magic_write) {
                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
                        return false;

                if (qinst->qpu.alu.mul.a !=
                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
                        return false;
                }
        } else {
                int raddr;

                switch (qinst->qpu.alu.mul.a) {
                case V3D_QPU_MUX_A:
                        raddr = qinst->qpu.raddr_a;
                        break;
                case V3D_QPU_MUX_B:
                        raddr = qinst->qpu.raddr_b;
                        break;
                default:
                        return false;
                }
                if (raddr != waddr)
                        return false;
        }

        /* No packing or flags updates, or we need to execute the
         * instruction.
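         * (A MOV with a condition or flag update still has an effect even
         * when the source and destination match, so it must be kept.)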
         */
        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
                return false;
        }

        return true;
}

/**
 * Translates the VIR instructions of a block into their QPU encodings,
 * mapping temporaries to their allocated registers and filling in the
 * raddr, waddr, and signal fields of each instruction.
 */
static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
        int last_vpm_read_index = -1;

        vir_for_each_inst_safe(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                struct qinst *temp;

                if (vir_has_uniform(qinst))
                        c->num_uniforms++;

                int nsrc = vir_get_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                        case QFILE_LOAD_IMM:
                                src[i] = qpu_acc(0);
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].smimm = true;
                                break;

                        case QFILE_VPM:
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;

                                temp = new_qpu_nop_before(qinst);
                                temp->qpu.sig.ldvpm = true;

                                src[i] = qpu_acc(3);
                                break;
                        }
                }

                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_VPM:
                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
                        break;

                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (qinst->qpu.sig.ldunif) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                if (!dst.magic ||
                                    dst.index != V3D_QPU_WADDR_R5) {
                                        assert(c->devinfo->ver >= 40);

                                        qinst->qpu.sig.ldunif = false;
                                        qinst->qpu.sig.ldunifrf = true;
                                        qinst->qpu.sig_addr = dst.index;
                                        qinst->qpu.sig_magic = dst.magic;
                                }
                        } else if (v3d_qpu_sig_writes_address(c->devinfo,
                                                              &qinst->qpu.sig)) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                qinst->qpu.sig_addr = dst.index;
                                qinst->qpu.sig_magic = dst.magic;
                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b, src[1]);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b, src[1]);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;

                                if (is_no_op_mov(qinst)) {
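                                        /* The MOV is provably a no-op at
                                         * this point, so drop it instead of
                                         * emitting a wasted instruction.
                                         */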
                                        vir_remove_instruction(c, qinst);
                                        continue;
                                }
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}

/* Returns whether the encoded instruction consumes a uniform from the
 * uniform stream, either through a load signal or an implicit read.
 */
static bool
reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
{
        struct v3d_qpu_instr qpu;
        MAYBE_UNUSED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
        assert(ok);

        if (qpu.sig.ldunif ||
            qpu.sig.ldunifrf ||
            qpu.sig.wrtmuc) {
                return true;
        }

        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
                return true;

        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                if (qpu.alu.add.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
                        return true;
                }

                if (qpu.alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

static void
v3d_dump_qpu(struct v3d_compile *c)
{
        fprintf(stderr, "%s prog %d/%d QPU:\n",
                vir_get_stage_name(c),
                c->program_id, c->variant_id);

        int next_uniform = 0;
        for (int i = 0; i < c->qpu_inst_count; i++) {
                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);

                /* We can only do this on 4.x, because we're not tracking TMU
                 * implicit uniforms here on 3.x.
                 */
                if (c->devinfo->ver >= 40 &&
                    reads_uniform(c->devinfo, c->qpu_insts[i])) {
                        fprintf(stderr, " (");
                        vir_dump_uniform(c->uniform_contents[next_uniform],
                                         c->uniform_data[next_uniform]);
                        fprintf(stderr, ")");
                        next_uniform++;
                }
                fprintf(stderr, "\n");
                ralloc_free((void *)str);
        }

        /* Make sure our dumping lined up. */
        if (c->devinfo->ver >= 40)
                assert(next_uniform == c->num_uniforms);

        fprintf(stderr, "\n");
}

/**
 * Entry point: lowers the shader's VIR into packed 64-bit QPU instructions
 * in c->qpu_insts.
 */
void
v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
{
        /* Reset the uniform count to how many will be actually loaded by the
         * generated QPU code.
         */
        c->num_uniforms = 0;

        vir_for_each_block(block, c)
                v3d_generate_code_block(c, block, temp_registers);

        v3d_qpu_schedule_instructions(c);

        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
        int i = 0;
        vir_for_each_inst_inorder(inst, c) {
                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
                                             &c->qpu_insts[i++]);
                if (!ok) {
                        fprintf(stderr, "Failed to pack instruction:\n");
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                        c->failed = true;
                        return;
                }
        }
        assert(i == c->qpu_inst_count);

        if (V3D_DEBUG & (V3D_DEBUG_QPU |
                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                v3d_dump_qpu(c);
        }

        qpu_validate(c);

        free(temp_registers);
}