/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "compiler/v3d_compiler.h" 25b8e80941Smrg#include "qpu/qpu_instr.h" 26b8e80941Smrg#include "qpu/qpu_disasm.h" 27b8e80941Smrg 28b8e80941Smrgstatic inline struct qpu_reg 29b8e80941Smrgqpu_reg(int index) 30b8e80941Smrg{ 31b8e80941Smrg struct qpu_reg reg = { 32b8e80941Smrg .magic = false, 33b8e80941Smrg .index = index, 34b8e80941Smrg }; 35b8e80941Smrg return reg; 36b8e80941Smrg} 37b8e80941Smrg 38b8e80941Smrgstatic inline struct qpu_reg 39b8e80941Smrgqpu_magic(enum v3d_qpu_waddr waddr) 40b8e80941Smrg{ 41b8e80941Smrg struct qpu_reg reg = { 42b8e80941Smrg .magic = true, 43b8e80941Smrg .index = waddr, 44b8e80941Smrg }; 45b8e80941Smrg return reg; 46b8e80941Smrg} 47b8e80941Smrg 48b8e80941Smrgstatic inline struct qpu_reg 49b8e80941Smrgqpu_acc(int acc) 50b8e80941Smrg{ 51b8e80941Smrg return qpu_magic(V3D_QPU_WADDR_R0 + acc); 52b8e80941Smrg} 53b8e80941Smrg 54b8e80941Smrgstruct v3d_qpu_instr 55b8e80941Smrgv3d_qpu_nop(void) 56b8e80941Smrg{ 57b8e80941Smrg struct v3d_qpu_instr instr = { 58b8e80941Smrg .type = V3D_QPU_INSTR_TYPE_ALU, 59b8e80941Smrg .alu = { 60b8e80941Smrg .add = { 61b8e80941Smrg .op = V3D_QPU_A_NOP, 62b8e80941Smrg .waddr = V3D_QPU_WADDR_NOP, 63b8e80941Smrg .magic_write = true, 64b8e80941Smrg }, 65b8e80941Smrg .mul = { 66b8e80941Smrg .op = V3D_QPU_M_NOP, 67b8e80941Smrg .waddr = V3D_QPU_WADDR_NOP, 68b8e80941Smrg .magic_write = true, 69b8e80941Smrg }, 70b8e80941Smrg } 71b8e80941Smrg }; 72b8e80941Smrg 73b8e80941Smrg return instr; 74b8e80941Smrg} 75b8e80941Smrg 76b8e80941Smrgstatic struct qinst * 77b8e80941Smrgvir_nop(void) 78b8e80941Smrg{ 79b8e80941Smrg struct qreg undef = vir_nop_reg(); 80b8e80941Smrg struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef); 81b8e80941Smrg 82b8e80941Smrg return qinst; 83b8e80941Smrg} 84b8e80941Smrg 85b8e80941Smrgstatic struct qinst * 86b8e80941Smrgnew_qpu_nop_before(struct qinst *inst) 87b8e80941Smrg{ 88b8e80941Smrg struct qinst *q = vir_nop(); 89b8e80941Smrg 
90b8e80941Smrg list_addtail(&q->link, &inst->link); 91b8e80941Smrg 92b8e80941Smrg return q; 93b8e80941Smrg} 94b8e80941Smrg 95b8e80941Smrg/** 96b8e80941Smrg * Allocates the src register (accumulator or register file) into the RADDR 97b8e80941Smrg * fields of the instruction. 98b8e80941Smrg */ 99b8e80941Smrgstatic void 100b8e80941Smrgset_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) 101b8e80941Smrg{ 102b8e80941Smrg if (src.smimm) { 103b8e80941Smrg assert(instr->sig.small_imm); 104b8e80941Smrg *mux = V3D_QPU_MUX_B; 105b8e80941Smrg return; 106b8e80941Smrg } 107b8e80941Smrg 108b8e80941Smrg if (src.magic) { 109b8e80941Smrg assert(src.index >= V3D_QPU_WADDR_R0 && 110b8e80941Smrg src.index <= V3D_QPU_WADDR_R5); 111b8e80941Smrg *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0; 112b8e80941Smrg return; 113b8e80941Smrg } 114b8e80941Smrg 115b8e80941Smrg if (instr->alu.add.a != V3D_QPU_MUX_A && 116b8e80941Smrg instr->alu.add.b != V3D_QPU_MUX_A && 117b8e80941Smrg instr->alu.mul.a != V3D_QPU_MUX_A && 118b8e80941Smrg instr->alu.mul.b != V3D_QPU_MUX_A) { 119b8e80941Smrg instr->raddr_a = src.index; 120b8e80941Smrg *mux = V3D_QPU_MUX_A; 121b8e80941Smrg } else { 122b8e80941Smrg if (instr->raddr_a == src.index) { 123b8e80941Smrg *mux = V3D_QPU_MUX_A; 124b8e80941Smrg } else { 125b8e80941Smrg assert(!(instr->alu.add.a == V3D_QPU_MUX_B && 126b8e80941Smrg instr->alu.add.b == V3D_QPU_MUX_B && 127b8e80941Smrg instr->alu.mul.a == V3D_QPU_MUX_B && 128b8e80941Smrg instr->alu.mul.b == V3D_QPU_MUX_B) || 129b8e80941Smrg src.index == instr->raddr_b); 130b8e80941Smrg 131b8e80941Smrg instr->raddr_b = src.index; 132b8e80941Smrg *mux = V3D_QPU_MUX_B; 133b8e80941Smrg } 134b8e80941Smrg } 135b8e80941Smrg} 136b8e80941Smrg 137b8e80941Smrgstatic bool 138b8e80941Smrgis_no_op_mov(struct qinst *qinst) 139b8e80941Smrg{ 140b8e80941Smrg static const struct v3d_qpu_sig no_sig = {0}; 141b8e80941Smrg 142b8e80941Smrg /* Make sure it's just a lone MOV. 
*/ 143b8e80941Smrg if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || 144b8e80941Smrg qinst->qpu.alu.mul.op != V3D_QPU_M_MOV || 145b8e80941Smrg qinst->qpu.alu.add.op != V3D_QPU_A_NOP || 146b8e80941Smrg memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) { 147b8e80941Smrg return false; 148b8e80941Smrg } 149b8e80941Smrg 150b8e80941Smrg /* Check if it's a MOV from a register to itself. */ 151b8e80941Smrg enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr; 152b8e80941Smrg if (qinst->qpu.alu.mul.magic_write) { 153b8e80941Smrg if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4) 154b8e80941Smrg return false; 155b8e80941Smrg 156b8e80941Smrg if (qinst->qpu.alu.mul.a != 157b8e80941Smrg V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) { 158b8e80941Smrg return false; 159b8e80941Smrg } 160b8e80941Smrg } else { 161b8e80941Smrg int raddr; 162b8e80941Smrg 163b8e80941Smrg switch (qinst->qpu.alu.mul.a) { 164b8e80941Smrg case V3D_QPU_MUX_A: 165b8e80941Smrg raddr = qinst->qpu.raddr_a; 166b8e80941Smrg break; 167b8e80941Smrg case V3D_QPU_MUX_B: 168b8e80941Smrg raddr = qinst->qpu.raddr_b; 169b8e80941Smrg break; 170b8e80941Smrg default: 171b8e80941Smrg return false; 172b8e80941Smrg } 173b8e80941Smrg if (raddr != waddr) 174b8e80941Smrg return false; 175b8e80941Smrg } 176b8e80941Smrg 177b8e80941Smrg /* No packing or flags updates, or we need to execute the 178b8e80941Smrg * instruction. 
179b8e80941Smrg */ 180b8e80941Smrg if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE || 181b8e80941Smrg qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE || 182b8e80941Smrg qinst->qpu.flags.mc != V3D_QPU_COND_NONE || 183b8e80941Smrg qinst->qpu.flags.mpf != V3D_QPU_PF_NONE || 184b8e80941Smrg qinst->qpu.flags.muf != V3D_QPU_UF_NONE) { 185b8e80941Smrg return false; 186b8e80941Smrg } 187b8e80941Smrg 188b8e80941Smrg return true; 189b8e80941Smrg} 190b8e80941Smrg 191b8e80941Smrgstatic void 192b8e80941Smrgv3d_generate_code_block(struct v3d_compile *c, 193b8e80941Smrg struct qblock *block, 194b8e80941Smrg struct qpu_reg *temp_registers) 195b8e80941Smrg{ 196b8e80941Smrg int last_vpm_read_index = -1; 197b8e80941Smrg 198b8e80941Smrg vir_for_each_inst_safe(qinst, block) { 199b8e80941Smrg#if 0 200b8e80941Smrg fprintf(stderr, "translating qinst to qpu: "); 201b8e80941Smrg vir_dump_inst(c, qinst); 202b8e80941Smrg fprintf(stderr, "\n"); 203b8e80941Smrg#endif 204b8e80941Smrg 205b8e80941Smrg struct qinst *temp; 206b8e80941Smrg 207b8e80941Smrg if (vir_has_uniform(qinst)) 208b8e80941Smrg c->num_uniforms++; 209b8e80941Smrg 210b8e80941Smrg int nsrc = vir_get_nsrc(qinst); 211b8e80941Smrg struct qpu_reg src[ARRAY_SIZE(qinst->src)]; 212b8e80941Smrg for (int i = 0; i < nsrc; i++) { 213b8e80941Smrg int index = qinst->src[i].index; 214b8e80941Smrg switch (qinst->src[i].file) { 215b8e80941Smrg case QFILE_REG: 216b8e80941Smrg src[i] = qpu_reg(qinst->src[i].index); 217b8e80941Smrg break; 218b8e80941Smrg case QFILE_MAGIC: 219b8e80941Smrg src[i] = qpu_magic(qinst->src[i].index); 220b8e80941Smrg break; 221b8e80941Smrg case QFILE_NULL: 222b8e80941Smrg case QFILE_LOAD_IMM: 223b8e80941Smrg src[i] = qpu_acc(0); 224b8e80941Smrg break; 225b8e80941Smrg case QFILE_TEMP: 226b8e80941Smrg src[i] = temp_registers[index]; 227b8e80941Smrg break; 228b8e80941Smrg case QFILE_SMALL_IMM: 229b8e80941Smrg src[i].smimm = true; 230b8e80941Smrg break; 231b8e80941Smrg 232b8e80941Smrg case QFILE_VPM: 233b8e80941Smrg 
assert((int)qinst->src[i].index >= 234b8e80941Smrg last_vpm_read_index); 235b8e80941Smrg (void)last_vpm_read_index; 236b8e80941Smrg last_vpm_read_index = qinst->src[i].index; 237b8e80941Smrg 238b8e80941Smrg temp = new_qpu_nop_before(qinst); 239b8e80941Smrg temp->qpu.sig.ldvpm = true; 240b8e80941Smrg 241b8e80941Smrg src[i] = qpu_acc(3); 242b8e80941Smrg break; 243b8e80941Smrg } 244b8e80941Smrg } 245b8e80941Smrg 246b8e80941Smrg struct qpu_reg dst; 247b8e80941Smrg switch (qinst->dst.file) { 248b8e80941Smrg case QFILE_NULL: 249b8e80941Smrg dst = qpu_magic(V3D_QPU_WADDR_NOP); 250b8e80941Smrg break; 251b8e80941Smrg 252b8e80941Smrg case QFILE_REG: 253b8e80941Smrg dst = qpu_reg(qinst->dst.index); 254b8e80941Smrg break; 255b8e80941Smrg 256b8e80941Smrg case QFILE_MAGIC: 257b8e80941Smrg dst = qpu_magic(qinst->dst.index); 258b8e80941Smrg break; 259b8e80941Smrg 260b8e80941Smrg case QFILE_TEMP: 261b8e80941Smrg dst = temp_registers[qinst->dst.index]; 262b8e80941Smrg break; 263b8e80941Smrg 264b8e80941Smrg case QFILE_VPM: 265b8e80941Smrg dst = qpu_magic(V3D_QPU_WADDR_VPM); 266b8e80941Smrg break; 267b8e80941Smrg 268b8e80941Smrg case QFILE_SMALL_IMM: 269b8e80941Smrg case QFILE_LOAD_IMM: 270b8e80941Smrg assert(!"not reached"); 271b8e80941Smrg break; 272b8e80941Smrg } 273b8e80941Smrg 274b8e80941Smrg if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { 275b8e80941Smrg if (qinst->qpu.sig.ldunif) { 276b8e80941Smrg assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP); 277b8e80941Smrg assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); 278b8e80941Smrg 279b8e80941Smrg if (!dst.magic || 280b8e80941Smrg dst.index != V3D_QPU_WADDR_R5) { 281b8e80941Smrg assert(c->devinfo->ver >= 40); 282b8e80941Smrg 283b8e80941Smrg qinst->qpu.sig.ldunif = false; 284b8e80941Smrg qinst->qpu.sig.ldunifrf = true; 285b8e80941Smrg qinst->qpu.sig_addr = dst.index; 286b8e80941Smrg qinst->qpu.sig_magic = dst.magic; 287b8e80941Smrg } 288b8e80941Smrg } else if (v3d_qpu_sig_writes_address(c->devinfo, 289b8e80941Smrg &qinst->qpu.sig)) { 
290b8e80941Smrg assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP); 291b8e80941Smrg assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); 292b8e80941Smrg 293b8e80941Smrg qinst->qpu.sig_addr = dst.index; 294b8e80941Smrg qinst->qpu.sig_magic = dst.magic; 295b8e80941Smrg } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) { 296b8e80941Smrg assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); 297b8e80941Smrg if (nsrc >= 1) { 298b8e80941Smrg set_src(&qinst->qpu, 299b8e80941Smrg &qinst->qpu.alu.add.a, src[0]); 300b8e80941Smrg } 301b8e80941Smrg if (nsrc >= 2) { 302b8e80941Smrg set_src(&qinst->qpu, 303b8e80941Smrg &qinst->qpu.alu.add.b, src[1]); 304b8e80941Smrg } 305b8e80941Smrg 306b8e80941Smrg qinst->qpu.alu.add.waddr = dst.index; 307b8e80941Smrg qinst->qpu.alu.add.magic_write = dst.magic; 308b8e80941Smrg } else { 309b8e80941Smrg if (nsrc >= 1) { 310b8e80941Smrg set_src(&qinst->qpu, 311b8e80941Smrg &qinst->qpu.alu.mul.a, src[0]); 312b8e80941Smrg } 313b8e80941Smrg if (nsrc >= 2) { 314b8e80941Smrg set_src(&qinst->qpu, 315b8e80941Smrg &qinst->qpu.alu.mul.b, src[1]); 316b8e80941Smrg } 317b8e80941Smrg 318b8e80941Smrg qinst->qpu.alu.mul.waddr = dst.index; 319b8e80941Smrg qinst->qpu.alu.mul.magic_write = dst.magic; 320b8e80941Smrg 321b8e80941Smrg if (is_no_op_mov(qinst)) { 322b8e80941Smrg vir_remove_instruction(c, qinst); 323b8e80941Smrg continue; 324b8e80941Smrg } 325b8e80941Smrg } 326b8e80941Smrg } else { 327b8e80941Smrg assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH); 328b8e80941Smrg } 329b8e80941Smrg } 330b8e80941Smrg} 331b8e80941Smrg 332b8e80941Smrgstatic bool 333b8e80941Smrgreads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction) 334b8e80941Smrg{ 335b8e80941Smrg struct v3d_qpu_instr qpu; 336b8e80941Smrg MAYBE_UNUSED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu); 337b8e80941Smrg assert(ok); 338b8e80941Smrg 339b8e80941Smrg if (qpu.sig.ldunif || 340b8e80941Smrg qpu.sig.ldunifrf || 341b8e80941Smrg qpu.sig.wrtmuc) { 342b8e80941Smrg return true; 
343b8e80941Smrg } 344b8e80941Smrg 345b8e80941Smrg if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) 346b8e80941Smrg return true; 347b8e80941Smrg 348b8e80941Smrg if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) { 349b8e80941Smrg if (qpu.alu.add.magic_write && 350b8e80941Smrg v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) { 351b8e80941Smrg return true; 352b8e80941Smrg } 353b8e80941Smrg 354b8e80941Smrg if (qpu.alu.mul.magic_write && 355b8e80941Smrg v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) { 356b8e80941Smrg return true; 357b8e80941Smrg } 358b8e80941Smrg } 359b8e80941Smrg 360b8e80941Smrg return false; 361b8e80941Smrg} 362b8e80941Smrg 363b8e80941Smrgstatic void 364b8e80941Smrgv3d_dump_qpu(struct v3d_compile *c) 365b8e80941Smrg{ 366b8e80941Smrg fprintf(stderr, "%s prog %d/%d QPU:\n", 367b8e80941Smrg vir_get_stage_name(c), 368b8e80941Smrg c->program_id, c->variant_id); 369b8e80941Smrg 370b8e80941Smrg int next_uniform = 0; 371b8e80941Smrg for (int i = 0; i < c->qpu_inst_count; i++) { 372b8e80941Smrg const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]); 373b8e80941Smrg fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str); 374b8e80941Smrg 375b8e80941Smrg /* We can only do this on 4.x, because we're not tracking TMU 376b8e80941Smrg * implicit uniforms here on 3.x. 377b8e80941Smrg */ 378b8e80941Smrg if (c->devinfo->ver >= 40 && 379b8e80941Smrg reads_uniform(c->devinfo, c->qpu_insts[i])) { 380b8e80941Smrg fprintf(stderr, " ("); 381b8e80941Smrg vir_dump_uniform(c->uniform_contents[next_uniform], 382b8e80941Smrg c->uniform_data[next_uniform]); 383b8e80941Smrg fprintf(stderr, ")"); 384b8e80941Smrg next_uniform++; 385b8e80941Smrg } 386b8e80941Smrg fprintf(stderr, "\n"); 387b8e80941Smrg ralloc_free((void *)str); 388b8e80941Smrg } 389b8e80941Smrg 390b8e80941Smrg /* Make sure our dumping lined up. 
*/ 391b8e80941Smrg if (c->devinfo->ver >= 40) 392b8e80941Smrg assert(next_uniform == c->num_uniforms); 393b8e80941Smrg 394b8e80941Smrg fprintf(stderr, "\n"); 395b8e80941Smrg} 396b8e80941Smrg 397b8e80941Smrgvoid 398b8e80941Smrgv3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers) 399b8e80941Smrg{ 400b8e80941Smrg /* Reset the uniform count to how many will be actually loaded by the 401b8e80941Smrg * generated QPU code. 402b8e80941Smrg */ 403b8e80941Smrg c->num_uniforms = 0; 404b8e80941Smrg 405b8e80941Smrg vir_for_each_block(block, c) 406b8e80941Smrg v3d_generate_code_block(c, block, temp_registers); 407b8e80941Smrg 408b8e80941Smrg v3d_qpu_schedule_instructions(c); 409b8e80941Smrg 410b8e80941Smrg c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count); 411b8e80941Smrg int i = 0; 412b8e80941Smrg vir_for_each_inst_inorder(inst, c) { 413b8e80941Smrg bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu, 414b8e80941Smrg &c->qpu_insts[i++]); 415b8e80941Smrg if (!ok) { 416b8e80941Smrg fprintf(stderr, "Failed to pack instruction:\n"); 417b8e80941Smrg vir_dump_inst(c, inst); 418b8e80941Smrg fprintf(stderr, "\n"); 419b8e80941Smrg c->failed = true; 420b8e80941Smrg return; 421b8e80941Smrg } 422b8e80941Smrg } 423b8e80941Smrg assert(i == c->qpu_inst_count); 424b8e80941Smrg 425b8e80941Smrg if (V3D_DEBUG & (V3D_DEBUG_QPU | 426b8e80941Smrg v3d_debug_flag_for_shader_stage(c->s->info.stage))) { 427b8e80941Smrg v3d_dump_qpu(c); 428b8e80941Smrg } 429b8e80941Smrg 430b8e80941Smrg qpu_validate(c); 431b8e80941Smrg 432b8e80941Smrg free(temp_registers); 433b8e80941Smrg} 434