/* vir_to_qpu.c revision 01e04c3f */
/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22 */ 23 24#include "compiler/v3d_compiler.h" 25#include "qpu/qpu_instr.h" 26#include "qpu/qpu_disasm.h" 27 28static inline struct qpu_reg 29qpu_reg(int index) 30{ 31 struct qpu_reg reg = { 32 .magic = false, 33 .index = index, 34 }; 35 return reg; 36} 37 38static inline struct qpu_reg 39qpu_magic(enum v3d_qpu_waddr waddr) 40{ 41 struct qpu_reg reg = { 42 .magic = true, 43 .index = waddr, 44 }; 45 return reg; 46} 47 48static inline struct qpu_reg 49qpu_acc(int acc) 50{ 51 return qpu_magic(V3D_QPU_WADDR_R0 + acc); 52} 53 54struct v3d_qpu_instr 55v3d_qpu_nop(void) 56{ 57 struct v3d_qpu_instr instr = { 58 .type = V3D_QPU_INSTR_TYPE_ALU, 59 .alu = { 60 .add = { 61 .op = V3D_QPU_A_NOP, 62 .waddr = V3D_QPU_WADDR_NOP, 63 .magic_write = true, 64 }, 65 .mul = { 66 .op = V3D_QPU_M_NOP, 67 .waddr = V3D_QPU_WADDR_NOP, 68 .magic_write = true, 69 }, 70 } 71 }; 72 73 return instr; 74} 75 76static struct qinst * 77vir_nop(void) 78{ 79 struct qreg undef = { QFILE_NULL, 0 }; 80 struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef); 81 82 return qinst; 83} 84 85static struct qinst * 86new_qpu_nop_before(struct qinst *inst) 87{ 88 struct qinst *q = vir_nop(); 89 90 list_addtail(&q->link, &inst->link); 91 92 return q; 93} 94 95static void 96new_ldunif_instr(struct qinst *inst, int i) 97{ 98 struct qinst *ldunif = new_qpu_nop_before(inst); 99 100 ldunif->qpu.sig.ldunif = true; 101 assert(inst->src[i].file == QFILE_UNIF); 102 ldunif->uniform = inst->src[i].index; 103} 104 105/** 106 * Allocates the src register (accumulator or register file) into the RADDR 107 * fields of the instruction. 
108 */ 109static void 110set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) 111{ 112 if (src.smimm) { 113 assert(instr->sig.small_imm); 114 *mux = V3D_QPU_MUX_B; 115 return; 116 } 117 118 if (src.magic) { 119 assert(src.index >= V3D_QPU_WADDR_R0 && 120 src.index <= V3D_QPU_WADDR_R5); 121 *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0; 122 return; 123 } 124 125 if (instr->alu.add.a != V3D_QPU_MUX_A && 126 instr->alu.add.b != V3D_QPU_MUX_A && 127 instr->alu.mul.a != V3D_QPU_MUX_A && 128 instr->alu.mul.b != V3D_QPU_MUX_A) { 129 instr->raddr_a = src.index; 130 *mux = V3D_QPU_MUX_A; 131 } else { 132 if (instr->raddr_a == src.index) { 133 *mux = V3D_QPU_MUX_A; 134 } else { 135 assert(!(instr->alu.add.a == V3D_QPU_MUX_B && 136 instr->alu.add.b == V3D_QPU_MUX_B && 137 instr->alu.mul.a == V3D_QPU_MUX_B && 138 instr->alu.mul.b == V3D_QPU_MUX_B) || 139 src.index == instr->raddr_b); 140 141 instr->raddr_b = src.index; 142 *mux = V3D_QPU_MUX_B; 143 } 144 } 145} 146 147static bool 148is_no_op_mov(struct qinst *qinst) 149{ 150 static const struct v3d_qpu_sig no_sig = {0}; 151 152 /* Make sure it's just a lone MOV. */ 153 if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || 154 qinst->qpu.alu.mul.op != V3D_QPU_M_MOV || 155 qinst->qpu.alu.add.op != V3D_QPU_A_NOP || 156 memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) { 157 return false; 158 } 159 160 /* Check if it's a MOV from a register to itself. 
*/ 161 enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr; 162 if (qinst->qpu.alu.mul.magic_write) { 163 if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4) 164 return false; 165 166 if (qinst->qpu.alu.mul.a != 167 V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) { 168 return false; 169 } 170 } else { 171 int raddr; 172 173 switch (qinst->qpu.alu.mul.a) { 174 case V3D_QPU_MUX_A: 175 raddr = qinst->qpu.raddr_a; 176 break; 177 case V3D_QPU_MUX_B: 178 raddr = qinst->qpu.raddr_b; 179 break; 180 default: 181 return false; 182 } 183 if (raddr != waddr) 184 return false; 185 } 186 187 /* No packing or flags updates, or we need to execute the 188 * instruction. 189 */ 190 if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE || 191 qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE || 192 qinst->qpu.flags.mc != V3D_QPU_COND_NONE || 193 qinst->qpu.flags.mpf != V3D_QPU_PF_NONE || 194 qinst->qpu.flags.muf != V3D_QPU_UF_NONE) { 195 return false; 196 } 197 198 return true; 199} 200 201static void 202v3d_generate_code_block(struct v3d_compile *c, 203 struct qblock *block, 204 struct qpu_reg *temp_registers) 205{ 206 int last_vpm_read_index = -1; 207 208 vir_for_each_inst_safe(qinst, block) { 209#if 0 210 fprintf(stderr, "translating qinst to qpu: "); 211 vir_dump_inst(c, qinst); 212 fprintf(stderr, "\n"); 213#endif 214 215 struct qinst *temp; 216 217 if (vir_has_implicit_uniform(qinst)) { 218 int src = vir_get_implicit_uniform_src(qinst); 219 assert(qinst->src[src].file == QFILE_UNIF); 220 qinst->uniform = qinst->src[src].index; 221 c->num_uniforms++; 222 } 223 224 int nsrc = vir_get_non_sideband_nsrc(qinst); 225 struct qpu_reg src[ARRAY_SIZE(qinst->src)]; 226 bool emitted_ldunif = false; 227 for (int i = 0; i < nsrc; i++) { 228 int index = qinst->src[i].index; 229 switch (qinst->src[i].file) { 230 case QFILE_REG: 231 src[i] = qpu_reg(qinst->src[i].index); 232 break; 233 case QFILE_MAGIC: 234 src[i] = qpu_magic(qinst->src[i].index); 235 break; 236 case QFILE_NULL: 237 case 
QFILE_LOAD_IMM: 238 src[i] = qpu_acc(0); 239 break; 240 case QFILE_TEMP: 241 src[i] = temp_registers[index]; 242 break; 243 case QFILE_UNIF: 244 if (!emitted_ldunif) { 245 new_ldunif_instr(qinst, i); 246 c->num_uniforms++; 247 emitted_ldunif = true; 248 } 249 250 src[i] = qpu_acc(5); 251 break; 252 case QFILE_SMALL_IMM: 253 src[i].smimm = true; 254 break; 255 256 case QFILE_VPM: 257 assert((int)qinst->src[i].index >= 258 last_vpm_read_index); 259 (void)last_vpm_read_index; 260 last_vpm_read_index = qinst->src[i].index; 261 262 temp = new_qpu_nop_before(qinst); 263 temp->qpu.sig.ldvpm = true; 264 265 src[i] = qpu_acc(3); 266 break; 267 268 case QFILE_TLB: 269 case QFILE_TLBU: 270 unreachable("bad vir src file"); 271 } 272 } 273 274 struct qpu_reg dst; 275 switch (qinst->dst.file) { 276 case QFILE_NULL: 277 dst = qpu_magic(V3D_QPU_WADDR_NOP); 278 break; 279 280 case QFILE_REG: 281 dst = qpu_reg(qinst->dst.index); 282 break; 283 284 case QFILE_MAGIC: 285 dst = qpu_magic(qinst->dst.index); 286 break; 287 288 case QFILE_TEMP: 289 dst = temp_registers[qinst->dst.index]; 290 break; 291 292 case QFILE_VPM: 293 dst = qpu_magic(V3D_QPU_WADDR_VPM); 294 break; 295 296 case QFILE_TLB: 297 dst = qpu_magic(V3D_QPU_WADDR_TLB); 298 break; 299 300 case QFILE_TLBU: 301 dst = qpu_magic(V3D_QPU_WADDR_TLBU); 302 break; 303 304 case QFILE_UNIF: 305 case QFILE_SMALL_IMM: 306 case QFILE_LOAD_IMM: 307 assert(!"not reached"); 308 break; 309 } 310 311 if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { 312 if (v3d_qpu_sig_writes_address(c->devinfo, 313 &qinst->qpu.sig)) { 314 assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP); 315 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); 316 317 qinst->qpu.sig_addr = dst.index; 318 qinst->qpu.sig_magic = dst.magic; 319 } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) { 320 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); 321 if (nsrc >= 1) { 322 set_src(&qinst->qpu, 323 &qinst->qpu.alu.add.a, src[0]); 324 } 325 if (nsrc >= 2) { 326 set_src(&qinst->qpu, 327 
&qinst->qpu.alu.add.b, src[1]); 328 } 329 330 qinst->qpu.alu.add.waddr = dst.index; 331 qinst->qpu.alu.add.magic_write = dst.magic; 332 } else { 333 if (nsrc >= 1) { 334 set_src(&qinst->qpu, 335 &qinst->qpu.alu.mul.a, src[0]); 336 } 337 if (nsrc >= 2) { 338 set_src(&qinst->qpu, 339 &qinst->qpu.alu.mul.b, src[1]); 340 } 341 342 qinst->qpu.alu.mul.waddr = dst.index; 343 qinst->qpu.alu.mul.magic_write = dst.magic; 344 345 if (is_no_op_mov(qinst)) { 346 vir_remove_instruction(c, qinst); 347 continue; 348 } 349 } 350 } else { 351 assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH); 352 } 353 } 354} 355 356 357static void 358v3d_dump_qpu(struct v3d_compile *c) 359{ 360 fprintf(stderr, "%s prog %d/%d QPU:\n", 361 vir_get_stage_name(c), 362 c->program_id, c->variant_id); 363 364 for (int i = 0; i < c->qpu_inst_count; i++) { 365 const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]); 366 fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str); 367 ralloc_free((void *)str); 368 } 369 fprintf(stderr, "\n"); 370} 371 372void 373v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers) 374{ 375 /* Reset the uniform count to how many will be actually loaded by the 376 * generated QPU code. 
377 */ 378 c->num_uniforms = 0; 379 380 vir_for_each_block(block, c) 381 v3d_generate_code_block(c, block, temp_registers); 382 383 uint32_t cycles = v3d_qpu_schedule_instructions(c); 384 385 c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count); 386 int i = 0; 387 vir_for_each_inst_inorder(inst, c) { 388 bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu, 389 &c->qpu_insts[i++]); 390 if (!ok) { 391 fprintf(stderr, "Failed to pack instruction:\n"); 392 vir_dump_inst(c, inst); 393 fprintf(stderr, "\n"); 394 c->failed = true; 395 return; 396 } 397 } 398 assert(i == c->qpu_inst_count); 399 400 if (V3D_DEBUG & V3D_DEBUG_SHADERDB) { 401 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d instructions\n", 402 vir_get_stage_name(c), 403 c->program_id, c->variant_id, 404 c->qpu_inst_count); 405 } 406 407 /* The QPU cycle estimates are pretty broken (see waddr_latency()), so 408 * don't report them for now. 409 */ 410 if (false) { 411 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n", 412 vir_get_stage_name(c), 413 c->program_id, c->variant_id, 414 cycles); 415 } 416 417 if (V3D_DEBUG & (V3D_DEBUG_QPU | 418 v3d_debug_flag_for_shader_stage(c->s->info.stage))) { 419 v3d_dump_qpu(c); 420 } 421 422 qpu_validate(c); 423 424 free(temp_registers); 425} 426