1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2014 Broadcom 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/** 25b8e80941Smrg * @file 26b8e80941Smrg * 27b8e80941Smrg * Validates the QPU instruction sequence after register allocation and 28b8e80941Smrg * scheduling. 29b8e80941Smrg */ 30b8e80941Smrg 31b8e80941Smrg#include <assert.h> 32b8e80941Smrg#include <stdio.h> 33b8e80941Smrg#include <stdlib.h> 34b8e80941Smrg#include "v3d_compiler.h" 35b8e80941Smrg#include "qpu/qpu_disasm.h" 36b8e80941Smrg 37b8e80941Smrgstruct v3d_qpu_validate_state { 38b8e80941Smrg struct v3d_compile *c; 39b8e80941Smrg const struct v3d_qpu_instr *last; 40b8e80941Smrg int ip; 41b8e80941Smrg int last_sfu_write; 42b8e80941Smrg int last_branch_ip; 43b8e80941Smrg int last_thrsw_ip; 44b8e80941Smrg 45b8e80941Smrg /* Set when we've found the last-THRSW signal, or if we were started 46b8e80941Smrg * in single-segment mode. 47b8e80941Smrg */ 48b8e80941Smrg bool last_thrsw_found; 49b8e80941Smrg 50b8e80941Smrg /* Set when we've found the THRSW after the last THRSW */ 51b8e80941Smrg bool thrend_found; 52b8e80941Smrg 53b8e80941Smrg int thrsw_count; 54b8e80941Smrg}; 55b8e80941Smrg 56b8e80941Smrgstatic void 57b8e80941Smrgfail_instr(struct v3d_qpu_validate_state *state, const char *msg) 58b8e80941Smrg{ 59b8e80941Smrg struct v3d_compile *c = state->c; 60b8e80941Smrg 61b8e80941Smrg fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg); 62b8e80941Smrg 63b8e80941Smrg int dump_ip = 0; 64b8e80941Smrg vir_for_each_inst_inorder(inst, c) { 65b8e80941Smrg v3d_qpu_dump(c->devinfo, &inst->qpu); 66b8e80941Smrg 67b8e80941Smrg if (dump_ip++ == state->ip) 68b8e80941Smrg fprintf(stderr, " *** ERROR ***"); 69b8e80941Smrg 70b8e80941Smrg fprintf(stderr, "\n"); 71b8e80941Smrg } 72b8e80941Smrg 73b8e80941Smrg fprintf(stderr, "\n"); 74b8e80941Smrg abort(); 75b8e80941Smrg} 76b8e80941Smrg 77b8e80941Smrgstatic bool 78b8e80941Smrgin_branch_delay_slots(struct v3d_qpu_validate_state *state) 79b8e80941Smrg{ 80b8e80941Smrg return (state->ip - state->last_branch_ip) < 3; 81b8e80941Smrg} 82b8e80941Smrg 83b8e80941Smrgstatic bool 84b8e80941Smrgin_thrsw_delay_slots(struct v3d_qpu_validate_state *state) 85b8e80941Smrg{ 86b8e80941Smrg return (state->ip - state->last_thrsw_ip) < 3; 87b8e80941Smrg} 88b8e80941Smrg 89b8e80941Smrgstatic bool 90b8e80941Smrgqpu_magic_waddr_matches(const struct v3d_qpu_instr *inst, 91b8e80941Smrg bool (*predicate)(enum v3d_qpu_waddr waddr)) 92b8e80941Smrg{ 93b8e80941Smrg if (inst->type == V3D_QPU_INSTR_TYPE_ALU) 94b8e80941Smrg return false; 95b8e80941Smrg 96b8e80941Smrg if (inst->alu.add.op != V3D_QPU_A_NOP && 97b8e80941Smrg inst->alu.add.magic_write && 98b8e80941Smrg predicate(inst->alu.add.waddr)) 99b8e80941Smrg return true; 100b8e80941Smrg 101b8e80941Smrg if (inst->alu.mul.op != V3D_QPU_M_NOP && 102b8e80941Smrg inst->alu.mul.magic_write && 103b8e80941Smrg predicate(inst->alu.mul.waddr)) 104b8e80941Smrg return true; 105b8e80941Smrg 106b8e80941Smrg return false; 107b8e80941Smrg} 108b8e80941Smrg 109b8e80941Smrgstatic void 110b8e80941Smrgqpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) 111b8e80941Smrg{ 112b8e80941Smrg const struct v3d_device_info *devinfo = state->c->devinfo; 113b8e80941Smrg const struct v3d_qpu_instr *inst = &qinst->qpu; 114b8e80941Smrg 115b8e80941Smrg if (inst->type != V3D_QPU_INSTR_TYPE_ALU) 116b8e80941Smrg return; 117b8e80941Smrg 118b8e80941Smrg /* LDVARY writes r5 two instructions later and LDUNIF writes 119b8e80941Smrg * r5 one instruction later, which is illegal to have 120b8e80941Smrg * together. 121b8e80941Smrg */ 122b8e80941Smrg if (state->last && state->last->sig.ldvary && 123b8e80941Smrg (inst->sig.ldunif || inst->sig.ldunifa)) { 124b8e80941Smrg fail_instr(state, "LDUNIF after a LDVARY"); 125b8e80941Smrg } 126b8e80941Smrg 127b8e80941Smrg /* GFXH-1633 */ 128b8e80941Smrg bool last_reads_ldunif = (state->last && (state->last->sig.ldunif || 129b8e80941Smrg state->last->sig.ldunifrf)); 130b8e80941Smrg bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa || 131b8e80941Smrg state->last->sig.ldunifarf)); 132b8e80941Smrg bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf; 133b8e80941Smrg bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf; 134b8e80941Smrg if ((last_reads_ldunif && reads_ldunifa) || 135b8e80941Smrg (last_reads_ldunifa && reads_ldunif)) { 136b8e80941Smrg fail_instr(state, 137b8e80941Smrg "LDUNIF and LDUNIFA can't be next to each other"); 138b8e80941Smrg } 139b8e80941Smrg 140b8e80941Smrg int tmu_writes = 0; 141b8e80941Smrg int sfu_writes = 0; 142b8e80941Smrg int vpm_writes = 0; 143b8e80941Smrg int tlb_writes = 0; 144b8e80941Smrg int tsy_writes = 0; 145b8e80941Smrg 146b8e80941Smrg if (inst->alu.add.op != V3D_QPU_A_NOP) { 147b8e80941Smrg if (inst->alu.add.magic_write) { 148b8e80941Smrg if (v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr)) 149b8e80941Smrg tmu_writes++; 150b8e80941Smrg if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) 151b8e80941Smrg sfu_writes++; 152b8e80941Smrg if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) 153b8e80941Smrg vpm_writes++; 154b8e80941Smrg if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) 155b8e80941Smrg tlb_writes++; 156b8e80941Smrg if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr)) 157b8e80941Smrg tsy_writes++; 158b8e80941Smrg } 159b8e80941Smrg } 160b8e80941Smrg 161b8e80941Smrg if (inst->alu.mul.op != V3D_QPU_M_NOP) { 162b8e80941Smrg if (inst->alu.mul.magic_write) { 163b8e80941Smrg if (v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr)) 164b8e80941Smrg tmu_writes++; 165b8e80941Smrg if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) 166b8e80941Smrg sfu_writes++; 167b8e80941Smrg if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) 168b8e80941Smrg vpm_writes++; 169b8e80941Smrg if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) 170b8e80941Smrg tlb_writes++; 171b8e80941Smrg if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr)) 172b8e80941Smrg tsy_writes++; 173b8e80941Smrg } 174b8e80941Smrg } 175b8e80941Smrg 176b8e80941Smrg if (in_thrsw_delay_slots(state)) { 177b8e80941Smrg /* There's no way you want to start SFU during the THRSW delay 178b8e80941Smrg * slots, since the result would land in the other thread. 179b8e80941Smrg */ 180b8e80941Smrg if (sfu_writes) { 181b8e80941Smrg fail_instr(state, 182b8e80941Smrg "SFU write started during THRSW delay slots "); 183b8e80941Smrg } 184b8e80941Smrg 185b8e80941Smrg if (inst->sig.ldvary) 186b8e80941Smrg fail_instr(state, "LDVARY during THRSW delay slots"); 187b8e80941Smrg } 188b8e80941Smrg 189b8e80941Smrg (void)qpu_magic_waddr_matches; /* XXX */ 190b8e80941Smrg 191b8e80941Smrg /* SFU r4 results come back two instructions later. No doing 192b8e80941Smrg * r4 read/writes or other SFU lookups until it's done. 193b8e80941Smrg */ 194b8e80941Smrg if (state->ip - state->last_sfu_write < 2) { 195b8e80941Smrg if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4)) 196b8e80941Smrg fail_instr(state, "R4 read too soon after SFU"); 197b8e80941Smrg 198b8e80941Smrg if (v3d_qpu_writes_r4(devinfo, inst)) 199b8e80941Smrg fail_instr(state, "R4 write too soon after SFU"); 200b8e80941Smrg 201b8e80941Smrg if (sfu_writes) 202b8e80941Smrg fail_instr(state, "SFU write too soon after SFU"); 203b8e80941Smrg } 204b8e80941Smrg 205b8e80941Smrg /* XXX: The docs say VPM can happen with the others, but the simulator 206b8e80941Smrg * disagrees. 207b8e80941Smrg */ 208b8e80941Smrg if (tmu_writes + 209b8e80941Smrg sfu_writes + 210b8e80941Smrg vpm_writes + 211b8e80941Smrg tlb_writes + 212b8e80941Smrg tsy_writes + 213b8e80941Smrg inst->sig.ldtmu + 214b8e80941Smrg inst->sig.ldtlb + 215b8e80941Smrg inst->sig.ldvpm + 216b8e80941Smrg inst->sig.ldtlbu > 1) { 217b8e80941Smrg fail_instr(state, 218b8e80941Smrg "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed"); 219b8e80941Smrg } 220b8e80941Smrg 221b8e80941Smrg if (sfu_writes) 222b8e80941Smrg state->last_sfu_write = state->ip; 223b8e80941Smrg 224b8e80941Smrg if (inst->sig.thrsw) { 225b8e80941Smrg if (in_branch_delay_slots(state)) 226b8e80941Smrg fail_instr(state, "THRSW in a branch delay slot."); 227b8e80941Smrg 228b8e80941Smrg if (state->last_thrsw_found) 229b8e80941Smrg state->thrend_found = true; 230b8e80941Smrg 231b8e80941Smrg if (state->last_thrsw_ip == state->ip - 1) { 232b8e80941Smrg /* If it's the second THRSW in a row, then it's just a 233b8e80941Smrg * last-thrsw signal. 234b8e80941Smrg */ 235b8e80941Smrg if (state->last_thrsw_found) 236b8e80941Smrg fail_instr(state, "Two last-THRSW signals"); 237b8e80941Smrg state->last_thrsw_found = true; 238b8e80941Smrg } else { 239b8e80941Smrg if (in_thrsw_delay_slots(state)) { 240b8e80941Smrg fail_instr(state, 241b8e80941Smrg "THRSW too close to another THRSW."); 242b8e80941Smrg } 243b8e80941Smrg state->thrsw_count++; 244b8e80941Smrg state->last_thrsw_ip = state->ip; 245b8e80941Smrg } 246b8e80941Smrg } 247b8e80941Smrg 248b8e80941Smrg if (state->thrend_found && 249b8e80941Smrg state->last_thrsw_ip - state->ip <= 2 && 250b8e80941Smrg inst->type == V3D_QPU_INSTR_TYPE_ALU) { 251b8e80941Smrg if ((inst->alu.add.op != V3D_QPU_A_NOP && 252b8e80941Smrg !inst->alu.add.magic_write)) { 253b8e80941Smrg fail_instr(state, "RF write after THREND"); 254b8e80941Smrg } 255b8e80941Smrg 256b8e80941Smrg if ((inst->alu.mul.op != V3D_QPU_M_NOP && 257b8e80941Smrg !inst->alu.mul.magic_write)) { 258b8e80941Smrg fail_instr(state, "RF write after THREND"); 259b8e80941Smrg } 260b8e80941Smrg 261b8e80941Smrg if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) 262b8e80941Smrg fail_instr(state, "RF write after THREND"); 263b8e80941Smrg 264b8e80941Smrg /* GFXH-1625: No TMUWT in the last instruction */ 265b8e80941Smrg if (state->last_thrsw_ip - state->ip == 2 && 266b8e80941Smrg inst->alu.add.op == V3D_QPU_A_TMUWT) 267b8e80941Smrg fail_instr(state, "TMUWT in last instruction"); 268b8e80941Smrg } 269b8e80941Smrg 270b8e80941Smrg if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) { 271b8e80941Smrg if (in_branch_delay_slots(state)) 272b8e80941Smrg fail_instr(state, "branch in a branch delay slot."); 273b8e80941Smrg if (in_thrsw_delay_slots(state)) 274b8e80941Smrg fail_instr(state, "branch in a THRSW delay slot."); 275b8e80941Smrg state->last_branch_ip = state->ip; 276b8e80941Smrg } 277b8e80941Smrg} 278b8e80941Smrg 279b8e80941Smrgstatic void 280b8e80941Smrgqpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block) 281b8e80941Smrg{ 282b8e80941Smrg vir_for_each_inst(qinst, block) { 283b8e80941Smrg qpu_validate_inst(state, qinst); 284b8e80941Smrg 285b8e80941Smrg state->last = &qinst->qpu; 286b8e80941Smrg state->ip++; 287b8e80941Smrg } 288b8e80941Smrg} 289b8e80941Smrg 290b8e80941Smrg/** 291b8e80941Smrg * Checks for the instruction restrictions from page 37 ("Summary of 292b8e80941Smrg * Instruction Restrictions"). 293b8e80941Smrg */ 294b8e80941Smrgvoid 295b8e80941Smrgqpu_validate(struct v3d_compile *c) 296b8e80941Smrg{ 297b8e80941Smrg /* We don't want to do validation in release builds, but we want to 298b8e80941Smrg * keep compiling the validation code to make sure it doesn't get 299b8e80941Smrg * broken. 300b8e80941Smrg */ 301b8e80941Smrg#ifndef DEBUG 302b8e80941Smrg return; 303b8e80941Smrg#endif 304b8e80941Smrg 305b8e80941Smrg struct v3d_qpu_validate_state state = { 306b8e80941Smrg .c = c, 307b8e80941Smrg .last_sfu_write = -10, 308b8e80941Smrg .last_thrsw_ip = -10, 309b8e80941Smrg .last_branch_ip = -10, 310b8e80941Smrg .ip = 0, 311b8e80941Smrg 312b8e80941Smrg .last_thrsw_found = !c->last_thrsw, 313b8e80941Smrg }; 314b8e80941Smrg 315b8e80941Smrg vir_for_each_block(block, c) { 316b8e80941Smrg qpu_validate_block(&state, block); 317b8e80941Smrg } 318b8e80941Smrg 319b8e80941Smrg if (state.thrsw_count > 1 && !state.last_thrsw_found) { 320b8e80941Smrg fail_instr(&state, 321b8e80941Smrg "thread switch found without last-THRSW in program"); 322b8e80941Smrg } 323b8e80941Smrg 324b8e80941Smrg if (!state.thrend_found) 325b8e80941Smrg fail_instr(&state, "No program-end THRSW found"); 326b8e80941Smrg} 327