1/* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** 25 * @file 26 * 27 * Validates the QPU instruction sequence after register allocation and 28 * scheduling. 29 */ 30 31#include <assert.h> 32#include <stdio.h> 33#include <stdlib.h> 34#include "v3d_compiler.h" 35#include "qpu/qpu_disasm.h" 36 37struct v3d_qpu_validate_state { 38 struct v3d_compile *c; 39 const struct v3d_qpu_instr *last; 40 int ip; 41 int last_sfu_write; 42 int last_branch_ip; 43 int last_thrsw_ip; 44 45 /* Set when we've found the last-THRSW signal, or if we were started 46 * in single-segment mode. 47 */ 48 bool last_thrsw_found; 49 50 /* Set when we've found the THRSW after the last THRSW */ 51 bool thrend_found; 52 53 int thrsw_count; 54}; 55 56static void 57fail_instr(struct v3d_qpu_validate_state *state, const char *msg) 58{ 59 struct v3d_compile *c = state->c; 60 61 fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg); 62 63 int dump_ip = 0; 64 vir_for_each_inst_inorder(inst, c) { 65 v3d_qpu_dump(c->devinfo, &inst->qpu); 66 67 if (dump_ip++ == state->ip) 68 fprintf(stderr, " *** ERROR ***"); 69 70 fprintf(stderr, "\n"); 71 } 72 73 fprintf(stderr, "\n"); 74 abort(); 75} 76 77static bool 78in_branch_delay_slots(struct v3d_qpu_validate_state *state) 79{ 80 return (state->ip - state->last_branch_ip) < 3; 81} 82 83static bool 84in_thrsw_delay_slots(struct v3d_qpu_validate_state *state) 85{ 86 return (state->ip - state->last_thrsw_ip) < 3; 87} 88 89static bool 90qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst, 91 bool (*predicate)(enum v3d_qpu_waddr waddr)) 92{ 93 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) 94 return false; 95 96 if (inst->alu.add.op != V3D_QPU_A_NOP && 97 inst->alu.add.magic_write && 98 predicate(inst->alu.add.waddr)) 99 return true; 100 101 if (inst->alu.mul.op != V3D_QPU_M_NOP && 102 inst->alu.mul.magic_write && 103 predicate(inst->alu.mul.waddr)) 104 return true; 105 106 return false; 107} 108 109static void 110qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) 111{ 112 const struct v3d_device_info *devinfo = state->c->devinfo; 113 const struct v3d_qpu_instr *inst = &qinst->qpu; 114 115 if (inst->type != V3D_QPU_INSTR_TYPE_ALU) 116 return; 117 118 /* LDVARY writes r5 two instructions later and LDUNIF writes 119 * r5 one instruction later, which is illegal to have 120 * together. 121 */ 122 if (state->last && state->last->sig.ldvary && 123 (inst->sig.ldunif || inst->sig.ldunifa)) { 124 fail_instr(state, "LDUNIF after a LDVARY"); 125 } 126 127 /* GFXH-1633 */ 128 bool last_reads_ldunif = (state->last && (state->last->sig.ldunif || 129 state->last->sig.ldunifrf)); 130 bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa || 131 state->last->sig.ldunifarf)); 132 bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf; 133 bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf; 134 if ((last_reads_ldunif && reads_ldunifa) || 135 (last_reads_ldunifa && reads_ldunif)) { 136 fail_instr(state, 137 "LDUNIF and LDUNIFA can't be next to each other"); 138 } 139 140 int tmu_writes = 0; 141 int sfu_writes = 0; 142 int vpm_writes = 0; 143 int tlb_writes = 0; 144 int tsy_writes = 0; 145 146 if (inst->alu.add.op != V3D_QPU_A_NOP) { 147 if (inst->alu.add.magic_write) { 148 if (v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr)) 149 tmu_writes++; 150 if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) 151 sfu_writes++; 152 if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) 153 vpm_writes++; 154 if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) 155 tlb_writes++; 156 if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr)) 157 tsy_writes++; 158 } 159 } 160 161 if (inst->alu.mul.op != V3D_QPU_M_NOP) { 162 if (inst->alu.mul.magic_write) { 163 if (v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr)) 164 tmu_writes++; 165 if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) 166 sfu_writes++; 167 if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) 168 vpm_writes++; 169 if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) 170 tlb_writes++; 171 if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr)) 172 tsy_writes++; 173 } 174 } 175 176 if (in_thrsw_delay_slots(state)) { 177 /* There's no way you want to start SFU during the THRSW delay 178 * slots, since the result would land in the other thread. 179 */ 180 if (sfu_writes) { 181 fail_instr(state, 182 "SFU write started during THRSW delay slots "); 183 } 184 185 if (inst->sig.ldvary) 186 fail_instr(state, "LDVARY during THRSW delay slots"); 187 } 188 189 (void)qpu_magic_waddr_matches; /* XXX */ 190 191 /* SFU r4 results come back two instructions later. No doing 192 * r4 read/writes or other SFU lookups until it's done. 193 */ 194 if (state->ip - state->last_sfu_write < 2) { 195 if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4)) 196 fail_instr(state, "R4 read too soon after SFU"); 197 198 if (v3d_qpu_writes_r4(devinfo, inst)) 199 fail_instr(state, "R4 write too soon after SFU"); 200 201 if (sfu_writes) 202 fail_instr(state, "SFU write too soon after SFU"); 203 } 204 205 /* XXX: The docs say VPM can happen with the others, but the simulator 206 * disagrees. 207 */ 208 if (tmu_writes + 209 sfu_writes + 210 vpm_writes + 211 tlb_writes + 212 tsy_writes + 213 inst->sig.ldtmu + 214 inst->sig.ldtlb + 215 inst->sig.ldvpm + 216 inst->sig.ldtlbu > 1) { 217 fail_instr(state, 218 "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed"); 219 } 220 221 if (sfu_writes) 222 state->last_sfu_write = state->ip; 223 224 if (inst->sig.thrsw) { 225 if (in_branch_delay_slots(state)) 226 fail_instr(state, "THRSW in a branch delay slot."); 227 228 if (state->last_thrsw_found) 229 state->thrend_found = true; 230 231 if (state->last_thrsw_ip == state->ip - 1) { 232 /* If it's the second THRSW in a row, then it's just a 233 * last-thrsw signal. 234 */ 235 if (state->last_thrsw_found) 236 fail_instr(state, "Two last-THRSW signals"); 237 state->last_thrsw_found = true; 238 } else { 239 if (in_thrsw_delay_slots(state)) { 240 fail_instr(state, 241 "THRSW too close to another THRSW."); 242 } 243 state->thrsw_count++; 244 state->last_thrsw_ip = state->ip; 245 } 246 } 247 248 if (state->thrend_found && 249 state->last_thrsw_ip - state->ip <= 2 && 250 inst->type == V3D_QPU_INSTR_TYPE_ALU) { 251 if ((inst->alu.add.op != V3D_QPU_A_NOP && 252 !inst->alu.add.magic_write)) { 253 fail_instr(state, "RF write after THREND"); 254 } 255 256 if ((inst->alu.mul.op != V3D_QPU_M_NOP && 257 !inst->alu.mul.magic_write)) { 258 fail_instr(state, "RF write after THREND"); 259 } 260 261 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) 262 fail_instr(state, "RF write after THREND"); 263 264 /* GFXH-1625: No TMUWT in the last instruction */ 265 if (state->last_thrsw_ip - state->ip == 2 && 266 inst->alu.add.op == V3D_QPU_A_TMUWT) 267 fail_instr(state, "TMUWT in last instruction"); 268 } 269 270 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) { 271 if (in_branch_delay_slots(state)) 272 fail_instr(state, "branch in a branch delay slot."); 273 if (in_thrsw_delay_slots(state)) 274 fail_instr(state, "branch in a THRSW delay slot."); 275 state->last_branch_ip = state->ip; 276 } 277} 278 279static void 280qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block) 281{ 282 vir_for_each_inst(qinst, block) { 283 qpu_validate_inst(state, qinst); 284 285 state->last = &qinst->qpu; 286 state->ip++; 287 } 288} 289 290/** 291 * Checks for the instruction restrictions from page 37 ("Summary of 292 * Instruction Restrictions"). 293 */ 294void 295qpu_validate(struct v3d_compile *c) 296{ 297 /* We don't want to do validation in release builds, but we want to 298 * keep compiling the validation code to make sure it doesn't get 299 * broken. 300 */ 301#ifndef DEBUG 302 return; 303#endif 304 305 struct v3d_qpu_validate_state state = { 306 .c = c, 307 .last_sfu_write = -10, 308 .last_thrsw_ip = -10, 309 .last_branch_ip = -10, 310 .ip = 0, 311 312 .last_thrsw_found = !c->last_thrsw, 313 }; 314 315 vir_for_each_block(block, c) { 316 qpu_validate_block(&state, block); 317 } 318 319 if (state.thrsw_count > 1 && !state.last_thrsw_found) { 320 fail_instr(&state, 321 "thread switch found without last-THRSW in program"); 322 } 323 324 if (!state.thrend_found) 325 fail_instr(&state, "No program-end THRSW found"); 326} 327