1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2014 Broadcom
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg/**
25b8e80941Smrg * @file
26b8e80941Smrg *
27b8e80941Smrg * Validates the QPU instruction sequence after register allocation and
28b8e80941Smrg * scheduling.
29b8e80941Smrg */
30b8e80941Smrg
31b8e80941Smrg#include <assert.h>
32b8e80941Smrg#include <stdio.h>
33b8e80941Smrg#include <stdlib.h>
34b8e80941Smrg#include "v3d_compiler.h"
35b8e80941Smrg#include "qpu/qpu_disasm.h"
36b8e80941Smrg
/* Running state threaded through validation of one shader's instruction
 * stream, in program order.
 */
struct v3d_qpu_validate_state {
        /* Compile whose instruction list is being validated (used for the
         * failure dump in fail_instr()).
         */
        struct v3d_compile *c;
        /* QPU encoding of the previously validated instruction, or NULL
         * before the first one.
         */
        const struct v3d_qpu_instr *last;
        /* Index of the current instruction in program order. */
        int ip;
        /* ip of the most recent SFU write (initialized to -10, i.e. "long
         * enough ago that no hazard window covers the program start").
         */
        int last_sfu_write;
        /* ip of the most recent branch instruction (same -10 convention). */
        int last_branch_ip;
        /* ip of the most recent thread-switch (same -10 convention). */
        int last_thrsw_ip;

        /* Set when we've found the last-THRSW signal, or if we were started
         * in single-segment mode.
         */
        bool last_thrsw_found;

        /* Set when we've found the THRSW after the last THRSW */
        bool thrend_found;

        int thrsw_count;
};
55b8e80941Smrg
56b8e80941Smrgstatic void
57b8e80941Smrgfail_instr(struct v3d_qpu_validate_state *state, const char *msg)
58b8e80941Smrg{
59b8e80941Smrg        struct v3d_compile *c = state->c;
60b8e80941Smrg
61b8e80941Smrg        fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
62b8e80941Smrg
63b8e80941Smrg        int dump_ip = 0;
64b8e80941Smrg        vir_for_each_inst_inorder(inst, c) {
65b8e80941Smrg                v3d_qpu_dump(c->devinfo, &inst->qpu);
66b8e80941Smrg
67b8e80941Smrg                if (dump_ip++ == state->ip)
68b8e80941Smrg                        fprintf(stderr, " *** ERROR ***");
69b8e80941Smrg
70b8e80941Smrg                fprintf(stderr, "\n");
71b8e80941Smrg        }
72b8e80941Smrg
73b8e80941Smrg        fprintf(stderr, "\n");
74b8e80941Smrg        abort();
75b8e80941Smrg}
76b8e80941Smrg
77b8e80941Smrgstatic bool
78b8e80941Smrgin_branch_delay_slots(struct v3d_qpu_validate_state *state)
79b8e80941Smrg{
80b8e80941Smrg        return (state->ip - state->last_branch_ip) < 3;
81b8e80941Smrg}
82b8e80941Smrg
83b8e80941Smrgstatic bool
84b8e80941Smrgin_thrsw_delay_slots(struct v3d_qpu_validate_state *state)
85b8e80941Smrg{
86b8e80941Smrg        return (state->ip - state->last_thrsw_ip) < 3;
87b8e80941Smrg}
88b8e80941Smrg
89b8e80941Smrgstatic bool
90b8e80941Smrgqpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
91b8e80941Smrg                        bool (*predicate)(enum v3d_qpu_waddr waddr))
92b8e80941Smrg{
93b8e80941Smrg        if (inst->type == V3D_QPU_INSTR_TYPE_ALU)
94b8e80941Smrg                return false;
95b8e80941Smrg
96b8e80941Smrg        if (inst->alu.add.op != V3D_QPU_A_NOP &&
97b8e80941Smrg            inst->alu.add.magic_write &&
98b8e80941Smrg            predicate(inst->alu.add.waddr))
99b8e80941Smrg                return true;
100b8e80941Smrg
101b8e80941Smrg        if (inst->alu.mul.op != V3D_QPU_M_NOP &&
102b8e80941Smrg            inst->alu.mul.magic_write &&
103b8e80941Smrg            predicate(inst->alu.mul.waddr))
104b8e80941Smrg                return true;
105b8e80941Smrg
106b8e80941Smrg        return false;
107b8e80941Smrg}
108b8e80941Smrg
109b8e80941Smrgstatic void
110b8e80941Smrgqpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
111b8e80941Smrg{
112b8e80941Smrg        const struct v3d_device_info *devinfo = state->c->devinfo;
113b8e80941Smrg        const struct v3d_qpu_instr *inst = &qinst->qpu;
114b8e80941Smrg
115b8e80941Smrg        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
116b8e80941Smrg                return;
117b8e80941Smrg
118b8e80941Smrg        /* LDVARY writes r5 two instructions later and LDUNIF writes
119b8e80941Smrg         * r5 one instruction later, which is illegal to have
120b8e80941Smrg         * together.
121b8e80941Smrg         */
122b8e80941Smrg        if (state->last && state->last->sig.ldvary &&
123b8e80941Smrg            (inst->sig.ldunif || inst->sig.ldunifa)) {
124b8e80941Smrg                fail_instr(state, "LDUNIF after a LDVARY");
125b8e80941Smrg        }
126b8e80941Smrg
127b8e80941Smrg        /* GFXH-1633 */
128b8e80941Smrg        bool last_reads_ldunif = (state->last && (state->last->sig.ldunif ||
129b8e80941Smrg                                                  state->last->sig.ldunifrf));
130b8e80941Smrg        bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa ||
131b8e80941Smrg                                                   state->last->sig.ldunifarf));
132b8e80941Smrg        bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf;
133b8e80941Smrg        bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf;
134b8e80941Smrg        if ((last_reads_ldunif && reads_ldunifa) ||
135b8e80941Smrg            (last_reads_ldunifa && reads_ldunif)) {
136b8e80941Smrg                fail_instr(state,
137b8e80941Smrg                           "LDUNIF and LDUNIFA can't be next to each other");
138b8e80941Smrg        }
139b8e80941Smrg
140b8e80941Smrg        int tmu_writes = 0;
141b8e80941Smrg        int sfu_writes = 0;
142b8e80941Smrg        int vpm_writes = 0;
143b8e80941Smrg        int tlb_writes = 0;
144b8e80941Smrg        int tsy_writes = 0;
145b8e80941Smrg
146b8e80941Smrg        if (inst->alu.add.op != V3D_QPU_A_NOP) {
147b8e80941Smrg                if (inst->alu.add.magic_write) {
148b8e80941Smrg                        if (v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr))
149b8e80941Smrg                                tmu_writes++;
150b8e80941Smrg                        if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
151b8e80941Smrg                                sfu_writes++;
152b8e80941Smrg                        if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
153b8e80941Smrg                                vpm_writes++;
154b8e80941Smrg                        if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
155b8e80941Smrg                                tlb_writes++;
156b8e80941Smrg                        if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
157b8e80941Smrg                                tsy_writes++;
158b8e80941Smrg                }
159b8e80941Smrg        }
160b8e80941Smrg
161b8e80941Smrg        if (inst->alu.mul.op != V3D_QPU_M_NOP) {
162b8e80941Smrg                if (inst->alu.mul.magic_write) {
163b8e80941Smrg                        if (v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr))
164b8e80941Smrg                                tmu_writes++;
165b8e80941Smrg                        if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
166b8e80941Smrg                                sfu_writes++;
167b8e80941Smrg                        if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
168b8e80941Smrg                                vpm_writes++;
169b8e80941Smrg                        if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
170b8e80941Smrg                                tlb_writes++;
171b8e80941Smrg                        if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
172b8e80941Smrg                                tsy_writes++;
173b8e80941Smrg                }
174b8e80941Smrg        }
175b8e80941Smrg
176b8e80941Smrg        if (in_thrsw_delay_slots(state)) {
177b8e80941Smrg                /* There's no way you want to start SFU during the THRSW delay
178b8e80941Smrg                 * slots, since the result would land in the other thread.
179b8e80941Smrg                 */
180b8e80941Smrg                if (sfu_writes) {
181b8e80941Smrg                        fail_instr(state,
182b8e80941Smrg                                   "SFU write started during THRSW delay slots ");
183b8e80941Smrg                }
184b8e80941Smrg
185b8e80941Smrg                if (inst->sig.ldvary)
186b8e80941Smrg                        fail_instr(state, "LDVARY during THRSW delay slots");
187b8e80941Smrg        }
188b8e80941Smrg
189b8e80941Smrg        (void)qpu_magic_waddr_matches; /* XXX */
190b8e80941Smrg
191b8e80941Smrg        /* SFU r4 results come back two instructions later.  No doing
192b8e80941Smrg         * r4 read/writes or other SFU lookups until it's done.
193b8e80941Smrg         */
194b8e80941Smrg        if (state->ip - state->last_sfu_write < 2) {
195b8e80941Smrg                if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
196b8e80941Smrg                        fail_instr(state, "R4 read too soon after SFU");
197b8e80941Smrg
198b8e80941Smrg                if (v3d_qpu_writes_r4(devinfo, inst))
199b8e80941Smrg                        fail_instr(state, "R4 write too soon after SFU");
200b8e80941Smrg
201b8e80941Smrg                if (sfu_writes)
202b8e80941Smrg                        fail_instr(state, "SFU write too soon after SFU");
203b8e80941Smrg        }
204b8e80941Smrg
205b8e80941Smrg        /* XXX: The docs say VPM can happen with the others, but the simulator
206b8e80941Smrg         * disagrees.
207b8e80941Smrg         */
208b8e80941Smrg        if (tmu_writes +
209b8e80941Smrg            sfu_writes +
210b8e80941Smrg            vpm_writes +
211b8e80941Smrg            tlb_writes +
212b8e80941Smrg            tsy_writes +
213b8e80941Smrg            inst->sig.ldtmu +
214b8e80941Smrg            inst->sig.ldtlb +
215b8e80941Smrg            inst->sig.ldvpm +
216b8e80941Smrg            inst->sig.ldtlbu > 1) {
217b8e80941Smrg                fail_instr(state,
218b8e80941Smrg                           "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
219b8e80941Smrg        }
220b8e80941Smrg
221b8e80941Smrg        if (sfu_writes)
222b8e80941Smrg                state->last_sfu_write = state->ip;
223b8e80941Smrg
224b8e80941Smrg        if (inst->sig.thrsw) {
225b8e80941Smrg                if (in_branch_delay_slots(state))
226b8e80941Smrg                        fail_instr(state, "THRSW in a branch delay slot.");
227b8e80941Smrg
228b8e80941Smrg                if (state->last_thrsw_found)
229b8e80941Smrg                        state->thrend_found = true;
230b8e80941Smrg
231b8e80941Smrg                if (state->last_thrsw_ip == state->ip - 1) {
232b8e80941Smrg                        /* If it's the second THRSW in a row, then it's just a
233b8e80941Smrg                         * last-thrsw signal.
234b8e80941Smrg                         */
235b8e80941Smrg                        if (state->last_thrsw_found)
236b8e80941Smrg                                fail_instr(state, "Two last-THRSW signals");
237b8e80941Smrg                        state->last_thrsw_found = true;
238b8e80941Smrg                } else {
239b8e80941Smrg                        if (in_thrsw_delay_slots(state)) {
240b8e80941Smrg                                fail_instr(state,
241b8e80941Smrg                                           "THRSW too close to another THRSW.");
242b8e80941Smrg                        }
243b8e80941Smrg                        state->thrsw_count++;
244b8e80941Smrg                        state->last_thrsw_ip = state->ip;
245b8e80941Smrg                }
246b8e80941Smrg        }
247b8e80941Smrg
248b8e80941Smrg        if (state->thrend_found &&
249b8e80941Smrg            state->last_thrsw_ip - state->ip <= 2 &&
250b8e80941Smrg            inst->type == V3D_QPU_INSTR_TYPE_ALU) {
251b8e80941Smrg                if ((inst->alu.add.op != V3D_QPU_A_NOP &&
252b8e80941Smrg                     !inst->alu.add.magic_write)) {
253b8e80941Smrg                        fail_instr(state, "RF write after THREND");
254b8e80941Smrg                }
255b8e80941Smrg
256b8e80941Smrg                if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
257b8e80941Smrg                     !inst->alu.mul.magic_write)) {
258b8e80941Smrg                        fail_instr(state, "RF write after THREND");
259b8e80941Smrg                }
260b8e80941Smrg
261b8e80941Smrg                if (v3d_qpu_sig_writes_address(devinfo, &inst->sig))
262b8e80941Smrg                        fail_instr(state, "RF write after THREND");
263b8e80941Smrg
264b8e80941Smrg                /* GFXH-1625: No TMUWT in the last instruction */
265b8e80941Smrg                if (state->last_thrsw_ip - state->ip == 2 &&
266b8e80941Smrg                    inst->alu.add.op == V3D_QPU_A_TMUWT)
267b8e80941Smrg                        fail_instr(state, "TMUWT in last instruction");
268b8e80941Smrg        }
269b8e80941Smrg
270b8e80941Smrg        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
271b8e80941Smrg                if (in_branch_delay_slots(state))
272b8e80941Smrg                        fail_instr(state, "branch in a branch delay slot.");
273b8e80941Smrg                if (in_thrsw_delay_slots(state))
274b8e80941Smrg                        fail_instr(state, "branch in a THRSW delay slot.");
275b8e80941Smrg                state->last_branch_ip = state->ip;
276b8e80941Smrg        }
277b8e80941Smrg}
278b8e80941Smrg
279b8e80941Smrgstatic void
280b8e80941Smrgqpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
281b8e80941Smrg{
282b8e80941Smrg        vir_for_each_inst(qinst, block) {
283b8e80941Smrg                qpu_validate_inst(state, qinst);
284b8e80941Smrg
285b8e80941Smrg                state->last = &qinst->qpu;
286b8e80941Smrg                state->ip++;
287b8e80941Smrg        }
288b8e80941Smrg}
289b8e80941Smrg
290b8e80941Smrg/**
291b8e80941Smrg * Checks for the instruction restrictions from page 37 ("Summary of
292b8e80941Smrg * Instruction Restrictions").
293b8e80941Smrg */
294b8e80941Smrgvoid
295b8e80941Smrgqpu_validate(struct v3d_compile *c)
296b8e80941Smrg{
297b8e80941Smrg        /* We don't want to do validation in release builds, but we want to
298b8e80941Smrg         * keep compiling the validation code to make sure it doesn't get
299b8e80941Smrg         * broken.
300b8e80941Smrg         */
301b8e80941Smrg#ifndef DEBUG
302b8e80941Smrg        return;
303b8e80941Smrg#endif
304b8e80941Smrg
305b8e80941Smrg        struct v3d_qpu_validate_state state = {
306b8e80941Smrg                .c = c,
307b8e80941Smrg                .last_sfu_write = -10,
308b8e80941Smrg                .last_thrsw_ip = -10,
309b8e80941Smrg                .last_branch_ip = -10,
310b8e80941Smrg                .ip = 0,
311b8e80941Smrg
312b8e80941Smrg                .last_thrsw_found = !c->last_thrsw,
313b8e80941Smrg        };
314b8e80941Smrg
315b8e80941Smrg        vir_for_each_block(block, c) {
316b8e80941Smrg                qpu_validate_block(&state, block);
317b8e80941Smrg        }
318b8e80941Smrg
319b8e80941Smrg        if (state.thrsw_count > 1 && !state.last_thrsw_found) {
320b8e80941Smrg                fail_instr(&state,
321b8e80941Smrg                           "thread switch found without last-THRSW in program");
322b8e80941Smrg        }
323b8e80941Smrg
324b8e80941Smrg        if (!state.thrend_found)
325b8e80941Smrg                fail_instr(&state, "No program-end THRSW found");
326b8e80941Smrg}
327