vir_to_qpu.c revision 01e04c3f
/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"

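/* Helpers for constructing qpu_reg references: qpu_reg() names a physical
 * register file entry, qpu_magic() names a magic write address (accumulators,
 * NOP, VPM, TLB, etc.), and qpu_acc() names accumulator @acc via its magic
 * waddr.
 */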
static inline struct qpu_reg
qpu_reg(int index)
{
        struct qpu_reg reg = {
                .magic = false,
                .index = index,
        };
        return reg;
}

static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)
{
        struct qpu_reg reg = {
                .magic = true,
                .index = waddr,
        };
        return reg;
}

static inline struct qpu_reg
qpu_acc(int acc)
{
        return qpu_magic(V3D_QPU_WADDR_R0 + acc);
}

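/* Returns a QPU instruction with both the add and mul ALU slots set to NOP,
 * writing to the NOP magic address.
 */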
struct v3d_qpu_instr
v3d_qpu_nop(void)
{
        struct v3d_qpu_instr instr = {
                .type = V3D_QPU_INSTR_TYPE_ALU,
                .alu = {
                        .add = {
                                .op = V3D_QPU_A_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                        .mul = {
                                .op = V3D_QPU_M_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                }
        };

        return instr;
}

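/* Builds a fresh VIR NOP instruction (an add-ALU NOP with undefined
 * operands), not yet linked into any block.
 */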
static struct qinst *
vir_nop(void)
{
        struct qreg undef = { QFILE_NULL, 0 };
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
}

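/* Allocates a new VIR NOP and links it into the block immediately before
 * @inst, for carrying a sideband signal such as ldunif or ldvpm.
 */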
static struct qinst *
new_qpu_nop_before(struct qinst *inst)
{
        struct qinst *q = vir_nop();

        list_addtail(&q->link, &inst->link);

        return q;
}

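/* Emits a NOP with the ldunif signal set before @inst, loading the uniform
 * referenced by src[i] so the caller can read it as an accumulator
 * (qpu_acc(5)).
 */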
static void
new_ldunif_instr(struct qinst *inst, int i)
{
        struct qinst *ldunif = new_qpu_nop_before(inst);

        ldunif->qpu.sig.ldunif = true;
        assert(inst->src[i].file == QFILE_UNIF);
        ldunif->uniform = inst->src[i].index;
}

/**
 * Allocates the src register (accumulator or register file) into the RADDR
 * fields of the instruction.
 */
static void
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
        if (src.smimm) {
                assert(instr->sig.small_imm);
                *mux = V3D_QPU_MUX_B;
                return;
        }

        if (src.magic) {
                assert(src.index >= V3D_QPU_WADDR_R0 &&
                       src.index <= V3D_QPU_WADDR_R5);
                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
                return;
        }

        if (instr->alu.add.a != V3D_QPU_MUX_A &&
            instr->alu.add.b != V3D_QPU_MUX_A &&
            instr->alu.mul.a != V3D_QPU_MUX_A &&
            instr->alu.mul.b != V3D_QPU_MUX_A) {
                instr->raddr_a = src.index;
                *mux = V3D_QPU_MUX_A;
        } else {
                if (instr->raddr_a == src.index) {
                        *mux = V3D_QPU_MUX_A;
                } else {
                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
                                 instr->alu.add.b == V3D_QPU_MUX_B &&
                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
                               src.index == instr->raddr_b);

                        instr->raddr_b = src.index;
                        *mux = V3D_QPU_MUX_B;
                }
        }
}

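/* Returns true if @qinst is a lone MOV from a register to itself with no
 * signals, packing, or flag updates, so it can be dropped from the program.
 */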
static bool
is_no_op_mov(struct qinst *qinst)
{
        static const struct v3d_qpu_sig no_sig = {0};

        /* Make sure it's just a lone MOV. */
        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
                return false;
        }

        /* Check if it's a MOV from a register to itself. */
        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        if (qinst->qpu.alu.mul.magic_write) {
                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
                        return false;

                if (qinst->qpu.alu.mul.a !=
                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
                        return false;
                }
        } else {
                int raddr;

                switch (qinst->qpu.alu.mul.a) {
                case V3D_QPU_MUX_A:
                        raddr = qinst->qpu.raddr_a;
                        break;
                case V3D_QPU_MUX_B:
                        raddr = qinst->qpu.raddr_b;
                        break;
                default:
                        return false;
                }
                if (raddr != waddr)
                        return false;
        }

        /* If there's any packing or flag update, the instruction has a real
         * effect and we need to execute it.
         */
        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
                return false;
        }

        return true;
}

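/* Translates the VIR instructions of @block in place into their QPU
 * encodings, using @temp_registers (indexed by temp number) for the
 * register-allocated temporaries.
 */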
static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
        int last_vpm_read_index = -1;

        vir_for_each_inst_safe(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                struct qinst *temp;

                if (vir_has_implicit_uniform(qinst)) {
                        int src = vir_get_implicit_uniform_src(qinst);
                        assert(qinst->src[src].file == QFILE_UNIF);
                        qinst->uniform = qinst->src[src].index;
                        c->num_uniforms++;
                }

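                /* Map each non-sideband source onto a QPU register,
                 * accumulator, or small immediate, emitting ldunif/ldvpm
                 * NOPs before the instruction where a sideband load is
                 * needed.
                 */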
                int nsrc = vir_get_non_sideband_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                bool emitted_ldunif = false;
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                        case QFILE_LOAD_IMM:
                                src[i] = qpu_acc(0);
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_UNIF:
                                if (!emitted_ldunif) {
                                        new_ldunif_instr(qinst, i);
                                        c->num_uniforms++;
                                        emitted_ldunif = true;
                                }

                                src[i] = qpu_acc(5);
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].smimm = true;
                                break;

                        case QFILE_VPM:
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;

                                temp = new_qpu_nop_before(qinst);
                                temp->qpu.sig.ldvpm = true;

                                src[i] = qpu_acc(3);
                                break;

                        case QFILE_TLB:
                        case QFILE_TLBU:
                                unreachable("bad vir src file");
                        }
                }

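                /* Translate the VIR destination into either a register file
                 * entry or a magic write address.
                 */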
                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_VPM:
                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
                        break;

                case QFILE_TLB:
                        dst = qpu_magic(V3D_QPU_WADDR_TLB);
                        break;

                case QFILE_TLBU:
                        dst = qpu_magic(V3D_QPU_WADDR_TLBU);
                        break;

                case QFILE_UNIF:
                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

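                /* Route the sources and destination into the add ALU, the
                 * mul ALU, or the signal address, depending on which half of
                 * the instruction is in use.
                 */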
                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (v3d_qpu_sig_writes_address(c->devinfo,
                                                       &qinst->qpu.sig)) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                qinst->qpu.sig_addr = dst.index;
                                qinst->qpu.sig_magic = dst.magic;
                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b, src[1]);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b, src[1]);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;

                                if (is_no_op_mov(qinst)) {
                                        vir_remove_instruction(c, qinst);
                                        continue;
                                }
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}

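/* Disassembles and prints the packed QPU instructions for debugging. */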
static void
v3d_dump_qpu(struct v3d_compile *c)
{
        fprintf(stderr, "%s prog %d/%d QPU:\n",
                vir_get_stage_name(c),
                c->program_id, c->variant_id);

        for (int i = 0; i < c->qpu_inst_count; i++) {
                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
                fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str);
                ralloc_free((void *)str);
        }
        fprintf(stderr, "\n");
}

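/* Entry point for code generation: lowers the compiled VIR into packed QPU
 * instructions in c->qpu_insts, scheduling and validating along the way.
 */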
void
v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
{
        /* Reset the uniform count to how many will actually be loaded by the
         * generated QPU code.
         */
        c->num_uniforms = 0;

        vir_for_each_block(block, c)
                v3d_generate_code_block(c, block, temp_registers);

        uint32_t cycles = v3d_qpu_schedule_instructions(c);

        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
        int i = 0;
        vir_for_each_inst_inorder(inst, c) {
                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
                                             &c->qpu_insts[i++]);
                if (!ok) {
                        fprintf(stderr, "Failed to pack instruction:\n");
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                        c->failed = true;
                        return;
                }
        }
        assert(i == c->qpu_inst_count);

        if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d instructions\n",
                        vir_get_stage_name(c),
                        c->program_id, c->variant_id,
                        c->qpu_inst_count);
        }

        /* The QPU cycle estimates are pretty broken (see waddr_latency()), so
         * don't report them for now.
         */
        if (false) {
                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
                        vir_get_stage_name(c),
                        c->program_id, c->variant_id,
                        cycles);
        }

        if (V3D_DEBUG & (V3D_DEBUG_QPU |
                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                v3d_dump_qpu(c);
        }

        qpu_validate(c);

        free(temp_registers);
}