1/*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "compiler/v3d_compiler.h"
25#include "qpu/qpu_instr.h"
26#include "qpu/qpu_disasm.h"
27
28static inline struct qpu_reg
29qpu_reg(int index)
30{
31        struct qpu_reg reg = {
32                .magic = false,
33                .index = index,
34        };
35        return reg;
36}
37
38static inline struct qpu_reg
39qpu_magic(enum v3d_qpu_waddr waddr)
40{
41        struct qpu_reg reg = {
42                .magic = true,
43                .index = waddr,
44        };
45        return reg;
46}
47
48static inline struct qpu_reg
49qpu_acc(int acc)
50{
51        return qpu_magic(V3D_QPU_WADDR_R0 + acc);
52}
53
54struct v3d_qpu_instr
55v3d_qpu_nop(void)
56{
57        struct v3d_qpu_instr instr = {
58                .type = V3D_QPU_INSTR_TYPE_ALU,
59                .alu = {
60                        .add = {
61                                .op = V3D_QPU_A_NOP,
62                                .waddr = V3D_QPU_WADDR_NOP,
63                                .magic_write = true,
64                        },
65                        .mul = {
66                                .op = V3D_QPU_M_NOP,
67                                .waddr = V3D_QPU_WADDR_NOP,
68                                .magic_write = true,
69                        },
70                }
71        };
72
73        return instr;
74}
75
76static struct qinst *
77vir_nop(void)
78{
79        struct qreg undef = vir_nop_reg();
80        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
81
82        return qinst;
83}
84
85static struct qinst *
86new_qpu_nop_before(struct qinst *inst)
87{
88        struct qinst *q = vir_nop();
89
90        list_addtail(&q->link, &inst->link);
91
92        return q;
93}
94
95/**
96 * Allocates the src register (accumulator or register file) into the RADDR
97 * fields of the instruction.
98 */
99static void
100set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
101{
102        if (src.smimm) {
103                assert(instr->sig.small_imm);
104                *mux = V3D_QPU_MUX_B;
105                return;
106        }
107
108        if (src.magic) {
109                assert(src.index >= V3D_QPU_WADDR_R0 &&
110                       src.index <= V3D_QPU_WADDR_R5);
111                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
112                return;
113        }
114
115        if (instr->alu.add.a != V3D_QPU_MUX_A &&
116            instr->alu.add.b != V3D_QPU_MUX_A &&
117            instr->alu.mul.a != V3D_QPU_MUX_A &&
118            instr->alu.mul.b != V3D_QPU_MUX_A) {
119                instr->raddr_a = src.index;
120                *mux = V3D_QPU_MUX_A;
121        } else {
122                if (instr->raddr_a == src.index) {
123                        *mux = V3D_QPU_MUX_A;
124                } else {
125                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
126                                 instr->alu.add.b == V3D_QPU_MUX_B &&
127                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
128                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
129                               src.index == instr->raddr_b);
130
131                        instr->raddr_b = src.index;
132                        *mux = V3D_QPU_MUX_B;
133                }
134        }
135}
136
137static bool
138is_no_op_mov(struct qinst *qinst)
139{
140        static const struct v3d_qpu_sig no_sig = {0};
141
142        /* Make sure it's just a lone MOV. */
143        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
144            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
145            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
146            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
147                return false;
148        }
149
150        /* Check if it's a MOV from a register to itself. */
151        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
152        if (qinst->qpu.alu.mul.magic_write) {
153                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
154                        return false;
155
156                if (qinst->qpu.alu.mul.a !=
157                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
158                        return false;
159                }
160        } else {
161                int raddr;
162
163                switch (qinst->qpu.alu.mul.a) {
164                case V3D_QPU_MUX_A:
165                        raddr = qinst->qpu.raddr_a;
166                        break;
167                case V3D_QPU_MUX_B:
168                        raddr = qinst->qpu.raddr_b;
169                        break;
170                default:
171                        return false;
172                }
173                if (raddr != waddr)
174                        return false;
175        }
176
177        /* No packing or flags updates, or we need to execute the
178         * instruction.
179         */
180        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
181            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
182            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
183            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
184            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
185                return false;
186        }
187
188        return true;
189}
190
191static void
192v3d_generate_code_block(struct v3d_compile *c,
193                        struct qblock *block,
194                        struct qpu_reg *temp_registers)
195{
196        int last_vpm_read_index = -1;
197
198        vir_for_each_inst_safe(qinst, block) {
199#if 0
200                fprintf(stderr, "translating qinst to qpu: ");
201                vir_dump_inst(c, qinst);
202                fprintf(stderr, "\n");
203#endif
204
205                struct qinst *temp;
206
207                if (vir_has_uniform(qinst))
208                        c->num_uniforms++;
209
210                int nsrc = vir_get_nsrc(qinst);
211                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
212                for (int i = 0; i < nsrc; i++) {
213                        int index = qinst->src[i].index;
214                        switch (qinst->src[i].file) {
215                        case QFILE_REG:
216                                src[i] = qpu_reg(qinst->src[i].index);
217                                break;
218                        case QFILE_MAGIC:
219                                src[i] = qpu_magic(qinst->src[i].index);
220                                break;
221                        case QFILE_NULL:
222                        case QFILE_LOAD_IMM:
223                                src[i] = qpu_acc(0);
224                                break;
225                        case QFILE_TEMP:
226                                src[i] = temp_registers[index];
227                                break;
228                        case QFILE_SMALL_IMM:
229                                src[i].smimm = true;
230                                break;
231
232                        case QFILE_VPM:
233                                assert((int)qinst->src[i].index >=
234                                       last_vpm_read_index);
235                                (void)last_vpm_read_index;
236                                last_vpm_read_index = qinst->src[i].index;
237
238                                temp = new_qpu_nop_before(qinst);
239                                temp->qpu.sig.ldvpm = true;
240
241                                src[i] = qpu_acc(3);
242                                break;
243                        }
244                }
245
246                struct qpu_reg dst;
247                switch (qinst->dst.file) {
248                case QFILE_NULL:
249                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
250                        break;
251
252                case QFILE_REG:
253                        dst = qpu_reg(qinst->dst.index);
254                        break;
255
256                case QFILE_MAGIC:
257                        dst = qpu_magic(qinst->dst.index);
258                        break;
259
260                case QFILE_TEMP:
261                        dst = temp_registers[qinst->dst.index];
262                        break;
263
264                case QFILE_VPM:
265                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
266                        break;
267
268                case QFILE_SMALL_IMM:
269                case QFILE_LOAD_IMM:
270                        assert(!"not reached");
271                        break;
272                }
273
274                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
275                        if (qinst->qpu.sig.ldunif) {
276                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
277                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
278
279                                if (!dst.magic ||
280                                    dst.index != V3D_QPU_WADDR_R5) {
281                                        assert(c->devinfo->ver >= 40);
282
283                                        qinst->qpu.sig.ldunif = false;
284                                        qinst->qpu.sig.ldunifrf = true;
285                                        qinst->qpu.sig_addr = dst.index;
286                                        qinst->qpu.sig_magic = dst.magic;
287                                }
288                        } else if (v3d_qpu_sig_writes_address(c->devinfo,
289                                                       &qinst->qpu.sig)) {
290                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
291                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
292
293                                qinst->qpu.sig_addr = dst.index;
294                                qinst->qpu.sig_magic = dst.magic;
295                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
296                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
297                                if (nsrc >= 1) {
298                                        set_src(&qinst->qpu,
299                                                &qinst->qpu.alu.add.a, src[0]);
300                                }
301                                if (nsrc >= 2) {
302                                        set_src(&qinst->qpu,
303                                                &qinst->qpu.alu.add.b, src[1]);
304                                }
305
306                                qinst->qpu.alu.add.waddr = dst.index;
307                                qinst->qpu.alu.add.magic_write = dst.magic;
308                        } else {
309                                if (nsrc >= 1) {
310                                        set_src(&qinst->qpu,
311                                                &qinst->qpu.alu.mul.a, src[0]);
312                                }
313                                if (nsrc >= 2) {
314                                        set_src(&qinst->qpu,
315                                                &qinst->qpu.alu.mul.b, src[1]);
316                                }
317
318                                qinst->qpu.alu.mul.waddr = dst.index;
319                                qinst->qpu.alu.mul.magic_write = dst.magic;
320
321                                if (is_no_op_mov(qinst)) {
322                                        vir_remove_instruction(c, qinst);
323                                        continue;
324                                }
325                        }
326                } else {
327                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
328                }
329        }
330}
331
332static bool
333reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
334{
335        struct v3d_qpu_instr qpu;
336        MAYBE_UNUSED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
337        assert(ok);
338
339        if (qpu.sig.ldunif ||
340            qpu.sig.ldunifrf ||
341            qpu.sig.wrtmuc) {
342                return true;
343        }
344
345        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
346                return true;
347
348        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
349                if (qpu.alu.add.magic_write &&
350                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
351                        return true;
352                }
353
354                if (qpu.alu.mul.magic_write &&
355                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
356                        return true;
357                }
358        }
359
360        return false;
361}
362
363static void
364v3d_dump_qpu(struct v3d_compile *c)
365{
366        fprintf(stderr, "%s prog %d/%d QPU:\n",
367                vir_get_stage_name(c),
368                c->program_id, c->variant_id);
369
370        int next_uniform = 0;
371        for (int i = 0; i < c->qpu_inst_count; i++) {
372                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
373                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);
374
375                /* We can only do this on 4.x, because we're not tracking TMU
376                 * implicit uniforms here on 3.x.
377                 */
378                if (c->devinfo->ver >= 40 &&
379                    reads_uniform(c->devinfo, c->qpu_insts[i])) {
380                        fprintf(stderr, " (");
381                        vir_dump_uniform(c->uniform_contents[next_uniform],
382                                         c->uniform_data[next_uniform]);
383                        fprintf(stderr, ")");
384                        next_uniform++;
385                }
386                fprintf(stderr, "\n");
387                ralloc_free((void *)str);
388        }
389
390        /* Make sure our dumping lined up. */
391        if (c->devinfo->ver >= 40)
392                assert(next_uniform == c->num_uniforms);
393
394        fprintf(stderr, "\n");
395}
396
397void
398v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
399{
400        /* Reset the uniform count to how many will be actually loaded by the
401         * generated QPU code.
402         */
403        c->num_uniforms = 0;
404
405        vir_for_each_block(block, c)
406                v3d_generate_code_block(c, block, temp_registers);
407
408        v3d_qpu_schedule_instructions(c);
409
410        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
411        int i = 0;
412        vir_for_each_inst_inorder(inst, c) {
413                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
414                                             &c->qpu_insts[i++]);
415                if (!ok) {
416                        fprintf(stderr, "Failed to pack instruction:\n");
417                        vir_dump_inst(c, inst);
418                        fprintf(stderr, "\n");
419                        c->failed = true;
420                        return;
421                }
422        }
423        assert(i == c->qpu_inst_count);
424
425        if (V3D_DEBUG & (V3D_DEBUG_QPU |
426                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
427                v3d_dump_qpu(c);
428        }
429
430        qpu_validate(c);
431
432        free(temp_registers);
433}
434