/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
2301e04c3fSmrg
2401e04c3fSmrg#include "compiler/v3d_compiler.h"
2501e04c3fSmrg#include "qpu/qpu_instr.h"
2601e04c3fSmrg#include "qpu/qpu_disasm.h"
2701e04c3fSmrg
2801e04c3fSmrgstatic inline struct qpu_reg
2901e04c3fSmrgqpu_reg(int index)
3001e04c3fSmrg{
3101e04c3fSmrg        struct qpu_reg reg = {
3201e04c3fSmrg                .magic = false,
3301e04c3fSmrg                .index = index,
3401e04c3fSmrg        };
3501e04c3fSmrg        return reg;
3601e04c3fSmrg}
3701e04c3fSmrg
3801e04c3fSmrgstatic inline struct qpu_reg
3901e04c3fSmrgqpu_magic(enum v3d_qpu_waddr waddr)
4001e04c3fSmrg{
4101e04c3fSmrg        struct qpu_reg reg = {
4201e04c3fSmrg                .magic = true,
4301e04c3fSmrg                .index = waddr,
4401e04c3fSmrg        };
4501e04c3fSmrg        return reg;
4601e04c3fSmrg}
4701e04c3fSmrg
4801e04c3fSmrgstruct v3d_qpu_instr
4901e04c3fSmrgv3d_qpu_nop(void)
5001e04c3fSmrg{
5101e04c3fSmrg        struct v3d_qpu_instr instr = {
5201e04c3fSmrg                .type = V3D_QPU_INSTR_TYPE_ALU,
5301e04c3fSmrg                .alu = {
5401e04c3fSmrg                        .add = {
5501e04c3fSmrg                                .op = V3D_QPU_A_NOP,
5601e04c3fSmrg                                .waddr = V3D_QPU_WADDR_NOP,
5701e04c3fSmrg                                .magic_write = true,
5801e04c3fSmrg                        },
5901e04c3fSmrg                        .mul = {
6001e04c3fSmrg                                .op = V3D_QPU_M_NOP,
6101e04c3fSmrg                                .waddr = V3D_QPU_WADDR_NOP,
6201e04c3fSmrg                                .magic_write = true,
6301e04c3fSmrg                        },
6401e04c3fSmrg                }
6501e04c3fSmrg        };
6601e04c3fSmrg
6701e04c3fSmrg        return instr;
6801e04c3fSmrg}
6901e04c3fSmrg
7001e04c3fSmrgstatic struct qinst *
7101e04c3fSmrgvir_nop(void)
7201e04c3fSmrg{
73ed98bd31Smaya        struct qreg undef = vir_nop_reg();
7401e04c3fSmrg        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
7501e04c3fSmrg
7601e04c3fSmrg        return qinst;
7701e04c3fSmrg}
7801e04c3fSmrg
7901e04c3fSmrgstatic struct qinst *
8001e04c3fSmrgnew_qpu_nop_before(struct qinst *inst)
8101e04c3fSmrg{
8201e04c3fSmrg        struct qinst *q = vir_nop();
8301e04c3fSmrg
8401e04c3fSmrg        list_addtail(&q->link, &inst->link);
8501e04c3fSmrg
8601e04c3fSmrg        return q;
8701e04c3fSmrg}
8801e04c3fSmrg
8901e04c3fSmrg/**
9001e04c3fSmrg * Allocates the src register (accumulator or register file) into the RADDR
9101e04c3fSmrg * fields of the instruction.
9201e04c3fSmrg */
9301e04c3fSmrgstatic void
9401e04c3fSmrgset_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
9501e04c3fSmrg{
9601e04c3fSmrg        if (src.smimm) {
9701e04c3fSmrg                assert(instr->sig.small_imm);
9801e04c3fSmrg                *mux = V3D_QPU_MUX_B;
9901e04c3fSmrg                return;
10001e04c3fSmrg        }
10101e04c3fSmrg
10201e04c3fSmrg        if (src.magic) {
10301e04c3fSmrg                assert(src.index >= V3D_QPU_WADDR_R0 &&
10401e04c3fSmrg                       src.index <= V3D_QPU_WADDR_R5);
10501e04c3fSmrg                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
10601e04c3fSmrg                return;
10701e04c3fSmrg        }
10801e04c3fSmrg
10901e04c3fSmrg        if (instr->alu.add.a != V3D_QPU_MUX_A &&
11001e04c3fSmrg            instr->alu.add.b != V3D_QPU_MUX_A &&
11101e04c3fSmrg            instr->alu.mul.a != V3D_QPU_MUX_A &&
11201e04c3fSmrg            instr->alu.mul.b != V3D_QPU_MUX_A) {
11301e04c3fSmrg                instr->raddr_a = src.index;
11401e04c3fSmrg                *mux = V3D_QPU_MUX_A;
11501e04c3fSmrg        } else {
11601e04c3fSmrg                if (instr->raddr_a == src.index) {
11701e04c3fSmrg                        *mux = V3D_QPU_MUX_A;
11801e04c3fSmrg                } else {
11901e04c3fSmrg                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
12001e04c3fSmrg                                 instr->alu.add.b == V3D_QPU_MUX_B &&
12101e04c3fSmrg                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
12201e04c3fSmrg                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
12301e04c3fSmrg                               src.index == instr->raddr_b);
12401e04c3fSmrg
12501e04c3fSmrg                        instr->raddr_b = src.index;
12601e04c3fSmrg                        *mux = V3D_QPU_MUX_B;
12701e04c3fSmrg                }
12801e04c3fSmrg        }
12901e04c3fSmrg}
13001e04c3fSmrg
13101e04c3fSmrgstatic bool
13201e04c3fSmrgis_no_op_mov(struct qinst *qinst)
13301e04c3fSmrg{
13401e04c3fSmrg        static const struct v3d_qpu_sig no_sig = {0};
13501e04c3fSmrg
13601e04c3fSmrg        /* Make sure it's just a lone MOV. */
13701e04c3fSmrg        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
13801e04c3fSmrg            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
13901e04c3fSmrg            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
14001e04c3fSmrg            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
14101e04c3fSmrg                return false;
14201e04c3fSmrg        }
14301e04c3fSmrg
14401e04c3fSmrg        /* Check if it's a MOV from a register to itself. */
14501e04c3fSmrg        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
14601e04c3fSmrg        if (qinst->qpu.alu.mul.magic_write) {
14701e04c3fSmrg                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
14801e04c3fSmrg                        return false;
14901e04c3fSmrg
15001e04c3fSmrg                if (qinst->qpu.alu.mul.a !=
15101e04c3fSmrg                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
15201e04c3fSmrg                        return false;
15301e04c3fSmrg                }
15401e04c3fSmrg        } else {
15501e04c3fSmrg                int raddr;
15601e04c3fSmrg
15701e04c3fSmrg                switch (qinst->qpu.alu.mul.a) {
15801e04c3fSmrg                case V3D_QPU_MUX_A:
15901e04c3fSmrg                        raddr = qinst->qpu.raddr_a;
16001e04c3fSmrg                        break;
16101e04c3fSmrg                case V3D_QPU_MUX_B:
16201e04c3fSmrg                        raddr = qinst->qpu.raddr_b;
16301e04c3fSmrg                        break;
16401e04c3fSmrg                default:
16501e04c3fSmrg                        return false;
16601e04c3fSmrg                }
16701e04c3fSmrg                if (raddr != waddr)
16801e04c3fSmrg                        return false;
16901e04c3fSmrg        }
17001e04c3fSmrg
17101e04c3fSmrg        /* No packing or flags updates, or we need to execute the
17201e04c3fSmrg         * instruction.
17301e04c3fSmrg         */
17401e04c3fSmrg        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
17501e04c3fSmrg            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
17601e04c3fSmrg            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
17701e04c3fSmrg            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
17801e04c3fSmrg            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
17901e04c3fSmrg                return false;
18001e04c3fSmrg        }
18101e04c3fSmrg
18201e04c3fSmrg        return true;
18301e04c3fSmrg}
18401e04c3fSmrg
18501e04c3fSmrgstatic void
18601e04c3fSmrgv3d_generate_code_block(struct v3d_compile *c,
18701e04c3fSmrg                        struct qblock *block,
18801e04c3fSmrg                        struct qpu_reg *temp_registers)
18901e04c3fSmrg{
19001e04c3fSmrg        int last_vpm_read_index = -1;
19101e04c3fSmrg
19201e04c3fSmrg        vir_for_each_inst_safe(qinst, block) {
19301e04c3fSmrg#if 0
19401e04c3fSmrg                fprintf(stderr, "translating qinst to qpu: ");
19501e04c3fSmrg                vir_dump_inst(c, qinst);
19601e04c3fSmrg                fprintf(stderr, "\n");
19701e04c3fSmrg#endif
19801e04c3fSmrg
19901e04c3fSmrg                struct qinst *temp;
20001e04c3fSmrg
201ed98bd31Smaya                if (vir_has_uniform(qinst))
20201e04c3fSmrg                        c->num_uniforms++;
20301e04c3fSmrg
204ed98bd31Smaya                int nsrc = vir_get_nsrc(qinst);
20501e04c3fSmrg                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
20601e04c3fSmrg                for (int i = 0; i < nsrc; i++) {
20701e04c3fSmrg                        int index = qinst->src[i].index;
20801e04c3fSmrg                        switch (qinst->src[i].file) {
20901e04c3fSmrg                        case QFILE_REG:
21001e04c3fSmrg                                src[i] = qpu_reg(qinst->src[i].index);
21101e04c3fSmrg                                break;
21201e04c3fSmrg                        case QFILE_MAGIC:
21301e04c3fSmrg                                src[i] = qpu_magic(qinst->src[i].index);
21401e04c3fSmrg                                break;
21501e04c3fSmrg                        case QFILE_NULL:
2167ec681f3Smrg                                /* QFILE_NULL is an undef, so we can load
2177ec681f3Smrg                                 * anything. Using reg 0
2187ec681f3Smrg                                 */
2197ec681f3Smrg                                src[i] = qpu_reg(0);
2207ec681f3Smrg                                break;
22101e04c3fSmrg                        case QFILE_LOAD_IMM:
2227ec681f3Smrg                                assert(!"not reached");
22301e04c3fSmrg                                break;
22401e04c3fSmrg                        case QFILE_TEMP:
22501e04c3fSmrg                                src[i] = temp_registers[index];
22601e04c3fSmrg                                break;
22701e04c3fSmrg                        case QFILE_SMALL_IMM:
22801e04c3fSmrg                                src[i].smimm = true;
22901e04c3fSmrg                                break;
23001e04c3fSmrg
23101e04c3fSmrg                        case QFILE_VPM:
23201e04c3fSmrg                                assert((int)qinst->src[i].index >=
23301e04c3fSmrg                                       last_vpm_read_index);
23401e04c3fSmrg                                (void)last_vpm_read_index;
23501e04c3fSmrg                                last_vpm_read_index = qinst->src[i].index;
23601e04c3fSmrg
23701e04c3fSmrg                                temp = new_qpu_nop_before(qinst);
23801e04c3fSmrg                                temp->qpu.sig.ldvpm = true;
23901e04c3fSmrg
2407ec681f3Smrg                                src[i] = qpu_magic(V3D_QPU_WADDR_R3);
24101e04c3fSmrg                                break;
24201e04c3fSmrg                        }
24301e04c3fSmrg                }
24401e04c3fSmrg
24501e04c3fSmrg                struct qpu_reg dst;
24601e04c3fSmrg                switch (qinst->dst.file) {
24701e04c3fSmrg                case QFILE_NULL:
24801e04c3fSmrg                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
24901e04c3fSmrg                        break;
25001e04c3fSmrg
25101e04c3fSmrg                case QFILE_REG:
25201e04c3fSmrg                        dst = qpu_reg(qinst->dst.index);
25301e04c3fSmrg                        break;
25401e04c3fSmrg
25501e04c3fSmrg                case QFILE_MAGIC:
25601e04c3fSmrg                        dst = qpu_magic(qinst->dst.index);
25701e04c3fSmrg                        break;
25801e04c3fSmrg
25901e04c3fSmrg                case QFILE_TEMP:
26001e04c3fSmrg                        dst = temp_registers[qinst->dst.index];
26101e04c3fSmrg                        break;
26201e04c3fSmrg
26301e04c3fSmrg                case QFILE_VPM:
26401e04c3fSmrg                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
26501e04c3fSmrg                        break;
26601e04c3fSmrg
26701e04c3fSmrg                case QFILE_SMALL_IMM:
26801e04c3fSmrg                case QFILE_LOAD_IMM:
26901e04c3fSmrg                        assert(!"not reached");
27001e04c3fSmrg                        break;
27101e04c3fSmrg                }
27201e04c3fSmrg
27301e04c3fSmrg                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
2747ec681f3Smrg                        if (qinst->qpu.sig.ldunif || qinst->qpu.sig.ldunifa) {
275ed98bd31Smaya                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
276ed98bd31Smaya                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
277ed98bd31Smaya
278ed98bd31Smaya                                if (!dst.magic ||
279ed98bd31Smaya                                    dst.index != V3D_QPU_WADDR_R5) {
280ed98bd31Smaya                                        assert(c->devinfo->ver >= 40);
281ed98bd31Smaya
2827ec681f3Smrg                                        if (qinst->qpu.sig.ldunif) {
2837ec681f3Smrg                                           qinst->qpu.sig.ldunif = false;
2847ec681f3Smrg                                           qinst->qpu.sig.ldunifrf = true;
2857ec681f3Smrg                                        } else {
2867ec681f3Smrg                                           qinst->qpu.sig.ldunifa = false;
2877ec681f3Smrg                                           qinst->qpu.sig.ldunifarf = true;
2887ec681f3Smrg                                        }
289ed98bd31Smaya                                        qinst->qpu.sig_addr = dst.index;
290ed98bd31Smaya                                        qinst->qpu.sig_magic = dst.magic;
291ed98bd31Smaya                                }
292ed98bd31Smaya                        } else if (v3d_qpu_sig_writes_address(c->devinfo,
29301e04c3fSmrg                                                       &qinst->qpu.sig)) {
29401e04c3fSmrg                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
29501e04c3fSmrg                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
29601e04c3fSmrg
29701e04c3fSmrg                                qinst->qpu.sig_addr = dst.index;
29801e04c3fSmrg                                qinst->qpu.sig_magic = dst.magic;
29901e04c3fSmrg                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
30001e04c3fSmrg                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
30101e04c3fSmrg                                if (nsrc >= 1) {
30201e04c3fSmrg                                        set_src(&qinst->qpu,
30301e04c3fSmrg                                                &qinst->qpu.alu.add.a, src[0]);
30401e04c3fSmrg                                }
30501e04c3fSmrg                                if (nsrc >= 2) {
30601e04c3fSmrg                                        set_src(&qinst->qpu,
30701e04c3fSmrg                                                &qinst->qpu.alu.add.b, src[1]);
30801e04c3fSmrg                                }
30901e04c3fSmrg
31001e04c3fSmrg                                qinst->qpu.alu.add.waddr = dst.index;
31101e04c3fSmrg                                qinst->qpu.alu.add.magic_write = dst.magic;
31201e04c3fSmrg                        } else {
31301e04c3fSmrg                                if (nsrc >= 1) {
31401e04c3fSmrg                                        set_src(&qinst->qpu,
31501e04c3fSmrg                                                &qinst->qpu.alu.mul.a, src[0]);
31601e04c3fSmrg                                }
31701e04c3fSmrg                                if (nsrc >= 2) {
31801e04c3fSmrg                                        set_src(&qinst->qpu,
31901e04c3fSmrg                                                &qinst->qpu.alu.mul.b, src[1]);
32001e04c3fSmrg                                }
32101e04c3fSmrg
32201e04c3fSmrg                                qinst->qpu.alu.mul.waddr = dst.index;
32301e04c3fSmrg                                qinst->qpu.alu.mul.magic_write = dst.magic;
32401e04c3fSmrg
32501e04c3fSmrg                                if (is_no_op_mov(qinst)) {
32601e04c3fSmrg                                        vir_remove_instruction(c, qinst);
32701e04c3fSmrg                                        continue;
32801e04c3fSmrg                                }
32901e04c3fSmrg                        }
33001e04c3fSmrg                } else {
33101e04c3fSmrg                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
33201e04c3fSmrg                }
33301e04c3fSmrg        }
33401e04c3fSmrg}
33501e04c3fSmrg
336ed98bd31Smayastatic bool
337ed98bd31Smayareads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
338ed98bd31Smaya{
339ed98bd31Smaya        struct v3d_qpu_instr qpu;
3407ec681f3Smrg        ASSERTED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
341ed98bd31Smaya        assert(ok);
342ed98bd31Smaya
343ed98bd31Smaya        if (qpu.sig.ldunif ||
344ed98bd31Smaya            qpu.sig.ldunifrf ||
3457ec681f3Smrg            qpu.sig.ldtlbu ||
346ed98bd31Smaya            qpu.sig.wrtmuc) {
347ed98bd31Smaya                return true;
348ed98bd31Smaya        }
349ed98bd31Smaya
350ed98bd31Smaya        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
351ed98bd31Smaya                return true;
352ed98bd31Smaya
353ed98bd31Smaya        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
354ed98bd31Smaya                if (qpu.alu.add.magic_write &&
355ed98bd31Smaya                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
356ed98bd31Smaya                        return true;
357ed98bd31Smaya                }
358ed98bd31Smaya
359ed98bd31Smaya                if (qpu.alu.mul.magic_write &&
360ed98bd31Smaya                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
361ed98bd31Smaya                        return true;
362ed98bd31Smaya                }
363ed98bd31Smaya        }
364ed98bd31Smaya
365ed98bd31Smaya        return false;
366ed98bd31Smaya}
36701e04c3fSmrg
36801e04c3fSmrgstatic void
36901e04c3fSmrgv3d_dump_qpu(struct v3d_compile *c)
37001e04c3fSmrg{
37101e04c3fSmrg        fprintf(stderr, "%s prog %d/%d QPU:\n",
37201e04c3fSmrg                vir_get_stage_name(c),
37301e04c3fSmrg                c->program_id, c->variant_id);
37401e04c3fSmrg
375ed98bd31Smaya        int next_uniform = 0;
37601e04c3fSmrg        for (int i = 0; i < c->qpu_inst_count; i++) {
37701e04c3fSmrg                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
378ed98bd31Smaya                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);
379ed98bd31Smaya
380ed98bd31Smaya                /* We can only do this on 4.x, because we're not tracking TMU
381ed98bd31Smaya                 * implicit uniforms here on 3.x.
382ed98bd31Smaya                 */
383ed98bd31Smaya                if (c->devinfo->ver >= 40 &&
384ed98bd31Smaya                    reads_uniform(c->devinfo, c->qpu_insts[i])) {
385ed98bd31Smaya                        fprintf(stderr, " (");
386ed98bd31Smaya                        vir_dump_uniform(c->uniform_contents[next_uniform],
387ed98bd31Smaya                                         c->uniform_data[next_uniform]);
388ed98bd31Smaya                        fprintf(stderr, ")");
389ed98bd31Smaya                        next_uniform++;
390ed98bd31Smaya                }
391ed98bd31Smaya                fprintf(stderr, "\n");
39201e04c3fSmrg                ralloc_free((void *)str);
39301e04c3fSmrg        }
394ed98bd31Smaya
395ed98bd31Smaya        /* Make sure our dumping lined up. */
396ed98bd31Smaya        if (c->devinfo->ver >= 40)
397ed98bd31Smaya                assert(next_uniform == c->num_uniforms);
398ed98bd31Smaya
39901e04c3fSmrg        fprintf(stderr, "\n");
40001e04c3fSmrg}
40101e04c3fSmrg
40201e04c3fSmrgvoid
40301e04c3fSmrgv3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
40401e04c3fSmrg{
40501e04c3fSmrg        /* Reset the uniform count to how many will be actually loaded by the
40601e04c3fSmrg         * generated QPU code.
40701e04c3fSmrg         */
40801e04c3fSmrg        c->num_uniforms = 0;
40901e04c3fSmrg
41001e04c3fSmrg        vir_for_each_block(block, c)
41101e04c3fSmrg                v3d_generate_code_block(c, block, temp_registers);
41201e04c3fSmrg
413ed98bd31Smaya        v3d_qpu_schedule_instructions(c);
41401e04c3fSmrg
41501e04c3fSmrg        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
41601e04c3fSmrg        int i = 0;
41701e04c3fSmrg        vir_for_each_inst_inorder(inst, c) {
41801e04c3fSmrg                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
41901e04c3fSmrg                                             &c->qpu_insts[i++]);
42001e04c3fSmrg                if (!ok) {
4217ec681f3Smrg                        fprintf(stderr, "Failed to pack instruction %d:\n", i);
42201e04c3fSmrg                        vir_dump_inst(c, inst);
42301e04c3fSmrg                        fprintf(stderr, "\n");
4247ec681f3Smrg                        c->compilation_result = V3D_COMPILATION_FAILED;
42501e04c3fSmrg                        return;
42601e04c3fSmrg                }
4277ec681f3Smrg
4287ec681f3Smrg                if (v3d_qpu_is_nop(&inst->qpu))
4297ec681f3Smrg                        c->nop_count++;
43001e04c3fSmrg        }
43101e04c3fSmrg        assert(i == c->qpu_inst_count);
43201e04c3fSmrg
43301e04c3fSmrg        if (V3D_DEBUG & (V3D_DEBUG_QPU |
43401e04c3fSmrg                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
43501e04c3fSmrg                v3d_dump_qpu(c);
43601e04c3fSmrg        }
43701e04c3fSmrg
43801e04c3fSmrg        qpu_validate(c);
43901e04c3fSmrg
44001e04c3fSmrg        free(temp_registers);
44101e04c3fSmrg}
442