1af69d88dSmrg/*
2af69d88dSmrg * Copyright © 2014 Broadcom
3af69d88dSmrg *
4af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
5af69d88dSmrg * copy of this software and associated documentation files (the "Software"),
6af69d88dSmrg * to deal in the Software without restriction, including without limitation
7af69d88dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8af69d88dSmrg * and/or sell copies of the Software, and to permit persons to whom the
9af69d88dSmrg * Software is furnished to do so, subject to the following conditions:
10af69d88dSmrg *
11af69d88dSmrg * The above copyright notice and this permission notice (including the next
12af69d88dSmrg * paragraph) shall be included in all copies or substantial portions of the
13af69d88dSmrg * Software.
14af69d88dSmrg *
15af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16af69d88dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17af69d88dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18af69d88dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19af69d88dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20af69d88dSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21af69d88dSmrg * IN THE SOFTWARE.
22af69d88dSmrg */
23af69d88dSmrg
24af69d88dSmrg#include "util/u_memory.h"
2501e04c3fSmrg#include "util/ralloc.h"
26af69d88dSmrg
27af69d88dSmrg#include "vc4_qir.h"
28af69d88dSmrg#include "vc4_qpu.h"
29af69d88dSmrg
/* Static description of one QIR opcode; the table below is indexed by
 * enum qop.
 */
struct qir_op_info {
        /* Mnemonic returned by qir_get_op_name(). */
        const char *name;
        /* Destination count recorded for the opcode. */
        uint8_t ndst;
        /* Number of explicit (non-sideband) source operands. */
        uint8_t nsrc;
        /* Value reported by qir_has_side_effects() for this opcode. */
        bool has_side_effects;
};
35af69d88dSmrg
36af69d88dSmrgstatic const struct qir_op_info qir_op_info[] = {
37af69d88dSmrg        [QOP_MOV] = { "mov", 1, 1 },
3801e04c3fSmrg        [QOP_FMOV] = { "fmov", 1, 1 },
3901e04c3fSmrg        [QOP_MMOV] = { "mmov", 1, 1 },
40af69d88dSmrg        [QOP_FADD] = { "fadd", 1, 2 },
41af69d88dSmrg        [QOP_FSUB] = { "fsub", 1, 2 },
42af69d88dSmrg        [QOP_FMUL] = { "fmul", 1, 2 },
4301e04c3fSmrg        [QOP_MUL24] = { "mul24", 1, 2 },
4401e04c3fSmrg        [QOP_V8MULD] = {"v8muld", 1, 2 },
4501e04c3fSmrg        [QOP_V8MIN] = {"v8min", 1, 2 },
4601e04c3fSmrg        [QOP_V8MAX] = {"v8max", 1, 2 },
4701e04c3fSmrg        [QOP_V8ADDS] = {"v8adds", 1, 2 },
4801e04c3fSmrg        [QOP_V8SUBS] = {"v8subs", 1, 2 },
49af69d88dSmrg        [QOP_FMIN] = { "fmin", 1, 2 },
50af69d88dSmrg        [QOP_FMAX] = { "fmax", 1, 2 },
51af69d88dSmrg        [QOP_FMINABS] = { "fminabs", 1, 2 },
52af69d88dSmrg        [QOP_FMAXABS] = { "fmaxabs", 1, 2 },
53af69d88dSmrg        [QOP_FTOI] = { "ftoi", 1, 1 },
54af69d88dSmrg        [QOP_ITOF] = { "itof", 1, 1 },
5501e04c3fSmrg        [QOP_ADD] = { "add", 1, 2 },
5601e04c3fSmrg        [QOP_SUB] = { "sub", 1, 2 },
5701e04c3fSmrg        [QOP_SHR] = { "shr", 1, 2 },
5801e04c3fSmrg        [QOP_ASR] = { "asr", 1, 2 },
5901e04c3fSmrg        [QOP_SHL] = { "shl", 1, 2 },
6001e04c3fSmrg        [QOP_MIN] = { "min", 1, 2 },
6101e04c3fSmrg        [QOP_MIN_NOIMM] = { "min_noimm", 1, 2 },
6201e04c3fSmrg        [QOP_MAX] = { "max", 1, 2 },
6301e04c3fSmrg        [QOP_AND] = { "and", 1, 2 },
6401e04c3fSmrg        [QOP_OR] = { "or", 1, 2 },
6501e04c3fSmrg        [QOP_XOR] = { "xor", 1, 2 },
6601e04c3fSmrg        [QOP_NOT] = { "not", 1, 1 },
6701e04c3fSmrg
68af69d88dSmrg        [QOP_RCP] = { "rcp", 1, 1 },
69af69d88dSmrg        [QOP_RSQ] = { "rsq", 1, 1 },
7001e04c3fSmrg        [QOP_EXP2] = { "exp2", 1, 1 },
7101e04c3fSmrg        [QOP_LOG2] = { "log2", 1, 1 },
7201e04c3fSmrg        [QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0 },
7301e04c3fSmrg        [QOP_MS_MASK] = { "ms_mask", 0, 1, true },
74af69d88dSmrg        [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
75af69d88dSmrg
76af69d88dSmrg        [QOP_FRAG_Z] = { "frag_z", 1, 0 },
7701e04c3fSmrg        [QOP_FRAG_W] = { "frag_w", 1, 0 },
7801e04c3fSmrg
7901e04c3fSmrg        [QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
8001e04c3fSmrg
8101e04c3fSmrg        [QOP_THRSW] = { "thrsw", 0, 0, true },
8201e04c3fSmrg
8301e04c3fSmrg        [QOP_LOAD_IMM] = { "load_imm", 0, 1 },
8401e04c3fSmrg        [QOP_LOAD_IMM_U2] = { "load_imm_u2", 0, 1 },
8501e04c3fSmrg        [QOP_LOAD_IMM_I2] = { "load_imm_i2", 0, 1 },
8601e04c3fSmrg
8701e04c3fSmrg        [QOP_ROT_MUL] = { "rot_mul", 0, 2 },
8801e04c3fSmrg
8901e04c3fSmrg        [QOP_BRANCH] = { "branch", 0, 0, true },
9001e04c3fSmrg        [QOP_UNIFORMS_RESET] = { "uniforms_reset", 0, 2, true },
91af69d88dSmrg};
92af69d88dSmrg
93af69d88dSmrgstatic const char *
94af69d88dSmrgqir_get_op_name(enum qop qop)
95af69d88dSmrg{
96af69d88dSmrg        if (qop < ARRAY_SIZE(qir_op_info) && qir_op_info[qop].name)
97af69d88dSmrg                return qir_op_info[qop].name;
98af69d88dSmrg        else
99af69d88dSmrg                return "???";
100af69d88dSmrg}
101af69d88dSmrg
102af69d88dSmrgint
10301e04c3fSmrgqir_get_non_sideband_nsrc(struct qinst *inst)
104af69d88dSmrg{
10501e04c3fSmrg        assert(qir_op_info[inst->op].name);
10601e04c3fSmrg        return qir_op_info[inst->op].nsrc;
10701e04c3fSmrg}
10801e04c3fSmrg
10901e04c3fSmrgint
11001e04c3fSmrgqir_get_nsrc(struct qinst *inst)
11101e04c3fSmrg{
11201e04c3fSmrg        assert(qir_op_info[inst->op].name);
11301e04c3fSmrg
11401e04c3fSmrg        int nsrc = qir_get_non_sideband_nsrc(inst);
11501e04c3fSmrg
11601e04c3fSmrg        /* Normal (non-direct) texture coordinate writes also implicitly load
11701e04c3fSmrg         * a uniform for the texture parameters.
11801e04c3fSmrg         */
11901e04c3fSmrg        if (qir_is_tex(inst) && inst->dst.file != QFILE_TEX_S_DIRECT)
12001e04c3fSmrg                nsrc++;
12101e04c3fSmrg
12201e04c3fSmrg        return nsrc;
12301e04c3fSmrg}
12401e04c3fSmrg
/* Texture writes keep their sideband uniform in the source slot that follows
 * the regular ALU arguments, i.e. the last source counted by qir_get_nsrc().
 */
int
qir_get_tex_uniform_src(struct qinst *inst)
{
        int nsrc = qir_get_nsrc(inst);

        return nsrc - 1;
}
133af69d88dSmrg
13401e04c3fSmrg/**
13501e04c3fSmrg * Returns whether the instruction has any side effects that must be
13601e04c3fSmrg * preserved.
13701e04c3fSmrg */
138af69d88dSmrgbool
13901e04c3fSmrgqir_has_side_effects(struct vc4_compile *c, struct qinst *inst)
140af69d88dSmrg{
14101e04c3fSmrg        switch (inst->dst.file) {
14201e04c3fSmrg        case QFILE_TLB_Z_WRITE:
14301e04c3fSmrg        case QFILE_TLB_COLOR_WRITE:
14401e04c3fSmrg        case QFILE_TLB_COLOR_WRITE_MS:
14501e04c3fSmrg        case QFILE_TLB_STENCIL_SETUP:
14601e04c3fSmrg        case QFILE_TEX_S_DIRECT:
14701e04c3fSmrg        case QFILE_TEX_S:
14801e04c3fSmrg        case QFILE_TEX_T:
14901e04c3fSmrg        case QFILE_TEX_R:
15001e04c3fSmrg        case QFILE_TEX_B:
15101e04c3fSmrg                return true;
15201e04c3fSmrg        default:
15301e04c3fSmrg                break;
154af69d88dSmrg        }
155af69d88dSmrg
156af69d88dSmrg        return qir_op_info[inst->op].has_side_effects;
157af69d88dSmrg}
158af69d88dSmrg
15901e04c3fSmrgbool
16001e04c3fSmrgqir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst)
16101e04c3fSmrg{
16201e04c3fSmrg        /* We can dead-code eliminate varyings, because we only tell the VS
16301e04c3fSmrg         * about the live ones at the end.  But we have to preserve the
16401e04c3fSmrg         * point/line coordinates reads, because they're generated by
16501e04c3fSmrg         * fixed-function hardware.
16601e04c3fSmrg         */
16701e04c3fSmrg        for (int i = 0; i < qir_get_nsrc(inst); i++) {
16801e04c3fSmrg                if (inst->src[i].file == QFILE_VARY &&
16901e04c3fSmrg                    c->input_slots[inst->src[i].index].slot == 0xff) {
17001e04c3fSmrg                        return true;
17101e04c3fSmrg                }
17201e04c3fSmrg
17301e04c3fSmrg                if (inst->src[i].file == QFILE_VPM)
17401e04c3fSmrg                        return true;
17501e04c3fSmrg        }
17601e04c3fSmrg
17701e04c3fSmrg        if (inst->dst.file == QFILE_VPM)
17801e04c3fSmrg                return true;
17901e04c3fSmrg
18001e04c3fSmrg        return false;
18101e04c3fSmrg}
18201e04c3fSmrg
18301e04c3fSmrgbool
18401e04c3fSmrgqir_has_uniform_read(struct qinst *inst)
18501e04c3fSmrg{
18601e04c3fSmrg        for (int i = 0; i < qir_get_nsrc(inst); i++) {
18701e04c3fSmrg                if (inst->src[i].file == QFILE_UNIF)
18801e04c3fSmrg                        return true;
18901e04c3fSmrg        }
19001e04c3fSmrg
19101e04c3fSmrg        return false;
19201e04c3fSmrg}
19301e04c3fSmrg
19401e04c3fSmrgbool
19501e04c3fSmrgqir_is_mul(struct qinst *inst)
19601e04c3fSmrg{
19701e04c3fSmrg        switch (inst->op) {
19801e04c3fSmrg        case QOP_MMOV:
19901e04c3fSmrg        case QOP_FMUL:
20001e04c3fSmrg        case QOP_MUL24:
20101e04c3fSmrg        case QOP_V8MULD:
20201e04c3fSmrg        case QOP_V8MIN:
20301e04c3fSmrg        case QOP_V8MAX:
20401e04c3fSmrg        case QOP_V8ADDS:
20501e04c3fSmrg        case QOP_V8SUBS:
20601e04c3fSmrg        case QOP_ROT_MUL:
20701e04c3fSmrg                return true;
20801e04c3fSmrg        default:
20901e04c3fSmrg                return false;
21001e04c3fSmrg        }
21101e04c3fSmrg}
21201e04c3fSmrg
21301e04c3fSmrgbool
21401e04c3fSmrgqir_is_float_input(struct qinst *inst)
21501e04c3fSmrg{
21601e04c3fSmrg        switch (inst->op) {
21701e04c3fSmrg        case QOP_FMOV:
21801e04c3fSmrg        case QOP_FMUL:
21901e04c3fSmrg        case QOP_FADD:
22001e04c3fSmrg        case QOP_FSUB:
22101e04c3fSmrg        case QOP_FMIN:
22201e04c3fSmrg        case QOP_FMAX:
22301e04c3fSmrg        case QOP_FMINABS:
22401e04c3fSmrg        case QOP_FMAXABS:
22501e04c3fSmrg        case QOP_FTOI:
22601e04c3fSmrg                return true;
22701e04c3fSmrg        default:
22801e04c3fSmrg                return false;
22901e04c3fSmrg        }
23001e04c3fSmrg}
23101e04c3fSmrg
23201e04c3fSmrgbool
23301e04c3fSmrgqir_is_raw_mov(struct qinst *inst)
23401e04c3fSmrg{
23501e04c3fSmrg        return ((inst->op == QOP_MOV ||
23601e04c3fSmrg                 inst->op == QOP_FMOV ||
23701e04c3fSmrg                 inst->op == QOP_MMOV) &&
23801e04c3fSmrg                inst->cond == QPU_COND_ALWAYS &&
23901e04c3fSmrg                !inst->dst.pack &&
24001e04c3fSmrg                !inst->src[0].pack);
24101e04c3fSmrg}
24201e04c3fSmrg
24301e04c3fSmrgbool
24401e04c3fSmrgqir_is_tex(struct qinst *inst)
24501e04c3fSmrg{
24601e04c3fSmrg        switch (inst->dst.file) {
24701e04c3fSmrg        case QFILE_TEX_S_DIRECT:
24801e04c3fSmrg        case QFILE_TEX_S:
24901e04c3fSmrg        case QFILE_TEX_T:
25001e04c3fSmrg        case QFILE_TEX_R:
25101e04c3fSmrg        case QFILE_TEX_B:
25201e04c3fSmrg                return true;
25301e04c3fSmrg        default:
25401e04c3fSmrg                return false;
25501e04c3fSmrg        }
25601e04c3fSmrg}
25701e04c3fSmrg
25801e04c3fSmrgbool
25901e04c3fSmrgqir_has_implicit_tex_uniform(struct qinst *inst)
26001e04c3fSmrg{
26101e04c3fSmrg        switch (inst->dst.file) {
26201e04c3fSmrg        case QFILE_TEX_S:
26301e04c3fSmrg        case QFILE_TEX_T:
26401e04c3fSmrg        case QFILE_TEX_R:
26501e04c3fSmrg        case QFILE_TEX_B:
26601e04c3fSmrg                return true;
26701e04c3fSmrg        default:
26801e04c3fSmrg                return false;
26901e04c3fSmrg        }
27001e04c3fSmrg}
27101e04c3fSmrg
27201e04c3fSmrgbool
27301e04c3fSmrgqir_depends_on_flags(struct qinst *inst)
27401e04c3fSmrg{
27501e04c3fSmrg        if (inst->op == QOP_BRANCH) {
27601e04c3fSmrg                return inst->cond != QPU_COND_BRANCH_ALWAYS;
27701e04c3fSmrg        } else {
27801e04c3fSmrg                return (inst->cond != QPU_COND_ALWAYS &&
27901e04c3fSmrg                        inst->cond != QPU_COND_NEVER);
28001e04c3fSmrg        }
28101e04c3fSmrg}
28201e04c3fSmrg
28301e04c3fSmrgbool
28401e04c3fSmrgqir_writes_r4(struct qinst *inst)
28501e04c3fSmrg{
28601e04c3fSmrg        switch (inst->op) {
28701e04c3fSmrg        case QOP_TEX_RESULT:
28801e04c3fSmrg        case QOP_TLB_COLOR_READ:
28901e04c3fSmrg        case QOP_RCP:
29001e04c3fSmrg        case QOP_RSQ:
29101e04c3fSmrg        case QOP_EXP2:
29201e04c3fSmrg        case QOP_LOG2:
29301e04c3fSmrg                return true;
29401e04c3fSmrg        default:
29501e04c3fSmrg                return false;
29601e04c3fSmrg        }
29701e04c3fSmrg}
29801e04c3fSmrg
29901e04c3fSmrguint8_t
30001e04c3fSmrgqir_channels_written(struct qinst *inst)
30101e04c3fSmrg{
30201e04c3fSmrg        if (qir_is_mul(inst)) {
30301e04c3fSmrg                switch (inst->dst.pack) {
30401e04c3fSmrg                case QPU_PACK_MUL_NOP:
30501e04c3fSmrg                case QPU_PACK_MUL_8888:
30601e04c3fSmrg                        return 0xf;
30701e04c3fSmrg                case QPU_PACK_MUL_8A:
30801e04c3fSmrg                        return 0x1;
30901e04c3fSmrg                case QPU_PACK_MUL_8B:
31001e04c3fSmrg                        return 0x2;
31101e04c3fSmrg                case QPU_PACK_MUL_8C:
31201e04c3fSmrg                        return 0x4;
31301e04c3fSmrg                case QPU_PACK_MUL_8D:
31401e04c3fSmrg                        return 0x8;
31501e04c3fSmrg                }
31601e04c3fSmrg        } else {
31701e04c3fSmrg                switch (inst->dst.pack) {
31801e04c3fSmrg                case QPU_PACK_A_NOP:
31901e04c3fSmrg                case QPU_PACK_A_8888:
32001e04c3fSmrg                case QPU_PACK_A_8888_SAT:
32101e04c3fSmrg                case QPU_PACK_A_32_SAT:
32201e04c3fSmrg                        return 0xf;
32301e04c3fSmrg                case QPU_PACK_A_8A:
32401e04c3fSmrg                case QPU_PACK_A_8A_SAT:
32501e04c3fSmrg                        return 0x1;
32601e04c3fSmrg                case QPU_PACK_A_8B:
32701e04c3fSmrg                case QPU_PACK_A_8B_SAT:
32801e04c3fSmrg                        return 0x2;
32901e04c3fSmrg                case QPU_PACK_A_8C:
33001e04c3fSmrg                case QPU_PACK_A_8C_SAT:
33101e04c3fSmrg                        return 0x4;
33201e04c3fSmrg                case QPU_PACK_A_8D:
33301e04c3fSmrg                case QPU_PACK_A_8D_SAT:
33401e04c3fSmrg                        return 0x8;
33501e04c3fSmrg                case QPU_PACK_A_16A:
33601e04c3fSmrg                case QPU_PACK_A_16A_SAT:
33701e04c3fSmrg                        return 0x3;
33801e04c3fSmrg                case QPU_PACK_A_16B:
33901e04c3fSmrg                case QPU_PACK_A_16B_SAT:
34001e04c3fSmrg                        return 0xc;
34101e04c3fSmrg                }
34201e04c3fSmrg        }
34301e04c3fSmrg        unreachable("Bad pack field");
34401e04c3fSmrg}
34501e04c3fSmrg
34601e04c3fSmrgchar *
34701e04c3fSmrgqir_describe_uniform(enum quniform_contents contents, uint32_t data,
34801e04c3fSmrg                     const uint32_t *uniforms)
34901e04c3fSmrg{
35001e04c3fSmrg        static const char *quniform_names[] = {
35101e04c3fSmrg                [QUNIFORM_VIEWPORT_X_SCALE] = "vp_x_scale",
35201e04c3fSmrg                [QUNIFORM_VIEWPORT_Y_SCALE] = "vp_y_scale",
35301e04c3fSmrg                [QUNIFORM_VIEWPORT_Z_OFFSET] = "vp_z_offset",
35401e04c3fSmrg                [QUNIFORM_VIEWPORT_Z_SCALE] = "vp_z_scale",
35501e04c3fSmrg                [QUNIFORM_TEXTURE_CONFIG_P0] = "tex_p0",
35601e04c3fSmrg                [QUNIFORM_TEXTURE_CONFIG_P1] = "tex_p1",
35701e04c3fSmrg                [QUNIFORM_TEXTURE_CONFIG_P2] = "tex_p2",
35801e04c3fSmrg                [QUNIFORM_TEXTURE_FIRST_LEVEL] = "tex_first_level",
35901e04c3fSmrg        };
36001e04c3fSmrg
36101e04c3fSmrg        switch (contents) {
36201e04c3fSmrg        case QUNIFORM_CONSTANT:
36301e04c3fSmrg                return ralloc_asprintf(NULL, "0x%08x / %f", data, uif(data));
36401e04c3fSmrg        case QUNIFORM_UNIFORM:
36501e04c3fSmrg                if (uniforms) {
36601e04c3fSmrg                        uint32_t unif = uniforms[data];
36701e04c3fSmrg                        return ralloc_asprintf(NULL, "unif[%d] = 0x%08x / %f",
36801e04c3fSmrg                                               data, unif, uif(unif));
36901e04c3fSmrg                } else {
37001e04c3fSmrg                        return ralloc_asprintf(NULL, "unif[%d]", data);
37101e04c3fSmrg                }
37201e04c3fSmrg
37301e04c3fSmrg        case QUNIFORM_TEXTURE_CONFIG_P0:
37401e04c3fSmrg        case QUNIFORM_TEXTURE_CONFIG_P1:
37501e04c3fSmrg        case QUNIFORM_TEXTURE_CONFIG_P2:
37601e04c3fSmrg        case QUNIFORM_TEXTURE_FIRST_LEVEL:
37701e04c3fSmrg                return ralloc_asprintf(NULL, "%s[%d]",
37801e04c3fSmrg                                       quniform_names[contents], data);
37901e04c3fSmrg
38001e04c3fSmrg        default:
38101e04c3fSmrg                if (contents < ARRAY_SIZE(quniform_names) &&
38201e04c3fSmrg                    quniform_names[contents]) {
38301e04c3fSmrg                        return ralloc_asprintf(NULL, "%s",
38401e04c3fSmrg                                               quniform_names[contents]);
38501e04c3fSmrg                } else {
38601e04c3fSmrg                        return ralloc_asprintf(NULL, "??? %d", contents);
38701e04c3fSmrg                }
38801e04c3fSmrg        }
38901e04c3fSmrg}
39001e04c3fSmrg
391af69d88dSmrgstatic void
39201e04c3fSmrgqir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
393af69d88dSmrg{
39401e04c3fSmrg        static const char *files[] = {
395af69d88dSmrg                [QFILE_TEMP] = "t",
396af69d88dSmrg                [QFILE_VARY] = "v",
39701e04c3fSmrg                [QFILE_TLB_COLOR_WRITE] = "tlb_c",
39801e04c3fSmrg                [QFILE_TLB_COLOR_WRITE_MS] = "tlb_c_ms",
39901e04c3fSmrg                [QFILE_TLB_Z_WRITE] = "tlb_z",
40001e04c3fSmrg                [QFILE_TLB_STENCIL_SETUP] = "tlb_stencil",
40101e04c3fSmrg                [QFILE_FRAG_X] = "frag_x",
40201e04c3fSmrg                [QFILE_FRAG_Y] = "frag_y",
40301e04c3fSmrg                [QFILE_FRAG_REV_FLAG] = "frag_rev_flag",
40401e04c3fSmrg                [QFILE_QPU_ELEMENT] = "elem",
40501e04c3fSmrg                [QFILE_TEX_S_DIRECT] = "tex_s_direct",
40601e04c3fSmrg                [QFILE_TEX_S] = "tex_s",
40701e04c3fSmrg                [QFILE_TEX_T] = "tex_t",
40801e04c3fSmrg                [QFILE_TEX_R] = "tex_r",
40901e04c3fSmrg                [QFILE_TEX_B] = "tex_b",
410af69d88dSmrg        };
411af69d88dSmrg
41201e04c3fSmrg        switch (reg.file) {
41301e04c3fSmrg
41401e04c3fSmrg        case QFILE_NULL:
415af69d88dSmrg                fprintf(stderr, "null");
41601e04c3fSmrg                break;
41701e04c3fSmrg
41801e04c3fSmrg        case QFILE_LOAD_IMM:
41901e04c3fSmrg                fprintf(stderr, "0x%08x (%f)", reg.index, uif(reg.index));
42001e04c3fSmrg                break;
42101e04c3fSmrg
42201e04c3fSmrg        case QFILE_SMALL_IMM:
42301e04c3fSmrg                if ((int)reg.index >= -16 && (int)reg.index <= 15)
42401e04c3fSmrg                        fprintf(stderr, "%d", reg.index);
42501e04c3fSmrg                else
42601e04c3fSmrg                        fprintf(stderr, "%f", uif(reg.index));
42701e04c3fSmrg                break;
42801e04c3fSmrg
42901e04c3fSmrg        case QFILE_VPM:
43001e04c3fSmrg                if (write) {
43101e04c3fSmrg                        fprintf(stderr, "vpm");
43201e04c3fSmrg                } else {
43301e04c3fSmrg                        fprintf(stderr, "vpm%d.%d",
43401e04c3fSmrg                                reg.index / 4, reg.index % 4);
43501e04c3fSmrg                }
43601e04c3fSmrg                break;
43701e04c3fSmrg
43801e04c3fSmrg        case QFILE_TLB_COLOR_WRITE:
43901e04c3fSmrg        case QFILE_TLB_COLOR_WRITE_MS:
44001e04c3fSmrg        case QFILE_TLB_Z_WRITE:
44101e04c3fSmrg        case QFILE_TLB_STENCIL_SETUP:
44201e04c3fSmrg        case QFILE_TEX_S_DIRECT:
44301e04c3fSmrg        case QFILE_TEX_S:
44401e04c3fSmrg        case QFILE_TEX_T:
44501e04c3fSmrg        case QFILE_TEX_R:
44601e04c3fSmrg        case QFILE_TEX_B:
44701e04c3fSmrg                fprintf(stderr, "%s", files[reg.file]);
44801e04c3fSmrg                break;
44901e04c3fSmrg
45001e04c3fSmrg        case QFILE_UNIF: {
45101e04c3fSmrg                char *desc = qir_describe_uniform(c->uniform_contents[reg.index],
45201e04c3fSmrg                                                  c->uniform_data[reg.index],
45301e04c3fSmrg                                                  NULL);
45401e04c3fSmrg                fprintf(stderr, "u%d (%s)", reg.index, desc);
45501e04c3fSmrg                ralloc_free(desc);
45601e04c3fSmrg                break;
45701e04c3fSmrg        }
45801e04c3fSmrg
45901e04c3fSmrg        default:
460af69d88dSmrg                fprintf(stderr, "%s%d", files[reg.file], reg.index);
46101e04c3fSmrg                break;
46201e04c3fSmrg        }
463af69d88dSmrg}
464af69d88dSmrg
465af69d88dSmrgvoid
46601e04c3fSmrgqir_dump_inst(struct vc4_compile *c, struct qinst *inst)
467af69d88dSmrg{
46801e04c3fSmrg        fprintf(stderr, "%s", qir_get_op_name(inst->op));
46901e04c3fSmrg        if (inst->op == QOP_BRANCH)
47001e04c3fSmrg                vc4_qpu_disasm_cond_branch(stderr, inst->cond);
47101e04c3fSmrg        else
47201e04c3fSmrg                vc4_qpu_disasm_cond(stderr, inst->cond);
47301e04c3fSmrg        if (inst->sf)
47401e04c3fSmrg                fprintf(stderr, ".sf");
47501e04c3fSmrg        fprintf(stderr, " ");
47601e04c3fSmrg
47701e04c3fSmrg        if (inst->op != QOP_BRANCH) {
47801e04c3fSmrg                qir_print_reg(c, inst->dst, true);
47901e04c3fSmrg                if (inst->dst.pack) {
48001e04c3fSmrg                        if (inst->dst.pack) {
48101e04c3fSmrg                                if (qir_is_mul(inst))
48201e04c3fSmrg                                        vc4_qpu_disasm_pack_mul(stderr, inst->dst.pack);
48301e04c3fSmrg                                else
48401e04c3fSmrg                                        vc4_qpu_disasm_pack_a(stderr, inst->dst.pack);
48501e04c3fSmrg                        }
48601e04c3fSmrg                }
48701e04c3fSmrg        }
488af69d88dSmrg
48901e04c3fSmrg        for (int i = 0; i < qir_get_nsrc(inst); i++) {
490af69d88dSmrg                fprintf(stderr, ", ");
49101e04c3fSmrg                qir_print_reg(c, inst->src[i], false);
49201e04c3fSmrg                vc4_qpu_disasm_unpack(stderr, inst->src[i].pack);
493af69d88dSmrg        }
494af69d88dSmrg}
495af69d88dSmrg
496af69d88dSmrgvoid
49701e04c3fSmrgqir_dump(struct vc4_compile *c)
498af69d88dSmrg{
49901e04c3fSmrg        int ip = 0;
50001e04c3fSmrg        int pressure = 0;
50101e04c3fSmrg
50201e04c3fSmrg        qir_for_each_block(block, c) {
50301e04c3fSmrg                fprintf(stderr, "BLOCK %d:\n", block->index);
50401e04c3fSmrg                qir_for_each_inst(inst, block) {
50501e04c3fSmrg                        if (c->temp_start) {
50601e04c3fSmrg                                bool first = true;
50701e04c3fSmrg
50801e04c3fSmrg                                fprintf(stderr, "%3d ", pressure);
50901e04c3fSmrg
51001e04c3fSmrg                                for (int i = 0; i < c->num_temps; i++) {
51101e04c3fSmrg                                        if (c->temp_start[i] != ip)
51201e04c3fSmrg                                                continue;
51301e04c3fSmrg
51401e04c3fSmrg                                        if (first) {
51501e04c3fSmrg                                                first = false;
51601e04c3fSmrg                                        } else {
51701e04c3fSmrg                                                fprintf(stderr, ", ");
51801e04c3fSmrg                                        }
51901e04c3fSmrg                                        fprintf(stderr, "S%4d", i);
52001e04c3fSmrg                                        pressure++;
52101e04c3fSmrg                                }
52201e04c3fSmrg
52301e04c3fSmrg                                if (first)
52401e04c3fSmrg                                        fprintf(stderr, "      ");
52501e04c3fSmrg                                else
52601e04c3fSmrg                                        fprintf(stderr, " ");
52701e04c3fSmrg                        }
52801e04c3fSmrg
52901e04c3fSmrg                        if (c->temp_end) {
53001e04c3fSmrg                                bool first = true;
53101e04c3fSmrg
53201e04c3fSmrg                                for (int i = 0; i < c->num_temps; i++) {
53301e04c3fSmrg                                        if (c->temp_end[i] != ip)
53401e04c3fSmrg                                                continue;
53501e04c3fSmrg
53601e04c3fSmrg                                        if (first) {
53701e04c3fSmrg                                                first = false;
53801e04c3fSmrg                                        } else {
53901e04c3fSmrg                                                fprintf(stderr, ", ");
54001e04c3fSmrg                                        }
54101e04c3fSmrg                                        fprintf(stderr, "E%4d", i);
54201e04c3fSmrg                                        pressure--;
54301e04c3fSmrg                                }
54401e04c3fSmrg
54501e04c3fSmrg                                if (first)
54601e04c3fSmrg                                        fprintf(stderr, "      ");
54701e04c3fSmrg                                else
54801e04c3fSmrg                                        fprintf(stderr, " ");
54901e04c3fSmrg                        }
55001e04c3fSmrg
55101e04c3fSmrg                        qir_dump_inst(c, inst);
55201e04c3fSmrg                        fprintf(stderr, "\n");
55301e04c3fSmrg                        ip++;
55401e04c3fSmrg                }
55501e04c3fSmrg                if (block->successors[1]) {
55601e04c3fSmrg                        fprintf(stderr, "-> BLOCK %d, %d\n",
55701e04c3fSmrg                                block->successors[0]->index,
55801e04c3fSmrg                                block->successors[1]->index);
55901e04c3fSmrg                } else if (block->successors[0]) {
56001e04c3fSmrg                        fprintf(stderr, "-> BLOCK %d\n",
56101e04c3fSmrg                                block->successors[0]->index);
56201e04c3fSmrg                }
563af69d88dSmrg        }
564af69d88dSmrg}
565af69d88dSmrg
566af69d88dSmrgstruct qreg
56701e04c3fSmrgqir_get_temp(struct vc4_compile *c)
568af69d88dSmrg{
569af69d88dSmrg        struct qreg reg;
570af69d88dSmrg
571af69d88dSmrg        reg.file = QFILE_TEMP;
572af69d88dSmrg        reg.index = c->num_temps++;
57301e04c3fSmrg        reg.pack = 0;
57401e04c3fSmrg
57501e04c3fSmrg        if (c->num_temps > c->defs_array_size) {
57601e04c3fSmrg                uint32_t old_size = c->defs_array_size;
57701e04c3fSmrg                c->defs_array_size = MAX2(old_size * 2, 16);
57801e04c3fSmrg                c->defs = reralloc(c, c->defs, struct qinst *,
57901e04c3fSmrg                                   c->defs_array_size);
58001e04c3fSmrg                memset(&c->defs[old_size], 0,
58101e04c3fSmrg                       sizeof(c->defs[0]) * (c->defs_array_size - old_size));
58201e04c3fSmrg        }
583af69d88dSmrg
584af69d88dSmrg        return reg;
585af69d88dSmrg}
586af69d88dSmrg
587af69d88dSmrgstruct qinst *
588af69d88dSmrgqir_inst(enum qop op, struct qreg dst, struct qreg src0, struct qreg src1)
589af69d88dSmrg{
590af69d88dSmrg        struct qinst *inst = CALLOC_STRUCT(qinst);
591af69d88dSmrg
592af69d88dSmrg        inst->op = op;
593af69d88dSmrg        inst->dst = dst;
594af69d88dSmrg        inst->src[0] = src0;
595af69d88dSmrg        inst->src[1] = src1;
59601e04c3fSmrg        inst->cond = QPU_COND_ALWAYS;
597af69d88dSmrg
598af69d88dSmrg        return inst;
599af69d88dSmrg}
600af69d88dSmrg
60101e04c3fSmrgstatic void
60201e04c3fSmrgqir_emit(struct vc4_compile *c, struct qinst *inst)
60301e04c3fSmrg{
60401e04c3fSmrg        list_addtail(&inst->link, &c->cur_block->instructions);
60501e04c3fSmrg}
60601e04c3fSmrg
60701e04c3fSmrg/* Updates inst to write to a new temporary, emits it, and notes the def. */
60801e04c3fSmrgstruct qreg
60901e04c3fSmrgqir_emit_def(struct vc4_compile *c, struct qinst *inst)
61001e04c3fSmrg{
61101e04c3fSmrg        assert(inst->dst.file == QFILE_NULL);
61201e04c3fSmrg
61301e04c3fSmrg        inst->dst = qir_get_temp(c);
61401e04c3fSmrg
61501e04c3fSmrg        if (inst->dst.file == QFILE_TEMP)
61601e04c3fSmrg                c->defs[inst->dst.index] = inst;
61701e04c3fSmrg
61801e04c3fSmrg        qir_emit(c, inst);
61901e04c3fSmrg
62001e04c3fSmrg        return inst->dst;
62101e04c3fSmrg}
62201e04c3fSmrg
623af69d88dSmrgstruct qinst *
62401e04c3fSmrgqir_emit_nondef(struct vc4_compile *c, struct qinst *inst)
625af69d88dSmrg{
62601e04c3fSmrg        if (inst->dst.file == QFILE_TEMP)
62701e04c3fSmrg                c->defs[inst->dst.index] = NULL;
628af69d88dSmrg
62901e04c3fSmrg        qir_emit(c, inst);
630af69d88dSmrg
631af69d88dSmrg        return inst;
632af69d88dSmrg}
633af69d88dSmrg
63401e04c3fSmrgbool
63501e04c3fSmrgqir_reg_equals(struct qreg a, struct qreg b)
63601e04c3fSmrg{
63701e04c3fSmrg        return a.file == b.file && a.index == b.index && a.pack == b.pack;
63801e04c3fSmrg}
63901e04c3fSmrg
64001e04c3fSmrgstruct qblock *
64101e04c3fSmrgqir_new_block(struct vc4_compile *c)
64201e04c3fSmrg{
64301e04c3fSmrg        struct qblock *block = rzalloc(c, struct qblock);
64401e04c3fSmrg
64501e04c3fSmrg        list_inithead(&block->instructions);
64601e04c3fSmrg        list_inithead(&block->qpu_inst_list);
64701e04c3fSmrg
64801e04c3fSmrg        block->predecessors = _mesa_set_create(block,
64901e04c3fSmrg                                               _mesa_hash_pointer,
65001e04c3fSmrg                                               _mesa_key_pointer_equal);
65101e04c3fSmrg
65201e04c3fSmrg        block->index = c->next_block_index++;
65301e04c3fSmrg
65401e04c3fSmrg        return block;
65501e04c3fSmrg}
65601e04c3fSmrg
657af69d88dSmrgvoid
65801e04c3fSmrgqir_set_emit_block(struct vc4_compile *c, struct qblock *block)
659af69d88dSmrg{
66001e04c3fSmrg        c->cur_block = block;
66101e04c3fSmrg        list_addtail(&block->link, &c->blocks);
662af69d88dSmrg}
663af69d88dSmrg
66401e04c3fSmrgstruct qblock *
66501e04c3fSmrgqir_entry_block(struct vc4_compile *c)
666af69d88dSmrg{
66701e04c3fSmrg        return list_first_entry(&c->blocks, struct qblock, link);
668af69d88dSmrg}
669af69d88dSmrg
67001e04c3fSmrgstruct qblock *
67101e04c3fSmrgqir_exit_block(struct vc4_compile *c)
67201e04c3fSmrg{
67301e04c3fSmrg        return list_last_entry(&c->blocks, struct qblock, link);
67401e04c3fSmrg}
67501e04c3fSmrg
67601e04c3fSmrgvoid
67701e04c3fSmrgqir_link_blocks(struct qblock *predecessor, struct qblock *successor)
67801e04c3fSmrg{
67901e04c3fSmrg        _mesa_set_add(successor->predecessors, predecessor);
68001e04c3fSmrg        if (predecessor->successors[0]) {
68101e04c3fSmrg                assert(!predecessor->successors[1]);
68201e04c3fSmrg                predecessor->successors[1] = successor;
68301e04c3fSmrg        } else {
68401e04c3fSmrg                predecessor->successors[0] = successor;
68501e04c3fSmrg        }
68601e04c3fSmrg}
68701e04c3fSmrg
68801e04c3fSmrgstruct vc4_compile *
689af69d88dSmrgqir_compile_init(void)
690af69d88dSmrg{
69101e04c3fSmrg        struct vc4_compile *c = rzalloc(NULL, struct vc4_compile);
692af69d88dSmrg
69301e04c3fSmrg        list_inithead(&c->blocks);
69401e04c3fSmrg        qir_set_emit_block(c, qir_new_block(c));
69501e04c3fSmrg        c->last_top_block = c->cur_block;
69601e04c3fSmrg
69701e04c3fSmrg        c->output_position_index = -1;
69801e04c3fSmrg        c->output_color_index = -1;
69901e04c3fSmrg        c->output_point_size_index = -1;
70001e04c3fSmrg        c->output_sample_mask_index = -1;
70101e04c3fSmrg
70201e04c3fSmrg        c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
70301e04c3fSmrg                                            _mesa_key_pointer_equal);
704af69d88dSmrg
705af69d88dSmrg        return c;
706af69d88dSmrg}
707af69d88dSmrg
708af69d88dSmrgvoid
70901e04c3fSmrgqir_remove_instruction(struct vc4_compile *c, struct qinst *qinst)
710af69d88dSmrg{
71101e04c3fSmrg        if (qinst->dst.file == QFILE_TEMP)
71201e04c3fSmrg                c->defs[qinst->dst.index] = NULL;
71301e04c3fSmrg
71401e04c3fSmrg        list_del(&qinst->link);
71501e04c3fSmrg        free(qinst);
71601e04c3fSmrg}
71701e04c3fSmrg
71801e04c3fSmrgstruct qreg
71901e04c3fSmrgqir_follow_movs(struct vc4_compile *c, struct qreg reg)
72001e04c3fSmrg{
72101e04c3fSmrg        int pack = reg.pack;
72201e04c3fSmrg
72301e04c3fSmrg        while (reg.file == QFILE_TEMP &&
72401e04c3fSmrg               c->defs[reg.index] &&
72501e04c3fSmrg               (c->defs[reg.index]->op == QOP_MOV ||
72601e04c3fSmrg                c->defs[reg.index]->op == QOP_FMOV ||
72701e04c3fSmrg                c->defs[reg.index]->op == QOP_MMOV)&&
72801e04c3fSmrg               !c->defs[reg.index]->dst.pack &&
72901e04c3fSmrg               !c->defs[reg.index]->src[0].pack) {
73001e04c3fSmrg                reg = c->defs[reg.index]->src[0];
73101e04c3fSmrg        }
73201e04c3fSmrg
73301e04c3fSmrg        reg.pack = pack;
73401e04c3fSmrg        return reg;
73501e04c3fSmrg}
73601e04c3fSmrg
73701e04c3fSmrgvoid
73801e04c3fSmrgqir_compile_destroy(struct vc4_compile *c)
73901e04c3fSmrg{
74001e04c3fSmrg        qir_for_each_block(block, c) {
7417ec681f3Smrg                while (!list_is_empty(&block->instructions)) {
74201e04c3fSmrg                        struct qinst *qinst =
74301e04c3fSmrg                                list_first_entry(&block->instructions,
74401e04c3fSmrg                                                 struct qinst, link);
74501e04c3fSmrg                        qir_remove_instruction(c, qinst);
74601e04c3fSmrg                }
74701e04c3fSmrg        }
74801e04c3fSmrg
74901e04c3fSmrg        ralloc_free(c);
750af69d88dSmrg}
751af69d88dSmrg
752af69d88dSmrgconst char *
753af69d88dSmrgqir_get_stage_name(enum qstage stage)
754af69d88dSmrg{
755af69d88dSmrg        static const char *names[] = {
756af69d88dSmrg                [QSTAGE_FRAG] = "FS",
757af69d88dSmrg                [QSTAGE_VERT] = "VS",
758af69d88dSmrg                [QSTAGE_COORD] = "CS",
759af69d88dSmrg        };
760af69d88dSmrg
761af69d88dSmrg        return names[stage];
762af69d88dSmrg}
763af69d88dSmrg
76401e04c3fSmrgstruct qreg
76501e04c3fSmrgqir_uniform(struct vc4_compile *c,
76601e04c3fSmrg            enum quniform_contents contents,
76701e04c3fSmrg            uint32_t data)
76801e04c3fSmrg{
76901e04c3fSmrg        for (int i = 0; i < c->num_uniforms; i++) {
77001e04c3fSmrg                if (c->uniform_contents[i] == contents &&
77101e04c3fSmrg                    c->uniform_data[i] == data) {
77201e04c3fSmrg                        return qir_reg(QFILE_UNIF, i);
77301e04c3fSmrg                }
77401e04c3fSmrg        }
77501e04c3fSmrg
77601e04c3fSmrg        uint32_t uniform = c->num_uniforms++;
77701e04c3fSmrg
77801e04c3fSmrg        if (uniform >= c->uniform_array_size) {
77901e04c3fSmrg                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
78001e04c3fSmrg                                             c->uniform_array_size * 2);
78101e04c3fSmrg
78201e04c3fSmrg                c->uniform_data = reralloc(c, c->uniform_data,
78301e04c3fSmrg                                           uint32_t,
78401e04c3fSmrg                                           c->uniform_array_size);
78501e04c3fSmrg                c->uniform_contents = reralloc(c, c->uniform_contents,
78601e04c3fSmrg                                               enum quniform_contents,
78701e04c3fSmrg                                               c->uniform_array_size);
78801e04c3fSmrg        }
78901e04c3fSmrg
79001e04c3fSmrg        c->uniform_contents[uniform] = contents;
79101e04c3fSmrg        c->uniform_data[uniform] = data;
79201e04c3fSmrg
79301e04c3fSmrg        return qir_reg(QFILE_UNIF, uniform);
79401e04c3fSmrg}
79501e04c3fSmrg
79601e04c3fSmrgvoid
79701e04c3fSmrgqir_SF(struct vc4_compile *c, struct qreg src)
79801e04c3fSmrg{
79901e04c3fSmrg        struct qinst *last_inst = NULL;
80001e04c3fSmrg
8017ec681f3Smrg        if (!list_is_empty(&c->cur_block->instructions))
80201e04c3fSmrg                last_inst = (struct qinst *)c->cur_block->instructions.prev;
80301e04c3fSmrg
80401e04c3fSmrg        /* We don't have any way to guess which kind of MOV is implied. */
80501e04c3fSmrg        assert(!src.pack);
80601e04c3fSmrg
80701e04c3fSmrg        if (src.file != QFILE_TEMP ||
80801e04c3fSmrg            !c->defs[src.index] ||
80901e04c3fSmrg            last_inst != c->defs[src.index]) {
81001e04c3fSmrg                last_inst = qir_MOV_dest(c, qir_reg(QFILE_NULL, 0), src);
81101e04c3fSmrg                last_inst = (struct qinst *)c->cur_block->instructions.prev;
81201e04c3fSmrg        }
81301e04c3fSmrg        last_inst->sf = true;
81401e04c3fSmrg}
81501e04c3fSmrg
/* Runs one optimization pass on 'c'.  When the pass reports progress, the
 * enclosing scope's 'progress' flag is set, a debug line is printed if
 * 'print_opt_debug' is on, and the IR is re-validated.  Expects 'c',
 * 'progress', 'pass' and 'print_opt_debug' to be in scope at the use site.
 */
#define OPTPASS(func)                                                   \
        do {                                                            \
                if (func(c)) {                                          \
                        progress = true;                                \
                        if (print_opt_debug)                            \
                                fprintf(stderr,                         \
                                        "QIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        qir_validate(c);                                \
                }                                                       \
        } while (0)
829af69d88dSmrg
830af69d88dSmrgvoid
83101e04c3fSmrgqir_optimize(struct vc4_compile *c)
832af69d88dSmrg{
833af69d88dSmrg        bool print_opt_debug = false;
834af69d88dSmrg        int pass = 1;
835af69d88dSmrg
836af69d88dSmrg        while (true) {
837af69d88dSmrg                bool progress = false;
838af69d88dSmrg
839af69d88dSmrg                OPTPASS(qir_opt_algebraic);
84001e04c3fSmrg                OPTPASS(qir_opt_constant_folding);
841af69d88dSmrg                OPTPASS(qir_opt_copy_propagation);
84201e04c3fSmrg                OPTPASS(qir_opt_peephole_sf);
843af69d88dSmrg                OPTPASS(qir_opt_dead_code);
84401e04c3fSmrg                OPTPASS(qir_opt_small_immediates);
84501e04c3fSmrg                OPTPASS(qir_opt_vpm);
84601e04c3fSmrg                OPTPASS(qir_opt_coalesce_ff_writes);
847af69d88dSmrg
848af69d88dSmrg                if (!progress)
849af69d88dSmrg                        break;
850af69d88dSmrg
851af69d88dSmrg                pass++;
852af69d88dSmrg        }
853af69d88dSmrg}
854