vir.c revision 01e04c3f
101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2016-2017 Broadcom
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg#include "broadcom/common/v3d_device_info.h"
2501e04c3fSmrg#include "v3d_compiler.h"
2601e04c3fSmrg
2701e04c3fSmrgint
2801e04c3fSmrgvir_get_non_sideband_nsrc(struct qinst *inst)
2901e04c3fSmrg{
3001e04c3fSmrg        switch (inst->qpu.type) {
3101e04c3fSmrg        case V3D_QPU_INSTR_TYPE_BRANCH:
3201e04c3fSmrg                return 0;
3301e04c3fSmrg        case V3D_QPU_INSTR_TYPE_ALU:
3401e04c3fSmrg                if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
3501e04c3fSmrg                        return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
3601e04c3fSmrg                else
3701e04c3fSmrg                        return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
3801e04c3fSmrg        }
3901e04c3fSmrg
4001e04c3fSmrg        return 0;
4101e04c3fSmrg}
4201e04c3fSmrg
/**
 * Returns the total source count of the instruction: the non-sideband
 * sources plus one extra slot when the instruction has an implicit uniform.
 */
int
vir_get_nsrc(struct qinst *inst)
{
        const int explicit_srcs = vir_get_non_sideband_nsrc(inst);

        return vir_has_implicit_uniform(inst) ? explicit_srcs + 1
                                              : explicit_srcs;
}
5301e04c3fSmrg
5401e04c3fSmrgbool
5501e04c3fSmrgvir_has_implicit_uniform(struct qinst *inst)
5601e04c3fSmrg{
5701e04c3fSmrg        switch (inst->qpu.type) {
5801e04c3fSmrg        case V3D_QPU_INSTR_TYPE_BRANCH:
5901e04c3fSmrg                return true;
6001e04c3fSmrg        case V3D_QPU_INSTR_TYPE_ALU:
6101e04c3fSmrg                switch (inst->dst.file) {
6201e04c3fSmrg                case QFILE_TLBU:
6301e04c3fSmrg                        return true;
6401e04c3fSmrg                default:
6501e04c3fSmrg                        return inst->has_implicit_uniform;
6601e04c3fSmrg                }
6701e04c3fSmrg        }
6801e04c3fSmrg        return false;
6901e04c3fSmrg}
7001e04c3fSmrg
/* The sideband uniform for textures gets stored after the normal ALU
 * arguments, so its source index is the last one counted by vir_get_nsrc().
 *
 * Returns -1 when the instruction has no implicit uniform.
 */
int
vir_get_implicit_uniform_src(struct qinst *inst)
{
        return vir_has_implicit_uniform(inst) ? vir_get_nsrc(inst) - 1 : -1;
}
8101e04c3fSmrg
8201e04c3fSmrg/**
8301e04c3fSmrg * Returns whether the instruction has any side effects that must be
8401e04c3fSmrg * preserved.
8501e04c3fSmrg */
8601e04c3fSmrgbool
8701e04c3fSmrgvir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
8801e04c3fSmrg{
8901e04c3fSmrg        switch (inst->qpu.type) {
9001e04c3fSmrg        case V3D_QPU_INSTR_TYPE_BRANCH:
9101e04c3fSmrg                return true;
9201e04c3fSmrg        case V3D_QPU_INSTR_TYPE_ALU:
9301e04c3fSmrg                switch (inst->qpu.alu.add.op) {
9401e04c3fSmrg                case V3D_QPU_A_SETREVF:
9501e04c3fSmrg                case V3D_QPU_A_SETMSF:
9601e04c3fSmrg                case V3D_QPU_A_VPMSETUP:
9701e04c3fSmrg                case V3D_QPU_A_STVPMV:
9801e04c3fSmrg                case V3D_QPU_A_STVPMD:
9901e04c3fSmrg                case V3D_QPU_A_STVPMP:
10001e04c3fSmrg                case V3D_QPU_A_VPMWT:
10101e04c3fSmrg                case V3D_QPU_A_TMUWT:
10201e04c3fSmrg                        return true;
10301e04c3fSmrg                default:
10401e04c3fSmrg                        break;
10501e04c3fSmrg                }
10601e04c3fSmrg
10701e04c3fSmrg                switch (inst->qpu.alu.mul.op) {
10801e04c3fSmrg                case V3D_QPU_M_MULTOP:
10901e04c3fSmrg                        return true;
11001e04c3fSmrg                default:
11101e04c3fSmrg                        break;
11201e04c3fSmrg                }
11301e04c3fSmrg        }
11401e04c3fSmrg
11501e04c3fSmrg        if (inst->qpu.sig.ldtmu ||
11601e04c3fSmrg            inst->qpu.sig.ldvary ||
11701e04c3fSmrg            inst->qpu.sig.wrtmuc ||
11801e04c3fSmrg            inst->qpu.sig.thrsw) {
11901e04c3fSmrg                return true;
12001e04c3fSmrg        }
12101e04c3fSmrg
12201e04c3fSmrg        return false;
12301e04c3fSmrg}
12401e04c3fSmrg
12501e04c3fSmrgbool
12601e04c3fSmrgvir_is_float_input(struct qinst *inst)
12701e04c3fSmrg{
12801e04c3fSmrg        /* XXX: More instrs */
12901e04c3fSmrg        switch (inst->qpu.type) {
13001e04c3fSmrg        case V3D_QPU_INSTR_TYPE_BRANCH:
13101e04c3fSmrg                return false;
13201e04c3fSmrg        case V3D_QPU_INSTR_TYPE_ALU:
13301e04c3fSmrg                switch (inst->qpu.alu.add.op) {
13401e04c3fSmrg                case V3D_QPU_A_FADD:
13501e04c3fSmrg                case V3D_QPU_A_FSUB:
13601e04c3fSmrg                case V3D_QPU_A_FMIN:
13701e04c3fSmrg                case V3D_QPU_A_FMAX:
13801e04c3fSmrg                case V3D_QPU_A_FTOIN:
13901e04c3fSmrg                        return true;
14001e04c3fSmrg                default:
14101e04c3fSmrg                        break;
14201e04c3fSmrg                }
14301e04c3fSmrg
14401e04c3fSmrg                switch (inst->qpu.alu.mul.op) {
14501e04c3fSmrg                case V3D_QPU_M_FMOV:
14601e04c3fSmrg                case V3D_QPU_M_VFMUL:
14701e04c3fSmrg                case V3D_QPU_M_FMUL:
14801e04c3fSmrg                        return true;
14901e04c3fSmrg                default:
15001e04c3fSmrg                        break;
15101e04c3fSmrg                }
15201e04c3fSmrg        }
15301e04c3fSmrg
15401e04c3fSmrg        return false;
15501e04c3fSmrg}
15601e04c3fSmrg
15701e04c3fSmrgbool
15801e04c3fSmrgvir_is_raw_mov(struct qinst *inst)
15901e04c3fSmrg{
16001e04c3fSmrg        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
16101e04c3fSmrg            (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
16201e04c3fSmrg             inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
16301e04c3fSmrg                return false;
16401e04c3fSmrg        }
16501e04c3fSmrg
16601e04c3fSmrg        if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
16701e04c3fSmrg            inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
16801e04c3fSmrg                return false;
16901e04c3fSmrg        }
17001e04c3fSmrg
17101e04c3fSmrg        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
17201e04c3fSmrg            inst->qpu.flags.mc != V3D_QPU_COND_NONE)
17301e04c3fSmrg                return false;
17401e04c3fSmrg
17501e04c3fSmrg        return true;
17601e04c3fSmrg}
17701e04c3fSmrg
17801e04c3fSmrgbool
17901e04c3fSmrgvir_is_add(struct qinst *inst)
18001e04c3fSmrg{
18101e04c3fSmrg        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
18201e04c3fSmrg                inst->qpu.alu.add.op != V3D_QPU_A_NOP);
18301e04c3fSmrg}
18401e04c3fSmrg
18501e04c3fSmrgbool
18601e04c3fSmrgvir_is_mul(struct qinst *inst)
18701e04c3fSmrg{
18801e04c3fSmrg        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
18901e04c3fSmrg                inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
19001e04c3fSmrg}
19101e04c3fSmrg
19201e04c3fSmrgbool
19301e04c3fSmrgvir_is_tex(struct qinst *inst)
19401e04c3fSmrg{
19501e04c3fSmrg        if (inst->dst.file == QFILE_MAGIC)
19601e04c3fSmrg                return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
19701e04c3fSmrg
19801e04c3fSmrg        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
19901e04c3fSmrg            inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
20001e04c3fSmrg                return true;
20101e04c3fSmrg        }
20201e04c3fSmrg
20301e04c3fSmrg        return false;
20401e04c3fSmrg}
20501e04c3fSmrg
20601e04c3fSmrgbool
20701e04c3fSmrgvir_depends_on_flags(struct qinst *inst)
20801e04c3fSmrg{
20901e04c3fSmrg        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) {
21001e04c3fSmrg                return (inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS);
21101e04c3fSmrg        } else {
21201e04c3fSmrg                return (inst->qpu.flags.ac != V3D_QPU_COND_NONE &&
21301e04c3fSmrg                        inst->qpu.flags.mc != V3D_QPU_COND_NONE);
21401e04c3fSmrg        }
21501e04c3fSmrg}
21601e04c3fSmrg
21701e04c3fSmrgbool
21801e04c3fSmrgvir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
21901e04c3fSmrg{
22001e04c3fSmrg        for (int i = 0; i < vir_get_nsrc(inst); i++) {
22101e04c3fSmrg                switch (inst->src[i].file) {
22201e04c3fSmrg                case QFILE_VPM:
22301e04c3fSmrg                        return true;
22401e04c3fSmrg                default:
22501e04c3fSmrg                        break;
22601e04c3fSmrg                }
22701e04c3fSmrg        }
22801e04c3fSmrg
22901e04c3fSmrg        if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
23001e04c3fSmrg                                  inst->qpu.sig.ldtlb ||
23101e04c3fSmrg                                  inst->qpu.sig.ldtlbu ||
23201e04c3fSmrg                                  inst->qpu.sig.ldvpm)) {
23301e04c3fSmrg                return true;
23401e04c3fSmrg        }
23501e04c3fSmrg
23601e04c3fSmrg        return false;
23701e04c3fSmrg}
23801e04c3fSmrg
23901e04c3fSmrgbool
24001e04c3fSmrgvir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
24101e04c3fSmrg{
24201e04c3fSmrg        switch (inst->dst.file) {
24301e04c3fSmrg        case QFILE_MAGIC:
24401e04c3fSmrg                switch (inst->dst.index) {
24501e04c3fSmrg                case V3D_QPU_WADDR_RECIP:
24601e04c3fSmrg                case V3D_QPU_WADDR_RSQRT:
24701e04c3fSmrg                case V3D_QPU_WADDR_EXP:
24801e04c3fSmrg                case V3D_QPU_WADDR_LOG:
24901e04c3fSmrg                case V3D_QPU_WADDR_SIN:
25001e04c3fSmrg                        return true;
25101e04c3fSmrg                }
25201e04c3fSmrg                break;
25301e04c3fSmrg        default:
25401e04c3fSmrg                break;
25501e04c3fSmrg        }
25601e04c3fSmrg
25701e04c3fSmrg        if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
25801e04c3fSmrg                return true;
25901e04c3fSmrg
26001e04c3fSmrg        return false;
26101e04c3fSmrg}
26201e04c3fSmrg
26301e04c3fSmrgvoid
26401e04c3fSmrgvir_set_unpack(struct qinst *inst, int src,
26501e04c3fSmrg               enum v3d_qpu_input_unpack unpack)
26601e04c3fSmrg{
26701e04c3fSmrg        assert(src == 0 || src == 1);
26801e04c3fSmrg
26901e04c3fSmrg        if (vir_is_add(inst)) {
27001e04c3fSmrg                if (src == 0)
27101e04c3fSmrg                        inst->qpu.alu.add.a_unpack = unpack;
27201e04c3fSmrg                else
27301e04c3fSmrg                        inst->qpu.alu.add.b_unpack = unpack;
27401e04c3fSmrg        } else {
27501e04c3fSmrg                assert(vir_is_mul(inst));
27601e04c3fSmrg                if (src == 0)
27701e04c3fSmrg                        inst->qpu.alu.mul.a_unpack = unpack;
27801e04c3fSmrg                else
27901e04c3fSmrg                        inst->qpu.alu.mul.b_unpack = unpack;
28001e04c3fSmrg        }
28101e04c3fSmrg}
28201e04c3fSmrg
28301e04c3fSmrgvoid
28401e04c3fSmrgvir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
28501e04c3fSmrg{
28601e04c3fSmrg        if (vir_is_add(inst)) {
28701e04c3fSmrg                inst->qpu.flags.ac = cond;
28801e04c3fSmrg        } else {
28901e04c3fSmrg                assert(vir_is_mul(inst));
29001e04c3fSmrg                inst->qpu.flags.mc = cond;
29101e04c3fSmrg        }
29201e04c3fSmrg}
29301e04c3fSmrg
29401e04c3fSmrgvoid
29501e04c3fSmrgvir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
29601e04c3fSmrg{
29701e04c3fSmrg        if (vir_is_add(inst)) {
29801e04c3fSmrg                inst->qpu.flags.apf = pf;
29901e04c3fSmrg        } else {
30001e04c3fSmrg                assert(vir_is_mul(inst));
30101e04c3fSmrg                inst->qpu.flags.mpf = pf;
30201e04c3fSmrg        }
30301e04c3fSmrg}
30401e04c3fSmrg
#if 0
/* Disabled code: maps the destination pack mode to a 4-bit mask of the
 * 8-bit channels written (0xf for a full write).  It refers to QPU_PACK_*
 * names and a dst.pack field and is compiled out.
 */
uint8_t
vir_channels_written(struct qinst *inst)
{
        if (!vir_is_mul(inst)) {
                switch (inst->dst.pack) {
                case QPU_PACK_A_NOP:
                case QPU_PACK_A_8888:
                case QPU_PACK_A_8888_SAT:
                case QPU_PACK_A_32_SAT:
                        return 0xf;
                case QPU_PACK_A_8A:
                case QPU_PACK_A_8A_SAT:
                        return 0x1;
                case QPU_PACK_A_8B:
                case QPU_PACK_A_8B_SAT:
                        return 0x2;
                case QPU_PACK_A_8C:
                case QPU_PACK_A_8C_SAT:
                        return 0x4;
                case QPU_PACK_A_8D:
                case QPU_PACK_A_8D_SAT:
                        return 0x8;
                case QPU_PACK_A_16A:
                case QPU_PACK_A_16A_SAT:
                        return 0x3;
                case QPU_PACK_A_16B:
                case QPU_PACK_A_16B_SAT:
                        return 0xc;
                }
        } else {
                switch (inst->dst.pack) {
                case QPU_PACK_MUL_NOP:
                case QPU_PACK_MUL_8888:
                        return 0xf;
                case QPU_PACK_MUL_8A:
                        return 0x1;
                case QPU_PACK_MUL_8B:
                        return 0x2;
                case QPU_PACK_MUL_8C:
                        return 0x4;
                case QPU_PACK_MUL_8D:
                        return 0x8;
                }
        }
        unreachable("Bad pack field");
}
#endif
35301e04c3fSmrg
35401e04c3fSmrgstruct qreg
35501e04c3fSmrgvir_get_temp(struct v3d_compile *c)
35601e04c3fSmrg{
35701e04c3fSmrg        struct qreg reg;
35801e04c3fSmrg
35901e04c3fSmrg        reg.file = QFILE_TEMP;
36001e04c3fSmrg        reg.index = c->num_temps++;
36101e04c3fSmrg
36201e04c3fSmrg        if (c->num_temps > c->defs_array_size) {
36301e04c3fSmrg                uint32_t old_size = c->defs_array_size;
36401e04c3fSmrg                c->defs_array_size = MAX2(old_size * 2, 16);
36501e04c3fSmrg
36601e04c3fSmrg                c->defs = reralloc(c, c->defs, struct qinst *,
36701e04c3fSmrg                                   c->defs_array_size);
36801e04c3fSmrg                memset(&c->defs[old_size], 0,
36901e04c3fSmrg                       sizeof(c->defs[0]) * (c->defs_array_size - old_size));
37001e04c3fSmrg
37101e04c3fSmrg                c->spillable = reralloc(c, c->spillable,
37201e04c3fSmrg                                        BITSET_WORD,
37301e04c3fSmrg                                        BITSET_WORDS(c->defs_array_size));
37401e04c3fSmrg                for (int i = old_size; i < c->defs_array_size; i++)
37501e04c3fSmrg                        BITSET_SET(c->spillable, i);
37601e04c3fSmrg        }
37701e04c3fSmrg
37801e04c3fSmrg        return reg;
37901e04c3fSmrg}
38001e04c3fSmrg
38101e04c3fSmrgstruct qinst *
38201e04c3fSmrgvir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
38301e04c3fSmrg{
38401e04c3fSmrg        struct qinst *inst = calloc(1, sizeof(*inst));
38501e04c3fSmrg
38601e04c3fSmrg        inst->qpu = v3d_qpu_nop();
38701e04c3fSmrg        inst->qpu.alu.add.op = op;
38801e04c3fSmrg
38901e04c3fSmrg        inst->dst = dst;
39001e04c3fSmrg        inst->src[0] = src0;
39101e04c3fSmrg        inst->src[1] = src1;
39201e04c3fSmrg        inst->uniform = ~0;
39301e04c3fSmrg
39401e04c3fSmrg        return inst;
39501e04c3fSmrg}
39601e04c3fSmrg
39701e04c3fSmrgstruct qinst *
39801e04c3fSmrgvir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
39901e04c3fSmrg{
40001e04c3fSmrg        struct qinst *inst = calloc(1, sizeof(*inst));
40101e04c3fSmrg
40201e04c3fSmrg        inst->qpu = v3d_qpu_nop();
40301e04c3fSmrg        inst->qpu.alu.mul.op = op;
40401e04c3fSmrg
40501e04c3fSmrg        inst->dst = dst;
40601e04c3fSmrg        inst->src[0] = src0;
40701e04c3fSmrg        inst->src[1] = src1;
40801e04c3fSmrg        inst->uniform = ~0;
40901e04c3fSmrg
41001e04c3fSmrg        return inst;
41101e04c3fSmrg}
41201e04c3fSmrg
41301e04c3fSmrgstruct qinst *
41401e04c3fSmrgvir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src)
41501e04c3fSmrg{
41601e04c3fSmrg        struct qinst *inst = calloc(1, sizeof(*inst));
41701e04c3fSmrg
41801e04c3fSmrg        inst->qpu = v3d_qpu_nop();
41901e04c3fSmrg        inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
42001e04c3fSmrg        inst->qpu.branch.cond = cond;
42101e04c3fSmrg        inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
42201e04c3fSmrg        inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
42301e04c3fSmrg        inst->qpu.branch.ub = true;
42401e04c3fSmrg        inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;
42501e04c3fSmrg
42601e04c3fSmrg        inst->dst = vir_reg(QFILE_NULL, 0);
42701e04c3fSmrg        inst->src[0] = src;
42801e04c3fSmrg        inst->uniform = ~0;
42901e04c3fSmrg
43001e04c3fSmrg        return inst;
43101e04c3fSmrg}
43201e04c3fSmrg
43301e04c3fSmrgstatic void
43401e04c3fSmrgvir_emit(struct v3d_compile *c, struct qinst *inst)
43501e04c3fSmrg{
43601e04c3fSmrg        switch (c->cursor.mode) {
43701e04c3fSmrg        case vir_cursor_add:
43801e04c3fSmrg                list_add(&inst->link, c->cursor.link);
43901e04c3fSmrg                break;
44001e04c3fSmrg        case vir_cursor_addtail:
44101e04c3fSmrg                list_addtail(&inst->link, c->cursor.link);
44201e04c3fSmrg                break;
44301e04c3fSmrg        }
44401e04c3fSmrg
44501e04c3fSmrg        c->cursor = vir_after_inst(inst);
44601e04c3fSmrg        c->live_intervals_valid = false;
44701e04c3fSmrg}
44801e04c3fSmrg
44901e04c3fSmrg/* Updates inst to write to a new temporary, emits it, and notes the def. */
45001e04c3fSmrgstruct qreg
45101e04c3fSmrgvir_emit_def(struct v3d_compile *c, struct qinst *inst)
45201e04c3fSmrg{
45301e04c3fSmrg        assert(inst->dst.file == QFILE_NULL);
45401e04c3fSmrg
45501e04c3fSmrg        /* If we're emitting an instruction that's a def, it had better be
45601e04c3fSmrg         * writing a register.
45701e04c3fSmrg         */
45801e04c3fSmrg        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
45901e04c3fSmrg                assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
46001e04c3fSmrg                       v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
46101e04c3fSmrg                assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
46201e04c3fSmrg                       v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
46301e04c3fSmrg        }
46401e04c3fSmrg
46501e04c3fSmrg        inst->dst = vir_get_temp(c);
46601e04c3fSmrg
46701e04c3fSmrg        if (inst->dst.file == QFILE_TEMP)
46801e04c3fSmrg                c->defs[inst->dst.index] = inst;
46901e04c3fSmrg
47001e04c3fSmrg        vir_emit(c, inst);
47101e04c3fSmrg
47201e04c3fSmrg        return inst->dst;
47301e04c3fSmrg}
47401e04c3fSmrg
47501e04c3fSmrgstruct qinst *
47601e04c3fSmrgvir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
47701e04c3fSmrg{
47801e04c3fSmrg        if (inst->dst.file == QFILE_TEMP)
47901e04c3fSmrg                c->defs[inst->dst.index] = NULL;
48001e04c3fSmrg
48101e04c3fSmrg        vir_emit(c, inst);
48201e04c3fSmrg
48301e04c3fSmrg        return inst;
48401e04c3fSmrg}
48501e04c3fSmrg
48601e04c3fSmrgstruct qblock *
48701e04c3fSmrgvir_new_block(struct v3d_compile *c)
48801e04c3fSmrg{
48901e04c3fSmrg        struct qblock *block = rzalloc(c, struct qblock);
49001e04c3fSmrg
49101e04c3fSmrg        list_inithead(&block->instructions);
49201e04c3fSmrg
49301e04c3fSmrg        block->predecessors = _mesa_set_create(block,
49401e04c3fSmrg                                               _mesa_hash_pointer,
49501e04c3fSmrg                                               _mesa_key_pointer_equal);
49601e04c3fSmrg
49701e04c3fSmrg        block->index = c->next_block_index++;
49801e04c3fSmrg
49901e04c3fSmrg        return block;
50001e04c3fSmrg}
50101e04c3fSmrg
50201e04c3fSmrgvoid
50301e04c3fSmrgvir_set_emit_block(struct v3d_compile *c, struct qblock *block)
50401e04c3fSmrg{
50501e04c3fSmrg        c->cur_block = block;
50601e04c3fSmrg        c->cursor = vir_after_block(block);
50701e04c3fSmrg        list_addtail(&block->link, &c->blocks);
50801e04c3fSmrg}
50901e04c3fSmrg
51001e04c3fSmrgstruct qblock *
51101e04c3fSmrgvir_entry_block(struct v3d_compile *c)
51201e04c3fSmrg{
51301e04c3fSmrg        return list_first_entry(&c->blocks, struct qblock, link);
51401e04c3fSmrg}
51501e04c3fSmrg
51601e04c3fSmrgstruct qblock *
51701e04c3fSmrgvir_exit_block(struct v3d_compile *c)
51801e04c3fSmrg{
51901e04c3fSmrg        return list_last_entry(&c->blocks, struct qblock, link);
52001e04c3fSmrg}
52101e04c3fSmrg
52201e04c3fSmrgvoid
52301e04c3fSmrgvir_link_blocks(struct qblock *predecessor, struct qblock *successor)
52401e04c3fSmrg{
52501e04c3fSmrg        _mesa_set_add(successor->predecessors, predecessor);
52601e04c3fSmrg        if (predecessor->successors[0]) {
52701e04c3fSmrg                assert(!predecessor->successors[1]);
52801e04c3fSmrg                predecessor->successors[1] = successor;
52901e04c3fSmrg        } else {
53001e04c3fSmrg                predecessor->successors[0] = successor;
53101e04c3fSmrg        }
53201e04c3fSmrg}
53301e04c3fSmrg
53401e04c3fSmrgconst struct v3d_compiler *
53501e04c3fSmrgv3d_compiler_init(const struct v3d_device_info *devinfo)
53601e04c3fSmrg{
53701e04c3fSmrg        struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
53801e04c3fSmrg        if (!compiler)
53901e04c3fSmrg                return NULL;
54001e04c3fSmrg
54101e04c3fSmrg        compiler->devinfo = devinfo;
54201e04c3fSmrg
54301e04c3fSmrg        if (!vir_init_reg_sets(compiler)) {
54401e04c3fSmrg                ralloc_free(compiler);
54501e04c3fSmrg                return NULL;
54601e04c3fSmrg        }
54701e04c3fSmrg
54801e04c3fSmrg        return compiler;
54901e04c3fSmrg}
55001e04c3fSmrg
/**
 * Frees a compiler object via ralloc_free().
 *
 * The const qualifier of the public handle is cast away for the free.
 */
void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
        void *mem = (void *)compiler;

        ralloc_free(mem);
}
55601e04c3fSmrg
55701e04c3fSmrgstatic struct v3d_compile *
55801e04c3fSmrgvir_compile_init(const struct v3d_compiler *compiler,
55901e04c3fSmrg                 struct v3d_key *key,
56001e04c3fSmrg                 nir_shader *s,
56101e04c3fSmrg                 int program_id, int variant_id)
56201e04c3fSmrg{
56301e04c3fSmrg        struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);
56401e04c3fSmrg
56501e04c3fSmrg        c->compiler = compiler;
56601e04c3fSmrg        c->devinfo = compiler->devinfo;
56701e04c3fSmrg        c->key = key;
56801e04c3fSmrg        c->program_id = program_id;
56901e04c3fSmrg        c->variant_id = variant_id;
57001e04c3fSmrg        c->threads = 4;
57101e04c3fSmrg
57201e04c3fSmrg        s = nir_shader_clone(c, s);
57301e04c3fSmrg        c->s = s;
57401e04c3fSmrg
57501e04c3fSmrg        list_inithead(&c->blocks);
57601e04c3fSmrg        vir_set_emit_block(c, vir_new_block(c));
57701e04c3fSmrg
57801e04c3fSmrg        c->output_position_index = -1;
57901e04c3fSmrg        c->output_point_size_index = -1;
58001e04c3fSmrg        c->output_sample_mask_index = -1;
58101e04c3fSmrg
58201e04c3fSmrg        c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
58301e04c3fSmrg                                            _mesa_key_pointer_equal);
58401e04c3fSmrg
58501e04c3fSmrg        return c;
58601e04c3fSmrg}
58701e04c3fSmrg
/* Size callback: returns glsl_count_attribute_slots() for the type, with
 * the second argument passed as false.
 */
static int
type_size_vec4(const struct glsl_type *type)
{
        const int slots = glsl_count_attribute_slots(type, false);

        return slots;
}
59301e04c3fSmrg
59401e04c3fSmrgstatic void
59501e04c3fSmrgv3d_lower_nir(struct v3d_compile *c)
59601e04c3fSmrg{
59701e04c3fSmrg        struct nir_lower_tex_options tex_options = {
59801e04c3fSmrg                .lower_txd = true,
59901e04c3fSmrg                .lower_rect = false, /* XXX: Use this on V3D 3.x */
60001e04c3fSmrg                .lower_txp = ~0,
60101e04c3fSmrg                /* Apply swizzles to all samplers. */
60201e04c3fSmrg                .swizzle_result = ~0,
60301e04c3fSmrg        };
60401e04c3fSmrg
60501e04c3fSmrg        /* Lower the format swizzle and (for 32-bit returns)
60601e04c3fSmrg         * ARB_texture_swizzle-style swizzle.
60701e04c3fSmrg         */
60801e04c3fSmrg        for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
60901e04c3fSmrg                for (int j = 0; j < 4; j++)
61001e04c3fSmrg                        tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];
61101e04c3fSmrg
61201e04c3fSmrg                if (c->key->tex[i].clamp_s)
61301e04c3fSmrg                        tex_options.saturate_s |= 1 << i;
61401e04c3fSmrg                if (c->key->tex[i].clamp_t)
61501e04c3fSmrg                        tex_options.saturate_t |= 1 << i;
61601e04c3fSmrg                if (c->key->tex[i].clamp_r)
61701e04c3fSmrg                        tex_options.saturate_r |= 1 << i;
61801e04c3fSmrg        }
61901e04c3fSmrg
62001e04c3fSmrg        NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
62101e04c3fSmrg}
62201e04c3fSmrg
62301e04c3fSmrgstatic void
62401e04c3fSmrgv3d_lower_nir_late(struct v3d_compile *c)
62501e04c3fSmrg{
62601e04c3fSmrg        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
62701e04c3fSmrg        NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
62801e04c3fSmrg        NIR_PASS_V(c->s, nir_lower_idiv);
62901e04c3fSmrg}
63001e04c3fSmrg
63101e04c3fSmrgstatic void
63201e04c3fSmrgv3d_set_prog_data_uniforms(struct v3d_compile *c,
63301e04c3fSmrg                           struct v3d_prog_data *prog_data)
63401e04c3fSmrg{
63501e04c3fSmrg        int count = c->num_uniforms;
63601e04c3fSmrg        struct v3d_uniform_list *ulist = &prog_data->uniforms;
63701e04c3fSmrg
63801e04c3fSmrg        ulist->count = count;
63901e04c3fSmrg        ulist->data = ralloc_array(prog_data, uint32_t, count);
64001e04c3fSmrg        memcpy(ulist->data, c->uniform_data,
64101e04c3fSmrg               count * sizeof(*ulist->data));
64201e04c3fSmrg        ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
64301e04c3fSmrg        memcpy(ulist->contents, c->uniform_contents,
64401e04c3fSmrg               count * sizeof(*ulist->contents));
64501e04c3fSmrg}
64601e04c3fSmrg
64701e04c3fSmrg/* Copy the compiler UBO range state to the compiled shader, dropping out
64801e04c3fSmrg * arrays that were never referenced by an indirect load.
64901e04c3fSmrg *
65001e04c3fSmrg * (Note that QIR dead code elimination of an array access still leaves that
65101e04c3fSmrg * array alive, though)
65201e04c3fSmrg */
65301e04c3fSmrgstatic void
65401e04c3fSmrgv3d_set_prog_data_ubo(struct v3d_compile *c,
65501e04c3fSmrg                      struct v3d_prog_data *prog_data)
65601e04c3fSmrg{
65701e04c3fSmrg        if (!c->num_ubo_ranges)
65801e04c3fSmrg                return;
65901e04c3fSmrg
66001e04c3fSmrg        prog_data->num_ubo_ranges = 0;
66101e04c3fSmrg        prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
66201e04c3fSmrg                                             c->num_ubo_ranges);
66301e04c3fSmrg        for (int i = 0; i < c->num_ubo_ranges; i++) {
66401e04c3fSmrg                if (!c->ubo_range_used[i])
66501e04c3fSmrg                        continue;
66601e04c3fSmrg
66701e04c3fSmrg                struct v3d_ubo_range *range = &c->ubo_ranges[i];
66801e04c3fSmrg                prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
66901e04c3fSmrg                prog_data->ubo_size += range->size;
67001e04c3fSmrg        }
67101e04c3fSmrg
67201e04c3fSmrg        if (prog_data->ubo_size) {
67301e04c3fSmrg                if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
67401e04c3fSmrg                        fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
67501e04c3fSmrg                                vir_get_stage_name(c),
67601e04c3fSmrg                                c->program_id, c->variant_id,
67701e04c3fSmrg                                prog_data->ubo_size / 4);
67801e04c3fSmrg                }
67901e04c3fSmrg        }
68001e04c3fSmrg}
68101e04c3fSmrg
68201e04c3fSmrgstatic void
68301e04c3fSmrgv3d_set_prog_data(struct v3d_compile *c,
68401e04c3fSmrg                  struct v3d_prog_data *prog_data)
68501e04c3fSmrg{
68601e04c3fSmrg        prog_data->threads = c->threads;
68701e04c3fSmrg        prog_data->single_seg = !c->last_thrsw;
68801e04c3fSmrg        prog_data->spill_size = c->spill_size;
68901e04c3fSmrg
69001e04c3fSmrg        v3d_set_prog_data_uniforms(c, prog_data);
69101e04c3fSmrg        v3d_set_prog_data_ubo(c, prog_data);
69201e04c3fSmrg}
69301e04c3fSmrg
69401e04c3fSmrgstatic uint64_t *
69501e04c3fSmrgv3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
69601e04c3fSmrg{
69701e04c3fSmrg        *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);
69801e04c3fSmrg
69901e04c3fSmrg        uint64_t *qpu_insts = malloc(*final_assembly_size);
70001e04c3fSmrg        if (!qpu_insts)
70101e04c3fSmrg                return NULL;
70201e04c3fSmrg
70301e04c3fSmrg        memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);
70401e04c3fSmrg
70501e04c3fSmrg        vir_compile_destroy(c);
70601e04c3fSmrg
70701e04c3fSmrg        return qpu_insts;
70801e04c3fSmrg}
70901e04c3fSmrg
71001e04c3fSmrguint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
71101e04c3fSmrg                         struct v3d_vs_key *key,
71201e04c3fSmrg                         struct v3d_vs_prog_data *prog_data,
71301e04c3fSmrg                         nir_shader *s,
71401e04c3fSmrg                         int program_id, int variant_id,
71501e04c3fSmrg                         uint32_t *final_assembly_size)
71601e04c3fSmrg{
71701e04c3fSmrg        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
71801e04c3fSmrg                                                 program_id, variant_id);
71901e04c3fSmrg
72001e04c3fSmrg        c->vs_key = key;
72101e04c3fSmrg
72201e04c3fSmrg        /* Split our I/O vars and dead code eliminate the unused
72301e04c3fSmrg         * components.
72401e04c3fSmrg         */
72501e04c3fSmrg        NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
72601e04c3fSmrg                   nir_var_shader_in | nir_var_shader_out);
72701e04c3fSmrg        uint64_t used_outputs[4] = {0};
72801e04c3fSmrg        for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
72901e04c3fSmrg                int slot = v3d_slot_get_slot(c->vs_key->fs_inputs[i]);
73001e04c3fSmrg                int comp = v3d_slot_get_component(c->vs_key->fs_inputs[i]);
73101e04c3fSmrg                used_outputs[comp] |= 1ull << slot;
73201e04c3fSmrg        }
73301e04c3fSmrg        NIR_PASS_V(c->s, nir_remove_unused_io_vars,
73401e04c3fSmrg                   &c->s->outputs, used_outputs, NULL); /* demotes to globals */
73501e04c3fSmrg        NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
73601e04c3fSmrg        v3d_optimize_nir(c->s);
73701e04c3fSmrg        NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in);
73801e04c3fSmrg        NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
73901e04c3fSmrg                   type_size_vec4,
74001e04c3fSmrg                   (nir_lower_io_options)0);
74101e04c3fSmrg
74201e04c3fSmrg        v3d_lower_nir(c);
74301e04c3fSmrg
74401e04c3fSmrg        if (key->clamp_color)
74501e04c3fSmrg                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
74601e04c3fSmrg
74701e04c3fSmrg        if (key->base.ucp_enables) {
74801e04c3fSmrg                NIR_PASS_V(c->s, nir_lower_clip_vs, key->base.ucp_enables);
74901e04c3fSmrg                NIR_PASS_V(c->s, nir_lower_io_to_scalar,
75001e04c3fSmrg                           nir_var_shader_out);
75101e04c3fSmrg        }
75201e04c3fSmrg
75301e04c3fSmrg        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
75401e04c3fSmrg        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
75501e04c3fSmrg
75601e04c3fSmrg        v3d_lower_nir_late(c);
75701e04c3fSmrg        v3d_optimize_nir(c->s);
75801e04c3fSmrg        NIR_PASS_V(c->s, nir_convert_from_ssa, true);
75901e04c3fSmrg
76001e04c3fSmrg        v3d_nir_to_vir(c);
76101e04c3fSmrg
76201e04c3fSmrg        v3d_set_prog_data(c, &prog_data->base);
76301e04c3fSmrg
76401e04c3fSmrg        prog_data->base.num_inputs = c->num_inputs;
76501e04c3fSmrg
76601e04c3fSmrg        /* The vertex data gets format converted by the VPM so that
76701e04c3fSmrg         * each attribute channel takes up a VPM column.  Precompute
76801e04c3fSmrg         * the sizes for the shader record.
76901e04c3fSmrg         */
77001e04c3fSmrg        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
77101e04c3fSmrg                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
77201e04c3fSmrg                prog_data->vpm_input_size += c->vattr_sizes[i];
77301e04c3fSmrg        }
77401e04c3fSmrg
77501e04c3fSmrg        prog_data->uses_vid = (s->info.system_values_read &
77601e04c3fSmrg                               (1ull << SYSTEM_VALUE_VERTEX_ID));
77701e04c3fSmrg        prog_data->uses_iid = (s->info.system_values_read &
77801e04c3fSmrg                               (1ull << SYSTEM_VALUE_INSTANCE_ID));
77901e04c3fSmrg
78001e04c3fSmrg        if (prog_data->uses_vid)
78101e04c3fSmrg                prog_data->vpm_input_size++;
78201e04c3fSmrg        if (prog_data->uses_iid)
78301e04c3fSmrg                prog_data->vpm_input_size++;
78401e04c3fSmrg
78501e04c3fSmrg        /* Input/output segment size are in sectors (8 rows of 32 bits per
78601e04c3fSmrg         * channel).
78701e04c3fSmrg         */
78801e04c3fSmrg        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
78901e04c3fSmrg        prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;
79001e04c3fSmrg
79101e04c3fSmrg        /* Compute VCM cache size.  We set up our program to take up less than
79201e04c3fSmrg         * half of the VPM, so that any set of bin and render programs won't
79301e04c3fSmrg         * run out of space.  We need space for at least one input segment,
79401e04c3fSmrg         * and then allocate the rest to output segments (one for the current
79501e04c3fSmrg         * program, the rest to VCM).  The valid range of the VCM cache size
79601e04c3fSmrg         * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
79701e04c3fSmrg         * batches.
79801e04c3fSmrg         */
79901e04c3fSmrg        assert(c->devinfo->vpm_size);
80001e04c3fSmrg        int sector_size = 16 * sizeof(uint32_t) * 8;
80101e04c3fSmrg        int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
80201e04c3fSmrg        int half_vpm = vpm_size_in_sectors / 2;
80301e04c3fSmrg        int vpm_output_sectors = half_vpm - prog_data->vpm_input_size;
80401e04c3fSmrg        int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size;
80501e04c3fSmrg        assert(vpm_output_batches >= 2);
80601e04c3fSmrg        prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);
80701e04c3fSmrg
80801e04c3fSmrg        return v3d_return_qpu_insts(c, final_assembly_size);
80901e04c3fSmrg}
81001e04c3fSmrg
81101e04c3fSmrgstatic void
81201e04c3fSmrgv3d_set_fs_prog_data_inputs(struct v3d_compile *c,
81301e04c3fSmrg                            struct v3d_fs_prog_data *prog_data)
81401e04c3fSmrg{
81501e04c3fSmrg        prog_data->base.num_inputs = c->num_inputs;
81601e04c3fSmrg        memcpy(prog_data->input_slots, c->input_slots,
81701e04c3fSmrg               c->num_inputs * sizeof(*c->input_slots));
81801e04c3fSmrg
81901e04c3fSmrg        STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
82001e04c3fSmrg                      (V3D_MAX_FS_INPUTS - 1) / 24);
82101e04c3fSmrg        for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
82201e04c3fSmrg                if (BITSET_TEST(c->flat_shade_flags, i))
82301e04c3fSmrg                        prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);
82401e04c3fSmrg
82501e04c3fSmrg                if (BITSET_TEST(c->noperspective_flags, i))
82601e04c3fSmrg                        prog_data->noperspective_flags[i / 24] |= 1 << (i % 24);
82701e04c3fSmrg
82801e04c3fSmrg                if (BITSET_TEST(c->centroid_flags, i))
82901e04c3fSmrg                        prog_data->centroid_flags[i / 24] |= 1 << (i % 24);
83001e04c3fSmrg        }
83101e04c3fSmrg}
83201e04c3fSmrg
83301e04c3fSmrgstatic void
83401e04c3fSmrgv3d_fixup_fs_output_types(struct v3d_compile *c)
83501e04c3fSmrg{
83601e04c3fSmrg        nir_foreach_variable(var, &c->s->outputs) {
83701e04c3fSmrg                uint32_t mask = 0;
83801e04c3fSmrg
83901e04c3fSmrg                switch (var->data.location) {
84001e04c3fSmrg                case FRAG_RESULT_COLOR:
84101e04c3fSmrg                        mask = ~0;
84201e04c3fSmrg                        break;
84301e04c3fSmrg                case FRAG_RESULT_DATA0:
84401e04c3fSmrg                case FRAG_RESULT_DATA1:
84501e04c3fSmrg                case FRAG_RESULT_DATA2:
84601e04c3fSmrg                case FRAG_RESULT_DATA3:
84701e04c3fSmrg                        mask = 1 << (var->data.location - FRAG_RESULT_DATA0);
84801e04c3fSmrg                        break;
84901e04c3fSmrg                }
85001e04c3fSmrg
85101e04c3fSmrg                if (c->fs_key->int_color_rb & mask) {
85201e04c3fSmrg                        var->type =
85301e04c3fSmrg                                glsl_vector_type(GLSL_TYPE_INT,
85401e04c3fSmrg                                                 glsl_get_components(var->type));
85501e04c3fSmrg                } else if (c->fs_key->uint_color_rb & mask) {
85601e04c3fSmrg                        var->type =
85701e04c3fSmrg                                glsl_vector_type(GLSL_TYPE_UINT,
85801e04c3fSmrg                                                 glsl_get_components(var->type));
85901e04c3fSmrg                }
86001e04c3fSmrg        }
86101e04c3fSmrg}
86201e04c3fSmrg
86301e04c3fSmrguint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
86401e04c3fSmrg                         struct v3d_fs_key *key,
86501e04c3fSmrg                         struct v3d_fs_prog_data *prog_data,
86601e04c3fSmrg                         nir_shader *s,
86701e04c3fSmrg                         int program_id, int variant_id,
86801e04c3fSmrg                         uint32_t *final_assembly_size)
86901e04c3fSmrg{
87001e04c3fSmrg        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
87101e04c3fSmrg                                                 program_id, variant_id);
87201e04c3fSmrg
87301e04c3fSmrg        c->fs_key = key;
87401e04c3fSmrg
87501e04c3fSmrg        if (key->int_color_rb || key->uint_color_rb)
87601e04c3fSmrg                v3d_fixup_fs_output_types(c);
87701e04c3fSmrg
87801e04c3fSmrg        v3d_lower_nir(c);
87901e04c3fSmrg
88001e04c3fSmrg        if (key->light_twoside)
88101e04c3fSmrg                NIR_PASS_V(c->s, nir_lower_two_sided_color);
88201e04c3fSmrg
88301e04c3fSmrg        if (key->clamp_color)
88401e04c3fSmrg                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
88501e04c3fSmrg
88601e04c3fSmrg        if (key->alpha_test) {
88701e04c3fSmrg                NIR_PASS_V(c->s, nir_lower_alpha_test, key->alpha_test_func,
88801e04c3fSmrg                           false);
88901e04c3fSmrg        }
89001e04c3fSmrg
89101e04c3fSmrg        if (key->base.ucp_enables)
89201e04c3fSmrg                NIR_PASS_V(c->s, nir_lower_clip_fs, key->base.ucp_enables);
89301e04c3fSmrg
89401e04c3fSmrg        /* Note: FS input scalarizing must happen after
89501e04c3fSmrg         * nir_lower_two_sided_color, which only handles a vec4 at a time.
89601e04c3fSmrg         */
89701e04c3fSmrg        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
89801e04c3fSmrg
89901e04c3fSmrg        v3d_lower_nir_late(c);
90001e04c3fSmrg        v3d_optimize_nir(c->s);
90101e04c3fSmrg        NIR_PASS_V(c->s, nir_convert_from_ssa, true);
90201e04c3fSmrg
90301e04c3fSmrg        v3d_nir_to_vir(c);
90401e04c3fSmrg
90501e04c3fSmrg        v3d_set_prog_data(c, &prog_data->base);
90601e04c3fSmrg        v3d_set_fs_prog_data_inputs(c, prog_data);
90701e04c3fSmrg        prog_data->writes_z = (c->s->info.outputs_written &
90801e04c3fSmrg                               (1 << FRAG_RESULT_DEPTH));
90901e04c3fSmrg        prog_data->discard = (c->s->info.fs.uses_discard ||
91001e04c3fSmrg                              c->fs_key->sample_alpha_to_coverage);
91101e04c3fSmrg        prog_data->uses_center_w = c->uses_center_w;
91201e04c3fSmrg
91301e04c3fSmrg        return v3d_return_qpu_insts(c, final_assembly_size);
91401e04c3fSmrg}
91501e04c3fSmrg
91601e04c3fSmrgvoid
91701e04c3fSmrgvir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
91801e04c3fSmrg{
91901e04c3fSmrg        if (qinst->dst.file == QFILE_TEMP)
92001e04c3fSmrg                c->defs[qinst->dst.index] = NULL;
92101e04c3fSmrg
92201e04c3fSmrg        assert(&qinst->link != c->cursor.link);
92301e04c3fSmrg
92401e04c3fSmrg        list_del(&qinst->link);
92501e04c3fSmrg        free(qinst);
92601e04c3fSmrg
92701e04c3fSmrg        c->live_intervals_valid = false;
92801e04c3fSmrg}
92901e04c3fSmrg
93001e04c3fSmrgstruct qreg
93101e04c3fSmrgvir_follow_movs(struct v3d_compile *c, struct qreg reg)
93201e04c3fSmrg{
93301e04c3fSmrg        /* XXX
93401e04c3fSmrg        int pack = reg.pack;
93501e04c3fSmrg
93601e04c3fSmrg        while (reg.file == QFILE_TEMP &&
93701e04c3fSmrg               c->defs[reg.index] &&
93801e04c3fSmrg               (c->defs[reg.index]->op == QOP_MOV ||
93901e04c3fSmrg                c->defs[reg.index]->op == QOP_FMOV) &&
94001e04c3fSmrg               !c->defs[reg.index]->dst.pack &&
94101e04c3fSmrg               !c->defs[reg.index]->src[0].pack) {
94201e04c3fSmrg                reg = c->defs[reg.index]->src[0];
94301e04c3fSmrg        }
94401e04c3fSmrg
94501e04c3fSmrg        reg.pack = pack;
94601e04c3fSmrg        */
94701e04c3fSmrg        return reg;
94801e04c3fSmrg}
94901e04c3fSmrg
95001e04c3fSmrgvoid
95101e04c3fSmrgvir_compile_destroy(struct v3d_compile *c)
95201e04c3fSmrg{
95301e04c3fSmrg        /* Defuse the assert that we aren't removing the cursor's instruction.
95401e04c3fSmrg         */
95501e04c3fSmrg        c->cursor.link = NULL;
95601e04c3fSmrg
95701e04c3fSmrg        vir_for_each_block(block, c) {
95801e04c3fSmrg                while (!list_empty(&block->instructions)) {
95901e04c3fSmrg                        struct qinst *qinst =
96001e04c3fSmrg                                list_first_entry(&block->instructions,
96101e04c3fSmrg                                                 struct qinst, link);
96201e04c3fSmrg                        vir_remove_instruction(c, qinst);
96301e04c3fSmrg                }
96401e04c3fSmrg        }
96501e04c3fSmrg
96601e04c3fSmrg        ralloc_free(c);
96701e04c3fSmrg}
96801e04c3fSmrg
96901e04c3fSmrgstruct qreg
97001e04c3fSmrgvir_uniform(struct v3d_compile *c,
97101e04c3fSmrg            enum quniform_contents contents,
97201e04c3fSmrg            uint32_t data)
97301e04c3fSmrg{
97401e04c3fSmrg        for (int i = 0; i < c->num_uniforms; i++) {
97501e04c3fSmrg                if (c->uniform_contents[i] == contents &&
97601e04c3fSmrg                    c->uniform_data[i] == data) {
97701e04c3fSmrg                        return vir_reg(QFILE_UNIF, i);
97801e04c3fSmrg                }
97901e04c3fSmrg        }
98001e04c3fSmrg
98101e04c3fSmrg        uint32_t uniform = c->num_uniforms++;
98201e04c3fSmrg
98301e04c3fSmrg        if (uniform >= c->uniform_array_size) {
98401e04c3fSmrg                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
98501e04c3fSmrg                                             c->uniform_array_size * 2);
98601e04c3fSmrg
98701e04c3fSmrg                c->uniform_data = reralloc(c, c->uniform_data,
98801e04c3fSmrg                                           uint32_t,
98901e04c3fSmrg                                           c->uniform_array_size);
99001e04c3fSmrg                c->uniform_contents = reralloc(c, c->uniform_contents,
99101e04c3fSmrg                                               enum quniform_contents,
99201e04c3fSmrg                                               c->uniform_array_size);
99301e04c3fSmrg        }
99401e04c3fSmrg
99501e04c3fSmrg        c->uniform_contents[uniform] = contents;
99601e04c3fSmrg        c->uniform_data[uniform] = data;
99701e04c3fSmrg
99801e04c3fSmrg        return vir_reg(QFILE_UNIF, uniform);
99901e04c3fSmrg}
100001e04c3fSmrg
100101e04c3fSmrgstatic bool
100201e04c3fSmrgvir_can_set_flags(struct v3d_compile *c, struct qinst *inst)
100301e04c3fSmrg{
100401e04c3fSmrg        if (c->devinfo->ver >= 40 && (v3d_qpu_reads_vpm(&inst->qpu) ||
100501e04c3fSmrg                                      v3d_qpu_uses_sfu(&inst->qpu))) {
100601e04c3fSmrg                return false;
100701e04c3fSmrg        }
100801e04c3fSmrg
100901e04c3fSmrg        return true;
101001e04c3fSmrg}
101101e04c3fSmrg
101201e04c3fSmrgvoid
101301e04c3fSmrgvir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
101401e04c3fSmrg{
101501e04c3fSmrg        struct qinst *last_inst = NULL;
101601e04c3fSmrg
101701e04c3fSmrg        if (!list_empty(&c->cur_block->instructions)) {
101801e04c3fSmrg                last_inst = (struct qinst *)c->cur_block->instructions.prev;
101901e04c3fSmrg
102001e04c3fSmrg                /* Can't stuff the PF into the last last inst if our cursor
102101e04c3fSmrg                 * isn't pointing after it.
102201e04c3fSmrg                 */
102301e04c3fSmrg                struct vir_cursor after_inst = vir_after_inst(last_inst);
102401e04c3fSmrg                if (c->cursor.mode != after_inst.mode ||
102501e04c3fSmrg                    c->cursor.link != after_inst.link)
102601e04c3fSmrg                        last_inst = NULL;
102701e04c3fSmrg        }
102801e04c3fSmrg
102901e04c3fSmrg        if (src.file != QFILE_TEMP ||
103001e04c3fSmrg            !c->defs[src.index] ||
103101e04c3fSmrg            last_inst != c->defs[src.index] ||
103201e04c3fSmrg            !vir_can_set_flags(c, last_inst)) {
103301e04c3fSmrg                /* XXX: Make the MOV be the appropriate type */
103401e04c3fSmrg                last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
103501e04c3fSmrg        }
103601e04c3fSmrg
103701e04c3fSmrg        vir_set_pf(last_inst, pf);
103801e04c3fSmrg}
103901e04c3fSmrg
/* Runs one VIR optimization pass, func(c), and records whether it made
 * progress.
 *
 * The macro relies on the expanding scope providing "c" (the compile
 * context), "progress" (set to true when the pass reports progress),
 * "print_opt_debug" (enables the stderr message) and "pass" (the iteration
 * number that gets printed).
 */
#define OPTPASS(func)                                                   \
        do {                                                            \
                if (func(c)) {                                          \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)
105301e04c3fSmrg
105401e04c3fSmrgvoid
105501e04c3fSmrgvir_optimize(struct v3d_compile *c)
105601e04c3fSmrg{
105701e04c3fSmrg        bool print_opt_debug = false;
105801e04c3fSmrg        int pass = 1;
105901e04c3fSmrg
106001e04c3fSmrg        while (true) {
106101e04c3fSmrg                bool progress = false;
106201e04c3fSmrg
106301e04c3fSmrg                OPTPASS(vir_opt_copy_propagate);
106401e04c3fSmrg                OPTPASS(vir_opt_dead_code);
106501e04c3fSmrg                OPTPASS(vir_opt_small_immediates);
106601e04c3fSmrg
106701e04c3fSmrg                if (!progress)
106801e04c3fSmrg                        break;
106901e04c3fSmrg
107001e04c3fSmrg                pass++;
107101e04c3fSmrg        }
107201e04c3fSmrg}
107301e04c3fSmrg
107401e04c3fSmrgconst char *
107501e04c3fSmrgvir_get_stage_name(struct v3d_compile *c)
107601e04c3fSmrg{
107701e04c3fSmrg        if (c->vs_key && c->vs_key->is_coord)
107801e04c3fSmrg                return "MESA_SHADER_COORD";
107901e04c3fSmrg        else
108001e04c3fSmrg                return gl_shader_stage_name(c->s->info.stage);
108101e04c3fSmrg}
1082