vir.c revision 01e04c3f
101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2016-2017 Broadcom 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 
2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include "broadcom/common/v3d_device_info.h" 2501e04c3fSmrg#include "v3d_compiler.h" 2601e04c3fSmrg 2701e04c3fSmrgint 2801e04c3fSmrgvir_get_non_sideband_nsrc(struct qinst *inst) 2901e04c3fSmrg{ 3001e04c3fSmrg switch (inst->qpu.type) { 3101e04c3fSmrg case V3D_QPU_INSTR_TYPE_BRANCH: 3201e04c3fSmrg return 0; 3301e04c3fSmrg case V3D_QPU_INSTR_TYPE_ALU: 3401e04c3fSmrg if (inst->qpu.alu.add.op != V3D_QPU_A_NOP) 3501e04c3fSmrg return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op); 3601e04c3fSmrg else 3701e04c3fSmrg return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op); 3801e04c3fSmrg } 3901e04c3fSmrg 4001e04c3fSmrg return 0; 4101e04c3fSmrg} 4201e04c3fSmrg 4301e04c3fSmrgint 4401e04c3fSmrgvir_get_nsrc(struct qinst *inst) 4501e04c3fSmrg{ 4601e04c3fSmrg int nsrc = vir_get_non_sideband_nsrc(inst); 4701e04c3fSmrg 4801e04c3fSmrg if (vir_has_implicit_uniform(inst)) 4901e04c3fSmrg nsrc++; 5001e04c3fSmrg 5101e04c3fSmrg return nsrc; 5201e04c3fSmrg} 5301e04c3fSmrg 5401e04c3fSmrgbool 5501e04c3fSmrgvir_has_implicit_uniform(struct qinst *inst) 5601e04c3fSmrg{ 5701e04c3fSmrg switch (inst->qpu.type) { 5801e04c3fSmrg case V3D_QPU_INSTR_TYPE_BRANCH: 5901e04c3fSmrg return true; 6001e04c3fSmrg case V3D_QPU_INSTR_TYPE_ALU: 6101e04c3fSmrg switch (inst->dst.file) { 6201e04c3fSmrg case QFILE_TLBU: 6301e04c3fSmrg return true; 6401e04c3fSmrg default: 6501e04c3fSmrg return inst->has_implicit_uniform; 6601e04c3fSmrg } 6701e04c3fSmrg } 6801e04c3fSmrg return false; 6901e04c3fSmrg} 7001e04c3fSmrg 7101e04c3fSmrg/* The sideband uniform for textures gets stored after the normal ALU 7201e04c3fSmrg * arguments. 
7301e04c3fSmrg */ 7401e04c3fSmrgint 7501e04c3fSmrgvir_get_implicit_uniform_src(struct qinst *inst) 7601e04c3fSmrg{ 7701e04c3fSmrg if (!vir_has_implicit_uniform(inst)) 7801e04c3fSmrg return -1; 7901e04c3fSmrg return vir_get_nsrc(inst) - 1; 8001e04c3fSmrg} 8101e04c3fSmrg 8201e04c3fSmrg/** 8301e04c3fSmrg * Returns whether the instruction has any side effects that must be 8401e04c3fSmrg * preserved. 8501e04c3fSmrg */ 8601e04c3fSmrgbool 8701e04c3fSmrgvir_has_side_effects(struct v3d_compile *c, struct qinst *inst) 8801e04c3fSmrg{ 8901e04c3fSmrg switch (inst->qpu.type) { 9001e04c3fSmrg case V3D_QPU_INSTR_TYPE_BRANCH: 9101e04c3fSmrg return true; 9201e04c3fSmrg case V3D_QPU_INSTR_TYPE_ALU: 9301e04c3fSmrg switch (inst->qpu.alu.add.op) { 9401e04c3fSmrg case V3D_QPU_A_SETREVF: 9501e04c3fSmrg case V3D_QPU_A_SETMSF: 9601e04c3fSmrg case V3D_QPU_A_VPMSETUP: 9701e04c3fSmrg case V3D_QPU_A_STVPMV: 9801e04c3fSmrg case V3D_QPU_A_STVPMD: 9901e04c3fSmrg case V3D_QPU_A_STVPMP: 10001e04c3fSmrg case V3D_QPU_A_VPMWT: 10101e04c3fSmrg case V3D_QPU_A_TMUWT: 10201e04c3fSmrg return true; 10301e04c3fSmrg default: 10401e04c3fSmrg break; 10501e04c3fSmrg } 10601e04c3fSmrg 10701e04c3fSmrg switch (inst->qpu.alu.mul.op) { 10801e04c3fSmrg case V3D_QPU_M_MULTOP: 10901e04c3fSmrg return true; 11001e04c3fSmrg default: 11101e04c3fSmrg break; 11201e04c3fSmrg } 11301e04c3fSmrg } 11401e04c3fSmrg 11501e04c3fSmrg if (inst->qpu.sig.ldtmu || 11601e04c3fSmrg inst->qpu.sig.ldvary || 11701e04c3fSmrg inst->qpu.sig.wrtmuc || 11801e04c3fSmrg inst->qpu.sig.thrsw) { 11901e04c3fSmrg return true; 12001e04c3fSmrg } 12101e04c3fSmrg 12201e04c3fSmrg return false; 12301e04c3fSmrg} 12401e04c3fSmrg 12501e04c3fSmrgbool 12601e04c3fSmrgvir_is_float_input(struct qinst *inst) 12701e04c3fSmrg{ 12801e04c3fSmrg /* XXX: More instrs */ 12901e04c3fSmrg switch (inst->qpu.type) { 13001e04c3fSmrg case V3D_QPU_INSTR_TYPE_BRANCH: 13101e04c3fSmrg return false; 13201e04c3fSmrg case V3D_QPU_INSTR_TYPE_ALU: 13301e04c3fSmrg switch (inst->qpu.alu.add.op) 
{ 13401e04c3fSmrg case V3D_QPU_A_FADD: 13501e04c3fSmrg case V3D_QPU_A_FSUB: 13601e04c3fSmrg case V3D_QPU_A_FMIN: 13701e04c3fSmrg case V3D_QPU_A_FMAX: 13801e04c3fSmrg case V3D_QPU_A_FTOIN: 13901e04c3fSmrg return true; 14001e04c3fSmrg default: 14101e04c3fSmrg break; 14201e04c3fSmrg } 14301e04c3fSmrg 14401e04c3fSmrg switch (inst->qpu.alu.mul.op) { 14501e04c3fSmrg case V3D_QPU_M_FMOV: 14601e04c3fSmrg case V3D_QPU_M_VFMUL: 14701e04c3fSmrg case V3D_QPU_M_FMUL: 14801e04c3fSmrg return true; 14901e04c3fSmrg default: 15001e04c3fSmrg break; 15101e04c3fSmrg } 15201e04c3fSmrg } 15301e04c3fSmrg 15401e04c3fSmrg return false; 15501e04c3fSmrg} 15601e04c3fSmrg 15701e04c3fSmrgbool 15801e04c3fSmrgvir_is_raw_mov(struct qinst *inst) 15901e04c3fSmrg{ 16001e04c3fSmrg if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || 16101e04c3fSmrg (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV && 16201e04c3fSmrg inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) { 16301e04c3fSmrg return false; 16401e04c3fSmrg } 16501e04c3fSmrg 16601e04c3fSmrg if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE || 16701e04c3fSmrg inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) { 16801e04c3fSmrg return false; 16901e04c3fSmrg } 17001e04c3fSmrg 17101e04c3fSmrg if (inst->qpu.flags.ac != V3D_QPU_COND_NONE || 17201e04c3fSmrg inst->qpu.flags.mc != V3D_QPU_COND_NONE) 17301e04c3fSmrg return false; 17401e04c3fSmrg 17501e04c3fSmrg return true; 17601e04c3fSmrg} 17701e04c3fSmrg 17801e04c3fSmrgbool 17901e04c3fSmrgvir_is_add(struct qinst *inst) 18001e04c3fSmrg{ 18101e04c3fSmrg return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && 18201e04c3fSmrg inst->qpu.alu.add.op != V3D_QPU_A_NOP); 18301e04c3fSmrg} 18401e04c3fSmrg 18501e04c3fSmrgbool 18601e04c3fSmrgvir_is_mul(struct qinst *inst) 18701e04c3fSmrg{ 18801e04c3fSmrg return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && 18901e04c3fSmrg inst->qpu.alu.mul.op != V3D_QPU_M_NOP); 19001e04c3fSmrg} 19101e04c3fSmrg 19201e04c3fSmrgbool 19301e04c3fSmrgvir_is_tex(struct qinst *inst) 19401e04c3fSmrg{ 19501e04c3fSmrg 
if (inst->dst.file == QFILE_MAGIC) 19601e04c3fSmrg return v3d_qpu_magic_waddr_is_tmu(inst->dst.index); 19701e04c3fSmrg 19801e04c3fSmrg if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && 19901e04c3fSmrg inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) { 20001e04c3fSmrg return true; 20101e04c3fSmrg } 20201e04c3fSmrg 20301e04c3fSmrg return false; 20401e04c3fSmrg} 20501e04c3fSmrg 20601e04c3fSmrgbool 20701e04c3fSmrgvir_depends_on_flags(struct qinst *inst) 20801e04c3fSmrg{ 20901e04c3fSmrg if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) { 21001e04c3fSmrg return (inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS); 21101e04c3fSmrg } else { 21201e04c3fSmrg return (inst->qpu.flags.ac != V3D_QPU_COND_NONE && 21301e04c3fSmrg inst->qpu.flags.mc != V3D_QPU_COND_NONE); 21401e04c3fSmrg } 21501e04c3fSmrg} 21601e04c3fSmrg 21701e04c3fSmrgbool 21801e04c3fSmrgvir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst) 21901e04c3fSmrg{ 22001e04c3fSmrg for (int i = 0; i < vir_get_nsrc(inst); i++) { 22101e04c3fSmrg switch (inst->src[i].file) { 22201e04c3fSmrg case QFILE_VPM: 22301e04c3fSmrg return true; 22401e04c3fSmrg default: 22501e04c3fSmrg break; 22601e04c3fSmrg } 22701e04c3fSmrg } 22801e04c3fSmrg 22901e04c3fSmrg if (devinfo->ver < 41 && (inst->qpu.sig.ldvary || 23001e04c3fSmrg inst->qpu.sig.ldtlb || 23101e04c3fSmrg inst->qpu.sig.ldtlbu || 23201e04c3fSmrg inst->qpu.sig.ldvpm)) { 23301e04c3fSmrg return true; 23401e04c3fSmrg } 23501e04c3fSmrg 23601e04c3fSmrg return false; 23701e04c3fSmrg} 23801e04c3fSmrg 23901e04c3fSmrgbool 24001e04c3fSmrgvir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst) 24101e04c3fSmrg{ 24201e04c3fSmrg switch (inst->dst.file) { 24301e04c3fSmrg case QFILE_MAGIC: 24401e04c3fSmrg switch (inst->dst.index) { 24501e04c3fSmrg case V3D_QPU_WADDR_RECIP: 24601e04c3fSmrg case V3D_QPU_WADDR_RSQRT: 24701e04c3fSmrg case V3D_QPU_WADDR_EXP: 24801e04c3fSmrg case V3D_QPU_WADDR_LOG: 24901e04c3fSmrg case V3D_QPU_WADDR_SIN: 25001e04c3fSmrg return true; 
25101e04c3fSmrg } 25201e04c3fSmrg break; 25301e04c3fSmrg default: 25401e04c3fSmrg break; 25501e04c3fSmrg } 25601e04c3fSmrg 25701e04c3fSmrg if (devinfo->ver < 41 && inst->qpu.sig.ldtmu) 25801e04c3fSmrg return true; 25901e04c3fSmrg 26001e04c3fSmrg return false; 26101e04c3fSmrg} 26201e04c3fSmrg 26301e04c3fSmrgvoid 26401e04c3fSmrgvir_set_unpack(struct qinst *inst, int src, 26501e04c3fSmrg enum v3d_qpu_input_unpack unpack) 26601e04c3fSmrg{ 26701e04c3fSmrg assert(src == 0 || src == 1); 26801e04c3fSmrg 26901e04c3fSmrg if (vir_is_add(inst)) { 27001e04c3fSmrg if (src == 0) 27101e04c3fSmrg inst->qpu.alu.add.a_unpack = unpack; 27201e04c3fSmrg else 27301e04c3fSmrg inst->qpu.alu.add.b_unpack = unpack; 27401e04c3fSmrg } else { 27501e04c3fSmrg assert(vir_is_mul(inst)); 27601e04c3fSmrg if (src == 0) 27701e04c3fSmrg inst->qpu.alu.mul.a_unpack = unpack; 27801e04c3fSmrg else 27901e04c3fSmrg inst->qpu.alu.mul.b_unpack = unpack; 28001e04c3fSmrg } 28101e04c3fSmrg} 28201e04c3fSmrg 28301e04c3fSmrgvoid 28401e04c3fSmrgvir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond) 28501e04c3fSmrg{ 28601e04c3fSmrg if (vir_is_add(inst)) { 28701e04c3fSmrg inst->qpu.flags.ac = cond; 28801e04c3fSmrg } else { 28901e04c3fSmrg assert(vir_is_mul(inst)); 29001e04c3fSmrg inst->qpu.flags.mc = cond; 29101e04c3fSmrg } 29201e04c3fSmrg} 29301e04c3fSmrg 29401e04c3fSmrgvoid 29501e04c3fSmrgvir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf) 29601e04c3fSmrg{ 29701e04c3fSmrg if (vir_is_add(inst)) { 29801e04c3fSmrg inst->qpu.flags.apf = pf; 29901e04c3fSmrg } else { 30001e04c3fSmrg assert(vir_is_mul(inst)); 30101e04c3fSmrg inst->qpu.flags.mpf = pf; 30201e04c3fSmrg } 30301e04c3fSmrg} 30401e04c3fSmrg 30501e04c3fSmrg#if 0 30601e04c3fSmrguint8_t 30701e04c3fSmrgvir_channels_written(struct qinst *inst) 30801e04c3fSmrg{ 30901e04c3fSmrg if (vir_is_mul(inst)) { 31001e04c3fSmrg switch (inst->dst.pack) { 31101e04c3fSmrg case QPU_PACK_MUL_NOP: 31201e04c3fSmrg case QPU_PACK_MUL_8888: 31301e04c3fSmrg return 0xf; 31401e04c3fSmrg case 
QPU_PACK_MUL_8A: 31501e04c3fSmrg return 0x1; 31601e04c3fSmrg case QPU_PACK_MUL_8B: 31701e04c3fSmrg return 0x2; 31801e04c3fSmrg case QPU_PACK_MUL_8C: 31901e04c3fSmrg return 0x4; 32001e04c3fSmrg case QPU_PACK_MUL_8D: 32101e04c3fSmrg return 0x8; 32201e04c3fSmrg } 32301e04c3fSmrg } else { 32401e04c3fSmrg switch (inst->dst.pack) { 32501e04c3fSmrg case QPU_PACK_A_NOP: 32601e04c3fSmrg case QPU_PACK_A_8888: 32701e04c3fSmrg case QPU_PACK_A_8888_SAT: 32801e04c3fSmrg case QPU_PACK_A_32_SAT: 32901e04c3fSmrg return 0xf; 33001e04c3fSmrg case QPU_PACK_A_8A: 33101e04c3fSmrg case QPU_PACK_A_8A_SAT: 33201e04c3fSmrg return 0x1; 33301e04c3fSmrg case QPU_PACK_A_8B: 33401e04c3fSmrg case QPU_PACK_A_8B_SAT: 33501e04c3fSmrg return 0x2; 33601e04c3fSmrg case QPU_PACK_A_8C: 33701e04c3fSmrg case QPU_PACK_A_8C_SAT: 33801e04c3fSmrg return 0x4; 33901e04c3fSmrg case QPU_PACK_A_8D: 34001e04c3fSmrg case QPU_PACK_A_8D_SAT: 34101e04c3fSmrg return 0x8; 34201e04c3fSmrg case QPU_PACK_A_16A: 34301e04c3fSmrg case QPU_PACK_A_16A_SAT: 34401e04c3fSmrg return 0x3; 34501e04c3fSmrg case QPU_PACK_A_16B: 34601e04c3fSmrg case QPU_PACK_A_16B_SAT: 34701e04c3fSmrg return 0xc; 34801e04c3fSmrg } 34901e04c3fSmrg } 35001e04c3fSmrg unreachable("Bad pack field"); 35101e04c3fSmrg} 35201e04c3fSmrg#endif 35301e04c3fSmrg 35401e04c3fSmrgstruct qreg 35501e04c3fSmrgvir_get_temp(struct v3d_compile *c) 35601e04c3fSmrg{ 35701e04c3fSmrg struct qreg reg; 35801e04c3fSmrg 35901e04c3fSmrg reg.file = QFILE_TEMP; 36001e04c3fSmrg reg.index = c->num_temps++; 36101e04c3fSmrg 36201e04c3fSmrg if (c->num_temps > c->defs_array_size) { 36301e04c3fSmrg uint32_t old_size = c->defs_array_size; 36401e04c3fSmrg c->defs_array_size = MAX2(old_size * 2, 16); 36501e04c3fSmrg 36601e04c3fSmrg c->defs = reralloc(c, c->defs, struct qinst *, 36701e04c3fSmrg c->defs_array_size); 36801e04c3fSmrg memset(&c->defs[old_size], 0, 36901e04c3fSmrg sizeof(c->defs[0]) * (c->defs_array_size - old_size)); 37001e04c3fSmrg 37101e04c3fSmrg c->spillable = reralloc(c, 
c->spillable, 37201e04c3fSmrg BITSET_WORD, 37301e04c3fSmrg BITSET_WORDS(c->defs_array_size)); 37401e04c3fSmrg for (int i = old_size; i < c->defs_array_size; i++) 37501e04c3fSmrg BITSET_SET(c->spillable, i); 37601e04c3fSmrg } 37701e04c3fSmrg 37801e04c3fSmrg return reg; 37901e04c3fSmrg} 38001e04c3fSmrg 38101e04c3fSmrgstruct qinst * 38201e04c3fSmrgvir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1) 38301e04c3fSmrg{ 38401e04c3fSmrg struct qinst *inst = calloc(1, sizeof(*inst)); 38501e04c3fSmrg 38601e04c3fSmrg inst->qpu = v3d_qpu_nop(); 38701e04c3fSmrg inst->qpu.alu.add.op = op; 38801e04c3fSmrg 38901e04c3fSmrg inst->dst = dst; 39001e04c3fSmrg inst->src[0] = src0; 39101e04c3fSmrg inst->src[1] = src1; 39201e04c3fSmrg inst->uniform = ~0; 39301e04c3fSmrg 39401e04c3fSmrg return inst; 39501e04c3fSmrg} 39601e04c3fSmrg 39701e04c3fSmrgstruct qinst * 39801e04c3fSmrgvir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1) 39901e04c3fSmrg{ 40001e04c3fSmrg struct qinst *inst = calloc(1, sizeof(*inst)); 40101e04c3fSmrg 40201e04c3fSmrg inst->qpu = v3d_qpu_nop(); 40301e04c3fSmrg inst->qpu.alu.mul.op = op; 40401e04c3fSmrg 40501e04c3fSmrg inst->dst = dst; 40601e04c3fSmrg inst->src[0] = src0; 40701e04c3fSmrg inst->src[1] = src1; 40801e04c3fSmrg inst->uniform = ~0; 40901e04c3fSmrg 41001e04c3fSmrg return inst; 41101e04c3fSmrg} 41201e04c3fSmrg 41301e04c3fSmrgstruct qinst * 41401e04c3fSmrgvir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src) 41501e04c3fSmrg{ 41601e04c3fSmrg struct qinst *inst = calloc(1, sizeof(*inst)); 41701e04c3fSmrg 41801e04c3fSmrg inst->qpu = v3d_qpu_nop(); 41901e04c3fSmrg inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH; 42001e04c3fSmrg inst->qpu.branch.cond = cond; 42101e04c3fSmrg inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE; 42201e04c3fSmrg inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL; 42301e04c3fSmrg inst->qpu.branch.ub = true; 42401e04c3fSmrg inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL; 
42501e04c3fSmrg 42601e04c3fSmrg inst->dst = vir_reg(QFILE_NULL, 0); 42701e04c3fSmrg inst->src[0] = src; 42801e04c3fSmrg inst->uniform = ~0; 42901e04c3fSmrg 43001e04c3fSmrg return inst; 43101e04c3fSmrg} 43201e04c3fSmrg 43301e04c3fSmrgstatic void 43401e04c3fSmrgvir_emit(struct v3d_compile *c, struct qinst *inst) 43501e04c3fSmrg{ 43601e04c3fSmrg switch (c->cursor.mode) { 43701e04c3fSmrg case vir_cursor_add: 43801e04c3fSmrg list_add(&inst->link, c->cursor.link); 43901e04c3fSmrg break; 44001e04c3fSmrg case vir_cursor_addtail: 44101e04c3fSmrg list_addtail(&inst->link, c->cursor.link); 44201e04c3fSmrg break; 44301e04c3fSmrg } 44401e04c3fSmrg 44501e04c3fSmrg c->cursor = vir_after_inst(inst); 44601e04c3fSmrg c->live_intervals_valid = false; 44701e04c3fSmrg} 44801e04c3fSmrg 44901e04c3fSmrg/* Updates inst to write to a new temporary, emits it, and notes the def. */ 45001e04c3fSmrgstruct qreg 45101e04c3fSmrgvir_emit_def(struct v3d_compile *c, struct qinst *inst) 45201e04c3fSmrg{ 45301e04c3fSmrg assert(inst->dst.file == QFILE_NULL); 45401e04c3fSmrg 45501e04c3fSmrg /* If we're emitting an instruction that's a def, it had better be 45601e04c3fSmrg * writing a register. 
45701e04c3fSmrg */ 45801e04c3fSmrg if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { 45901e04c3fSmrg assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP || 46001e04c3fSmrg v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op)); 46101e04c3fSmrg assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP || 46201e04c3fSmrg v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op)); 46301e04c3fSmrg } 46401e04c3fSmrg 46501e04c3fSmrg inst->dst = vir_get_temp(c); 46601e04c3fSmrg 46701e04c3fSmrg if (inst->dst.file == QFILE_TEMP) 46801e04c3fSmrg c->defs[inst->dst.index] = inst; 46901e04c3fSmrg 47001e04c3fSmrg vir_emit(c, inst); 47101e04c3fSmrg 47201e04c3fSmrg return inst->dst; 47301e04c3fSmrg} 47401e04c3fSmrg 47501e04c3fSmrgstruct qinst * 47601e04c3fSmrgvir_emit_nondef(struct v3d_compile *c, struct qinst *inst) 47701e04c3fSmrg{ 47801e04c3fSmrg if (inst->dst.file == QFILE_TEMP) 47901e04c3fSmrg c->defs[inst->dst.index] = NULL; 48001e04c3fSmrg 48101e04c3fSmrg vir_emit(c, inst); 48201e04c3fSmrg 48301e04c3fSmrg return inst; 48401e04c3fSmrg} 48501e04c3fSmrg 48601e04c3fSmrgstruct qblock * 48701e04c3fSmrgvir_new_block(struct v3d_compile *c) 48801e04c3fSmrg{ 48901e04c3fSmrg struct qblock *block = rzalloc(c, struct qblock); 49001e04c3fSmrg 49101e04c3fSmrg list_inithead(&block->instructions); 49201e04c3fSmrg 49301e04c3fSmrg block->predecessors = _mesa_set_create(block, 49401e04c3fSmrg _mesa_hash_pointer, 49501e04c3fSmrg _mesa_key_pointer_equal); 49601e04c3fSmrg 49701e04c3fSmrg block->index = c->next_block_index++; 49801e04c3fSmrg 49901e04c3fSmrg return block; 50001e04c3fSmrg} 50101e04c3fSmrg 50201e04c3fSmrgvoid 50301e04c3fSmrgvir_set_emit_block(struct v3d_compile *c, struct qblock *block) 50401e04c3fSmrg{ 50501e04c3fSmrg c->cur_block = block; 50601e04c3fSmrg c->cursor = vir_after_block(block); 50701e04c3fSmrg list_addtail(&block->link, &c->blocks); 50801e04c3fSmrg} 50901e04c3fSmrg 51001e04c3fSmrgstruct qblock * 51101e04c3fSmrgvir_entry_block(struct v3d_compile *c) 51201e04c3fSmrg{ 51301e04c3fSmrg return 
list_first_entry(&c->blocks, struct qblock, link); 51401e04c3fSmrg} 51501e04c3fSmrg 51601e04c3fSmrgstruct qblock * 51701e04c3fSmrgvir_exit_block(struct v3d_compile *c) 51801e04c3fSmrg{ 51901e04c3fSmrg return list_last_entry(&c->blocks, struct qblock, link); 52001e04c3fSmrg} 52101e04c3fSmrg 52201e04c3fSmrgvoid 52301e04c3fSmrgvir_link_blocks(struct qblock *predecessor, struct qblock *successor) 52401e04c3fSmrg{ 52501e04c3fSmrg _mesa_set_add(successor->predecessors, predecessor); 52601e04c3fSmrg if (predecessor->successors[0]) { 52701e04c3fSmrg assert(!predecessor->successors[1]); 52801e04c3fSmrg predecessor->successors[1] = successor; 52901e04c3fSmrg } else { 53001e04c3fSmrg predecessor->successors[0] = successor; 53101e04c3fSmrg } 53201e04c3fSmrg} 53301e04c3fSmrg 53401e04c3fSmrgconst struct v3d_compiler * 53501e04c3fSmrgv3d_compiler_init(const struct v3d_device_info *devinfo) 53601e04c3fSmrg{ 53701e04c3fSmrg struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler); 53801e04c3fSmrg if (!compiler) 53901e04c3fSmrg return NULL; 54001e04c3fSmrg 54101e04c3fSmrg compiler->devinfo = devinfo; 54201e04c3fSmrg 54301e04c3fSmrg if (!vir_init_reg_sets(compiler)) { 54401e04c3fSmrg ralloc_free(compiler); 54501e04c3fSmrg return NULL; 54601e04c3fSmrg } 54701e04c3fSmrg 54801e04c3fSmrg return compiler; 54901e04c3fSmrg} 55001e04c3fSmrg 55101e04c3fSmrgvoid 55201e04c3fSmrgv3d_compiler_free(const struct v3d_compiler *compiler) 55301e04c3fSmrg{ 55401e04c3fSmrg ralloc_free((void *)compiler); 55501e04c3fSmrg} 55601e04c3fSmrg 55701e04c3fSmrgstatic struct v3d_compile * 55801e04c3fSmrgvir_compile_init(const struct v3d_compiler *compiler, 55901e04c3fSmrg struct v3d_key *key, 56001e04c3fSmrg nir_shader *s, 56101e04c3fSmrg int program_id, int variant_id) 56201e04c3fSmrg{ 56301e04c3fSmrg struct v3d_compile *c = rzalloc(NULL, struct v3d_compile); 56401e04c3fSmrg 56501e04c3fSmrg c->compiler = compiler; 56601e04c3fSmrg c->devinfo = compiler->devinfo; 56701e04c3fSmrg c->key = key; 
56801e04c3fSmrg c->program_id = program_id; 56901e04c3fSmrg c->variant_id = variant_id; 57001e04c3fSmrg c->threads = 4; 57101e04c3fSmrg 57201e04c3fSmrg s = nir_shader_clone(c, s); 57301e04c3fSmrg c->s = s; 57401e04c3fSmrg 57501e04c3fSmrg list_inithead(&c->blocks); 57601e04c3fSmrg vir_set_emit_block(c, vir_new_block(c)); 57701e04c3fSmrg 57801e04c3fSmrg c->output_position_index = -1; 57901e04c3fSmrg c->output_point_size_index = -1; 58001e04c3fSmrg c->output_sample_mask_index = -1; 58101e04c3fSmrg 58201e04c3fSmrg c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer, 58301e04c3fSmrg _mesa_key_pointer_equal); 58401e04c3fSmrg 58501e04c3fSmrg return c; 58601e04c3fSmrg} 58701e04c3fSmrg 58801e04c3fSmrgstatic int 58901e04c3fSmrgtype_size_vec4(const struct glsl_type *type) 59001e04c3fSmrg{ 59101e04c3fSmrg return glsl_count_attribute_slots(type, false); 59201e04c3fSmrg} 59301e04c3fSmrg 59401e04c3fSmrgstatic void 59501e04c3fSmrgv3d_lower_nir(struct v3d_compile *c) 59601e04c3fSmrg{ 59701e04c3fSmrg struct nir_lower_tex_options tex_options = { 59801e04c3fSmrg .lower_txd = true, 59901e04c3fSmrg .lower_rect = false, /* XXX: Use this on V3D 3.x */ 60001e04c3fSmrg .lower_txp = ~0, 60101e04c3fSmrg /* Apply swizzles to all samplers. */ 60201e04c3fSmrg .swizzle_result = ~0, 60301e04c3fSmrg }; 60401e04c3fSmrg 60501e04c3fSmrg /* Lower the format swizzle and (for 32-bit returns) 60601e04c3fSmrg * ARB_texture_swizzle-style swizzle. 
60701e04c3fSmrg */ 60801e04c3fSmrg for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) { 60901e04c3fSmrg for (int j = 0; j < 4; j++) 61001e04c3fSmrg tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j]; 61101e04c3fSmrg 61201e04c3fSmrg if (c->key->tex[i].clamp_s) 61301e04c3fSmrg tex_options.saturate_s |= 1 << i; 61401e04c3fSmrg if (c->key->tex[i].clamp_t) 61501e04c3fSmrg tex_options.saturate_t |= 1 << i; 61601e04c3fSmrg if (c->key->tex[i].clamp_r) 61701e04c3fSmrg tex_options.saturate_r |= 1 << i; 61801e04c3fSmrg } 61901e04c3fSmrg 62001e04c3fSmrg NIR_PASS_V(c->s, nir_lower_tex, &tex_options); 62101e04c3fSmrg} 62201e04c3fSmrg 62301e04c3fSmrgstatic void 62401e04c3fSmrgv3d_lower_nir_late(struct v3d_compile *c) 62501e04c3fSmrg{ 62601e04c3fSmrg NIR_PASS_V(c->s, v3d_nir_lower_io, c); 62701e04c3fSmrg NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c); 62801e04c3fSmrg NIR_PASS_V(c->s, nir_lower_idiv); 62901e04c3fSmrg} 63001e04c3fSmrg 63101e04c3fSmrgstatic void 63201e04c3fSmrgv3d_set_prog_data_uniforms(struct v3d_compile *c, 63301e04c3fSmrg struct v3d_prog_data *prog_data) 63401e04c3fSmrg{ 63501e04c3fSmrg int count = c->num_uniforms; 63601e04c3fSmrg struct v3d_uniform_list *ulist = &prog_data->uniforms; 63701e04c3fSmrg 63801e04c3fSmrg ulist->count = count; 63901e04c3fSmrg ulist->data = ralloc_array(prog_data, uint32_t, count); 64001e04c3fSmrg memcpy(ulist->data, c->uniform_data, 64101e04c3fSmrg count * sizeof(*ulist->data)); 64201e04c3fSmrg ulist->contents = ralloc_array(prog_data, enum quniform_contents, count); 64301e04c3fSmrg memcpy(ulist->contents, c->uniform_contents, 64401e04c3fSmrg count * sizeof(*ulist->contents)); 64501e04c3fSmrg} 64601e04c3fSmrg 64701e04c3fSmrg/* Copy the compiler UBO range state to the compiled shader, dropping out 64801e04c3fSmrg * arrays that were never referenced by an indirect load. 
64901e04c3fSmrg * 65001e04c3fSmrg * (Note that QIR dead code elimination of an array access still leaves that 65101e04c3fSmrg * array alive, though) 65201e04c3fSmrg */ 65301e04c3fSmrgstatic void 65401e04c3fSmrgv3d_set_prog_data_ubo(struct v3d_compile *c, 65501e04c3fSmrg struct v3d_prog_data *prog_data) 65601e04c3fSmrg{ 65701e04c3fSmrg if (!c->num_ubo_ranges) 65801e04c3fSmrg return; 65901e04c3fSmrg 66001e04c3fSmrg prog_data->num_ubo_ranges = 0; 66101e04c3fSmrg prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range, 66201e04c3fSmrg c->num_ubo_ranges); 66301e04c3fSmrg for (int i = 0; i < c->num_ubo_ranges; i++) { 66401e04c3fSmrg if (!c->ubo_range_used[i]) 66501e04c3fSmrg continue; 66601e04c3fSmrg 66701e04c3fSmrg struct v3d_ubo_range *range = &c->ubo_ranges[i]; 66801e04c3fSmrg prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range; 66901e04c3fSmrg prog_data->ubo_size += range->size; 67001e04c3fSmrg } 67101e04c3fSmrg 67201e04c3fSmrg if (prog_data->ubo_size) { 67301e04c3fSmrg if (V3D_DEBUG & V3D_DEBUG_SHADERDB) { 67401e04c3fSmrg fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n", 67501e04c3fSmrg vir_get_stage_name(c), 67601e04c3fSmrg c->program_id, c->variant_id, 67701e04c3fSmrg prog_data->ubo_size / 4); 67801e04c3fSmrg } 67901e04c3fSmrg } 68001e04c3fSmrg} 68101e04c3fSmrg 68201e04c3fSmrgstatic void 68301e04c3fSmrgv3d_set_prog_data(struct v3d_compile *c, 68401e04c3fSmrg struct v3d_prog_data *prog_data) 68501e04c3fSmrg{ 68601e04c3fSmrg prog_data->threads = c->threads; 68701e04c3fSmrg prog_data->single_seg = !c->last_thrsw; 68801e04c3fSmrg prog_data->spill_size = c->spill_size; 68901e04c3fSmrg 69001e04c3fSmrg v3d_set_prog_data_uniforms(c, prog_data); 69101e04c3fSmrg v3d_set_prog_data_ubo(c, prog_data); 69201e04c3fSmrg} 69301e04c3fSmrg 69401e04c3fSmrgstatic uint64_t * 69501e04c3fSmrgv3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size) 69601e04c3fSmrg{ 69701e04c3fSmrg *final_assembly_size = c->qpu_inst_count * 
sizeof(uint64_t); 69801e04c3fSmrg 69901e04c3fSmrg uint64_t *qpu_insts = malloc(*final_assembly_size); 70001e04c3fSmrg if (!qpu_insts) 70101e04c3fSmrg return NULL; 70201e04c3fSmrg 70301e04c3fSmrg memcpy(qpu_insts, c->qpu_insts, *final_assembly_size); 70401e04c3fSmrg 70501e04c3fSmrg vir_compile_destroy(c); 70601e04c3fSmrg 70701e04c3fSmrg return qpu_insts; 70801e04c3fSmrg} 70901e04c3fSmrg 71001e04c3fSmrguint64_t *v3d_compile_vs(const struct v3d_compiler *compiler, 71101e04c3fSmrg struct v3d_vs_key *key, 71201e04c3fSmrg struct v3d_vs_prog_data *prog_data, 71301e04c3fSmrg nir_shader *s, 71401e04c3fSmrg int program_id, int variant_id, 71501e04c3fSmrg uint32_t *final_assembly_size) 71601e04c3fSmrg{ 71701e04c3fSmrg struct v3d_compile *c = vir_compile_init(compiler, &key->base, s, 71801e04c3fSmrg program_id, variant_id); 71901e04c3fSmrg 72001e04c3fSmrg c->vs_key = key; 72101e04c3fSmrg 72201e04c3fSmrg /* Split our I/O vars and dead code eliminate the unused 72301e04c3fSmrg * components. 72401e04c3fSmrg */ 72501e04c3fSmrg NIR_PASS_V(c->s, nir_lower_io_to_scalar_early, 72601e04c3fSmrg nir_var_shader_in | nir_var_shader_out); 72701e04c3fSmrg uint64_t used_outputs[4] = {0}; 72801e04c3fSmrg for (int i = 0; i < c->vs_key->num_fs_inputs; i++) { 72901e04c3fSmrg int slot = v3d_slot_get_slot(c->vs_key->fs_inputs[i]); 73001e04c3fSmrg int comp = v3d_slot_get_component(c->vs_key->fs_inputs[i]); 73101e04c3fSmrg used_outputs[comp] |= 1ull << slot; 73201e04c3fSmrg } 73301e04c3fSmrg NIR_PASS_V(c->s, nir_remove_unused_io_vars, 73401e04c3fSmrg &c->s->outputs, used_outputs, NULL); /* demotes to globals */ 73501e04c3fSmrg NIR_PASS_V(c->s, nir_lower_global_vars_to_local); 73601e04c3fSmrg v3d_optimize_nir(c->s); 73701e04c3fSmrg NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in); 73801e04c3fSmrg NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, 73901e04c3fSmrg type_size_vec4, 74001e04c3fSmrg (nir_lower_io_options)0); 74101e04c3fSmrg 74201e04c3fSmrg v3d_lower_nir(c); 
74301e04c3fSmrg 74401e04c3fSmrg if (key->clamp_color) 74501e04c3fSmrg NIR_PASS_V(c->s, nir_lower_clamp_color_outputs); 74601e04c3fSmrg 74701e04c3fSmrg if (key->base.ucp_enables) { 74801e04c3fSmrg NIR_PASS_V(c->s, nir_lower_clip_vs, key->base.ucp_enables); 74901e04c3fSmrg NIR_PASS_V(c->s, nir_lower_io_to_scalar, 75001e04c3fSmrg nir_var_shader_out); 75101e04c3fSmrg } 75201e04c3fSmrg 75301e04c3fSmrg /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */ 75401e04c3fSmrg NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out); 75501e04c3fSmrg 75601e04c3fSmrg v3d_lower_nir_late(c); 75701e04c3fSmrg v3d_optimize_nir(c->s); 75801e04c3fSmrg NIR_PASS_V(c->s, nir_convert_from_ssa, true); 75901e04c3fSmrg 76001e04c3fSmrg v3d_nir_to_vir(c); 76101e04c3fSmrg 76201e04c3fSmrg v3d_set_prog_data(c, &prog_data->base); 76301e04c3fSmrg 76401e04c3fSmrg prog_data->base.num_inputs = c->num_inputs; 76501e04c3fSmrg 76601e04c3fSmrg /* The vertex data gets format converted by the VPM so that 76701e04c3fSmrg * each attribute channel takes up a VPM column. Precompute 76801e04c3fSmrg * the sizes for the shader record. 76901e04c3fSmrg */ 77001e04c3fSmrg for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) { 77101e04c3fSmrg prog_data->vattr_sizes[i] = c->vattr_sizes[i]; 77201e04c3fSmrg prog_data->vpm_input_size += c->vattr_sizes[i]; 77301e04c3fSmrg } 77401e04c3fSmrg 77501e04c3fSmrg prog_data->uses_vid = (s->info.system_values_read & 77601e04c3fSmrg (1ull << SYSTEM_VALUE_VERTEX_ID)); 77701e04c3fSmrg prog_data->uses_iid = (s->info.system_values_read & 77801e04c3fSmrg (1ull << SYSTEM_VALUE_INSTANCE_ID)); 77901e04c3fSmrg 78001e04c3fSmrg if (prog_data->uses_vid) 78101e04c3fSmrg prog_data->vpm_input_size++; 78201e04c3fSmrg if (prog_data->uses_iid) 78301e04c3fSmrg prog_data->vpm_input_size++; 78401e04c3fSmrg 78501e04c3fSmrg /* Input/output segment size are in sectors (8 rows of 32 bits per 78601e04c3fSmrg * channel). 
 */
        /* Sizes below are converted from rows to 8-row (one sector) units. */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;

        /* Compute VCM cache size.  We set up our program to take up less than
         * half of the VPM, so that any set of bin and render programs won't
         * run out of space.  We need space for at least one input segment,
         * and then allocate the rest to output segments (one for the current
         * program, the rest to VCM).  The valid range of the VCM cache size
         * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
         * batches.
         */
        assert(c->devinfo->vpm_size);
        /* One sector = 8 rows of 16 32-bit words. */
        int sector_size = 16 * sizeof(uint32_t) * 8;
        int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
        int half_vpm = vpm_size_in_sectors / 2;
        int vpm_output_sectors = half_vpm - prog_data->vpm_input_size;
        int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size;
        assert(vpm_output_batches >= 2);
        prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);

        return v3d_return_qpu_insts(c, final_assembly_size);
}

/* Copies the compiler's per-FS-input bookkeeping into the prog_data: the
 * input slot table, plus the flat-shade / noperspective / centroid flags,
 * repacked from the compiler's BITSET into words holding 24 flags each
 * (presumably matching a 24-bit hardware field — confirm against the
 * packet definitions).
 */
static void
v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
                            struct v3d_fs_prog_data *prog_data)
{
        prog_data->base.num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        /* Make sure the flag arrays are big enough for the 24-per-word
         * packing below.
         */
        STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
                      (V3D_MAX_FS_INPUTS - 1) / 24);
        for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
                if (BITSET_TEST(c->flat_shade_flags, i))
                        prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->noperspective_flags, i))
                        prog_data->noperspective_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->centroid_flags, i))
                        prog_data->centroid_flags[i / 24] |= 1 << (i % 24);
        }
}

/* Rewrites FS color output variables to int/uint vector types when the
 * bound render buffer is an integer format (per the int_color_rb /
 * uint_color_rb bitmasks in the FS key).  FRAG_RESULT_COLOR (broadcast
 * color) matches every RB; FRAG_RESULT_DATAn matches only bit n.
 */
static void
v3d_fixup_fs_output_types(struct v3d_compile *c)
{
        nir_foreach_variable(var, &c->s->outputs) {
                uint32_t mask = 0;

                switch (var->data.location) {
                case FRAG_RESULT_COLOR:
                        mask = ~0;
                        break;
                case FRAG_RESULT_DATA0:
                case FRAG_RESULT_DATA1:
                case FRAG_RESULT_DATA2:
                case FRAG_RESULT_DATA3:
                        mask = 1 << (var->data.location - FRAG_RESULT_DATA0);
                        break;
                }

                if (c->fs_key->int_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_INT,
                                                 glsl_get_components(var->type));
                } else if (c->fs_key->uint_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_UINT,
                                                 glsl_get_components(var->type));
                }
        }
}

/* Compiles a fragment shader: runs the key-dependent NIR lowering passes,
 * translates to VIR, and fills in the FS prog_data.  Returns the QPU
 * instruction array (ownership per v3d_return_qpu_insts) and stores its
 * size in *final_assembly_size.
 */
uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
                         struct v3d_fs_key *key,
                         struct v3d_fs_prog_data *prog_data,
                         nir_shader *s,
                         int program_id, int variant_id,
                         uint32_t *final_assembly_size)
{
        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
                                                 program_id, variant_id);

        c->fs_key = key;

        /* Integer RBs need the output vars retyped before any lowering
         * looks at them.
         */
        if (key->int_color_rb || key->uint_color_rb)
                v3d_fixup_fs_output_types(c);

        v3d_lower_nir(c);

        if (key->light_twoside)
                NIR_PASS_V(c->s, nir_lower_two_sided_color);

        if (key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (key->alpha_test) {
                NIR_PASS_V(c->s, nir_lower_alpha_test, key->alpha_test_func,
                           false);
        }

        if (key->base.ucp_enables)
                NIR_PASS_V(c->s, nir_lower_clip_fs, key->base.ucp_enables);

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);

        v3d_lower_nir_late(c);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, &prog_data->base);
        v3d_set_fs_prog_data_inputs(c, prog_data);
        prog_data->writes_z = (c->s->info.outputs_written &
                               (1 << FRAG_RESULT_DEPTH));
        /* alpha-to-coverage also needs the discard path enabled. */
        prog_data->discard = (c->s->info.fs.uses_discard ||
                              c->fs_key->sample_alpha_to_coverage);
        prog_data->uses_center_w = c->uses_center_w;

        return v3d_return_qpu_insts(c, final_assembly_size);
}

/* Unlinks and frees one instruction, clearing its SSA def entry (if it
 * wrote a temp) and invalidating liveness.  Asserts that the compiler
 * cursor isn't parked on this instruction, since removing it would leave
 * the cursor dangling.
 */
void
vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
{
        if (qinst->dst.file == QFILE_TEMP)
                c->defs[qinst->dst.index] = NULL;

        assert(&qinst->link != c->cursor.link);

        list_del(&qinst->link);
        free(qinst);

        c->live_intervals_valid = false;
}

/* Would chase a temp back through chains of MOVs to its ultimate source;
 * the VC4-era implementation is commented out below (the VIR opcodes
 * differ), so currently this is an identity function.
 */
struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}

/* Tears down a compile context: frees every instruction in every block
 * (instructions are malloc'd individually, not ralloc'd), then releases
 * the ralloc context holding everything else.
 */
void
vir_compile_destroy(struct v3d_compile *c)
{
        /* Defuse the assert that we aren't removing the cursor's instruction.
         */
        c->cursor.link = NULL;

        vir_for_each_block(block, c) {
                while (!list_empty(&block->instructions)) {
                        struct qinst *qinst =
                                list_first_entry(&block->instructions,
                                                 struct qinst, link);
                        vir_remove_instruction(c, qinst);
                }
        }

        ralloc_free(c);
}

/* Returns a QFILE_UNIF register for the given (contents, data) pair,
 * deduplicating against the uniforms already recorded and growing the
 * (ralloc'd) uniform arrays geometrically as needed.
 */
struct qreg
vir_uniform(struct v3d_compile *c,
            enum quniform_contents contents,
            uint32_t data)
{
        /* Reuse an identical uniform if we already have one. */
        for (int i = 0; i < c->num_uniforms; i++) {
                if (c->uniform_contents[i] == contents &&
                    c->uniform_data[i] == data) {
                        return vir_reg(QFILE_UNIF, i);
                }
        }

        uint32_t uniform = c->num_uniforms++;

        if (uniform >= c->uniform_array_size) {
                /* Double the capacity, with a floor of 16 entries. */
                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
                                             c->uniform_array_size * 2);

                c->uniform_data = reralloc(c, c->uniform_data,
                                           uint32_t,
                                           c->uniform_array_size);
                c->uniform_contents = reralloc(c, c->uniform_contents,
                                               enum quniform_contents,
                                               c->uniform_array_size);
        }

        c->uniform_contents[uniform] = contents;
        c->uniform_data[uniform] = data;

        return vir_reg(QFILE_UNIF, uniform);
}

/* Whether a condition-flag update may be folded onto this instruction.
 * On v4.x+, VPM reads and SFU ops can't set flags (hardware restriction
 * per the version check here — confirm against the QPU docs).
 */
static bool
vir_can_set_flags(struct v3d_compile *c, struct qinst *inst)
{
        if (c->devinfo->ver >= 40 && (v3d_qpu_reads_vpm(&inst->qpu) ||
                                      v3d_qpu_uses_sfu(&inst->qpu))) {
                return false;
        }

        return true;
}

/* Sets the condition flags from src: folds the flag update onto the
 * instruction that defined src when that instruction is the last one in
 * the current block (and the cursor sits right after it, and it's allowed
 * to set flags); otherwise emits a MOV to the null register just to
 * update the flags.
 */
void
vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
{
        struct qinst *last_inst = NULL;

        if (!list_empty(&c->cur_block->instructions)) {
                last_inst = (struct qinst *)c->cur_block->instructions.prev;

                /* Can't stuff the PF into the last inst if our cursor
                 * isn't pointing after it.
                 */
                struct vir_cursor after_inst = vir_after_inst(last_inst);
                if (c->cursor.mode != after_inst.mode ||
                    c->cursor.link != after_inst.link)
                        last_inst = NULL;
        }

        if (src.file != QFILE_TEMP ||
            !c->defs[src.index] ||
            last_inst != c->defs[src.index] ||
            !vir_can_set_flags(c, last_inst)) {
                /* XXX: Make the MOV be the appropriate type */
                last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
        }

        vir_set_pf(last_inst, pf);
}

/* Runs one optimization pass and ORs its result into the enclosing
 * `progress` flag; expects `c`, `progress`, `print_opt_debug` and `pass`
 * in scope at the expansion site (see vir_optimize()).
 */
#define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
                if (stage_progress) {                                   \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)

/* Runs the VIR optimization passes to a fixed point. */
void
vir_optimize(struct v3d_compile *c)
{
        bool print_opt_debug = false;
        int pass = 1;

        while (true) {
                bool progress = false;

                OPTPASS(vir_opt_copy_propagate);
                OPTPASS(vir_opt_dead_code);
                OPTPASS(vir_opt_small_immediates);

                if (!progress)
                        break;

                pass++;
        }
}

/* Human-readable stage name for debug output; coordinate-shader VS
 * variants get their own label since gl_shader_stage has no such stage.
 */
const char *
vir_get_stage_name(struct v3d_compile *c)
{
        if (c->vs_key && c->vs_key->is_coord)
                return "MESA_SHADER_COORD";
        else
                return gl_shader_stage_name(c->s->info.stage);
}