/* vir.c, revision ed98bd31 */
/*
 * Copyright © 2016-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"

int
vir_get_nsrc(struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return 0;
        case V3D_QPU_INSTR_TYPE_ALU:
                if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
                        return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
                else
                        return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
        }

        return 0;
}

/**
 * Returns whether the instruction has any side effects that must be
 * preserved.
 */
bool
vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return true;
        case V3D_QPU_INSTR_TYPE_ALU:
                switch (inst->qpu.alu.add.op) {
                case V3D_QPU_A_SETREVF:
                case V3D_QPU_A_SETMSF:
                case V3D_QPU_A_VPMSETUP:
                case V3D_QPU_A_STVPMV:
                case V3D_QPU_A_STVPMD:
                case V3D_QPU_A_STVPMP:
                case V3D_QPU_A_VPMWT:
                case V3D_QPU_A_TMUWT:
                        return true;
                default:
                        break;
                }

                switch (inst->qpu.alu.mul.op) {
                case V3D_QPU_M_MULTOP:
                        return true;
                default:
                        break;
                }
        }

        if (inst->qpu.sig.ldtmu ||
            inst->qpu.sig.ldvary ||
            inst->qpu.sig.wrtmuc ||
            inst->qpu.sig.thrsw) {
                return true;
        }

        return false;
}

bool
vir_is_raw_mov(struct qinst *inst)
{
        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
             inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
                return false;
        }

        if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
            inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
                return false;
        }

        if (inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
            inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE ||
            inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) {
                return false;
        }

        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
            inst->qpu.flags.mc != V3D_QPU_COND_NONE)
                return false;

        return true;
}

bool
vir_is_add(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.add.op != V3D_QPU_A_NOP);
}

bool
vir_is_mul(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
}

bool
vir_is_tex(struct qinst *inst)
{
        if (inst->dst.file == QFILE_MAGIC)
                return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);

        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
            inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
                return true;
        }

        return false;
}

bool
vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
{
        for (int i = 0; i < vir_get_nsrc(inst); i++) {
                switch (inst->src[i].file) {
                case QFILE_VPM:
                        return true;
                default:
                        break;
                }
        }

        if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
                                  inst->qpu.sig.ldtlb ||
                                  inst->qpu.sig.ldtlbu ||
                                  inst->qpu.sig.ldvpm)) {
                return true;
        }

        return false;
}

bool
vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
{
        switch (inst->dst.file) {
        case QFILE_MAGIC:
                switch (inst->dst.index) {
                case V3D_QPU_WADDR_RECIP:
                case V3D_QPU_WADDR_RSQRT:
                case V3D_QPU_WADDR_EXP:
                case V3D_QPU_WADDR_LOG:
                case V3D_QPU_WADDR_SIN:
                        return true;
                }
                break;
        default:
                break;
        }

        if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
                return true;

        return false;
}

void
vir_set_unpack(struct qinst *inst, int src,
               enum v3d_qpu_input_unpack unpack)
{
        assert(src == 0 || src == 1);

        if (vir_is_add(inst)) {
                if (src == 0)
                        inst->qpu.alu.add.a_unpack = unpack;
                else
                        inst->qpu.alu.add.b_unpack = unpack;
        } else {
                assert(vir_is_mul(inst));
                if (src == 0)
                        inst->qpu.alu.mul.a_unpack = unpack;
                else
                        inst->qpu.alu.mul.b_unpack = unpack;
        }
}

void
vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.ac = cond;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mc = cond;
        }
}

void
vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.apf = pf;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mpf = pf;
        }
}

void
vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.auf = uf;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.muf = uf;
        }
}

#if 0
uint8_t
vir_channels_written(struct qinst *inst)
{
        if (vir_is_mul(inst)) {
                switch (inst->dst.pack) {
                case QPU_PACK_MUL_NOP:
                case QPU_PACK_MUL_8888:
                        return 0xf;
                case QPU_PACK_MUL_8A:
                        return 0x1;
                case QPU_PACK_MUL_8B:
                        return 0x2;
                case QPU_PACK_MUL_8C:
                        return 0x4;
                case QPU_PACK_MUL_8D:
                        return 0x8;
                }
        } else {
                switch (inst->dst.pack) {
                case QPU_PACK_A_NOP:
                case QPU_PACK_A_8888:
                case QPU_PACK_A_8888_SAT:
                case QPU_PACK_A_32_SAT:
                        return 0xf;
                case QPU_PACK_A_8A:
                case QPU_PACK_A_8A_SAT:
                        return 0x1;
                case QPU_PACK_A_8B:
                case QPU_PACK_A_8B_SAT:
                        return 0x2;
                case QPU_PACK_A_8C:
                case QPU_PACK_A_8C_SAT:
                        return 0x4;
                case QPU_PACK_A_8D:
                case QPU_PACK_A_8D_SAT:
                        return 0x8;
                case QPU_PACK_A_16A:
                case QPU_PACK_A_16A_SAT:
                        return 0x3;
                case QPU_PACK_A_16B:
                case QPU_PACK_A_16B_SAT:
                        return 0xc;
                }
        }
        unreachable("Bad pack field");
}
#endif

struct qreg
vir_get_temp(struct v3d_compile *c)
{
        struct qreg reg;

        reg.file = QFILE_TEMP;
        reg.index = c->num_temps++;

        if (c->num_temps > c->defs_array_size) {
                uint32_t old_size = c->defs_array_size;
                c->defs_array_size = MAX2(old_size * 2, 16);

                c->defs = reralloc(c, c->defs, struct qinst *,
                                   c->defs_array_size);
                memset(&c->defs[old_size], 0,
                       sizeof(c->defs[0]) * (c->defs_array_size - old_size));

                c->spillable = reralloc(c, c->spillable,
                                        BITSET_WORD,
                                        BITSET_WORDS(c->defs_array_size));
                for (int i = old_size; i < c->defs_array_size; i++)
                        BITSET_SET(c->spillable, i);
        }

        return reg;
}

struct qinst *
vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.add.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}

struct qinst *
vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.mul.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}

struct qinst *
vir_branch_inst(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
        inst->qpu.branch.cond = cond;
        inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
        inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
        inst->qpu.branch.ub = true;
        inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;

        inst->dst = vir_nop_reg();
        inst->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT, 0);

        return inst;
}

static void
vir_emit(struct v3d_compile *c, struct qinst *inst)
{
        switch (c->cursor.mode) {
        case vir_cursor_add:
                list_add(&inst->link, c->cursor.link);
                break;
        case vir_cursor_addtail:
                list_addtail(&inst->link, c->cursor.link);
                break;
        }

        c->cursor = vir_after_inst(inst);
        c->live_intervals_valid = false;
}

/* Updates inst to write to a new temporary, emits it, and notes the def. */
struct qreg
vir_emit_def(struct v3d_compile *c, struct qinst *inst)
{
        assert(inst->dst.file == QFILE_NULL);

        /* If we're emitting an instruction that's a def, it had better be
         * writing a register.
         */
        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
                       v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
                assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
                       v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
        }

        inst->dst = vir_get_temp(c);

        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = inst;

        vir_emit(c, inst);

        return inst->dst;
}

struct qinst *
vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
{
        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = NULL;

        vir_emit(c, inst);

        return inst;
}

struct qblock *
vir_new_block(struct v3d_compile *c)
{
        struct qblock *block = rzalloc(c, struct qblock);

        list_inithead(&block->instructions);

        block->predecessors = _mesa_set_create(block,
                                               _mesa_hash_pointer,
                                               _mesa_key_pointer_equal);

        block->index = c->next_block_index++;

        return block;
}

void
vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
{
        c->cur_block = block;
        c->cursor = vir_after_block(block);
        list_addtail(&block->link, &c->blocks);
}

struct qblock *
vir_entry_block(struct v3d_compile *c)
{
        return list_first_entry(&c->blocks, struct qblock, link);
}

struct qblock *
vir_exit_block(struct v3d_compile *c)
{
        return list_last_entry(&c->blocks, struct qblock, link);
}

void
vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
{
        _mesa_set_add(successor->predecessors, predecessor);
        if (predecessor->successors[0]) {
                assert(!predecessor->successors[1]);
                predecessor->successors[1] = successor;
        } else {
                predecessor->successors[0] = successor;
        }
}

const struct v3d_compiler *
v3d_compiler_init(const struct v3d_device_info *devinfo)
{
        struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
        if (!compiler)
                return NULL;

        compiler->devinfo = devinfo;

        if (!vir_init_reg_sets(compiler)) {
                ralloc_free(compiler);
                return NULL;
        }

        return compiler;
}

void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
        ralloc_free((void *)compiler);
}

static struct v3d_compile *
vir_compile_init(const struct v3d_compiler *compiler,
                 struct v3d_key *key,
                 nir_shader *s,
                 void (*debug_output)(const char *msg,
                                      void *debug_output_data),
                 void *debug_output_data,
                 int program_id, int variant_id)
{
        struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);

        c->compiler = compiler;
        c->devinfo = compiler->devinfo;
        c->key = key;
        c->program_id = program_id;
        c->variant_id = variant_id;
        c->threads = 4;
        c->debug_output = debug_output;
        c->debug_output_data = debug_output_data;

        s = nir_shader_clone(c, s);
        c->s = s;

        list_inithead(&c->blocks);
        vir_set_emit_block(c, vir_new_block(c));

        c->output_position_index = -1;
        c->output_sample_mask_index = -1;

        c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

        return c;
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}

static void
v3d_lower_nir(struct v3d_compile *c)
{
        struct nir_lower_tex_options tex_options = {
                .lower_txd = true,
                .lower_tg4_broadcom_swizzle = true,

                .lower_rect = false, /* XXX: Use this on V3D 3.x */
                .lower_txp = ~0,
                /* Apply swizzles to all samplers. */
                .swizzle_result = ~0,
        };

        /* Lower the format swizzle and (for 32-bit returns)
         * ARB_texture_swizzle-style swizzle.
         */
        for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
                for (int j = 0; j < 4; j++)
                        tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];

                if (c->key->tex[i].clamp_s)
                        tex_options.saturate_s |= 1 << i;
                if (c->key->tex[i].clamp_t)
                        tex_options.saturate_t |= 1 << i;
                if (c->key->tex[i].clamp_r)
                        tex_options.saturate_r |= 1 << i;
                if (c->key->tex[i].return_size == 16) {
                        tex_options.lower_tex_packing[i] =
                                nir_lower_tex_packing_16;
                }
        }

        /* CS textures may not have return_size reflecting the shadow state. */
        nir_foreach_variable(var, &c->s->uniforms) {
                const struct glsl_type *type = glsl_without_array(var->type);
                unsigned array_len = MAX2(glsl_get_length(var->type), 1);

                if (!glsl_type_is_sampler(type) ||
                    !glsl_sampler_type_is_shadow(type))
                        continue;

                for (int i = 0; i < array_len; i++) {
                        tex_options.lower_tex_packing[var->data.binding + i] =
                                nir_lower_tex_packing_16;
                }
        }

        NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
        NIR_PASS_V(c->s, nir_lower_system_values);

        NIR_PASS_V(c->s, nir_lower_vars_to_scratch,
                   nir_var_function_temp,
                   0,
                   glsl_get_natural_size_align_bytes);
        NIR_PASS_V(c->s, v3d_nir_lower_scratch);
}

static void
v3d_set_prog_data_uniforms(struct v3d_compile *c,
                           struct v3d_prog_data *prog_data)
{
        int count = c->num_uniforms;
        struct v3d_uniform_list *ulist = &prog_data->uniforms;

        ulist->count = count;
        ulist->data = ralloc_array(prog_data, uint32_t, count);
        memcpy(ulist->data, c->uniform_data,
               count * sizeof(*ulist->data));
        ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
        memcpy(ulist->contents, c->uniform_contents,
               count * sizeof(*ulist->contents));
}

static void
v3d_vs_set_prog_data(struct v3d_compile *c,
                     struct v3d_vs_prog_data *prog_data)
{
        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column. Precompute
         * the sizes for the shader record.
         */
        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
                prog_data->vpm_input_size += c->vattr_sizes[i];
        }

        prog_data->uses_vid = (c->s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_VERTEX_ID));
        prog_data->uses_iid = (c->s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_INSTANCE_ID));

        if (prog_data->uses_vid)
                prog_data->vpm_input_size++;
        if (prog_data->uses_iid)
                prog_data->vpm_input_size++;

        /* Input/output segment size are in sectors (8 rows of 32 bits per
         * channel).
         */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8;

        /* Set us up for shared input/output segments. This is apparently
         * necessary for our VCM setup to avoid varying corruption.
         */
        prog_data->separate_segments = false;
        prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
                                          prog_data->vpm_input_size);
        prog_data->vpm_input_size = 0;

        /* Compute VCM cache size. We set up our program to take up less than
         * half of the VPM, so that any set of bin and render programs won't
         * run out of space. We need space for at least one input segment,
         * and then allocate the rest to output segments (one for the current
         * program, the rest to VCM). The valid range of the VCM cache size
         * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
         * batches.
         */
        assert(c->devinfo->vpm_size);
        int sector_size = V3D_CHANNELS * sizeof(uint32_t) * 8;
        int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
        int half_vpm = vpm_size_in_sectors / 2;
        int vpm_output_sectors = half_vpm - prog_data->vpm_input_size;
        int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size;
        assert(vpm_output_batches >= 2);
        prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);
}

static void
v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
                            struct v3d_fs_prog_data *prog_data)
{
        prog_data->num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
                      (V3D_MAX_FS_INPUTS - 1) / 24);
        for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
                if (BITSET_TEST(c->flat_shade_flags, i))
                        prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->noperspective_flags, i))
                        prog_data->noperspective_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->centroid_flags, i))
                        prog_data->centroid_flags[i / 24] |= 1 << (i % 24);
        }
}

static void
v3d_fs_set_prog_data(struct v3d_compile *c,
                     struct v3d_fs_prog_data *prog_data)
{
        v3d_set_fs_prog_data_inputs(c, prog_data);
        prog_data->writes_z = c->writes_z;
        prog_data->disable_ez = !c->s->info.fs.early_fragment_tests;
        prog_data->uses_center_w = c->uses_center_w;
}

static void
v3d_cs_set_prog_data(struct v3d_compile *c,
                     struct v3d_compute_prog_data *prog_data)
{
        prog_data->shared_size = c->s->info.cs.shared_size;
}

static void
v3d_set_prog_data(struct v3d_compile *c,
                  struct v3d_prog_data *prog_data)
{
        prog_data->threads = c->threads;
        prog_data->single_seg = !c->last_thrsw;
        prog_data->spill_size = c->spill_size;

        v3d_set_prog_data_uniforms(c, prog_data);

        if (c->s->info.stage == MESA_SHADER_COMPUTE) {
                v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data);
        } else if (c->s->info.stage == MESA_SHADER_VERTEX) {
                v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
        } else {
                assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
                v3d_fs_set_prog_data(c, (struct v3d_fs_prog_data *)prog_data);
        }
}

static uint64_t *
v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
{
        *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);

        uint64_t *qpu_insts = malloc(*final_assembly_size);
        if (!qpu_insts)
                return NULL;

        memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);

        vir_compile_destroy(c);

        return qpu_insts;
}

static void
v3d_nir_lower_vs_early(struct v3d_compile *c)
{
        /* Split our I/O vars and dead code eliminate the unused
         * components.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
                   nir_var_shader_in | nir_var_shader_out);
        uint64_t used_outputs[4] = {0};
        for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
                int slot = v3d_slot_get_slot(c->vs_key->fs_inputs[i]);
                int comp = v3d_slot_get_component(c->vs_key->fs_inputs[i]);
                used_outputs[comp] |= 1ull << slot;
        }
        NIR_PASS_V(c->s, nir_remove_unused_io_vars,
                   &c->s->outputs, used_outputs, NULL); /* demotes to globals */
        NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in);
        NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
                   type_size_vec4,
                   (nir_lower_io_options)0);
}

static void
v3d_fixup_fs_output_types(struct v3d_compile *c)
{
        nir_foreach_variable(var, &c->s->outputs) {
                uint32_t mask = 0;

                switch (var->data.location) {
                case FRAG_RESULT_COLOR:
                        mask = ~0;
                        break;
                case FRAG_RESULT_DATA0:
                case FRAG_RESULT_DATA1:
                case FRAG_RESULT_DATA2:
                case FRAG_RESULT_DATA3:
                        mask = 1 << (var->data.location - FRAG_RESULT_DATA0);
                        break;
                }

                if (c->fs_key->int_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_INT,
                                                 glsl_get_components(var->type));
                } else if (c->fs_key->uint_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_UINT,
                                                 glsl_get_components(var->type));
                }
        }
}

static void
v3d_nir_lower_fs_early(struct v3d_compile *c)
{
        if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
                v3d_fixup_fs_output_types(c);

        /* If the shader has no non-TLB side effects, we can promote it to
         * enabling early_fragment_tests even if the user didn't.
         */
        if (!(c->s->info.num_images ||
              c->s->info.num_ssbos ||
              c->s->info.num_abos)) {
                c->s->info.fs.early_fragment_tests = true;
        }
}

static void
v3d_nir_lower_vs_late(struct v3d_compile *c)
{
        if (c->vs_key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (c->key->ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables,
                           false);
                NIR_PASS_V(c->s, nir_lower_io_to_scalar,
                           nir_var_shader_out);
        }

        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
}

static void
v3d_nir_lower_fs_late(struct v3d_compile *c)
{
        if (c->fs_key->light_twoside)
                NIR_PASS_V(c->s, nir_lower_two_sided_color);

        if (c->fs_key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (c->fs_key->alpha_test) {
                NIR_PASS_V(c->s, nir_lower_alpha_test,
                           c->fs_key->alpha_test_func,
                           false);
        }

        if (c->key->ucp_enables)
                NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
}

static uint32_t
vir_get_max_temps(struct v3d_compile *c)
{
        int max_ip = 0;
        vir_for_each_inst_inorder(inst, c)
                max_ip++;

        uint32_t *pressure = rzalloc_array(NULL, uint32_t, max_ip);

        for (int t = 0; t < c->num_temps; t++) {
                for (int i = c->temp_start[t]; (i < c->temp_end[t] &&
                                                i < max_ip); i++) {
                        if (i > max_ip)
                                break;
                        pressure[i]++;
                }
        }

        uint32_t max_temps = 0;
        for (int i = 0; i < max_ip; i++)
                max_temps = MAX2(max_temps, pressure[i]);

        ralloc_free(pressure);

        return max_temps;
}

uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                      struct v3d_key *key,
                      struct v3d_prog_data **out_prog_data,
                      nir_shader *s,
                      void (*debug_output)(const char *msg,
                                           void *debug_output_data),
                      void *debug_output_data,
                      int program_id, int variant_id,
                      uint32_t *final_assembly_size)
{
        struct v3d_prog_data *prog_data;
        struct v3d_compile *c = vir_compile_init(compiler, key, s,
                                                 debug_output, debug_output_data,
                                                 program_id, variant_id);

        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                c->vs_key = (struct v3d_vs_key *)key;
                prog_data = rzalloc_size(NULL, sizeof(struct v3d_vs_prog_data));
                break;
        case MESA_SHADER_FRAGMENT:
                c->fs_key = (struct v3d_fs_key *)key;
                prog_data = rzalloc_size(NULL, sizeof(struct v3d_fs_prog_data));
                break;
        case MESA_SHADER_COMPUTE:
                prog_data = rzalloc_size(NULL,
                                         sizeof(struct v3d_compute_prog_data));
                break;
        default:
                unreachable("unsupported shader stage");
        }

        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                v3d_nir_lower_vs_early(c);
        } else if (c->s->info.stage != MESA_SHADER_COMPUTE) {
                assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
                v3d_nir_lower_fs_early(c);
        }

        v3d_lower_nir(c);

        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                v3d_nir_lower_vs_late(c);
        } else if (c->s->info.stage != MESA_SHADER_COMPUTE) {
                assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
                v3d_nir_lower_fs_late(c);
        }

        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
        NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
        NIR_PASS_V(c->s, v3d_nir_lower_image_load_store);
        NIR_PASS_V(c->s, nir_lower_idiv);

        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_lower_bool_to_int32);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, prog_data);

        *out_prog_data = prog_data;

        char *shaderdb;
        int ret = asprintf(&shaderdb,
                           "%s shader: %d inst, %d threads, %d loops, "
                           "%d uniforms, %d max-temps, %d:%d spills:fills",
                           vir_get_stage_name(c),
                           c->qpu_inst_count,
                           c->threads,
                           c->loops,
                           c->num_uniforms,
                           vir_get_max_temps(c),
                           c->spills,
                           c->fills);
        if (ret >= 0) {
                if (V3D_DEBUG & V3D_DEBUG_SHADERDB)
                        fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

                c->debug_output(shaderdb, c->debug_output_data);
                free(shaderdb);
        }

        return v3d_return_qpu_insts(c, final_assembly_size);
}

void
vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
{
        if (qinst->dst.file == QFILE_TEMP)
                c->defs[qinst->dst.index] = NULL;

        assert(&qinst->link != c->cursor.link);

        list_del(&qinst->link);
        free(qinst);

        c->live_intervals_valid = false;
}

struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}

void
vir_compile_destroy(struct v3d_compile *c)
{
        /* Defuse the assert that we aren't removing the cursor's instruction.
         */
        c->cursor.link = NULL;

        vir_for_each_block(block, c) {
                while (!list_empty(&block->instructions)) {
                        struct qinst *qinst =
                                list_first_entry(&block->instructions,
                                                 struct qinst, link);
                        vir_remove_instruction(c, qinst);
                }
        }

        ralloc_free(c);
}

uint32_t
vir_get_uniform_index(struct v3d_compile *c,
                      enum quniform_contents contents,
                      uint32_t data)
{
        for (int i = 0; i < c->num_uniforms; i++) {
                if (c->uniform_contents[i] == contents &&
                    c->uniform_data[i] == data) {
                        return i;
                }
        }

        uint32_t uniform = c->num_uniforms++;

        if (uniform >= c->uniform_array_size) {
                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
                                             c->uniform_array_size * 2);

                c->uniform_data = reralloc(c, c->uniform_data,
                                           uint32_t,
                                           c->uniform_array_size);
                c->uniform_contents = reralloc(c, c->uniform_contents,
                                               enum quniform_contents,
                                               c->uniform_array_size);
        }

        c->uniform_contents[uniform] = contents;
        c->uniform_data[uniform] = data;

        return uniform;
}

struct qreg
vir_uniform(struct v3d_compile *c,
            enum quniform_contents contents,
            uint32_t data)
{
        struct qinst *inst = vir_NOP(c);
        inst->qpu.sig.ldunif = true;
        inst->uniform = vir_get_uniform_index(c, contents, data);
        inst->dst = vir_get_temp(c);
        c->defs[inst->dst.index] = inst;
        return inst->dst;
}

#define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
                if (stage_progress) {                                   \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)

void
vir_optimize(struct v3d_compile *c)
{
        bool print_opt_debug = false;
        int pass = 1;

        while (true) {
                bool progress = false;

                OPTPASS(vir_opt_copy_propagate);
                OPTPASS(vir_opt_redundant_flags);
                OPTPASS(vir_opt_dead_code);
                OPTPASS(vir_opt_small_immediates);

                if (!progress)
                        break;

                pass++;
        }
}

const char *
vir_get_stage_name(struct v3d_compile *c)
{
        if (c->vs_key && c->vs_key->is_coord)
                return "MESA_SHADER_COORD";
"MESA_SHADER_COORD"; 109801e04c3fSmrg else 109901e04c3fSmrg return gl_shader_stage_name(c->s->info.stage); 110001e04c3fSmrg} 1101