1af69d88dSmrg/* 2af69d88dSmrg * Copyright © 2014 Broadcom 3af69d88dSmrg * 4af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 5af69d88dSmrg * copy of this software and associated documentation files (the "Software"), 6af69d88dSmrg * to deal in the Software without restriction, including without limitation 7af69d88dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8af69d88dSmrg * and/or sell copies of the Software, and to permit persons to whom the 9af69d88dSmrg * Software is furnished to do so, subject to the following conditions: 10af69d88dSmrg * 11af69d88dSmrg * The above copyright notice and this permission notice (including the next 12af69d88dSmrg * paragraph) shall be included in all copies or substantial portions of the 13af69d88dSmrg * Software. 14af69d88dSmrg * 15af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16af69d88dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17af69d88dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18af69d88dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19af69d88dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20af69d88dSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21af69d88dSmrg * IN THE SOFTWARE. 22af69d88dSmrg */ 23af69d88dSmrg 24af69d88dSmrg#include "util/u_memory.h" 2501e04c3fSmrg#include "util/ralloc.h" 26af69d88dSmrg 27af69d88dSmrg#include "vc4_qir.h" 28af69d88dSmrg#include "vc4_qpu.h" 29af69d88dSmrg 30af69d88dSmrgstruct qir_op_info { 31af69d88dSmrg const char *name; 32af69d88dSmrg uint8_t ndst, nsrc; 33af69d88dSmrg bool has_side_effects; 34af69d88dSmrg}; 35af69d88dSmrg 36af69d88dSmrgstatic const struct qir_op_info qir_op_info[] = { 37af69d88dSmrg [QOP_MOV] = { "mov", 1, 1 }, 3801e04c3fSmrg [QOP_FMOV] = { "fmov", 1, 1 }, 3901e04c3fSmrg [QOP_MMOV] = { "mmov", 1, 1 }, 40af69d88dSmrg [QOP_FADD] = { "fadd", 1, 2 }, 41af69d88dSmrg [QOP_FSUB] = { "fsub", 1, 2 }, 42af69d88dSmrg [QOP_FMUL] = { "fmul", 1, 2 }, 4301e04c3fSmrg [QOP_MUL24] = { "mul24", 1, 2 }, 4401e04c3fSmrg [QOP_V8MULD] = {"v8muld", 1, 2 }, 4501e04c3fSmrg [QOP_V8MIN] = {"v8min", 1, 2 }, 4601e04c3fSmrg [QOP_V8MAX] = {"v8max", 1, 2 }, 4701e04c3fSmrg [QOP_V8ADDS] = {"v8adds", 1, 2 }, 4801e04c3fSmrg [QOP_V8SUBS] = {"v8subs", 1, 2 }, 49af69d88dSmrg [QOP_FMIN] = { "fmin", 1, 2 }, 50af69d88dSmrg [QOP_FMAX] = { "fmax", 1, 2 }, 51af69d88dSmrg [QOP_FMINABS] = { "fminabs", 1, 2 }, 52af69d88dSmrg [QOP_FMAXABS] = { "fmaxabs", 1, 2 }, 53af69d88dSmrg [QOP_FTOI] = { "ftoi", 1, 1 }, 54af69d88dSmrg [QOP_ITOF] = { "itof", 1, 1 }, 5501e04c3fSmrg [QOP_ADD] = { "add", 1, 2 }, 5601e04c3fSmrg [QOP_SUB] = { "sub", 1, 2 }, 5701e04c3fSmrg [QOP_SHR] = { "shr", 1, 2 }, 5801e04c3fSmrg [QOP_ASR] = { "asr", 1, 2 }, 5901e04c3fSmrg [QOP_SHL] = { "shl", 1, 2 }, 6001e04c3fSmrg [QOP_MIN] = { "min", 1, 2 }, 6101e04c3fSmrg [QOP_MIN_NOIMM] = { "min_noimm", 1, 2 }, 6201e04c3fSmrg [QOP_MAX] = { "max", 1, 2 }, 6301e04c3fSmrg [QOP_AND] = { "and", 1, 2 }, 6401e04c3fSmrg [QOP_OR] = { "or", 1, 2 }, 6501e04c3fSmrg [QOP_XOR] = { "xor", 1, 2 }, 6601e04c3fSmrg [QOP_NOT] = { "not", 1, 1 }, 6701e04c3fSmrg 68af69d88dSmrg [QOP_RCP] = { "rcp", 1, 1 }, 69af69d88dSmrg [QOP_RSQ] = { "rsq", 1, 1 }, 7001e04c3fSmrg [QOP_EXP2] = { "exp2", 1, 1 }, 7101e04c3fSmrg [QOP_LOG2] = { "log2", 1, 1 }, 7201e04c3fSmrg [QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0 }, 7301e04c3fSmrg [QOP_MS_MASK] = { "ms_mask", 0, 1, true }, 74af69d88dSmrg [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 }, 75af69d88dSmrg 76af69d88dSmrg [QOP_FRAG_Z] = { "frag_z", 1, 0 }, 7701e04c3fSmrg [QOP_FRAG_W] = { "frag_w", 1, 0 }, 7801e04c3fSmrg 7901e04c3fSmrg [QOP_TEX_RESULT] = { "tex_result", 1, 0, true }, 8001e04c3fSmrg 8101e04c3fSmrg [QOP_THRSW] = { "thrsw", 0, 0, true }, 8201e04c3fSmrg 8301e04c3fSmrg [QOP_LOAD_IMM] = { "load_imm", 0, 1 }, 8401e04c3fSmrg [QOP_LOAD_IMM_U2] = { "load_imm_u2", 0, 1 }, 8501e04c3fSmrg [QOP_LOAD_IMM_I2] = { "load_imm_i2", 0, 1 }, 8601e04c3fSmrg 8701e04c3fSmrg [QOP_ROT_MUL] = { "rot_mul", 0, 2 }, 8801e04c3fSmrg 8901e04c3fSmrg [QOP_BRANCH] = { "branch", 0, 0, true }, 9001e04c3fSmrg [QOP_UNIFORMS_RESET] = { "uniforms_reset", 0, 2, true }, 91af69d88dSmrg}; 92af69d88dSmrg 93af69d88dSmrgstatic const char * 94af69d88dSmrgqir_get_op_name(enum qop qop) 95af69d88dSmrg{ 96af69d88dSmrg if (qop < ARRAY_SIZE(qir_op_info) && qir_op_info[qop].name) 97af69d88dSmrg return qir_op_info[qop].name; 98af69d88dSmrg else 99af69d88dSmrg return "???"; 100af69d88dSmrg} 101af69d88dSmrg 102af69d88dSmrgint 10301e04c3fSmrgqir_get_non_sideband_nsrc(struct qinst *inst) 104af69d88dSmrg{ 10501e04c3fSmrg assert(qir_op_info[inst->op].name); 10601e04c3fSmrg return qir_op_info[inst->op].nsrc; 10701e04c3fSmrg} 10801e04c3fSmrg 10901e04c3fSmrgint 11001e04c3fSmrgqir_get_nsrc(struct qinst *inst) 11101e04c3fSmrg{ 11201e04c3fSmrg assert(qir_op_info[inst->op].name); 11301e04c3fSmrg 11401e04c3fSmrg int nsrc = qir_get_non_sideband_nsrc(inst); 11501e04c3fSmrg 11601e04c3fSmrg /* Normal (non-direct) texture coordinate writes also implicitly load 11701e04c3fSmrg * a uniform for the texture parameters. 11801e04c3fSmrg */ 11901e04c3fSmrg if (qir_is_tex(inst) && inst->dst.file != QFILE_TEX_S_DIRECT) 12001e04c3fSmrg nsrc++; 12101e04c3fSmrg 12201e04c3fSmrg return nsrc; 12301e04c3fSmrg} 12401e04c3fSmrg 12501e04c3fSmrg/* The sideband uniform for textures gets stored after the normal ALU 12601e04c3fSmrg * arguments. 12701e04c3fSmrg */ 12801e04c3fSmrgint 12901e04c3fSmrgqir_get_tex_uniform_src(struct qinst *inst) 13001e04c3fSmrg{ 13101e04c3fSmrg return qir_get_nsrc(inst) - 1; 132af69d88dSmrg} 133af69d88dSmrg 13401e04c3fSmrg/** 13501e04c3fSmrg * Returns whether the instruction has any side effects that must be 13601e04c3fSmrg * preserved. 13701e04c3fSmrg */ 138af69d88dSmrgbool 13901e04c3fSmrgqir_has_side_effects(struct vc4_compile *c, struct qinst *inst) 140af69d88dSmrg{ 14101e04c3fSmrg switch (inst->dst.file) { 14201e04c3fSmrg case QFILE_TLB_Z_WRITE: 14301e04c3fSmrg case QFILE_TLB_COLOR_WRITE: 14401e04c3fSmrg case QFILE_TLB_COLOR_WRITE_MS: 14501e04c3fSmrg case QFILE_TLB_STENCIL_SETUP: 14601e04c3fSmrg case QFILE_TEX_S_DIRECT: 14701e04c3fSmrg case QFILE_TEX_S: 14801e04c3fSmrg case QFILE_TEX_T: 14901e04c3fSmrg case QFILE_TEX_R: 15001e04c3fSmrg case QFILE_TEX_B: 15101e04c3fSmrg return true; 15201e04c3fSmrg default: 15301e04c3fSmrg break; 154af69d88dSmrg } 155af69d88dSmrg 156af69d88dSmrg return qir_op_info[inst->op].has_side_effects; 157af69d88dSmrg} 158af69d88dSmrg 15901e04c3fSmrgbool 16001e04c3fSmrgqir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst) 16101e04c3fSmrg{ 16201e04c3fSmrg /* We can dead-code eliminate varyings, because we only tell the VS 16301e04c3fSmrg * about the live ones at the end. But we have to preserve the 16401e04c3fSmrg * point/line coordinates reads, because they're generated by 16501e04c3fSmrg * fixed-function hardware. 16601e04c3fSmrg */ 16701e04c3fSmrg for (int i = 0; i < qir_get_nsrc(inst); i++) { 16801e04c3fSmrg if (inst->src[i].file == QFILE_VARY && 16901e04c3fSmrg c->input_slots[inst->src[i].index].slot == 0xff) { 17001e04c3fSmrg return true; 17101e04c3fSmrg } 17201e04c3fSmrg 17301e04c3fSmrg if (inst->src[i].file == QFILE_VPM) 17401e04c3fSmrg return true; 17501e04c3fSmrg } 17601e04c3fSmrg 17701e04c3fSmrg if (inst->dst.file == QFILE_VPM) 17801e04c3fSmrg return true; 17901e04c3fSmrg 18001e04c3fSmrg return false; 18101e04c3fSmrg} 18201e04c3fSmrg 18301e04c3fSmrgbool 18401e04c3fSmrgqir_has_uniform_read(struct qinst *inst) 18501e04c3fSmrg{ 18601e04c3fSmrg for (int i = 0; i < qir_get_nsrc(inst); i++) { 18701e04c3fSmrg if (inst->src[i].file == QFILE_UNIF) 18801e04c3fSmrg return true; 18901e04c3fSmrg } 19001e04c3fSmrg 19101e04c3fSmrg return false; 19201e04c3fSmrg} 19301e04c3fSmrg 19401e04c3fSmrgbool 19501e04c3fSmrgqir_is_mul(struct qinst *inst) 19601e04c3fSmrg{ 19701e04c3fSmrg switch (inst->op) { 19801e04c3fSmrg case QOP_MMOV: 19901e04c3fSmrg case QOP_FMUL: 20001e04c3fSmrg case QOP_MUL24: 20101e04c3fSmrg case QOP_V8MULD: 20201e04c3fSmrg case QOP_V8MIN: 20301e04c3fSmrg case QOP_V8MAX: 20401e04c3fSmrg case QOP_V8ADDS: 20501e04c3fSmrg case QOP_V8SUBS: 20601e04c3fSmrg case QOP_ROT_MUL: 20701e04c3fSmrg return true; 20801e04c3fSmrg default: 20901e04c3fSmrg return false; 21001e04c3fSmrg } 21101e04c3fSmrg} 21201e04c3fSmrg 21301e04c3fSmrgbool 21401e04c3fSmrgqir_is_float_input(struct qinst *inst) 21501e04c3fSmrg{ 21601e04c3fSmrg switch (inst->op) { 21701e04c3fSmrg case QOP_FMOV: 21801e04c3fSmrg case QOP_FMUL: 21901e04c3fSmrg case QOP_FADD: 22001e04c3fSmrg case QOP_FSUB: 22101e04c3fSmrg case QOP_FMIN: 22201e04c3fSmrg case QOP_FMAX: 22301e04c3fSmrg case QOP_FMINABS: 22401e04c3fSmrg case QOP_FMAXABS: 22501e04c3fSmrg case QOP_FTOI: 22601e04c3fSmrg return true; 22701e04c3fSmrg default: 22801e04c3fSmrg return false; 22901e04c3fSmrg } 23001e04c3fSmrg} 23101e04c3fSmrg 23201e04c3fSmrgbool 23301e04c3fSmrgqir_is_raw_mov(struct qinst *inst) 23401e04c3fSmrg{ 23501e04c3fSmrg return ((inst->op == QOP_MOV || 23601e04c3fSmrg inst->op == QOP_FMOV || 23701e04c3fSmrg inst->op == QOP_MMOV) && 23801e04c3fSmrg inst->cond == QPU_COND_ALWAYS && 23901e04c3fSmrg !inst->dst.pack && 24001e04c3fSmrg !inst->src[0].pack); 24101e04c3fSmrg} 24201e04c3fSmrg 24301e04c3fSmrgbool 24401e04c3fSmrgqir_is_tex(struct qinst *inst) 24501e04c3fSmrg{ 24601e04c3fSmrg switch (inst->dst.file) { 24701e04c3fSmrg case QFILE_TEX_S_DIRECT: 24801e04c3fSmrg case QFILE_TEX_S: 24901e04c3fSmrg case QFILE_TEX_T: 25001e04c3fSmrg case QFILE_TEX_R: 25101e04c3fSmrg case QFILE_TEX_B: 25201e04c3fSmrg return true; 25301e04c3fSmrg default: 25401e04c3fSmrg return false; 25501e04c3fSmrg } 25601e04c3fSmrg} 25701e04c3fSmrg 25801e04c3fSmrgbool 25901e04c3fSmrgqir_has_implicit_tex_uniform(struct qinst *inst) 26001e04c3fSmrg{ 26101e04c3fSmrg switch (inst->dst.file) { 26201e04c3fSmrg case QFILE_TEX_S: 26301e04c3fSmrg case QFILE_TEX_T: 26401e04c3fSmrg case QFILE_TEX_R: 26501e04c3fSmrg case QFILE_TEX_B: 26601e04c3fSmrg return true; 26701e04c3fSmrg default: 26801e04c3fSmrg return false; 26901e04c3fSmrg } 27001e04c3fSmrg} 27101e04c3fSmrg 27201e04c3fSmrgbool 27301e04c3fSmrgqir_depends_on_flags(struct qinst *inst) 27401e04c3fSmrg{ 27501e04c3fSmrg if (inst->op == QOP_BRANCH) { 27601e04c3fSmrg return inst->cond != QPU_COND_BRANCH_ALWAYS; 27701e04c3fSmrg } else { 27801e04c3fSmrg return (inst->cond != QPU_COND_ALWAYS && 27901e04c3fSmrg inst->cond != QPU_COND_NEVER); 28001e04c3fSmrg } 28101e04c3fSmrg} 28201e04c3fSmrg 28301e04c3fSmrgbool 28401e04c3fSmrgqir_writes_r4(struct qinst *inst) 28501e04c3fSmrg{ 28601e04c3fSmrg switch (inst->op) { 28701e04c3fSmrg case QOP_TEX_RESULT: 28801e04c3fSmrg case QOP_TLB_COLOR_READ: 28901e04c3fSmrg case QOP_RCP: 29001e04c3fSmrg case QOP_RSQ: 29101e04c3fSmrg case QOP_EXP2: 29201e04c3fSmrg case QOP_LOG2: 29301e04c3fSmrg return true; 29401e04c3fSmrg default: 29501e04c3fSmrg return false; 29601e04c3fSmrg } 29701e04c3fSmrg} 29801e04c3fSmrg 29901e04c3fSmrguint8_t 30001e04c3fSmrgqir_channels_written(struct qinst *inst) 30101e04c3fSmrg{ 30201e04c3fSmrg if (qir_is_mul(inst)) { 30301e04c3fSmrg switch (inst->dst.pack) { 30401e04c3fSmrg case QPU_PACK_MUL_NOP: 30501e04c3fSmrg case QPU_PACK_MUL_8888: 30601e04c3fSmrg return 0xf; 30701e04c3fSmrg case QPU_PACK_MUL_8A: 30801e04c3fSmrg return 0x1; 30901e04c3fSmrg case QPU_PACK_MUL_8B: 31001e04c3fSmrg return 0x2; 31101e04c3fSmrg case QPU_PACK_MUL_8C: 31201e04c3fSmrg return 0x4; 31301e04c3fSmrg case QPU_PACK_MUL_8D: 31401e04c3fSmrg return 0x8; 31501e04c3fSmrg } 31601e04c3fSmrg } else { 31701e04c3fSmrg switch (inst->dst.pack) { 31801e04c3fSmrg case QPU_PACK_A_NOP: 31901e04c3fSmrg case QPU_PACK_A_8888: 32001e04c3fSmrg case QPU_PACK_A_8888_SAT: 32101e04c3fSmrg case QPU_PACK_A_32_SAT: 32201e04c3fSmrg return 0xf; 32301e04c3fSmrg case QPU_PACK_A_8A: 32401e04c3fSmrg case QPU_PACK_A_8A_SAT: 32501e04c3fSmrg return 0x1; 32601e04c3fSmrg case QPU_PACK_A_8B: 32701e04c3fSmrg case QPU_PACK_A_8B_SAT: 32801e04c3fSmrg return 0x2; 32901e04c3fSmrg case QPU_PACK_A_8C: 33001e04c3fSmrg case QPU_PACK_A_8C_SAT: 33101e04c3fSmrg return 0x4; 33201e04c3fSmrg case QPU_PACK_A_8D: 33301e04c3fSmrg case QPU_PACK_A_8D_SAT: 33401e04c3fSmrg return 0x8; 33501e04c3fSmrg case QPU_PACK_A_16A: 33601e04c3fSmrg case QPU_PACK_A_16A_SAT: 33701e04c3fSmrg return 0x3; 33801e04c3fSmrg case QPU_PACK_A_16B: 33901e04c3fSmrg case QPU_PACK_A_16B_SAT: 34001e04c3fSmrg return 0xc; 34101e04c3fSmrg } 34201e04c3fSmrg } 34301e04c3fSmrg unreachable("Bad pack field"); 34401e04c3fSmrg} 34501e04c3fSmrg 34601e04c3fSmrgchar * 34701e04c3fSmrgqir_describe_uniform(enum quniform_contents contents, uint32_t data, 34801e04c3fSmrg const uint32_t *uniforms) 34901e04c3fSmrg{ 35001e04c3fSmrg static const char *quniform_names[] = { 35101e04c3fSmrg [QUNIFORM_VIEWPORT_X_SCALE] = "vp_x_scale", 35201e04c3fSmrg [QUNIFORM_VIEWPORT_Y_SCALE] = "vp_y_scale", 35301e04c3fSmrg [QUNIFORM_VIEWPORT_Z_OFFSET] = "vp_z_offset", 35401e04c3fSmrg [QUNIFORM_VIEWPORT_Z_SCALE] = "vp_z_scale", 35501e04c3fSmrg [QUNIFORM_TEXTURE_CONFIG_P0] = "tex_p0", 35601e04c3fSmrg [QUNIFORM_TEXTURE_CONFIG_P1] = "tex_p1", 35701e04c3fSmrg [QUNIFORM_TEXTURE_CONFIG_P2] = "tex_p2", 35801e04c3fSmrg [QUNIFORM_TEXTURE_FIRST_LEVEL] = "tex_first_level", 35901e04c3fSmrg }; 36001e04c3fSmrg 36101e04c3fSmrg switch (contents) { 36201e04c3fSmrg case QUNIFORM_CONSTANT: 36301e04c3fSmrg return ralloc_asprintf(NULL, "0x%08x / %f", data, uif(data)); 36401e04c3fSmrg case QUNIFORM_UNIFORM: 36501e04c3fSmrg if (uniforms) { 36601e04c3fSmrg uint32_t unif = uniforms[data]; 36701e04c3fSmrg return ralloc_asprintf(NULL, "unif[%d] = 0x%08x / %f", 36801e04c3fSmrg data, unif, uif(unif)); 36901e04c3fSmrg } else { 37001e04c3fSmrg return ralloc_asprintf(NULL, "unif[%d]", data); 37101e04c3fSmrg } 37201e04c3fSmrg 37301e04c3fSmrg case QUNIFORM_TEXTURE_CONFIG_P0: 37401e04c3fSmrg case QUNIFORM_TEXTURE_CONFIG_P1: 37501e04c3fSmrg case QUNIFORM_TEXTURE_CONFIG_P2: 37601e04c3fSmrg case QUNIFORM_TEXTURE_FIRST_LEVEL: 37701e04c3fSmrg return ralloc_asprintf(NULL, "%s[%d]", 37801e04c3fSmrg quniform_names[contents], data); 37901e04c3fSmrg 38001e04c3fSmrg default: 38101e04c3fSmrg if (contents < ARRAY_SIZE(quniform_names) && 38201e04c3fSmrg quniform_names[contents]) { 38301e04c3fSmrg return ralloc_asprintf(NULL, "%s", 38401e04c3fSmrg quniform_names[contents]); 38501e04c3fSmrg } else { 38601e04c3fSmrg return ralloc_asprintf(NULL, "??? %d", contents); 38701e04c3fSmrg } 38801e04c3fSmrg } 38901e04c3fSmrg} 39001e04c3fSmrg 391af69d88dSmrgstatic void 39201e04c3fSmrgqir_print_reg(struct vc4_compile *c, struct qreg reg, bool write) 393af69d88dSmrg{ 39401e04c3fSmrg static const char *files[] = { 395af69d88dSmrg [QFILE_TEMP] = "t", 396af69d88dSmrg [QFILE_VARY] = "v", 39701e04c3fSmrg [QFILE_TLB_COLOR_WRITE] = "tlb_c", 39801e04c3fSmrg [QFILE_TLB_COLOR_WRITE_MS] = "tlb_c_ms", 39901e04c3fSmrg [QFILE_TLB_Z_WRITE] = "tlb_z", 40001e04c3fSmrg [QFILE_TLB_STENCIL_SETUP] = "tlb_stencil", 40101e04c3fSmrg [QFILE_FRAG_X] = "frag_x", 40201e04c3fSmrg [QFILE_FRAG_Y] = "frag_y", 40301e04c3fSmrg [QFILE_FRAG_REV_FLAG] = "frag_rev_flag", 40401e04c3fSmrg [QFILE_QPU_ELEMENT] = "elem", 40501e04c3fSmrg [QFILE_TEX_S_DIRECT] = "tex_s_direct", 40601e04c3fSmrg [QFILE_TEX_S] = "tex_s", 40701e04c3fSmrg [QFILE_TEX_T] = "tex_t", 40801e04c3fSmrg [QFILE_TEX_R] = "tex_r", 40901e04c3fSmrg [QFILE_TEX_B] = "tex_b", 410af69d88dSmrg }; 411af69d88dSmrg 41201e04c3fSmrg switch (reg.file) { 41301e04c3fSmrg 41401e04c3fSmrg case QFILE_NULL: 415af69d88dSmrg fprintf(stderr, "null"); 41601e04c3fSmrg break; 41701e04c3fSmrg 41801e04c3fSmrg case QFILE_LOAD_IMM: 41901e04c3fSmrg fprintf(stderr, "0x%08x (%f)", reg.index, uif(reg.index)); 42001e04c3fSmrg break; 42101e04c3fSmrg 42201e04c3fSmrg case QFILE_SMALL_IMM: 42301e04c3fSmrg if ((int)reg.index >= -16 && (int)reg.index <= 15) 42401e04c3fSmrg fprintf(stderr, "%d", reg.index); 42501e04c3fSmrg else 42601e04c3fSmrg fprintf(stderr, "%f", uif(reg.index)); 42701e04c3fSmrg break; 42801e04c3fSmrg 42901e04c3fSmrg case QFILE_VPM: 43001e04c3fSmrg if (write) { 43101e04c3fSmrg fprintf(stderr, "vpm"); 43201e04c3fSmrg } else { 43301e04c3fSmrg fprintf(stderr, "vpm%d.%d", 43401e04c3fSmrg reg.index / 4, reg.index % 4); 43501e04c3fSmrg } 43601e04c3fSmrg break; 43701e04c3fSmrg 43801e04c3fSmrg case QFILE_TLB_COLOR_WRITE: 43901e04c3fSmrg case QFILE_TLB_COLOR_WRITE_MS: 44001e04c3fSmrg case QFILE_TLB_Z_WRITE: 44101e04c3fSmrg case QFILE_TLB_STENCIL_SETUP: 44201e04c3fSmrg case QFILE_TEX_S_DIRECT: 44301e04c3fSmrg case QFILE_TEX_S: 44401e04c3fSmrg case QFILE_TEX_T: 44501e04c3fSmrg case QFILE_TEX_R: 44601e04c3fSmrg case QFILE_TEX_B: 44701e04c3fSmrg fprintf(stderr, "%s", files[reg.file]); 44801e04c3fSmrg break; 44901e04c3fSmrg 45001e04c3fSmrg case QFILE_UNIF: { 45101e04c3fSmrg char *desc = qir_describe_uniform(c->uniform_contents[reg.index], 45201e04c3fSmrg c->uniform_data[reg.index], 45301e04c3fSmrg NULL); 45401e04c3fSmrg fprintf(stderr, "u%d (%s)", reg.index, desc); 45501e04c3fSmrg ralloc_free(desc); 45601e04c3fSmrg break; 45701e04c3fSmrg } 45801e04c3fSmrg 45901e04c3fSmrg default: 460af69d88dSmrg fprintf(stderr, "%s%d", files[reg.file], reg.index); 46101e04c3fSmrg break; 46201e04c3fSmrg } 463af69d88dSmrg} 464af69d88dSmrg 465af69d88dSmrgvoid 46601e04c3fSmrgqir_dump_inst(struct vc4_compile *c, struct qinst *inst) 467af69d88dSmrg{ 46801e04c3fSmrg fprintf(stderr, "%s", qir_get_op_name(inst->op)); 46901e04c3fSmrg if (inst->op == QOP_BRANCH) 47001e04c3fSmrg vc4_qpu_disasm_cond_branch(stderr, inst->cond); 47101e04c3fSmrg else 47201e04c3fSmrg vc4_qpu_disasm_cond(stderr, inst->cond); 47301e04c3fSmrg if (inst->sf) 47401e04c3fSmrg fprintf(stderr, ".sf"); 47501e04c3fSmrg fprintf(stderr, " "); 47601e04c3fSmrg 47701e04c3fSmrg if (inst->op != QOP_BRANCH) { 47801e04c3fSmrg qir_print_reg(c, inst->dst, true); 47901e04c3fSmrg if (inst->dst.pack) { 48001e04c3fSmrg if (inst->dst.pack) { 48101e04c3fSmrg if (qir_is_mul(inst)) 48201e04c3fSmrg vc4_qpu_disasm_pack_mul(stderr, inst->dst.pack); 48301e04c3fSmrg else 48401e04c3fSmrg vc4_qpu_disasm_pack_a(stderr, inst->dst.pack); 48501e04c3fSmrg } 48601e04c3fSmrg } 48701e04c3fSmrg } 488af69d88dSmrg 48901e04c3fSmrg for (int i = 0; i < qir_get_nsrc(inst); i++) { 490af69d88dSmrg fprintf(stderr, ", "); 49101e04c3fSmrg qir_print_reg(c, inst->src[i], false); 49201e04c3fSmrg vc4_qpu_disasm_unpack(stderr, inst->src[i].pack); 493af69d88dSmrg } 494af69d88dSmrg} 495af69d88dSmrg 496af69d88dSmrgvoid 49701e04c3fSmrgqir_dump(struct vc4_compile *c) 498af69d88dSmrg{ 49901e04c3fSmrg int ip = 0; 50001e04c3fSmrg int pressure = 0; 50101e04c3fSmrg 50201e04c3fSmrg qir_for_each_block(block, c) { 50301e04c3fSmrg fprintf(stderr, "BLOCK %d:\n", block->index); 50401e04c3fSmrg qir_for_each_inst(inst, block) { 50501e04c3fSmrg if (c->temp_start) { 50601e04c3fSmrg bool first = true; 50701e04c3fSmrg 50801e04c3fSmrg fprintf(stderr, "%3d ", pressure); 50901e04c3fSmrg 51001e04c3fSmrg for (int i = 0; i < c->num_temps; i++) { 51101e04c3fSmrg if (c->temp_start[i] != ip) 51201e04c3fSmrg continue; 51301e04c3fSmrg 51401e04c3fSmrg if (first) { 51501e04c3fSmrg first = false; 51601e04c3fSmrg } else { 51701e04c3fSmrg fprintf(stderr, ", "); 51801e04c3fSmrg } 51901e04c3fSmrg fprintf(stderr, "S%4d", i); 52001e04c3fSmrg pressure++; 52101e04c3fSmrg } 52201e04c3fSmrg 52301e04c3fSmrg if (first) 52401e04c3fSmrg fprintf(stderr, " "); 52501e04c3fSmrg else 52601e04c3fSmrg fprintf(stderr, " "); 52701e04c3fSmrg } 52801e04c3fSmrg 52901e04c3fSmrg if (c->temp_end) { 53001e04c3fSmrg bool first = true; 53101e04c3fSmrg 53201e04c3fSmrg for (int i = 0; i < c->num_temps; i++) { 53301e04c3fSmrg if (c->temp_end[i] != ip) 53401e04c3fSmrg continue; 53501e04c3fSmrg 53601e04c3fSmrg if (first) { 53701e04c3fSmrg first = false; 53801e04c3fSmrg } else { 53901e04c3fSmrg fprintf(stderr, ", "); 54001e04c3fSmrg } 54101e04c3fSmrg fprintf(stderr, "E%4d", i); 54201e04c3fSmrg pressure--; 54301e04c3fSmrg } 54401e04c3fSmrg 54501e04c3fSmrg if (first) 54601e04c3fSmrg fprintf(stderr, " "); 54701e04c3fSmrg else 54801e04c3fSmrg fprintf(stderr, " "); 54901e04c3fSmrg } 55001e04c3fSmrg 55101e04c3fSmrg qir_dump_inst(c, inst); 55201e04c3fSmrg fprintf(stderr, "\n"); 55301e04c3fSmrg ip++; 55401e04c3fSmrg } 55501e04c3fSmrg if (block->successors[1]) { 55601e04c3fSmrg fprintf(stderr, "-> BLOCK %d, %d\n", 55701e04c3fSmrg block->successors[0]->index, 55801e04c3fSmrg block->successors[1]->index); 55901e04c3fSmrg } else if (block->successors[0]) { 56001e04c3fSmrg fprintf(stderr, "-> BLOCK %d\n", 56101e04c3fSmrg block->successors[0]->index); 56201e04c3fSmrg } 563af69d88dSmrg } 564af69d88dSmrg} 565af69d88dSmrg 566af69d88dSmrgstruct qreg 56701e04c3fSmrgqir_get_temp(struct vc4_compile *c) 568af69d88dSmrg{ 569af69d88dSmrg struct qreg reg; 570af69d88dSmrg 571af69d88dSmrg reg.file = QFILE_TEMP; 572af69d88dSmrg reg.index = c->num_temps++; 57301e04c3fSmrg reg.pack = 0; 57401e04c3fSmrg 57501e04c3fSmrg if (c->num_temps > c->defs_array_size) { 57601e04c3fSmrg uint32_t old_size = c->defs_array_size; 57701e04c3fSmrg c->defs_array_size = MAX2(old_size * 2, 16); 57801e04c3fSmrg c->defs = reralloc(c, c->defs, struct qinst *, 57901e04c3fSmrg c->defs_array_size); 58001e04c3fSmrg memset(&c->defs[old_size], 0, 58101e04c3fSmrg sizeof(c->defs[0]) * (c->defs_array_size - old_size)); 58201e04c3fSmrg } 583af69d88dSmrg 584af69d88dSmrg return reg; 585af69d88dSmrg} 586af69d88dSmrg 587af69d88dSmrgstruct qinst * 588af69d88dSmrgqir_inst(enum qop op, struct qreg dst, struct qreg src0, struct qreg src1) 589af69d88dSmrg{ 590af69d88dSmrg struct qinst *inst = CALLOC_STRUCT(qinst); 591af69d88dSmrg 592af69d88dSmrg inst->op = op; 593af69d88dSmrg inst->dst = dst; 594af69d88dSmrg inst->src[0] = src0; 595af69d88dSmrg inst->src[1] = src1; 59601e04c3fSmrg inst->cond = QPU_COND_ALWAYS; 597af69d88dSmrg 598af69d88dSmrg return inst; 599af69d88dSmrg} 600af69d88dSmrg 60101e04c3fSmrgstatic void 60201e04c3fSmrgqir_emit(struct vc4_compile *c, struct qinst *inst) 60301e04c3fSmrg{ 60401e04c3fSmrg list_addtail(&inst->link, &c->cur_block->instructions); 60501e04c3fSmrg} 60601e04c3fSmrg 60701e04c3fSmrg/* Updates inst to write to a new temporary, emits it, and notes the def. */ 60801e04c3fSmrgstruct qreg 60901e04c3fSmrgqir_emit_def(struct vc4_compile *c, struct qinst *inst) 61001e04c3fSmrg{ 61101e04c3fSmrg assert(inst->dst.file == QFILE_NULL); 61201e04c3fSmrg 61301e04c3fSmrg inst->dst = qir_get_temp(c); 61401e04c3fSmrg 61501e04c3fSmrg if (inst->dst.file == QFILE_TEMP) 61601e04c3fSmrg c->defs[inst->dst.index] = inst; 61701e04c3fSmrg 61801e04c3fSmrg qir_emit(c, inst); 61901e04c3fSmrg 62001e04c3fSmrg return inst->dst; 62101e04c3fSmrg} 62201e04c3fSmrg 623af69d88dSmrgstruct qinst * 62401e04c3fSmrgqir_emit_nondef(struct vc4_compile *c, struct qinst *inst) 625af69d88dSmrg{ 62601e04c3fSmrg if (inst->dst.file == QFILE_TEMP) 62701e04c3fSmrg c->defs[inst->dst.index] = NULL; 628af69d88dSmrg 62901e04c3fSmrg qir_emit(c, inst); 630af69d88dSmrg 631af69d88dSmrg return inst; 632af69d88dSmrg} 633af69d88dSmrg 63401e04c3fSmrgbool 63501e04c3fSmrgqir_reg_equals(struct qreg a, struct qreg b) 63601e04c3fSmrg{ 63701e04c3fSmrg return a.file == b.file && a.index == b.index && a.pack == b.pack; 63801e04c3fSmrg} 63901e04c3fSmrg 64001e04c3fSmrgstruct qblock * 64101e04c3fSmrgqir_new_block(struct vc4_compile *c) 64201e04c3fSmrg{ 64301e04c3fSmrg struct qblock *block = rzalloc(c, struct qblock); 64401e04c3fSmrg 64501e04c3fSmrg list_inithead(&block->instructions); 64601e04c3fSmrg list_inithead(&block->qpu_inst_list); 64701e04c3fSmrg 64801e04c3fSmrg block->predecessors = _mesa_set_create(block, 64901e04c3fSmrg _mesa_hash_pointer, 65001e04c3fSmrg _mesa_key_pointer_equal); 65101e04c3fSmrg 65201e04c3fSmrg block->index = c->next_block_index++; 65301e04c3fSmrg 65401e04c3fSmrg return block; 65501e04c3fSmrg} 65601e04c3fSmrg 657af69d88dSmrgvoid 65801e04c3fSmrgqir_set_emit_block(struct vc4_compile *c, struct qblock *block) 659af69d88dSmrg{ 66001e04c3fSmrg c->cur_block = block; 66101e04c3fSmrg list_addtail(&block->link, &c->blocks); 662af69d88dSmrg} 663af69d88dSmrg 66401e04c3fSmrgstruct qblock * 66501e04c3fSmrgqir_entry_block(struct vc4_compile *c) 666af69d88dSmrg{ 66701e04c3fSmrg return list_first_entry(&c->blocks, struct qblock, link); 668af69d88dSmrg} 669af69d88dSmrg 67001e04c3fSmrgstruct qblock * 67101e04c3fSmrgqir_exit_block(struct vc4_compile *c) 67201e04c3fSmrg{ 67301e04c3fSmrg return list_last_entry(&c->blocks, struct qblock, link); 67401e04c3fSmrg} 67501e04c3fSmrg 67601e04c3fSmrgvoid 67701e04c3fSmrgqir_link_blocks(struct qblock *predecessor, struct qblock *successor) 67801e04c3fSmrg{ 67901e04c3fSmrg _mesa_set_add(successor->predecessors, predecessor); 68001e04c3fSmrg if (predecessor->successors[0]) { 68101e04c3fSmrg assert(!predecessor->successors[1]); 68201e04c3fSmrg predecessor->successors[1] = successor; 68301e04c3fSmrg } else { 68401e04c3fSmrg predecessor->successors[0] = successor; 68501e04c3fSmrg } 68601e04c3fSmrg} 68701e04c3fSmrg 68801e04c3fSmrgstruct vc4_compile * 689af69d88dSmrgqir_compile_init(void) 690af69d88dSmrg{ 69101e04c3fSmrg struct vc4_compile *c = rzalloc(NULL, struct vc4_compile); 692af69d88dSmrg 69301e04c3fSmrg list_inithead(&c->blocks); 69401e04c3fSmrg qir_set_emit_block(c, qir_new_block(c)); 69501e04c3fSmrg c->last_top_block = c->cur_block; 69601e04c3fSmrg 69701e04c3fSmrg c->output_position_index = -1; 69801e04c3fSmrg c->output_color_index = -1; 69901e04c3fSmrg c->output_point_size_index = -1; 70001e04c3fSmrg c->output_sample_mask_index = -1; 70101e04c3fSmrg 70201e04c3fSmrg c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer, 70301e04c3fSmrg _mesa_key_pointer_equal); 704af69d88dSmrg 705af69d88dSmrg return c; 706af69d88dSmrg} 707af69d88dSmrg 708af69d88dSmrgvoid 70901e04c3fSmrgqir_remove_instruction(struct vc4_compile *c, struct qinst *qinst) 710af69d88dSmrg{ 71101e04c3fSmrg if (qinst->dst.file == QFILE_TEMP) 71201e04c3fSmrg c->defs[qinst->dst.index] = NULL; 71301e04c3fSmrg 71401e04c3fSmrg list_del(&qinst->link); 71501e04c3fSmrg free(qinst); 71601e04c3fSmrg} 71701e04c3fSmrg 71801e04c3fSmrgstruct qreg 71901e04c3fSmrgqir_follow_movs(struct vc4_compile *c, struct qreg reg) 72001e04c3fSmrg{ 72101e04c3fSmrg int pack = reg.pack; 72201e04c3fSmrg 72301e04c3fSmrg while (reg.file == QFILE_TEMP && 72401e04c3fSmrg c->defs[reg.index] && 72501e04c3fSmrg (c->defs[reg.index]->op == QOP_MOV || 72601e04c3fSmrg c->defs[reg.index]->op == QOP_FMOV || 72701e04c3fSmrg c->defs[reg.index]->op == QOP_MMOV)&& 72801e04c3fSmrg !c->defs[reg.index]->dst.pack && 72901e04c3fSmrg !c->defs[reg.index]->src[0].pack) { 73001e04c3fSmrg reg = c->defs[reg.index]->src[0]; 73101e04c3fSmrg } 73201e04c3fSmrg 73301e04c3fSmrg reg.pack = pack; 73401e04c3fSmrg return reg; 73501e04c3fSmrg} 73601e04c3fSmrg 73701e04c3fSmrgvoid 73801e04c3fSmrgqir_compile_destroy(struct vc4_compile *c) 73901e04c3fSmrg{ 74001e04c3fSmrg qir_for_each_block(block, c) { 7417ec681f3Smrg while (!list_is_empty(&block->instructions)) { 74201e04c3fSmrg struct qinst *qinst = 74301e04c3fSmrg list_first_entry(&block->instructions, 74401e04c3fSmrg struct qinst, link); 74501e04c3fSmrg qir_remove_instruction(c, qinst); 74601e04c3fSmrg } 74701e04c3fSmrg } 74801e04c3fSmrg 74901e04c3fSmrg ralloc_free(c); 750af69d88dSmrg} 751af69d88dSmrg 752af69d88dSmrgconst char * 753af69d88dSmrgqir_get_stage_name(enum qstage stage) 754af69d88dSmrg{ 755af69d88dSmrg static const char *names[] = { 756af69d88dSmrg [QSTAGE_FRAG] = "FS", 757af69d88dSmrg [QSTAGE_VERT] = "VS", 758af69d88dSmrg [QSTAGE_COORD] = "CS", 759af69d88dSmrg }; 760af69d88dSmrg 761af69d88dSmrg return names[stage]; 762af69d88dSmrg} 763af69d88dSmrg 76401e04c3fSmrgstruct qreg 76501e04c3fSmrgqir_uniform(struct vc4_compile *c, 76601e04c3fSmrg enum quniform_contents contents, 76701e04c3fSmrg uint32_t data) 76801e04c3fSmrg{ 76901e04c3fSmrg for (int i = 0; i < c->num_uniforms; i++) { 77001e04c3fSmrg if (c->uniform_contents[i] == contents && 77101e04c3fSmrg c->uniform_data[i] == data) { 77201e04c3fSmrg return qir_reg(QFILE_UNIF, i); 77301e04c3fSmrg } 77401e04c3fSmrg } 77501e04c3fSmrg 77601e04c3fSmrg uint32_t uniform = c->num_uniforms++; 77701e04c3fSmrg 77801e04c3fSmrg if (uniform >= c->uniform_array_size) { 77901e04c3fSmrg c->uniform_array_size = MAX2(MAX2(16, uniform + 1), 78001e04c3fSmrg c->uniform_array_size * 2); 78101e04c3fSmrg 78201e04c3fSmrg c->uniform_data = reralloc(c, c->uniform_data, 78301e04c3fSmrg uint32_t, 78401e04c3fSmrg c->uniform_array_size); 78501e04c3fSmrg c->uniform_contents = reralloc(c, c->uniform_contents, 78601e04c3fSmrg enum quniform_contents, 78701e04c3fSmrg c->uniform_array_size); 78801e04c3fSmrg } 78901e04c3fSmrg 79001e04c3fSmrg c->uniform_contents[uniform] = contents; 79101e04c3fSmrg c->uniform_data[uniform] = data; 79201e04c3fSmrg 79301e04c3fSmrg return qir_reg(QFILE_UNIF, uniform); 79401e04c3fSmrg} 79501e04c3fSmrg 79601e04c3fSmrgvoid 79701e04c3fSmrgqir_SF(struct vc4_compile *c, struct qreg src) 79801e04c3fSmrg{ 79901e04c3fSmrg struct qinst *last_inst = NULL; 80001e04c3fSmrg 8017ec681f3Smrg if (!list_is_empty(&c->cur_block->instructions)) 80201e04c3fSmrg last_inst = (struct qinst *)c->cur_block->instructions.prev; 80301e04c3fSmrg 80401e04c3fSmrg /* We don't have any way to guess which kind of MOV is implied. */ 80501e04c3fSmrg assert(!src.pack); 80601e04c3fSmrg 80701e04c3fSmrg if (src.file != QFILE_TEMP || 80801e04c3fSmrg !c->defs[src.index] || 80901e04c3fSmrg last_inst != c->defs[src.index]) { 81001e04c3fSmrg last_inst = qir_MOV_dest(c, qir_reg(QFILE_NULL, 0), src); 81101e04c3fSmrg last_inst = (struct qinst *)c->cur_block->instructions.prev; 81201e04c3fSmrg } 81301e04c3fSmrg last_inst->sf = true; 81401e04c3fSmrg} 81501e04c3fSmrg 816af69d88dSmrg#define OPTPASS(func) \ 817af69d88dSmrg do { \ 818af69d88dSmrg bool stage_progress = func(c); \ 819af69d88dSmrg if (stage_progress) { \ 820af69d88dSmrg progress = true; \ 821af69d88dSmrg if (print_opt_debug) { \ 822af69d88dSmrg fprintf(stderr, \ 823af69d88dSmrg "QIR opt pass %2d: %s progress\n", \ 824af69d88dSmrg pass, #func); \ 825af69d88dSmrg } \ 82601e04c3fSmrg qir_validate(c); \ 827af69d88dSmrg } \ 828af69d88dSmrg } while (0) 829af69d88dSmrg 830af69d88dSmrgvoid 83101e04c3fSmrgqir_optimize(struct vc4_compile *c) 832af69d88dSmrg{ 833af69d88dSmrg bool print_opt_debug = false; 834af69d88dSmrg int pass = 1; 835af69d88dSmrg 836af69d88dSmrg while (true) { 837af69d88dSmrg bool progress = false; 838af69d88dSmrg 839af69d88dSmrg OPTPASS(qir_opt_algebraic); 84001e04c3fSmrg OPTPASS(qir_opt_constant_folding); 841af69d88dSmrg OPTPASS(qir_opt_copy_propagation); 84201e04c3fSmrg OPTPASS(qir_opt_peephole_sf); 843af69d88dSmrg OPTPASS(qir_opt_dead_code); 84401e04c3fSmrg OPTPASS(qir_opt_small_immediates); 84501e04c3fSmrg OPTPASS(qir_opt_vpm); 84601e04c3fSmrg OPTPASS(qir_opt_coalesce_ff_writes); 847af69d88dSmrg 848af69d88dSmrg if (!progress) 849af69d88dSmrg break; 850af69d88dSmrg 851af69d88dSmrg pass++; 852af69d88dSmrg } 853af69d88dSmrg} 854