1af69d88dSmrg/* 2af69d88dSmrg * Copyright © 2014 Broadcom 3af69d88dSmrg * 4af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 5af69d88dSmrg * copy of this software and associated documentation files (the "Software"), 6af69d88dSmrg * to deal in the Software without restriction, including without limitation 7af69d88dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8af69d88dSmrg * and/or sell copies of the Software, and to permit persons to whom the 9af69d88dSmrg * Software is furnished to do so, subject to the following conditions: 10af69d88dSmrg * 11af69d88dSmrg * The above copyright notice and this permission notice (including the next 12af69d88dSmrg * paragraph) shall be included in all copies or substantial portions of the 13af69d88dSmrg * Software. 14af69d88dSmrg * 15af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16af69d88dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17af69d88dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18af69d88dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19af69d88dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20af69d88dSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21af69d88dSmrg * IN THE SOFTWARE. 22af69d88dSmrg */ 23af69d88dSmrg 24af69d88dSmrg#include <stdbool.h> 2501e04c3fSmrg#include "util/ralloc.h" 2601e04c3fSmrg#include "vc4_qir.h" 27af69d88dSmrg#include "vc4_qpu.h" 28af69d88dSmrg 2901e04c3fSmrg#define QPU_MUX(mux, muxfield) \ 3001e04c3fSmrg QPU_SET_FIELD(mux != QPU_MUX_SMALL_IMM ? mux : QPU_MUX_B, muxfield) 3101e04c3fSmrg 32af69d88dSmrgstatic uint64_t 33af69d88dSmrgset_src_raddr(uint64_t inst, struct qpu_reg src) 34af69d88dSmrg{ 35af69d88dSmrg if (src.mux == QPU_MUX_A) { 3601e04c3fSmrg assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP || 37af69d88dSmrg QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr); 3801e04c3fSmrg return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A); 39af69d88dSmrg } 40af69d88dSmrg 41af69d88dSmrg if (src.mux == QPU_MUX_B) { 4201e04c3fSmrg assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP || 4301e04c3fSmrg QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) && 4401e04c3fSmrg QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM); 4501e04c3fSmrg return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B); 46af69d88dSmrg } 47af69d88dSmrg 4801e04c3fSmrg if (src.mux == QPU_MUX_SMALL_IMM) { 4901e04c3fSmrg if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) { 5001e04c3fSmrg assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr); 5101e04c3fSmrg } else { 5201e04c3fSmrg inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM); 5301e04c3fSmrg assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP); 5401e04c3fSmrg } 5501e04c3fSmrg return ((inst & ~QPU_RADDR_B_MASK) | 5601e04c3fSmrg QPU_SET_FIELD(src.addr, QPU_RADDR_B)); 5701e04c3fSmrg } 58af69d88dSmrg 59af69d88dSmrg return inst; 60af69d88dSmrg} 61af69d88dSmrg 62af69d88dSmrguint64_t 6301e04c3fSmrgqpu_NOP() 64af69d88dSmrg{ 65af69d88dSmrg uint64_t inst = 0; 66af69d88dSmrg 6701e04c3fSmrg inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD); 68af69d88dSmrg inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL); 6901e04c3fSmrg 7001e04c3fSmrg /* Note: These field values are actually non-zero */ 7101e04c3fSmrg inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); 72af69d88dSmrg inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 7301e04c3fSmrg inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 7401e04c3fSmrg inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 75af69d88dSmrg inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 76af69d88dSmrg 77af69d88dSmrg return inst; 78af69d88dSmrg} 79af69d88dSmrg 80af69d88dSmrgstatic uint64_t 81af69d88dSmrgqpu_a_dst(struct qpu_reg dst) 82af69d88dSmrg{ 83af69d88dSmrg uint64_t inst = 0; 84af69d88dSmrg 85af69d88dSmrg if (dst.mux <= QPU_MUX_R5) { 86af69d88dSmrg /* Translate the mux to the ACCn values. */ 87af69d88dSmrg inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD); 88af69d88dSmrg } else { 89af69d88dSmrg inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD); 90af69d88dSmrg if (dst.mux == QPU_MUX_B) 91af69d88dSmrg inst |= QPU_WS; 92af69d88dSmrg } 93af69d88dSmrg 94af69d88dSmrg return inst; 95af69d88dSmrg} 96af69d88dSmrg 97af69d88dSmrgstatic uint64_t 98af69d88dSmrgqpu_m_dst(struct qpu_reg dst) 99af69d88dSmrg{ 100af69d88dSmrg uint64_t inst = 0; 101af69d88dSmrg 102af69d88dSmrg if (dst.mux <= QPU_MUX_R5) { 103af69d88dSmrg /* Translate the mux to the ACCn values. */ 104af69d88dSmrg inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL); 105af69d88dSmrg } else { 106af69d88dSmrg inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL); 107af69d88dSmrg if (dst.mux == QPU_MUX_A) 108af69d88dSmrg inst |= QPU_WS; 109af69d88dSmrg } 110af69d88dSmrg 111af69d88dSmrg return inst; 112af69d88dSmrg} 113af69d88dSmrg 114af69d88dSmrguint64_t 115af69d88dSmrgqpu_a_MOV(struct qpu_reg dst, struct qpu_reg src) 116af69d88dSmrg{ 117af69d88dSmrg uint64_t inst = 0; 118af69d88dSmrg 11901e04c3fSmrg inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 120af69d88dSmrg inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD); 12101e04c3fSmrg inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 12201e04c3fSmrg inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 123af69d88dSmrg inst |= qpu_a_dst(dst); 124af69d88dSmrg inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); 12501e04c3fSmrg inst |= QPU_MUX(src.mux, QPU_ADD_A); 12601e04c3fSmrg inst |= QPU_MUX(src.mux, QPU_ADD_B); 12701e04c3fSmrg inst = set_src_raddr(inst, src); 12801e04c3fSmrg inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 129af69d88dSmrg 130af69d88dSmrg return inst; 131af69d88dSmrg} 132af69d88dSmrg 133af69d88dSmrguint64_t 134af69d88dSmrgqpu_m_MOV(struct qpu_reg dst, struct qpu_reg src) 135af69d88dSmrg{ 136af69d88dSmrg uint64_t inst = 0; 137af69d88dSmrg 13801e04c3fSmrg inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 139af69d88dSmrg inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL); 14001e04c3fSmrg inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 14101e04c3fSmrg inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 142af69d88dSmrg inst |= qpu_m_dst(dst); 143af69d88dSmrg inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); 14401e04c3fSmrg inst |= QPU_MUX(src.mux, QPU_MUL_A); 14501e04c3fSmrg inst |= QPU_MUX(src.mux, QPU_MUL_B); 14601e04c3fSmrg inst = set_src_raddr(inst, src); 14701e04c3fSmrg inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); 148af69d88dSmrg 149af69d88dSmrg return inst; 150af69d88dSmrg} 151af69d88dSmrg 152af69d88dSmrguint64_t 153af69d88dSmrgqpu_load_imm_ui(struct qpu_reg dst, uint32_t val) 154af69d88dSmrg{ 155af69d88dSmrg uint64_t inst = 0; 156af69d88dSmrg 157af69d88dSmrg inst |= qpu_a_dst(dst); 15801e04c3fSmrg inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 159af69d88dSmrg inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); 160af69d88dSmrg inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); 161af69d88dSmrg inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG); 162af69d88dSmrg inst |= val; 163af69d88dSmrg 164af69d88dSmrg return inst; 165af69d88dSmrg} 166af69d88dSmrg 16701e04c3fSmrguint64_t 16801e04c3fSmrgqpu_load_imm_u2(struct qpu_reg dst, uint32_t val) 16901e04c3fSmrg{ 17001e04c3fSmrg return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2, 17101e04c3fSmrg QPU_LOAD_IMM_MODE); 17201e04c3fSmrg} 17301e04c3fSmrg 17401e04c3fSmrguint64_t 17501e04c3fSmrgqpu_load_imm_i2(struct qpu_reg dst, uint32_t val) 17601e04c3fSmrg{ 17701e04c3fSmrg return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2, 17801e04c3fSmrg QPU_LOAD_IMM_MODE); 17901e04c3fSmrg} 18001e04c3fSmrg 18101e04c3fSmrguint64_t 18201e04c3fSmrgqpu_branch(uint32_t cond, uint32_t target) 18301e04c3fSmrg{ 18401e04c3fSmrg uint64_t inst = 0; 18501e04c3fSmrg 18601e04c3fSmrg inst |= qpu_a_dst(qpu_ra(QPU_W_NOP)); 18701e04c3fSmrg inst |= qpu_m_dst(qpu_rb(QPU_W_NOP)); 18801e04c3fSmrg inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND); 18901e04c3fSmrg inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG); 19001e04c3fSmrg inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET); 19101e04c3fSmrg 19201e04c3fSmrg return inst; 19301e04c3fSmrg} 19401e04c3fSmrg 195af69d88dSmrguint64_t 196af69d88dSmrgqpu_a_alu2(enum qpu_op_add op, 197af69d88dSmrg struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) 198af69d88dSmrg{ 199af69d88dSmrg uint64_t inst = 0; 200af69d88dSmrg 20101e04c3fSmrg inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 202af69d88dSmrg inst |= QPU_SET_FIELD(op, QPU_OP_ADD); 20301e04c3fSmrg inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 20401e04c3fSmrg inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 205af69d88dSmrg inst |= qpu_a_dst(dst); 206af69d88dSmrg inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); 20701e04c3fSmrg inst |= QPU_MUX(src0.mux, QPU_ADD_A); 20801e04c3fSmrg inst = set_src_raddr(inst, src0); 20901e04c3fSmrg inst |= QPU_MUX(src1.mux, QPU_ADD_B); 21001e04c3fSmrg inst = set_src_raddr(inst, src1); 21101e04c3fSmrg inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 212af69d88dSmrg 213af69d88dSmrg return inst; 214af69d88dSmrg} 215af69d88dSmrg 216af69d88dSmrguint64_t 217af69d88dSmrgqpu_m_alu2(enum qpu_op_mul op, 218af69d88dSmrg struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) 219af69d88dSmrg{ 220af69d88dSmrg uint64_t inst = 0; 221af69d88dSmrg 22201e04c3fSmrg inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 223af69d88dSmrg inst |= QPU_SET_FIELD(op, QPU_OP_MUL); 22401e04c3fSmrg inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 22501e04c3fSmrg inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 226af69d88dSmrg inst |= qpu_m_dst(dst); 227af69d88dSmrg inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); 22801e04c3fSmrg inst |= QPU_MUX(src0.mux, QPU_MUL_A); 22901e04c3fSmrg inst = set_src_raddr(inst, src0); 23001e04c3fSmrg inst |= QPU_MUX(src1.mux, QPU_MUL_B); 23101e04c3fSmrg inst = set_src_raddr(inst, src1); 23201e04c3fSmrg inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); 233af69d88dSmrg 234af69d88dSmrg return inst; 235af69d88dSmrg} 236af69d88dSmrg 237af69d88dSmrguint64_t 23801e04c3fSmrgqpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot) 23901e04c3fSmrg{ 24001e04c3fSmrg uint64_t inst = 0; 24101e04c3fSmrg inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0); 24201e04c3fSmrg 24301e04c3fSmrg inst = QPU_UPDATE_FIELD(inst, QPU_SIG_SMALL_IMM, QPU_SIG); 24401e04c3fSmrg inst = QPU_UPDATE_FIELD(inst, QPU_SMALL_IMM_MUL_ROT + rot, 24501e04c3fSmrg QPU_SMALL_IMM); 24601e04c3fSmrg 24701e04c3fSmrg return inst; 24801e04c3fSmrg} 24901e04c3fSmrg 25001e04c3fSmrgstatic bool 25101e04c3fSmrgmerge_fields(uint64_t *merge, 25201e04c3fSmrg uint64_t a, uint64_t b, 25301e04c3fSmrg uint64_t mask, uint64_t ignore) 25401e04c3fSmrg{ 25501e04c3fSmrg if ((a & mask) == ignore) { 25601e04c3fSmrg *merge = (*merge & ~mask) | (b & mask); 25701e04c3fSmrg } else if ((b & mask) == ignore) { 25801e04c3fSmrg *merge = (*merge & ~mask) | (a & mask); 25901e04c3fSmrg } else { 26001e04c3fSmrg if ((a & mask) != (b & mask)) 26101e04c3fSmrg return false; 26201e04c3fSmrg } 26301e04c3fSmrg 26401e04c3fSmrg return true; 26501e04c3fSmrg} 26601e04c3fSmrg 26701e04c3fSmrgint 26801e04c3fSmrgqpu_num_sf_accesses(uint64_t inst) 26901e04c3fSmrg{ 27001e04c3fSmrg int accesses = 0; 27101e04c3fSmrg static const uint32_t specials[] = { 27201e04c3fSmrg QPU_W_TLB_COLOR_MS, 27301e04c3fSmrg QPU_W_TLB_COLOR_ALL, 27401e04c3fSmrg QPU_W_TLB_Z, 27501e04c3fSmrg QPU_W_TMU0_S, 27601e04c3fSmrg QPU_W_TMU0_T, 27701e04c3fSmrg QPU_W_TMU0_R, 27801e04c3fSmrg QPU_W_TMU0_B, 27901e04c3fSmrg QPU_W_TMU1_S, 28001e04c3fSmrg QPU_W_TMU1_T, 28101e04c3fSmrg QPU_W_TMU1_R, 28201e04c3fSmrg QPU_W_TMU1_B, 28301e04c3fSmrg QPU_W_SFU_RECIP, 28401e04c3fSmrg QPU_W_SFU_RECIPSQRT, 28501e04c3fSmrg QPU_W_SFU_EXP, 28601e04c3fSmrg QPU_W_SFU_LOG, 28701e04c3fSmrg }; 28801e04c3fSmrg uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 28901e04c3fSmrg uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 29001e04c3fSmrg uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 29101e04c3fSmrg uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 29201e04c3fSmrg 29301e04c3fSmrg for (int j = 0; j < ARRAY_SIZE(specials); j++) { 29401e04c3fSmrg if (waddr_add == specials[j]) 29501e04c3fSmrg accesses++; 29601e04c3fSmrg if (waddr_mul == specials[j]) 29701e04c3fSmrg accesses++; 29801e04c3fSmrg } 29901e04c3fSmrg 30001e04c3fSmrg if (raddr_a == QPU_R_MUTEX_ACQUIRE) 30101e04c3fSmrg accesses++; 30201e04c3fSmrg if (raddr_b == QPU_R_MUTEX_ACQUIRE && 30301e04c3fSmrg QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM) 30401e04c3fSmrg accesses++; 30501e04c3fSmrg 30601e04c3fSmrg /* XXX: semaphore, combined color read/write? */ 30701e04c3fSmrg switch (QPU_GET_FIELD(inst, QPU_SIG)) { 30801e04c3fSmrg case QPU_SIG_COLOR_LOAD: 30901e04c3fSmrg case QPU_SIG_COLOR_LOAD_END: 31001e04c3fSmrg case QPU_SIG_LOAD_TMU0: 31101e04c3fSmrg case QPU_SIG_LOAD_TMU1: 31201e04c3fSmrg accesses++; 31301e04c3fSmrg } 31401e04c3fSmrg 31501e04c3fSmrg return accesses; 31601e04c3fSmrg} 31701e04c3fSmrg 31801e04c3fSmrgstatic bool 31901e04c3fSmrgqpu_waddr_ignores_ws(uint32_t waddr) 32001e04c3fSmrg{ 32101e04c3fSmrg switch(waddr) { 32201e04c3fSmrg case QPU_W_ACC0: 32301e04c3fSmrg case QPU_W_ACC1: 32401e04c3fSmrg case QPU_W_ACC2: 32501e04c3fSmrg case QPU_W_ACC3: 32601e04c3fSmrg case QPU_W_NOP: 32701e04c3fSmrg case QPU_W_TLB_Z: 32801e04c3fSmrg case QPU_W_TLB_COLOR_MS: 32901e04c3fSmrg case QPU_W_TLB_COLOR_ALL: 33001e04c3fSmrg case QPU_W_TLB_ALPHA_MASK: 33101e04c3fSmrg case QPU_W_VPM: 33201e04c3fSmrg case QPU_W_SFU_RECIP: 33301e04c3fSmrg case QPU_W_SFU_RECIPSQRT: 33401e04c3fSmrg case QPU_W_SFU_EXP: 33501e04c3fSmrg case QPU_W_SFU_LOG: 33601e04c3fSmrg case QPU_W_TMU0_S: 33701e04c3fSmrg case QPU_W_TMU0_T: 33801e04c3fSmrg case QPU_W_TMU0_R: 33901e04c3fSmrg case QPU_W_TMU0_B: 34001e04c3fSmrg case QPU_W_TMU1_S: 34101e04c3fSmrg case QPU_W_TMU1_T: 34201e04c3fSmrg case QPU_W_TMU1_R: 34301e04c3fSmrg case QPU_W_TMU1_B: 34401e04c3fSmrg return true; 34501e04c3fSmrg } 34601e04c3fSmrg 34701e04c3fSmrg return false; 34801e04c3fSmrg} 34901e04c3fSmrg 35001e04c3fSmrgstatic void 35101e04c3fSmrgswap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift) 35201e04c3fSmrg{ 35301e04c3fSmrg uint64_t mux_mask = (uint64_t)0x7 << mux_shift; 35401e04c3fSmrg uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift; 35501e04c3fSmrg uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift; 35601e04c3fSmrg 35701e04c3fSmrg if ((*a & mux_mask) == mux_a_val) { 35801e04c3fSmrg *a = (*a & ~mux_mask) | mux_b_val; 35901e04c3fSmrg *merge = (*merge & ~mux_mask) | mux_b_val; 36001e04c3fSmrg } 36101e04c3fSmrg} 36201e04c3fSmrg 36301e04c3fSmrgstatic bool 36401e04c3fSmrgtry_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b) 36501e04c3fSmrg{ 36601e04c3fSmrg uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A); 36701e04c3fSmrg uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B); 36801e04c3fSmrg uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A); 36901e04c3fSmrg uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B); 37001e04c3fSmrg 37101e04c3fSmrg if (raddr_a_b != QPU_R_NOP) 37201e04c3fSmrg return false; 37301e04c3fSmrg 37401e04c3fSmrg switch (raddr_a_a) { 37501e04c3fSmrg case QPU_R_UNIF: 37601e04c3fSmrg case QPU_R_VARY: 37701e04c3fSmrg break; 37801e04c3fSmrg default: 37901e04c3fSmrg return false; 38001e04c3fSmrg } 38101e04c3fSmrg 38201e04c3fSmrg if (!(*merge & QPU_PM) && 38301e04c3fSmrg QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) { 38401e04c3fSmrg return false; 38501e04c3fSmrg } 38601e04c3fSmrg 38701e04c3fSmrg if (raddr_b_b != QPU_R_NOP && 38801e04c3fSmrg raddr_b_b != raddr_a_a) 38901e04c3fSmrg return false; 39001e04c3fSmrg 39101e04c3fSmrg /* Move raddr A to B in instruction a. */ 39201e04c3fSmrg *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 39301e04c3fSmrg *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B); 39401e04c3fSmrg *merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A); 39501e04c3fSmrg *merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B); 39601e04c3fSmrg swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT); 39701e04c3fSmrg swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT); 39801e04c3fSmrg swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT); 39901e04c3fSmrg swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT); 40001e04c3fSmrg 40101e04c3fSmrg return true; 40201e04c3fSmrg} 40301e04c3fSmrg 40401e04c3fSmrgstatic bool 40501e04c3fSmrgconvert_mov(uint64_t *inst) 40601e04c3fSmrg{ 40701e04c3fSmrg uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A); 40801e04c3fSmrg uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD); 40901e04c3fSmrg uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD); 41001e04c3fSmrg 41101e04c3fSmrg /* Is it a MOV? */ 41201e04c3fSmrg if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR || 41301e04c3fSmrg (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) { 41401e04c3fSmrg return false; 41501e04c3fSmrg } 41601e04c3fSmrg 41701e04c3fSmrg if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE) 41801e04c3fSmrg return false; 41901e04c3fSmrg 42001e04c3fSmrg /* We could maybe support this in the .8888 and .8a-.8d cases. */ 42101e04c3fSmrg if (*inst & QPU_PM) 42201e04c3fSmrg return false; 42301e04c3fSmrg 42401e04c3fSmrg *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD); 42501e04c3fSmrg *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL); 42601e04c3fSmrg 42701e04c3fSmrg *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A); 42801e04c3fSmrg *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B); 42901e04c3fSmrg *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A); 43001e04c3fSmrg *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B); 43101e04c3fSmrg 43201e04c3fSmrg *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL); 43301e04c3fSmrg *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD); 43401e04c3fSmrg 43501e04c3fSmrg *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL); 43601e04c3fSmrg *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD); 43701e04c3fSmrg 43801e04c3fSmrg if (!qpu_waddr_ignores_ws(waddr_add)) 43901e04c3fSmrg *inst ^= QPU_WS; 44001e04c3fSmrg 44101e04c3fSmrg return true; 44201e04c3fSmrg} 44301e04c3fSmrg 44401e04c3fSmrgstatic bool 44501e04c3fSmrgwrites_a_file(uint64_t inst) 44601e04c3fSmrg{ 44701e04c3fSmrg if (!(inst & QPU_WS)) 44801e04c3fSmrg return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32; 44901e04c3fSmrg else 45001e04c3fSmrg return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32; 45101e04c3fSmrg} 45201e04c3fSmrg 45301e04c3fSmrgstatic bool 45401e04c3fSmrgreads_r4(uint64_t inst) 45501e04c3fSmrg{ 45601e04c3fSmrg return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 || 45701e04c3fSmrg QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 || 45801e04c3fSmrg QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 || 45901e04c3fSmrg QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4); 46001e04c3fSmrg} 46101e04c3fSmrg 46201e04c3fSmrguint64_t 46301e04c3fSmrgqpu_merge_inst(uint64_t a, uint64_t b) 464af69d88dSmrg{ 46501e04c3fSmrg uint64_t merge = a | b; 46601e04c3fSmrg bool ok = true; 46701e04c3fSmrg uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG); 46801e04c3fSmrg uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG); 46901e04c3fSmrg 47001e04c3fSmrg if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP && 47101e04c3fSmrg QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) { 47201e04c3fSmrg if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP || 47301e04c3fSmrg QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP || 47401e04c3fSmrg !(convert_mov(&a) || convert_mov(&b))) { 47501e04c3fSmrg return 0; 47601e04c3fSmrg } else { 47701e04c3fSmrg merge = a | b; 47801e04c3fSmrg } 47901e04c3fSmrg } 48001e04c3fSmrg 48101e04c3fSmrg if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP && 48201e04c3fSmrg QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) 48301e04c3fSmrg return 0; 48401e04c3fSmrg 48501e04c3fSmrg if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b)) 48601e04c3fSmrg return 0; 48701e04c3fSmrg 48801e04c3fSmrg if (a_sig == QPU_SIG_LOAD_IMM || 48901e04c3fSmrg b_sig == QPU_SIG_LOAD_IMM || 49001e04c3fSmrg a_sig == QPU_SIG_SMALL_IMM || 49101e04c3fSmrg b_sig == QPU_SIG_SMALL_IMM || 49201e04c3fSmrg a_sig == QPU_SIG_BRANCH || 49301e04c3fSmrg b_sig == QPU_SIG_BRANCH) { 49401e04c3fSmrg return 0; 49501e04c3fSmrg } 49601e04c3fSmrg 49701e04c3fSmrg ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK, 49801e04c3fSmrg QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG)); 49901e04c3fSmrg 50001e04c3fSmrg /* Misc fields that have to match exactly. */ 50101e04c3fSmrg ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0); 50201e04c3fSmrg 50301e04c3fSmrg if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK, 50401e04c3fSmrg QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) { 50501e04c3fSmrg /* Since we tend to use regfile A by default both for register 50601e04c3fSmrg * allocation and for our special values (uniforms and 50701e04c3fSmrg * varyings), try swapping uniforms and varyings to regfile B 50801e04c3fSmrg * to resolve raddr A conflicts. 50901e04c3fSmrg */ 51001e04c3fSmrg if (!try_swap_ra_file(&merge, &a, &b) && 51101e04c3fSmrg !try_swap_ra_file(&merge, &b, &a)) { 51201e04c3fSmrg return 0; 51301e04c3fSmrg } 51401e04c3fSmrg } 51501e04c3fSmrg 51601e04c3fSmrg ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK, 51701e04c3fSmrg QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B)); 518af69d88dSmrg 51901e04c3fSmrg ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK, 52001e04c3fSmrg QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD)); 52101e04c3fSmrg ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK, 52201e04c3fSmrg QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL)); 52301e04c3fSmrg 52401e04c3fSmrg /* Allow disagreement on WS (swapping A vs B physical reg file as the 52501e04c3fSmrg * destination for ADD/MUL) if one of the original instructions 52601e04c3fSmrg * ignores it (probably because it's just writing to accumulators). 527af69d88dSmrg */ 52801e04c3fSmrg if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) && 52901e04c3fSmrg qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) { 53001e04c3fSmrg merge = (merge & ~QPU_WS) | (b & QPU_WS); 53101e04c3fSmrg } else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) && 53201e04c3fSmrg qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) { 53301e04c3fSmrg merge = (merge & ~QPU_WS) | (a & QPU_WS); 53401e04c3fSmrg } else { 53501e04c3fSmrg if ((a & QPU_WS) != (b & QPU_WS)) 53601e04c3fSmrg return 0; 537af69d88dSmrg } 538af69d88dSmrg 53901e04c3fSmrg if (!merge_fields(&merge, a, b, QPU_PM, ~0)) { 54001e04c3fSmrg /* If one instruction has PM bit set and the other not, the 54101e04c3fSmrg * one without PM shouldn't do packing/unpacking, and we 54201e04c3fSmrg * have to make sure non-NOP packing/unpacking from PM 54301e04c3fSmrg * instruction aren't added to it. 54401e04c3fSmrg */ 54501e04c3fSmrg uint64_t temp; 54601e04c3fSmrg 54701e04c3fSmrg /* Let a be the one with PM bit */ 54801e04c3fSmrg if (!(a & QPU_PM)) { 54901e04c3fSmrg temp = a; 55001e04c3fSmrg a = b; 55101e04c3fSmrg b = temp; 55201e04c3fSmrg } 55301e04c3fSmrg 55401e04c3fSmrg if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0) 55501e04c3fSmrg return 0; 55601e04c3fSmrg 55701e04c3fSmrg if ((a & QPU_PACK_MASK) != 0 && 55801e04c3fSmrg QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) 55901e04c3fSmrg return 0; 56001e04c3fSmrg 56101e04c3fSmrg if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b)) 56201e04c3fSmrg return 0; 56301e04c3fSmrg } else { 56401e04c3fSmrg /* packing: Make sure that non-NOP packs agree, then deal with 56501e04c3fSmrg * special-case failing of adding a non-NOP pack to something 56601e04c3fSmrg * with a NOP pack. 56701e04c3fSmrg */ 56801e04c3fSmrg if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0)) 56901e04c3fSmrg return 0; 57001e04c3fSmrg bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) != 57101e04c3fSmrg QPU_GET_FIELD(merge, QPU_PACK)); 57201e04c3fSmrg bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) != 57301e04c3fSmrg QPU_GET_FIELD(merge, QPU_PACK)); 57401e04c3fSmrg if (!(merge & QPU_PM)) { 57501e04c3fSmrg /* Make sure we're not going to be putting a new 57601e04c3fSmrg * a-file packing on either half. 57701e04c3fSmrg */ 57801e04c3fSmrg if (new_a_pack && writes_a_file(a)) 57901e04c3fSmrg return 0; 58001e04c3fSmrg 58101e04c3fSmrg if (new_b_pack && writes_a_file(b)) 58201e04c3fSmrg return 0; 58301e04c3fSmrg } else { 58401e04c3fSmrg /* Make sure we're not going to be putting new MUL 5857ec681f3Smrg * packing on either half. 58601e04c3fSmrg */ 58701e04c3fSmrg if (new_a_pack && 58801e04c3fSmrg QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP) 58901e04c3fSmrg return 0; 59001e04c3fSmrg 59101e04c3fSmrg if (new_b_pack && 59201e04c3fSmrg QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) 59301e04c3fSmrg return 0; 59401e04c3fSmrg } 59501e04c3fSmrg 59601e04c3fSmrg /* unpacking: Make sure that non-NOP unpacks agree, then deal 59701e04c3fSmrg * with special-case failing of adding a non-NOP unpack to 59801e04c3fSmrg * something with a NOP unpack. 59901e04c3fSmrg */ 60001e04c3fSmrg if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0)) 60101e04c3fSmrg return 0; 60201e04c3fSmrg bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) != 60301e04c3fSmrg QPU_GET_FIELD(merge, QPU_UNPACK)); 60401e04c3fSmrg bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) != 60501e04c3fSmrg QPU_GET_FIELD(merge, QPU_UNPACK)); 60601e04c3fSmrg if (!(merge & QPU_PM)) { 60701e04c3fSmrg /* Make sure we're not going to be putting a new 60801e04c3fSmrg * a-file packing on either half. 60901e04c3fSmrg */ 61001e04c3fSmrg if (new_a_unpack && 61101e04c3fSmrg QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP) 61201e04c3fSmrg return 0; 61301e04c3fSmrg 61401e04c3fSmrg if (new_b_unpack && 61501e04c3fSmrg QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP) 61601e04c3fSmrg return 0; 61701e04c3fSmrg } else { 61801e04c3fSmrg /* Make sure we're not going to be putting new r4 61901e04c3fSmrg * unpack on either half. 62001e04c3fSmrg */ 62101e04c3fSmrg if (new_a_unpack && reads_r4(a)) 62201e04c3fSmrg return 0; 62301e04c3fSmrg 62401e04c3fSmrg if (new_b_unpack && reads_r4(b)) 62501e04c3fSmrg return 0; 62601e04c3fSmrg } 62701e04c3fSmrg } 62801e04c3fSmrg 62901e04c3fSmrg if (ok) 63001e04c3fSmrg return merge; 63101e04c3fSmrg else 63201e04c3fSmrg return 0; 633af69d88dSmrg} 634af69d88dSmrg 635af69d88dSmrguint64_t 636af69d88dSmrgqpu_set_sig(uint64_t inst, uint32_t sig) 637af69d88dSmrg{ 638af69d88dSmrg assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE); 63901e04c3fSmrg return QPU_UPDATE_FIELD(inst, sig, QPU_SIG); 640af69d88dSmrg} 641af69d88dSmrg 64201e04c3fSmrguint64_t 64301e04c3fSmrgqpu_set_cond_add(uint64_t inst, uint32_t cond) 64401e04c3fSmrg{ 64501e04c3fSmrg assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS); 64601e04c3fSmrg return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD); 64701e04c3fSmrg} 64801e04c3fSmrg 64901e04c3fSmrguint64_t 65001e04c3fSmrgqpu_set_cond_mul(uint64_t inst, uint32_t cond) 65101e04c3fSmrg{ 65201e04c3fSmrg assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS); 65301e04c3fSmrg return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL); 65401e04c3fSmrg} 65501e04c3fSmrg 65601e04c3fSmrgbool 65701e04c3fSmrgqpu_waddr_is_tlb(uint32_t waddr) 65801e04c3fSmrg{ 65901e04c3fSmrg switch (waddr) { 66001e04c3fSmrg case QPU_W_TLB_COLOR_ALL: 66101e04c3fSmrg case QPU_W_TLB_COLOR_MS: 66201e04c3fSmrg case QPU_W_TLB_Z: 66301e04c3fSmrg return true; 66401e04c3fSmrg default: 66501e04c3fSmrg return false; 66601e04c3fSmrg } 66701e04c3fSmrg} 66801e04c3fSmrg 66901e04c3fSmrgbool 67001e04c3fSmrgqpu_inst_is_tlb(uint64_t inst) 67101e04c3fSmrg{ 67201e04c3fSmrg uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 67301e04c3fSmrg 67401e04c3fSmrg return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) || 67501e04c3fSmrg qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) || 67601e04c3fSmrg sig == QPU_SIG_COLOR_LOAD || 67701e04c3fSmrg sig == QPU_SIG_WAIT_FOR_SCOREBOARD); 67801e04c3fSmrg} 67901e04c3fSmrg 68001e04c3fSmrg/** 68101e04c3fSmrg * Returns the small immediate value to be encoded in to the raddr b field if 68201e04c3fSmrg * the argument can be represented as one, or ~0 otherwise. 68301e04c3fSmrg */ 68401e04c3fSmrguint32_t 68501e04c3fSmrgqpu_encode_small_immediate(uint32_t i) 68601e04c3fSmrg{ 68701e04c3fSmrg if (i <= 15) 68801e04c3fSmrg return i; 68901e04c3fSmrg if ((int)i < 0 && (int)i >= -16) 69001e04c3fSmrg return i + 32; 69101e04c3fSmrg 69201e04c3fSmrg switch (i) { 69301e04c3fSmrg case 0x3f800000: 69401e04c3fSmrg return 32; 69501e04c3fSmrg case 0x40000000: 69601e04c3fSmrg return 33; 69701e04c3fSmrg case 0x40800000: 69801e04c3fSmrg return 34; 69901e04c3fSmrg case 0x41000000: 70001e04c3fSmrg return 35; 70101e04c3fSmrg case 0x41800000: 70201e04c3fSmrg return 36; 70301e04c3fSmrg case 0x42000000: 70401e04c3fSmrg return 37; 70501e04c3fSmrg case 0x42800000: 70601e04c3fSmrg return 38; 70701e04c3fSmrg case 0x43000000: 70801e04c3fSmrg return 39; 70901e04c3fSmrg case 0x3b800000: 71001e04c3fSmrg return 40; 71101e04c3fSmrg case 0x3c000000: 71201e04c3fSmrg return 41; 71301e04c3fSmrg case 0x3c800000: 71401e04c3fSmrg return 42; 71501e04c3fSmrg case 0x3d000000: 71601e04c3fSmrg return 43; 71701e04c3fSmrg case 0x3d800000: 71801e04c3fSmrg return 44; 71901e04c3fSmrg case 0x3e000000: 72001e04c3fSmrg return 45; 72101e04c3fSmrg case 0x3e800000: 72201e04c3fSmrg return 46; 72301e04c3fSmrg case 0x3f000000: 72401e04c3fSmrg return 47; 72501e04c3fSmrg } 72601e04c3fSmrg 72701e04c3fSmrg return ~0; 72801e04c3fSmrg} 72901e04c3fSmrg 73001e04c3fSmrgvoid 73101e04c3fSmrgqpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst) 73201e04c3fSmrg{ 73301e04c3fSmrg if (c->qpu_inst_count >= c->qpu_inst_size) { 73401e04c3fSmrg c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2); 73501e04c3fSmrg c->qpu_insts = reralloc(c, c->qpu_insts, 73601e04c3fSmrg uint64_t, c->qpu_inst_size); 73701e04c3fSmrg } 73801e04c3fSmrg c->qpu_insts[c->qpu_inst_count++] = inst; 73901e04c3fSmrg} 740