101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2016 Broadcom 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include <string.h> 2501e04c3fSmrg#include "util/macros.h" 267ec681f3Smrg#include "util/bitscan.h" 2701e04c3fSmrg 2801e04c3fSmrg#include "broadcom/common/v3d_device_info.h" 2901e04c3fSmrg#include "qpu_instr.h" 3001e04c3fSmrg 3101e04c3fSmrg#ifndef QPU_MASK 3201e04c3fSmrg#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low)) 3301e04c3fSmrg/* Using the GNU statement expression extension */ 3401e04c3fSmrg#define QPU_SET_FIELD(value, field) \ 3501e04c3fSmrg ({ \ 3601e04c3fSmrg uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \ 3701e04c3fSmrg assert((fieldval & ~ field ## _MASK) == 0); \ 3801e04c3fSmrg fieldval & field ## _MASK; \ 3901e04c3fSmrg }) 4001e04c3fSmrg 4101e04c3fSmrg#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) 4201e04c3fSmrg 4301e04c3fSmrg#define QPU_UPDATE_FIELD(inst, value, field) \ 4401e04c3fSmrg (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field)) 4501e04c3fSmrg#endif /* QPU_MASK */ 4601e04c3fSmrg 477ec681f3Smrg#define V3D_QPU_OP_MUL_SHIFT 58 487ec681f3Smrg#define V3D_QPU_OP_MUL_MASK QPU_MASK(63, 58) 4901e04c3fSmrg 507ec681f3Smrg#define V3D_QPU_SIG_SHIFT 53 517ec681f3Smrg#define V3D_QPU_SIG_MASK QPU_MASK(57, 53) 5201e04c3fSmrg 537ec681f3Smrg#define V3D_QPU_COND_SHIFT 46 547ec681f3Smrg#define V3D_QPU_COND_MASK QPU_MASK(52, 46) 557ec681f3Smrg#define V3D_QPU_COND_SIG_MAGIC_ADDR (1 << 6) 5601e04c3fSmrg 577ec681f3Smrg#define V3D_QPU_MM QPU_MASK(45, 45) 587ec681f3Smrg#define V3D_QPU_MA QPU_MASK(44, 44) 5901e04c3fSmrg 6001e04c3fSmrg#define V3D_QPU_WADDR_M_SHIFT 38 6101e04c3fSmrg#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38) 6201e04c3fSmrg 637ec681f3Smrg#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT 35 647ec681f3Smrg#define V3D_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35) 6501e04c3fSmrg 6601e04c3fSmrg#define V3D_QPU_WADDR_A_SHIFT 32 6701e04c3fSmrg#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32) 6801e04c3fSmrg 697ec681f3Smrg#define V3D_QPU_BRANCH_COND_SHIFT 32 707ec681f3Smrg#define V3D_QPU_BRANCH_COND_MASK QPU_MASK(34, 32) 7101e04c3fSmrg 727ec681f3Smrg#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT 24 737ec681f3Smrg#define V3D_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24) 7401e04c3fSmrg 757ec681f3Smrg#define V3D_QPU_OP_ADD_SHIFT 24 767ec681f3Smrg#define V3D_QPU_OP_ADD_MASK QPU_MASK(31, 24) 7701e04c3fSmrg 787ec681f3Smrg#define V3D_QPU_MUL_B_SHIFT 21 797ec681f3Smrg#define V3D_QPU_MUL_B_MASK QPU_MASK(23, 21) 8001e04c3fSmrg 817ec681f3Smrg#define V3D_QPU_BRANCH_MSFIGN_SHIFT 21 827ec681f3Smrg#define V3D_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21) 8301e04c3fSmrg 847ec681f3Smrg#define V3D_QPU_MUL_A_SHIFT 18 857ec681f3Smrg#define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18) 8601e04c3fSmrg 877ec681f3Smrg#define V3D_QPU_ADD_B_SHIFT 15 887ec681f3Smrg#define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15) 8901e04c3fSmrg 907ec681f3Smrg#define V3D_QPU_BRANCH_BDU_SHIFT 15 917ec681f3Smrg#define V3D_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15) 9201e04c3fSmrg 937ec681f3Smrg#define V3D_QPU_BRANCH_UB QPU_MASK(14, 14) 9401e04c3fSmrg 957ec681f3Smrg#define V3D_QPU_ADD_A_SHIFT 12 967ec681f3Smrg#define V3D_QPU_ADD_A_MASK QPU_MASK(14, 12) 9701e04c3fSmrg 987ec681f3Smrg#define V3D_QPU_BRANCH_BDI_SHIFT 12 997ec681f3Smrg#define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12) 10001e04c3fSmrg 1017ec681f3Smrg#define V3D_QPU_RADDR_A_SHIFT 6 1027ec681f3Smrg#define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6) 10301e04c3fSmrg 1047ec681f3Smrg#define V3D_QPU_RADDR_B_SHIFT 0 1057ec681f3Smrg#define V3D_QPU_RADDR_B_MASK QPU_MASK(5, 0) 10601e04c3fSmrg 10701e04c3fSmrg#define THRSW .thrsw = true 10801e04c3fSmrg#define LDUNIF .ldunif = true 10901e04c3fSmrg#define LDUNIFRF .ldunifrf = true 11001e04c3fSmrg#define LDUNIFA .ldunifa = true 11101e04c3fSmrg#define LDUNIFARF .ldunifarf = true 11201e04c3fSmrg#define LDTMU .ldtmu = true 11301e04c3fSmrg#define LDVARY .ldvary = true 11401e04c3fSmrg#define LDVPM .ldvpm = true 11501e04c3fSmrg#define SMIMM .small_imm = true 11601e04c3fSmrg#define LDTLB .ldtlb = true 11701e04c3fSmrg#define LDTLBU .ldtlbu = true 11801e04c3fSmrg#define UCB .ucb = true 11901e04c3fSmrg#define ROT .rotate = true 12001e04c3fSmrg#define WRTMUC .wrtmuc = true 12101e04c3fSmrg 12201e04c3fSmrgstatic const struct v3d_qpu_sig v33_sig_map[] = { 12301e04c3fSmrg /* MISC R3 R4 R5 */ 12401e04c3fSmrg [0] = { }, 12501e04c3fSmrg [1] = { THRSW, }, 12601e04c3fSmrg [2] = { LDUNIF }, 12701e04c3fSmrg [3] = { THRSW, LDUNIF }, 12801e04c3fSmrg [4] = { LDTMU, }, 12901e04c3fSmrg [5] = { THRSW, LDTMU, }, 13001e04c3fSmrg [6] = { LDTMU, LDUNIF }, 13101e04c3fSmrg [7] = { THRSW, LDTMU, LDUNIF }, 13201e04c3fSmrg [8] = { LDVARY, }, 13301e04c3fSmrg [9] = { THRSW, LDVARY, }, 13401e04c3fSmrg [10] = { LDVARY, LDUNIF }, 13501e04c3fSmrg [11] = { THRSW, LDVARY, LDUNIF }, 13601e04c3fSmrg [12] = { LDVARY, LDTMU, }, 13701e04c3fSmrg [13] = { THRSW, LDVARY, LDTMU, }, 13801e04c3fSmrg [14] = { SMIMM, LDVARY, }, 13901e04c3fSmrg [15] = { SMIMM, }, 14001e04c3fSmrg [16] = { LDTLB, }, 14101e04c3fSmrg [17] = { LDTLBU, }, 14201e04c3fSmrg /* 18-21 reserved */ 14301e04c3fSmrg [22] = { UCB, }, 14401e04c3fSmrg [23] = { ROT, }, 14501e04c3fSmrg [24] = { LDVPM, }, 14601e04c3fSmrg [25] = { THRSW, LDVPM, }, 14701e04c3fSmrg [26] = { LDVPM, LDUNIF }, 14801e04c3fSmrg [27] = { THRSW, LDVPM, LDUNIF }, 14901e04c3fSmrg [28] = { LDVPM, LDTMU, }, 15001e04c3fSmrg [29] = { THRSW, LDVPM, LDTMU, }, 15101e04c3fSmrg [30] = { SMIMM, LDVPM, }, 15201e04c3fSmrg [31] = { SMIMM, }, 15301e04c3fSmrg}; 15401e04c3fSmrg 15501e04c3fSmrgstatic const struct v3d_qpu_sig v40_sig_map[] = { 15601e04c3fSmrg /* MISC R3 R4 R5 */ 15701e04c3fSmrg [0] = { }, 15801e04c3fSmrg [1] = { THRSW, }, 15901e04c3fSmrg [2] = { LDUNIF }, 16001e04c3fSmrg [3] = { THRSW, LDUNIF }, 16101e04c3fSmrg [4] = { LDTMU, }, 16201e04c3fSmrg [5] = { THRSW, LDTMU, }, 16301e04c3fSmrg [6] = { LDTMU, LDUNIF }, 16401e04c3fSmrg [7] = { THRSW, LDTMU, LDUNIF }, 16501e04c3fSmrg [8] = { LDVARY, }, 16601e04c3fSmrg [9] = { THRSW, LDVARY, }, 16701e04c3fSmrg [10] = { LDVARY, LDUNIF }, 16801e04c3fSmrg [11] = { THRSW, LDVARY, LDUNIF }, 16901e04c3fSmrg /* 12-13 reserved */ 17001e04c3fSmrg [14] = { SMIMM, LDVARY, }, 17101e04c3fSmrg [15] = { SMIMM, }, 17201e04c3fSmrg [16] = { LDTLB, }, 17301e04c3fSmrg [17] = { LDTLBU, }, 17401e04c3fSmrg [18] = { WRTMUC }, 17501e04c3fSmrg [19] = { THRSW, WRTMUC }, 17601e04c3fSmrg [20] = { LDVARY, WRTMUC }, 17701e04c3fSmrg [21] = { THRSW, LDVARY, WRTMUC }, 17801e04c3fSmrg [22] = { UCB, }, 17901e04c3fSmrg [23] = { ROT, }, 18001e04c3fSmrg /* 24-30 reserved */ 18101e04c3fSmrg [31] = { SMIMM, LDTMU, }, 18201e04c3fSmrg}; 18301e04c3fSmrg 18401e04c3fSmrgstatic const struct v3d_qpu_sig v41_sig_map[] = { 18501e04c3fSmrg /* MISC phys R5 */ 18601e04c3fSmrg [0] = { }, 18701e04c3fSmrg [1] = { THRSW, }, 18801e04c3fSmrg [2] = { LDUNIF }, 18901e04c3fSmrg [3] = { THRSW, LDUNIF }, 19001e04c3fSmrg [4] = { LDTMU, }, 19101e04c3fSmrg [5] = { THRSW, LDTMU, }, 19201e04c3fSmrg [6] = { LDTMU, LDUNIF }, 19301e04c3fSmrg [7] = { THRSW, LDTMU, LDUNIF }, 19401e04c3fSmrg [8] = { LDVARY, }, 19501e04c3fSmrg [9] = { THRSW, LDVARY, }, 19601e04c3fSmrg [10] = { LDVARY, LDUNIF }, 19701e04c3fSmrg [11] = { THRSW, LDVARY, LDUNIF }, 19801e04c3fSmrg [12] = { LDUNIFRF }, 19901e04c3fSmrg [13] = { THRSW, LDUNIFRF }, 20001e04c3fSmrg [14] = { SMIMM, LDVARY, }, 20101e04c3fSmrg [15] = { SMIMM, }, 20201e04c3fSmrg [16] = { LDTLB, }, 20301e04c3fSmrg [17] = { LDTLBU, }, 20401e04c3fSmrg [18] = { WRTMUC }, 20501e04c3fSmrg [19] = { THRSW, WRTMUC }, 20601e04c3fSmrg [20] = { LDVARY, WRTMUC }, 20701e04c3fSmrg [21] = { THRSW, LDVARY, WRTMUC }, 20801e04c3fSmrg [22] = { UCB, }, 20901e04c3fSmrg [23] = { ROT, }, 21001e04c3fSmrg [24] = { LDUNIFA}, 21101e04c3fSmrg [25] = { LDUNIFARF }, 2127ec681f3Smrg /* 26-30 reserved */ 21301e04c3fSmrg [31] = { SMIMM, LDTMU, }, 21401e04c3fSmrg}; 21501e04c3fSmrg 21601e04c3fSmrgbool 21701e04c3fSmrgv3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, 21801e04c3fSmrg uint32_t packed_sig, 21901e04c3fSmrg struct v3d_qpu_sig *sig) 22001e04c3fSmrg{ 22101e04c3fSmrg if (packed_sig >= ARRAY_SIZE(v33_sig_map)) 22201e04c3fSmrg return false; 22301e04c3fSmrg 22401e04c3fSmrg if (devinfo->ver >= 41) 22501e04c3fSmrg *sig = v41_sig_map[packed_sig]; 22601e04c3fSmrg else if (devinfo->ver == 40) 22701e04c3fSmrg *sig = v40_sig_map[packed_sig]; 22801e04c3fSmrg else 22901e04c3fSmrg *sig = v33_sig_map[packed_sig]; 23001e04c3fSmrg 23101e04c3fSmrg /* Signals with zeroed unpacked contents after element 0 are reserved. */ 23201e04c3fSmrg return (packed_sig == 0 || 23301e04c3fSmrg memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0); 23401e04c3fSmrg} 23501e04c3fSmrg 23601e04c3fSmrgbool 23701e04c3fSmrgv3d_qpu_sig_pack(const struct v3d_device_info *devinfo, 23801e04c3fSmrg const struct v3d_qpu_sig *sig, 23901e04c3fSmrg uint32_t *packed_sig) 24001e04c3fSmrg{ 24101e04c3fSmrg static const struct v3d_qpu_sig *map; 24201e04c3fSmrg 24301e04c3fSmrg if (devinfo->ver >= 41) 24401e04c3fSmrg map = v41_sig_map; 24501e04c3fSmrg else if (devinfo->ver == 40) 24601e04c3fSmrg map = v40_sig_map; 24701e04c3fSmrg else 24801e04c3fSmrg map = v33_sig_map; 24901e04c3fSmrg 25001e04c3fSmrg for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) { 25101e04c3fSmrg if (memcmp(&map[i], sig, sizeof(*sig)) == 0) { 25201e04c3fSmrg *packed_sig = i; 25301e04c3fSmrg return true; 25401e04c3fSmrg } 25501e04c3fSmrg } 25601e04c3fSmrg 25701e04c3fSmrg return false; 25801e04c3fSmrg} 25901e04c3fSmrgstatic inline unsigned 26001e04c3fSmrgfui( float f ) 26101e04c3fSmrg{ 26201e04c3fSmrg union {float f; unsigned ui;} fi; 26301e04c3fSmrg fi.f = f; 26401e04c3fSmrg return fi.ui; 26501e04c3fSmrg} 26601e04c3fSmrg 26701e04c3fSmrgstatic const uint32_t small_immediates[] = { 26801e04c3fSmrg 0, 1, 2, 3, 26901e04c3fSmrg 4, 5, 6, 7, 27001e04c3fSmrg 8, 9, 10, 11, 27101e04c3fSmrg 12, 13, 14, 15, 27201e04c3fSmrg -16, -15, -14, -13, 27301e04c3fSmrg -12, -11, -10, -9, 27401e04c3fSmrg -8, -7, -6, -5, 27501e04c3fSmrg -4, -3, -2, -1, 27601e04c3fSmrg 0x3b800000, /* 2.0^-8 */ 27701e04c3fSmrg 0x3c000000, /* 2.0^-7 */ 27801e04c3fSmrg 0x3c800000, /* 2.0^-6 */ 27901e04c3fSmrg 0x3d000000, /* 2.0^-5 */ 28001e04c3fSmrg 0x3d800000, /* 2.0^-4 */ 28101e04c3fSmrg 0x3e000000, /* 2.0^-3 */ 28201e04c3fSmrg 0x3e800000, /* 2.0^-2 */ 28301e04c3fSmrg 0x3f000000, /* 2.0^-1 */ 28401e04c3fSmrg 0x3f800000, /* 2.0^0 */ 28501e04c3fSmrg 0x40000000, /* 2.0^1 */ 28601e04c3fSmrg 0x40800000, /* 2.0^2 */ 28701e04c3fSmrg 0x41000000, /* 2.0^3 */ 28801e04c3fSmrg 0x41800000, /* 2.0^4 */ 28901e04c3fSmrg 0x42000000, /* 2.0^5 */ 29001e04c3fSmrg 0x42800000, /* 2.0^6 */ 29101e04c3fSmrg 0x43000000, /* 2.0^7 */ 29201e04c3fSmrg}; 29301e04c3fSmrg 29401e04c3fSmrgbool 29501e04c3fSmrgv3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo, 29601e04c3fSmrg uint32_t packed_small_immediate, 29701e04c3fSmrg uint32_t *small_immediate) 29801e04c3fSmrg{ 29901e04c3fSmrg if (packed_small_immediate >= ARRAY_SIZE(small_immediates)) 30001e04c3fSmrg return false; 30101e04c3fSmrg 30201e04c3fSmrg *small_immediate = small_immediates[packed_small_immediate]; 30301e04c3fSmrg return true; 30401e04c3fSmrg} 30501e04c3fSmrg 30601e04c3fSmrgbool 30701e04c3fSmrgv3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo, 30801e04c3fSmrg uint32_t value, 30901e04c3fSmrg uint32_t *packed_small_immediate) 31001e04c3fSmrg{ 31101e04c3fSmrg STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48); 31201e04c3fSmrg 31301e04c3fSmrg for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) { 31401e04c3fSmrg if (small_immediates[i] == value) { 31501e04c3fSmrg *packed_small_immediate = i; 31601e04c3fSmrg return true; 31701e04c3fSmrg } 31801e04c3fSmrg } 31901e04c3fSmrg 32001e04c3fSmrg return false; 32101e04c3fSmrg} 32201e04c3fSmrg 32301e04c3fSmrgbool 32401e04c3fSmrgv3d_qpu_flags_unpack(const struct v3d_device_info *devinfo, 32501e04c3fSmrg uint32_t packed_cond, 32601e04c3fSmrg struct v3d_qpu_flags *cond) 32701e04c3fSmrg{ 32801e04c3fSmrg static const enum v3d_qpu_cond cond_map[4] = { 32901e04c3fSmrg [0] = V3D_QPU_COND_IFA, 33001e04c3fSmrg [1] = V3D_QPU_COND_IFB, 33101e04c3fSmrg [2] = V3D_QPU_COND_IFNA, 33201e04c3fSmrg [3] = V3D_QPU_COND_IFNB, 33301e04c3fSmrg }; 33401e04c3fSmrg 33501e04c3fSmrg cond->ac = V3D_QPU_COND_NONE; 33601e04c3fSmrg cond->mc = V3D_QPU_COND_NONE; 33701e04c3fSmrg cond->apf = V3D_QPU_PF_NONE; 33801e04c3fSmrg cond->mpf = V3D_QPU_PF_NONE; 33901e04c3fSmrg cond->auf = V3D_QPU_UF_NONE; 34001e04c3fSmrg cond->muf = V3D_QPU_UF_NONE; 34101e04c3fSmrg 34201e04c3fSmrg if (packed_cond == 0) { 34301e04c3fSmrg return true; 34401e04c3fSmrg } else if (packed_cond >> 2 == 0) { 34501e04c3fSmrg cond->apf = packed_cond & 0x3; 34601e04c3fSmrg } else if (packed_cond >> 4 == 0) { 34701e04c3fSmrg cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; 34801e04c3fSmrg } else if (packed_cond == 0x10) { 34901e04c3fSmrg return false; 35001e04c3fSmrg } else if (packed_cond >> 2 == 0x4) { 35101e04c3fSmrg cond->mpf = packed_cond & 0x3; 35201e04c3fSmrg } else if (packed_cond >> 4 == 0x1) { 35301e04c3fSmrg cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; 35401e04c3fSmrg } else if (packed_cond >> 4 == 0x2) { 35501e04c3fSmrg cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA; 35601e04c3fSmrg cond->mpf = packed_cond & 0x3; 35701e04c3fSmrg } else if (packed_cond >> 4 == 0x3) { 35801e04c3fSmrg cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA; 35901e04c3fSmrg cond->apf = packed_cond & 0x3; 36001e04c3fSmrg } else if (packed_cond >> 6) { 36101e04c3fSmrg cond->mc = cond_map[(packed_cond >> 4) & 0x3]; 36201e04c3fSmrg if (((packed_cond >> 2) & 0x3) == 0) { 36301e04c3fSmrg cond->ac = cond_map[packed_cond & 0x3]; 36401e04c3fSmrg } else { 36501e04c3fSmrg cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; 36601e04c3fSmrg } 36701e04c3fSmrg } 36801e04c3fSmrg 36901e04c3fSmrg return true; 37001e04c3fSmrg} 37101e04c3fSmrg 37201e04c3fSmrgbool 37301e04c3fSmrgv3d_qpu_flags_pack(const struct v3d_device_info *devinfo, 37401e04c3fSmrg const struct v3d_qpu_flags *cond, 37501e04c3fSmrg uint32_t *packed_cond) 37601e04c3fSmrg{ 37701e04c3fSmrg#define AC (1 << 0) 37801e04c3fSmrg#define MC (1 << 1) 37901e04c3fSmrg#define APF (1 << 2) 38001e04c3fSmrg#define MPF (1 << 3) 38101e04c3fSmrg#define AUF (1 << 4) 38201e04c3fSmrg#define MUF (1 << 5) 38301e04c3fSmrg static const struct { 38401e04c3fSmrg uint8_t flags_present; 38501e04c3fSmrg uint8_t bits; 38601e04c3fSmrg } flags_table[] = { 38701e04c3fSmrg { 0, 0 }, 38801e04c3fSmrg { APF, 0 }, 38901e04c3fSmrg { AUF, 0 }, 39001e04c3fSmrg { MPF, (1 << 4) }, 39101e04c3fSmrg { MUF, (1 << 4) }, 39201e04c3fSmrg { AC, (1 << 5) }, 39301e04c3fSmrg { AC | MPF, (1 << 5) }, 39401e04c3fSmrg { MC, (1 << 5) | (1 << 4) }, 39501e04c3fSmrg { MC | APF, (1 << 5) | (1 << 4) }, 39601e04c3fSmrg { MC | AC, (1 << 6) }, 39701e04c3fSmrg { MC | AUF, (1 << 6) }, 39801e04c3fSmrg }; 39901e04c3fSmrg 40001e04c3fSmrg uint8_t flags_present = 0; 40101e04c3fSmrg if (cond->ac != V3D_QPU_COND_NONE) 40201e04c3fSmrg flags_present |= AC; 40301e04c3fSmrg if (cond->mc != V3D_QPU_COND_NONE) 40401e04c3fSmrg flags_present |= MC; 40501e04c3fSmrg if (cond->apf != V3D_QPU_PF_NONE) 40601e04c3fSmrg flags_present |= APF; 40701e04c3fSmrg if (cond->mpf != V3D_QPU_PF_NONE) 40801e04c3fSmrg flags_present |= MPF; 40901e04c3fSmrg if (cond->auf != V3D_QPU_UF_NONE) 41001e04c3fSmrg flags_present |= AUF; 41101e04c3fSmrg if (cond->muf != V3D_QPU_UF_NONE) 41201e04c3fSmrg flags_present |= MUF; 41301e04c3fSmrg 41401e04c3fSmrg for (int i = 0; i < ARRAY_SIZE(flags_table); i++) { 41501e04c3fSmrg if (flags_table[i].flags_present != flags_present) 41601e04c3fSmrg continue; 41701e04c3fSmrg 41801e04c3fSmrg *packed_cond = flags_table[i].bits; 41901e04c3fSmrg 42001e04c3fSmrg *packed_cond |= cond->apf; 42101e04c3fSmrg *packed_cond |= cond->mpf; 42201e04c3fSmrg 42301e04c3fSmrg if (flags_present & AUF) 42401e04c3fSmrg *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4; 42501e04c3fSmrg if (flags_present & MUF) 42601e04c3fSmrg *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4; 42701e04c3fSmrg 42801e04c3fSmrg if (flags_present & AC) 42901e04c3fSmrg *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2; 43001e04c3fSmrg 43101e04c3fSmrg if (flags_present & MC) { 43201e04c3fSmrg if (*packed_cond & (1 << 6)) 43301e04c3fSmrg *packed_cond |= (cond->mc - 43401e04c3fSmrg V3D_QPU_COND_IFA) << 4; 43501e04c3fSmrg else 43601e04c3fSmrg *packed_cond |= (cond->mc - 43701e04c3fSmrg V3D_QPU_COND_IFA) << 2; 43801e04c3fSmrg } 43901e04c3fSmrg 44001e04c3fSmrg return true; 44101e04c3fSmrg } 44201e04c3fSmrg 44301e04c3fSmrg return false; 44401e04c3fSmrg} 44501e04c3fSmrg 44601e04c3fSmrg/* Make a mapping of the table of opcodes in the spec. The opcode is 44701e04c3fSmrg * determined by a combination of the opcode field, and in the case of 0 or 44801e04c3fSmrg * 1-arg opcodes, the mux_b field as well. 44901e04c3fSmrg */ 45001e04c3fSmrg#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1)) 45101e04c3fSmrg#define ANYMUX MUX_MASK(0, 7) 45201e04c3fSmrg 45301e04c3fSmrgstruct opcode_desc { 45401e04c3fSmrg uint8_t opcode_first; 45501e04c3fSmrg uint8_t opcode_last; 45601e04c3fSmrg uint8_t mux_b_mask; 45701e04c3fSmrg uint8_t mux_a_mask; 45801e04c3fSmrg uint8_t op; 4597ec681f3Smrg 4607ec681f3Smrg /* first_ver == 0 if it's the same across all V3D versions. 4617ec681f3Smrg * first_ver == X, last_ver == 0 if it's the same for all V3D versions 4627ec681f3Smrg * starting from X 4637ec681f3Smrg * first_ver == X, last_ver == Y if it's the same for all V3D versions 4647ec681f3Smrg * on the range X through Y 4657ec681f3Smrg */ 4667ec681f3Smrg uint8_t first_ver; 4677ec681f3Smrg uint8_t last_ver; 46801e04c3fSmrg}; 46901e04c3fSmrg 47001e04c3fSmrgstatic const struct opcode_desc add_ops[] = { 47101e04c3fSmrg /* FADD is FADDNF depending on the order of the mux_a/mux_b. */ 47201e04c3fSmrg { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD }, 47301e04c3fSmrg { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF }, 47401e04c3fSmrg { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, 47501e04c3fSmrg { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD }, 47601e04c3fSmrg { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, 47701e04c3fSmrg { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB }, 47801e04c3fSmrg { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, 47901e04c3fSmrg { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB }, 48001e04c3fSmrg { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN }, 48101e04c3fSmrg { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX }, 48201e04c3fSmrg { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN }, 48301e04c3fSmrg { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX }, 48401e04c3fSmrg { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL }, 48501e04c3fSmrg { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR }, 48601e04c3fSmrg { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR }, 48701e04c3fSmrg { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR }, 48801e04c3fSmrg /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */ 48901e04c3fSmrg { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN }, 49001e04c3fSmrg { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX }, 49101e04c3fSmrg { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN }, 49201e04c3fSmrg 49301e04c3fSmrg { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND }, 49401e04c3fSmrg { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR }, 49501e04c3fSmrg { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR }, 49601e04c3fSmrg 49701e04c3fSmrg { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD }, 49801e04c3fSmrg { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB }, 49901e04c3fSmrg { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT }, 50001e04c3fSmrg { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG }, 50101e04c3fSmrg { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH }, 50201e04c3fSmrg { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH }, 50301e04c3fSmrg { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP }, 50401e04c3fSmrg { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP }, 50501e04c3fSmrg { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF }, 50601e04c3fSmrg { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF }, 50701e04c3fSmrg { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 }, 50801e04c3fSmrg { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX }, 50901e04c3fSmrg { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX }, 51001e04c3fSmrg { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR }, 51101e04c3fSmrg { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA }, 51201e04c3fSmrg { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA }, 51301e04c3fSmrg { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB }, 51401e04c3fSmrg { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB }, 51501e04c3fSmrg 51601e04c3fSmrg { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD }, 51701e04c3fSmrg { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD }, 51801e04c3fSmrg { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD }, 51901e04c3fSmrg { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD }, 52001e04c3fSmrg 52101e04c3fSmrg { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF }, 52201e04c3fSmrg { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF }, 52301e04c3fSmrg { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 }, 52401e04c3fSmrg { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 }, 52501e04c3fSmrg { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 }, 52601e04c3fSmrg { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 }, 52701e04c3fSmrg { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT }, 52801e04c3fSmrg { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT }, 5297ec681f3Smrg { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 }, 5307ec681f3Smrg { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 }, 53101e04c3fSmrg { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 }, 5327ec681f3Smrg 53301e04c3fSmrg { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 }, 5347ec681f3Smrg { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 }, 53501e04c3fSmrg { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 }, 5367ec681f3Smrg { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 }, 53701e04c3fSmrg { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 }, 53801e04c3fSmrg { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 }, 53901e04c3fSmrg { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 }, 54001e04c3fSmrg { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 }, 54101e04c3fSmrg { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 }, 54201e04c3fSmrg { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 }, 54301e04c3fSmrg { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 }, 5447ec681f3Smrg { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 }, 54501e04c3fSmrg 54601e04c3fSmrg /* FIXME: MORE COMPLICATED */ 54701e04c3fSmrg /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */ 54801e04c3fSmrg 54901e04c3fSmrg { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP }, 55001e04c3fSmrg { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX }, 55101e04c3fSmrg 55201e04c3fSmrg { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND }, 55301e04c3fSmrg { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN }, 55401e04c3fSmrg { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC }, 55501e04c3fSmrg { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ }, 55601e04c3fSmrg { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR }, 55701e04c3fSmrg { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ }, 55801e04c3fSmrg { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL }, 55901e04c3fSmrg { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC }, 56001e04c3fSmrg 56101e04c3fSmrg { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX }, 56201e04c3fSmrg { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY }, 56301e04c3fSmrg 56401e04c3fSmrg /* The stvpms are distinguished by the waddr field. */ 56501e04c3fSmrg { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV }, 56601e04c3fSmrg { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD }, 56701e04c3fSmrg { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP }, 56801e04c3fSmrg 56901e04c3fSmrg { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF }, 57001e04c3fSmrg { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ }, 57101e04c3fSmrg { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF }, 57201e04c3fSmrg}; 57301e04c3fSmrg 57401e04c3fSmrgstatic const struct opcode_desc mul_ops[] = { 57501e04c3fSmrg { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD }, 57601e04c3fSmrg { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB }, 57701e04c3fSmrg { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 }, 57801e04c3fSmrg { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL }, 57901e04c3fSmrg { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 }, 58001e04c3fSmrg { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP }, 58101e04c3fSmrg { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV }, 58201e04c3fSmrg { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV }, 58301e04c3fSmrg { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 }, 58401e04c3fSmrg { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV }, 58501e04c3fSmrg { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL }, 58601e04c3fSmrg}; 58701e04c3fSmrg 5887ec681f3Smrg/* Returns true if op_desc should be filtered out based on devinfo->ver 5897ec681f3Smrg * against op_desc->first_ver and op_desc->last_ver. Check notes about 5907ec681f3Smrg * first_ver/last_ver on struct opcode_desc comments. 5917ec681f3Smrg */ 5927ec681f3Smrgstatic bool 5937ec681f3Smrgopcode_invalid_in_version(const struct v3d_device_info *devinfo, 5947ec681f3Smrg const struct opcode_desc *op_desc) 5957ec681f3Smrg{ 5967ec681f3Smrg return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) || 5977ec681f3Smrg (op_desc->last_ver != 0 && devinfo->ver > op_desc->last_ver); 5987ec681f3Smrg} 5997ec681f3Smrg 60001e04c3fSmrgstatic const struct opcode_desc * 6017ec681f3Smrglookup_opcode_from_packed(const struct v3d_device_info *devinfo, 6027ec681f3Smrg const struct opcode_desc *opcodes, 6037ec681f3Smrg size_t num_opcodes, uint32_t opcode, 6047ec681f3Smrg uint32_t mux_a, uint32_t mux_b) 60501e04c3fSmrg{ 60601e04c3fSmrg for (int i = 0; i < num_opcodes; i++) { 60701e04c3fSmrg const struct opcode_desc *op_desc = &opcodes[i]; 60801e04c3fSmrg 60901e04c3fSmrg if (opcode < op_desc->opcode_first || 61001e04c3fSmrg opcode > op_desc->opcode_last) 61101e04c3fSmrg continue; 61201e04c3fSmrg 6137ec681f3Smrg if (opcode_invalid_in_version(devinfo, op_desc)) 6147ec681f3Smrg continue; 6157ec681f3Smrg 61601e04c3fSmrg if (!(op_desc->mux_b_mask & (1 << mux_b))) 61701e04c3fSmrg continue; 61801e04c3fSmrg 61901e04c3fSmrg if (!(op_desc->mux_a_mask & (1 << mux_a))) 62001e04c3fSmrg continue; 62101e04c3fSmrg 62201e04c3fSmrg return op_desc; 62301e04c3fSmrg } 62401e04c3fSmrg 62501e04c3fSmrg return NULL; 62601e04c3fSmrg} 62701e04c3fSmrg 62801e04c3fSmrgstatic bool 62901e04c3fSmrgv3d_qpu_float32_unpack_unpack(uint32_t packed, 63001e04c3fSmrg enum v3d_qpu_input_unpack *unpacked) 63101e04c3fSmrg{ 63201e04c3fSmrg switch (packed) { 63301e04c3fSmrg case 0: 63401e04c3fSmrg *unpacked = V3D_QPU_UNPACK_ABS; 63501e04c3fSmrg return true; 63601e04c3fSmrg case 1: 63701e04c3fSmrg *unpacked = V3D_QPU_UNPACK_NONE; 63801e04c3fSmrg return true; 63901e04c3fSmrg case 2: 64001e04c3fSmrg *unpacked = V3D_QPU_UNPACK_L; 64101e04c3fSmrg return true; 64201e04c3fSmrg case 3: 64301e04c3fSmrg *unpacked = V3D_QPU_UNPACK_H; 64401e04c3fSmrg return true; 64501e04c3fSmrg default: 64601e04c3fSmrg return false; 64701e04c3fSmrg } 64801e04c3fSmrg} 64901e04c3fSmrg 65001e04c3fSmrgstatic bool 65101e04c3fSmrgv3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked, 65201e04c3fSmrg uint32_t *packed) 65301e04c3fSmrg{ 65401e04c3fSmrg switch (unpacked) { 65501e04c3fSmrg case V3D_QPU_UNPACK_ABS: 65601e04c3fSmrg *packed = 0; 65701e04c3fSmrg return true; 65801e04c3fSmrg case V3D_QPU_UNPACK_NONE: 65901e04c3fSmrg *packed = 1; 66001e04c3fSmrg return true; 66101e04c3fSmrg case V3D_QPU_UNPACK_L: 66201e04c3fSmrg *packed = 2; 66301e04c3fSmrg return true; 66401e04c3fSmrg case V3D_QPU_UNPACK_H: 66501e04c3fSmrg *packed = 3; 66601e04c3fSmrg return true; 66701e04c3fSmrg default: 66801e04c3fSmrg return false; 66901e04c3fSmrg } 67001e04c3fSmrg} 67101e04c3fSmrg 67201e04c3fSmrgstatic bool 67301e04c3fSmrgv3d_qpu_float16_unpack_unpack(uint32_t packed, 67401e04c3fSmrg enum v3d_qpu_input_unpack *unpacked) 67501e04c3fSmrg{ 67601e04c3fSmrg switch (packed) { 67701e04c3fSmrg case 0: 67801e04c3fSmrg *unpacked = V3D_QPU_UNPACK_NONE; 67901e04c3fSmrg return true; 68001e04c3fSmrg case 1: 68101e04c3fSmrg *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16; 68201e04c3fSmrg return true; 68301e04c3fSmrg case 2: 68401e04c3fSmrg *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16; 68501e04c3fSmrg return true; 68601e04c3fSmrg case 3: 68701e04c3fSmrg *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16; 68801e04c3fSmrg return true; 68901e04c3fSmrg case 4: 69001e04c3fSmrg *unpacked = V3D_QPU_UNPACK_SWAP_16; 69101e04c3fSmrg return true; 69201e04c3fSmrg default: 69301e04c3fSmrg return false; 69401e04c3fSmrg } 69501e04c3fSmrg} 69601e04c3fSmrg 69701e04c3fSmrgstatic bool 69801e04c3fSmrgv3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked, 69901e04c3fSmrg uint32_t *packed) 70001e04c3fSmrg{ 70101e04c3fSmrg switch (unpacked) { 70201e04c3fSmrg case V3D_QPU_UNPACK_NONE: 70301e04c3fSmrg *packed = 0; 70401e04c3fSmrg return true; 70501e04c3fSmrg case V3D_QPU_UNPACK_REPLICATE_32F_16: 70601e04c3fSmrg *packed = 1; 70701e04c3fSmrg return true; 70801e04c3fSmrg case V3D_QPU_UNPACK_REPLICATE_L_16: 70901e04c3fSmrg *packed = 2; 71001e04c3fSmrg return true; 71101e04c3fSmrg case V3D_QPU_UNPACK_REPLICATE_H_16: 71201e04c3fSmrg *packed = 3; 71301e04c3fSmrg return true; 71401e04c3fSmrg case V3D_QPU_UNPACK_SWAP_16: 71501e04c3fSmrg *packed = 4; 71601e04c3fSmrg return true; 71701e04c3fSmrg default: 71801e04c3fSmrg return false; 71901e04c3fSmrg } 72001e04c3fSmrg} 72101e04c3fSmrg 72201e04c3fSmrgstatic bool 72301e04c3fSmrgv3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked, 72401e04c3fSmrg uint32_t *packed) 72501e04c3fSmrg{ 72601e04c3fSmrg switch (unpacked) { 72701e04c3fSmrg case V3D_QPU_PACK_NONE: 72801e04c3fSmrg *packed = 0; 72901e04c3fSmrg return true; 73001e04c3fSmrg case V3D_QPU_PACK_L: 73101e04c3fSmrg *packed = 1; 73201e04c3fSmrg return true; 73301e04c3fSmrg case V3D_QPU_PACK_H: 73401e04c3fSmrg *packed = 2; 73501e04c3fSmrg return true; 73601e04c3fSmrg default: 73701e04c3fSmrg return false; 73801e04c3fSmrg } 73901e04c3fSmrg} 74001e04c3fSmrg 74101e04c3fSmrgstatic bool 74201e04c3fSmrgv3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, 74301e04c3fSmrg struct v3d_qpu_instr *instr) 74401e04c3fSmrg{ 7457ec681f3Smrg uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD); 7467ec681f3Smrg uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A); 7477ec681f3Smrg uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B); 74801e04c3fSmrg uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); 74901e04c3fSmrg 75001e04c3fSmrg uint32_t map_op = op; 75101e04c3fSmrg /* Some big clusters of opcodes are replicated with unpack 75201e04c3fSmrg * flags 75301e04c3fSmrg */ 75401e04c3fSmrg if (map_op >= 249 && map_op <= 251) 75501e04c3fSmrg map_op = (map_op - 249 + 245); 75601e04c3fSmrg if (map_op >= 253 && map_op <= 255) 75701e04c3fSmrg map_op = (map_op - 253 + 245); 75801e04c3fSmrg 75901e04c3fSmrg const struct opcode_desc *desc = 7607ec681f3Smrg lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops), 7617ec681f3Smrg map_op, mux_a, mux_b); 7627ec681f3Smrg 76301e04c3fSmrg if (!desc) 76401e04c3fSmrg return false; 76501e04c3fSmrg 76601e04c3fSmrg instr->alu.add.op = desc->op; 76701e04c3fSmrg 76801e04c3fSmrg /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the 76901e04c3fSmrg * operands. 77001e04c3fSmrg */ 77101e04c3fSmrg if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) { 77201e04c3fSmrg if (instr->alu.add.op == V3D_QPU_A_FMIN) 77301e04c3fSmrg instr->alu.add.op = V3D_QPU_A_FMAX; 77401e04c3fSmrg if (instr->alu.add.op == V3D_QPU_A_FADD) 77501e04c3fSmrg instr->alu.add.op = V3D_QPU_A_FADDNF; 77601e04c3fSmrg } 77701e04c3fSmrg 77801e04c3fSmrg /* Some QPU ops require a bit more than just basic opcode and mux a/b 77901e04c3fSmrg * comparisons to distinguish them. 78001e04c3fSmrg */ 78101e04c3fSmrg switch (instr->alu.add.op) { 78201e04c3fSmrg case V3D_QPU_A_STVPMV: 78301e04c3fSmrg case V3D_QPU_A_STVPMD: 78401e04c3fSmrg case V3D_QPU_A_STVPMP: 78501e04c3fSmrg switch (waddr) { 78601e04c3fSmrg case 0: 78701e04c3fSmrg instr->alu.add.op = V3D_QPU_A_STVPMV; 78801e04c3fSmrg break; 78901e04c3fSmrg case 1: 79001e04c3fSmrg instr->alu.add.op = V3D_QPU_A_STVPMD; 79101e04c3fSmrg break; 79201e04c3fSmrg case 2: 79301e04c3fSmrg instr->alu.add.op = V3D_QPU_A_STVPMP; 79401e04c3fSmrg break; 79501e04c3fSmrg default: 79601e04c3fSmrg return false; 79701e04c3fSmrg } 79801e04c3fSmrg break; 79901e04c3fSmrg default: 80001e04c3fSmrg break; 80101e04c3fSmrg } 80201e04c3fSmrg 80301e04c3fSmrg switch (instr->alu.add.op) { 80401e04c3fSmrg case V3D_QPU_A_FADD: 80501e04c3fSmrg case V3D_QPU_A_FADDNF: 80601e04c3fSmrg case V3D_QPU_A_FSUB: 80701e04c3fSmrg case V3D_QPU_A_FMIN: 80801e04c3fSmrg case V3D_QPU_A_FMAX: 80901e04c3fSmrg case V3D_QPU_A_FCMP: 810ed98bd31Smaya case V3D_QPU_A_VFPACK: 811ed98bd31Smaya if (instr->alu.add.op != V3D_QPU_A_VFPACK) 812ed98bd31Smaya instr->alu.add.output_pack = (op >> 4) & 0x3; 813ed98bd31Smaya else 814ed98bd31Smaya instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 81501e04c3fSmrg 81601e04c3fSmrg if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 81701e04c3fSmrg &instr->alu.add.a_unpack)) { 81801e04c3fSmrg return false; 81901e04c3fSmrg } 82001e04c3fSmrg 82101e04c3fSmrg if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, 82201e04c3fSmrg &instr->alu.add.b_unpack)) { 82301e04c3fSmrg return false; 82401e04c3fSmrg } 82501e04c3fSmrg break; 82601e04c3fSmrg 82701e04c3fSmrg case V3D_QPU_A_FFLOOR: 82801e04c3fSmrg case V3D_QPU_A_FROUND: 82901e04c3fSmrg case V3D_QPU_A_FTRUNC: 83001e04c3fSmrg case V3D_QPU_A_FCEIL: 83101e04c3fSmrg case V3D_QPU_A_FDX: 83201e04c3fSmrg case V3D_QPU_A_FDY: 83301e04c3fSmrg instr->alu.add.output_pack = mux_b & 0x3; 83401e04c3fSmrg 83501e04c3fSmrg if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 83601e04c3fSmrg &instr->alu.add.a_unpack)) { 83701e04c3fSmrg return false; 83801e04c3fSmrg } 83901e04c3fSmrg break; 84001e04c3fSmrg 84101e04c3fSmrg case V3D_QPU_A_FTOIN: 84201e04c3fSmrg case V3D_QPU_A_FTOIZ: 84301e04c3fSmrg case V3D_QPU_A_FTOUZ: 84401e04c3fSmrg case V3D_QPU_A_FTOC: 84501e04c3fSmrg instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 84601e04c3fSmrg 84701e04c3fSmrg if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 84801e04c3fSmrg &instr->alu.add.a_unpack)) { 84901e04c3fSmrg return false; 85001e04c3fSmrg } 85101e04c3fSmrg break; 85201e04c3fSmrg 85301e04c3fSmrg case V3D_QPU_A_VFMIN: 85401e04c3fSmrg case V3D_QPU_A_VFMAX: 85501e04c3fSmrg if (!v3d_qpu_float16_unpack_unpack(op & 0x7, 85601e04c3fSmrg &instr->alu.add.a_unpack)) { 85701e04c3fSmrg return false; 85801e04c3fSmrg } 85901e04c3fSmrg 86001e04c3fSmrg instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 86101e04c3fSmrg instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; 86201e04c3fSmrg break; 86301e04c3fSmrg 86401e04c3fSmrg default: 86501e04c3fSmrg instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 86601e04c3fSmrg instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE; 86701e04c3fSmrg instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; 86801e04c3fSmrg break; 86901e04c3fSmrg } 87001e04c3fSmrg 87101e04c3fSmrg instr->alu.add.a = mux_a; 87201e04c3fSmrg instr->alu.add.b = mux_b; 87301e04c3fSmrg instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); 87401e04c3fSmrg 87501e04c3fSmrg instr->alu.add.magic_write = false; 8767ec681f3Smrg if (packed_inst & V3D_QPU_MA) { 87701e04c3fSmrg switch (instr->alu.add.op) { 87801e04c3fSmrg case V3D_QPU_A_LDVPMV_IN: 87901e04c3fSmrg instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT; 88001e04c3fSmrg break; 88101e04c3fSmrg case V3D_QPU_A_LDVPMD_IN: 88201e04c3fSmrg instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT; 88301e04c3fSmrg break; 88401e04c3fSmrg case V3D_QPU_A_LDVPMG_IN: 88501e04c3fSmrg instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT; 88601e04c3fSmrg break; 88701e04c3fSmrg default: 88801e04c3fSmrg instr->alu.add.magic_write = true; 88901e04c3fSmrg break; 89001e04c3fSmrg } 89101e04c3fSmrg } 89201e04c3fSmrg 89301e04c3fSmrg return true; 89401e04c3fSmrg} 89501e04c3fSmrg 89601e04c3fSmrgstatic bool 89701e04c3fSmrgv3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, 89801e04c3fSmrg struct v3d_qpu_instr *instr) 89901e04c3fSmrg{ 9007ec681f3Smrg uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL); 9017ec681f3Smrg uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A); 9027ec681f3Smrg uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B); 90301e04c3fSmrg 90401e04c3fSmrg { 90501e04c3fSmrg const struct opcode_desc *desc = 9067ec681f3Smrg lookup_opcode_from_packed(devinfo, mul_ops, 9077ec681f3Smrg ARRAY_SIZE(mul_ops), 9087ec681f3Smrg op, mux_a, mux_b); 90901e04c3fSmrg if (!desc) 91001e04c3fSmrg return false; 91101e04c3fSmrg 91201e04c3fSmrg instr->alu.mul.op = desc->op; 91301e04c3fSmrg } 91401e04c3fSmrg 91501e04c3fSmrg switch (instr->alu.mul.op) { 91601e04c3fSmrg case V3D_QPU_M_FMUL: 91701e04c3fSmrg instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1; 91801e04c3fSmrg 91901e04c3fSmrg if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 92001e04c3fSmrg &instr->alu.mul.a_unpack)) { 92101e04c3fSmrg return false; 92201e04c3fSmrg } 92301e04c3fSmrg 92401e04c3fSmrg if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, 92501e04c3fSmrg &instr->alu.mul.b_unpack)) { 92601e04c3fSmrg return false; 92701e04c3fSmrg } 92801e04c3fSmrg 92901e04c3fSmrg break; 93001e04c3fSmrg 93101e04c3fSmrg case V3D_QPU_M_FMOV: 93201e04c3fSmrg instr->alu.mul.output_pack = (((op & 1) << 1) + 93301e04c3fSmrg ((mux_b >> 2) & 1)); 93401e04c3fSmrg 93501e04c3fSmrg if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3, 93601e04c3fSmrg &instr->alu.mul.a_unpack)) { 93701e04c3fSmrg return false; 93801e04c3fSmrg } 93901e04c3fSmrg 94001e04c3fSmrg break; 94101e04c3fSmrg 94201e04c3fSmrg case V3D_QPU_M_VFMUL: 94301e04c3fSmrg instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; 94401e04c3fSmrg 94501e04c3fSmrg if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7, 94601e04c3fSmrg &instr->alu.mul.a_unpack)) { 94701e04c3fSmrg return false; 94801e04c3fSmrg } 94901e04c3fSmrg 95001e04c3fSmrg instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE; 95101e04c3fSmrg 95201e04c3fSmrg break; 95301e04c3fSmrg 95401e04c3fSmrg default: 95501e04c3fSmrg instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; 95601e04c3fSmrg instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE; 95701e04c3fSmrg instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE; 95801e04c3fSmrg break; 95901e04c3fSmrg } 96001e04c3fSmrg 96101e04c3fSmrg instr->alu.mul.a = mux_a; 96201e04c3fSmrg instr->alu.mul.b = mux_b; 96301e04c3fSmrg instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M); 9647ec681f3Smrg instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM; 96501e04c3fSmrg 96601e04c3fSmrg return true; 96701e04c3fSmrg} 96801e04c3fSmrg 9697ec681f3Smrgstatic const struct opcode_desc * 9707ec681f3Smrglookup_opcode_from_instr(const struct v3d_device_info *devinfo, 9717ec681f3Smrg const struct opcode_desc *opcodes, size_t num_opcodes, 9727ec681f3Smrg uint8_t op) 9737ec681f3Smrg{ 9747ec681f3Smrg for (int i = 0; i < num_opcodes; i++) { 9757ec681f3Smrg const struct opcode_desc *op_desc = &opcodes[i]; 9767ec681f3Smrg 9777ec681f3Smrg if (op_desc->op != op) 9787ec681f3Smrg continue; 9797ec681f3Smrg 9807ec681f3Smrg if (opcode_invalid_in_version(devinfo, op_desc)) 9817ec681f3Smrg continue; 9827ec681f3Smrg 9837ec681f3Smrg return op_desc; 9847ec681f3Smrg } 9857ec681f3Smrg 9867ec681f3Smrg return NULL; 9877ec681f3Smrg} 9887ec681f3Smrg 98901e04c3fSmrgstatic bool 99001e04c3fSmrgv3d_qpu_add_pack(const struct v3d_device_info *devinfo, 99101e04c3fSmrg const struct v3d_qpu_instr *instr, uint64_t *packed_instr) 99201e04c3fSmrg{ 99301e04c3fSmrg uint32_t waddr = instr->alu.add.waddr; 99401e04c3fSmrg uint32_t mux_a = instr->alu.add.a; 99501e04c3fSmrg uint32_t mux_b = instr->alu.add.b; 99601e04c3fSmrg int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op); 9977ec681f3Smrg const struct opcode_desc *desc = 9987ec681f3Smrg lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops), 9997ec681f3Smrg instr->alu.add.op); 100001e04c3fSmrg 10017ec681f3Smrg if (!desc) 100201e04c3fSmrg return false; 100301e04c3fSmrg 10047ec681f3Smrg uint32_t opcode = desc->opcode_first; 100501e04c3fSmrg 100601e04c3fSmrg /* If an operation doesn't use an arg, its mux values may be used to 100701e04c3fSmrg * identify the operation type. 100801e04c3fSmrg */ 100901e04c3fSmrg if (nsrc < 2) 101001e04c3fSmrg mux_b = ffs(desc->mux_b_mask) - 1; 101101e04c3fSmrg 101201e04c3fSmrg if (nsrc < 1) 101301e04c3fSmrg mux_a = ffs(desc->mux_a_mask) - 1; 101401e04c3fSmrg 101501e04c3fSmrg bool no_magic_write = false; 101601e04c3fSmrg 101701e04c3fSmrg switch (instr->alu.add.op) { 101801e04c3fSmrg case V3D_QPU_A_STVPMV: 101901e04c3fSmrg waddr = 0; 102001e04c3fSmrg no_magic_write = true; 102101e04c3fSmrg break; 102201e04c3fSmrg case V3D_QPU_A_STVPMD: 102301e04c3fSmrg waddr = 1; 102401e04c3fSmrg no_magic_write = true; 102501e04c3fSmrg break; 102601e04c3fSmrg case V3D_QPU_A_STVPMP: 102701e04c3fSmrg waddr = 2; 102801e04c3fSmrg no_magic_write = true; 102901e04c3fSmrg break; 103001e04c3fSmrg 103101e04c3fSmrg case V3D_QPU_A_LDVPMV_IN: 103201e04c3fSmrg case V3D_QPU_A_LDVPMD_IN: 103301e04c3fSmrg case V3D_QPU_A_LDVPMP: 103401e04c3fSmrg case V3D_QPU_A_LDVPMG_IN: 103501e04c3fSmrg assert(!instr->alu.add.magic_write); 103601e04c3fSmrg break; 103701e04c3fSmrg 103801e04c3fSmrg case V3D_QPU_A_LDVPMV_OUT: 103901e04c3fSmrg case V3D_QPU_A_LDVPMD_OUT: 104001e04c3fSmrg case V3D_QPU_A_LDVPMG_OUT: 104101e04c3fSmrg assert(!instr->alu.add.magic_write); 10427ec681f3Smrg *packed_instr |= V3D_QPU_MA; 104301e04c3fSmrg break; 104401e04c3fSmrg 104501e04c3fSmrg default: 104601e04c3fSmrg break; 104701e04c3fSmrg } 104801e04c3fSmrg 104901e04c3fSmrg switch (instr->alu.add.op) { 105001e04c3fSmrg case V3D_QPU_A_FADD: 105101e04c3fSmrg case V3D_QPU_A_FADDNF: 105201e04c3fSmrg case V3D_QPU_A_FSUB: 105301e04c3fSmrg case V3D_QPU_A_FMIN: 105401e04c3fSmrg case V3D_QPU_A_FMAX: 105501e04c3fSmrg case V3D_QPU_A_FCMP: { 105601e04c3fSmrg uint32_t output_pack; 105701e04c3fSmrg uint32_t a_unpack; 105801e04c3fSmrg uint32_t b_unpack; 105901e04c3fSmrg 106001e04c3fSmrg if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, 106101e04c3fSmrg &output_pack)) { 106201e04c3fSmrg return false; 106301e04c3fSmrg } 106401e04c3fSmrg opcode |= output_pack << 4; 106501e04c3fSmrg 106601e04c3fSmrg if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, 106701e04c3fSmrg &a_unpack)) { 106801e04c3fSmrg return false; 106901e04c3fSmrg } 107001e04c3fSmrg 107101e04c3fSmrg if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack, 107201e04c3fSmrg &b_unpack)) { 107301e04c3fSmrg return false; 107401e04c3fSmrg } 107501e04c3fSmrg 107601e04c3fSmrg /* These operations with commutative operands are 107701e04c3fSmrg * distinguished by which order their operands come in. 107801e04c3fSmrg */ 107901e04c3fSmrg bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b; 108001e04c3fSmrg if (((instr->alu.add.op == V3D_QPU_A_FMIN || 108101e04c3fSmrg instr->alu.add.op == V3D_QPU_A_FADD) && ordering) || 108201e04c3fSmrg ((instr->alu.add.op == V3D_QPU_A_FMAX || 108301e04c3fSmrg instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) { 108401e04c3fSmrg uint32_t temp; 108501e04c3fSmrg 108601e04c3fSmrg temp = a_unpack; 108701e04c3fSmrg a_unpack = b_unpack; 108801e04c3fSmrg b_unpack = temp; 108901e04c3fSmrg 109001e04c3fSmrg temp = mux_a; 109101e04c3fSmrg mux_a = mux_b; 109201e04c3fSmrg mux_b = temp; 109301e04c3fSmrg } 109401e04c3fSmrg 109501e04c3fSmrg opcode |= a_unpack << 2; 109601e04c3fSmrg opcode |= b_unpack << 0; 1097ed98bd31Smaya 1098ed98bd31Smaya break; 1099ed98bd31Smaya } 1100ed98bd31Smaya 1101ed98bd31Smaya case V3D_QPU_A_VFPACK: { 1102ed98bd31Smaya uint32_t a_unpack; 1103ed98bd31Smaya uint32_t b_unpack; 1104ed98bd31Smaya 1105ed98bd31Smaya if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS || 1106ed98bd31Smaya instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) { 1107ed98bd31Smaya return false; 1108ed98bd31Smaya } 1109ed98bd31Smaya 1110ed98bd31Smaya if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, 1111ed98bd31Smaya &a_unpack)) { 1112ed98bd31Smaya return false; 1113ed98bd31Smaya } 1114ed98bd31Smaya 1115ed98bd31Smaya if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack, 1116ed98bd31Smaya &b_unpack)) { 1117ed98bd31Smaya return false; 1118ed98bd31Smaya } 1119ed98bd31Smaya 1120ed98bd31Smaya opcode = (opcode & ~(1 << 2)) | (a_unpack << 2); 1121ed98bd31Smaya opcode = (opcode & ~(1 << 0)) | (b_unpack << 0); 1122ed98bd31Smaya 112301e04c3fSmrg break; 112401e04c3fSmrg } 112501e04c3fSmrg 112601e04c3fSmrg case V3D_QPU_A_FFLOOR: 112701e04c3fSmrg case V3D_QPU_A_FROUND: 112801e04c3fSmrg case V3D_QPU_A_FTRUNC: 112901e04c3fSmrg case V3D_QPU_A_FCEIL: 113001e04c3fSmrg case V3D_QPU_A_FDX: 113101e04c3fSmrg case V3D_QPU_A_FDY: { 113201e04c3fSmrg uint32_t packed; 113301e04c3fSmrg 113401e04c3fSmrg if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, 113501e04c3fSmrg &packed)) { 113601e04c3fSmrg return false; 113701e04c3fSmrg } 113801e04c3fSmrg mux_b |= packed; 113901e04c3fSmrg 114001e04c3fSmrg if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, 114101e04c3fSmrg &packed)) { 114201e04c3fSmrg return false; 114301e04c3fSmrg } 114401e04c3fSmrg if (packed == 0) 114501e04c3fSmrg return false; 1146ed98bd31Smaya opcode = (opcode & ~(1 << 2)) | packed << 2; 114701e04c3fSmrg break; 114801e04c3fSmrg } 114901e04c3fSmrg 115001e04c3fSmrg case V3D_QPU_A_FTOIN: 115101e04c3fSmrg case V3D_QPU_A_FTOIZ: 115201e04c3fSmrg case V3D_QPU_A_FTOUZ: 115301e04c3fSmrg case V3D_QPU_A_FTOC: 115401e04c3fSmrg if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE) 115501e04c3fSmrg return false; 115601e04c3fSmrg 115701e04c3fSmrg uint32_t packed; 115801e04c3fSmrg if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, 115901e04c3fSmrg &packed)) { 116001e04c3fSmrg return false; 116101e04c3fSmrg } 116201e04c3fSmrg if (packed == 0) 116301e04c3fSmrg return false; 116401e04c3fSmrg opcode |= packed << 2; 116501e04c3fSmrg 116601e04c3fSmrg break; 116701e04c3fSmrg 116801e04c3fSmrg case V3D_QPU_A_VFMIN: 116901e04c3fSmrg case V3D_QPU_A_VFMAX: 117001e04c3fSmrg if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || 117101e04c3fSmrg instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) { 117201e04c3fSmrg return false; 117301e04c3fSmrg } 117401e04c3fSmrg 117501e04c3fSmrg if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack, 117601e04c3fSmrg &packed)) { 117701e04c3fSmrg return false; 117801e04c3fSmrg } 117901e04c3fSmrg opcode |= packed; 118001e04c3fSmrg break; 118101e04c3fSmrg 118201e04c3fSmrg default: 118301e04c3fSmrg if (instr->alu.add.op != V3D_QPU_A_NOP && 118401e04c3fSmrg (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || 118501e04c3fSmrg instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE || 118601e04c3fSmrg instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) { 118701e04c3fSmrg return false; 118801e04c3fSmrg } 118901e04c3fSmrg break; 119001e04c3fSmrg } 119101e04c3fSmrg 11927ec681f3Smrg *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A); 11937ec681f3Smrg *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B); 11947ec681f3Smrg *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD); 119501e04c3fSmrg *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A); 119601e04c3fSmrg if (instr->alu.add.magic_write && !no_magic_write) 11977ec681f3Smrg *packed_instr |= V3D_QPU_MA; 119801e04c3fSmrg 119901e04c3fSmrg return true; 120001e04c3fSmrg} 120101e04c3fSmrg 120201e04c3fSmrgstatic bool 120301e04c3fSmrgv3d_qpu_mul_pack(const struct v3d_device_info *devinfo, 120401e04c3fSmrg const struct v3d_qpu_instr *instr, uint64_t *packed_instr) 120501e04c3fSmrg{ 120601e04c3fSmrg uint32_t mux_a = instr->alu.mul.a; 120701e04c3fSmrg uint32_t mux_b = instr->alu.mul.b; 120801e04c3fSmrg int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op); 120901e04c3fSmrg 12107ec681f3Smrg const struct opcode_desc *desc = 12117ec681f3Smrg lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops), 12127ec681f3Smrg instr->alu.mul.op); 12137ec681f3Smrg 12147ec681f3Smrg if (!desc) 121501e04c3fSmrg return false; 121601e04c3fSmrg 121701e04c3fSmrg uint32_t opcode = desc->opcode_first; 121801e04c3fSmrg 121901e04c3fSmrg /* Some opcodes have a single valid value for their mux a/b, so set 122001e04c3fSmrg * that here. If mux a/b determine packing, it will be set below. 122101e04c3fSmrg */ 122201e04c3fSmrg if (nsrc < 2) 122301e04c3fSmrg mux_b = ffs(desc->mux_b_mask) - 1; 122401e04c3fSmrg 122501e04c3fSmrg if (nsrc < 1) 122601e04c3fSmrg mux_a = ffs(desc->mux_a_mask) - 1; 122701e04c3fSmrg 122801e04c3fSmrg switch (instr->alu.mul.op) { 122901e04c3fSmrg case V3D_QPU_M_FMUL: { 123001e04c3fSmrg uint32_t packed; 123101e04c3fSmrg 123201e04c3fSmrg if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack, 123301e04c3fSmrg &packed)) { 123401e04c3fSmrg return false; 123501e04c3fSmrg } 123601e04c3fSmrg /* No need for a +1 because desc->opcode_first has a 1 in this 123701e04c3fSmrg * field. 123801e04c3fSmrg */ 123901e04c3fSmrg opcode += packed << 4; 124001e04c3fSmrg 124101e04c3fSmrg if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack, 124201e04c3fSmrg &packed)) { 124301e04c3fSmrg return false; 124401e04c3fSmrg } 124501e04c3fSmrg opcode |= packed << 2; 124601e04c3fSmrg 124701e04c3fSmrg if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack, 124801e04c3fSmrg &packed)) { 124901e04c3fSmrg return false; 125001e04c3fSmrg } 125101e04c3fSmrg opcode |= packed << 0; 125201e04c3fSmrg break; 125301e04c3fSmrg } 125401e04c3fSmrg 125501e04c3fSmrg case V3D_QPU_M_FMOV: { 125601e04c3fSmrg uint32_t packed; 125701e04c3fSmrg 125801e04c3fSmrg if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack, 125901e04c3fSmrg &packed)) { 126001e04c3fSmrg return false; 126101e04c3fSmrg } 126201e04c3fSmrg opcode |= (packed >> 1) & 1; 126301e04c3fSmrg mux_b = (packed & 1) << 2; 126401e04c3fSmrg 126501e04c3fSmrg if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack, 126601e04c3fSmrg &packed)) { 126701e04c3fSmrg return false; 126801e04c3fSmrg } 126901e04c3fSmrg mux_b |= packed; 127001e04c3fSmrg break; 127101e04c3fSmrg } 127201e04c3fSmrg 127301e04c3fSmrg case V3D_QPU_M_VFMUL: { 127401e04c3fSmrg uint32_t packed; 127501e04c3fSmrg 127601e04c3fSmrg if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE) 127701e04c3fSmrg return false; 127801e04c3fSmrg 127901e04c3fSmrg if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack, 128001e04c3fSmrg &packed)) { 128101e04c3fSmrg return false; 128201e04c3fSmrg } 128301e04c3fSmrg if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16) 128401e04c3fSmrg opcode = 8; 128501e04c3fSmrg else 128601e04c3fSmrg opcode |= (packed + 4) & 7; 128701e04c3fSmrg 128801e04c3fSmrg if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) 128901e04c3fSmrg return false; 129001e04c3fSmrg 129101e04c3fSmrg break; 129201e04c3fSmrg } 129301e04c3fSmrg 129401e04c3fSmrg default: 129501e04c3fSmrg break; 129601e04c3fSmrg } 129701e04c3fSmrg 12987ec681f3Smrg *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A); 12997ec681f3Smrg *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B); 130001e04c3fSmrg 13017ec681f3Smrg *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL); 130201e04c3fSmrg *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M); 130301e04c3fSmrg if (instr->alu.mul.magic_write) 13047ec681f3Smrg *packed_instr |= V3D_QPU_MM; 130501e04c3fSmrg 130601e04c3fSmrg return true; 130701e04c3fSmrg} 130801e04c3fSmrg 130901e04c3fSmrgstatic bool 131001e04c3fSmrgv3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo, 131101e04c3fSmrg uint64_t packed_instr, 131201e04c3fSmrg struct v3d_qpu_instr *instr) 131301e04c3fSmrg{ 131401e04c3fSmrg instr->type = V3D_QPU_INSTR_TYPE_ALU; 131501e04c3fSmrg 131601e04c3fSmrg if (!v3d_qpu_sig_unpack(devinfo, 13177ec681f3Smrg QPU_GET_FIELD(packed_instr, V3D_QPU_SIG), 131801e04c3fSmrg &instr->sig)) 131901e04c3fSmrg return false; 132001e04c3fSmrg 13217ec681f3Smrg uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND); 132201e04c3fSmrg if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) { 13237ec681f3Smrg instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR; 13247ec681f3Smrg instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR; 132501e04c3fSmrg 132601e04c3fSmrg instr->flags.ac = V3D_QPU_COND_NONE; 132701e04c3fSmrg instr->flags.mc = V3D_QPU_COND_NONE; 132801e04c3fSmrg instr->flags.apf = V3D_QPU_PF_NONE; 132901e04c3fSmrg instr->flags.mpf = V3D_QPU_PF_NONE; 133001e04c3fSmrg instr->flags.auf = V3D_QPU_UF_NONE; 133101e04c3fSmrg instr->flags.muf = V3D_QPU_UF_NONE; 133201e04c3fSmrg } else { 133301e04c3fSmrg if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags)) 133401e04c3fSmrg return false; 133501e04c3fSmrg } 133601e04c3fSmrg 13377ec681f3Smrg instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A); 13387ec681f3Smrg instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B); 133901e04c3fSmrg 134001e04c3fSmrg if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr)) 134101e04c3fSmrg return false; 134201e04c3fSmrg 134301e04c3fSmrg if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr)) 134401e04c3fSmrg return false; 134501e04c3fSmrg 134601e04c3fSmrg return true; 134701e04c3fSmrg} 134801e04c3fSmrg 134901e04c3fSmrgstatic bool 135001e04c3fSmrgv3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo, 135101e04c3fSmrg uint64_t packed_instr, 135201e04c3fSmrg struct v3d_qpu_instr *instr) 135301e04c3fSmrg{ 135401e04c3fSmrg instr->type = V3D_QPU_INSTR_TYPE_BRANCH; 135501e04c3fSmrg 13567ec681f3Smrg uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND); 135701e04c3fSmrg if (cond == 0) 135801e04c3fSmrg instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS; 135901e04c3fSmrg else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <= 136001e04c3fSmrg V3D_QPU_BRANCH_COND_ALLNA) 136101e04c3fSmrg instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2); 136201e04c3fSmrg else 136301e04c3fSmrg return false; 136401e04c3fSmrg 13657ec681f3Smrg uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN); 136601e04c3fSmrg if (msfign == 3) 136701e04c3fSmrg return false; 136801e04c3fSmrg instr->branch.msfign = msfign; 136901e04c3fSmrg 13707ec681f3Smrg instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI); 137101e04c3fSmrg 13727ec681f3Smrg instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB; 137301e04c3fSmrg if (instr->branch.ub) { 137401e04c3fSmrg instr->branch.bdu = QPU_GET_FIELD(packed_instr, 13757ec681f3Smrg V3D_QPU_BRANCH_BDU); 137601e04c3fSmrg } 137701e04c3fSmrg 137801e04c3fSmrg instr->branch.raddr_a = QPU_GET_FIELD(packed_instr, 13797ec681f3Smrg V3D_QPU_RADDR_A); 138001e04c3fSmrg 138101e04c3fSmrg instr->branch.offset = 0; 138201e04c3fSmrg 138301e04c3fSmrg instr->branch.offset += 138401e04c3fSmrg QPU_GET_FIELD(packed_instr, 13857ec681f3Smrg V3D_QPU_BRANCH_ADDR_LOW) << 3; 138601e04c3fSmrg 138701e04c3fSmrg instr->branch.offset += 138801e04c3fSmrg QPU_GET_FIELD(packed_instr, 13897ec681f3Smrg V3D_QPU_BRANCH_ADDR_HIGH) << 24; 139001e04c3fSmrg 139101e04c3fSmrg return true; 139201e04c3fSmrg} 139301e04c3fSmrg 139401e04c3fSmrgbool 139501e04c3fSmrgv3d_qpu_instr_unpack(const struct v3d_device_info *devinfo, 139601e04c3fSmrg uint64_t packed_instr, 139701e04c3fSmrg struct v3d_qpu_instr *instr) 139801e04c3fSmrg{ 13997ec681f3Smrg if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) { 140001e04c3fSmrg return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr); 140101e04c3fSmrg } else { 14027ec681f3Smrg uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG); 140301e04c3fSmrg 140401e04c3fSmrg if ((sig & 24) == 16) { 140501e04c3fSmrg return v3d_qpu_instr_unpack_branch(devinfo, packed_instr, 140601e04c3fSmrg instr); 140701e04c3fSmrg } else { 140801e04c3fSmrg return false; 140901e04c3fSmrg } 141001e04c3fSmrg } 141101e04c3fSmrg} 141201e04c3fSmrg 141301e04c3fSmrgstatic bool 141401e04c3fSmrgv3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo, 141501e04c3fSmrg const struct v3d_qpu_instr *instr, 141601e04c3fSmrg uint64_t *packed_instr) 141701e04c3fSmrg{ 141801e04c3fSmrg uint32_t sig; 141901e04c3fSmrg if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig)) 142001e04c3fSmrg return false; 14217ec681f3Smrg *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG); 142201e04c3fSmrg 142301e04c3fSmrg if (instr->type == V3D_QPU_INSTR_TYPE_ALU) { 14247ec681f3Smrg *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A); 14257ec681f3Smrg *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B); 142601e04c3fSmrg 142701e04c3fSmrg if (!v3d_qpu_add_pack(devinfo, instr, packed_instr)) 142801e04c3fSmrg return false; 142901e04c3fSmrg if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr)) 143001e04c3fSmrg return false; 143101e04c3fSmrg 143201e04c3fSmrg uint32_t flags; 143301e04c3fSmrg if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) { 143401e04c3fSmrg if (instr->flags.ac != V3D_QPU_COND_NONE || 143501e04c3fSmrg instr->flags.mc != V3D_QPU_COND_NONE || 143601e04c3fSmrg instr->flags.apf != V3D_QPU_PF_NONE || 143701e04c3fSmrg instr->flags.mpf != V3D_QPU_PF_NONE || 143801e04c3fSmrg instr->flags.auf != V3D_QPU_UF_NONE || 143901e04c3fSmrg instr->flags.muf != V3D_QPU_UF_NONE) { 144001e04c3fSmrg return false; 144101e04c3fSmrg } 144201e04c3fSmrg 144301e04c3fSmrg flags = instr->sig_addr; 144401e04c3fSmrg if (instr->sig_magic) 14457ec681f3Smrg flags |= V3D_QPU_COND_SIG_MAGIC_ADDR; 144601e04c3fSmrg } else { 144701e04c3fSmrg if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags)) 144801e04c3fSmrg return false; 144901e04c3fSmrg } 145001e04c3fSmrg 14517ec681f3Smrg *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND); 145201e04c3fSmrg } else { 145301e04c3fSmrg if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) 145401e04c3fSmrg return false; 145501e04c3fSmrg } 145601e04c3fSmrg 145701e04c3fSmrg return true; 145801e04c3fSmrg} 145901e04c3fSmrg 146001e04c3fSmrgstatic bool 146101e04c3fSmrgv3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo, 146201e04c3fSmrg const struct v3d_qpu_instr *instr, 146301e04c3fSmrg uint64_t *packed_instr) 146401e04c3fSmrg{ 14657ec681f3Smrg *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG); 146601e04c3fSmrg 146701e04c3fSmrg if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) { 146801e04c3fSmrg *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond - 146901e04c3fSmrg V3D_QPU_BRANCH_COND_A0), 14707ec681f3Smrg V3D_QPU_BRANCH_COND); 147101e04c3fSmrg } 147201e04c3fSmrg 147301e04c3fSmrg *packed_instr |= QPU_SET_FIELD(instr->branch.msfign, 14747ec681f3Smrg V3D_QPU_BRANCH_MSFIGN); 147501e04c3fSmrg 147601e04c3fSmrg *packed_instr |= QPU_SET_FIELD(instr->branch.bdi, 14777ec681f3Smrg V3D_QPU_BRANCH_BDI); 147801e04c3fSmrg 147901e04c3fSmrg if (instr->branch.ub) { 14807ec681f3Smrg *packed_instr |= V3D_QPU_BRANCH_UB; 148101e04c3fSmrg *packed_instr |= QPU_SET_FIELD(instr->branch.bdu, 14827ec681f3Smrg V3D_QPU_BRANCH_BDU); 148301e04c3fSmrg } 148401e04c3fSmrg 148501e04c3fSmrg switch (instr->branch.bdi) { 148601e04c3fSmrg case V3D_QPU_BRANCH_DEST_ABS: 148701e04c3fSmrg case V3D_QPU_BRANCH_DEST_REL: 148801e04c3fSmrg *packed_instr |= QPU_SET_FIELD(instr->branch.msfign, 14897ec681f3Smrg V3D_QPU_BRANCH_MSFIGN); 149001e04c3fSmrg 149101e04c3fSmrg *packed_instr |= QPU_SET_FIELD((instr->branch.offset & 149201e04c3fSmrg ~0xff000000) >> 3, 14937ec681f3Smrg V3D_QPU_BRANCH_ADDR_LOW); 149401e04c3fSmrg 149501e04c3fSmrg *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24, 14967ec681f3Smrg V3D_QPU_BRANCH_ADDR_HIGH); 149701e04c3fSmrg break; 149801e04c3fSmrg default: 149901e04c3fSmrg break; 150001e04c3fSmrg } 150101e04c3fSmrg 15027ec681f3Smrg if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE || 15037ec681f3Smrg instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) { 15047ec681f3Smrg *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a, 15057ec681f3Smrg V3D_QPU_RADDR_A); 15067ec681f3Smrg } 15077ec681f3Smrg 150801e04c3fSmrg return true; 150901e04c3fSmrg} 151001e04c3fSmrg 151101e04c3fSmrgbool 151201e04c3fSmrgv3d_qpu_instr_pack(const struct v3d_device_info *devinfo, 151301e04c3fSmrg const struct v3d_qpu_instr *instr, 151401e04c3fSmrg uint64_t *packed_instr) 151501e04c3fSmrg{ 151601e04c3fSmrg *packed_instr = 0; 151701e04c3fSmrg 151801e04c3fSmrg switch (instr->type) { 151901e04c3fSmrg case V3D_QPU_INSTR_TYPE_ALU: 152001e04c3fSmrg return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr); 152101e04c3fSmrg case V3D_QPU_INSTR_TYPE_BRANCH: 152201e04c3fSmrg return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr); 152301e04c3fSmrg default: 152401e04c3fSmrg return false; 152501e04c3fSmrg } 152601e04c3fSmrg} 1527