/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include <string.h> 25b8e80941Smrg#include "util/macros.h" 26b8e80941Smrg 27b8e80941Smrg#include "broadcom/common/v3d_device_info.h" 28b8e80941Smrg#include "qpu_instr.h" 29b8e80941Smrg 30b8e80941Smrg#ifndef QPU_MASK 31b8e80941Smrg#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low)) 32b8e80941Smrg/* Using the GNU statement expression extension */ 33b8e80941Smrg#define QPU_SET_FIELD(value, field) \ 34b8e80941Smrg ({ \ 35b8e80941Smrg uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \ 36b8e80941Smrg assert((fieldval & ~ field ## _MASK) == 0); \ 37b8e80941Smrg fieldval & field ## _MASK; \ 38b8e80941Smrg }) 39b8e80941Smrg 40b8e80941Smrg#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) 41b8e80941Smrg 42b8e80941Smrg#define QPU_UPDATE_FIELD(inst, value, field) \ 43b8e80941Smrg (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field)) 44b8e80941Smrg#endif /* QPU_MASK */ 45b8e80941Smrg 46b8e80941Smrg#define VC5_QPU_OP_MUL_SHIFT 58 47b8e80941Smrg#define VC5_QPU_OP_MUL_MASK QPU_MASK(63, 58) 48b8e80941Smrg 49b8e80941Smrg#define VC5_QPU_SIG_SHIFT 53 50b8e80941Smrg#define VC5_QPU_SIG_MASK QPU_MASK(57, 53) 51b8e80941Smrg 52b8e80941Smrg#define VC5_QPU_COND_SHIFT 46 53b8e80941Smrg#define VC5_QPU_COND_MASK QPU_MASK(52, 46) 54b8e80941Smrg#define VC5_QPU_COND_SIG_MAGIC_ADDR (1 << 6) 55b8e80941Smrg 56b8e80941Smrg#define VC5_QPU_MM QPU_MASK(45, 45) 57b8e80941Smrg#define VC5_QPU_MA QPU_MASK(44, 44) 58b8e80941Smrg 59b8e80941Smrg#define V3D_QPU_WADDR_M_SHIFT 38 60b8e80941Smrg#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38) 61b8e80941Smrg 62b8e80941Smrg#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT 35 63b8e80941Smrg#define VC5_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35) 64b8e80941Smrg 65b8e80941Smrg#define V3D_QPU_WADDR_A_SHIFT 32 66b8e80941Smrg#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32) 67b8e80941Smrg 68b8e80941Smrg#define VC5_QPU_BRANCH_COND_SHIFT 32 69b8e80941Smrg#define 
VC5_QPU_BRANCH_COND_MASK QPU_MASK(34, 32) 70b8e80941Smrg 71b8e80941Smrg#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT 24 72b8e80941Smrg#define VC5_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24) 73b8e80941Smrg 74b8e80941Smrg#define VC5_QPU_OP_ADD_SHIFT 24 75b8e80941Smrg#define VC5_QPU_OP_ADD_MASK QPU_MASK(31, 24) 76b8e80941Smrg 77b8e80941Smrg#define VC5_QPU_MUL_B_SHIFT 21 78b8e80941Smrg#define VC5_QPU_MUL_B_MASK QPU_MASK(23, 21) 79b8e80941Smrg 80b8e80941Smrg#define VC5_QPU_BRANCH_MSFIGN_SHIFT 21 81b8e80941Smrg#define VC5_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21) 82b8e80941Smrg 83b8e80941Smrg#define VC5_QPU_MUL_A_SHIFT 18 84b8e80941Smrg#define VC5_QPU_MUL_A_MASK QPU_MASK(20, 18) 85b8e80941Smrg 86b8e80941Smrg#define VC5_QPU_ADD_B_SHIFT 15 87b8e80941Smrg#define VC5_QPU_ADD_B_MASK QPU_MASK(17, 15) 88b8e80941Smrg 89b8e80941Smrg#define VC5_QPU_BRANCH_BDU_SHIFT 15 90b8e80941Smrg#define VC5_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15) 91b8e80941Smrg 92b8e80941Smrg#define VC5_QPU_BRANCH_UB QPU_MASK(14, 14) 93b8e80941Smrg 94b8e80941Smrg#define VC5_QPU_ADD_A_SHIFT 12 95b8e80941Smrg#define VC5_QPU_ADD_A_MASK QPU_MASK(14, 12) 96b8e80941Smrg 97b8e80941Smrg#define VC5_QPU_BRANCH_BDI_SHIFT 12 98b8e80941Smrg#define VC5_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12) 99b8e80941Smrg 100b8e80941Smrg#define VC5_QPU_RADDR_A_SHIFT 6 101b8e80941Smrg#define VC5_QPU_RADDR_A_MASK QPU_MASK(11, 6) 102b8e80941Smrg 103b8e80941Smrg#define VC5_QPU_RADDR_B_SHIFT 0 104b8e80941Smrg#define VC5_QPU_RADDR_B_MASK QPU_MASK(5, 0) 105b8e80941Smrg 106b8e80941Smrg#define THRSW .thrsw = true 107b8e80941Smrg#define LDUNIF .ldunif = true 108b8e80941Smrg#define LDUNIFRF .ldunifrf = true 109b8e80941Smrg#define LDUNIFA .ldunifa = true 110b8e80941Smrg#define LDUNIFARF .ldunifarf = true 111b8e80941Smrg#define LDTMU .ldtmu = true 112b8e80941Smrg#define LDVARY .ldvary = true 113b8e80941Smrg#define LDVPM .ldvpm = true 114b8e80941Smrg#define SMIMM .small_imm = true 115b8e80941Smrg#define LDTLB .ldtlb = true 116b8e80941Smrg#define LDTLBU .ldtlbu = true 
117b8e80941Smrg#define UCB .ucb = true 118b8e80941Smrg#define ROT .rotate = true 119b8e80941Smrg#define WRTMUC .wrtmuc = true 120b8e80941Smrg 121b8e80941Smrgstatic const struct v3d_qpu_sig v33_sig_map[] = { 122b8e80941Smrg /* MISC R3 R4 R5 */ 123b8e80941Smrg [0] = { }, 124b8e80941Smrg [1] = { THRSW, }, 125b8e80941Smrg [2] = { LDUNIF }, 126b8e80941Smrg [3] = { THRSW, LDUNIF }, 127b8e80941Smrg [4] = { LDTMU, }, 128b8e80941Smrg [5] = { THRSW, LDTMU, }, 129b8e80941Smrg [6] = { LDTMU, LDUNIF }, 130b8e80941Smrg [7] = { THRSW, LDTMU, LDUNIF }, 131b8e80941Smrg [8] = { LDVARY, }, 132b8e80941Smrg [9] = { THRSW, LDVARY, }, 133b8e80941Smrg [10] = { LDVARY, LDUNIF }, 134b8e80941Smrg [11] = { THRSW, LDVARY, LDUNIF }, 135b8e80941Smrg [12] = { LDVARY, LDTMU, }, 136b8e80941Smrg [13] = { THRSW, LDVARY, LDTMU, }, 137b8e80941Smrg [14] = { SMIMM, LDVARY, }, 138b8e80941Smrg [15] = { SMIMM, }, 139b8e80941Smrg [16] = { LDTLB, }, 140b8e80941Smrg [17] = { LDTLBU, }, 141b8e80941Smrg /* 18-21 reserved */ 142b8e80941Smrg [22] = { UCB, }, 143b8e80941Smrg [23] = { ROT, }, 144b8e80941Smrg [24] = { LDVPM, }, 145b8e80941Smrg [25] = { THRSW, LDVPM, }, 146b8e80941Smrg [26] = { LDVPM, LDUNIF }, 147b8e80941Smrg [27] = { THRSW, LDVPM, LDUNIF }, 148b8e80941Smrg [28] = { LDVPM, LDTMU, }, 149b8e80941Smrg [29] = { THRSW, LDVPM, LDTMU, }, 150b8e80941Smrg [30] = { SMIMM, LDVPM, }, 151b8e80941Smrg [31] = { SMIMM, }, 152b8e80941Smrg}; 153b8e80941Smrg 154b8e80941Smrgstatic const struct v3d_qpu_sig v40_sig_map[] = { 155b8e80941Smrg /* MISC R3 R4 R5 */ 156b8e80941Smrg [0] = { }, 157b8e80941Smrg [1] = { THRSW, }, 158b8e80941Smrg [2] = { LDUNIF }, 159b8e80941Smrg [3] = { THRSW, LDUNIF }, 160b8e80941Smrg [4] = { LDTMU, }, 161b8e80941Smrg [5] = { THRSW, LDTMU, }, 162b8e80941Smrg [6] = { LDTMU, LDUNIF }, 163b8e80941Smrg [7] = { THRSW, LDTMU, LDUNIF }, 164b8e80941Smrg [8] = { LDVARY, }, 165b8e80941Smrg [9] = { THRSW, LDVARY, }, 166b8e80941Smrg [10] = { LDVARY, LDUNIF }, 167b8e80941Smrg [11] = { THRSW, LDVARY, LDUNIF }, 
168b8e80941Smrg /* 12-13 reserved */ 169b8e80941Smrg [14] = { SMIMM, LDVARY, }, 170b8e80941Smrg [15] = { SMIMM, }, 171b8e80941Smrg [16] = { LDTLB, }, 172b8e80941Smrg [17] = { LDTLBU, }, 173b8e80941Smrg [18] = { WRTMUC }, 174b8e80941Smrg [19] = { THRSW, WRTMUC }, 175b8e80941Smrg [20] = { LDVARY, WRTMUC }, 176b8e80941Smrg [21] = { THRSW, LDVARY, WRTMUC }, 177b8e80941Smrg [22] = { UCB, }, 178b8e80941Smrg [23] = { ROT, }, 179b8e80941Smrg /* 24-30 reserved */ 180b8e80941Smrg [31] = { SMIMM, LDTMU, }, 181b8e80941Smrg}; 182b8e80941Smrg 183b8e80941Smrgstatic const struct v3d_qpu_sig v41_sig_map[] = { 184b8e80941Smrg /* MISC phys R5 */ 185b8e80941Smrg [0] = { }, 186b8e80941Smrg [1] = { THRSW, }, 187b8e80941Smrg [2] = { LDUNIF }, 188b8e80941Smrg [3] = { THRSW, LDUNIF }, 189b8e80941Smrg [4] = { LDTMU, }, 190b8e80941Smrg [5] = { THRSW, LDTMU, }, 191b8e80941Smrg [6] = { LDTMU, LDUNIF }, 192b8e80941Smrg [7] = { THRSW, LDTMU, LDUNIF }, 193b8e80941Smrg [8] = { LDVARY, }, 194b8e80941Smrg [9] = { THRSW, LDVARY, }, 195b8e80941Smrg [10] = { LDVARY, LDUNIF }, 196b8e80941Smrg [11] = { THRSW, LDVARY, LDUNIF }, 197b8e80941Smrg [12] = { LDUNIFRF }, 198b8e80941Smrg [13] = { THRSW, LDUNIFRF }, 199b8e80941Smrg [14] = { SMIMM, LDVARY, }, 200b8e80941Smrg [15] = { SMIMM, }, 201b8e80941Smrg [16] = { LDTLB, }, 202b8e80941Smrg [17] = { LDTLBU, }, 203b8e80941Smrg [18] = { WRTMUC }, 204b8e80941Smrg [19] = { THRSW, WRTMUC }, 205b8e80941Smrg [20] = { LDVARY, WRTMUC }, 206b8e80941Smrg [21] = { THRSW, LDVARY, WRTMUC }, 207b8e80941Smrg [22] = { UCB, }, 208b8e80941Smrg [23] = { ROT, }, 209b8e80941Smrg /* 24-30 reserved */ 210b8e80941Smrg [24] = { LDUNIFA}, 211b8e80941Smrg [25] = { LDUNIFARF }, 212b8e80941Smrg [31] = { SMIMM, LDTMU, }, 213b8e80941Smrg}; 214b8e80941Smrg 215b8e80941Smrgbool 216b8e80941Smrgv3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, 217b8e80941Smrg uint32_t packed_sig, 218b8e80941Smrg struct v3d_qpu_sig *sig) 219b8e80941Smrg{ 220b8e80941Smrg if (packed_sig >= 
ARRAY_SIZE(v33_sig_map)) 221b8e80941Smrg return false; 222b8e80941Smrg 223b8e80941Smrg if (devinfo->ver >= 41) 224b8e80941Smrg *sig = v41_sig_map[packed_sig]; 225b8e80941Smrg else if (devinfo->ver == 40) 226b8e80941Smrg *sig = v40_sig_map[packed_sig]; 227b8e80941Smrg else 228b8e80941Smrg *sig = v33_sig_map[packed_sig]; 229b8e80941Smrg 230b8e80941Smrg /* Signals with zeroed unpacked contents after element 0 are reserved. */ 231b8e80941Smrg return (packed_sig == 0 || 232b8e80941Smrg memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0); 233b8e80941Smrg} 234b8e80941Smrg 235b8e80941Smrgbool 236b8e80941Smrgv3d_qpu_sig_pack(const struct v3d_device_info *devinfo, 237b8e80941Smrg const struct v3d_qpu_sig *sig, 238b8e80941Smrg uint32_t *packed_sig) 239b8e80941Smrg{ 240b8e80941Smrg static const struct v3d_qpu_sig *map; 241b8e80941Smrg 242b8e80941Smrg if (devinfo->ver >= 41) 243b8e80941Smrg map = v41_sig_map; 244b8e80941Smrg else if (devinfo->ver == 40) 245b8e80941Smrg map = v40_sig_map; 246b8e80941Smrg else 247b8e80941Smrg map = v33_sig_map; 248b8e80941Smrg 249b8e80941Smrg for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) { 250b8e80941Smrg if (memcmp(&map[i], sig, sizeof(*sig)) == 0) { 251b8e80941Smrg *packed_sig = i; 252b8e80941Smrg return true; 253b8e80941Smrg } 254b8e80941Smrg } 255b8e80941Smrg 256b8e80941Smrg return false; 257b8e80941Smrg} 258b8e80941Smrgstatic inline unsigned 259b8e80941Smrgfui( float f ) 260b8e80941Smrg{ 261b8e80941Smrg union {float f; unsigned ui;} fi; 262b8e80941Smrg fi.f = f; 263b8e80941Smrg return fi.ui; 264b8e80941Smrg} 265b8e80941Smrg 266b8e80941Smrgstatic const uint32_t small_immediates[] = { 267b8e80941Smrg 0, 1, 2, 3, 268b8e80941Smrg 4, 5, 6, 7, 269b8e80941Smrg 8, 9, 10, 11, 270b8e80941Smrg 12, 13, 14, 15, 271b8e80941Smrg -16, -15, -14, -13, 272b8e80941Smrg -12, -11, -10, -9, 273b8e80941Smrg -8, -7, -6, -5, 274b8e80941Smrg -4, -3, -2, -1, 275b8e80941Smrg 0x3b800000, /* 2.0^-8 */ 276b8e80941Smrg 0x3c000000, /* 2.0^-7 */ 277b8e80941Smrg 0x3c800000, 
/* 2.0^-6 */ 278b8e80941Smrg 0x3d000000, /* 2.0^-5 */ 279b8e80941Smrg 0x3d800000, /* 2.0^-4 */ 280b8e80941Smrg 0x3e000000, /* 2.0^-3 */ 281b8e80941Smrg 0x3e800000, /* 2.0^-2 */ 282b8e80941Smrg 0x3f000000, /* 2.0^-1 */ 283b8e80941Smrg 0x3f800000, /* 2.0^0 */ 284b8e80941Smrg 0x40000000, /* 2.0^1 */ 285b8e80941Smrg 0x40800000, /* 2.0^2 */ 286b8e80941Smrg 0x41000000, /* 2.0^3 */ 287b8e80941Smrg 0x41800000, /* 2.0^4 */ 288b8e80941Smrg 0x42000000, /* 2.0^5 */ 289b8e80941Smrg 0x42800000, /* 2.0^6 */ 290b8e80941Smrg 0x43000000, /* 2.0^7 */ 291b8e80941Smrg}; 292b8e80941Smrg 293b8e80941Smrgbool 294b8e80941Smrgv3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo, 295b8e80941Smrg uint32_t packed_small_immediate, 296b8e80941Smrg uint32_t *small_immediate) 297b8e80941Smrg{ 298b8e80941Smrg if (packed_small_immediate >= ARRAY_SIZE(small_immediates)) 299b8e80941Smrg return false; 300b8e80941Smrg 301b8e80941Smrg *small_immediate = small_immediates[packed_small_immediate]; 302b8e80941Smrg return true; 303b8e80941Smrg} 304b8e80941Smrg 305b8e80941Smrgbool 306b8e80941Smrgv3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo, 307b8e80941Smrg uint32_t value, 308b8e80941Smrg uint32_t *packed_small_immediate) 309b8e80941Smrg{ 310b8e80941Smrg STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48); 311b8e80941Smrg 312b8e80941Smrg for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) { 313b8e80941Smrg if (small_immediates[i] == value) { 314b8e80941Smrg *packed_small_immediate = i; 315b8e80941Smrg return true; 316b8e80941Smrg } 317b8e80941Smrg } 318b8e80941Smrg 319b8e80941Smrg return false; 320b8e80941Smrg} 321b8e80941Smrg 322b8e80941Smrgbool 323b8e80941Smrgv3d_qpu_flags_unpack(const struct v3d_device_info *devinfo, 324b8e80941Smrg uint32_t packed_cond, 325b8e80941Smrg struct v3d_qpu_flags *cond) 326b8e80941Smrg{ 327b8e80941Smrg static const enum v3d_qpu_cond cond_map[4] = { 328b8e80941Smrg [0] = V3D_QPU_COND_IFA, 329b8e80941Smrg [1] = V3D_QPU_COND_IFB, 330b8e80941Smrg [2] = 
V3D_QPU_COND_IFNA, 331b8e80941Smrg [3] = V3D_QPU_COND_IFNB, 332b8e80941Smrg }; 333b8e80941Smrg 334b8e80941Smrg cond->ac = V3D_QPU_COND_NONE; 335b8e80941Smrg cond->mc = V3D_QPU_COND_NONE; 336b8e80941Smrg cond->apf = V3D_QPU_PF_NONE; 337b8e80941Smrg cond->mpf = V3D_QPU_PF_NONE; 338b8e80941Smrg cond->auf = V3D_QPU_UF_NONE; 339b8e80941Smrg cond->muf = V3D_QPU_UF_NONE; 340b8e80941Smrg 341b8e80941Smrg if (packed_cond == 0) { 342b8e80941Smrg return true; 343b8e80941Smrg } else if (packed_cond >> 2 == 0) { 344b8e80941Smrg cond->apf = packed_cond & 0x3; 345b8e80941Smrg } else if (packed_cond >> 4 == 0) { 346b8e80941Smrg cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; 347b8e80941Smrg } else if (packed_cond == 0x10) { 348b8e80941Smrg return false; 349b8e80941Smrg } else if (packed_cond >> 2 == 0x4) { 350b8e80941Smrg cond->mpf = packed_cond & 0x3; 351b8e80941Smrg } else if (packed_cond >> 4 == 0x1) { 352b8e80941Smrg cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; 353b8e80941Smrg } else if (packed_cond >> 4 == 0x2) { 354b8e80941Smrg cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA; 355b8e80941Smrg cond->mpf = packed_cond & 0x3; 356b8e80941Smrg } else if (packed_cond >> 4 == 0x3) { 357b8e80941Smrg cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA; 358b8e80941Smrg cond->apf = packed_cond & 0x3; 359b8e80941Smrg } else if (packed_cond >> 6) { 360b8e80941Smrg cond->mc = cond_map[(packed_cond >> 4) & 0x3]; 361b8e80941Smrg if (((packed_cond >> 2) & 0x3) == 0) { 362b8e80941Smrg cond->ac = cond_map[packed_cond & 0x3]; 363b8e80941Smrg } else { 364b8e80941Smrg cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; 365b8e80941Smrg } 366b8e80941Smrg } 367b8e80941Smrg 368b8e80941Smrg return true; 369b8e80941Smrg} 370b8e80941Smrg 371b8e80941Smrgbool 372b8e80941Smrgv3d_qpu_flags_pack(const struct v3d_device_info *devinfo, 373b8e80941Smrg const struct v3d_qpu_flags *cond, 374b8e80941Smrg uint32_t *packed_cond) 375b8e80941Smrg{ 376b8e80941Smrg#define AC (1 << 0) 
377b8e80941Smrg#define MC (1 << 1) 378b8e80941Smrg#define APF (1 << 2) 379b8e80941Smrg#define MPF (1 << 3) 380b8e80941Smrg#define AUF (1 << 4) 381b8e80941Smrg#define MUF (1 << 5) 382b8e80941Smrg static const struct { 383b8e80941Smrg uint8_t flags_present; 384b8e80941Smrg uint8_t bits; 385b8e80941Smrg } flags_table[] = { 386b8e80941Smrg { 0, 0 }, 387b8e80941Smrg { APF, 0 }, 388b8e80941Smrg { AUF, 0 }, 389b8e80941Smrg { MPF, (1 << 4) }, 390b8e80941Smrg { MUF, (1 << 4) }, 391b8e80941Smrg { AC, (1 << 5) }, 392b8e80941Smrg { AC | MPF, (1 << 5) }, 393b8e80941Smrg { MC, (1 << 5) | (1 << 4) }, 394b8e80941Smrg { MC | APF, (1 << 5) | (1 << 4) }, 395b8e80941Smrg { MC | AC, (1 << 6) }, 396b8e80941Smrg { MC | AUF, (1 << 6) }, 397b8e80941Smrg }; 398b8e80941Smrg 399b8e80941Smrg uint8_t flags_present = 0; 400b8e80941Smrg if (cond->ac != V3D_QPU_COND_NONE) 401b8e80941Smrg flags_present |= AC; 402b8e80941Smrg if (cond->mc != V3D_QPU_COND_NONE) 403b8e80941Smrg flags_present |= MC; 404b8e80941Smrg if (cond->apf != V3D_QPU_PF_NONE) 405b8e80941Smrg flags_present |= APF; 406b8e80941Smrg if (cond->mpf != V3D_QPU_PF_NONE) 407b8e80941Smrg flags_present |= MPF; 408b8e80941Smrg if (cond->auf != V3D_QPU_UF_NONE) 409b8e80941Smrg flags_present |= AUF; 410b8e80941Smrg if (cond->muf != V3D_QPU_UF_NONE) 411b8e80941Smrg flags_present |= MUF; 412b8e80941Smrg 413b8e80941Smrg for (int i = 0; i < ARRAY_SIZE(flags_table); i++) { 414b8e80941Smrg if (flags_table[i].flags_present != flags_present) 415b8e80941Smrg continue; 416b8e80941Smrg 417b8e80941Smrg *packed_cond = flags_table[i].bits; 418b8e80941Smrg 419b8e80941Smrg *packed_cond |= cond->apf; 420b8e80941Smrg *packed_cond |= cond->mpf; 421b8e80941Smrg 422b8e80941Smrg if (flags_present & AUF) 423b8e80941Smrg *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4; 424b8e80941Smrg if (flags_present & MUF) 425b8e80941Smrg *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4; 426b8e80941Smrg 427b8e80941Smrg if (flags_present & AC) 428b8e80941Smrg *packed_cond |= 
(cond->ac - V3D_QPU_COND_IFA) << 2; 429b8e80941Smrg 430b8e80941Smrg if (flags_present & MC) { 431b8e80941Smrg if (*packed_cond & (1 << 6)) 432b8e80941Smrg *packed_cond |= (cond->mc - 433b8e80941Smrg V3D_QPU_COND_IFA) << 4; 434b8e80941Smrg else 435b8e80941Smrg *packed_cond |= (cond->mc - 436b8e80941Smrg V3D_QPU_COND_IFA) << 2; 437b8e80941Smrg } 438b8e80941Smrg 439b8e80941Smrg return true; 440b8e80941Smrg } 441b8e80941Smrg 442b8e80941Smrg return false; 443b8e80941Smrg} 444b8e80941Smrg 445b8e80941Smrg/* Make a mapping of the table of opcodes in the spec. The opcode is 446b8e80941Smrg * determined by a combination of the opcode field, and in the case of 0 or 447b8e80941Smrg * 1-arg opcodes, the mux_b field as well. 448b8e80941Smrg */ 449b8e80941Smrg#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1)) 450b8e80941Smrg#define ANYMUX MUX_MASK(0, 7) 451b8e80941Smrg 452b8e80941Smrgstruct opcode_desc { 453b8e80941Smrg uint8_t opcode_first; 454b8e80941Smrg uint8_t opcode_last; 455b8e80941Smrg uint8_t mux_b_mask; 456b8e80941Smrg uint8_t mux_a_mask; 457b8e80941Smrg uint8_t op; 458b8e80941Smrg /* 0 if it's the same across V3D versions, or a specific V3D version. */ 459b8e80941Smrg uint8_t ver; 460b8e80941Smrg}; 461b8e80941Smrg 462b8e80941Smrgstatic const struct opcode_desc add_ops[] = { 463b8e80941Smrg /* FADD is FADDNF depending on the order of the mux_a/mux_b. 
*/ 464b8e80941Smrg { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD }, 465b8e80941Smrg { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF }, 466b8e80941Smrg { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, 467b8e80941Smrg { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD }, 468b8e80941Smrg { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, 469b8e80941Smrg { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB }, 470b8e80941Smrg { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, 471b8e80941Smrg { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB }, 472b8e80941Smrg { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN }, 473b8e80941Smrg { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX }, 474b8e80941Smrg { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN }, 475b8e80941Smrg { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX }, 476b8e80941Smrg { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL }, 477b8e80941Smrg { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR }, 478b8e80941Smrg { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR }, 479b8e80941Smrg { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR }, 480b8e80941Smrg /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. 
*/ 481b8e80941Smrg { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN }, 482b8e80941Smrg { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX }, 483b8e80941Smrg { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN }, 484b8e80941Smrg 485b8e80941Smrg { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND }, 486b8e80941Smrg { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR }, 487b8e80941Smrg { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR }, 488b8e80941Smrg 489b8e80941Smrg { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD }, 490b8e80941Smrg { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB }, 491b8e80941Smrg { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT }, 492b8e80941Smrg { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG }, 493b8e80941Smrg { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH }, 494b8e80941Smrg { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH }, 495b8e80941Smrg { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP }, 496b8e80941Smrg { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP }, 497b8e80941Smrg { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF }, 498b8e80941Smrg { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF }, 499b8e80941Smrg { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 }, 500b8e80941Smrg { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX }, 501b8e80941Smrg { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX }, 502b8e80941Smrg { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR }, 503b8e80941Smrg { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA }, 504b8e80941Smrg { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA }, 505b8e80941Smrg { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB }, 506b8e80941Smrg { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB }, 507b8e80941Smrg 508b8e80941Smrg { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD }, 509b8e80941Smrg { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD }, 510b8e80941Smrg { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD }, 511b8e80941Smrg { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD }, 512b8e80941Smrg 513b8e80941Smrg { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF }, 514b8e80941Smrg { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF }, 515b8e80941Smrg { 187, 187, 1 << 2, 1 << 2, 
V3D_QPU_A_VDWWT, 33 }, 516b8e80941Smrg { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 }, 517b8e80941Smrg { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 }, 518b8e80941Smrg { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 }, 519b8e80941Smrg { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT }, 520b8e80941Smrg { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT }, 521b8e80941Smrg 522b8e80941Smrg { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 }, 523b8e80941Smrg { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 }, 524b8e80941Smrg { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 }, 525b8e80941Smrg { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 }, 526b8e80941Smrg { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 }, 527b8e80941Smrg { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 }, 528b8e80941Smrg { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 }, 529b8e80941Smrg { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 }, 530b8e80941Smrg { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 }, 531b8e80941Smrg { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 }, 532b8e80941Smrg 533b8e80941Smrg /* FIXME: MORE COMPLICATED */ 534b8e80941Smrg /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */ 535b8e80941Smrg 536b8e80941Smrg { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP }, 537b8e80941Smrg { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX }, 538b8e80941Smrg 539b8e80941Smrg { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND }, 540b8e80941Smrg { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN }, 541b8e80941Smrg { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC }, 542b8e80941Smrg { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ }, 543b8e80941Smrg { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR }, 544b8e80941Smrg { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ }, 545b8e80941Smrg { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL }, 546b8e80941Smrg { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC }, 547b8e80941Smrg 548b8e80941Smrg { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX }, 549b8e80941Smrg { 247, 247, 
MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY }, 550b8e80941Smrg 551b8e80941Smrg /* The stvpms are distinguished by the waddr field. */ 552b8e80941Smrg { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV }, 553b8e80941Smrg { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD }, 554b8e80941Smrg { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP }, 555b8e80941Smrg 556b8e80941Smrg { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF }, 557b8e80941Smrg { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ }, 558b8e80941Smrg { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF }, 559b8e80941Smrg}; 560b8e80941Smrg 561b8e80941Smrgstatic const struct opcode_desc mul_ops[] = { 562b8e80941Smrg { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD }, 563b8e80941Smrg { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB }, 564b8e80941Smrg { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 }, 565b8e80941Smrg { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL }, 566b8e80941Smrg { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 }, 567b8e80941Smrg { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP }, 568b8e80941Smrg { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV }, 569b8e80941Smrg { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV }, 570b8e80941Smrg { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 }, 571b8e80941Smrg { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV }, 572b8e80941Smrg { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL }, 573b8e80941Smrg}; 574b8e80941Smrg 575b8e80941Smrgstatic const struct opcode_desc * 576b8e80941Smrglookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes, 577b8e80941Smrg uint32_t opcode, uint32_t mux_a, uint32_t mux_b) 578b8e80941Smrg{ 579b8e80941Smrg for (int i = 0; i < num_opcodes; i++) { 580b8e80941Smrg const struct opcode_desc *op_desc = &opcodes[i]; 581b8e80941Smrg 582b8e80941Smrg if (opcode < op_desc->opcode_first || 583b8e80941Smrg opcode > op_desc->opcode_last) 584b8e80941Smrg continue; 585b8e80941Smrg 586b8e80941Smrg if (!(op_desc->mux_b_mask & (1 << mux_b))) 587b8e80941Smrg continue; 588b8e80941Smrg 589b8e80941Smrg if (!(op_desc->mux_a_mask & (1 << mux_a))) 
590b8e80941Smrg continue; 591b8e80941Smrg 592b8e80941Smrg return op_desc; 593b8e80941Smrg } 594b8e80941Smrg 595b8e80941Smrg return NULL; 596b8e80941Smrg} 597b8e80941Smrg 598b8e80941Smrgstatic bool 599b8e80941Smrgv3d_qpu_float32_unpack_unpack(uint32_t packed, 600b8e80941Smrg enum v3d_qpu_input_unpack *unpacked) 601b8e80941Smrg{ 602b8e80941Smrg switch (packed) { 603b8e80941Smrg case 0: 604b8e80941Smrg *unpacked = V3D_QPU_UNPACK_ABS; 605b8e80941Smrg return true; 606b8e80941Smrg case 1: 607b8e80941Smrg *unpacked = V3D_QPU_UNPACK_NONE; 608b8e80941Smrg return true; 609b8e80941Smrg case 2: 610b8e80941Smrg *unpacked = V3D_QPU_UNPACK_L; 611b8e80941Smrg return true; 612b8e80941Smrg case 3: 613b8e80941Smrg *unpacked = V3D_QPU_UNPACK_H; 614b8e80941Smrg return true; 615b8e80941Smrg default: 616b8e80941Smrg return false; 617b8e80941Smrg } 618b8e80941Smrg} 619b8e80941Smrg 620b8e80941Smrgstatic bool 621b8e80941Smrgv3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked, 622b8e80941Smrg uint32_t *packed) 623b8e80941Smrg{ 624b8e80941Smrg switch (unpacked) { 625b8e80941Smrg case V3D_QPU_UNPACK_ABS: 626b8e80941Smrg *packed = 0; 627b8e80941Smrg return true; 628b8e80941Smrg case V3D_QPU_UNPACK_NONE: 629b8e80941Smrg *packed = 1; 630b8e80941Smrg return true; 631b8e80941Smrg case V3D_QPU_UNPACK_L: 632b8e80941Smrg *packed = 2; 633b8e80941Smrg return true; 634b8e80941Smrg case V3D_QPU_UNPACK_H: 635b8e80941Smrg *packed = 3; 636b8e80941Smrg return true; 637b8e80941Smrg default: 638b8e80941Smrg return false; 639b8e80941Smrg } 640b8e80941Smrg} 641b8e80941Smrg 642b8e80941Smrgstatic bool 643b8e80941Smrgv3d_qpu_float16_unpack_unpack(uint32_t packed, 644b8e80941Smrg enum v3d_qpu_input_unpack *unpacked) 645b8e80941Smrg{ 646b8e80941Smrg switch (packed) { 647b8e80941Smrg case 0: 648b8e80941Smrg *unpacked = V3D_QPU_UNPACK_NONE; 649b8e80941Smrg return true; 650b8e80941Smrg case 1: 651b8e80941Smrg *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16; 652b8e80941Smrg return true; 653b8e80941Smrg case 2: 
654b8e80941Smrg *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16; 655b8e80941Smrg return true; 656b8e80941Smrg case 3: 657b8e80941Smrg *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16; 658b8e80941Smrg return true; 659b8e80941Smrg case 4: 660b8e80941Smrg *unpacked = V3D_QPU_UNPACK_SWAP_16; 661b8e80941Smrg return true; 662b8e80941Smrg default: 663b8e80941Smrg return false; 664b8e80941Smrg } 665b8e80941Smrg} 666b8e80941Smrg 667b8e80941Smrgstatic bool 668b8e80941Smrgv3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked, 669b8e80941Smrg uint32_t *packed) 670b8e80941Smrg{ 671b8e80941Smrg switch (unpacked) { 672b8e80941Smrg case V3D_QPU_UNPACK_NONE: 673b8e80941Smrg *packed = 0; 674b8e80941Smrg return true; 675b8e80941Smrg case V3D_QPU_UNPACK_REPLICATE_32F_16: 676b8e80941Smrg *packed = 1; 677b8e80941Smrg return true; 678b8e80941Smrg case V3D_QPU_UNPACK_REPLICATE_L_16: 679b8e80941Smrg *packed = 2; 680b8e80941Smrg return true; 681b8e80941Smrg case V3D_QPU_UNPACK_REPLICATE_H_16: 682b8e80941Smrg *packed = 3; 683b8e80941Smrg return true; 684b8e80941Smrg case V3D_QPU_UNPACK_SWAP_16: 685b8e80941Smrg *packed = 4; 686b8e80941Smrg return true; 687b8e80941Smrg default: 688b8e80941Smrg return false; 689b8e80941Smrg } 690b8e80941Smrg} 691b8e80941Smrg 692b8e80941Smrgstatic bool 693b8e80941Smrgv3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked, 694b8e80941Smrg uint32_t *packed) 695b8e80941Smrg{ 696b8e80941Smrg switch (unpacked) { 697b8e80941Smrg case V3D_QPU_PACK_NONE: 698b8e80941Smrg *packed = 0; 699b8e80941Smrg return true; 700b8e80941Smrg case V3D_QPU_PACK_L: 701b8e80941Smrg *packed = 1; 702b8e80941Smrg return true; 703b8e80941Smrg case V3D_QPU_PACK_H: 704b8e80941Smrg *packed = 2; 705b8e80941Smrg return true; 706b8e80941Smrg default: 707b8e80941Smrg return false; 708b8e80941Smrg } 709b8e80941Smrg} 710b8e80941Smrg 711b8e80941Smrgstatic bool 712b8e80941Smrgv3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, 713b8e80941Smrg struct v3d_qpu_instr *instr) 
714b8e80941Smrg{ 715b8e80941Smrg uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD); 716b8e80941Smrg uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A); 717b8e80941Smrg uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B); 718b8e80941Smrg uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); 719b8e80941Smrg 720b8e80941Smrg uint32_t map_op = op; 721b8e80941Smrg /* Some big clusters of opcodes are replicated with unpack 722b8e80941Smrg * flags 723b8e80941Smrg */ 724b8e80941Smrg if (map_op >= 249 && map_op <= 251) 725b8e80941Smrg map_op = (map_op - 249 + 245); 726b8e80941Smrg if (map_op >= 253 && map_op <= 255) 727b8e80941Smrg map_op = (map_op - 253 + 245); 728b8e80941Smrg 729b8e80941Smrg const struct opcode_desc *desc = 730b8e80941Smrg lookup_opcode(add_ops, ARRAY_SIZE(add_ops), 731b8e80941Smrg map_op, mux_a, mux_b); 732b8e80941Smrg if (!desc) 733b8e80941Smrg return false; 734b8e80941Smrg 735b8e80941Smrg instr->alu.add.op = desc->op; 736b8e80941Smrg 737b8e80941Smrg /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the 738b8e80941Smrg * operands. 739b8e80941Smrg */ 740b8e80941Smrg if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) { 741b8e80941Smrg if (instr->alu.add.op == V3D_QPU_A_FMIN) 742b8e80941Smrg instr->alu.add.op = V3D_QPU_A_FMAX; 743b8e80941Smrg if (instr->alu.add.op == V3D_QPU_A_FADD) 744b8e80941Smrg instr->alu.add.op = V3D_QPU_A_FADDNF; 745b8e80941Smrg } 746b8e80941Smrg 747b8e80941Smrg /* Some QPU ops require a bit more than just basic opcode and mux a/b 748b8e80941Smrg * comparisons to distinguish them. 
749b8e80941Smrg */ 750b8e80941Smrg switch (instr->alu.add.op) { 751b8e80941Smrg case V3D_QPU_A_STVPMV: 752b8e80941Smrg case V3D_QPU_A_STVPMD: 753b8e80941Smrg case V3D_QPU_A_STVPMP: 754b8e80941Smrg switch (waddr) { 755b8e80941Smrg case 0: 756b8e80941Smrg instr->alu.add.op = V3D_QPU_A_STVPMV; 757b8e80941Smrg break; 758b8e80941Smrg case 1: 759b8e80941Smrg instr->alu.add.op = V3D_QPU_A_STVPMD; 760b8e80941Smrg break; 761b8e80941Smrg case 2: 762b8e80941Smrg instr->alu.add.op = V3D_QPU_A_STVPMP; 763b8e80941Smrg break; 764b8e80941Smrg default: 765b8e80941Smrg return false; 766b8e80941Smrg } 767b8e80941Smrg break; 768b8e80941Smrg default: 769b8e80941Smrg break; 770b8e80941Smrg } 771b8e80941Smrg 772b8e80941Smrg switch (instr->alu.add.op) { 773b8e80941Smrg case V3D_QPU_A_FADD: 774b8e80941Smrg case V3D_QPU_A_FADDNF: 775b8e80941Smrg case V3D_QPU_A_FSUB: 776b8e80941Smrg case V3D_QPU_A_FMIN: 777b8e80941Smrg case V3D_QPU_A_FMAX: 778b8e80941Smrg case V3D_QPU_A_FCMP: 779b8e80941Smrg case V3D_QPU_A_VFPACK: 780b8e80941Smrg if (instr->alu.add.op != V3D_QPU_A_VFPACK) 781b8e80941Smrg instr->alu.add.output_pack = (op >> 4) & 0x3; 782b8e80941Smrg else 783b8e80941Smrg instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 784b8e80941Smrg 785b8e80941Smrg if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 786b8e80941Smrg &instr->alu.add.a_unpack)) { 787b8e80941Smrg return false; 788b8e80941Smrg } 789b8e80941Smrg 790b8e80941Smrg if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, 791b8e80941Smrg &instr->alu.add.b_unpack)) { 792b8e80941Smrg return false; 793b8e80941Smrg } 794b8e80941Smrg break; 795b8e80941Smrg 796b8e80941Smrg case V3D_QPU_A_FFLOOR: 797b8e80941Smrg case V3D_QPU_A_FROUND: 798b8e80941Smrg case V3D_QPU_A_FTRUNC: 799b8e80941Smrg case V3D_QPU_A_FCEIL: 800b8e80941Smrg case V3D_QPU_A_FDX: 801b8e80941Smrg case V3D_QPU_A_FDY: 802b8e80941Smrg instr->alu.add.output_pack = mux_b & 0x3; 803b8e80941Smrg 804b8e80941Smrg if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 805b8e80941Smrg 
&instr->alu.add.a_unpack)) { 806b8e80941Smrg return false; 807b8e80941Smrg } 808b8e80941Smrg break; 809b8e80941Smrg 810b8e80941Smrg case V3D_QPU_A_FTOIN: 811b8e80941Smrg case V3D_QPU_A_FTOIZ: 812b8e80941Smrg case V3D_QPU_A_FTOUZ: 813b8e80941Smrg case V3D_QPU_A_FTOC: 814b8e80941Smrg instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 815b8e80941Smrg 816b8e80941Smrg if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 817b8e80941Smrg &instr->alu.add.a_unpack)) { 818b8e80941Smrg return false; 819b8e80941Smrg } 820b8e80941Smrg break; 821b8e80941Smrg 822b8e80941Smrg case V3D_QPU_A_VFMIN: 823b8e80941Smrg case V3D_QPU_A_VFMAX: 824b8e80941Smrg if (!v3d_qpu_float16_unpack_unpack(op & 0x7, 825b8e80941Smrg &instr->alu.add.a_unpack)) { 826b8e80941Smrg return false; 827b8e80941Smrg } 828b8e80941Smrg 829b8e80941Smrg instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 830b8e80941Smrg instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; 831b8e80941Smrg break; 832b8e80941Smrg 833b8e80941Smrg default: 834b8e80941Smrg instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 835b8e80941Smrg instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE; 836b8e80941Smrg instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; 837b8e80941Smrg break; 838b8e80941Smrg } 839b8e80941Smrg 840b8e80941Smrg instr->alu.add.a = mux_a; 841b8e80941Smrg instr->alu.add.b = mux_b; 842b8e80941Smrg instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); 843b8e80941Smrg 844b8e80941Smrg instr->alu.add.magic_write = false; 845b8e80941Smrg if (packed_inst & VC5_QPU_MA) { 846b8e80941Smrg switch (instr->alu.add.op) { 847b8e80941Smrg case V3D_QPU_A_LDVPMV_IN: 848b8e80941Smrg instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT; 849b8e80941Smrg break; 850b8e80941Smrg case V3D_QPU_A_LDVPMD_IN: 851b8e80941Smrg instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT; 852b8e80941Smrg break; 853b8e80941Smrg case V3D_QPU_A_LDVPMG_IN: 854b8e80941Smrg instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT; 855b8e80941Smrg break; 856b8e80941Smrg default: 857b8e80941Smrg instr->alu.add.magic_write = 
true; 858b8e80941Smrg break; 859b8e80941Smrg } 860b8e80941Smrg } 861b8e80941Smrg 862b8e80941Smrg return true; 863b8e80941Smrg} 864b8e80941Smrg 865b8e80941Smrgstatic bool 866b8e80941Smrgv3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, 867b8e80941Smrg struct v3d_qpu_instr *instr) 868b8e80941Smrg{ 869b8e80941Smrg uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL); 870b8e80941Smrg uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A); 871b8e80941Smrg uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B); 872b8e80941Smrg 873b8e80941Smrg { 874b8e80941Smrg const struct opcode_desc *desc = 875b8e80941Smrg lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops), 876b8e80941Smrg op, mux_a, mux_b); 877b8e80941Smrg if (!desc) 878b8e80941Smrg return false; 879b8e80941Smrg 880b8e80941Smrg instr->alu.mul.op = desc->op; 881b8e80941Smrg } 882b8e80941Smrg 883b8e80941Smrg switch (instr->alu.mul.op) { 884b8e80941Smrg case V3D_QPU_M_FMUL: 885b8e80941Smrg instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1; 886b8e80941Smrg 887b8e80941Smrg if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 888b8e80941Smrg &instr->alu.mul.a_unpack)) { 889b8e80941Smrg return false; 890b8e80941Smrg } 891b8e80941Smrg 892b8e80941Smrg if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, 893b8e80941Smrg &instr->alu.mul.b_unpack)) { 894b8e80941Smrg return false; 895b8e80941Smrg } 896b8e80941Smrg 897b8e80941Smrg break; 898b8e80941Smrg 899b8e80941Smrg case V3D_QPU_M_FMOV: 900b8e80941Smrg instr->alu.mul.output_pack = (((op & 1) << 1) + 901b8e80941Smrg ((mux_b >> 2) & 1)); 902b8e80941Smrg 903b8e80941Smrg if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3, 904b8e80941Smrg &instr->alu.mul.a_unpack)) { 905b8e80941Smrg return false; 906b8e80941Smrg } 907b8e80941Smrg 908b8e80941Smrg break; 909b8e80941Smrg 910b8e80941Smrg case V3D_QPU_M_VFMUL: 911b8e80941Smrg instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; 912b8e80941Smrg 913b8e80941Smrg if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7, 
914b8e80941Smrg &instr->alu.mul.a_unpack)) { 915b8e80941Smrg return false; 916b8e80941Smrg } 917b8e80941Smrg 918b8e80941Smrg instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE; 919b8e80941Smrg 920b8e80941Smrg break; 921b8e80941Smrg 922b8e80941Smrg default: 923b8e80941Smrg instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; 924b8e80941Smrg instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE; 925b8e80941Smrg instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE; 926b8e80941Smrg break; 927b8e80941Smrg } 928b8e80941Smrg 929b8e80941Smrg instr->alu.mul.a = mux_a; 930b8e80941Smrg instr->alu.mul.b = mux_b; 931b8e80941Smrg instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M); 932b8e80941Smrg instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM; 933b8e80941Smrg 934b8e80941Smrg return true; 935b8e80941Smrg} 936b8e80941Smrg 937b8e80941Smrgstatic bool 938b8e80941Smrgv3d_qpu_add_pack(const struct v3d_device_info *devinfo, 939b8e80941Smrg const struct v3d_qpu_instr *instr, uint64_t *packed_instr) 940b8e80941Smrg{ 941b8e80941Smrg uint32_t waddr = instr->alu.add.waddr; 942b8e80941Smrg uint32_t mux_a = instr->alu.add.a; 943b8e80941Smrg uint32_t mux_b = instr->alu.add.b; 944b8e80941Smrg int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op); 945b8e80941Smrg const struct opcode_desc *desc; 946b8e80941Smrg 947b8e80941Smrg int opcode; 948b8e80941Smrg for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)]; 949b8e80941Smrg desc++) { 950b8e80941Smrg if (desc->op == instr->alu.add.op) 951b8e80941Smrg break; 952b8e80941Smrg } 953b8e80941Smrg if (desc == &add_ops[ARRAY_SIZE(add_ops)]) 954b8e80941Smrg return false; 955b8e80941Smrg 956b8e80941Smrg opcode = desc->opcode_first; 957b8e80941Smrg 958b8e80941Smrg /* If an operation doesn't use an arg, its mux values may be used to 959b8e80941Smrg * identify the operation type. 
960b8e80941Smrg */ 961b8e80941Smrg if (nsrc < 2) 962b8e80941Smrg mux_b = ffs(desc->mux_b_mask) - 1; 963b8e80941Smrg 964b8e80941Smrg if (nsrc < 1) 965b8e80941Smrg mux_a = ffs(desc->mux_a_mask) - 1; 966b8e80941Smrg 967b8e80941Smrg bool no_magic_write = false; 968b8e80941Smrg 969b8e80941Smrg switch (instr->alu.add.op) { 970b8e80941Smrg case V3D_QPU_A_STVPMV: 971b8e80941Smrg waddr = 0; 972b8e80941Smrg no_magic_write = true; 973b8e80941Smrg break; 974b8e80941Smrg case V3D_QPU_A_STVPMD: 975b8e80941Smrg waddr = 1; 976b8e80941Smrg no_magic_write = true; 977b8e80941Smrg break; 978b8e80941Smrg case V3D_QPU_A_STVPMP: 979b8e80941Smrg waddr = 2; 980b8e80941Smrg no_magic_write = true; 981b8e80941Smrg break; 982b8e80941Smrg 983b8e80941Smrg case V3D_QPU_A_LDVPMV_IN: 984b8e80941Smrg case V3D_QPU_A_LDVPMD_IN: 985b8e80941Smrg case V3D_QPU_A_LDVPMP: 986b8e80941Smrg case V3D_QPU_A_LDVPMG_IN: 987b8e80941Smrg assert(!instr->alu.add.magic_write); 988b8e80941Smrg break; 989b8e80941Smrg 990b8e80941Smrg case V3D_QPU_A_LDVPMV_OUT: 991b8e80941Smrg case V3D_QPU_A_LDVPMD_OUT: 992b8e80941Smrg case V3D_QPU_A_LDVPMG_OUT: 993b8e80941Smrg assert(!instr->alu.add.magic_write); 994b8e80941Smrg *packed_instr |= VC5_QPU_MA; 995b8e80941Smrg break; 996b8e80941Smrg 997b8e80941Smrg default: 998b8e80941Smrg break; 999b8e80941Smrg } 1000b8e80941Smrg 1001b8e80941Smrg switch (instr->alu.add.op) { 1002b8e80941Smrg case V3D_QPU_A_FADD: 1003b8e80941Smrg case V3D_QPU_A_FADDNF: 1004b8e80941Smrg case V3D_QPU_A_FSUB: 1005b8e80941Smrg case V3D_QPU_A_FMIN: 1006b8e80941Smrg case V3D_QPU_A_FMAX: 1007b8e80941Smrg case V3D_QPU_A_FCMP: { 1008b8e80941Smrg uint32_t output_pack; 1009b8e80941Smrg uint32_t a_unpack; 1010b8e80941Smrg uint32_t b_unpack; 1011b8e80941Smrg 1012b8e80941Smrg if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, 1013b8e80941Smrg &output_pack)) { 1014b8e80941Smrg return false; 1015b8e80941Smrg } 1016b8e80941Smrg opcode |= output_pack << 4; 1017b8e80941Smrg 1018b8e80941Smrg if 
(!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, 1019b8e80941Smrg &a_unpack)) { 1020b8e80941Smrg return false; 1021b8e80941Smrg } 1022b8e80941Smrg 1023b8e80941Smrg if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack, 1024b8e80941Smrg &b_unpack)) { 1025b8e80941Smrg return false; 1026b8e80941Smrg } 1027b8e80941Smrg 1028b8e80941Smrg /* These operations with commutative operands are 1029b8e80941Smrg * distinguished by which order their operands come in. 1030b8e80941Smrg */ 1031b8e80941Smrg bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b; 1032b8e80941Smrg if (((instr->alu.add.op == V3D_QPU_A_FMIN || 1033b8e80941Smrg instr->alu.add.op == V3D_QPU_A_FADD) && ordering) || 1034b8e80941Smrg ((instr->alu.add.op == V3D_QPU_A_FMAX || 1035b8e80941Smrg instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) { 1036b8e80941Smrg uint32_t temp; 1037b8e80941Smrg 1038b8e80941Smrg temp = a_unpack; 1039b8e80941Smrg a_unpack = b_unpack; 1040b8e80941Smrg b_unpack = temp; 1041b8e80941Smrg 1042b8e80941Smrg temp = mux_a; 1043b8e80941Smrg mux_a = mux_b; 1044b8e80941Smrg mux_b = temp; 1045b8e80941Smrg } 1046b8e80941Smrg 1047b8e80941Smrg opcode |= a_unpack << 2; 1048b8e80941Smrg opcode |= b_unpack << 0; 1049b8e80941Smrg 1050b8e80941Smrg break; 1051b8e80941Smrg } 1052b8e80941Smrg 1053b8e80941Smrg case V3D_QPU_A_VFPACK: { 1054b8e80941Smrg uint32_t a_unpack; 1055b8e80941Smrg uint32_t b_unpack; 1056b8e80941Smrg 1057b8e80941Smrg if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS || 1058b8e80941Smrg instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) { 1059b8e80941Smrg return false; 1060b8e80941Smrg } 1061b8e80941Smrg 1062b8e80941Smrg if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, 1063b8e80941Smrg &a_unpack)) { 1064b8e80941Smrg return false; 1065b8e80941Smrg } 1066b8e80941Smrg 1067b8e80941Smrg if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack, 1068b8e80941Smrg &b_unpack)) { 1069b8e80941Smrg return false; 1070b8e80941Smrg } 1071b8e80941Smrg 1072b8e80941Smrg opcode = 
(opcode & ~(1 << 2)) | (a_unpack << 2); 1073b8e80941Smrg opcode = (opcode & ~(1 << 0)) | (b_unpack << 0); 1074b8e80941Smrg 1075b8e80941Smrg break; 1076b8e80941Smrg } 1077b8e80941Smrg 1078b8e80941Smrg case V3D_QPU_A_FFLOOR: 1079b8e80941Smrg case V3D_QPU_A_FROUND: 1080b8e80941Smrg case V3D_QPU_A_FTRUNC: 1081b8e80941Smrg case V3D_QPU_A_FCEIL: 1082b8e80941Smrg case V3D_QPU_A_FDX: 1083b8e80941Smrg case V3D_QPU_A_FDY: { 1084b8e80941Smrg uint32_t packed; 1085b8e80941Smrg 1086b8e80941Smrg if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, 1087b8e80941Smrg &packed)) { 1088b8e80941Smrg return false; 1089b8e80941Smrg } 1090b8e80941Smrg mux_b |= packed; 1091b8e80941Smrg 1092b8e80941Smrg if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, 1093b8e80941Smrg &packed)) { 1094b8e80941Smrg return false; 1095b8e80941Smrg } 1096b8e80941Smrg if (packed == 0) 1097b8e80941Smrg return false; 1098b8e80941Smrg opcode = (opcode & ~(1 << 2)) | packed << 2; 1099b8e80941Smrg break; 1100b8e80941Smrg } 1101b8e80941Smrg 1102b8e80941Smrg case V3D_QPU_A_FTOIN: 1103b8e80941Smrg case V3D_QPU_A_FTOIZ: 1104b8e80941Smrg case V3D_QPU_A_FTOUZ: 1105b8e80941Smrg case V3D_QPU_A_FTOC: 1106b8e80941Smrg if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE) 1107b8e80941Smrg return false; 1108b8e80941Smrg 1109b8e80941Smrg uint32_t packed; 1110b8e80941Smrg if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, 1111b8e80941Smrg &packed)) { 1112b8e80941Smrg return false; 1113b8e80941Smrg } 1114b8e80941Smrg if (packed == 0) 1115b8e80941Smrg return false; 1116b8e80941Smrg opcode |= packed << 2; 1117b8e80941Smrg 1118b8e80941Smrg break; 1119b8e80941Smrg 1120b8e80941Smrg case V3D_QPU_A_VFMIN: 1121b8e80941Smrg case V3D_QPU_A_VFMAX: 1122b8e80941Smrg if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || 1123b8e80941Smrg instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) { 1124b8e80941Smrg return false; 1125b8e80941Smrg } 1126b8e80941Smrg 1127b8e80941Smrg if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack, 
1128b8e80941Smrg &packed)) { 1129b8e80941Smrg return false; 1130b8e80941Smrg } 1131b8e80941Smrg opcode |= packed; 1132b8e80941Smrg break; 1133b8e80941Smrg 1134b8e80941Smrg default: 1135b8e80941Smrg if (instr->alu.add.op != V3D_QPU_A_NOP && 1136b8e80941Smrg (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || 1137b8e80941Smrg instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE || 1138b8e80941Smrg instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) { 1139b8e80941Smrg return false; 1140b8e80941Smrg } 1141b8e80941Smrg break; 1142b8e80941Smrg } 1143b8e80941Smrg 1144b8e80941Smrg *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A); 1145b8e80941Smrg *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B); 1146b8e80941Smrg *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD); 1147b8e80941Smrg *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A); 1148b8e80941Smrg if (instr->alu.add.magic_write && !no_magic_write) 1149b8e80941Smrg *packed_instr |= VC5_QPU_MA; 1150b8e80941Smrg 1151b8e80941Smrg return true; 1152b8e80941Smrg} 1153b8e80941Smrg 1154b8e80941Smrgstatic bool 1155b8e80941Smrgv3d_qpu_mul_pack(const struct v3d_device_info *devinfo, 1156b8e80941Smrg const struct v3d_qpu_instr *instr, uint64_t *packed_instr) 1157b8e80941Smrg{ 1158b8e80941Smrg uint32_t mux_a = instr->alu.mul.a; 1159b8e80941Smrg uint32_t mux_b = instr->alu.mul.b; 1160b8e80941Smrg int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op); 1161b8e80941Smrg const struct opcode_desc *desc; 1162b8e80941Smrg 1163b8e80941Smrg for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)]; 1164b8e80941Smrg desc++) { 1165b8e80941Smrg if (desc->op == instr->alu.mul.op) 1166b8e80941Smrg break; 1167b8e80941Smrg } 1168b8e80941Smrg if (desc == &mul_ops[ARRAY_SIZE(mul_ops)]) 1169b8e80941Smrg return false; 1170b8e80941Smrg 1171b8e80941Smrg uint32_t opcode = desc->opcode_first; 1172b8e80941Smrg 1173b8e80941Smrg /* Some opcodes have a single valid value for their mux a/b, so set 1174b8e80941Smrg * that here. 
If mux a/b determine packing, it will be set below. 1175b8e80941Smrg */ 1176b8e80941Smrg if (nsrc < 2) 1177b8e80941Smrg mux_b = ffs(desc->mux_b_mask) - 1; 1178b8e80941Smrg 1179b8e80941Smrg if (nsrc < 1) 1180b8e80941Smrg mux_a = ffs(desc->mux_a_mask) - 1; 1181b8e80941Smrg 1182b8e80941Smrg switch (instr->alu.mul.op) { 1183b8e80941Smrg case V3D_QPU_M_FMUL: { 1184b8e80941Smrg uint32_t packed; 1185b8e80941Smrg 1186b8e80941Smrg if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack, 1187b8e80941Smrg &packed)) { 1188b8e80941Smrg return false; 1189b8e80941Smrg } 1190b8e80941Smrg /* No need for a +1 because desc->opcode_first has a 1 in this 1191b8e80941Smrg * field. 1192b8e80941Smrg */ 1193b8e80941Smrg opcode += packed << 4; 1194b8e80941Smrg 1195b8e80941Smrg if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack, 1196b8e80941Smrg &packed)) { 1197b8e80941Smrg return false; 1198b8e80941Smrg } 1199b8e80941Smrg opcode |= packed << 2; 1200b8e80941Smrg 1201b8e80941Smrg if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack, 1202b8e80941Smrg &packed)) { 1203b8e80941Smrg return false; 1204b8e80941Smrg } 1205b8e80941Smrg opcode |= packed << 0; 1206b8e80941Smrg break; 1207b8e80941Smrg } 1208b8e80941Smrg 1209b8e80941Smrg case V3D_QPU_M_FMOV: { 1210b8e80941Smrg uint32_t packed; 1211b8e80941Smrg 1212b8e80941Smrg if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack, 1213b8e80941Smrg &packed)) { 1214b8e80941Smrg return false; 1215b8e80941Smrg } 1216b8e80941Smrg opcode |= (packed >> 1) & 1; 1217b8e80941Smrg mux_b = (packed & 1) << 2; 1218b8e80941Smrg 1219b8e80941Smrg if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack, 1220b8e80941Smrg &packed)) { 1221b8e80941Smrg return false; 1222b8e80941Smrg } 1223b8e80941Smrg mux_b |= packed; 1224b8e80941Smrg break; 1225b8e80941Smrg } 1226b8e80941Smrg 1227b8e80941Smrg case V3D_QPU_M_VFMUL: { 1228b8e80941Smrg uint32_t packed; 1229b8e80941Smrg 1230b8e80941Smrg if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE) 1231b8e80941Smrg return 
false; 1232b8e80941Smrg 1233b8e80941Smrg if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack, 1234b8e80941Smrg &packed)) { 1235b8e80941Smrg return false; 1236b8e80941Smrg } 1237b8e80941Smrg if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16) 1238b8e80941Smrg opcode = 8; 1239b8e80941Smrg else 1240b8e80941Smrg opcode |= (packed + 4) & 7; 1241b8e80941Smrg 1242b8e80941Smrg if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) 1243b8e80941Smrg return false; 1244b8e80941Smrg 1245b8e80941Smrg break; 1246b8e80941Smrg } 1247b8e80941Smrg 1248b8e80941Smrg default: 1249b8e80941Smrg break; 1250b8e80941Smrg } 1251b8e80941Smrg 1252b8e80941Smrg *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A); 1253b8e80941Smrg *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B); 1254b8e80941Smrg 1255b8e80941Smrg *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL); 1256b8e80941Smrg *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M); 1257b8e80941Smrg if (instr->alu.mul.magic_write) 1258b8e80941Smrg *packed_instr |= VC5_QPU_MM; 1259b8e80941Smrg 1260b8e80941Smrg return true; 1261b8e80941Smrg} 1262b8e80941Smrg 1263b8e80941Smrgstatic bool 1264b8e80941Smrgv3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo, 1265b8e80941Smrg uint64_t packed_instr, 1266b8e80941Smrg struct v3d_qpu_instr *instr) 1267b8e80941Smrg{ 1268b8e80941Smrg instr->type = V3D_QPU_INSTR_TYPE_ALU; 1269b8e80941Smrg 1270b8e80941Smrg if (!v3d_qpu_sig_unpack(devinfo, 1271b8e80941Smrg QPU_GET_FIELD(packed_instr, VC5_QPU_SIG), 1272b8e80941Smrg &instr->sig)) 1273b8e80941Smrg return false; 1274b8e80941Smrg 1275b8e80941Smrg uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND); 1276b8e80941Smrg if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) { 1277b8e80941Smrg instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR; 1278b8e80941Smrg instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR; 1279b8e80941Smrg 1280b8e80941Smrg instr->flags.ac = V3D_QPU_COND_NONE; 1281b8e80941Smrg 
instr->flags.mc = V3D_QPU_COND_NONE; 1282b8e80941Smrg instr->flags.apf = V3D_QPU_PF_NONE; 1283b8e80941Smrg instr->flags.mpf = V3D_QPU_PF_NONE; 1284b8e80941Smrg instr->flags.auf = V3D_QPU_UF_NONE; 1285b8e80941Smrg instr->flags.muf = V3D_QPU_UF_NONE; 1286b8e80941Smrg } else { 1287b8e80941Smrg if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags)) 1288b8e80941Smrg return false; 1289b8e80941Smrg } 1290b8e80941Smrg 1291b8e80941Smrg instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A); 1292b8e80941Smrg instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B); 1293b8e80941Smrg 1294b8e80941Smrg if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr)) 1295b8e80941Smrg return false; 1296b8e80941Smrg 1297b8e80941Smrg if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr)) 1298b8e80941Smrg return false; 1299b8e80941Smrg 1300b8e80941Smrg return true; 1301b8e80941Smrg} 1302b8e80941Smrg 1303b8e80941Smrgstatic bool 1304b8e80941Smrgv3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo, 1305b8e80941Smrg uint64_t packed_instr, 1306b8e80941Smrg struct v3d_qpu_instr *instr) 1307b8e80941Smrg{ 1308b8e80941Smrg instr->type = V3D_QPU_INSTR_TYPE_BRANCH; 1309b8e80941Smrg 1310b8e80941Smrg uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND); 1311b8e80941Smrg if (cond == 0) 1312b8e80941Smrg instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS; 1313b8e80941Smrg else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <= 1314b8e80941Smrg V3D_QPU_BRANCH_COND_ALLNA) 1315b8e80941Smrg instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2); 1316b8e80941Smrg else 1317b8e80941Smrg return false; 1318b8e80941Smrg 1319b8e80941Smrg uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN); 1320b8e80941Smrg if (msfign == 3) 1321b8e80941Smrg return false; 1322b8e80941Smrg instr->branch.msfign = msfign; 1323b8e80941Smrg 1324b8e80941Smrg instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI); 1325b8e80941Smrg 1326b8e80941Smrg instr->branch.ub = packed_instr 
& VC5_QPU_BRANCH_UB; 1327b8e80941Smrg if (instr->branch.ub) { 1328b8e80941Smrg instr->branch.bdu = QPU_GET_FIELD(packed_instr, 1329b8e80941Smrg VC5_QPU_BRANCH_BDU); 1330b8e80941Smrg } 1331b8e80941Smrg 1332b8e80941Smrg instr->branch.raddr_a = QPU_GET_FIELD(packed_instr, 1333b8e80941Smrg VC5_QPU_RADDR_A); 1334b8e80941Smrg 1335b8e80941Smrg instr->branch.offset = 0; 1336b8e80941Smrg 1337b8e80941Smrg instr->branch.offset += 1338b8e80941Smrg QPU_GET_FIELD(packed_instr, 1339b8e80941Smrg VC5_QPU_BRANCH_ADDR_LOW) << 3; 1340b8e80941Smrg 1341b8e80941Smrg instr->branch.offset += 1342b8e80941Smrg QPU_GET_FIELD(packed_instr, 1343b8e80941Smrg VC5_QPU_BRANCH_ADDR_HIGH) << 24; 1344b8e80941Smrg 1345b8e80941Smrg return true; 1346b8e80941Smrg} 1347b8e80941Smrg 1348b8e80941Smrgbool 1349b8e80941Smrgv3d_qpu_instr_unpack(const struct v3d_device_info *devinfo, 1350b8e80941Smrg uint64_t packed_instr, 1351b8e80941Smrg struct v3d_qpu_instr *instr) 1352b8e80941Smrg{ 1353b8e80941Smrg if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) { 1354b8e80941Smrg return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr); 1355b8e80941Smrg } else { 1356b8e80941Smrg uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG); 1357b8e80941Smrg 1358b8e80941Smrg if ((sig & 24) == 16) { 1359b8e80941Smrg return v3d_qpu_instr_unpack_branch(devinfo, packed_instr, 1360b8e80941Smrg instr); 1361b8e80941Smrg } else { 1362b8e80941Smrg return false; 1363b8e80941Smrg } 1364b8e80941Smrg } 1365b8e80941Smrg} 1366b8e80941Smrg 1367b8e80941Smrgstatic bool 1368b8e80941Smrgv3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo, 1369b8e80941Smrg const struct v3d_qpu_instr *instr, 1370b8e80941Smrg uint64_t *packed_instr) 1371b8e80941Smrg{ 1372b8e80941Smrg uint32_t sig; 1373b8e80941Smrg if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig)) 1374b8e80941Smrg return false; 1375b8e80941Smrg *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG); 1376b8e80941Smrg 1377b8e80941Smrg if (instr->type == V3D_QPU_INSTR_TYPE_ALU) { 
1378b8e80941Smrg *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A); 1379b8e80941Smrg *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B); 1380b8e80941Smrg 1381b8e80941Smrg if (!v3d_qpu_add_pack(devinfo, instr, packed_instr)) 1382b8e80941Smrg return false; 1383b8e80941Smrg if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr)) 1384b8e80941Smrg return false; 1385b8e80941Smrg 1386b8e80941Smrg uint32_t flags; 1387b8e80941Smrg if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) { 1388b8e80941Smrg if (instr->flags.ac != V3D_QPU_COND_NONE || 1389b8e80941Smrg instr->flags.mc != V3D_QPU_COND_NONE || 1390b8e80941Smrg instr->flags.apf != V3D_QPU_PF_NONE || 1391b8e80941Smrg instr->flags.mpf != V3D_QPU_PF_NONE || 1392b8e80941Smrg instr->flags.auf != V3D_QPU_UF_NONE || 1393b8e80941Smrg instr->flags.muf != V3D_QPU_UF_NONE) { 1394b8e80941Smrg return false; 1395b8e80941Smrg } 1396b8e80941Smrg 1397b8e80941Smrg flags = instr->sig_addr; 1398b8e80941Smrg if (instr->sig_magic) 1399b8e80941Smrg flags |= VC5_QPU_COND_SIG_MAGIC_ADDR; 1400b8e80941Smrg } else { 1401b8e80941Smrg if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags)) 1402b8e80941Smrg return false; 1403b8e80941Smrg } 1404b8e80941Smrg 1405b8e80941Smrg *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND); 1406b8e80941Smrg } else { 1407b8e80941Smrg if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) 1408b8e80941Smrg return false; 1409b8e80941Smrg } 1410b8e80941Smrg 1411b8e80941Smrg return true; 1412b8e80941Smrg} 1413b8e80941Smrg 1414b8e80941Smrgstatic bool 1415b8e80941Smrgv3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo, 1416b8e80941Smrg const struct v3d_qpu_instr *instr, 1417b8e80941Smrg uint64_t *packed_instr) 1418b8e80941Smrg{ 1419b8e80941Smrg *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG); 1420b8e80941Smrg 1421b8e80941Smrg if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) { 1422b8e80941Smrg *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond - 1423b8e80941Smrg 
V3D_QPU_BRANCH_COND_A0), 1424b8e80941Smrg VC5_QPU_BRANCH_COND); 1425b8e80941Smrg } 1426b8e80941Smrg 1427b8e80941Smrg *packed_instr |= QPU_SET_FIELD(instr->branch.msfign, 1428b8e80941Smrg VC5_QPU_BRANCH_MSFIGN); 1429b8e80941Smrg 1430b8e80941Smrg *packed_instr |= QPU_SET_FIELD(instr->branch.bdi, 1431b8e80941Smrg VC5_QPU_BRANCH_BDI); 1432b8e80941Smrg 1433b8e80941Smrg if (instr->branch.ub) { 1434b8e80941Smrg *packed_instr |= VC5_QPU_BRANCH_UB; 1435b8e80941Smrg *packed_instr |= QPU_SET_FIELD(instr->branch.bdu, 1436b8e80941Smrg VC5_QPU_BRANCH_BDU); 1437b8e80941Smrg } 1438b8e80941Smrg 1439b8e80941Smrg switch (instr->branch.bdi) { 1440b8e80941Smrg case V3D_QPU_BRANCH_DEST_ABS: 1441b8e80941Smrg case V3D_QPU_BRANCH_DEST_REL: 1442b8e80941Smrg *packed_instr |= QPU_SET_FIELD(instr->branch.msfign, 1443b8e80941Smrg VC5_QPU_BRANCH_MSFIGN); 1444b8e80941Smrg 1445b8e80941Smrg *packed_instr |= QPU_SET_FIELD((instr->branch.offset & 1446b8e80941Smrg ~0xff000000) >> 3, 1447b8e80941Smrg VC5_QPU_BRANCH_ADDR_LOW); 1448b8e80941Smrg 1449b8e80941Smrg *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24, 1450b8e80941Smrg VC5_QPU_BRANCH_ADDR_HIGH); 1451b8e80941Smrg 1452b8e80941Smrg case V3D_QPU_BRANCH_DEST_REGFILE: 1453b8e80941Smrg *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a, 1454b8e80941Smrg VC5_QPU_RADDR_A); 1455b8e80941Smrg break; 1456b8e80941Smrg 1457b8e80941Smrg default: 1458b8e80941Smrg break; 1459b8e80941Smrg } 1460b8e80941Smrg 1461b8e80941Smrg return true; 1462b8e80941Smrg} 1463b8e80941Smrg 1464b8e80941Smrgbool 1465b8e80941Smrgv3d_qpu_instr_pack(const struct v3d_device_info *devinfo, 1466b8e80941Smrg const struct v3d_qpu_instr *instr, 1467b8e80941Smrg uint64_t *packed_instr) 1468b8e80941Smrg{ 1469b8e80941Smrg *packed_instr = 0; 1470b8e80941Smrg 1471b8e80941Smrg switch (instr->type) { 1472b8e80941Smrg case V3D_QPU_INSTR_TYPE_ALU: 1473b8e80941Smrg return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr); 1474b8e80941Smrg case V3D_QPU_INSTR_TYPE_BRANCH: 1475b8e80941Smrg 
return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr); 1476b8e80941Smrg default: 1477b8e80941Smrg return false; 1478b8e80941Smrg } 1479b8e80941Smrg} 1480