1/* 2 * Copyright © 2016 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 */ 23 24#include <string.h> 25#include "util/macros.h" 26#include "util/bitscan.h" 27 28#include "broadcom/common/v3d_device_info.h" 29#include "qpu_instr.h" 30 31#ifndef QPU_MASK 32#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low)) 33/* Using the GNU statement expression extension */ 34#define QPU_SET_FIELD(value, field) \ 35 ({ \ 36 uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \ 37 assert((fieldval & ~ field ## _MASK) == 0); \ 38 fieldval & field ## _MASK; \ 39 }) 40 41#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) 42 43#define QPU_UPDATE_FIELD(inst, value, field) \ 44 (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field)) 45#endif /* QPU_MASK */ 46 47#define V3D_QPU_OP_MUL_SHIFT 58 48#define V3D_QPU_OP_MUL_MASK QPU_MASK(63, 58) 49 50#define V3D_QPU_SIG_SHIFT 53 51#define V3D_QPU_SIG_MASK QPU_MASK(57, 53) 52 53#define V3D_QPU_COND_SHIFT 46 54#define V3D_QPU_COND_MASK QPU_MASK(52, 46) 55#define V3D_QPU_COND_SIG_MAGIC_ADDR (1 << 6) 56 57#define V3D_QPU_MM QPU_MASK(45, 45) 58#define V3D_QPU_MA QPU_MASK(44, 44) 59 60#define V3D_QPU_WADDR_M_SHIFT 38 61#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38) 62 63#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT 35 64#define V3D_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35) 65 66#define V3D_QPU_WADDR_A_SHIFT 32 67#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32) 68 69#define V3D_QPU_BRANCH_COND_SHIFT 32 70#define V3D_QPU_BRANCH_COND_MASK QPU_MASK(34, 32) 71 72#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT 24 73#define V3D_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24) 74 75#define V3D_QPU_OP_ADD_SHIFT 24 76#define V3D_QPU_OP_ADD_MASK QPU_MASK(31, 24) 77 78#define V3D_QPU_MUL_B_SHIFT 21 79#define V3D_QPU_MUL_B_MASK QPU_MASK(23, 21) 80 81#define V3D_QPU_BRANCH_MSFIGN_SHIFT 21 82#define V3D_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21) 83 84#define V3D_QPU_MUL_A_SHIFT 18 85#define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18) 86 87#define V3D_QPU_ADD_B_SHIFT 15 88#define V3D_QPU_ADD_B_MASK 
QPU_MASK(17, 15) 89 90#define V3D_QPU_BRANCH_BDU_SHIFT 15 91#define V3D_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15) 92 93#define V3D_QPU_BRANCH_UB QPU_MASK(14, 14) 94 95#define V3D_QPU_ADD_A_SHIFT 12 96#define V3D_QPU_ADD_A_MASK QPU_MASK(14, 12) 97 98#define V3D_QPU_BRANCH_BDI_SHIFT 12 99#define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12) 100 101#define V3D_QPU_RADDR_A_SHIFT 6 102#define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6) 103 104#define V3D_QPU_RADDR_B_SHIFT 0 105#define V3D_QPU_RADDR_B_MASK QPU_MASK(5, 0) 106 107#define THRSW .thrsw = true 108#define LDUNIF .ldunif = true 109#define LDUNIFRF .ldunifrf = true 110#define LDUNIFA .ldunifa = true 111#define LDUNIFARF .ldunifarf = true 112#define LDTMU .ldtmu = true 113#define LDVARY .ldvary = true 114#define LDVPM .ldvpm = true 115#define SMIMM .small_imm = true 116#define LDTLB .ldtlb = true 117#define LDTLBU .ldtlbu = true 118#define UCB .ucb = true 119#define ROT .rotate = true 120#define WRTMUC .wrtmuc = true 121 122static const struct v3d_qpu_sig v33_sig_map[] = { 123 /* MISC R3 R4 R5 */ 124 [0] = { }, 125 [1] = { THRSW, }, 126 [2] = { LDUNIF }, 127 [3] = { THRSW, LDUNIF }, 128 [4] = { LDTMU, }, 129 [5] = { THRSW, LDTMU, }, 130 [6] = { LDTMU, LDUNIF }, 131 [7] = { THRSW, LDTMU, LDUNIF }, 132 [8] = { LDVARY, }, 133 [9] = { THRSW, LDVARY, }, 134 [10] = { LDVARY, LDUNIF }, 135 [11] = { THRSW, LDVARY, LDUNIF }, 136 [12] = { LDVARY, LDTMU, }, 137 [13] = { THRSW, LDVARY, LDTMU, }, 138 [14] = { SMIMM, LDVARY, }, 139 [15] = { SMIMM, }, 140 [16] = { LDTLB, }, 141 [17] = { LDTLBU, }, 142 /* 18-21 reserved */ 143 [22] = { UCB, }, 144 [23] = { ROT, }, 145 [24] = { LDVPM, }, 146 [25] = { THRSW, LDVPM, }, 147 [26] = { LDVPM, LDUNIF }, 148 [27] = { THRSW, LDVPM, LDUNIF }, 149 [28] = { LDVPM, LDTMU, }, 150 [29] = { THRSW, LDVPM, LDTMU, }, 151 [30] = { SMIMM, LDVPM, }, 152 [31] = { SMIMM, }, 153}; 154 155static const struct v3d_qpu_sig v40_sig_map[] = { 156 /* MISC R3 R4 R5 */ 157 [0] = { }, 158 [1] = { THRSW, }, 159 [2] = { LDUNIF }, 
160 [3] = { THRSW, LDUNIF }, 161 [4] = { LDTMU, }, 162 [5] = { THRSW, LDTMU, }, 163 [6] = { LDTMU, LDUNIF }, 164 [7] = { THRSW, LDTMU, LDUNIF }, 165 [8] = { LDVARY, }, 166 [9] = { THRSW, LDVARY, }, 167 [10] = { LDVARY, LDUNIF }, 168 [11] = { THRSW, LDVARY, LDUNIF }, 169 /* 12-13 reserved */ 170 [14] = { SMIMM, LDVARY, }, 171 [15] = { SMIMM, }, 172 [16] = { LDTLB, }, 173 [17] = { LDTLBU, }, 174 [18] = { WRTMUC }, 175 [19] = { THRSW, WRTMUC }, 176 [20] = { LDVARY, WRTMUC }, 177 [21] = { THRSW, LDVARY, WRTMUC }, 178 [22] = { UCB, }, 179 [23] = { ROT, }, 180 /* 24-30 reserved */ 181 [31] = { SMIMM, LDTMU, }, 182}; 183 184static const struct v3d_qpu_sig v41_sig_map[] = { 185 /* MISC phys R5 */ 186 [0] = { }, 187 [1] = { THRSW, }, 188 [2] = { LDUNIF }, 189 [3] = { THRSW, LDUNIF }, 190 [4] = { LDTMU, }, 191 [5] = { THRSW, LDTMU, }, 192 [6] = { LDTMU, LDUNIF }, 193 [7] = { THRSW, LDTMU, LDUNIF }, 194 [8] = { LDVARY, }, 195 [9] = { THRSW, LDVARY, }, 196 [10] = { LDVARY, LDUNIF }, 197 [11] = { THRSW, LDVARY, LDUNIF }, 198 [12] = { LDUNIFRF }, 199 [13] = { THRSW, LDUNIFRF }, 200 [14] = { SMIMM, LDVARY, }, 201 [15] = { SMIMM, }, 202 [16] = { LDTLB, }, 203 [17] = { LDTLBU, }, 204 [18] = { WRTMUC }, 205 [19] = { THRSW, WRTMUC }, 206 [20] = { LDVARY, WRTMUC }, 207 [21] = { THRSW, LDVARY, WRTMUC }, 208 [22] = { UCB, }, 209 [23] = { ROT, }, 210 [24] = { LDUNIFA}, 211 [25] = { LDUNIFARF }, 212 /* 26-30 reserved */ 213 [31] = { SMIMM, LDTMU, }, 214}; 215 216bool 217v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, 218 uint32_t packed_sig, 219 struct v3d_qpu_sig *sig) 220{ 221 if (packed_sig >= ARRAY_SIZE(v33_sig_map)) 222 return false; 223 224 if (devinfo->ver >= 41) 225 *sig = v41_sig_map[packed_sig]; 226 else if (devinfo->ver == 40) 227 *sig = v40_sig_map[packed_sig]; 228 else 229 *sig = v33_sig_map[packed_sig]; 230 231 /* Signals with zeroed unpacked contents after element 0 are reserved. 
*/ 232 return (packed_sig == 0 || 233 memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0); 234} 235 236bool 237v3d_qpu_sig_pack(const struct v3d_device_info *devinfo, 238 const struct v3d_qpu_sig *sig, 239 uint32_t *packed_sig) 240{ 241 static const struct v3d_qpu_sig *map; 242 243 if (devinfo->ver >= 41) 244 map = v41_sig_map; 245 else if (devinfo->ver == 40) 246 map = v40_sig_map; 247 else 248 map = v33_sig_map; 249 250 for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) { 251 if (memcmp(&map[i], sig, sizeof(*sig)) == 0) { 252 *packed_sig = i; 253 return true; 254 } 255 } 256 257 return false; 258} 259static inline unsigned 260fui( float f ) 261{ 262 union {float f; unsigned ui;} fi; 263 fi.f = f; 264 return fi.ui; 265} 266 267static const uint32_t small_immediates[] = { 268 0, 1, 2, 3, 269 4, 5, 6, 7, 270 8, 9, 10, 11, 271 12, 13, 14, 15, 272 -16, -15, -14, -13, 273 -12, -11, -10, -9, 274 -8, -7, -6, -5, 275 -4, -3, -2, -1, 276 0x3b800000, /* 2.0^-8 */ 277 0x3c000000, /* 2.0^-7 */ 278 0x3c800000, /* 2.0^-6 */ 279 0x3d000000, /* 2.0^-5 */ 280 0x3d800000, /* 2.0^-4 */ 281 0x3e000000, /* 2.0^-3 */ 282 0x3e800000, /* 2.0^-2 */ 283 0x3f000000, /* 2.0^-1 */ 284 0x3f800000, /* 2.0^0 */ 285 0x40000000, /* 2.0^1 */ 286 0x40800000, /* 2.0^2 */ 287 0x41000000, /* 2.0^3 */ 288 0x41800000, /* 2.0^4 */ 289 0x42000000, /* 2.0^5 */ 290 0x42800000, /* 2.0^6 */ 291 0x43000000, /* 2.0^7 */ 292}; 293 294bool 295v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo, 296 uint32_t packed_small_immediate, 297 uint32_t *small_immediate) 298{ 299 if (packed_small_immediate >= ARRAY_SIZE(small_immediates)) 300 return false; 301 302 *small_immediate = small_immediates[packed_small_immediate]; 303 return true; 304} 305 306bool 307v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo, 308 uint32_t value, 309 uint32_t *packed_small_immediate) 310{ 311 STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48); 312 313 for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) { 314 if 
(small_immediates[i] == value) { 315 *packed_small_immediate = i; 316 return true; 317 } 318 } 319 320 return false; 321} 322 323bool 324v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo, 325 uint32_t packed_cond, 326 struct v3d_qpu_flags *cond) 327{ 328 static const enum v3d_qpu_cond cond_map[4] = { 329 [0] = V3D_QPU_COND_IFA, 330 [1] = V3D_QPU_COND_IFB, 331 [2] = V3D_QPU_COND_IFNA, 332 [3] = V3D_QPU_COND_IFNB, 333 }; 334 335 cond->ac = V3D_QPU_COND_NONE; 336 cond->mc = V3D_QPU_COND_NONE; 337 cond->apf = V3D_QPU_PF_NONE; 338 cond->mpf = V3D_QPU_PF_NONE; 339 cond->auf = V3D_QPU_UF_NONE; 340 cond->muf = V3D_QPU_UF_NONE; 341 342 if (packed_cond == 0) { 343 return true; 344 } else if (packed_cond >> 2 == 0) { 345 cond->apf = packed_cond & 0x3; 346 } else if (packed_cond >> 4 == 0) { 347 cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; 348 } else if (packed_cond == 0x10) { 349 return false; 350 } else if (packed_cond >> 2 == 0x4) { 351 cond->mpf = packed_cond & 0x3; 352 } else if (packed_cond >> 4 == 0x1) { 353 cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; 354 } else if (packed_cond >> 4 == 0x2) { 355 cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA; 356 cond->mpf = packed_cond & 0x3; 357 } else if (packed_cond >> 4 == 0x3) { 358 cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA; 359 cond->apf = packed_cond & 0x3; 360 } else if (packed_cond >> 6) { 361 cond->mc = cond_map[(packed_cond >> 4) & 0x3]; 362 if (((packed_cond >> 2) & 0x3) == 0) { 363 cond->ac = cond_map[packed_cond & 0x3]; 364 } else { 365 cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; 366 } 367 } 368 369 return true; 370} 371 372bool 373v3d_qpu_flags_pack(const struct v3d_device_info *devinfo, 374 const struct v3d_qpu_flags *cond, 375 uint32_t *packed_cond) 376{ 377#define AC (1 << 0) 378#define MC (1 << 1) 379#define APF (1 << 2) 380#define MPF (1 << 3) 381#define AUF (1 << 4) 382#define MUF (1 << 5) 383 static const struct { 384 uint8_t flags_present; 385 
uint8_t bits; 386 } flags_table[] = { 387 { 0, 0 }, 388 { APF, 0 }, 389 { AUF, 0 }, 390 { MPF, (1 << 4) }, 391 { MUF, (1 << 4) }, 392 { AC, (1 << 5) }, 393 { AC | MPF, (1 << 5) }, 394 { MC, (1 << 5) | (1 << 4) }, 395 { MC | APF, (1 << 5) | (1 << 4) }, 396 { MC | AC, (1 << 6) }, 397 { MC | AUF, (1 << 6) }, 398 }; 399 400 uint8_t flags_present = 0; 401 if (cond->ac != V3D_QPU_COND_NONE) 402 flags_present |= AC; 403 if (cond->mc != V3D_QPU_COND_NONE) 404 flags_present |= MC; 405 if (cond->apf != V3D_QPU_PF_NONE) 406 flags_present |= APF; 407 if (cond->mpf != V3D_QPU_PF_NONE) 408 flags_present |= MPF; 409 if (cond->auf != V3D_QPU_UF_NONE) 410 flags_present |= AUF; 411 if (cond->muf != V3D_QPU_UF_NONE) 412 flags_present |= MUF; 413 414 for (int i = 0; i < ARRAY_SIZE(flags_table); i++) { 415 if (flags_table[i].flags_present != flags_present) 416 continue; 417 418 *packed_cond = flags_table[i].bits; 419 420 *packed_cond |= cond->apf; 421 *packed_cond |= cond->mpf; 422 423 if (flags_present & AUF) 424 *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4; 425 if (flags_present & MUF) 426 *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4; 427 428 if (flags_present & AC) 429 *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2; 430 431 if (flags_present & MC) { 432 if (*packed_cond & (1 << 6)) 433 *packed_cond |= (cond->mc - 434 V3D_QPU_COND_IFA) << 4; 435 else 436 *packed_cond |= (cond->mc - 437 V3D_QPU_COND_IFA) << 2; 438 } 439 440 return true; 441 } 442 443 return false; 444} 445 446/* Make a mapping of the table of opcodes in the spec. The opcode is 447 * determined by a combination of the opcode field, and in the case of 0 or 448 * 1-arg opcodes, the mux_b field as well. 
449 */ 450#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1)) 451#define ANYMUX MUX_MASK(0, 7) 452 453struct opcode_desc { 454 uint8_t opcode_first; 455 uint8_t opcode_last; 456 uint8_t mux_b_mask; 457 uint8_t mux_a_mask; 458 uint8_t op; 459 460 /* first_ver == 0 if it's the same across all V3D versions. 461 * first_ver == X, last_ver == 0 if it's the same for all V3D versions 462 * starting from X 463 * first_ver == X, last_ver == Y if it's the same for all V3D versions 464 * on the range X through Y 465 */ 466 uint8_t first_ver; 467 uint8_t last_ver; 468}; 469 470static const struct opcode_desc add_ops[] = { 471 /* FADD is FADDNF depending on the order of the mux_a/mux_b. */ 472 { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD }, 473 { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF }, 474 { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, 475 { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD }, 476 { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, 477 { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB }, 478 { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, 479 { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB }, 480 { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN }, 481 { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX }, 482 { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN }, 483 { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX }, 484 { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL }, 485 { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR }, 486 { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR }, 487 { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR }, 488 /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. 
*/ 489 { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN }, 490 { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX }, 491 { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN }, 492 493 { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND }, 494 { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR }, 495 { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR }, 496 497 { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD }, 498 { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB }, 499 { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT }, 500 { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG }, 501 { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH }, 502 { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH }, 503 { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP }, 504 { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP }, 505 { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF }, 506 { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF }, 507 { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 }, 508 { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX }, 509 { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX }, 510 { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR }, 511 { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA }, 512 { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA }, 513 { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB }, 514 { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB }, 515 516 { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD }, 517 { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD }, 518 { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD }, 519 { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD }, 520 521 { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF }, 522 { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF }, 523 { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 }, 524 { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 }, 525 { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 }, 526 { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 }, 527 { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT }, 528 { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT }, 529 { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 }, 530 { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 }, 531 { 187, 187, 
1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 }, 532 533 { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 }, 534 { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 }, 535 { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 }, 536 { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 }, 537 { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 }, 538 { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 }, 539 { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 }, 540 { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 }, 541 { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 }, 542 { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 }, 543 { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 }, 544 { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 }, 545 546 /* FIXME: MORE COMPLICATED */ 547 /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */ 548 549 { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP }, 550 { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX }, 551 552 { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND }, 553 { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN }, 554 { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC }, 555 { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ }, 556 { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR }, 557 { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ }, 558 { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL }, 559 { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC }, 560 561 { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX }, 562 { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY }, 563 564 /* The stvpms are distinguished by the waddr field. 
*/ 565 { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV }, 566 { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD }, 567 { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP }, 568 569 { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF }, 570 { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ }, 571 { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF }, 572}; 573 574static const struct opcode_desc mul_ops[] = { 575 { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD }, 576 { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB }, 577 { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 }, 578 { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL }, 579 { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 }, 580 { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP }, 581 { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV }, 582 { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV }, 583 { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 }, 584 { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV }, 585 { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL }, 586}; 587 588/* Returns true if op_desc should be filtered out based on devinfo->ver 589 * against op_desc->first_ver and op_desc->last_ver. Check notes about 590 * first_ver/last_ver on struct opcode_desc comments. 
591 */ 592static bool 593opcode_invalid_in_version(const struct v3d_device_info *devinfo, 594 const struct opcode_desc *op_desc) 595{ 596 return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) || 597 (op_desc->last_ver != 0 && devinfo->ver > op_desc->last_ver); 598} 599 600static const struct opcode_desc * 601lookup_opcode_from_packed(const struct v3d_device_info *devinfo, 602 const struct opcode_desc *opcodes, 603 size_t num_opcodes, uint32_t opcode, 604 uint32_t mux_a, uint32_t mux_b) 605{ 606 for (int i = 0; i < num_opcodes; i++) { 607 const struct opcode_desc *op_desc = &opcodes[i]; 608 609 if (opcode < op_desc->opcode_first || 610 opcode > op_desc->opcode_last) 611 continue; 612 613 if (opcode_invalid_in_version(devinfo, op_desc)) 614 continue; 615 616 if (!(op_desc->mux_b_mask & (1 << mux_b))) 617 continue; 618 619 if (!(op_desc->mux_a_mask & (1 << mux_a))) 620 continue; 621 622 return op_desc; 623 } 624 625 return NULL; 626} 627 628static bool 629v3d_qpu_float32_unpack_unpack(uint32_t packed, 630 enum v3d_qpu_input_unpack *unpacked) 631{ 632 switch (packed) { 633 case 0: 634 *unpacked = V3D_QPU_UNPACK_ABS; 635 return true; 636 case 1: 637 *unpacked = V3D_QPU_UNPACK_NONE; 638 return true; 639 case 2: 640 *unpacked = V3D_QPU_UNPACK_L; 641 return true; 642 case 3: 643 *unpacked = V3D_QPU_UNPACK_H; 644 return true; 645 default: 646 return false; 647 } 648} 649 650static bool 651v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked, 652 uint32_t *packed) 653{ 654 switch (unpacked) { 655 case V3D_QPU_UNPACK_ABS: 656 *packed = 0; 657 return true; 658 case V3D_QPU_UNPACK_NONE: 659 *packed = 1; 660 return true; 661 case V3D_QPU_UNPACK_L: 662 *packed = 2; 663 return true; 664 case V3D_QPU_UNPACK_H: 665 *packed = 3; 666 return true; 667 default: 668 return false; 669 } 670} 671 672static bool 673v3d_qpu_float16_unpack_unpack(uint32_t packed, 674 enum v3d_qpu_input_unpack *unpacked) 675{ 676 switch (packed) { 677 case 0: 678 *unpacked = 
V3D_QPU_UNPACK_NONE; 679 return true; 680 case 1: 681 *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16; 682 return true; 683 case 2: 684 *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16; 685 return true; 686 case 3: 687 *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16; 688 return true; 689 case 4: 690 *unpacked = V3D_QPU_UNPACK_SWAP_16; 691 return true; 692 default: 693 return false; 694 } 695} 696 697static bool 698v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked, 699 uint32_t *packed) 700{ 701 switch (unpacked) { 702 case V3D_QPU_UNPACK_NONE: 703 *packed = 0; 704 return true; 705 case V3D_QPU_UNPACK_REPLICATE_32F_16: 706 *packed = 1; 707 return true; 708 case V3D_QPU_UNPACK_REPLICATE_L_16: 709 *packed = 2; 710 return true; 711 case V3D_QPU_UNPACK_REPLICATE_H_16: 712 *packed = 3; 713 return true; 714 case V3D_QPU_UNPACK_SWAP_16: 715 *packed = 4; 716 return true; 717 default: 718 return false; 719 } 720} 721 722static bool 723v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked, 724 uint32_t *packed) 725{ 726 switch (unpacked) { 727 case V3D_QPU_PACK_NONE: 728 *packed = 0; 729 return true; 730 case V3D_QPU_PACK_L: 731 *packed = 1; 732 return true; 733 case V3D_QPU_PACK_H: 734 *packed = 2; 735 return true; 736 default: 737 return false; 738 } 739} 740 741static bool 742v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, 743 struct v3d_qpu_instr *instr) 744{ 745 uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD); 746 uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A); 747 uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B); 748 uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); 749 750 uint32_t map_op = op; 751 /* Some big clusters of opcodes are replicated with unpack 752 * flags 753 */ 754 if (map_op >= 249 && map_op <= 251) 755 map_op = (map_op - 249 + 245); 756 if (map_op >= 253 && map_op <= 255) 757 map_op = (map_op - 253 + 245); 758 759 const struct opcode_desc *desc = 760 
lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops), 761 map_op, mux_a, mux_b); 762 763 if (!desc) 764 return false; 765 766 instr->alu.add.op = desc->op; 767 768 /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the 769 * operands. 770 */ 771 if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) { 772 if (instr->alu.add.op == V3D_QPU_A_FMIN) 773 instr->alu.add.op = V3D_QPU_A_FMAX; 774 if (instr->alu.add.op == V3D_QPU_A_FADD) 775 instr->alu.add.op = V3D_QPU_A_FADDNF; 776 } 777 778 /* Some QPU ops require a bit more than just basic opcode and mux a/b 779 * comparisons to distinguish them. 780 */ 781 switch (instr->alu.add.op) { 782 case V3D_QPU_A_STVPMV: 783 case V3D_QPU_A_STVPMD: 784 case V3D_QPU_A_STVPMP: 785 switch (waddr) { 786 case 0: 787 instr->alu.add.op = V3D_QPU_A_STVPMV; 788 break; 789 case 1: 790 instr->alu.add.op = V3D_QPU_A_STVPMD; 791 break; 792 case 2: 793 instr->alu.add.op = V3D_QPU_A_STVPMP; 794 break; 795 default: 796 return false; 797 } 798 break; 799 default: 800 break; 801 } 802 803 switch (instr->alu.add.op) { 804 case V3D_QPU_A_FADD: 805 case V3D_QPU_A_FADDNF: 806 case V3D_QPU_A_FSUB: 807 case V3D_QPU_A_FMIN: 808 case V3D_QPU_A_FMAX: 809 case V3D_QPU_A_FCMP: 810 case V3D_QPU_A_VFPACK: 811 if (instr->alu.add.op != V3D_QPU_A_VFPACK) 812 instr->alu.add.output_pack = (op >> 4) & 0x3; 813 else 814 instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 815 816 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 817 &instr->alu.add.a_unpack)) { 818 return false; 819 } 820 821 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, 822 &instr->alu.add.b_unpack)) { 823 return false; 824 } 825 break; 826 827 case V3D_QPU_A_FFLOOR: 828 case V3D_QPU_A_FROUND: 829 case V3D_QPU_A_FTRUNC: 830 case V3D_QPU_A_FCEIL: 831 case V3D_QPU_A_FDX: 832 case V3D_QPU_A_FDY: 833 instr->alu.add.output_pack = mux_b & 0x3; 834 835 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 836 &instr->alu.add.a_unpack)) { 837 return false; 838 } 839 break; 840 841 case 
V3D_QPU_A_FTOIN: 842 case V3D_QPU_A_FTOIZ: 843 case V3D_QPU_A_FTOUZ: 844 case V3D_QPU_A_FTOC: 845 instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 846 847 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 848 &instr->alu.add.a_unpack)) { 849 return false; 850 } 851 break; 852 853 case V3D_QPU_A_VFMIN: 854 case V3D_QPU_A_VFMAX: 855 if (!v3d_qpu_float16_unpack_unpack(op & 0x7, 856 &instr->alu.add.a_unpack)) { 857 return false; 858 } 859 860 instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 861 instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; 862 break; 863 864 default: 865 instr->alu.add.output_pack = V3D_QPU_PACK_NONE; 866 instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE; 867 instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; 868 break; 869 } 870 871 instr->alu.add.a = mux_a; 872 instr->alu.add.b = mux_b; 873 instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); 874 875 instr->alu.add.magic_write = false; 876 if (packed_inst & V3D_QPU_MA) { 877 switch (instr->alu.add.op) { 878 case V3D_QPU_A_LDVPMV_IN: 879 instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT; 880 break; 881 case V3D_QPU_A_LDVPMD_IN: 882 instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT; 883 break; 884 case V3D_QPU_A_LDVPMG_IN: 885 instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT; 886 break; 887 default: 888 instr->alu.add.magic_write = true; 889 break; 890 } 891 } 892 893 return true; 894} 895 896static bool 897v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, 898 struct v3d_qpu_instr *instr) 899{ 900 uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL); 901 uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A); 902 uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B); 903 904 { 905 const struct opcode_desc *desc = 906 lookup_opcode_from_packed(devinfo, mul_ops, 907 ARRAY_SIZE(mul_ops), 908 op, mux_a, mux_b); 909 if (!desc) 910 return false; 911 912 instr->alu.mul.op = desc->op; 913 } 914 915 switch (instr->alu.mul.op) { 916 case V3D_QPU_M_FMUL: 917 instr->alu.mul.output_pack = 
((op >> 4) & 0x3) - 1; 918 919 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, 920 &instr->alu.mul.a_unpack)) { 921 return false; 922 } 923 924 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, 925 &instr->alu.mul.b_unpack)) { 926 return false; 927 } 928 929 break; 930 931 case V3D_QPU_M_FMOV: 932 instr->alu.mul.output_pack = (((op & 1) << 1) + 933 ((mux_b >> 2) & 1)); 934 935 if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3, 936 &instr->alu.mul.a_unpack)) { 937 return false; 938 } 939 940 break; 941 942 case V3D_QPU_M_VFMUL: 943 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; 944 945 if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7, 946 &instr->alu.mul.a_unpack)) { 947 return false; 948 } 949 950 instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE; 951 952 break; 953 954 default: 955 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; 956 instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE; 957 instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE; 958 break; 959 } 960 961 instr->alu.mul.a = mux_a; 962 instr->alu.mul.b = mux_b; 963 instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M); 964 instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM; 965 966 return true; 967} 968 969static const struct opcode_desc * 970lookup_opcode_from_instr(const struct v3d_device_info *devinfo, 971 const struct opcode_desc *opcodes, size_t num_opcodes, 972 uint8_t op) 973{ 974 for (int i = 0; i < num_opcodes; i++) { 975 const struct opcode_desc *op_desc = &opcodes[i]; 976 977 if (op_desc->op != op) 978 continue; 979 980 if (opcode_invalid_in_version(devinfo, op_desc)) 981 continue; 982 983 return op_desc; 984 } 985 986 return NULL; 987} 988 989static bool 990v3d_qpu_add_pack(const struct v3d_device_info *devinfo, 991 const struct v3d_qpu_instr *instr, uint64_t *packed_instr) 992{ 993 uint32_t waddr = instr->alu.add.waddr; 994 uint32_t mux_a = instr->alu.add.a; 995 uint32_t mux_b = instr->alu.add.b; 996 int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op); 997 const struct opcode_desc 
*desc = 998 lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops), 999 instr->alu.add.op); 1000 1001 if (!desc) 1002 return false; 1003 1004 uint32_t opcode = desc->opcode_first; 1005 1006 /* If an operation doesn't use an arg, its mux values may be used to 1007 * identify the operation type. 1008 */ 1009 if (nsrc < 2) 1010 mux_b = ffs(desc->mux_b_mask) - 1; 1011 1012 if (nsrc < 1) 1013 mux_a = ffs(desc->mux_a_mask) - 1; 1014 1015 bool no_magic_write = false; 1016 1017 switch (instr->alu.add.op) { 1018 case V3D_QPU_A_STVPMV: 1019 waddr = 0; 1020 no_magic_write = true; 1021 break; 1022 case V3D_QPU_A_STVPMD: 1023 waddr = 1; 1024 no_magic_write = true; 1025 break; 1026 case V3D_QPU_A_STVPMP: 1027 waddr = 2; 1028 no_magic_write = true; 1029 break; 1030 1031 case V3D_QPU_A_LDVPMV_IN: 1032 case V3D_QPU_A_LDVPMD_IN: 1033 case V3D_QPU_A_LDVPMP: 1034 case V3D_QPU_A_LDVPMG_IN: 1035 assert(!instr->alu.add.magic_write); 1036 break; 1037 1038 case V3D_QPU_A_LDVPMV_OUT: 1039 case V3D_QPU_A_LDVPMD_OUT: 1040 case V3D_QPU_A_LDVPMG_OUT: 1041 assert(!instr->alu.add.magic_write); 1042 *packed_instr |= V3D_QPU_MA; 1043 break; 1044 1045 default: 1046 break; 1047 } 1048 1049 switch (instr->alu.add.op) { 1050 case V3D_QPU_A_FADD: 1051 case V3D_QPU_A_FADDNF: 1052 case V3D_QPU_A_FSUB: 1053 case V3D_QPU_A_FMIN: 1054 case V3D_QPU_A_FMAX: 1055 case V3D_QPU_A_FCMP: { 1056 uint32_t output_pack; 1057 uint32_t a_unpack; 1058 uint32_t b_unpack; 1059 1060 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, 1061 &output_pack)) { 1062 return false; 1063 } 1064 opcode |= output_pack << 4; 1065 1066 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, 1067 &a_unpack)) { 1068 return false; 1069 } 1070 1071 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack, 1072 &b_unpack)) { 1073 return false; 1074 } 1075 1076 /* These operations with commutative operands are 1077 * distinguished by which order their operands come in. 
 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        /* Swap the two source operands (and their unpack
                         * modes together with them) to get the canonical
                         * operand ordering for this opcode.
                         */
                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                /* The two float32 unpack modes are encoded in the low bits
                 * of the add opcode field.
                 */
                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* ABS is not a representable source unpack for VFPACK. */
                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* Each unpack occupies a single opcode bit here, so clear
                 * the bit before ORing the new value in.
                 */
                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                /* Output pack mode is carried in the mux_b field for these
                 * single-source float ops.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* Encoding 0 for the unpack is not valid in this opcode
                 * range.
                 */
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                /* The float-to-integer conversions have no output pack. */
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* Encoding 0 for the unpack is not valid here either. */
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                /* Only a float16 unpack on source A is representable. */
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                /* Remaining add ops (other than NOP) have no pack/unpack
                 * encodings at all.
                 */
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}

/* Packs the mul-ALU half of an instruction (opcode, muxes, pack/unpack
 * modes, write address) into *packed_instr.  Returns false if the
 * instruction's pack/unpack combination is not representable.
 */
static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops),
                                         instr->alu.mul.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* The output pack is split: high bit in the opcode, low bit
                 * in mux_b.
                 */
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* SWAP_16 has its own dedicated opcode; the other float16
                 * unpacks are biased into the low 3 opcode bits.
                 */
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}

/* Unpacks a 64-bit ALU instruction into *instr: signal, condition/flags,
 * raddrs, and both the add and mul halves.
 */
static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
                                &instr->sig))
                return false;

        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                /* When the signal writes a register, the COND field is
                 * repurposed to hold the signal's write address and magic
                 * bit, and no conditions/flags can be encoded.
                 */
                instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
        instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}

/* Unpacks a 64-bit branch instruction into *instr. */
static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        /* cond == 0 means an unconditional branch; values from 2 up map
         * onto the A0..ALLNA flag conditions.  Anything else is invalid.
         */
        uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        /* MSFIGN value 3 is not a defined encoding. */
        uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);

        instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  V3D_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              V3D_QPU_RADDR_A);

        /* The branch offset is split across two fields: bits 23:3 in
         * ADDR_LOW and bits 31:24 in ADDR_HIGH.
         */
        instr->branch.offset = 0;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}

/* Top-level unpack entry point: decides whether the 64-bit word is an ALU
 * or a branch instruction and dispatches accordingly.
 */
bool
v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                     uint64_t packed_instr,
                     struct v3d_qpu_instr *instr)
{
        /* A non-zero mul opcode field marks an ALU instruction. */
        if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
        } else {
                uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);

                /* Branches are identified by this pattern in the signal
                 * field bits (see the 16 packed by
                 * v3d_qpu_instr_pack_branch).
                 */
                if ((sig & 24) == 16) {
                        return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
                                                           instr);
                } else {
                        return false;
                }
        }
}

/* Packs an ALU instruction (signal, raddrs, both ALU halves, and the
 * cond/flags field) into *packed_instr.
 */
static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        /* The COND field holds the signal's write address,
                         * so no conditions or flag updates may be encoded
                         * alongside it.
                         */
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
        } else {
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}

/* Packs a branch instruction into *packed_instr. */
static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        /* 16 in the signal field marks the instruction as a branch (see
         * the (sig & 24) == 16 test in v3d_qpu_instr_unpack).
         */
        *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);

        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               V3D_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       V3D_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       V3D_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= V3D_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               V3D_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                /* NOTE(review): MSFIGN was already packed unconditionally
                 * above; this second SET_FIELD ORs in the same value and is
                 * redundant but harmless.
                 */
                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                               V3D_QPU_BRANCH_MSFIGN);

                /* The offset is split into a low field (bits 23:3) and a
                 * high field (bits 31:24).
                 */
                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               V3D_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               V3D_QPU_BRANCH_ADDR_HIGH);
                break;
        default:
                break;
        }

        if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
            instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               V3D_QPU_RADDR_A);
        }

        return true;
}

/* Top-level pack entry point: zeroes *packed_instr and dispatches on the
 * instruction type.  Returns false for unrepresentable instructions.
 */
bool
v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr,
                   uint64_t *packed_instr)
{
        *packed_instr = 0;

        switch (instr->type) {
        case V3D_QPU_INSTR_TYPE_ALU:
                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
        default:
                return false;
        }
}