/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include "util/macros.h"

#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

#ifndef QPU_MASK
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
        })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */

#define VC5_QPU_OP_MUL_SHIFT                58
#define VC5_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define VC5_QPU_SIG_SHIFT                   53
#define VC5_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define VC5_QPU_COND_SHIFT                  46
#define VC5_QPU_COND_MASK                   QPU_MASK(52, 46)
#define VC5_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define VC5_QPU_MM                          QPU_MASK(45, 45)
#define VC5_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define VC5_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define VC5_QPU_BRANCH_COND_SHIFT           32
#define VC5_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define VC5_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define VC5_QPU_OP_ADD_SHIFT                24
#define VC5_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define VC5_QPU_MUL_B_SHIFT                 21
#define VC5_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define VC5_QPU_BRANCH_MSFIGN_SHIFT         21
#define VC5_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define VC5_QPU_MUL_A_SHIFT                 18
#define VC5_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define VC5_QPU_ADD_B_SHIFT                 15
#define VC5_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_BDU_SHIFT            15
#define VC5_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define VC5_QPU_ADD_A_SHIFT                 12
#define VC5_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define VC5_QPU_BRANCH_BDI_SHIFT            12
#define VC5_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)
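
/* The ALU and branch instruction formats overlay different fields on the
 * same 64-bit word, which is why several of the shifts/masks above share
 * bit ranges (e.g. V3D_QPU_WADDR_A vs. VC5_QPU_BRANCH_COND at bit 32).
 * The raddr fields below are used by both forms: a branch with
 * bdi == V3D_QPU_BRANCH_DEST_REGFILE takes its destination from raddr_a
 * (see v3d_qpu_instr_unpack_branch()).
 */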
#define VC5_QPU_RADDR_A_SHIFT               6
#define VC5_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define VC5_QPU_RADDR_B_SHIFT               0
#define VC5_QPU_RADDR_B_MASK                QPU_MASK(5, 0)

#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true

static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDVARY, LDTMU, },
        [13] = { THRSW, LDVARY, LDTMU, },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        /* 18-21 reserved */
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDVPM, },
        [25] = { THRSW, LDVPM, },
        [26] = { LDVPM, LDUNIF },
        [27] = { THRSW, LDVPM, LDUNIF },
        [28] = { LDVPM, LDTMU, },
        [29] = { THRSW, LDVPM, LDTMU, },
        [30] = { SMIMM, LDVPM, },
        [31] = { SMIMM, },
};

static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        /* 24-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC    phys    R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDUNIFRF },
        [13] = { THRSW, LDUNIFRF },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDUNIFA },
        [25] = { LDUNIFARF },
        /* 26-30 reserved */
        [31] = { SMIMM, LDTMU, },
};
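
/* Unpacking a signal is a single table lookup: the 5-bit packed sig value
 * indexes the version-specific map above.  Packing is the reverse search
 * for a map entry with matching contents, so e.g. a THRSW+LDUNIF signal
 * packs to 3 on all three maps.
 */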
bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                   uint32_t packed_sig,
                   struct v3d_qpu_sig *sig)
{
        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
                return false;

        if (devinfo->ver >= 41)
                *sig = v41_sig_map[packed_sig];
        else if (devinfo->ver == 40)
                *sig = v40_sig_map[packed_sig];
        else
                *sig = v33_sig_map[packed_sig];

        /* Signals with zeroed unpacked contents after element 0 are
         * reserved.
         */
        return (packed_sig == 0 ||
                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
}

bool
v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_sig *sig,
                 uint32_t *packed_sig)
{
        static const struct v3d_qpu_sig *map;

        if (devinfo->ver >= 41)
                map = v41_sig_map;
        else if (devinfo->ver == 40)
                map = v40_sig_map;
        else
                map = v33_sig_map;

        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
                        *packed_sig = i;
                        return true;
                }
        }

        return false;
}

static inline unsigned
fui(float f)
{
        union { float f; unsigned ui; } fi;
        fi.f = f;
        return fi.ui;
}

static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};

bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                         uint32_t packed_small_immediate,
                         uint32_t *small_immediate)
{
        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
                return false;

        *small_immediate = small_immediates[packed_small_immediate];
        return true;
}

bool
v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                       uint32_t value,
                       uint32_t *packed_small_immediate)
{
        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);

        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
                if (small_immediates[i] == value) {
                        *packed_small_immediate = i;
                        return true;
                }
        }

        return false;
}
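
/* The 7-bit cond field packs the add/mul condition codes and pushed flags
 * together.  Decoding below goes by value range: 1-3 are an add pf, 4-15
 * an add uf, 17-19 a mul pf, 20-31 a mul uf, 32-47 add cond + mul pf,
 * 48-63 mul cond + add pf, and 64-127 mul cond + add cond or add uf.
 */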
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}

/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
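/* For example, MUX_MASK(0, 2) is 0x07 (mux values 0-2 accepted) and
 * ANYMUX is 0xff (all eight mux values accepted).
 */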

struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};

static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },

        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
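
/* The mul opcode space is much smaller: opcodes 16-63 all decode as FMUL,
 * with the output pack and both input unpacks folded into the low opcode
 * bits (see the FMUL cases in v3d_qpu_mul_unpack()/v3d_qpu_mul_pack()).
 */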
static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};

static const struct opcode_desc *
lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
              uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (opcode < op_desc->opcode_first ||
                    opcode > op_desc->opcode_last)
                        continue;

                if (!(op_desc->mux_b_mask & (1 << mux_b)))
                        continue;

                if (!(op_desc->mux_a_mask & (1 << mux_a)))
                        continue;

                return op_desc;
        }

        return NULL;
}

static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_ABS;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_L;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_H;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_ABS:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_NONE:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_L:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_H:
                *packed = 3;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_SWAP_16;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_SWAP_16:
                *packed = 4;
                return true;
        default:
                return false;
        }
}
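
/* Note the asymmetry: there is no float32 "pack unpack" helper, because
 * the unpack paths read the output-pack bits straight out of the opcode,
 * relying on V3D_QPU_PACK_NONE/L/H having the same 0/1/2 encodings this
 * helper produces.
 */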
static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
                          uint32_t *packed)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}
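
/* Unpacking an add-ALU op: fold the unpack-replicated opcode clusters back
 * onto their base opcode, look that up in add_ops[], then decode the
 * pack/unpack modifiers and the variants (FADD/FADDNF, FMIN/FMAX, the
 * stvpms) that share an opcode.
 */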
static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
                              map_op, mux_a, mux_b);
        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                else
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
        if (packed_inst & VC5_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}

static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
                                      op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_M_FMOV:
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;

        return true;
}
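
/* Packing an add-ALU op is the inverse: find the op's add_ops[] entry,
 * then fold the output pack and input unpacks back into the opcode and
 * mux fields.
 */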
static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc;

        int opcode;
        for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
             desc++) {
                if (desc->op == instr->alu.add.op)
                        break;
        }
        if (desc == &add_ops[ARRAY_SIZE(add_ops)])
                return false;

        opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= VC5_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;
                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);
                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= VC5_QPU_MA;

        return true;
}
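
/* FMOV has no room for pack bits in its opcode, so the output pack is
 * split across opcode bit 0 (high bit) and mux_b bit 2 (low bit), with
 * the input unpack in mux_b bits 0-1; see the FMOV case below and its
 * mirror in v3d_qpu_mul_unpack().
 */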
static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
        const struct opcode_desc *desc;

        for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
             desc++) {
                if (desc->op == instr->alu.mul.op)
                        break;
        }
        if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= VC5_QPU_MM;

        return true;
}
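
/* When the signal loads a register (ldunifrf, ldunifarf, etc.), the cond
 * field is repurposed as the signal's write address and magic bit, so such
 * an instruction cannot also carry condition/flag state; both the unpack
 * and pack paths below enforce that.
 */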
static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
                                &instr->sig))
                return false;

        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
        instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}

static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);

        instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  VC5_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              VC5_QPU_RADDR_A);

        instr->branch.offset = 0;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}
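
/* A packed instruction with a non-zero mul opcode is an ALU instruction;
 * otherwise it is a branch only if the sig field is in the 16-23 range
 * ((sig & 24) == 16).
 */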
bool
v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                     uint64_t packed_instr,
                     struct v3d_qpu_instr *instr)
{
        if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
        } else {
                uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);

                if ((sig & 24) == 16) {
                        return v3d_qpu_instr_unpack_branch(devinfo,
                                                           packed_instr,
                                                           instr);
                } else {
                        return false;
                }
        }
}

static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
        } else {
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}
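
/* Branch conditions other than ALWAYS pack as 2 + (cond -
 * V3D_QPU_BRANCH_COND_A0), mirroring the unpack side above.
 */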
static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);

        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               VC5_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       VC5_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       VC5_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= VC5_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               VC5_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                               VC5_QPU_BRANCH_MSFIGN);

                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               VC5_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               VC5_QPU_BRANCH_ADDR_HIGH);
                break;

        case V3D_QPU_BRANCH_DEST_REGFILE:
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               VC5_QPU_RADDR_A);
                break;

        default:
                break;
        }

        return true;
}

bool
v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr,
                   uint64_t *packed_instr)
{
        *packed_instr = 0;

        switch (instr->type) {
        case V3D_QPU_INSTR_TYPE_ALU:
                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
        default:
                return false;
        }
}