1/* 2 * Copyright © 2016 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <stdlib.h> 25#include "util/macros.h" 26#include "broadcom/common/v3d_device_info.h" 27#include "qpu_instr.h" 28 29const char * 30v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr) 31{ 32 static const char *waddr_magic[] = { 33 [V3D_QPU_WADDR_R0] = "r0", 34 [V3D_QPU_WADDR_R1] = "r1", 35 [V3D_QPU_WADDR_R2] = "r2", 36 [V3D_QPU_WADDR_R3] = "r3", 37 [V3D_QPU_WADDR_R4] = "r4", 38 [V3D_QPU_WADDR_R5] = "r5", 39 [V3D_QPU_WADDR_NOP] = "-", 40 [V3D_QPU_WADDR_TLB] = "tlb", 41 [V3D_QPU_WADDR_TLBU] = "tlbu", 42 [V3D_QPU_WADDR_TMU] = "tmu", 43 [V3D_QPU_WADDR_TMUL] = "tmul", 44 [V3D_QPU_WADDR_TMUD] = "tmud", 45 [V3D_QPU_WADDR_TMUA] = "tmua", 46 [V3D_QPU_WADDR_TMUAU] = "tmuau", 47 [V3D_QPU_WADDR_VPM] = "vpm", 48 [V3D_QPU_WADDR_VPMU] = "vpmu", 49 [V3D_QPU_WADDR_SYNC] = "sync", 50 [V3D_QPU_WADDR_SYNCU] = "syncu", 51 [V3D_QPU_WADDR_SYNCB] = "syncb", 52 [V3D_QPU_WADDR_RECIP] = "recip", 53 [V3D_QPU_WADDR_RSQRT] = "rsqrt", 54 [V3D_QPU_WADDR_EXP] = "exp", 55 [V3D_QPU_WADDR_LOG] = "log", 56 [V3D_QPU_WADDR_SIN] = "sin", 57 [V3D_QPU_WADDR_RSQRT2] = "rsqrt2", 58 [V3D_QPU_WADDR_TMUC] = "tmuc", 59 [V3D_QPU_WADDR_TMUS] = "tmus", 60 [V3D_QPU_WADDR_TMUT] = "tmut", 61 [V3D_QPU_WADDR_TMUR] = "tmur", 62 [V3D_QPU_WADDR_TMUI] = "tmui", 63 [V3D_QPU_WADDR_TMUB] = "tmub", 64 [V3D_QPU_WADDR_TMUDREF] = "tmudref", 65 [V3D_QPU_WADDR_TMUOFF] = "tmuoff", 66 [V3D_QPU_WADDR_TMUSCM] = "tmuscm", 67 [V3D_QPU_WADDR_TMUSF] = "tmusf", 68 [V3D_QPU_WADDR_TMUSLOD] = "tmuslod", 69 [V3D_QPU_WADDR_TMUHS] = "tmuhs", 70 [V3D_QPU_WADDR_TMUHSCM] = "tmuscm", 71 [V3D_QPU_WADDR_TMUHSF] = "tmuhsf", 72 [V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod", 73 [V3D_QPU_WADDR_R5REP] = "r5rep", 74 }; 75 76 return waddr_magic[waddr]; 77} 78 79const char * 80v3d_qpu_add_op_name(enum v3d_qpu_add_op op) 81{ 82 static const char *op_names[] = { 83 [V3D_QPU_A_FADD] = "fadd", 84 [V3D_QPU_A_FADDNF] = "faddnf", 85 [V3D_QPU_A_VFPACK] = "vfpack", 86 [V3D_QPU_A_ADD] = "add", 87 [V3D_QPU_A_SUB] = "sub", 88 [V3D_QPU_A_FSUB] = "fsub", 89 [V3D_QPU_A_MIN] = "min", 90 [V3D_QPU_A_MAX] = "max", 91 [V3D_QPU_A_UMIN] = "umin", 92 [V3D_QPU_A_UMAX] = "umax", 93 [V3D_QPU_A_SHL] = "shl", 94 [V3D_QPU_A_SHR] = "shr", 95 [V3D_QPU_A_ASR] = "asr", 96 [V3D_QPU_A_ROR] = "ror", 97 [V3D_QPU_A_FMIN] = "fmin", 98 [V3D_QPU_A_FMAX] = "fmax", 99 [V3D_QPU_A_VFMIN] = "vfmin", 100 [V3D_QPU_A_AND] = "and", 101 [V3D_QPU_A_OR] = "or", 102 [V3D_QPU_A_XOR] = "xor", 103 [V3D_QPU_A_VADD] = "vadd", 104 [V3D_QPU_A_VSUB] = "vsub", 105 [V3D_QPU_A_NOT] = "not", 106 [V3D_QPU_A_NEG] = "neg", 107 [V3D_QPU_A_FLAPUSH] = "flapush", 108 [V3D_QPU_A_FLBPUSH] = "flbpush", 109 [V3D_QPU_A_FLPOP] = "flpop", 110 [V3D_QPU_A_RECIP] = "recip", 111 [V3D_QPU_A_SETMSF] = "setmsf", 112 [V3D_QPU_A_SETREVF] = "setrevf", 113 [V3D_QPU_A_NOP] = "nop", 114 [V3D_QPU_A_TIDX] = "tidx", 115 [V3D_QPU_A_EIDX] = "eidx", 116 [V3D_QPU_A_LR] = "lr", 117 [V3D_QPU_A_VFLA] = "vfla", 118 [V3D_QPU_A_VFLNA] = "vflna", 119 [V3D_QPU_A_VFLB] = "vflb", 120 [V3D_QPU_A_VFLNB] = "vflnb", 121 [V3D_QPU_A_FXCD] = "fxcd", 122 [V3D_QPU_A_XCD] = "xcd", 123 [V3D_QPU_A_FYCD] = "fycd", 124 [V3D_QPU_A_YCD] = "ycd", 125 [V3D_QPU_A_MSF] = "msf", 126 [V3D_QPU_A_REVF] = "revf", 127 [V3D_QPU_A_VDWWT] = "vdwwt", 128 [V3D_QPU_A_IID] = "iid", 129 [V3D_QPU_A_SAMPID] = "sampid", 130 [V3D_QPU_A_BARRIERID] = "barrierid", 131 [V3D_QPU_A_TMUWT] = "tmuwt", 132 [V3D_QPU_A_VPMSETUP] = "vpmsetup", 133 [V3D_QPU_A_VPMWT] = "vpmwt", 134 [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in", 135 [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out", 136 [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in", 137 [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out", 138 [V3D_QPU_A_LDVPMP] = "ldvpmp", 139 [V3D_QPU_A_RSQRT] = "rsqrt", 140 [V3D_QPU_A_EXP] = "exp", 141 [V3D_QPU_A_LOG] = "log", 142 [V3D_QPU_A_SIN] = "sin", 143 [V3D_QPU_A_RSQRT2] = "rsqrt2", 144 [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in", 145 [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out", 146 [V3D_QPU_A_FCMP] = "fcmp", 147 [V3D_QPU_A_VFMAX] = "vfmax", 148 [V3D_QPU_A_FROUND] = "fround", 149 [V3D_QPU_A_FTOIN] = "ftoin", 150 [V3D_QPU_A_FTRUNC] = "ftrunc", 151 [V3D_QPU_A_FTOIZ] = "ftoiz", 152 [V3D_QPU_A_FFLOOR] = "ffloor", 153 [V3D_QPU_A_FTOUZ] = "ftouz", 154 [V3D_QPU_A_FCEIL] = "fceil", 155 [V3D_QPU_A_FTOC] = "ftoc", 156 [V3D_QPU_A_FDX] = "fdx", 157 [V3D_QPU_A_FDY] = "fdy", 158 [V3D_QPU_A_STVPMV] = "stvpmv", 159 [V3D_QPU_A_STVPMD] = "stvpmd", 160 [V3D_QPU_A_STVPMP] = "stvpmp", 161 [V3D_QPU_A_ITOF] = "itof", 162 [V3D_QPU_A_CLZ] = "clz", 163 [V3D_QPU_A_UTOF] = "utof", 164 }; 165 166 if (op >= ARRAY_SIZE(op_names)) 167 return NULL; 168 169 return op_names[op]; 170} 171 172const char * 173v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op) 174{ 175 static const char *op_names[] = { 176 [V3D_QPU_M_ADD] = "add", 177 [V3D_QPU_M_SUB] = "sub", 178 [V3D_QPU_M_UMUL24] = "umul24", 179 [V3D_QPU_M_VFMUL] = "vfmul", 180 [V3D_QPU_M_SMUL24] = "smul24", 181 [V3D_QPU_M_MULTOP] = "multop", 182 [V3D_QPU_M_FMOV] = "fmov", 183 [V3D_QPU_M_MOV] = "mov", 184 [V3D_QPU_M_NOP] = "nop", 185 [V3D_QPU_M_FMUL] = "fmul", 186 }; 187 188 if (op >= ARRAY_SIZE(op_names)) 189 return NULL; 190 191 return op_names[op]; 192} 193 194const char * 195v3d_qpu_cond_name(enum v3d_qpu_cond cond) 196{ 197 switch (cond) { 198 case V3D_QPU_COND_NONE: 199 return ""; 200 case V3D_QPU_COND_IFA: 201 return ".ifa"; 202 case V3D_QPU_COND_IFB: 203 return ".ifb"; 204 case V3D_QPU_COND_IFNA: 205 return ".ifna"; 206 case V3D_QPU_COND_IFNB: 207 return ".ifnb"; 208 default: 209 unreachable("bad cond value"); 210 } 211} 212 213const char * 214v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond) 215{ 216 switch (cond) { 217 case V3D_QPU_BRANCH_COND_ALWAYS: 218 return ""; 219 case V3D_QPU_BRANCH_COND_A0: 220 return ".a0"; 221 case V3D_QPU_BRANCH_COND_NA0: 222 return ".na0"; 223 case V3D_QPU_BRANCH_COND_ALLA: 224 return ".alla"; 225 case V3D_QPU_BRANCH_COND_ANYNA: 226 return ".anyna"; 227 case V3D_QPU_BRANCH_COND_ANYA: 228 return ".anya"; 229 case V3D_QPU_BRANCH_COND_ALLNA: 230 return ".allna"; 231 default: 232 unreachable("bad branch cond value"); 233 } 234} 235 236const char * 237v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign) 238{ 239 switch (msfign) { 240 case V3D_QPU_MSFIGN_NONE: 241 return ""; 242 case V3D_QPU_MSFIGN_P: 243 return "p"; 244 case V3D_QPU_MSFIGN_Q: 245 return "q"; 246 default: 247 unreachable("bad branch cond value"); 248 } 249} 250 251const char * 252v3d_qpu_pf_name(enum v3d_qpu_pf pf) 253{ 254 switch (pf) { 255 case V3D_QPU_PF_NONE: 256 return ""; 257 case V3D_QPU_PF_PUSHZ: 258 return ".pushz"; 259 case V3D_QPU_PF_PUSHN: 260 return ".pushn"; 261 case V3D_QPU_PF_PUSHC: 262 return ".pushc"; 263 default: 264 unreachable("bad pf value"); 265 } 266} 267 268const char * 269v3d_qpu_uf_name(enum v3d_qpu_uf uf) 270{ 271 switch (uf) { 272 case V3D_QPU_UF_NONE: 273 return ""; 274 case V3D_QPU_UF_ANDZ: 275 return ".andz"; 276 case V3D_QPU_UF_ANDNZ: 277 return ".andnz"; 278 case V3D_QPU_UF_NORZ: 279 return ".norz"; 280 case V3D_QPU_UF_NORNZ: 281 return ".nornz"; 282 case V3D_QPU_UF_ANDN: 283 return ".andn"; 284 case V3D_QPU_UF_ANDNN: 285 return ".andnn"; 286 case V3D_QPU_UF_NORN: 287 return ".norn"; 288 case V3D_QPU_UF_NORNN: 289 return ".nornn"; 290 case V3D_QPU_UF_ANDC: 291 return ".andc"; 292 case V3D_QPU_UF_ANDNC: 293 return ".andnc"; 294 case V3D_QPU_UF_NORC: 295 return ".norc"; 296 case V3D_QPU_UF_NORNC: 297 return ".nornc"; 298 default: 299 unreachable("bad pf value"); 300 } 301} 302 303const char * 304v3d_qpu_pack_name(enum v3d_qpu_output_pack pack) 305{ 306 switch (pack) { 307 case V3D_QPU_PACK_NONE: 308 return ""; 309 case V3D_QPU_PACK_L: 310 return ".l"; 311 case V3D_QPU_PACK_H: 312 return ".h"; 313 default: 314 unreachable("bad pack value"); 315 } 316} 317 318const char * 319v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack) 320{ 321 switch (unpack) { 322 case V3D_QPU_UNPACK_NONE: 323 return ""; 324 case V3D_QPU_UNPACK_L: 325 return ".l"; 326 case V3D_QPU_UNPACK_H: 327 return ".h"; 328 case V3D_QPU_UNPACK_ABS: 329 return ".abs"; 330 case V3D_QPU_UNPACK_REPLICATE_32F_16: 331 return ".ff"; 332 case V3D_QPU_UNPACK_REPLICATE_L_16: 333 return ".ll"; 334 case V3D_QPU_UNPACK_REPLICATE_H_16: 335 return ".hh"; 336 case V3D_QPU_UNPACK_SWAP_16: 337 return ".swp"; 338 default: 339 unreachable("bad unpack value"); 340 } 341} 342 343#define D 1 344#define A 2 345#define B 4 346static const uint8_t add_op_args[] = { 347 [V3D_QPU_A_FADD] = D | A | B, 348 [V3D_QPU_A_FADDNF] = D | A | B, 349 [V3D_QPU_A_VFPACK] = D | A | B, 350 [V3D_QPU_A_ADD] = D | A | B, 351 [V3D_QPU_A_VFPACK] = D | A | B, 352 [V3D_QPU_A_SUB] = D | A | B, 353 [V3D_QPU_A_VFPACK] = D | A | B, 354 [V3D_QPU_A_FSUB] = D | A | B, 355 [V3D_QPU_A_MIN] = D | A | B, 356 [V3D_QPU_A_MAX] = D | A | B, 357 [V3D_QPU_A_UMIN] = D | A | B, 358 [V3D_QPU_A_UMAX] = D | A | B, 359 [V3D_QPU_A_SHL] = D | A | B, 360 [V3D_QPU_A_SHR] = D | A | B, 361 [V3D_QPU_A_ASR] = D | A | B, 362 [V3D_QPU_A_ROR] = D | A | B, 363 [V3D_QPU_A_FMIN] = D | A | B, 364 [V3D_QPU_A_FMAX] = D | A | B, 365 [V3D_QPU_A_VFMIN] = D | A | B, 366 367 [V3D_QPU_A_AND] = D | A | B, 368 [V3D_QPU_A_OR] = D | A | B, 369 [V3D_QPU_A_XOR] = D | A | B, 370 371 [V3D_QPU_A_VADD] = D | A | B, 372 [V3D_QPU_A_VSUB] = D | A | B, 373 [V3D_QPU_A_NOT] = D | A, 374 [V3D_QPU_A_NEG] = D | A, 375 [V3D_QPU_A_FLAPUSH] = D | A, 376 [V3D_QPU_A_FLBPUSH] = D | A, 377 [V3D_QPU_A_FLPOP] = D | A, 378 [V3D_QPU_A_RECIP] = D | A, 379 [V3D_QPU_A_SETMSF] = D | A, 380 [V3D_QPU_A_SETREVF] = D | A, 381 [V3D_QPU_A_NOP] = 0, 382 [V3D_QPU_A_TIDX] = D, 383 [V3D_QPU_A_EIDX] = D, 384 [V3D_QPU_A_LR] = D, 385 [V3D_QPU_A_VFLA] = D, 386 [V3D_QPU_A_VFLNA] = D, 387 [V3D_QPU_A_VFLB] = D, 388 [V3D_QPU_A_VFLNB] = D, 389 390 [V3D_QPU_A_FXCD] = D, 391 [V3D_QPU_A_XCD] = D, 392 [V3D_QPU_A_FYCD] = D, 393 [V3D_QPU_A_YCD] = D, 394 395 [V3D_QPU_A_MSF] = D, 396 [V3D_QPU_A_REVF] = D, 397 [V3D_QPU_A_VDWWT] = D, 398 [V3D_QPU_A_IID] = D, 399 [V3D_QPU_A_SAMPID] = D, 400 [V3D_QPU_A_BARRIERID] = D, 401 [V3D_QPU_A_TMUWT] = D, 402 [V3D_QPU_A_VPMWT] = D, 403 404 [V3D_QPU_A_VPMSETUP] = D | A, 405 406 [V3D_QPU_A_LDVPMV_IN] = D | A, 407 [V3D_QPU_A_LDVPMV_OUT] = D | A, 408 [V3D_QPU_A_LDVPMD_IN] = D | A, 409 [V3D_QPU_A_LDVPMD_OUT] = D | A, 410 [V3D_QPU_A_LDVPMP] = D | A, 411 [V3D_QPU_A_RSQRT] = D | A, 412 [V3D_QPU_A_EXP] = D | A, 413 [V3D_QPU_A_LOG] = D | A, 414 [V3D_QPU_A_SIN] = D | A, 415 [V3D_QPU_A_RSQRT2] = D | A, 416 [V3D_QPU_A_LDVPMG_IN] = D | A | B, 417 [V3D_QPU_A_LDVPMG_OUT] = D | A | B, 418 419 /* FIXME: MOVABSNEG */ 420 421 [V3D_QPU_A_FCMP] = D | A | B, 422 [V3D_QPU_A_VFMAX] = D | A | B, 423 424 [V3D_QPU_A_FROUND] = D | A, 425 [V3D_QPU_A_FTOIN] = D | A, 426 [V3D_QPU_A_FTRUNC] = D | A, 427 [V3D_QPU_A_FTOIZ] = D | A, 428 [V3D_QPU_A_FFLOOR] = D | A, 429 [V3D_QPU_A_FTOUZ] = D | A, 430 [V3D_QPU_A_FCEIL] = D | A, 431 [V3D_QPU_A_FTOC] = D | A, 432 433 [V3D_QPU_A_FDX] = D | A, 434 [V3D_QPU_A_FDY] = D | A, 435 436 [V3D_QPU_A_STVPMV] = A | B, 437 [V3D_QPU_A_STVPMD] = A | B, 438 [V3D_QPU_A_STVPMP] = A | B, 439 440 [V3D_QPU_A_ITOF] = D | A, 441 [V3D_QPU_A_CLZ] = D | A, 442 [V3D_QPU_A_UTOF] = D | A, 443}; 444 445static const uint8_t mul_op_args[] = { 446 [V3D_QPU_M_ADD] = D | A | B, 447 [V3D_QPU_M_SUB] = D | A | B, 448 [V3D_QPU_M_UMUL24] = D | A | B, 449 [V3D_QPU_M_VFMUL] = D | A | B, 450 [V3D_QPU_M_SMUL24] = D | A | B, 451 [V3D_QPU_M_MULTOP] = D | A | B, 452 [V3D_QPU_M_FMOV] = D | A, 453 [V3D_QPU_M_NOP] = 0, 454 [V3D_QPU_M_MOV] = D | A, 455 [V3D_QPU_M_FMUL] = D | A | B, 456}; 457 458bool 459v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op) 460{ 461 assert(op < ARRAY_SIZE(add_op_args)); 462 463 return add_op_args[op] & D; 464} 465 466bool 467v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op) 468{ 469 assert(op < ARRAY_SIZE(mul_op_args)); 470 471 return mul_op_args[op] & D; 472} 473 474int 475v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op) 476{ 477 assert(op < ARRAY_SIZE(add_op_args)); 478 479 uint8_t args = add_op_args[op]; 480 if (args & B) 481 return 2; 482 else if (args & A) 483 return 1; 484 else 485 return 0; 486} 487 488int 489v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op) 490{ 491 assert(op < ARRAY_SIZE(mul_op_args)); 492 493 uint8_t args = mul_op_args[op]; 494 if (args & B) 495 return 2; 496 else if (args & A) 497 return 1; 498 else 499 return 0; 500} 501 502enum v3d_qpu_cond 503v3d_qpu_cond_invert(enum v3d_qpu_cond cond) 504{ 505 switch (cond) { 506 case V3D_QPU_COND_IFA: 507 return V3D_QPU_COND_IFNA; 508 case V3D_QPU_COND_IFNA: 509 return V3D_QPU_COND_IFA; 510 case V3D_QPU_COND_IFB: 511 return V3D_QPU_COND_IFNB; 512 case V3D_QPU_COND_IFNB: 513 return V3D_QPU_COND_IFB; 514 default: 515 unreachable("Non-invertible cond"); 516 } 517} 518 519bool 520v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) 521{ 522 switch (waddr) { 523 case V3D_QPU_WADDR_RECIP: 524 case V3D_QPU_WADDR_RSQRT: 525 case V3D_QPU_WADDR_EXP: 526 case V3D_QPU_WADDR_LOG: 527 case V3D_QPU_WADDR_SIN: 528 case V3D_QPU_WADDR_RSQRT2: 529 return true; 530 default: 531 return false; 532 } 533} 534 535bool 536v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) 537{ 538 /* XXX: WADDR_TMU changed to UNIFA on 4.x */ 539 return ((waddr >= V3D_QPU_WADDR_TMU && 540 waddr <= V3D_QPU_WADDR_TMUAU) || 541 (waddr >= V3D_QPU_WADDR_TMUC && 542 waddr <= V3D_QPU_WADDR_TMUHSLOD)); 543} 544 545bool 546v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) 547{ 548 return (inst->sig.ldtmu || 549 (inst->type == V3D_QPU_INSTR_TYPE_ALU && 550 inst->alu.add.op == V3D_QPU_A_TMUWT)); 551} 552 553bool 554v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) 555{ 556 return (waddr == V3D_QPU_WADDR_TLB || 557 waddr == V3D_QPU_WADDR_TLBU); 558} 559 560bool 561v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) 562{ 563 return (waddr == V3D_QPU_WADDR_VPM || 564 waddr == V3D_QPU_WADDR_VPMU); 565} 566 567bool 568v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) 569{ 570 return (waddr == V3D_QPU_WADDR_SYNC || 571 waddr == V3D_QPU_WADDR_SYNCB || 572 waddr == V3D_QPU_WADDR_SYNCU); 573} 574 575bool 576v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) 577{ 578 switch (waddr) { 579 case V3D_QPU_WADDR_VPMU: 580 case V3D_QPU_WADDR_TLBU: 581 case V3D_QPU_WADDR_TMUAU: 582 case V3D_QPU_WADDR_SYNCU: 583 return true; 584 default: 585 return false; 586 } 587} 588 589static bool 590v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op) 591{ 592 switch (op) { 593 case V3D_QPU_A_VPMSETUP: 594 case V3D_QPU_A_VPMWT: 595 case V3D_QPU_A_LDVPMV_IN: 596 case V3D_QPU_A_LDVPMV_OUT: 597 case V3D_QPU_A_LDVPMD_IN: 598 case V3D_QPU_A_LDVPMD_OUT: 599 case V3D_QPU_A_LDVPMP: 600 case V3D_QPU_A_LDVPMG_IN: 601 case V3D_QPU_A_LDVPMG_OUT: 602 return true; 603 default: 604 return false; 605 } 606} 607 608static bool 609v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op) 610{ 611 switch (op) { 612 case V3D_QPU_A_VPMSETUP: 613 case V3D_QPU_A_VPMWT: 614 case V3D_QPU_A_STVPMV: 615 case V3D_QPU_A_STVPMD: 616 case V3D_QPU_A_STVPMP: 617 return true; 618 default: 619 return false; 620 } 621} 622 623bool 624v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) 625{ 626 if (inst->sig.ldtlb || 627 inst->sig.ldtlbu) 628 return true; 629 630 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 631 if (inst->alu.add.magic_write && 632 v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) { 633 return true; 634 } 635 636 if (inst->alu.mul.magic_write && 637 v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) { 638 return true; 639 } 640 } 641 642 return false; 643} 644 645bool 646v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) 647{ 648 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 649 switch (inst->alu.add.op) { 650 case V3D_QPU_A_RECIP: 651 case V3D_QPU_A_RSQRT: 652 case V3D_QPU_A_EXP: 653 case V3D_QPU_A_LOG: 654 case V3D_QPU_A_SIN: 655 case V3D_QPU_A_RSQRT2: 656 return true; 657 default: 658 break; 659 } 660 661 if (inst->alu.add.magic_write && 662 v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) { 663 return true; 664 } 665 666 if (inst->alu.mul.magic_write && 667 v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) { 668 return true; 669 } 670 } 671 672 return false; 673} 674 675bool 676v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) 677{ 678 return (inst->type == V3D_QPU_INSTR_TYPE_ALU && 679 ((inst->alu.add.magic_write && 680 v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr)) || 681 (inst->alu.mul.magic_write && 682 v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr)))); 683} 684 685bool 686v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) 687{ 688 if (inst->sig.ldvpm) 689 return true; 690 691 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 692 if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op)) 693 return true; 694 } 695 696 return false; 697} 698 699bool 700v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) 701{ 702 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 703 if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op)) 704 return true; 705 706 if (inst->alu.add.magic_write && 707 v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) { 708 return true; 709 } 710 711 if (inst->alu.mul.magic_write && 712 v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) { 713 return true; 714 } 715 } 716 717 return false; 718} 719 720bool 721v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) 722{ 723 return v3d_qpu_reads_vpm(inst) || v3d_qpu_writes_vpm(inst); 724} 725 726bool 727v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, 728 const struct v3d_qpu_instr *inst) 729{ 730 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 731 if (inst->alu.add.magic_write && 732 inst->alu.add.waddr == V3D_QPU_WADDR_R3) { 733 return true; 734 } 735 736 if (inst->alu.mul.magic_write && 737 inst->alu.mul.waddr == V3D_QPU_WADDR_R3) { 738 return true; 739 } 740 } 741 742 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && 743 inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R3) { 744 return true; 745 } 746 747 return inst->sig.ldvary || inst->sig.ldvpm; 748} 749 750bool 751v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, 752 const struct v3d_qpu_instr *inst) 753{ 754 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 755 if (inst->alu.add.magic_write && 756 (inst->alu.add.waddr == V3D_QPU_WADDR_R4 || 757 v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) { 758 return true; 759 } 760 761 if (inst->alu.mul.magic_write && 762 (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 || 763 v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) { 764 return true; 765 } 766 } 767 768 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) { 769 if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4) 770 return true; 771 } else if (inst->sig.ldtmu) { 772 return true; 773 } 774 775 return false; 776} 777 778bool 779v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, 780 const struct v3d_qpu_instr *inst) 781{ 782 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 783 if (inst->alu.add.magic_write && 784 inst->alu.add.waddr == V3D_QPU_WADDR_R5) { 785 return true; 786 } 787 788 if (inst->alu.mul.magic_write && 789 inst->alu.mul.waddr == V3D_QPU_WADDR_R5) { 790 return true; 791 } 792 } 793 794 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && 795 inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R5) { 796 return true; 797 } 798 799 return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa; 800} 801 802bool 803v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux) 804{ 805 int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op); 806 int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op); 807 808 return ((add_nsrc > 0 && inst->alu.add.a == mux) || 809 (add_nsrc > 1 && inst->alu.add.b == mux) || 810 (mul_nsrc > 0 && inst->alu.mul.a == mux) || 811 (mul_nsrc > 1 && inst->alu.mul.b == mux)); 812} 813 814bool 815v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, 816 const struct v3d_qpu_sig *sig) 817{ 818 if (devinfo->ver < 41) 819 return false; 820 821 return (sig->ldunifrf || 822 sig->ldunifarf || 823 sig->ldvary || 824 sig->ldtmu || 825 sig->ldtlb || 826 sig->ldtlbu); 827} 828 829bool 830v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) 831{ 832 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) { 833 return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS; 834 } else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 835 if (inst->flags.ac != V3D_QPU_COND_NONE || 836 inst->flags.mc != V3D_QPU_COND_NONE || 837 inst->flags.auf != V3D_QPU_UF_NONE || 838 inst->flags.muf != V3D_QPU_UF_NONE) 839 return true; 840 841 switch (inst->alu.add.op) { 842 case V3D_QPU_A_VFLA: 843 case V3D_QPU_A_VFLNA: 844 case V3D_QPU_A_VFLB: 845 case V3D_QPU_A_VFLNB: 846 case V3D_QPU_A_FLAPUSH: 847 case V3D_QPU_A_FLBPUSH: 848 return true; 849 default: 850 break; 851 } 852 } 853 854 return false; 855} 856 857bool 858v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) 859{ 860 if (inst->flags.apf != V3D_QPU_PF_NONE || 861 inst->flags.mpf != V3D_QPU_PF_NONE || 862 inst->flags.auf != V3D_QPU_UF_NONE || 863 inst->flags.muf != V3D_QPU_UF_NONE) { 864 return true; 865 } 866 867 return false; 868} 869 870bool 871v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) 872{ 873 if (inst->type != V3D_QPU_INSTR_TYPE_ALU) 874 return false; 875 876 switch (inst->alu.add.op) { 877 case V3D_QPU_A_FADD: 878 case V3D_QPU_A_FADDNF: 879 case V3D_QPU_A_FSUB: 880 case V3D_QPU_A_FMIN: 881 case V3D_QPU_A_FMAX: 882 case V3D_QPU_A_FCMP: 883 case V3D_QPU_A_FROUND: 884 case V3D_QPU_A_FTRUNC: 885 case V3D_QPU_A_FFLOOR: 886 case V3D_QPU_A_FCEIL: 887 case V3D_QPU_A_FDX: 888 case V3D_QPU_A_FDY: 889 case V3D_QPU_A_FTOIN: 890 case V3D_QPU_A_FTOIZ: 891 case V3D_QPU_A_FTOUZ: 892 case V3D_QPU_A_FTOC: 893 case V3D_QPU_A_VFPACK: 894 return true; 895 break; 896 default: 897 break; 898 } 899 900 switch (inst->alu.mul.op) { 901 case V3D_QPU_M_FMOV: 902 case V3D_QPU_M_FMUL: 903 return true; 904 break; 905 default: 906 break; 907 } 908 909 return false; 910} 911bool 912v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) 913{ 914 if (inst->type != V3D_QPU_INSTR_TYPE_ALU) 915 return false; 916 917 switch (inst->alu.add.op) { 918 case V3D_QPU_A_VFMIN: 919 case V3D_QPU_A_VFMAX: 920 return true; 921 break; 922 default: 923 break; 924 } 925 926 switch (inst->alu.mul.op) { 927 case V3D_QPU_M_VFMUL: 928 return true; 929 break; 930 default: 931 break; 932 } 933 934 return false; 935} 936