/*
 * Copyright (c) 2013 Rob Clark <robdclark (at) gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

#define PACKED __attribute__((__packed__))

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Optional hook invoked by ir3_assert() when an assertion fails.  It is
 * declared weak, so a program that does not define it sees a null address
 * and ir3_assert() falls through to plain assert().
 */
/* clang-format off */
void ir3_assert_handler(const char *expr, const char *file, int line,
                        const char *func) __attribute__((weak)) __attribute__((__noreturn__));
/* clang-format on */

/* A wrapper for assert() that allows overriding the handling of a failed
 * assert.  This is needed for tools like crashdec which may want to
 * attempt to disassemble memory that might not actually be valid
 * instructions.
 *
 * NOTE: on failure `expr` is evaluated a second time by the trailing
 * assert(), so it must not have side effects.
 */
#define ir3_assert(expr)                                                       \
   do {                                                                        \
      if (!(expr)) {                                                           \
         if (ir3_assert_handler) {                                             \
            ir3_assert_handler(#expr, __FILE__, __LINE__, __func__);           \
         }                                                                     \
         assert(expr);                                                         \
      }                                                                        \
   } while (0)

/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Pack a (category, opcode) pair into a single opc_t value.
 *
 * The category is scaled by multiplication rather than by `<<` because the
 * meta opcodes use category -1, and left-shifting a negative value is
 * undefined behaviour in C.  For every non-negative category the result is
 * bit-for-bit what a shift would produce.
 */
#define _OPC(cat, opc) (((cat) * (1 << NOPC_BITS)) | (opc))

/* clang-format off */
typedef enum {
   /* category 0: */
   OPC_NOP             = _OPC(0, 0),
   OPC_B               = _OPC(0, 1),
   OPC_JUMP            = _OPC(0, 2),
   OPC_CALL            = _OPC(0, 3),
   OPC_RET             = _OPC(0, 4),
   OPC_KILL            = _OPC(0, 5),
   OPC_END             = _OPC(0, 6),
   OPC_EMIT            = _OPC(0, 7),
   OPC_CUT             = _OPC(0, 8),
   OPC_CHMASK          = _OPC(0, 9),
   OPC_CHSH            = _OPC(0, 10),
   OPC_FLOW_REV        = _OPC(0, 11),

   OPC_BKT             = _OPC(0, 16),
   OPC_STKS            = _OPC(0, 17),
   OPC_STKR            = _OPC(0, 18),
   OPC_XSET            = _OPC(0, 19),
   OPC_XCLR            = _OPC(0, 20),
   OPC_GETONE          = _OPC(0, 21),
   OPC_DBG             = _OPC(0, 22),
   OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
   OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */

   OPC_PREDT           = _OPC(0, 29),   /* predicated true */
   OPC_PREDF           = _OPC(0, 30),   /* predicated false */
   OPC_PREDE           = _OPC(0, 31),   /* predicated end */

   /* Logical opcodes for different branch instruction variations: */
   OPC_BR              = _OPC(0, 40),
   OPC_BRAO            = _OPC(0, 41),
   OPC_BRAA            = _OPC(0, 42),
   OPC_BRAC            = _OPC(0, 43),
   OPC_BANY            = _OPC(0, 44),
   OPC_BALL            = _OPC(0, 45),
   OPC_BRAX            = _OPC(0, 46),

   /* Logical opcode to distinguish kill and demote */
   OPC_DEMOTE          = _OPC(0, 47),

   /* category 1: */
   OPC_MOV             = _OPC(1, 0),
   OPC_MOVP            = _OPC(1, 1),
   /* swz, gat, sct */
   OPC_MOVMSK          = _OPC(1, 3),

   /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
    * replaces the repeat field:
    */
   OPC_SWZ             = _OPC(1, 4),
   OPC_GAT             = _OPC(1, 5),
   OPC_SCT             = _OPC(1, 6),

   /* Logical opcodes for different variants of mov: */
   OPC_MOV_IMMED       = _OPC(1, 40),
   OPC_MOV_CONST       = _OPC(1, 41),
   OPC_MOV_GPR         = _OPC(1, 42),
   OPC_MOV_RELGPR      = _OPC(1, 43),
   OPC_MOV_RELCONST    = _OPC(1, 44),

   /* Macros that expand to an if statement + move */
   OPC_BALLOT_MACRO    = _OPC(1, 50),
   OPC_ANY_MACRO       = _OPC(1, 51),
   OPC_ALL_MACRO       = _OPC(1, 52),
   OPC_ELECT_MACRO     = _OPC(1, 53),
   OPC_READ_COND_MACRO = _OPC(1, 54),
   OPC_READ_FIRST_MACRO = _OPC(1, 55),
   OPC_SWZ_SHARED_MACRO = _OPC(1, 56),

   /* category 2: */
   OPC_ADD_F           = _OPC(2, 0),
   OPC_MIN_F           = _OPC(2, 1),
   OPC_MAX_F           = _OPC(2, 2),
   OPC_MUL_F           = _OPC(2, 3),
   OPC_SIGN_F          = _OPC(2, 4),
   OPC_CMPS_F          = _OPC(2, 5),
   OPC_ABSNEG_F        = _OPC(2, 6),
   OPC_CMPV_F          = _OPC(2, 7),
   /* 8 - invalid */
   OPC_FLOOR_F         = _OPC(2, 9),
   OPC_CEIL_F          = _OPC(2, 10),
   OPC_RNDNE_F         = _OPC(2, 11),
   OPC_RNDAZ_F         = _OPC(2, 12),
   OPC_TRUNC_F         = _OPC(2, 13),
   /* 14-15 - invalid */
   OPC_ADD_U           = _OPC(2, 16),
   OPC_ADD_S           = _OPC(2, 17),
   OPC_SUB_U           = _OPC(2, 18),
   OPC_SUB_S           = _OPC(2, 19),
   OPC_CMPS_U          = _OPC(2, 20),
   OPC_CMPS_S          = _OPC(2, 21),
   OPC_MIN_U           = _OPC(2, 22),
   OPC_MIN_S           = _OPC(2, 23),
   OPC_MAX_U           = _OPC(2, 24),
   OPC_MAX_S           = _OPC(2, 25),
   OPC_ABSNEG_S        = _OPC(2, 26),
   /* 27 - invalid */
   OPC_AND_B           = _OPC(2, 28),
   OPC_OR_B            = _OPC(2, 29),
   OPC_NOT_B           = _OPC(2, 30),
   OPC_XOR_B           = _OPC(2, 31),
   /* 32 - invalid */
   OPC_CMPV_U          = _OPC(2, 33),
   OPC_CMPV_S          = _OPC(2, 34),
   /* 35-47 - invalid */
   OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
   OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
   OPC_MULL_U          = _OPC(2, 50),
   OPC_BFREV_B         = _OPC(2, 51),
   OPC_CLZ_S           = _OPC(2, 52),
   OPC_CLZ_B           = _OPC(2, 53),
   OPC_SHL_B           = _OPC(2, 54),
   OPC_SHR_B           = _OPC(2, 55),
   OPC_ASHR_B          = _OPC(2, 56),
   OPC_BARY_F          = _OPC(2, 57),
   OPC_MGEN_B          = _OPC(2, 58),
   OPC_GETBIT_B        = _OPC(2, 59),
   OPC_SETRM           = _OPC(2, 60),
   OPC_CBITS_B         = _OPC(2, 61),
   OPC_SHB             = _OPC(2, 62),
   OPC_MSAD            = _OPC(2, 63),

   /* category 3: */
   OPC_MAD_U16         = _OPC(3, 0),
   OPC_MADSH_U16       = _OPC(3, 1),
   OPC_MAD_S16         = _OPC(3, 2),
   OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
   OPC_MAD_U24         = _OPC(3, 4),
   OPC_MAD_S24         = _OPC(3, 5),
   OPC_MAD_F16         = _OPC(3, 6),
   OPC_MAD_F32         = _OPC(3, 7),
   OPC_SEL_B16         = _OPC(3, 8),
   OPC_SEL_B32         = _OPC(3, 9),
   OPC_SEL_S16         = _OPC(3, 10),
   OPC_SEL_S32         = _OPC(3, 11),
   OPC_SEL_F16         = _OPC(3, 12),
   OPC_SEL_F32         = _OPC(3, 13),
   OPC_SAD_S16         = _OPC(3, 14),
   OPC_SAD_S32         = _OPC(3, 15),
   OPC_SHLG_B16        = _OPC(3, 16),

   /* category 4: */
   OPC_RCP             = _OPC(4, 0),
   OPC_RSQ             = _OPC(4, 1),
   OPC_LOG2            = _OPC(4, 2),
   OPC_EXP2            = _OPC(4, 3),
   OPC_SIN             = _OPC(4, 4),
   OPC_COS             = _OPC(4, 5),
   OPC_SQRT            = _OPC(4, 6),
   /* NOTE that these are 8+opc from their highp equivs, so it's possible
    * that the high order bit in the opc field has been repurposed for
    * half-precision use?  But note that other ops (rcp/sin/cos/sqrt)
    * still use the same opc as highp
    */
   OPC_HRSQ            = _OPC(4, 9),
   OPC_HLOG2           = _OPC(4, 10),
   OPC_HEXP2           = _OPC(4, 11),

   /* category 5: */
   OPC_ISAM            = _OPC(5, 0),
   OPC_ISAML           = _OPC(5, 1),
   OPC_ISAMM           = _OPC(5, 2),
   OPC_SAM             = _OPC(5, 3),
   OPC_SAMB            = _OPC(5, 4),
   OPC_SAML            = _OPC(5, 5),
   OPC_SAMGQ           = _OPC(5, 6),
   OPC_GETLOD          = _OPC(5, 7),
   OPC_CONV            = _OPC(5, 8),
   OPC_CONVM           = _OPC(5, 9),
   OPC_GETSIZE         = _OPC(5, 10),
   OPC_GETBUF          = _OPC(5, 11),
   OPC_GETPOS          = _OPC(5, 12),
   OPC_GETINFO         = _OPC(5, 13),
   OPC_DSX             = _OPC(5, 14),
   OPC_DSY             = _OPC(5, 15),
   OPC_GATHER4R        = _OPC(5, 16),
   OPC_GATHER4G        = _OPC(5, 17),
   OPC_GATHER4B        = _OPC(5, 18),
   OPC_GATHER4A        = _OPC(5, 19),
   OPC_SAMGP0          = _OPC(5, 20),
   OPC_SAMGP1          = _OPC(5, 21),
   OPC_SAMGP2          = _OPC(5, 22),
   OPC_SAMGP3          = _OPC(5, 23),
   OPC_DSXPP_1         = _OPC(5, 24),
   OPC_DSYPP_1         = _OPC(5, 25),
   OPC_RGETPOS         = _OPC(5, 26),
   OPC_RGETINFO        = _OPC(5, 27),
   /* cat5 meta instructions, placed above the cat5 opc field's size */
   OPC_DSXPP_MACRO     = _OPC(5, 32),
   OPC_DSYPP_MACRO     = _OPC(5, 33),

   /* category 6: */
   OPC_LDG             = _OPC(6, 0),   /* load-global */
   OPC_LDL             = _OPC(6, 1),
   OPC_LDP             = _OPC(6, 2),
   OPC_STG             = _OPC(6, 3),   /* store-global */
   OPC_STL             = _OPC(6, 4),
   OPC_STP             = _OPC(6, 5),
   OPC_LDIB            = _OPC(6, 6),
   OPC_G2L             = _OPC(6, 7),
   OPC_L2G             = _OPC(6, 8),
   OPC_PREFETCH        = _OPC(6, 9),
   OPC_LDLW            = _OPC(6, 10),
   OPC_STLW            = _OPC(6, 11),
   OPC_RESFMT          = _OPC(6, 14),
   OPC_RESINFO         = _OPC(6, 15),
   OPC_ATOMIC_ADD      = _OPC(6, 16),
   OPC_ATOMIC_SUB      = _OPC(6, 17),
   OPC_ATOMIC_XCHG     = _OPC(6, 18),
   OPC_ATOMIC_INC      = _OPC(6, 19),
   OPC_ATOMIC_DEC      = _OPC(6, 20),
   OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
   OPC_ATOMIC_MIN      = _OPC(6, 22),
   OPC_ATOMIC_MAX      = _OPC(6, 23),
   OPC_ATOMIC_AND      = _OPC(6, 24),
   OPC_ATOMIC_OR       = _OPC(6, 25),
   OPC_ATOMIC_XOR      = _OPC(6, 26),
   OPC_LDGB            = _OPC(6, 27),
   OPC_STGB            = _OPC(6, 28),
   OPC_STIB            = _OPC(6, 29),
   OPC_LDC             = _OPC(6, 30),
   OPC_LDLV            = _OPC(6, 31),
   OPC_PIPR            = _OPC(6, 32), /* ??? */
   OPC_PIPC            = _OPC(6, 33), /* ??? */
   OPC_EMIT2           = _OPC(6, 34), /* ??? */
   OPC_ENDLS           = _OPC(6, 35), /* ??? */
   OPC_GETSPID         = _OPC(6, 36), /* SP ID */
   OPC_GETWID          = _OPC(6, 37), /* wavefront ID */

   /* Logical opcodes for things that differ in a6xx+ */
   OPC_STC             = _OPC(6, 40),
   OPC_RESINFO_B       = _OPC(6, 41),
   OPC_LDIB_B          = _OPC(6, 42),
   OPC_STIB_B          = _OPC(6, 43),

   /* Logical opcodes for different atomic instruction variations: */
   OPC_ATOMIC_B_ADD      = _OPC(6, 44),
   OPC_ATOMIC_B_SUB      = _OPC(6, 45),
   OPC_ATOMIC_B_XCHG     = _OPC(6, 46),
   OPC_ATOMIC_B_INC      = _OPC(6, 47),
   OPC_ATOMIC_B_DEC      = _OPC(6, 48),
   OPC_ATOMIC_B_CMPXCHG  = _OPC(6, 49),
   OPC_ATOMIC_B_MIN      = _OPC(6, 50),
   OPC_ATOMIC_B_MAX      = _OPC(6, 51),
   OPC_ATOMIC_B_AND      = _OPC(6, 52),
   OPC_ATOMIC_B_OR       = _OPC(6, 53),
   OPC_ATOMIC_B_XOR      = _OPC(6, 54),

   OPC_LDG_A           = _OPC(6, 55),
   OPC_STG_A           = _OPC(6, 56),

   OPC_SPILL_MACRO     = _OPC(6, 57),
   OPC_RELOAD_MACRO    = _OPC(6, 58),

   /* category 7: */
   OPC_BAR             = _OPC(7, 0),
   OPC_FENCE           = _OPC(7, 1),

   /* meta instructions (category -1): */
   /* placeholder instr to mark shader inputs: */
   OPC_META_INPUT      = _OPC(-1, 0),
   /* The "collect" and "split" instructions are used for keeping
    * track of instructions that write to multiple dst registers
    * (split) like texture sample instructions, or read multiple
    * consecutive scalar registers (collect) (bary.f, texture samp)
    *
    * A "split" extracts a scalar component from a vecN, and a
    * "collect" gathers multiple scalar components into a vecN
    */
   OPC_META_SPLIT      = _OPC(-1, 2),
   OPC_META_COLLECT    = _OPC(-1, 3),

   /* placeholder for texture fetches that run before FS invocation
    * starts:
    */
   OPC_META_TEX_PREFETCH = _OPC(-1, 4),

   /* Parallel copies have multiple destinations, and copy each source
    * to its corresponding destination. This happens "in parallel," meaning
    * that it happens as-if every source is read first and then every
    * destination is stored. These are produced in RA when register
    * shuffling is required, and then lowered away immediately afterwards.
    */
   OPC_META_PARALLEL_COPY = _OPC(-1, 5),
   OPC_META_PHI        = _OPC(-1, 6),
} opc_t;
/* clang-format on */

/* Inverse of _OPC(): recover the category and the per-category opcode.
 *
 * NOTE(review): for the meta opcodes (category -1) opc_cat() right-shifts a
 * negative value, which is implementation-defined in C; it yields -1 on
 * compilers that use an arithmetic shift (GCC and Clang do).
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))

/* Declared here, defined elsewhere; presumably returns the mnemonic used by
 * the disassembler for `opc` -- confirm against the definition.
 */
const char *disasm_a3xx_instr_name(opc_t opc);

typedef enum {
   TYPE_F16 = 0,
   TYPE_F32 = 1,
   TYPE_U16 = 2,
   TYPE_U32 = 3,
   TYPE_S16 = 4,
   TYPE_S32 = 5,
   TYPE_U8 = 6,
   TYPE_S8 = 7, // XXX I assume?
} type_t;

/* Returns the width named by the type: 32 for the *32 types, 16 for the *16
 * types and 8 for the *8 types.  Any other value trips ir3_assert(); if that
 * returns (e.g. asserts compiled out and no handler installed) the result
 * is 0.
 */
static inline uint32_t
type_size(type_t type)
{
   switch (type) {
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_S32:
      return 32;
   case TYPE_F16:
   case TYPE_U16:
   case TYPE_S16:
      return 16;
   case TYPE_U8:
   case TYPE_S8:
      return 8;
   default:
      ir3_assert(0); /* invalid type */
      return 0;
   }
}

/* Non-zero iff `type` is one of the floating-point types (F32, F16). */
static inline int
type_float(type_t type)
{
   return (type == TYPE_F32) || (type == TYPE_F16);
}

/* Non-zero iff `type` is one of the unsigned integer types (U32, U16, U8). */
static inline int
type_uint(type_t type)
{
   return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
}

/* Non-zero iff `type` is one of the signed integer types (S32, S16, S8). */
static inline int
type_sint(type_t type)
{
   return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
}

typedef enum {
   ROUND_ZERO = 0,
   ROUND_EVEN = 1,
   ROUND_POS_INF = 2,
   ROUND_NEG_INF = 3,
} round_t;

/* Pack a register number and component into a register id: the component
 * occupies the low two bits and the register number the bits above it.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 *
 * The arithmetic is done in uint32_t so that a negative `num` wraps instead
 * of hitting the undefined behaviour of left-shifting a negative int; all
 * non-negative inputs produce the same value as before.
 */
static inline uint32_t
regid(int num, int comp)
{
   return ((uint32_t)num << 2) | ((uint32_t)comp & 0x3);
}

#define INVALID_REG     regid(63, 0)
#define VALIDREG(r)     ((r) != INVALID_REG)
/* NOTE(review): COND() is not defined in this header; it must be provided by
 * whoever uses CONDREG().
 */
#define CONDREG(r, val) COND(VALIDREG(r), (val))

/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */

typedef enum {
   BRANCH_PLAIN = 0, /* br */
   BRANCH_OR = 1,    /* brao */
   BRANCH_AND = 2,   /* braa */
   BRANCH_CONST = 3, /* brac */
   BRANCH_ANY = 4,   /* bany */
   BRANCH_ALL = 5,   /* ball */
   BRANCH_X = 6,     /* brax ??? */
} brtype_t;

/* With is_bindless_s2en = 1, this determines whether bindless is enabled and
 * if so, how to get the (base, index) pair for both sampler and texture.
 * There is a single base embedded in the instruction, which is always used
 * for the texture.
 */
typedef enum {
   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is not presumed to be uniform. This is
    * backwards-compatible with earlier generations, where this field was
    * always 0 and nonuniform-indexed sampling always worked.
    */
   CAT5_NONUNIFORM = 0,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_A1_UNIFORM = 1,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_NONUNIFORM = 2,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is *not* presumed to be
    * uniform.
    */
   CAT5_BINDLESS_A1_NONUNIFORM = 3,

   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is presumed to be uniform.
    */
   CAT5_UNIFORM = 4,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_UNIFORM = 5,

   /* The texture and sampler share the same base, get sampler index from low
    * 4 bits of src3 and texture index from high 4 bits.
    */
   CAT5_BINDLESS_IMM = 6,

   /* The sampler base comes from the low 3 bits of a1.x, and the texture
    * index comes from the next 8 bits of a1.x. The sampler index is an
    * immediate in src3.
    */
   CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;

/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
 */
typedef enum {
   /* Use old GL binding model with an immediate index. */
   CAT6_IMM = 0,

   CAT6_UNIFORM = 1,

   CAT6_NONUNIFORM = 2,

   /* Use the bindless model, with an immediate index.
    */
   CAT6_BINDLESS_IMM = 4,

   /* Use the bindless model, with a uniform register index.
    */
   CAT6_BINDLESS_UNIFORM = 5,

   /* Use the bindless model, with a register index that isn't guaranteed
    * to be uniform. This presumably checks if the indices are equal and
    * splits up the load/store, because it works the way you would
    * expect.
    */
   CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;

/* True iff `opc` is a cat2 or cat3 opcode other than bary.f and the sel.*
 * family.
 */
static inline bool
is_sat_compatible(opc_t opc)
{
   /* On a6xx saturation doesn't work on cat4 */
   if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
      return false;

   switch (opc) {
   /* On a3xx and a6xx saturation doesn't work on bary.f */
   case OPC_BARY_F:
   /* On a6xx saturation doesn't work on sel.* */
   case OPC_SEL_B16:
   case OPC_SEL_B32:
   case OPC_SEL_S16:
   case OPC_SEL_S32:
   case OPC_SEL_F16:
   case OPC_SEL_F32:
      return false;
   default:
      return true;
   }
}

/* True iff `opc` is one of the cat3 mad.* opcodes (not madsh.*). */
static inline bool
is_mad(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_U16:
   case OPC_MAD_S16:
   case OPC_MAD_U24:
   case OPC_MAD_S24:
   case OPC_MAD_F16:
   case OPC_MAD_F32:
      return true;
   default:
      return false;
   }
}

/* True iff `opc` is one of the cat3 madsh.* opcodes. */
static inline bool
is_madsh(opc_t opc)
{
   switch (opc) {
   case OPC_MADSH_U16:
   case OPC_MADSH_M16:
      return true;
   default:
      return false;
   }
}

/* True iff `opc` is one of the OPC_ATOMIC_* opcodes.  The logical
 * OPC_ATOMIC_B_* variants are not included.
 */
static inline bool
is_atomic(opc_t opc)
{
   switch (opc) {
   case OPC_ATOMIC_ADD:
   case OPC_ATOMIC_SUB:
   case OPC_ATOMIC_XCHG:
   case OPC_ATOMIC_INC:
   case OPC_ATOMIC_DEC:
   case OPC_ATOMIC_CMPXCHG:
   case OPC_ATOMIC_MIN:
   case OPC_ATOMIC_MAX:
   case OPC_ATOMIC_AND:
   case OPC_ATOMIC_OR:
   case OPC_ATOMIC_XOR:
      return true;
   default:
      return false;
   }
}

/* True iff `opc` is resfmt, resinfo, ldgb, stgb or stib. */
static inline bool
is_ssbo(opc_t opc)
{
   switch (opc) {
   case OPC_RESFMT:
   case OPC_RESINFO:
   case OPC_LDGB:
   case OPC_STGB:
   case OPC_STIB:
      return true;
   default:
      return false;
   }
}

/* True iff `opc` is isam, isaml or isamm. */
static inline bool
is_isam(opc_t opc)
{
   switch (opc) {
   case OPC_ISAM:
   case OPC_ISAML:
   case OPC_ISAMM:
      return true;
   default:
      return false;
   }
}

/* True iff `opc` is one of the cat2 opcodes with an _F suffix listed below.
 * Note that bary.f is not in the list.
 */
static inline bool
is_cat2_float(opc_t opc)
{
   switch (opc) {
   case OPC_ADD_F:
   case OPC_MIN_F:
   case OPC_MAX_F:
   case OPC_MUL_F:
   case OPC_SIGN_F:
   case OPC_CMPS_F:
   case OPC_ABSNEG_F:
   case OPC_CMPV_F:
   case OPC_FLOOR_F:
   case OPC_CEIL_F:
   case OPC_RNDNE_F:
   case OPC_RNDAZ_F:
   case OPC_TRUNC_F:
      return true;

   default:
      return false;
   }
}

/* True iff `opc` is one of the cat3 float opcodes (mad.f16/f32, sel.f16/f32). */
static inline bool
is_cat3_float(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_F16:
   case OPC_MAD_F32:
   case OPC_SEL_F16:
   case OPC_SEL_F32:
      return true;
   default:
      return false;
   }
}

#endif /* INSTR_A3XX_H_ */