/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

#define PACKED __attribute__((__packed__))

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>
#include <assert.h>

/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Combine an instruction category and a per-category opcode into one
 * opc_t value: the category lands above the low NOPC_BITS bits, the
 * opcode in the low NOPC_BITS bits.
 *
 * The category is scaled by multiplication instead of being shifted,
 * because the meta opcodes use category -1 and left-shifting a negative
 * value is undefined behavior in C.  The resulting values are the same.
 */
#define _OPC(cat, opc)   (((cat) * (1 << NOPC_BITS)) | (opc))

typedef enum {
	/* category 0: */
	OPC_NOP             = _OPC(0, 0),
	OPC_BR              = _OPC(0, 1),
	OPC_JUMP            = _OPC(0, 2),
	OPC_CALL            = _OPC(0, 3),
	OPC_RET             = _OPC(0, 4),
	OPC_KILL            = _OPC(0, 5),
	OPC_END             = _OPC(0, 6),
	OPC_EMIT            = _OPC(0, 7),
	OPC_CUT             = _OPC(0, 8),
	OPC_CHMASK          = _OPC(0, 9),
	OPC_CHSH            = _OPC(0, 10),
	OPC_FLOW_REV        = _OPC(0, 11),

	/* category 1: */
	OPC_MOV             = _OPC(1, 0),

	/* category 2: */
	OPC_ADD_F           = _OPC(2, 0),
	OPC_MIN_F           = _OPC(2, 1),
	OPC_MAX_F           = _OPC(2, 2),
	OPC_MUL_F           = _OPC(2, 3),
	OPC_SIGN_F          = _OPC(2, 4),
	OPC_CMPS_F          = _OPC(2, 5),
	OPC_ABSNEG_F        = _OPC(2, 6),
	OPC_CMPV_F          = _OPC(2, 7),
	/* 8 - invalid */
	OPC_FLOOR_F         = _OPC(2, 9),
	OPC_CEIL_F          = _OPC(2, 10),
	OPC_RNDNE_F         = _OPC(2, 11),
	OPC_RNDAZ_F         = _OPC(2, 12),
	OPC_TRUNC_F         = _OPC(2, 13),
	/* 14-15 - invalid */
	OPC_ADD_U           = _OPC(2, 16),
	OPC_ADD_S           = _OPC(2, 17),
	OPC_SUB_U           = _OPC(2, 18),
	OPC_SUB_S           = _OPC(2, 19),
	OPC_CMPS_U          = _OPC(2, 20),
	OPC_CMPS_S          = _OPC(2, 21),
	OPC_MIN_U           = _OPC(2, 22),
	OPC_MIN_S           = _OPC(2, 23),
	OPC_MAX_U           = _OPC(2, 24),
	OPC_MAX_S           = _OPC(2, 25),
	OPC_ABSNEG_S        = _OPC(2, 26),
	/* 27 - invalid */
	OPC_AND_B           = _OPC(2, 28),
	OPC_OR_B            = _OPC(2, 29),
	OPC_NOT_B           = _OPC(2, 30),
	OPC_XOR_B           = _OPC(2, 31),
	/* 32 - invalid */
	OPC_CMPV_U          = _OPC(2, 33),
	OPC_CMPV_S          = _OPC(2, 34),
	/* 35-47 - invalid */
	OPC_MUL_U           = _OPC(2, 48),
	OPC_MUL_S           = _OPC(2, 49),
	OPC_MULL_U          = _OPC(2, 50),
	OPC_BFREV_B         = _OPC(2, 51),
	OPC_CLZ_S           = _OPC(2, 52),
	OPC_CLZ_B           = _OPC(2, 53),
	OPC_SHL_B           = _OPC(2, 54),
	OPC_SHR_B           = _OPC(2, 55),
	OPC_ASHR_B          = _OPC(2, 56),
	OPC_BARY_F          = _OPC(2, 57),
	OPC_MGEN_B          = _OPC(2, 58),
	OPC_GETBIT_B        = _OPC(2, 59),
	OPC_SETRM           = _OPC(2, 60),
	OPC_CBITS_B         = _OPC(2, 61),
	OPC_SHB             = _OPC(2, 62),
	OPC_MSAD            = _OPC(2, 63),

	/* category 3: */
	OPC_MAD_U16         = _OPC(3, 0),
	OPC_MADSH_U16       = _OPC(3, 1),
	OPC_MAD_S16         = _OPC(3, 2),
	OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
	OPC_MAD_U24         = _OPC(3, 4),
	OPC_MAD_S24         = _OPC(3, 5),
	OPC_MAD_F16         = _OPC(3, 6),
	OPC_MAD_F32         = _OPC(3, 7),
	OPC_SEL_B16         = _OPC(3, 8),
	OPC_SEL_B32         = _OPC(3, 9),
	OPC_SEL_S16         = _OPC(3, 10),
	OPC_SEL_S32         = _OPC(3, 11),
	OPC_SEL_F16         = _OPC(3, 12),
	OPC_SEL_F32         = _OPC(3, 13),
	OPC_SAD_S16         = _OPC(3, 14),
	OPC_SAD_S32         = _OPC(3, 15),

	/* category 4: */
	OPC_RCP             = _OPC(4, 0),
	OPC_RSQ             = _OPC(4, 1),
	OPC_LOG2            = _OPC(4, 2),
	OPC_EXP2            = _OPC(4, 3),
	OPC_SIN             = _OPC(4, 4),
	OPC_COS             = _OPC(4, 5),
	OPC_SQRT            = _OPC(4, 6),
	/* 7-63 - invalid */

	/* category 5: */
	OPC_ISAM            = _OPC(5, 0),
	OPC_ISAML           = _OPC(5, 1),
	OPC_ISAMM           = _OPC(5, 2),
	OPC_SAM             = _OPC(5, 3),
	OPC_SAMB            = _OPC(5, 4),
	OPC_SAML            = _OPC(5, 5),
	OPC_SAMGQ           = _OPC(5, 6),
	OPC_GETLOD          = _OPC(5, 7),
	OPC_CONV            = _OPC(5, 8),
	OPC_CONVM           = _OPC(5, 9),
	OPC_GETSIZE         = _OPC(5, 10),
	OPC_GETBUF          = _OPC(5, 11),
	OPC_GETPOS          = _OPC(5, 12),
	OPC_GETINFO         = _OPC(5, 13),
	OPC_DSX             = _OPC(5, 14),
	OPC_DSY             = _OPC(5, 15),
	OPC_GATHER4R        = _OPC(5, 16),
	OPC_GATHER4G        = _OPC(5, 17),
	OPC_GATHER4B        = _OPC(5, 18),
	OPC_GATHER4A        = _OPC(5, 19),
	OPC_SAMGP0          = _OPC(5, 20),
	OPC_SAMGP1          = _OPC(5, 21),
	OPC_SAMGP2          = _OPC(5, 22),
	OPC_SAMGP3          = _OPC(5, 23),
	OPC_DSXPP_1         = _OPC(5, 24),
	OPC_DSYPP_1         = _OPC(5, 25),
	OPC_RGETPOS         = _OPC(5, 26),
	OPC_RGETINFO        = _OPC(5, 27),

	/* category 6: */
	OPC_LDG             = _OPC(6, 0),   /* load-global */
	OPC_LDL             = _OPC(6, 1),
	OPC_LDP             = _OPC(6, 2),
	OPC_STG             = _OPC(6, 3),   /* store-global */
	OPC_STL             = _OPC(6, 4),
	OPC_STP             = _OPC(6, 5),
	OPC_LDIB            = _OPC(6, 6),
	OPC_G2L             = _OPC(6, 7),
	OPC_L2G             = _OPC(6, 8),
	OPC_PREFETCH        = _OPC(6, 9),
	OPC_LDLW            = _OPC(6, 10),
	OPC_STLW            = _OPC(6, 11),
	OPC_RESFMT          = _OPC(6, 14),
	OPC_RESINFO         = _OPC(6, 15),
	OPC_ATOMIC_ADD      = _OPC(6, 16),
	OPC_ATOMIC_SUB      = _OPC(6, 17),
	OPC_ATOMIC_XCHG     = _OPC(6, 18),
	OPC_ATOMIC_INC      = _OPC(6, 19),
	OPC_ATOMIC_DEC      = _OPC(6, 20),
	OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
	OPC_ATOMIC_MIN      = _OPC(6, 22),
	OPC_ATOMIC_MAX      = _OPC(6, 23),
	OPC_ATOMIC_AND      = _OPC(6, 24),
	OPC_ATOMIC_OR       = _OPC(6, 25),
	OPC_ATOMIC_XOR      = _OPC(6, 26),
	OPC_LDGB            = _OPC(6, 27),
	OPC_STGB            = _OPC(6, 28),
	OPC_STIB            = _OPC(6, 29),
	OPC_LDC             = _OPC(6, 30),
	OPC_LDLV            = _OPC(6, 31),

	/* category 7: */
	OPC_BAR             = _OPC(7, 0),
	OPC_FENCE           = _OPC(7, 1),

	/* meta instructions (category -1): */
	/* placeholder instr to mark shader inputs: */
	OPC_META_INPUT      = _OPC(-1, 0),
	/* The "fan-in" and "fan-out" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (fan-out) like texture sample instructions, or read multiple
	 * consecutive scalar registers (fan-in) (bary.f, texture samp)
	 */
	OPC_META_FO         = _OPC(-1, 2),
	OPC_META_FI         = _OPC(-1, 3),

} opc_t;

/* Split an opc_t back into its category and per-category opcode.
 *
 * NOTE: for the meta opcodes opc_cat() right-shifts a negative value,
 * which is implementation-defined; it yields -1 on compilers that use an
 * arithmetic shift for signed operands.
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))

typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  // XXX I assume?
} type_t;

/* Width in bits of a value of the given type (32, 16 or 8).  Asserts on
 * a value outside type_t and returns 0 if asserts are compiled out.
 */
static inline uint32_t type_size(type_t type)
{
	switch (type) {
	case TYPE_F32:
	case TYPE_U32:
	case TYPE_S32:
		return 32;
	case TYPE_F16:
	case TYPE_U16:
	case TYPE_S16:
		return 16;
	case TYPE_U8:
	case TYPE_S8:
		return 8;
	default:
		assert(0); /* invalid type */
		return 0;
	}
}

/* Non-zero if type is one of the float types. */
static inline int type_float(type_t type)
{
	return (type == TYPE_F32) || (type == TYPE_F16);
}

/* Non-zero if type is one of the unsigned integer types. */
static inline int type_uint(type_t type)
{
	return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
}

/* Non-zero if type is one of the signed integer types. */
static inline int type_sint(type_t type)
{
	return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
}

typedef union PACKED {
	/* normal gpr or const src register: */
	struct PACKED {
		uint32_t comp  : 2;
		uint32_t num   : 10;
	};
	/* for immediate val: */
	int32_t  iim_val   : 11;
	/* to make compiler happy: */
	uint32_t dummy32;
	uint32_t dummy10   : 10;
	int32_t  idummy10  : 10;
	uint32_t dummy11   : 11;
	uint32_t dummy12   : 12;
	uint32_t dummy13   : 13;
	uint32_t dummy8    : 8;
} reg_t;

/* special registers: */
#define REG_A0 61       /* address register */
#define REG_P0 62       /* predicate register */

/* Non-zero if reg names the address (a0) or predicate (p0) register. */
static inline int reg_special(reg_t reg)
{
	return (reg.num == REG_A0) || (reg.num == REG_P0);
}

/* category 0 encoding */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			int16_t  immed    : 16;
			uint32_t dummy1   : 16;
		} a3xx;
		struct PACKED {
			int32_t  immed    : 20;
			uint32_t dummy1   : 12;
		} a4xx;
		struct PACKED {
			int32_t  immed    : 32;
		} a5xx;
	};

	/* dword1: */
	uint32_t dummy2   : 8;
	uint32_t repeat   : 3;
	uint32_t dummy3   : 1;
	uint32_t ss       : 1;
	uint32_t dummy4   : 7;
	uint32_t inv      : 1;
	uint32_t comp     : 2;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat0_t;

/* category 1 encoding */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* for normal src register: */
		struct PACKED {
			uint32_t src : 11;
			/* at least low bit of pad must be zero or it will
			 * look like an address relative src
			 */
			uint32_t pad : 21;
		};
		/* for address relative: */
		struct PACKED {
			int32_t  off : 10;
			uint32_t src_rel_c : 1;
			uint32_t src_rel   : 1;
			uint32_t unknown   : 20;
		};
		/* for immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
	};

	/* dword1: */
	uint32_t dst        : 8;
	uint32_t repeat     : 3;
	uint32_t src_r      : 1;
	uint32_t ss         : 1;
	uint32_t ul         : 1;
	uint32_t dst_type   : 3;
	uint32_t dst_rel    : 1;
	uint32_t src_type   : 3;
	uint32_t src_c      : 1;
	uint32_t src_im     : 1;
	uint32_t even       : 1;
	uint32_t pos_inf    : 1;
	uint32_t must_be_0  : 2;
	uint32_t jmp_tgt    : 1;
	uint32_t sync       : 1;
	uint32_t opc_cat    : 3;
} instr_cat1_t;

/* category 2 encoding */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src1_im      : 1;   /* immediate */
			uint32_t src1_neg     : 1;   /* negate */
			uint32_t src1_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;   /* relative-const */
			uint32_t src1_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src2         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src2_im      : 1;   /* immediate */
			uint32_t src2_neg     : 1;   /* negate */
			uint32_t src2_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src2         : 10;
			uint32_t src2_c       : 1;   /* relative-const */
			uint32_t src2_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src2         : 12;
			uint32_t src2_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 2;
	uint32_t sat      : 1;
	uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
	uint32_t ss       : 1;
	uint32_t ul       : 1;   /* dunno */
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t ei       : 1;
	uint32_t cond     : 3;
	uint32_t src2_r   : 1;   /* doubles as nop1 if repeat==0 */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat2_t;

/* category 3 encoding */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src2_c       : 1;
			uint32_t src1_neg     : 1;
			uint32_t src2_r       : 1;   /* doubles as nop1 if repeat==0 */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;
			uint32_t src1_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src3         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src3_r       : 1;
			uint32_t src2_neg     : 1;
			uint32_t src3_neg     : 1;
		};
		struct PACKED {
			uint32_t src3         : 10;
			uint32_t src3_c       : 1;
			uint32_t src3_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src3         : 12;
			uint32_t src3_c       : 1;
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 2;
	uint32_t sat      : 1;
	uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t src2     : 8;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat3_t;

/* Returns false for the cat3 opcodes listed below (the 16-bit variants,
 * plus sad.s32 which the original author was unsure about) and true for
 * every other cat3 opcode.
 */
static inline bool instr_cat3_full(instr_cat3_t *cat3)
{
	switch (_OPC(3, cat3->opc)) {
	case OPC_MAD_F16:
	case OPC_MAD_U16:
	case OPC_MAD_S16:
	case OPC_SEL_B16:
	case OPC_SEL_S16:
	case OPC_SEL_F16:
	case OPC_SAD_S16:
	case OPC_SAD_S32:  // really??
		return false;
	default:
		return true;
	}
}

/* category 4 encoding */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src          : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src_im       : 1;   /* immediate */
			uint32_t src_neg      : 1;   /* negate */
			uint32_t src_abs      : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src          : 10;
			uint32_t src_c        : 1;   /* relative-const */
			uint32_t src_rel      : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel;
		struct PACKED {
			uint32_t src          : 12;
			uint32_t src_c        : 1;   /* const */
			uint32_t dummy        : 3;
		} c;
	};
	uint32_t dummy1   : 16;  /* seem to be ignored */

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 2;
	uint32_t sat      : 1;
	uint32_t src_r    : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t dummy2   : 5;   /* seem to be ignored */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat4_t;

/* category 5 encoding */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* normal case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 8;
			uint32_t dummy1   : 4;   /* seem to be ignored */
			uint32_t samp     : 4;
			uint32_t tex      : 7;
		} norm;
		/* s2en case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 11;
			uint32_t dummy1   : 1;
			uint32_t src3     : 8;
			uint32_t dummy2   : 3;
		} s2en;
		/* same in either case: */
		// XXX I think, confirm this
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t pad      : 23;
		};
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t wrmask   : 4;   /* write-mask */
	uint32_t type     : 3;
	uint32_t dummy2   : 1;   /* seems to be ignored */
	uint32_t is_3d    : 1;

	uint32_t is_a     : 1;
	uint32_t is_s     : 1;
	uint32_t is_s2en  : 1;
	uint32_t is_o     : 1;
	uint32_t is_p     : 1;

	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat5_t;

/* dword0 encoding for src_off: [src1 + off], src2: */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6a_t;

/* dword0 encoding for !src_off: [src1], src2 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 13;
	uint32_t ignore0  : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6b_t;

/* dword1 encoding for dst_off: */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* note: there is some weird stuff going on where sometimes
	 * cat6->a.off is involved.. but that seems like a bug in
	 * the blob, since it is used even if !cat6->src_off
	 * It would make sense for there to be some more bits to
	 * bring us to 11 bits worth of offset, but not sure..
	 */
	int32_t  off      : 8;
	uint32_t mustbe1  : 1;
	uint32_t dst      : 8;
	uint32_t pad1     : 15;
} instr_cat6c_t;

/* dword1 encoding for !dst_off: */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t idx      : 8;
	uint32_t pad0     : 15;
} instr_cat6d_t;

/* ldgb and atomics..
 *
 * ldgb:      pad0=0, pad3=1
 * atomic .g: pad0=1, pad3=1
 *        .l: pad0=1, pad3=0
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad0     : 1;
	uint32_t src3     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t src_ssbo : 8;
	uint32_t pad2     : 3;   // type
	uint32_t g        : 1;
	uint32_t pad3     : 1;
	uint32_t pad4     : 10;  // opc/jmp_tgt/sync/opc_cat
} instr_cat6ldgb_t;

/* stgb, pad0=0, pad3=2
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;   // ???
	uint32_t src1     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t pad0     : 9;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t src3     : 8;
	uint32_t src3_im  : 1;
	uint32_t dst_ssbo : 8;
	uint32_t pad2     : 3;   // type
	uint32_t pad3     : 2;
	uint32_t pad4     : 10;  // opc/jmp_tgt/sync/opc_cat
} instr_cat6stgb_t;

/* category 6 encoding (pre-a6xx "legacy" layouts) */
typedef union PACKED {
	instr_cat6a_t a;
	instr_cat6b_t b;
	instr_cat6c_t c;
	instr_cat6d_t d;
	instr_cat6ldgb_t ldgb;
	instr_cat6stgb_t stgb;
	struct PACKED {
		/* dword0: */
		uint32_t src_off  : 1;
		uint32_t pad1     : 31;

		/* dword1: */
		uint32_t pad2     : 8;
		uint32_t dst_off  : 1;
		uint32_t pad3     : 8;
		uint32_t type     : 3;
		uint32_t g        : 1;   /* or in some cases it means dst immed */
		uint32_t pad4     : 1;
		uint32_t opc      : 5;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;
	};
} instr_cat6_t;

/**
 * For atomic ops (which return a value):
 *
 *    pad1=1, pad2=c, pad3=0, pad4=3
 *    src1    - vecN offset/coords
 *    src2.x  - is actually dest register
 *    src2.y  - is 'data' except for cmpxchg where src2.y is 'compare'
 *              and src2.z is 'data'
 *
 * For stib (which does not return a value):
 *    pad1=0, pad2=c, pad3=0, pad4=2
 *    src1    - vecN offset/coords
 *    src2    - value to store
 *
 * For ldib:
 *    pad1=1, pad2=c, pad3=0, pad4=2
 *    src1    - vecN offset/coords
 *
 * for ldc (load from UBO using descriptor):
 *    pad1=0, pad2=8, pad3=0, pad4=2
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 9;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t opc      : 5;
	uint32_t pad2     : 5;
	uint32_t src1     : 8;   /* coordinate/offset */

	/* dword1: */
	uint32_t src2     : 8;   /* or the dst for load instructions */
	uint32_t pad3     : 1;   //mustbe0 ?? or zero means imm vs reg for ssbo??
	uint32_t ssbo     : 8;   /* ssbo/image binding point */
	uint32_t type     : 3;
	uint32_t pad4     : 7;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6_a6xx_t;

/* category 7 encoding */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 32;

	/* dword1: */
	uint32_t pad2     : 12;
	uint32_t ss       : 1;   /* maybe in the encoding, but blob only uses (sy) */
	uint32_t pad3     : 6;
	uint32_t w        : 1;   /* write */
	uint32_t r        : 1;   /* read */
	uint32_t l        : 1;   /* local */
	uint32_t g        : 1;   /* global */
	uint32_t opc      : 4;   /* presumed, but only a couple known OPCs */
	uint32_t jmp_tgt  : 1;   /* (jp) */
	uint32_t sync     : 1;   /* (sy) */
	uint32_t opc_cat  : 3;
} instr_cat7_t;

/* One 64-bit instruction, viewable through any per-category layout or
 * through the anonymous struct holding the fields common to them.
 */
typedef union PACKED {
	instr_cat0_t cat0;
	instr_cat1_t cat1;
	instr_cat2_t cat2;
	instr_cat3_t cat3;
	instr_cat4_t cat4;
	instr_cat5_t cat5;
	instr_cat6_t cat6;
	instr_cat6_a6xx_t cat6_a6xx;
	instr_cat7_t cat7;
	struct PACKED {
		/* dword0: */
		uint32_t pad1     : 32;

		/* dword1: */
		uint32_t pad2     : 12;
		uint32_t ss       : 1;   /* cat1-cat4 (cat0??) and cat7 (?) */
		uint32_t ul       : 1;   /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
		uint32_t pad3     : 13;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;

	};
} instr_t;

/* Repeat field of the instruction for categories 0-4; 0 for any other
 * category.
 */
static inline uint32_t instr_repeat(instr_t *instr)
{
	switch (instr->opc_cat) {
	case 0:  return instr->cat0.repeat;
	case 1:  return instr->cat1.repeat;
	case 2:  return instr->cat2.repeat;
	case 3:  return instr->cat3.repeat;
	case 4:  return instr->cat4.repeat;
	default: return 0;
	}
}

/* Saturate bit of the instruction for categories 2-4; false for any
 * other category.
 */
static inline bool instr_sat(instr_t *instr)
{
	switch (instr->opc_cat) {
	case 2:  return instr->cat2.sat;
	case 3:  return instr->cat3.sat;
	case 4:  return instr->cat4.sat;
	default: return false;
	}
}

/* We can probably drop the gpu_id arg, but keeping it for now so we can
 * assert if we see something we think should be new encoding on an older
 * gpu.
 */
static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
{
	instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;

	/* gpu_id is only consumed by the asserts below; keep NDEBUG builds
	 * free of an unused-parameter warning.
	 */
	(void)gpu_id;

	/* At least one of these two bits is pad in all the possible
	 * "legacy" cat6 encodings, and an analysis of all the pre-a6xx
	 * cmdstream traces I have indicates that the pad bit is zero
	 * in all cases.  So we can use this to detect new encoding:
	 */
	if ((cat6->pad2 & 0x8) && (cat6->pad4 & 0x2)) {
		assert(gpu_id >= 600);
		assert(instr->cat6.opc == 0);
		return false;
	}

	return true;
}

/* Per-category opcode field of the instruction.  Category 1 has no opcode
 * field and reports 0; category 6 is read through the a6xx layout when
 * is_cat6_legacy() says the instruction uses the new encoding.
 */
static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
{
	switch (instr->opc_cat) {
	case 0:  return instr->cat0.opc;
	case 1:  return 0;
	case 2:  return instr->cat2.opc;
	case 3:  return instr->cat3.opc;
	case 4:  return instr->cat4.opc;
	case 5:  return instr->cat5.opc;
	case 6:
		if (!is_cat6_legacy(instr, gpu_id))
			return instr->cat6_a6xx.opc;
		return instr->cat6.opc;
	case 7:  return instr->cat7.opc;
	default: return 0;
	}
}

/* True for the cat3 mad.* opcodes (not the madsh.* ones). */
static inline bool is_mad(opc_t opc)
{
	switch (opc) {
	case OPC_MAD_U16:
	case OPC_MAD_S16:
	case OPC_MAD_U24:
	case OPC_MAD_S24:
	case OPC_MAD_F16:
	case OPC_MAD_F32:
		return true;
	default:
		return false;
	}
}

/* True for the cat3 madsh.* opcodes. */
static inline bool is_madsh(opc_t opc)
{
	switch (opc) {
	case OPC_MADSH_U16:
	case OPC_MADSH_M16:
		return true;
	default:
		return false;
	}
}

/* True for the cat6 atomic.* opcodes. */
static inline bool is_atomic(opc_t opc)
{
	switch (opc) {
	case OPC_ATOMIC_ADD:
	case OPC_ATOMIC_SUB:
	case OPC_ATOMIC_XCHG:
	case OPC_ATOMIC_INC:
	case OPC_ATOMIC_DEC:
	case OPC_ATOMIC_CMPXCHG:
	case OPC_ATOMIC_MIN:
	case OPC_ATOMIC_MAX:
	case OPC_ATOMIC_AND:
	case OPC_ATOMIC_OR:
	case OPC_ATOMIC_XOR:
		return true;
	default:
		return false;
	}
}

/* True for resfmt, resinfo, ldgb, stgb and stib. */
static inline bool is_ssbo(opc_t opc)
{
	switch (opc) {
	case OPC_RESFMT:
	case OPC_RESINFO:
	case OPC_LDGB:
	case OPC_STGB:
	case OPC_STIB:
		return true;
	default:
		return false;
	}
}

/* True for isam, isaml and isamm. */
static inline bool is_isam(opc_t opc)
{
	switch (opc) {
	case OPC_ISAM:
	case OPC_ISAML:
	case OPC_ISAMM:
		return true;
	default:
		return false;
	}
}

int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);

#endif /* INSTR_A3XX_H_ */