/* midgard.h revision b8e80941 */
/* Author(s):
 *   Connor Abbott
 *   Alyssa Rosenzweig
 *
 * Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
 * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Enumerations, packed bitfield layouts and opcode name tables describing
 * Midgard instruction words (ALU, branch/writeout, load/store, texture).
 *
 * The packed structs below rely on the compiler packing bitfields tightly
 * (GCC/Clang `__attribute__((__packed__))`, enum- and bool-typed bitfields).
 */

#ifndef __midgard_h__
#define __midgard_h__

#include <stdint.h>
#include <stdbool.h>

/* Debug flag bits -- presumably tested against midgard_debug, which is only
 * declared here and must be defined in exactly one translation unit. */
#define MIDGARD_DBG_MSGS                0x0001
#define MIDGARD_DBG_SHADERS             0x0002

extern int midgard_debug;

typedef enum {
        midgard_word_type_alu,
        midgard_word_type_load_store,
        midgard_word_type_texture,
        midgard_word_type_unknown
} midgard_word_type;

typedef enum {
        midgard_alu_vmul,
        midgard_alu_sadd,
        midgard_alu_smul,
        midgard_alu_vadd,
        midgard_alu_lut
} midgard_alu;

/*
 * ALU words
 */

/* ALU opcodes. Every value fits in the 8-bit `op` field of midgard_vector_alu
 * and midgard_scalar_alu. */
typedef enum {
        midgard_alu_op_fadd       = 0x10,
        midgard_alu_op_fmul       = 0x14,

        midgard_alu_op_fmin       = 0x28,
        midgard_alu_op_fmax       = 0x2C,

        midgard_alu_op_fmov       = 0x30,
        midgard_alu_op_froundeven = 0x34,
        midgard_alu_op_ftrunc     = 0x35,
        midgard_alu_op_ffloor     = 0x36,
        midgard_alu_op_fceil      = 0x37,
        midgard_alu_op_ffma       = 0x38,
        midgard_alu_op_fdot3      = 0x3C,
        midgard_alu_op_fdot3r     = 0x3D,
        midgard_alu_op_fdot4      = 0x3E,
        midgard_alu_op_freduce    = 0x3F,

        midgard_alu_op_iadd       = 0x40,
        midgard_alu_op_ishladd    = 0x41,
        midgard_alu_op_isub       = 0x46,

        midgard_alu_op_imul       = 0x58,

        midgard_alu_op_imin       = 0x60,
        midgard_alu_op_umin       = 0x61,
        midgard_alu_op_imax       = 0x62,
        midgard_alu_op_umax       = 0x63,
        midgard_alu_op_iasr       = 0x68,
        midgard_alu_op_ilsr       = 0x69,
        midgard_alu_op_ishl       = 0x6E,

        midgard_alu_op_iand       = 0x70,
        midgard_alu_op_ior        = 0x71,
        midgard_alu_op_inand      = 0x72, /* ~(a & b), for inot let a = b */
        midgard_alu_op_inor       = 0x73, /* ~(a | b) */
        midgard_alu_op_iandnot    = 0x74, /* (a & ~b), used for not/b2f */
        midgard_alu_op_iornot     = 0x75, /* (a | ~b) */
        midgard_alu_op_ixor       = 0x76,
        midgard_alu_op_inxor      = 0x77, /* ~(a ^ b) */
        midgard_alu_op_iclz       = 0x78, /* Number of zeroes on left */
        midgard_alu_op_ibitcount8 = 0x7A, /* Counts bits in 8-bit increments */
        midgard_alu_op_imov       = 0x7B,
        midgard_alu_op_iabs       = 0x7C,

        midgard_alu_op_feq        = 0x80,
        midgard_alu_op_fne        = 0x81,
        midgard_alu_op_flt        = 0x82,
        midgard_alu_op_fle        = 0x83,
        midgard_alu_op_fball_eq   = 0x88,
        midgard_alu_op_bball_eq   = 0x89,
        midgard_alu_op_fball_lt   = 0x8A, /* all(lessThan(.., ..)) */
        midgard_alu_op_fball_lte  = 0x8B, /* all(lessThanEqual(.., ..)) */

        midgard_alu_op_bbany_neq  = 0x90, /* used for bvec4(1) */
        midgard_alu_op_fbany_neq  = 0x91, /* bvec4(0) also */
        midgard_alu_op_fbany_lt   = 0x92, /* any(lessThan(.., ..)) */
        midgard_alu_op_fbany_lte  = 0x93, /* any(lessThanEqual(.., ..)) */
        midgard_alu_op_f2i        = 0x99,
        midgard_alu_op_f2u8       = 0x9C,
        midgard_alu_op_f2u        = 0x9D,

        midgard_alu_op_ieq        = 0xA0,
        midgard_alu_op_ine        = 0xA1,
        midgard_alu_op_ult        = 0xA2,
        midgard_alu_op_ule        = 0xA3,
        midgard_alu_op_ilt        = 0xA4,
        midgard_alu_op_ile        = 0xA5,
        midgard_alu_op_iball_eq   = 0xA8,
        midgard_alu_op_iball_neq  = 0xA9,
        midgard_alu_op_uball_lt   = 0xAA,
        midgard_alu_op_uball_lte  = 0xAB,
        midgard_alu_op_iball_lt   = 0xAC,
        midgard_alu_op_iball_lte  = 0xAD,

        midgard_alu_op_ibany_eq   = 0xB0,
        midgard_alu_op_ibany_neq  = 0xB1,
        midgard_alu_op_ubany_lt   = 0xB2,
        midgard_alu_op_ubany_lte  = 0xB3,
        midgard_alu_op_ibany_lt   = 0xB4, /* any(lessThan(.., ..)) */
        midgard_alu_op_ibany_lte  = 0xB5, /* any(lessThanEqual(.., ..)) */
        midgard_alu_op_i2f        = 0xB8,
        midgard_alu_op_u2f        = 0xBC,

        midgard_alu_op_icsel      = 0xC1,
        midgard_alu_op_fcsel_i    = 0xC4,
        midgard_alu_op_fcsel      = 0xC5,
        midgard_alu_op_fround     = 0xC6,

        midgard_alu_op_fatan_pt2  = 0xE8,
        midgard_alu_op_fpow_pt1   = 0xEC,

        midgard_alu_op_frcp       = 0xF0,
        midgard_alu_op_frsqrt     = 0xF2,
        midgard_alu_op_fsqrt      = 0xF3,
        midgard_alu_op_fexp2      = 0xF4,
        midgard_alu_op_flog2      = 0xF5,
        midgard_alu_op_fsin       = 0xF6,
        midgard_alu_op_fcos       = 0xF7,
        midgard_alu_op_fatan2_pt1 = 0xF9,
} midgard_alu_op;

typedef enum {
        midgard_outmod_none = 0,
        midgard_outmod_pos  = 1,
        midgard_outmod_int  = 2,
        midgard_outmod_sat  = 3
} midgard_outmod;

typedef enum {
        midgard_reg_mode_8  = 0,
        midgard_reg_mode_16 = 1,
        midgard_reg_mode_32 = 2,
        midgard_reg_mode_64 = 3 /* TODO: verify */
} midgard_reg_mode;

typedef enum {
        midgard_dest_override_lower = 0,
        midgard_dest_override_upper = 1,
        midgard_dest_override_none  = 2
} midgard_dest_override;

typedef enum {
        midgard_int_sign_extend = 0,
        midgard_int_zero_extend = 1,
        midgard_int_normal      = 2,
        midgard_int_reserved    = 3
} midgard_int_mod;

#define MIDGARD_FLOAT_MOD_ABS (1 << 0)
#define MIDGARD_FLOAT_MOD_NEG (1 << 1)

/* Vector ALU source descriptor: 13 bits in total, the same width as the
 * src1/src2 fields of midgard_vector_alu (presumably what those fields
 * encode -- confirm against the users of this header). */
typedef struct
__attribute__((__packed__))
{
        /* Either midgard_int_mod or from midgard_float_mod_*, depending on the
         * type of op */
        unsigned mod : 2;

        /* replicate lower half if dest = half, or low/high half selection if
         * dest = full
         */
        bool rep_low     : 1;
        bool rep_high    : 1; /* unused if dest = full */
        bool half        : 1; /* only matters if dest = full */
        unsigned swizzle : 8;
}
midgard_vector_alu_src;

/* Vector ALU instruction word: 48 bits in total. */
typedef struct
__attribute__((__packed__))
{
        midgard_alu_op op               :  8;
        midgard_reg_mode reg_mode       :  2;
        unsigned src1                   : 13;
        unsigned src2                   : 13;
        midgard_dest_override dest_override : 2;
        midgard_outmod outmod           :  2;
        unsigned mask                   :  8;
}
midgard_vector_alu;

/* Scalar ALU source descriptor: 6 bits in total, the same width as the src1
 * field of midgard_scalar_alu (presumably what that field encodes --
 * confirm against the users of this header). */
typedef struct
__attribute__((__packed__))
{
        bool abs           : 1;
        bool negate        : 1;
        bool full          : 1; /* 0 = half, 1 = full */
        unsigned component : 3;
}
midgard_scalar_alu_src;

/* Scalar ALU instruction word: 32 bits in total. */
typedef struct
__attribute__((__packed__))
{
        midgard_alu_op op         :  8;
        unsigned src1             :  6;
        unsigned src2             : 11;
        unsigned unknown          :  1;
        midgard_outmod outmod     :  2;
        bool output_full          :  1;
        unsigned output_component :  3;
}
midgard_scalar_alu;

/* Register selection word: 16 bits in total. */
typedef struct
__attribute__((__packed__))
{
        unsigned src1_reg : 5;
        unsigned src2_reg : 5;
        unsigned out_reg  : 5;
        bool src2_imm     : 1;
}
midgard_reg_info;

/* In addition to conditional branches and jumps (unconditional branches),
 * Midgard implements a bit of fixed function functionality used in fragment
 * shaders via specially crafted branches. These have special branch opcodes,
 * which perform a fixed-function operation and/or use the results of a
 * fixed-function operation as the branch condition.  */

typedef enum {
        /* Regular branches */
        midgard_jmp_writeout_op_branch_uncond = 1,
        midgard_jmp_writeout_op_branch_cond = 2,

        /* In a fragment shader, execute a discard_if instruction, with the
         * corresponding condition code. Terminates the shader, so generally
         * set the branch target to out of the shader */
        midgard_jmp_writeout_op_discard = 4,

        /* Branch if the tilebuffer is not yet ready. At the beginning of a
         * fragment shader that reads from the tile buffer, for instance via
         * ARM_shader_framebuffer_fetch or EXT_pixel_local_storage, this branch
         * operation should be used as a loop. An instruction like
         * "br.tilebuffer.always -1" does the trick, corresponding to
         * "while(!is_tilebuffer_ready);" */
        midgard_jmp_writeout_op_tilebuffer_pending = 6,

        /* In a fragment shader, try to write out the value pushed to r0 to the
         * tilebuffer, subject to unknown state in r1.z and r1.w. If this
         * succeeds, the shader terminates. If it fails, it branches to the
         * specified branch target. Generally, this should be used in a loop to
         * itself, acting as "do { write(r0); } while(!write_successful);" */
        midgard_jmp_writeout_op_writeout = 7,
} midgard_jmp_writeout_op;

typedef enum {
        midgard_condition_write0 = 0,

        /* These condition codes denote a conditional branch on FALSE and on
         * TRUE respectively */
        midgard_condition_false = 1,
        midgard_condition_true = 2,

        /* This condition code always branches. For a pure branch, the
         * unconditional branch coding should be used instead, but for
         * fixed-function branch opcodes, this is still useful */
        midgard_condition_always = 3,
} midgard_condition;

/* Compact unconditional branch: 16 bits in total. */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == branch_uncond */
        unsigned dest_tag : 4; /* tag of branch destination */
        unsigned unknown : 2;
        int offset : 7;
}
midgard_branch_uncond;

/* Compact conditional branch: 16 bits in total. */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == branch_cond */
        unsigned dest_tag : 4; /* tag of branch destination */
        int offset : 7;
        midgard_condition cond : 2;
}
midgard_branch_cond;

/* Extended branch with a wider offset and condition field: 48 bits in
 * total. */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == branch_cond */
        unsigned dest_tag : 4; /* tag of branch destination */
        unsigned unknown : 2;
        signed offset : 23;
        unsigned cond : 16;
}
midgard_branch_extended;

/* Writeout word: 16 bits in total. */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == writeout */
        unsigned unknown : 13;
}
midgard_writeout;

/*
 * Load/store words
 */

/* Load/store opcodes. Every value fits in the 8-bit `op` field of
 * midgard_load_store_word and indexes load_store_opcode_names. */
typedef enum {
        midgard_op_ld_st_noop   = 0x03,

        /* Unclear why this is on the L/S unit, but (with an address of 0,
         * appropriate swizzle, magic constant 0x24, and xy mask?) moves fp32 cube
         * map coordinates in r27 to its cube map texture coordinate
         * destination (e.g r29). 0x4 magic for loading from fp16 instead */

        midgard_op_store_cubemap_coords = 0x0E,

        midgard_op_load_attr_16 = 0x95,
        midgard_op_load_attr_32 = 0x94,
        midgard_op_load_vary_16 = 0x99,
        midgard_op_load_vary_32 = 0x98,
        midgard_op_load_color_buffer_16 = 0x9D,
        midgard_op_load_color_buffer_8 = 0xBA,
        midgard_op_load_uniform_16 = 0xAC,
        midgard_op_load_uniform_32 = 0xB0,
        midgard_op_store_vary_16 = 0xD5,
        midgard_op_store_vary_32 = 0xD4
} midgard_load_store_op;

typedef enum {
        midgard_interp_centroid = 1,
        midgard_interp_default = 2
} midgard_interpolation;

/* Varying qualifiers: 10 bits in total, the same width as the
 * varying_parameters field of midgard_load_store_word. */
typedef struct
__attribute__((__packed__))
{
        unsigned zero1 : 4; /* Always zero */

        /* Varying qualifiers, zero if not a varying */
        unsigned flat    : 1;
        unsigned is_varying : 1; /* Always one for varying, but maybe something else? */
        midgard_interpolation interpolation : 2;

        unsigned zero2 : 2; /* Always zero */
}
midgard_varying_parameter;

/* A single load/store operation: 60 bits in total, the same width as the
 * word1/word2 fields of midgard_load_store. */
typedef struct
__attribute__((__packed__))
{
        midgard_load_store_op op : 8;
        unsigned reg     : 5;
        unsigned mask    : 4;
        unsigned swizzle : 8;
        unsigned unknown : 16;

        unsigned varying_parameters : 10;

        unsigned address : 9;
}
midgard_load_store_word;

/* Load/store bundle: two tags plus two 60-bit words, 128 bits in total. */
typedef struct
__attribute__((__packed__))
{
        unsigned type      : 4;
        unsigned next_type : 4;
        uint64_t word1     : 60;
        uint64_t word2     : 60;
}
midgard_load_store;

/* Texture pipeline results are in r28-r29 */
#define REG_TEX_BASE 28

/* Texture opcodes... maybe? */
#define TEXTURE_OP_NORMAL 0x11
#define TEXTURE_OP_TEXEL_FETCH 0x14

/* Texture format types, found in format */
#define TEXTURE_CUBE 0x00
#define TEXTURE_2D 0x02
#define TEXTURE_3D 0x03

/* Texture instruction word: 128 bits in total. */
typedef struct
__attribute__((__packed__))
{
        unsigned type      : 4;
        unsigned next_type : 4;

        unsigned op  : 6;
        unsigned shadow    : 1;
        unsigned unknown3  : 1;

        /* A little obscure, but last is set for the last texture operation in
         * a shader. cont appears to just be last's opposite (?). Yeah, I know,
         * kind of funky.. BiOpen thinks it could do with memory hinting, or
         * tile locking? */

        unsigned cont  : 1;
        unsigned last  : 1;

        unsigned format    : 5;
        unsigned has_offset : 1;

        /* Like in Bifrost */
        unsigned filter  : 1;

        unsigned in_reg_select : 1;
        unsigned in_reg_upper  : 1;

        unsigned in_reg_swizzle_left : 2;
        unsigned in_reg_swizzle_right : 2;

        unsigned unknown1 : 2;

        unsigned unknown8  : 4;

        unsigned out_full  : 1;

        /* Always 1 afaict... */
        unsigned unknown7  : 2;

        unsigned out_reg_select : 1;
        unsigned out_upper : 1;

        unsigned mask : 4;

        unsigned unknown2  : 2;

        unsigned swizzle  : 8;
        unsigned unknown4  : 8;

        unsigned unknownA  : 4;

        unsigned offset_unknown1  : 1;
        unsigned offset_reg_select : 1;
        unsigned offset_reg_upper : 1;
        unsigned offset_unknown4  : 1;
        unsigned offset_unknown5  : 1;
        unsigned offset_unknown6  : 1;
        unsigned offset_unknown7  : 1;
        unsigned offset_unknown8  : 1;
        unsigned offset_unknown9  : 1;

        unsigned unknownB  : 3;

        /* Texture bias or LOD, depending on whether it is executed in a
         * fragment/vertex shader respectively. Compute as int(2^8 * biasf).
         *
         * For texel fetch, this is the LOD as is. */
        unsigned bias  : 8;

        unsigned unknown9  : 8;

        unsigned texture_handle : 16;
        unsigned sampler_handle : 16;
}
midgard_texture_word;

/* Mnemonics indexed by midgard_load_store_op (an 8-bit opcode, hence 256
 * slots). Slots for opcodes not listed here are zero-initialized, i.e. NULL,
 * so callers must check for NULL before printing. Being `static` in a header,
 * every translation unit that includes this file gets its own copy. */
static char *load_store_opcode_names[256] = {
        [midgard_op_ld_st_noop] = "ld_st_noop",
        [midgard_op_store_cubemap_coords] = "st_cubemap_coords",
        [midgard_op_load_attr_16] = "ld_attr_16",
        [midgard_op_load_attr_32] = "ld_attr_32",
        [midgard_op_load_vary_16] = "ld_vary_16",
        [midgard_op_load_vary_32] = "ld_vary_32",
        [midgard_op_load_uniform_16] = "ld_uniform_16",
        [midgard_op_load_uniform_32] = "ld_uniform_32",
        [midgard_op_load_color_buffer_8] = "ld_color_buffer_8",
        [midgard_op_load_color_buffer_16] = "ld_color_buffer_16",
        [midgard_op_store_vary_16] = "st_vary_16",
        [midgard_op_store_vary_32] = "st_vary_32"
};

#endif