/* Author(s):
 *   Connor Abbott
 *   Alyssa Rosenzweig
 *
 * Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
 * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
 * Copyright (C) 2019-2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef __midgard_h__
#define __midgard_h__

#include <stdint.h>
#include <stdbool.h>

/* Bits for the midgard_debug bitmask (see `extern int midgard_debug` below). */
#define MIDGARD_DBG_MSGS 0x0001
#define MIDGARD_DBG_SHADERS 0x0002
#define MIDGARD_DBG_SHADERDB 0x0004
#define MIDGARD_DBG_INORDER 0x0008
#define MIDGARD_DBG_VERBOSE 0x0010
#define MIDGARD_DBG_INTERNAL 0x0020

extern int midgard_debug;

/* Broad category of an instruction word, derived from its tag. */
typedef enum {
        midgard_word_type_alu,
        midgard_word_type_load_store,
        midgard_word_type_texture,
        midgard_word_type_unknown
} midgard_word_type;

/* The five ALU pipeline units within an ALU bundle. */
typedef enum {
        midgard_alu_vmul,
        midgard_alu_sadd,
        midgard_alu_smul,
        midgard_alu_vadd,
        midgard_alu_lut
} midgard_alu;

/* 4-bit instruction word tags. ALU tags encode the word size (in quadwords)
 * and whether the bundle performs a fragment writeout. */
enum {
        TAG_INVALID = 0x0,
        TAG_BREAK = 0x1,
        TAG_TEXTURE_4_VTX = 0x2,
        TAG_TEXTURE_4 = 0x3,
        TAG_TEXTURE_4_BARRIER = 0x4,
        TAG_LOAD_STORE_4 = 0x5,
        TAG_UNKNOWN_1 = 0x6,
        TAG_UNKNOWN_2 = 0x7,
        TAG_ALU_4 = 0x8,
        TAG_ALU_8 = 0x9,
        TAG_ALU_12 = 0xA,
        TAG_ALU_16 = 0xB,
        TAG_ALU_4_WRITEOUT = 0xC,
        TAG_ALU_8_WRITEOUT = 0xD,
        TAG_ALU_12_WRITEOUT = 0xE,
        TAG_ALU_16_WRITEOUT = 0xF
};

/*
 * ALU words
 */

typedef enum {
        midgard_alu_op_fadd = 0x10, /* round to even */
        midgard_alu_op_fadd_rtz = 0x11,
        midgard_alu_op_fadd_rtn = 0x12,
        midgard_alu_op_fadd_rtp = 0x13,
        midgard_alu_op_fmul = 0x14, /* round to even */
        midgard_alu_op_fmul_rtz = 0x15,
        midgard_alu_op_fmul_rtn = 0x16,
        midgard_alu_op_fmul_rtp = 0x17,

        midgard_alu_op_fmin = 0x28, /* if an operand is NaN, propagate the other */
        midgard_alu_op_fmin_nan = 0x29, /* if an operand is NaN, propagate it */
        midgard_alu_op_fabsmin = 0x2A, /* min(abs(a,b)) */
        midgard_alu_op_fabsmin_nan = 0x2B, /* min_nan(abs(a,b)) */
        midgard_alu_op_fmax = 0x2C, /* if an operand is NaN, propagate the other */
        midgard_alu_op_fmax_nan = 0x2D, /* if an operand is NaN, propagate it */
        midgard_alu_op_fabsmax = 0x2E, /* max(abs(a,b)) */
        midgard_alu_op_fabsmax_nan = 0x2F, /* max_nan(abs(a,b)) */

        midgard_alu_op_fmov = 0x30, /* fmov_rte */
        midgard_alu_op_fmov_rtz = 0x31,
        midgard_alu_op_fmov_rtn = 0x32,
        midgard_alu_op_fmov_rtp = 0x33,
        midgard_alu_op_froundeven = 0x34,
        midgard_alu_op_ftrunc = 0x35,
        midgard_alu_op_ffloor = 0x36,
        midgard_alu_op_fceil = 0x37,
        midgard_alu_op_ffma = 0x38, /* rte */
        midgard_alu_op_ffma_rtz = 0x39,
        midgard_alu_op_ffma_rtn = 0x3A,
        midgard_alu_op_ffma_rtp = 0x3B,
        midgard_alu_op_fdot3 = 0x3C,
        midgard_alu_op_fdot3r = 0x3D,
        midgard_alu_op_fdot4 = 0x3E,
        midgard_alu_op_freduce = 0x3F,

        midgard_alu_op_iadd = 0x40,
        midgard_alu_op_ishladd = 0x41, /* (a<<1) + b */
        midgard_alu_op_isub = 0x46,
        midgard_alu_op_ishlsub = 0x47, /* (a<<1) - b */
        midgard_alu_op_iaddsat = 0x48,
        midgard_alu_op_uaddsat = 0x49,
        midgard_alu_op_isubsat = 0x4E,
        midgard_alu_op_usubsat = 0x4F,

        midgard_alu_op_imul = 0x58,
        /* Multiplies two ints and stores the result in the next larger datasize. */
        midgard_alu_op_iwmul = 0x59, /* sint * sint = sint */
        midgard_alu_op_uwmul = 0x5A, /* uint * uint = uint */
        midgard_alu_op_iuwmul = 0x5B, /* sint * uint = sint */

        midgard_alu_op_imin = 0x60,
        midgard_alu_op_umin = 0x61,
        midgard_alu_op_imax = 0x62,
        midgard_alu_op_umax = 0x63,
        midgard_alu_op_iavg = 0x64,
        midgard_alu_op_uavg = 0x65,
        midgard_alu_op_iravg = 0x66,
        midgard_alu_op_uravg = 0x67,
        midgard_alu_op_iasr = 0x68,
        midgard_alu_op_ilsr = 0x69,
        midgard_alu_op_ishlsat = 0x6C,
        midgard_alu_op_ushlsat = 0x6D,
        midgard_alu_op_ishl = 0x6E,

        midgard_alu_op_iand = 0x70,
        midgard_alu_op_ior = 0x71,
        midgard_alu_op_inand = 0x72, /* ~(a & b), for inot let a = b */
        midgard_alu_op_inor = 0x73, /* ~(a | b) */
        midgard_alu_op_iandnot = 0x74, /* (a & ~b), used for not/b2f */
        midgard_alu_op_iornot = 0x75, /* (a | ~b) */
        midgard_alu_op_ixor = 0x76,
        midgard_alu_op_inxor = 0x77, /* ~(a ^ b) */
        midgard_alu_op_iclz = 0x78, /* Number of zeroes on left */
        midgard_alu_op_ipopcnt = 0x7A, /* Population count */
        midgard_alu_op_imov = 0x7B,
        midgard_alu_op_iabsdiff = 0x7C,
        midgard_alu_op_uabsdiff = 0x7D,
        midgard_alu_op_ichoose = 0x7E, /* vector, component number - dupe for shuffle() */

        midgard_alu_op_feq = 0x80,
        midgard_alu_op_fne = 0x81,
        midgard_alu_op_flt = 0x82,
        midgard_alu_op_fle = 0x83,
        midgard_alu_op_fball_eq = 0x88,
        midgard_alu_op_fball_neq = 0x89,
        midgard_alu_op_fball_lt = 0x8A, /* all(lessThan(.., ..)) */
        midgard_alu_op_fball_lte = 0x8B, /* all(lessThanEqual(.., ..)) */

        midgard_alu_op_fbany_eq = 0x90,
        midgard_alu_op_fbany_neq = 0x91,
        midgard_alu_op_fbany_lt = 0x92, /* any(lessThan(.., ..)) */
        midgard_alu_op_fbany_lte = 0x93, /* any(lessThanEqual(.., ..)) */

        midgard_alu_op_f2i_rte = 0x98,
        midgard_alu_op_f2i_rtz = 0x99,
        midgard_alu_op_f2i_rtn = 0x9A,
        midgard_alu_op_f2i_rtp = 0x9B,
        midgard_alu_op_f2u_rte = 0x9C,
        midgard_alu_op_f2u_rtz = 0x9D,
        midgard_alu_op_f2u_rtn = 0x9E,
        midgard_alu_op_f2u_rtp = 0x9F,

        midgard_alu_op_ieq = 0xA0,
        midgard_alu_op_ine = 0xA1,
        midgard_alu_op_ult = 0xA2,
        midgard_alu_op_ule = 0xA3,
        midgard_alu_op_ilt = 0xA4,
        midgard_alu_op_ile = 0xA5,
        midgard_alu_op_iball_eq = 0xA8,
        midgard_alu_op_iball_neq = 0xA9,
        midgard_alu_op_uball_lt = 0xAA,
        midgard_alu_op_uball_lte = 0xAB,
        midgard_alu_op_iball_lt = 0xAC,
        midgard_alu_op_iball_lte = 0xAD,

        midgard_alu_op_ibany_eq = 0xB0,
        midgard_alu_op_ibany_neq = 0xB1,
        midgard_alu_op_ubany_lt = 0xB2,
        midgard_alu_op_ubany_lte = 0xB3,
        midgard_alu_op_ibany_lt = 0xB4, /* any(lessThan(.., ..)) */
        midgard_alu_op_ibany_lte = 0xB5, /* any(lessThanEqual(.., ..)) */

        midgard_alu_op_i2f_rte = 0xB8,
        midgard_alu_op_i2f_rtz = 0xB9,
        midgard_alu_op_i2f_rtn = 0xBA,
        midgard_alu_op_i2f_rtp = 0xBB,
        midgard_alu_op_u2f_rte = 0xBC,
        midgard_alu_op_u2f_rtz = 0xBD,
        midgard_alu_op_u2f_rtn = 0xBE,
        midgard_alu_op_u2f_rtp = 0xBF,

        /* All csel* instructions use as a condition the output of the previous
         * vector or scalar unit, thus it must run on the second pipeline stage
         * and be scheduled to the same bundle as the opcode that it uses as a
         * condition. */
        midgard_alu_op_icsel_v = 0xC0,
        midgard_alu_op_icsel = 0xC1,
        midgard_alu_op_fcsel_v = 0xC4,
        midgard_alu_op_fcsel = 0xC5,
        midgard_alu_op_froundaway = 0xC6, /* round to nearest away */

        midgard_alu_op_fatan2_pt2 = 0xE8,
        midgard_alu_op_fpow_pt1 = 0xEC,
        midgard_alu_op_fpown_pt1 = 0xED,
        midgard_alu_op_fpowr_pt1 = 0xEE,

        midgard_alu_op_frcp = 0xF0,
        midgard_alu_op_frsqrt = 0xF2,
        midgard_alu_op_fsqrt = 0xF3,
        midgard_alu_op_fexp2 = 0xF4,
        midgard_alu_op_flog2 = 0xF5,
        midgard_alu_op_fsinpi = 0xF6, /* sin(pi * x) */
        midgard_alu_op_fcospi = 0xF7, /* cos(pi * x) */
        midgard_alu_op_fatan2_pt1 = 0xF9,
} midgard_alu_op;

/* Float output modifier, applied to the result before it is written out. */
typedef enum {
        midgard_outmod_none = 0,
        midgard_outmod_clamp_0_inf = 1, /* max(x, 0.0), NaNs become +0.0 */
        midgard_outmod_clamp_m1_1 = 2, /* clamp(x, -1.0, 1.0), NaNs become -1.0 */
        midgard_outmod_clamp_0_1 = 3 /* clamp(x, 0.0, 1.0), NaNs become +0.0 */
} midgard_outmod_float;
/* These are applied to the resulting value that's going to be stored in the dest reg.
 * This should be set to midgard_outmod_keeplo when shrink_mode is midgard_shrink_mode_none. */
typedef enum {
        midgard_outmod_ssat = 0,
        midgard_outmod_usat = 1,
        midgard_outmod_keeplo = 2, /* Keep low half */
        midgard_outmod_keephi = 3, /* Keep high half */
} midgard_outmod_int;

/* Per-lane datum size for an ALU word. */
typedef enum {
        midgard_reg_mode_8 = 0,
        midgard_reg_mode_16 = 1,
        midgard_reg_mode_32 = 2,
        midgard_reg_mode_64 = 3
} midgard_reg_mode;

/* Which half of the destination a narrowed result lands in. */
typedef enum {
        midgard_shrink_mode_lower = 0,
        midgard_shrink_mode_upper = 1,
        midgard_shrink_mode_none = 2
} midgard_shrink_mode;

/* Only used if midgard_src_expand_mode is set to one of midgard_src_expand_*. */
typedef enum {
        midgard_int_sign_extend = 0,
        midgard_int_zero_extend = 1,
        midgard_int_replicate = 2,
        midgard_int_left_shift = 3
} midgard_int_mod;

/* Unlike midgard_int_mod, float modifiers are applied after the expansion happens, so
 * they don't depend on midgard_src_expand_mode. */
#define MIDGARD_FLOAT_MOD_ABS (1 << 0)
#define MIDGARD_FLOAT_MOD_NEG (1 << 1)

/* The expand options depend on both midgard_int_mod and midgard_reg_mode. For
 * example, a vec4 with midgard_int_sign_extend and midgard_src_expand_low is
 * treated as a vec8 and each 16-bit element from the low 64-bits is then sign
 * extended, resulting in a vec4 where each 32-bit element corresponds to a
 * 16-bit element from the low 64-bits of the input vector. */
typedef enum {
        midgard_src_passthrough = 0,
        midgard_src_rep_low = 1, /* replicate lower 64 bits to higher 64 bits */
        midgard_src_rep_high = 2, /* replicate higher 64 bits to lower 64 bits */
        midgard_src_swap = 3, /* swap lower 64 bits with higher 64 bits */
        midgard_src_expand_low = 4, /* expand low 64 bits */
        midgard_src_expand_high = 5, /* expand high 64 bits */
        midgard_src_expand_low_swap = 6, /* expand low 64 bits, then swap */
        midgard_src_expand_high_swap = 7, /* expand high 64 bits, then swap */
} midgard_src_expand_mode;

/* True iff expand mode `a` widens its input (relies on the enum ordering above). */
#define INPUT_EXPANDS(a) \
        (a >= midgard_src_expand_low && a <= midgard_src_expand_high_swap)

/* True iff expand mode `a` swaps the 64-bit halves. */
#define INPUT_SWAPS(a) \
        (a == midgard_src_swap || a >= midgard_src_expand_low_swap)

/* 13-bit source descriptor for a vector ALU word. */
typedef struct
__attribute__((__packed__))
{
        /* Either midgard_int_mod or from midgard_float_mod_*, depending on the
         * type of op */
        unsigned mod : 2;
        midgard_src_expand_mode expand_mode : 3;
        unsigned swizzle : 8;
}
midgard_vector_alu_src;

/* 48-bit vector ALU word. */
typedef struct
__attribute__((__packed__))
{
        midgard_alu_op op : 8;
        midgard_reg_mode reg_mode : 2;
        unsigned src1 : 13; /* packed midgard_vector_alu_src */
        unsigned src2 : 13; /* packed midgard_vector_alu_src */
        midgard_shrink_mode shrink_mode : 2;
        unsigned outmod : 2; /* midgard_outmod_float or midgard_outmod_int */
        unsigned mask : 8;
}
midgard_vector_alu;

/* 6-bit source descriptor for a scalar ALU word. */
typedef struct
__attribute__((__packed__))
{
        unsigned mod : 2;
        bool full : 1; /* 0 = 16-bit, 1 = 32-bit */
        unsigned component : 3;
}
midgard_scalar_alu_src;

/* 32-bit scalar ALU word. */
typedef struct
__attribute__((__packed__))
{
        midgard_alu_op op : 8;
        unsigned src1 : 6; /* packed midgard_scalar_alu_src */
        /* last 5 bits are used when src2 is an immediate */
        unsigned src2 : 11;
        unsigned unknown : 1;
        unsigned outmod : 2;
        bool output_full : 1;
        unsigned output_component : 3;
}
midgard_scalar_alu;

/* 16-bit register selection word shared by the ALU units in a bundle. */
typedef struct
__attribute__((__packed__))
{
        unsigned src1_reg : 5;
        unsigned src2_reg : 5;
        unsigned out_reg : 5;
        bool src2_imm : 1; /* src2 is an inline immediate rather than a register */
}
midgard_reg_info;

/* In addition to conditional branches and jumps (unconditional branches),
 * Midgard implements a bit of fixed function functionality used in fragment
 * shaders via specially crafted branches. These have special branch opcodes,
 * which perform a fixed-function operation and/or use the results of a
 * fixed-function operation as the branch condition.
 */

typedef enum {
        /* Regular branches */
        midgard_jmp_writeout_op_branch_uncond = 1,
        midgard_jmp_writeout_op_branch_cond = 2,

        /* In a fragment shader, execute a discard_if instruction, with the
         * corresponding condition code. Terminates the shader, so generally
         * set the branch target to out of the shader */
        midgard_jmp_writeout_op_discard = 4,

        /* Branch if the tilebuffer is not yet ready. At the beginning of a
         * fragment shader that reads from the tile buffer, for instance via
         * ARM_shader_framebuffer_fetch or EXT_pixel_local_storage, this branch
         * operation should be used as a loop. An instruction like
         * "br.tilebuffer.always -1" does the trick, corresponding to
         * "while(!is_tilebuffer_ready)" */
        midgard_jmp_writeout_op_tilebuffer_pending = 6,

        /* In a fragment shader, try to write out the value pushed to r0 to the
         * tilebuffer, subject to unknown state in r1.z and r1.w. If this
         * succeeds, the shader terminates. If it fails, it branches to the
         * specified branch target. Generally, this should be used in a loop to
         * itself, acting as "do { write(r0); } while(!write_successful);" */
        midgard_jmp_writeout_op_writeout = 7,
} midgard_jmp_writeout_op;

typedef enum {
        midgard_condition_write0 = 0,

        /* These condition codes denote a conditional branch on FALSE and on
         * TRUE respectively */
        midgard_condition_false = 1,
        midgard_condition_true = 2,

        /* This condition code always branches. For a pure branch, the
         * unconditional branch coding should be used instead, but for
         * fixed-function branch opcodes, this is still useful */
        midgard_condition_always = 3,
} midgard_condition;

/* 16-bit unconditional branch word. */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == branch_uncond */
        unsigned dest_tag : 4; /* tag of branch destination */
        unsigned unknown : 2;
        int offset : 7;
}
midgard_branch_uncond;

/* 16-bit conditional branch word. */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == branch_cond */
        unsigned dest_tag : 4; /* tag of branch destination */
        int offset : 7;
        midgard_condition cond : 2;
}
midgard_branch_cond;

/* 48-bit extended branch word with a wider offset and a condition LUT. */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == branch_cond */
        unsigned dest_tag : 4; /* tag of branch destination */
        unsigned unknown : 2;
        signed offset : 23;

        /* Extended branches permit inputting up to 4 conditions loaded into
         * r31 (two in r31.w and two in r31.x). In the most general case, we
         * specify a function f(A, B, C, D) mapping 4 1-bit conditions to a
         * single 1-bit branch criteria. Note that the domain of f has 2^(2^4)
         * elements, each mapping to 1-bit of output, so we can trivially
         * construct a Godel numbering of f as a (2^4)=16-bit integer. This
         * 16-bit integer serves as a lookup table to compute f, subject to
         * some swaps for ordering.
         *
         * Interestingly, the standard 2-bit condition codes are also a LUT with
         * the same format (2^1-bit), but it's usually easier to use enums. */

        unsigned cond : 16;
}
midgard_branch_extended;

/* 16-bit fragment writeout word. */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == writeout */
        unsigned unknown : 13;
}
midgard_writeout;

/*
 * Load/store words
 */

typedef enum {
        midgard_op_ld_st_noop = 0x03,

        /* Unpacks a colour from a native format to <format> */
        midgard_op_unpack_colour_f32 = 0x04,
        midgard_op_unpack_colour_f16 = 0x05,
        midgard_op_unpack_colour_u32 = 0x06,
        midgard_op_unpack_colour_s32 = 0x07,

        /* Packs a colour from <format> to a native format */
        midgard_op_pack_colour_f32 = 0x08,
        midgard_op_pack_colour_f16 = 0x09,
        midgard_op_pack_colour_u32 = 0x0A,
        midgard_op_pack_colour_s32 = 0x0B,

        /* Computes the effective address of a mem address expression */
        midgard_op_lea = 0x0C,

        /* Converts image coordinates into mem address */
        midgard_op_lea_image = 0x0D,

        /* Unclear why this is on the L/S unit, but moves fp32 cube map
         * coordinates in r27 to its cube map texture coordinate destination
         * (e.g r29).
 */

        midgard_op_ld_cubemap_coords = 0x0E,

        /* A mov between registers that the ldst pipeline can access */
        midgard_op_ldst_mov = 0x10,

        /* The L/S unit can do perspective division a clock faster than the ALU
         * if you're lucky. Put the vec4 in r27, and call with 0x24 as the
         * unknown state; the output will be <x/w, y/w, z/w, 1>. Replace w with
         * z for the z version */
        midgard_op_ldst_perspective_div_y = 0x11,
        midgard_op_ldst_perspective_div_z = 0x12,
        midgard_op_ldst_perspective_div_w = 0x13,

        /* val in r27.y, address embedded, outputs result to argument. Invert val for sub. Let val = +-1 for inc/dec. */
        midgard_op_atomic_add = 0x40,
        midgard_op_atomic_add64 = 0x41,
        midgard_op_atomic_add_be = 0x42,
        midgard_op_atomic_add64_be = 0x43,

        midgard_op_atomic_and = 0x44,
        midgard_op_atomic_and64 = 0x45,
        midgard_op_atomic_and_be = 0x46,
        midgard_op_atomic_and64_be = 0x47,
        midgard_op_atomic_or = 0x48,
        midgard_op_atomic_or64 = 0x49,
        midgard_op_atomic_or_be = 0x4A,
        midgard_op_atomic_or64_be = 0x4B,
        midgard_op_atomic_xor = 0x4C,
        midgard_op_atomic_xor64 = 0x4D,
        midgard_op_atomic_xor_be = 0x4E,
        midgard_op_atomic_xor64_be = 0x4F,

        midgard_op_atomic_imin = 0x50,
        midgard_op_atomic_imin64 = 0x51,
        midgard_op_atomic_imin_be = 0x52,
        midgard_op_atomic_imin64_be = 0x53,
        midgard_op_atomic_umin = 0x54,
        midgard_op_atomic_umin64 = 0x55,
        midgard_op_atomic_umin_be = 0x56,
        midgard_op_atomic_umin64_be = 0x57,
        midgard_op_atomic_imax = 0x58,
        midgard_op_atomic_imax64 = 0x59,
        midgard_op_atomic_imax_be = 0x5A,
        midgard_op_atomic_imax64_be = 0x5B,
        midgard_op_atomic_umax = 0x5C,
        midgard_op_atomic_umax64 = 0x5D,
        midgard_op_atomic_umax_be = 0x5E,
        midgard_op_atomic_umax64_be = 0x5F,

        midgard_op_atomic_xchg = 0x60,
        midgard_op_atomic_xchg64 = 0x61,
        midgard_op_atomic_xchg_be = 0x62,
        midgard_op_atomic_xchg64_be = 0x63,

        midgard_op_atomic_cmpxchg = 0x64,
        midgard_op_atomic_cmpxchg64 = 0x65,
        midgard_op_atomic_cmpxchg_be = 0x66,
        midgard_op_atomic_cmpxchg64_be = 0x67,

        /* Used for compute shader's __global arguments, __local
         * variables (or for register spilling) */

        midgard_op_ld_u8 = 0x80, /* zero extends */
        midgard_op_ld_i8 = 0x81, /* sign extends */
        midgard_op_ld_u16 = 0x84, /* zero extends */
        midgard_op_ld_i16 = 0x85, /* sign extends */
        midgard_op_ld_u16_be = 0x86, /* zero extends, big endian */
        midgard_op_ld_i16_be = 0x87, /* sign extends, big endian */
        midgard_op_ld_32 = 0x88, /* short2, int, float */
        midgard_op_ld_32_bswap2 = 0x89, /* 16-bit big endian vector */
        midgard_op_ld_32_bswap4 = 0x8A, /* 32-bit big endian scalar */
        midgard_op_ld_64 = 0x8C, /* int2, float2, long */
        midgard_op_ld_64_bswap2 = 0x8D, /* 16-bit big endian vector */
        midgard_op_ld_64_bswap4 = 0x8E, /* 32-bit big endian vector */
        midgard_op_ld_64_bswap8 = 0x8F, /* 64-bit big endian scalar */
        midgard_op_ld_128 = 0x90, /* float4, long2 */
        midgard_op_ld_128_bswap2 = 0x91, /* 16-bit big endian vector */
        midgard_op_ld_128_bswap4 = 0x92, /* 32-bit big endian vector */
        midgard_op_ld_128_bswap8 = 0x93, /* 64-bit big endian vector */

        midgard_op_ld_attr_32 = 0x94,
        midgard_op_ld_attr_16 = 0x95,
        midgard_op_ld_attr_32u = 0x96,
        midgard_op_ld_attr_32i = 0x97,
        midgard_op_ld_vary_32 = 0x98,
        midgard_op_ld_vary_16 = 0x99,
        midgard_op_ld_vary_32u = 0x9A,
        midgard_op_ld_vary_32i = 0x9B,

        /* This instruction behaves differently depending if the gpu is a v4
         * or a newer gpu. The main difference hinges on which values of the
         * second argument are valid for each gpu.
         * TODO: properly document and decode each possible value for the
         * second argument. */
        midgard_op_ld_special_32f = 0x9C,
        midgard_op_ld_special_16f = 0x9D,
        midgard_op_ld_special_32u = 0x9E,
        midgard_op_ld_special_32i = 0x9F,

        /* The distinction between these ops is the alignment
         * requirement / accompanying shift. Thus, the offset to
         * ld_ubo_128 is in 16-byte units and can load 128-bit. The
         * offset to ld_ubo_64 is in 8-byte units; ld_ubo_32 in 4-byte
         * units. */
        midgard_op_ld_ubo_u8 = 0xA0, /* theoretical */
        midgard_op_ld_ubo_i8 = 0xA1, /* theoretical */
        midgard_op_ld_ubo_u16 = 0xA4, /* theoretical */
        midgard_op_ld_ubo_i16 = 0xA5, /* theoretical */
        midgard_op_ld_ubo_u16_be = 0xA6, /* theoretical */
        midgard_op_ld_ubo_i16_be = 0xA7, /* theoretical */
        midgard_op_ld_ubo_32 = 0xA8,
        midgard_op_ld_ubo_32_bswap2 = 0xA9,
        midgard_op_ld_ubo_32_bswap4 = 0xAA,
        midgard_op_ld_ubo_64 = 0xAC,
        midgard_op_ld_ubo_64_bswap2 = 0xAD,
        midgard_op_ld_ubo_64_bswap4 = 0xAE,
        midgard_op_ld_ubo_64_bswap8 = 0xAF,
        midgard_op_ld_ubo_128 = 0xB0,
        midgard_op_ld_ubo_128_bswap2 = 0xB1,
        midgard_op_ld_ubo_128_bswap4 = 0xB2,
        midgard_op_ld_ubo_128_bswap8 = 0xB3,

        midgard_op_ld_image_32f = 0xB4,
        midgard_op_ld_image_16f = 0xB5,
        midgard_op_ld_image_32u = 0xB6,
        midgard_op_ld_image_32i = 0xB7,

        /* Only works on v5 or newer.
         * Older cards must use ld_special with tilebuffer selectors. */
        midgard_op_ld_tilebuffer_32f = 0xB8,
        midgard_op_ld_tilebuffer_16f = 0xB9,
        midgard_op_ld_tilebuffer_raw = 0xBA,

        midgard_op_st_u8 = 0xC0, /* zero extends */
        midgard_op_st_i8 = 0xC1, /* sign extends */
        midgard_op_st_u16 = 0xC4, /* zero extends */
        midgard_op_st_i16 = 0xC5, /* sign extends */
        midgard_op_st_u16_be = 0xC6, /* zero extends, big endian */
        midgard_op_st_i16_be = 0xC7, /* sign extends, big endian */
        midgard_op_st_32 = 0xC8, /* short2, int, float */
        midgard_op_st_32_bswap2 = 0xC9, /* 16-bit big endian vector */
        midgard_op_st_32_bswap4 = 0xCA, /* 32-bit big endian scalar */
        midgard_op_st_64 = 0xCC, /* int2, float2, long */
        midgard_op_st_64_bswap2 = 0xCD, /* 16-bit big endian vector */
        midgard_op_st_64_bswap4 = 0xCE, /* 32-bit big endian vector */
        midgard_op_st_64_bswap8 = 0xCF, /* 64-bit big endian scalar */
        midgard_op_st_128 = 0xD0, /* float4, long2 */
        midgard_op_st_128_bswap2 = 0xD1, /* 16-bit big endian vector */
        midgard_op_st_128_bswap4 = 0xD2, /* 32-bit big endian vector */
        midgard_op_st_128_bswap8 = 0xD3, /* 64-bit big endian vector */

        midgard_op_st_vary_32 = 0xD4,
        midgard_op_st_vary_16 = 0xD5,
        midgard_op_st_vary_32u = 0xD6,
        midgard_op_st_vary_32i = 0xD7,

        /* Value to st in r27, location r26.w as short2 */
        midgard_op_st_image_32f = 0xD8,
        midgard_op_st_image_16f = 0xD9,
        midgard_op_st_image_32u = 0xDA,
        midgard_op_st_image_32i = 0xDB,

        midgard_op_st_special_32f = 0xDC,
        midgard_op_st_special_16f = 0xDD,
        midgard_op_st_special_32u = 0xDE,
        midgard_op_st_special_32i = 0xDF,

        /* Only works on v5 or newer.
         * Older cards must use ld_special with tilebuffer selectors. */
        midgard_op_st_tilebuffer_32f = 0xE8,
        midgard_op_st_tilebuffer_16f = 0xE9,
        midgard_op_st_tilebuffer_raw = 0xEA,
        midgard_op_trap = 0xFC,
} midgard_load_store_op;

/* Interpolation mode for varying loads. */
typedef enum {
        midgard_interp_sample = 0,
        midgard_interp_centroid = 1,
        midgard_interp_default = 2
} midgard_interpolation;

typedef enum {
        midgard_varying_mod_none = 0,

        /* Take the would-be result and divide all components by its y/z/w
         * (perspective division baked in with the load) */
        midgard_varying_mod_perspective_y = 1,
        midgard_varying_mod_perspective_z = 2,
        midgard_varying_mod_perspective_w = 3,

        /* The result is a 64-bit cubemap descriptor to use with
         * midgard_tex_op_normal or midgard_tex_op_gradient */
        midgard_varying_mod_cubemap = 4,
} midgard_varying_modifier;

/* 16-bit parameter word for varying loads. */
typedef struct
__attribute__((__packed__))
{
        midgard_varying_modifier modifier : 3;

        bool flat_shading : 1;

        /* These are ignored if flat_shading is enabled. */
        bool perspective_correction : 1;
        bool centroid_mapping : 1;

        /* This is ignored if the shader only runs once per pixel.
*/ 6927ec681f3Smrg bool interpolate_sample : 1; 6937ec681f3Smrg 6947ec681f3Smrg bool zero0 : 1; /* Always zero */ 6957ec681f3Smrg 6967ec681f3Smrg unsigned direct_sample_pos_x : 4; 6977ec681f3Smrg unsigned direct_sample_pos_y : 4; 6987ec681f3Smrg} 6997ec681f3Smrgmidgard_varying_params; 7007ec681f3Smrg 7017ec681f3Smrg/* 8-bit register/etc selector for load/store ops */ 7027ec681f3Smrgtypedef struct 7037ec681f3Smrg__attribute__((__packed__)) 7047ec681f3Smrg{ 7057ec681f3Smrg /* Indexes into the register */ 7067ec681f3Smrg unsigned component : 2; 7077ec681f3Smrg 7087ec681f3Smrg /* Register select between r26/r27 */ 7097ec681f3Smrg unsigned select : 1; 7107ec681f3Smrg 7117ec681f3Smrg unsigned unknown : 2; 7127ec681f3Smrg 7137ec681f3Smrg /* Like any good Arm instruction set, load/store arguments can be 7147ec681f3Smrg * implicitly left-shifted... but only the second argument. Zero for no 7157ec681f3Smrg * shifting, up to <<7 possible though. This is useful for indexing. 7167ec681f3Smrg * 7177ec681f3Smrg * For the first argument, it's unknown what these bits mean */ 7187ec681f3Smrg unsigned shift : 3; 7197ec681f3Smrg} 7207ec681f3Smrgmidgard_ldst_register_select; 7217ec681f3Smrg 7227ec681f3Smrgtypedef enum { 7237ec681f3Smrg /* 0 is reserved */ 7247ec681f3Smrg midgard_index_address_u64 = 1, 7257ec681f3Smrg midgard_index_address_u32 = 2, 7267ec681f3Smrg midgard_index_address_s32 = 3, 7277ec681f3Smrg} midgard_index_address_format; 7287ec681f3Smrg 7297ec681f3Smrgtypedef struct 7307ec681f3Smrg__attribute__((__packed__)) 7317ec681f3Smrg{ 7327ec681f3Smrg midgard_load_store_op op : 8; 7337ec681f3Smrg 7347ec681f3Smrg /* Source/dest reg */ 7357ec681f3Smrg unsigned reg : 5; 7367ec681f3Smrg 7377ec681f3Smrg /* Generally is a writemask. 7387ec681f3Smrg * For ST_ATTR and ST_TEX, unused. 7397ec681f3Smrg * For other stores, each bit masks 1/4th of the output. 
*/ 7407ec681f3Smrg unsigned mask : 4; 7417ec681f3Smrg 7427ec681f3Smrg /* Swizzle for stores, but for atomics it encodes also the source 7437ec681f3Smrg * register. This fits because atomics dont need a swizzle since they 7447ec681f3Smrg * are not vectorized instructions. */ 7457ec681f3Smrg unsigned swizzle : 8; 7467ec681f3Smrg 7477ec681f3Smrg /* Arg reg, meaning changes according to each opcode */ 7487ec681f3Smrg unsigned arg_comp : 2; 7497ec681f3Smrg unsigned arg_reg : 3; 7507ec681f3Smrg 7517ec681f3Smrg /* 64-bit address enable 7527ec681f3Smrg * 32-bit data type enable for CUBEMAP and perspective div. 7537ec681f3Smrg * Explicit indexing enable for LD_ATTR. 7547ec681f3Smrg * 64-bit coordinate enable for LD_IMAGE. */ 7557ec681f3Smrg bool bitsize_toggle : 1; 7567ec681f3Smrg 7577ec681f3Smrg /* These are mainly used for opcodes that have addresses. 7587ec681f3Smrg * For cmpxchg, index_reg is used for the comparison value. 7597ec681f3Smrg * For ops that access the attrib table, bit 1 encodes which table. 7607ec681f3Smrg * For LD_VAR and LD/ST_ATTR, bit 0 enables dest/src type inferral. */ 7617ec681f3Smrg midgard_index_address_format index_format : 2; 7627ec681f3Smrg unsigned index_comp : 2; 7637ec681f3Smrg unsigned index_reg : 3; 7647ec681f3Smrg unsigned index_shift : 4; 7657ec681f3Smrg 7667ec681f3Smrg /* Generaly is a signed offset, but has different bitsize and starts at 7677ec681f3Smrg * different bits depending on the opcode, LDST_*_DISPLACEMENT helpers 7687ec681f3Smrg * are recommended when packing/unpacking this attribute. 7697ec681f3Smrg * For LD_UBO, bit 0 enables ubo index immediate. 7707ec681f3Smrg * For LD_TILEBUFFER_RAW, bit 0 disables sample index immediate. 
*/ 7717ec681f3Smrg int signed_offset : 18; 7727ec681f3Smrg} 7737ec681f3Smrgmidgard_load_store_word; 7747ec681f3Smrg 7757ec681f3Smrgtypedef struct 7767ec681f3Smrg__attribute__((__packed__)) 7777ec681f3Smrg{ 7787ec681f3Smrg unsigned type : 4; 7797ec681f3Smrg unsigned next_type : 4; 7807ec681f3Smrg uint64_t word1 : 60; 7817ec681f3Smrg uint64_t word2 : 60; 7827ec681f3Smrg} 7837ec681f3Smrgmidgard_load_store; 7847ec681f3Smrg 7857ec681f3Smrg/* 8-bit register selector used in texture ops to select a bias/LOD/gradient 7867ec681f3Smrg * register, shoved into the `bias` field */ 7877ec681f3Smrg 7887ec681f3Smrgtypedef struct 7897ec681f3Smrg__attribute__((__packed__)) 7907ec681f3Smrg{ 7917ec681f3Smrg /* 32-bit register, clear for half-register */ 7927ec681f3Smrg unsigned full : 1; 7937ec681f3Smrg 7947ec681f3Smrg /* Register select between r28/r29 */ 7957ec681f3Smrg unsigned select : 1; 7967ec681f3Smrg 7977ec681f3Smrg /* For a half-register, selects the upper half */ 7987ec681f3Smrg unsigned upper : 1; 7997ec681f3Smrg 8007ec681f3Smrg /* Indexes into the register */ 8017ec681f3Smrg unsigned component : 2; 8027ec681f3Smrg 8037ec681f3Smrg /* Padding to make this 8-bit */ 8047ec681f3Smrg unsigned zero : 3; 8057ec681f3Smrg} 8067ec681f3Smrgmidgard_tex_register_select; 8077ec681f3Smrg 8087ec681f3Smrg/* Texture pipeline results are in r28-r29 */ 8097ec681f3Smrg#define REG_TEX_BASE 28 8107ec681f3Smrg 8117ec681f3Smrgenum mali_texture_op { 8127ec681f3Smrg /* [texture + LOD bias] 8137ec681f3Smrg * If the texture is mipmapped, barriers must be enabled in the 8147ec681f3Smrg * instruction word in order for this opcode to compute the output 8157ec681f3Smrg * correctly. */ 8167ec681f3Smrg midgard_tex_op_normal = 1, 8177ec681f3Smrg 8187ec681f3Smrg /* [texture + gradient for LOD and anisotropy] 8197ec681f3Smrg * Unlike midgard_tex_op_normal, this opcode does not require barriers 8207ec681f3Smrg * to compute the output correctly. 
*/ 8217ec681f3Smrg midgard_tex_op_gradient = 2, 8227ec681f3Smrg 8237ec681f3Smrg /* [unfiltered texturing] 8247ec681f3Smrg * Unlike midgard_tex_op_normal, this opcode does not require barriers 8257ec681f3Smrg * to compute the output correctly. */ 8267ec681f3Smrg midgard_tex_op_fetch = 4, 8277ec681f3Smrg 8287ec681f3Smrg /* [gradient from derivative] */ 8297ec681f3Smrg midgard_tex_op_grad_from_derivative = 9, 8307ec681f3Smrg 8317ec681f3Smrg /* [mov] */ 8327ec681f3Smrg midgard_tex_op_mov = 10, 8337ec681f3Smrg 8347ec681f3Smrg /* [noop] 8357ec681f3Smrg * Mostly used for barriers. */ 8367ec681f3Smrg midgard_tex_op_barrier = 11, 8377ec681f3Smrg 8387ec681f3Smrg /* [gradient from coords] */ 8397ec681f3Smrg midgard_tex_op_grad_from_coords = 12, 8407ec681f3Smrg 8417ec681f3Smrg /* [derivative] 8427ec681f3Smrg * Computes derivatives in 2x2 fragment blocks. */ 8437ec681f3Smrg midgard_tex_op_derivative = 13 8447ec681f3Smrg}; 8457ec681f3Smrg 8467ec681f3Smrgenum mali_sampler_type { 8477ec681f3Smrg /* 0 is reserved */ 8487ec681f3Smrg MALI_SAMPLER_FLOAT = 0x1, /* sampler */ 8497ec681f3Smrg MALI_SAMPLER_UNSIGNED = 0x2, /* usampler */ 8507ec681f3Smrg MALI_SAMPLER_SIGNED = 0x3, /* isampler */ 8517ec681f3Smrg}; 8527ec681f3Smrg 8537ec681f3Smrg/* Texture modes */ 8547ec681f3Smrgenum mali_texture_mode { 8557ec681f3Smrg TEXTURE_NORMAL = 1, 8567ec681f3Smrg TEXTURE_SHADOW = 5, 8577ec681f3Smrg TEXTURE_GATHER_SHADOW = 6, 8587ec681f3Smrg TEXTURE_GATHER_X = 8, 8597ec681f3Smrg TEXTURE_GATHER_Y = 9, 8607ec681f3Smrg TEXTURE_GATHER_Z = 10, 8617ec681f3Smrg TEXTURE_GATHER_W = 11, 8627ec681f3Smrg}; 8637ec681f3Smrg 8647ec681f3Smrgenum mali_derivative_mode { 8657ec681f3Smrg TEXTURE_DFDX = 0, 8667ec681f3Smrg TEXTURE_DFDY = 1, 8677ec681f3Smrg}; 8687ec681f3Smrg 8697ec681f3Smrgtypedef struct 8707ec681f3Smrg__attribute__((__packed__)) 8717ec681f3Smrg{ 8727ec681f3Smrg unsigned type : 4; 8737ec681f3Smrg unsigned next_type : 4; 8747ec681f3Smrg 8757ec681f3Smrg enum mali_texture_op op : 4; 8767ec681f3Smrg unsigned 
mode : 4; 8777ec681f3Smrg 8787ec681f3Smrg /* A little obscure, but last is set for the last texture operation in 8797ec681f3Smrg * a shader. cont appears to just be last's opposite (?). Yeah, I know, 8807ec681f3Smrg * kind of funky.. BiOpen thinks it could do with memory hinting, or 8817ec681f3Smrg * tile locking? */ 8827ec681f3Smrg 8837ec681f3Smrg unsigned cont : 1; 8847ec681f3Smrg unsigned last : 1; 8857ec681f3Smrg 8867ec681f3Smrg unsigned format : 2; 8877ec681f3Smrg 8887ec681f3Smrg /* Are sampler_handle/texture_handler respectively set by registers? If 8897ec681f3Smrg * true, the lower 8-bits of the respective field is a register word. 8907ec681f3Smrg * If false, they are an immediate */ 8917ec681f3Smrg 8927ec681f3Smrg unsigned sampler_register : 1; 8937ec681f3Smrg unsigned texture_register : 1; 8947ec681f3Smrg 8957ec681f3Smrg /* Is a register used to specify the 8967ec681f3Smrg * LOD/bias/offset? If set, use the `bias` field as 8977ec681f3Smrg * a register index. If clear, use the `bias` field 8987ec681f3Smrg * as an immediate. */ 8997ec681f3Smrg unsigned lod_register : 1; 9007ec681f3Smrg 9017ec681f3Smrg /* Is a register used to specify an offset? If set, use the 9027ec681f3Smrg * offset_reg_* fields to encode this, duplicated for each of the 9037ec681f3Smrg * components. 
If clear, there is implcitly always an immediate offst 9047ec681f3Smrg * specificed in offset_imm_* */ 9057ec681f3Smrg unsigned offset_register : 1; 9067ec681f3Smrg 9077ec681f3Smrg unsigned in_reg_full : 1; 9087ec681f3Smrg unsigned in_reg_select : 1; 9097ec681f3Smrg unsigned in_reg_upper : 1; 9107ec681f3Smrg unsigned in_reg_swizzle : 8; 9117ec681f3Smrg 9127ec681f3Smrg unsigned unknown8 : 2; 9137ec681f3Smrg 9147ec681f3Smrg unsigned out_full : 1; 9157ec681f3Smrg 9167ec681f3Smrg enum mali_sampler_type sampler_type : 2; 9177ec681f3Smrg 9187ec681f3Smrg unsigned out_reg_select : 1; 9197ec681f3Smrg unsigned out_upper : 1; 9207ec681f3Smrg 9217ec681f3Smrg unsigned mask : 4; 9227ec681f3Smrg 9237ec681f3Smrg /* Intriguingly, textures can take an outmod just like alu ops. Int 9247ec681f3Smrg * outmods are not supported as far as I can tell, so this is only 9257ec681f3Smrg * meaningful for float samplers */ 9267ec681f3Smrg midgard_outmod_float outmod : 2; 9277ec681f3Smrg 9287ec681f3Smrg unsigned swizzle : 8; 9297ec681f3Smrg 9307ec681f3Smrg /* These indicate how many bundles after this texture op may be 9317ec681f3Smrg * executed in parallel with this op. We may execute only ALU and 9327ec681f3Smrg * ld/st in parallel (not other textures), and obviously there cannot 9337ec681f3Smrg * be any dependency (the blob appears to forbid even accessing other 9347ec681f3Smrg * channels of a given texture register). */ 9357ec681f3Smrg 9367ec681f3Smrg unsigned out_of_order : 2; 9377ec681f3Smrg unsigned unknown4 : 10; 9387ec681f3Smrg 9397ec681f3Smrg /* In immediate mode, each offset field is an immediate range [0, 7]. 9407ec681f3Smrg * 9417ec681f3Smrg * In register mode, offset_x becomes a register (full, select, upper) 9427ec681f3Smrg * triplet followed by a vec3 swizzle is splattered across 9437ec681f3Smrg * offset_y/offset_z in a genuinely bizarre way. 
9447ec681f3Smrg * 9457ec681f3Smrg * For texel fetches in immediate mode, the range is the full [-8, 7], 9467ec681f3Smrg * but for normal texturing the top bit must be zero and a register 9477ec681f3Smrg * used instead. It's not clear where this limitation is from. 9487ec681f3Smrg * 9497ec681f3Smrg * union { 9507ec681f3Smrg * struct { 9517ec681f3Smrg * signed offset_x : 4; 9527ec681f3Smrg * signed offset_y : 4; 9537ec681f3Smrg * signed offset_z : 4; 9547ec681f3Smrg * } immediate; 9557ec681f3Smrg * struct { 9567ec681f3Smrg * bool full : 1; 9577ec681f3Smrg * bool select : 1; 9587ec681f3Smrg * bool upper : 1; 9597ec681f3Smrg * unsigned swizzle : 8; 9607ec681f3Smrg * unsigned zero : 1; 9617ec681f3Smrg * } register; 9627ec681f3Smrg * } 9637ec681f3Smrg */ 9647ec681f3Smrg 9657ec681f3Smrg unsigned offset : 12; 9667ec681f3Smrg 9677ec681f3Smrg /* In immediate bias mode, for a normal texture op, this is 9687ec681f3Smrg * texture bias, computed as int(2^8 * frac(biasf)), with 9697ec681f3Smrg * bias_int = floor(bias). For a textureLod, it's that, but 9707ec681f3Smrg * s/bias/lod. For a texel fetch, this is the LOD as-is. 9717ec681f3Smrg * 9727ec681f3Smrg * In register mode, this is a midgard_tex_register_select 9737ec681f3Smrg * structure and bias_int is zero */ 9747ec681f3Smrg 9757ec681f3Smrg unsigned bias : 8; 9767ec681f3Smrg signed bias_int : 8; 9777ec681f3Smrg 9787ec681f3Smrg /* If sampler/texture_register is set, the bottom 8-bits are 9797ec681f3Smrg * midgard_tex_register_select and the top 8-bits are zero. 
If they are 9807ec681f3Smrg * clear, they are immediate texture indices */ 9817ec681f3Smrg 9827ec681f3Smrg unsigned sampler_handle : 16; 9837ec681f3Smrg unsigned texture_handle : 16; 9847ec681f3Smrg} 9857ec681f3Smrgmidgard_texture_word; 9867ec681f3Smrg 9877ec681f3Smrg/* Technically barriers are texture instructions but it's less work to add them 9887ec681f3Smrg * as an explicitly zeroed special case, since most fields are forced to go to 9897ec681f3Smrg * zero */ 9907ec681f3Smrg 9917ec681f3Smrgtypedef struct 9927ec681f3Smrg__attribute__((__packed__)) 9937ec681f3Smrg{ 9947ec681f3Smrg unsigned type : 4; 9957ec681f3Smrg unsigned next_type : 4; 9967ec681f3Smrg 9977ec681f3Smrg /* op = TEXTURE_OP_BARRIER */ 9987ec681f3Smrg unsigned op : 6; 9997ec681f3Smrg unsigned zero1 : 2; 10007ec681f3Smrg 10017ec681f3Smrg /* Since helper invocations don't make any sense, these are forced to one */ 10027ec681f3Smrg unsigned cont : 1; 10037ec681f3Smrg unsigned last : 1; 10047ec681f3Smrg unsigned zero2 : 14; 10057ec681f3Smrg 10067ec681f3Smrg unsigned zero3 : 24; 10077ec681f3Smrg unsigned out_of_order : 4; 10087ec681f3Smrg unsigned zero4 : 4; 10097ec681f3Smrg 10107ec681f3Smrg uint64_t zero5; 10117ec681f3Smrg} midgard_texture_barrier_word; 10127ec681f3Smrg 10137ec681f3Smrgtypedef union midgard_constants { 10147ec681f3Smrg double f64[2]; 10157ec681f3Smrg uint64_t u64[2]; 10167ec681f3Smrg int64_t i64[2]; 10177ec681f3Smrg float f32[4]; 10187ec681f3Smrg uint32_t u32[4]; 10197ec681f3Smrg int32_t i32[4]; 10207ec681f3Smrg uint16_t f16[8]; 10217ec681f3Smrg uint16_t u16[8]; 10227ec681f3Smrg int16_t i16[8]; 10237ec681f3Smrg uint8_t u8[16]; 10247ec681f3Smrg int8_t i8[16]; 10257ec681f3Smrg} 10267ec681f3Smrgmidgard_constants; 10277ec681f3Smrg 10287ec681f3Smrgenum midgard_roundmode { 10297ec681f3Smrg MIDGARD_RTE = 0x0, /* round to even */ 10307ec681f3Smrg MIDGARD_RTZ = 0x1, /* round to zero */ 10317ec681f3Smrg MIDGARD_RTN = 0x2, /* round to negative */ 10327ec681f3Smrg MIDGARD_RTP = 0x3, /* round to 
positive */ 10337ec681f3Smrg}; 10347ec681f3Smrg 10357ec681f3Smrg#endif 1036