101e04c3fSmrg/* 201e04c3fSmrg Copyright (C) Intel Corp. 2006. All Rights Reserved. 301e04c3fSmrg Intel funded Tungsten Graphics to 401e04c3fSmrg develop this 3D driver. 501e04c3fSmrg 601e04c3fSmrg Permission is hereby granted, free of charge, to any person obtaining 701e04c3fSmrg a copy of this software and associated documentation files (the 801e04c3fSmrg "Software"), to deal in the Software without restriction, including 901e04c3fSmrg without limitation the rights to use, copy, modify, merge, publish, 1001e04c3fSmrg distribute, sublicense, and/or sell copies of the Software, and to 1101e04c3fSmrg permit persons to whom the Software is furnished to do so, subject to 1201e04c3fSmrg the following conditions: 1301e04c3fSmrg 1401e04c3fSmrg The above copyright notice and this permission notice (including the 1501e04c3fSmrg next paragraph) shall be included in all copies or substantial 1601e04c3fSmrg portions of the Software. 1701e04c3fSmrg 1801e04c3fSmrg THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 1901e04c3fSmrg EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 2001e04c3fSmrg MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 2101e04c3fSmrg IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 2201e04c3fSmrg LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 2301e04c3fSmrg OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 2401e04c3fSmrg WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
2501e04c3fSmrg 2601e04c3fSmrg **********************************************************************/ 2701e04c3fSmrg /* 2801e04c3fSmrg * Authors: 2901e04c3fSmrg * Keith Whitwell <keithw@vmware.com> 3001e04c3fSmrg */ 3101e04c3fSmrg 3201e04c3fSmrg#ifndef BRW_EU_DEFINES_H 3301e04c3fSmrg#define BRW_EU_DEFINES_H 3401e04c3fSmrg 357ec681f3Smrg#include <stdint.h> 367ec681f3Smrg#include <stdlib.h> 3701e04c3fSmrg#include "util/macros.h" 387ec681f3Smrg#include "dev/intel_device_info.h" 3901e04c3fSmrg 4001e04c3fSmrg/* The following hunk, up-to "Execution Unit" is used by both the 4101e04c3fSmrg * intel/compiler and i965 codebase. */ 4201e04c3fSmrg 4301e04c3fSmrg#define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low)) 4401e04c3fSmrg/* Using the GNU statement expression extension */ 4501e04c3fSmrg#define SET_FIELD(value, field) \ 4601e04c3fSmrg ({ \ 479f464c52Smaya uint32_t fieldval = (uint32_t)(value) << field ## _SHIFT; \ 4801e04c3fSmrg assert((fieldval & ~ field ## _MASK) == 0); \ 4901e04c3fSmrg fieldval & field ## _MASK; \ 5001e04c3fSmrg }) 5101e04c3fSmrg 5201e04c3fSmrg#define SET_BITS(value, high, low) \ 5301e04c3fSmrg ({ \ 549f464c52Smaya const uint32_t fieldval = (uint32_t)(value) << (low); \ 5501e04c3fSmrg assert((fieldval & ~INTEL_MASK(high, low)) == 0); \ 5601e04c3fSmrg fieldval & INTEL_MASK(high, low); \ 5701e04c3fSmrg }) 5801e04c3fSmrg 5901e04c3fSmrg#define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low)) 6001e04c3fSmrg#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) 6101e04c3fSmrg 6201e04c3fSmrg#define _3DPRIM_POINTLIST 0x01 6301e04c3fSmrg#define _3DPRIM_LINELIST 0x02 6401e04c3fSmrg#define _3DPRIM_LINESTRIP 0x03 6501e04c3fSmrg#define _3DPRIM_TRILIST 0x04 6601e04c3fSmrg#define _3DPRIM_TRISTRIP 0x05 6701e04c3fSmrg#define _3DPRIM_TRIFAN 0x06 6801e04c3fSmrg#define _3DPRIM_QUADLIST 0x07 6901e04c3fSmrg#define _3DPRIM_QUADSTRIP 0x08 7001e04c3fSmrg#define _3DPRIM_LINELIST_ADJ 0x09 /* G45+ */ 7101e04c3fSmrg#define 
_3DPRIM_LINESTRIP_ADJ 0x0A /* G45+ */ 7201e04c3fSmrg#define _3DPRIM_TRILIST_ADJ 0x0B /* G45+ */ 7301e04c3fSmrg#define _3DPRIM_TRISTRIP_ADJ 0x0C /* G45+ */ 7401e04c3fSmrg#define _3DPRIM_TRISTRIP_REVERSE 0x0D 7501e04c3fSmrg#define _3DPRIM_POLYGON 0x0E 7601e04c3fSmrg#define _3DPRIM_RECTLIST 0x0F 7701e04c3fSmrg#define _3DPRIM_LINELOOP 0x10 7801e04c3fSmrg#define _3DPRIM_POINTLIST_BF 0x11 7901e04c3fSmrg#define _3DPRIM_LINESTRIP_CONT 0x12 8001e04c3fSmrg#define _3DPRIM_LINESTRIP_BF 0x13 8101e04c3fSmrg#define _3DPRIM_LINESTRIP_CONT_BF 0x14 8201e04c3fSmrg#define _3DPRIM_TRIFAN_NOSTIPPLE 0x16 8301e04c3fSmrg#define _3DPRIM_PATCHLIST(n) ({ assert(n > 0 && n <= 32); 0x20 + (n - 1); }) 8401e04c3fSmrg 8501e04c3fSmrg/* Bitfields for the URB_WRITE message, DW2 of message header: */ 8601e04c3fSmrg#define URB_WRITE_PRIM_END 0x1 8701e04c3fSmrg#define URB_WRITE_PRIM_START 0x2 8801e04c3fSmrg#define URB_WRITE_PRIM_TYPE_SHIFT 2 8901e04c3fSmrg 9001e04c3fSmrg#define BRW_SPRITE_POINT_ENABLE 16 9101e04c3fSmrg 927ec681f3Smrg# define GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT 0 937ec681f3Smrg# define GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1 9401e04c3fSmrg 9501e04c3fSmrg/* Execution Unit (EU) defines 9601e04c3fSmrg */ 9701e04c3fSmrg 9801e04c3fSmrg#define BRW_ALIGN_1 0 9901e04c3fSmrg#define BRW_ALIGN_16 1 10001e04c3fSmrg 10101e04c3fSmrg#define BRW_ADDRESS_DIRECT 0 10201e04c3fSmrg#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 10301e04c3fSmrg 10401e04c3fSmrg#define BRW_CHANNEL_X 0 10501e04c3fSmrg#define BRW_CHANNEL_Y 1 10601e04c3fSmrg#define BRW_CHANNEL_Z 2 10701e04c3fSmrg#define BRW_CHANNEL_W 3 10801e04c3fSmrg 10901e04c3fSmrgenum brw_compression { 11001e04c3fSmrg BRW_COMPRESSION_NONE = 0, 11101e04c3fSmrg BRW_COMPRESSION_2NDHALF = 1, 11201e04c3fSmrg BRW_COMPRESSION_COMPRESSED = 2, 11301e04c3fSmrg}; 11401e04c3fSmrg 1157ec681f3Smrg#define GFX6_COMPRESSION_1Q 0 1167ec681f3Smrg#define GFX6_COMPRESSION_2Q 1 1177ec681f3Smrg#define GFX6_COMPRESSION_3Q 2 1187ec681f3Smrg#define GFX6_COMPRESSION_4Q 3 
1197ec681f3Smrg#define GFX6_COMPRESSION_1H 0 1207ec681f3Smrg#define GFX6_COMPRESSION_2H 2 12101e04c3fSmrg 12201e04c3fSmrgenum PACKED brw_conditional_mod { 12301e04c3fSmrg BRW_CONDITIONAL_NONE = 0, 12401e04c3fSmrg BRW_CONDITIONAL_Z = 1, 12501e04c3fSmrg BRW_CONDITIONAL_NZ = 2, 12601e04c3fSmrg BRW_CONDITIONAL_EQ = 1, /* Z */ 12701e04c3fSmrg BRW_CONDITIONAL_NEQ = 2, /* NZ */ 12801e04c3fSmrg BRW_CONDITIONAL_G = 3, 12901e04c3fSmrg BRW_CONDITIONAL_GE = 4, 13001e04c3fSmrg BRW_CONDITIONAL_L = 5, 13101e04c3fSmrg BRW_CONDITIONAL_LE = 6, 13201e04c3fSmrg BRW_CONDITIONAL_R = 7, /* Gen <= 5 */ 13301e04c3fSmrg BRW_CONDITIONAL_O = 8, 13401e04c3fSmrg BRW_CONDITIONAL_U = 9, 13501e04c3fSmrg}; 13601e04c3fSmrg 13701e04c3fSmrg#define BRW_DEBUG_NONE 0 13801e04c3fSmrg#define BRW_DEBUG_BREAKPOINT 1 13901e04c3fSmrg 14001e04c3fSmrg#define BRW_DEPENDENCY_NORMAL 0 14101e04c3fSmrg#define BRW_DEPENDENCY_NOTCLEARED 1 14201e04c3fSmrg#define BRW_DEPENDENCY_NOTCHECKED 2 14301e04c3fSmrg#define BRW_DEPENDENCY_DISABLE 3 14401e04c3fSmrg 14501e04c3fSmrgenum PACKED brw_execution_size { 14601e04c3fSmrg BRW_EXECUTE_1 = 0, 14701e04c3fSmrg BRW_EXECUTE_2 = 1, 14801e04c3fSmrg BRW_EXECUTE_4 = 2, 14901e04c3fSmrg BRW_EXECUTE_8 = 3, 15001e04c3fSmrg BRW_EXECUTE_16 = 4, 15101e04c3fSmrg BRW_EXECUTE_32 = 5, 15201e04c3fSmrg}; 15301e04c3fSmrg 15401e04c3fSmrgenum PACKED brw_horizontal_stride { 15501e04c3fSmrg BRW_HORIZONTAL_STRIDE_0 = 0, 15601e04c3fSmrg BRW_HORIZONTAL_STRIDE_1 = 1, 15701e04c3fSmrg BRW_HORIZONTAL_STRIDE_2 = 2, 15801e04c3fSmrg BRW_HORIZONTAL_STRIDE_4 = 3, 15901e04c3fSmrg}; 16001e04c3fSmrg 1617ec681f3Smrgenum PACKED gfx10_align1_3src_src_horizontal_stride { 16201e04c3fSmrg BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0 = 0, 16301e04c3fSmrg BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1 = 1, 16401e04c3fSmrg BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2 = 2, 16501e04c3fSmrg BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4 = 3, 16601e04c3fSmrg}; 16701e04c3fSmrg 1687ec681f3Smrgenum PACKED gfx10_align1_3src_dst_horizontal_stride { 
16901e04c3fSmrg BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1 = 0, 17001e04c3fSmrg BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_2 = 1, 17101e04c3fSmrg}; 17201e04c3fSmrg 17301e04c3fSmrg#define BRW_INSTRUCTION_NORMAL 0 17401e04c3fSmrg#define BRW_INSTRUCTION_SATURATE 1 17501e04c3fSmrg 17601e04c3fSmrg#define BRW_MASK_ENABLE 0 17701e04c3fSmrg#define BRW_MASK_DISABLE 1 17801e04c3fSmrg 17901e04c3fSmrg/** @{ 18001e04c3fSmrg * 1817ec681f3Smrg * Gfx6 has replaced "mask enable/disable" with WECtrl, which is 18201e04c3fSmrg * effectively the same but much simpler to think about. Now, there 18301e04c3fSmrg * are two contributors ANDed together to whether channels are 18401e04c3fSmrg * executed: The predication on the instruction, and the channel write 18501e04c3fSmrg * enable. 18601e04c3fSmrg */ 18701e04c3fSmrg/** 18801e04c3fSmrg * This is the default value. It means that a channel's write enable is set 18901e04c3fSmrg * if the per-channel IP is pointing at this instruction. 19001e04c3fSmrg */ 19101e04c3fSmrg#define BRW_WE_NORMAL 0 19201e04c3fSmrg/** 19301e04c3fSmrg * This is used like BRW_MASK_DISABLE, and causes all channels to have 19401e04c3fSmrg * their write enable set. Note that predication still contributes to 19501e04c3fSmrg * whether the channel actually gets written. 19601e04c3fSmrg */ 19701e04c3fSmrg#define BRW_WE_ALL 1 19801e04c3fSmrg/** @} */ 19901e04c3fSmrg 20001e04c3fSmrgenum opcode { 2017ec681f3Smrg /* These are the actual hardware instructions. 
*/ 2027ec681f3Smrg BRW_OPCODE_ILLEGAL, 2037ec681f3Smrg BRW_OPCODE_SYNC, 2047ec681f3Smrg BRW_OPCODE_MOV, 2057ec681f3Smrg BRW_OPCODE_SEL, 2067ec681f3Smrg BRW_OPCODE_MOVI, /**< G45+ */ 2077ec681f3Smrg BRW_OPCODE_NOT, 2087ec681f3Smrg BRW_OPCODE_AND, 2097ec681f3Smrg BRW_OPCODE_OR, 2107ec681f3Smrg BRW_OPCODE_XOR, 2117ec681f3Smrg BRW_OPCODE_SHR, 2127ec681f3Smrg BRW_OPCODE_SHL, 2137ec681f3Smrg BRW_OPCODE_DIM, /**< Gfx7.5 only */ 2147ec681f3Smrg BRW_OPCODE_SMOV, /**< Gfx8+ */ 2157ec681f3Smrg BRW_OPCODE_ASR, 2167ec681f3Smrg BRW_OPCODE_ROR, /**< Gfx11+ */ 2177ec681f3Smrg BRW_OPCODE_ROL, /**< Gfx11+ */ 2187ec681f3Smrg BRW_OPCODE_CMP, 2197ec681f3Smrg BRW_OPCODE_CMPN, 2207ec681f3Smrg BRW_OPCODE_CSEL, /**< Gfx8+ */ 2217ec681f3Smrg BRW_OPCODE_F32TO16, /**< Gfx7 only */ 2227ec681f3Smrg BRW_OPCODE_F16TO32, /**< Gfx7 only */ 2237ec681f3Smrg BRW_OPCODE_BFREV, /**< Gfx7+ */ 2247ec681f3Smrg BRW_OPCODE_BFE, /**< Gfx7+ */ 2257ec681f3Smrg BRW_OPCODE_BFI1, /**< Gfx7+ */ 2267ec681f3Smrg BRW_OPCODE_BFI2, /**< Gfx7+ */ 2277ec681f3Smrg BRW_OPCODE_JMPI, 2287ec681f3Smrg BRW_OPCODE_BRD, /**< Gfx7+ */ 2297ec681f3Smrg BRW_OPCODE_IF, 2307ec681f3Smrg BRW_OPCODE_IFF, /**< Pre-Gfx6 */ 2317ec681f3Smrg BRW_OPCODE_BRC, /**< Gfx7+ */ 2327ec681f3Smrg BRW_OPCODE_ELSE, 2337ec681f3Smrg BRW_OPCODE_ENDIF, 2347ec681f3Smrg BRW_OPCODE_DO, /**< Pre-Gfx6 */ 2357ec681f3Smrg BRW_OPCODE_CASE, /**< Gfx6 only */ 2367ec681f3Smrg BRW_OPCODE_WHILE, 2377ec681f3Smrg BRW_OPCODE_BREAK, 2387ec681f3Smrg BRW_OPCODE_CONTINUE, 2397ec681f3Smrg BRW_OPCODE_HALT, 2407ec681f3Smrg BRW_OPCODE_CALLA, /**< Gfx7.5+ */ 2417ec681f3Smrg BRW_OPCODE_MSAVE, /**< Pre-Gfx6 */ 2427ec681f3Smrg BRW_OPCODE_CALL, /**< Gfx6+ */ 2437ec681f3Smrg BRW_OPCODE_MREST, /**< Pre-Gfx6 */ 2447ec681f3Smrg BRW_OPCODE_RET, /**< Gfx6+ */ 2457ec681f3Smrg BRW_OPCODE_PUSH, /**< Pre-Gfx6 */ 2467ec681f3Smrg BRW_OPCODE_FORK, /**< Gfx6 only */ 2477ec681f3Smrg BRW_OPCODE_GOTO, /**< Gfx8+ */ 2487ec681f3Smrg BRW_OPCODE_POP, /**< Pre-Gfx6 */ 2497ec681f3Smrg BRW_OPCODE_WAIT, 
2507ec681f3Smrg BRW_OPCODE_SEND, 2517ec681f3Smrg BRW_OPCODE_SENDC, 2527ec681f3Smrg BRW_OPCODE_SENDS, /**< Gfx9+ */ 2537ec681f3Smrg BRW_OPCODE_SENDSC, /**< Gfx9+ */ 2547ec681f3Smrg BRW_OPCODE_MATH, /**< Gfx6+ */ 2557ec681f3Smrg BRW_OPCODE_ADD, 2567ec681f3Smrg BRW_OPCODE_MUL, 2577ec681f3Smrg BRW_OPCODE_AVG, 2587ec681f3Smrg BRW_OPCODE_FRC, 2597ec681f3Smrg BRW_OPCODE_RNDU, 2607ec681f3Smrg BRW_OPCODE_RNDD, 2617ec681f3Smrg BRW_OPCODE_RNDE, 2627ec681f3Smrg BRW_OPCODE_RNDZ, 2637ec681f3Smrg BRW_OPCODE_MAC, 2647ec681f3Smrg BRW_OPCODE_MACH, 2657ec681f3Smrg BRW_OPCODE_LZD, 2667ec681f3Smrg BRW_OPCODE_FBH, /**< Gfx7+ */ 2677ec681f3Smrg BRW_OPCODE_FBL, /**< Gfx7+ */ 2687ec681f3Smrg BRW_OPCODE_CBIT, /**< Gfx7+ */ 2697ec681f3Smrg BRW_OPCODE_ADDC, /**< Gfx7+ */ 2707ec681f3Smrg BRW_OPCODE_SUBB, /**< Gfx7+ */ 2717ec681f3Smrg BRW_OPCODE_SAD2, 2727ec681f3Smrg BRW_OPCODE_SADA2, 2737ec681f3Smrg BRW_OPCODE_ADD3, /* Gen12+ only */ 2747ec681f3Smrg BRW_OPCODE_DP4, 2757ec681f3Smrg BRW_OPCODE_DPH, 2767ec681f3Smrg BRW_OPCODE_DP3, 2777ec681f3Smrg BRW_OPCODE_DP2, 2787ec681f3Smrg BRW_OPCODE_DP4A, /**< Gfx12+ */ 2797ec681f3Smrg BRW_OPCODE_LINE, 2807ec681f3Smrg BRW_OPCODE_PLN, /**< G45+ */ 2817ec681f3Smrg BRW_OPCODE_MAD, /**< Gfx6+ */ 2827ec681f3Smrg BRW_OPCODE_LRP, /**< Gfx6+ */ 2837ec681f3Smrg BRW_OPCODE_MADM, /**< Gfx8+ */ 2847ec681f3Smrg BRW_OPCODE_NENOP, /**< G45 only */ 2857ec681f3Smrg BRW_OPCODE_NOP, 2867ec681f3Smrg 2877ec681f3Smrg NUM_BRW_OPCODES, 28801e04c3fSmrg 28901e04c3fSmrg /* These are compiler backend opcodes that get translated into other 29001e04c3fSmrg * instructions. 29101e04c3fSmrg */ 2927ec681f3Smrg FS_OPCODE_FB_WRITE = NUM_BRW_OPCODES, 29301e04c3fSmrg 29401e04c3fSmrg /** 29501e04c3fSmrg * Same as FS_OPCODE_FB_WRITE but expects its arguments separately as 29601e04c3fSmrg * individual sources instead of as a single payload blob. The 29701e04c3fSmrg * position/ordering of the arguments are defined by the enum 29801e04c3fSmrg * fb_write_logical_srcs. 
29901e04c3fSmrg */ 30001e04c3fSmrg FS_OPCODE_FB_WRITE_LOGICAL, 30101e04c3fSmrg 30201e04c3fSmrg FS_OPCODE_REP_FB_WRITE, 30301e04c3fSmrg 30401e04c3fSmrg FS_OPCODE_FB_READ, 30501e04c3fSmrg FS_OPCODE_FB_READ_LOGICAL, 30601e04c3fSmrg 30701e04c3fSmrg SHADER_OPCODE_RCP, 30801e04c3fSmrg SHADER_OPCODE_RSQ, 30901e04c3fSmrg SHADER_OPCODE_SQRT, 31001e04c3fSmrg SHADER_OPCODE_EXP2, 31101e04c3fSmrg SHADER_OPCODE_LOG2, 31201e04c3fSmrg SHADER_OPCODE_POW, 31301e04c3fSmrg SHADER_OPCODE_INT_QUOTIENT, 31401e04c3fSmrg SHADER_OPCODE_INT_REMAINDER, 31501e04c3fSmrg SHADER_OPCODE_SIN, 31601e04c3fSmrg SHADER_OPCODE_COS, 31701e04c3fSmrg 3189f464c52Smaya /** 3199f464c52Smaya * A generic "send" opcode. The first two sources are the message 3209f464c52Smaya * descriptor and extended message descriptor respectively. The third 3219f464c52Smaya * and optional fourth sources are the message payload 3229f464c52Smaya */ 3239f464c52Smaya SHADER_OPCODE_SEND, 3249f464c52Smaya 3257ec681f3Smrg /** 3267ec681f3Smrg * An "undefined" write which does nothing but indicates to liveness that 3277ec681f3Smrg * we don't care about any values in the register which predate this 3287ec681f3Smrg * instruction. Used to prevent partial writes from causing issues with 3297ec681f3Smrg * live ranges. 3307ec681f3Smrg */ 3317ec681f3Smrg SHADER_OPCODE_UNDEF, 3327ec681f3Smrg 33301e04c3fSmrg /** 33401e04c3fSmrg * Texture sampling opcodes. 33501e04c3fSmrg * 33601e04c3fSmrg * LOGICAL opcodes are eventually translated to the matching non-LOGICAL 33701e04c3fSmrg * opcode but instead of taking a single payload blob they expect their 33801e04c3fSmrg * arguments separately as individual sources. The position/ordering of the 33901e04c3fSmrg * arguments are defined by the enum tex_logical_srcs. 
34001e04c3fSmrg */ 34101e04c3fSmrg SHADER_OPCODE_TEX, 34201e04c3fSmrg SHADER_OPCODE_TEX_LOGICAL, 34301e04c3fSmrg SHADER_OPCODE_TXD, 34401e04c3fSmrg SHADER_OPCODE_TXD_LOGICAL, 34501e04c3fSmrg SHADER_OPCODE_TXF, 34601e04c3fSmrg SHADER_OPCODE_TXF_LOGICAL, 34701e04c3fSmrg SHADER_OPCODE_TXF_LZ, 34801e04c3fSmrg SHADER_OPCODE_TXL, 34901e04c3fSmrg SHADER_OPCODE_TXL_LOGICAL, 35001e04c3fSmrg SHADER_OPCODE_TXL_LZ, 35101e04c3fSmrg SHADER_OPCODE_TXS, 35201e04c3fSmrg SHADER_OPCODE_TXS_LOGICAL, 35301e04c3fSmrg FS_OPCODE_TXB, 35401e04c3fSmrg FS_OPCODE_TXB_LOGICAL, 35501e04c3fSmrg SHADER_OPCODE_TXF_CMS, 35601e04c3fSmrg SHADER_OPCODE_TXF_CMS_LOGICAL, 35701e04c3fSmrg SHADER_OPCODE_TXF_CMS_W, 35801e04c3fSmrg SHADER_OPCODE_TXF_CMS_W_LOGICAL, 35901e04c3fSmrg SHADER_OPCODE_TXF_UMS, 36001e04c3fSmrg SHADER_OPCODE_TXF_UMS_LOGICAL, 36101e04c3fSmrg SHADER_OPCODE_TXF_MCS, 36201e04c3fSmrg SHADER_OPCODE_TXF_MCS_LOGICAL, 36301e04c3fSmrg SHADER_OPCODE_LOD, 36401e04c3fSmrg SHADER_OPCODE_LOD_LOGICAL, 36501e04c3fSmrg SHADER_OPCODE_TG4, 36601e04c3fSmrg SHADER_OPCODE_TG4_LOGICAL, 36701e04c3fSmrg SHADER_OPCODE_TG4_OFFSET, 36801e04c3fSmrg SHADER_OPCODE_TG4_OFFSET_LOGICAL, 36901e04c3fSmrg SHADER_OPCODE_SAMPLEINFO, 37001e04c3fSmrg SHADER_OPCODE_SAMPLEINFO_LOGICAL, 37101e04c3fSmrg 3729f464c52Smaya SHADER_OPCODE_IMAGE_SIZE_LOGICAL, 37301e04c3fSmrg 37401e04c3fSmrg /** 37501e04c3fSmrg * Combines multiple sources of size 1 into a larger virtual GRF. 37601e04c3fSmrg * For example, parameters for a send-from-GRF message. Or, updating 37701e04c3fSmrg * channels of a size 4 VGRF used to store vec4s such as texturing results. 37801e04c3fSmrg * 37901e04c3fSmrg * This will be lowered into MOVs from each source to consecutive offsets 38001e04c3fSmrg * of the destination VGRF. 38101e04c3fSmrg * 38201e04c3fSmrg * src[0] may be BAD_FILE. If so, the lowering pass skips emitting the MOV, 38301e04c3fSmrg * but still reserves the first channel of the destination VGRF. 
This can be 38401e04c3fSmrg * used to reserve space for, say, a message header set up by the generators. 38501e04c3fSmrg */ 38601e04c3fSmrg SHADER_OPCODE_LOAD_PAYLOAD, 38701e04c3fSmrg 38801e04c3fSmrg /** 38901e04c3fSmrg * Packs a number of sources into a single value. Unlike LOAD_PAYLOAD, this 39001e04c3fSmrg * acts intra-channel, obtaining the final value for each channel by 39101e04c3fSmrg * combining the sources values for the same channel, the first source 39201e04c3fSmrg * occupying the lowest bits and the last source occupying the highest 39301e04c3fSmrg * bits. 39401e04c3fSmrg */ 39501e04c3fSmrg FS_OPCODE_PACK, 39601e04c3fSmrg 39701e04c3fSmrg SHADER_OPCODE_SHADER_TIME_ADD, 39801e04c3fSmrg 39901e04c3fSmrg /** 40001e04c3fSmrg * Typed and untyped surface access opcodes. 40101e04c3fSmrg * 40201e04c3fSmrg * LOGICAL opcodes are eventually translated to the matching non-LOGICAL 40301e04c3fSmrg * opcode but instead of taking a single payload blob they expect their 40401e04c3fSmrg * arguments separately as individual sources: 40501e04c3fSmrg * 40601e04c3fSmrg * Source 0: [required] Surface coordinates. 40701e04c3fSmrg * Source 1: [optional] Operation source. 40801e04c3fSmrg * Source 2: [required] Surface index. 40901e04c3fSmrg * Source 3: [required] Number of coordinate components (as UD immediate). 41001e04c3fSmrg * Source 4: [required] Opcode-specific control immediate, same as source 2 41101e04c3fSmrg * of the matching non-LOGICAL opcode. 
41201e04c3fSmrg */ 4139f464c52Smaya VEC4_OPCODE_UNTYPED_ATOMIC, 41401e04c3fSmrg SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, 41501e04c3fSmrg SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL, 4169f464c52Smaya VEC4_OPCODE_UNTYPED_SURFACE_READ, 41701e04c3fSmrg SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL, 4189f464c52Smaya VEC4_OPCODE_UNTYPED_SURFACE_WRITE, 41901e04c3fSmrg SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL, 42001e04c3fSmrg 4217ec681f3Smrg SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL, 4227ec681f3Smrg SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL, 4237ec681f3Smrg SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL, 4247ec681f3Smrg 4259f464c52Smaya /** 4269f464c52Smaya * Untyped A64 surface access opcodes. 4279f464c52Smaya * 4289f464c52Smaya * Source 0: 64-bit address 4299f464c52Smaya * Source 1: Operational source 4309f464c52Smaya * Source 2: [required] Opcode-specific control immediate, same as source 2 4319f464c52Smaya * of the matching non-LOGICAL opcode. 4329f464c52Smaya */ 4339f464c52Smaya SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL, 4349f464c52Smaya SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL, 4359f464c52Smaya SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL, 4369f464c52Smaya SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL, 4377ec681f3Smrg SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL, 4387ec681f3Smrg SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL, 4397ec681f3Smrg SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL, 4409f464c52Smaya SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, 4417ec681f3Smrg SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL, 4429f464c52Smaya SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL, 4437ec681f3Smrg SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL, 4447ec681f3Smrg SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL, 4457ec681f3Smrg SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL, 4469f464c52Smaya 44701e04c3fSmrg SHADER_OPCODE_TYPED_ATOMIC_LOGICAL, 44801e04c3fSmrg SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL, 44901e04c3fSmrg SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, 45001e04c3fSmrg 
45101e04c3fSmrg SHADER_OPCODE_RND_MODE, 4527ec681f3Smrg SHADER_OPCODE_FLOAT_CONTROL_MODE, 45301e04c3fSmrg 45401e04c3fSmrg /** 45501e04c3fSmrg * Byte scattered write/read opcodes. 45601e04c3fSmrg * 45701e04c3fSmrg * LOGICAL opcodes are eventually translated to the matching non-LOGICAL 45801e04c3fSmrg * opcode, but instead of taking a single payload blog they expect their 45901e04c3fSmrg * arguments separately as individual sources, like untyped write/read. 46001e04c3fSmrg */ 46101e04c3fSmrg SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL, 46201e04c3fSmrg SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL, 4637ec681f3Smrg SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL, 4647ec681f3Smrg SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL, 46501e04c3fSmrg 4667ec681f3Smrg /** 4677ec681f3Smrg * Memory fence messages. 4687ec681f3Smrg * 4697ec681f3Smrg * Source 0: Must be register g0, used as header. 4707ec681f3Smrg * Source 1: Immediate bool to indicate whether control is returned to the 4717ec681f3Smrg * thread only after the fence has been honored. 4727ec681f3Smrg * Source 2: Immediate byte indicating which memory to fence. Zero means 4737ec681f3Smrg * global memory; GFX7_BTI_SLM means SLM (for Gfx11+ only). 4747ec681f3Smrg * 4757ec681f3Smrg * Vec4 backend only uses Source 0. 4767ec681f3Smrg */ 47701e04c3fSmrg SHADER_OPCODE_MEMORY_FENCE, 47801e04c3fSmrg 4797ec681f3Smrg /** 4807ec681f3Smrg * Scheduling-only fence. 4817ec681f3Smrg * 4827ec681f3Smrg * Sources can be used to force a stall until the registers in those are 4837ec681f3Smrg * available. This might generate MOVs or SYNC_NOPs (Gfx12+). 4847ec681f3Smrg */ 4857ec681f3Smrg FS_OPCODE_SCHEDULING_FENCE, 4867ec681f3Smrg 4877ec681f3Smrg SHADER_OPCODE_GFX4_SCRATCH_READ, 4887ec681f3Smrg SHADER_OPCODE_GFX4_SCRATCH_WRITE, 4897ec681f3Smrg SHADER_OPCODE_GFX7_SCRATCH_READ, 4907ec681f3Smrg 4917ec681f3Smrg SHADER_OPCODE_SCRATCH_HEADER, 49201e04c3fSmrg 49301e04c3fSmrg /** 4947ec681f3Smrg * Gfx8+ SIMD8 URB Read messages. 
49501e04c3fSmrg */ 49601e04c3fSmrg SHADER_OPCODE_URB_READ_SIMD8, 49701e04c3fSmrg SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, 49801e04c3fSmrg 49901e04c3fSmrg SHADER_OPCODE_URB_WRITE_SIMD8, 50001e04c3fSmrg SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT, 50101e04c3fSmrg SHADER_OPCODE_URB_WRITE_SIMD8_MASKED, 50201e04c3fSmrg SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT, 50301e04c3fSmrg 50401e04c3fSmrg /** 50501e04c3fSmrg * Return the index of an arbitrary live channel (i.e. one of the channels 50601e04c3fSmrg * enabled in the current execution mask) and assign it to the first 50701e04c3fSmrg * component of the destination. Expected to be used as input for the 50801e04c3fSmrg * BROADCAST pseudo-opcode. 50901e04c3fSmrg */ 51001e04c3fSmrg SHADER_OPCODE_FIND_LIVE_CHANNEL, 51101e04c3fSmrg 5127ec681f3Smrg /** 5137ec681f3Smrg * Return the current execution mask in the specified flag subregister. 5147ec681f3Smrg * Can be CSE'ed more easily than a plain MOV from the ce0 ARF register. 5157ec681f3Smrg */ 5167ec681f3Smrg FS_OPCODE_LOAD_LIVE_CHANNELS, 5177ec681f3Smrg 51801e04c3fSmrg /** 51901e04c3fSmrg * Pick the channel from its first source register given by the index 52001e04c3fSmrg * specified as second source. Useful for variable indexing of surfaces. 52101e04c3fSmrg * 52201e04c3fSmrg * Note that because the result of this instruction is by definition 52301e04c3fSmrg * uniform and it can always be splatted to multiple channels using a 52401e04c3fSmrg * scalar regioning mode, only the first channel of the destination region 52501e04c3fSmrg * is guaranteed to be updated, which implies that BROADCAST instructions 52601e04c3fSmrg * should usually be marked force_writemask_all. 52701e04c3fSmrg */ 52801e04c3fSmrg SHADER_OPCODE_BROADCAST, 52901e04c3fSmrg 53001e04c3fSmrg /* Pick the channel from its first source register given by the index 53101e04c3fSmrg * specified as second source. 
53201e04c3fSmrg * 53301e04c3fSmrg * This is similar to the BROADCAST instruction except that it takes a 53401e04c3fSmrg * dynamic index and potentially puts a different value in each output 53501e04c3fSmrg * channel. 53601e04c3fSmrg */ 53701e04c3fSmrg SHADER_OPCODE_SHUFFLE, 53801e04c3fSmrg 53901e04c3fSmrg /* Select between src0 and src1 based on channel enables. 54001e04c3fSmrg * 54101e04c3fSmrg * This instruction copies src0 into the enabled channels of the 54201e04c3fSmrg * destination and copies src1 into the disabled channels. 54301e04c3fSmrg */ 54401e04c3fSmrg SHADER_OPCODE_SEL_EXEC, 54501e04c3fSmrg 54601e04c3fSmrg /* This turns into an align16 mov from src0 to dst with a swizzle 54701e04c3fSmrg * provided as an immediate in src1. 54801e04c3fSmrg */ 54901e04c3fSmrg SHADER_OPCODE_QUAD_SWIZZLE, 55001e04c3fSmrg 55101e04c3fSmrg /* Take every Nth element in src0 and broadcast it to the group of N 55201e04c3fSmrg * channels in which it lives in the destination. The offset within the 55301e04c3fSmrg * cluster is given by src1 and the cluster size is given by src2. 55401e04c3fSmrg */ 55501e04c3fSmrg SHADER_OPCODE_CLUSTER_BROADCAST, 55601e04c3fSmrg 55701e04c3fSmrg SHADER_OPCODE_GET_BUFFER_SIZE, 55801e04c3fSmrg 55901e04c3fSmrg SHADER_OPCODE_INTERLOCK, 56001e04c3fSmrg 5617ec681f3Smrg /** Target for a HALT 5627ec681f3Smrg * 5637ec681f3Smrg * All HALT instructions in a shader must target the same jump point and 5647ec681f3Smrg * that point is denoted by a HALT_TARGET instruction. 
5657ec681f3Smrg */ 5667ec681f3Smrg SHADER_OPCODE_HALT_TARGET, 5677ec681f3Smrg 56801e04c3fSmrg VEC4_OPCODE_MOV_BYTES, 56901e04c3fSmrg VEC4_OPCODE_PACK_BYTES, 57001e04c3fSmrg VEC4_OPCODE_UNPACK_UNIFORM, 57101e04c3fSmrg VEC4_OPCODE_DOUBLE_TO_F32, 57201e04c3fSmrg VEC4_OPCODE_DOUBLE_TO_D32, 57301e04c3fSmrg VEC4_OPCODE_DOUBLE_TO_U32, 57401e04c3fSmrg VEC4_OPCODE_TO_DOUBLE, 57501e04c3fSmrg VEC4_OPCODE_PICK_LOW_32BIT, 57601e04c3fSmrg VEC4_OPCODE_PICK_HIGH_32BIT, 57701e04c3fSmrg VEC4_OPCODE_SET_LOW_32BIT, 57801e04c3fSmrg VEC4_OPCODE_SET_HIGH_32BIT, 5797ec681f3Smrg VEC4_OPCODE_MOV_FOR_SCRATCH, 5807ec681f3Smrg VEC4_OPCODE_ZERO_OOB_PUSH_REGS, 58101e04c3fSmrg 58201e04c3fSmrg FS_OPCODE_DDX_COARSE, 58301e04c3fSmrg FS_OPCODE_DDX_FINE, 58401e04c3fSmrg /** 58501e04c3fSmrg * Compute dFdy(), dFdyCoarse(), or dFdyFine(). 58601e04c3fSmrg */ 58701e04c3fSmrg FS_OPCODE_DDY_COARSE, 58801e04c3fSmrg FS_OPCODE_DDY_FINE, 58901e04c3fSmrg FS_OPCODE_LINTERP, 59001e04c3fSmrg FS_OPCODE_PIXEL_X, 59101e04c3fSmrg FS_OPCODE_PIXEL_Y, 59201e04c3fSmrg FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 5937ec681f3Smrg FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GFX7, 5947ec681f3Smrg FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4, 59501e04c3fSmrg FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL, 59601e04c3fSmrg FS_OPCODE_SET_SAMPLE_ID, 59701e04c3fSmrg FS_OPCODE_PACK_HALF_2x16_SPLIT, 59801e04c3fSmrg FS_OPCODE_INTERPOLATE_AT_SAMPLE, 59901e04c3fSmrg FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, 60001e04c3fSmrg FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, 60101e04c3fSmrg 60201e04c3fSmrg VS_OPCODE_URB_WRITE, 60301e04c3fSmrg VS_OPCODE_PULL_CONSTANT_LOAD, 6047ec681f3Smrg VS_OPCODE_PULL_CONSTANT_LOAD_GFX7, 60501e04c3fSmrg 60601e04c3fSmrg VS_OPCODE_UNPACK_FLAGS_SIMD4X2, 60701e04c3fSmrg 60801e04c3fSmrg /** 60901e04c3fSmrg * Write geometry shader output data to the URB. 61001e04c3fSmrg * 61101e04c3fSmrg * Unlike VS_OPCODE_URB_WRITE, this opcode doesn't do an implied move from 61201e04c3fSmrg * R0 to the first MRF. 
This allows the geometry shader to override the 61301e04c3fSmrg * "Slot {0,1} Offset" fields in the message header. 61401e04c3fSmrg */ 61501e04c3fSmrg GS_OPCODE_URB_WRITE, 61601e04c3fSmrg 61701e04c3fSmrg /** 61801e04c3fSmrg * Write geometry shader output data to the URB and request a new URB 6197ec681f3Smrg * handle (gfx6). 62001e04c3fSmrg * 62101e04c3fSmrg * This opcode doesn't do an implied move from R0 to the first MRF. 62201e04c3fSmrg */ 62301e04c3fSmrg GS_OPCODE_URB_WRITE_ALLOCATE, 62401e04c3fSmrg 62501e04c3fSmrg /** 62601e04c3fSmrg * Terminate the geometry shader thread by doing an empty URB write. 62701e04c3fSmrg * 62801e04c3fSmrg * This opcode doesn't do an implied move from R0 to the first MRF. This 62901e04c3fSmrg * allows the geometry shader to override the "GS Number of Output Vertices 63001e04c3fSmrg * for Slot {0,1}" fields in the message header. 63101e04c3fSmrg */ 63201e04c3fSmrg GS_OPCODE_THREAD_END, 63301e04c3fSmrg 63401e04c3fSmrg /** 63501e04c3fSmrg * Set the "Slot {0,1} Offset" fields of a URB_WRITE message header. 63601e04c3fSmrg * 63701e04c3fSmrg * - dst is the MRF containing the message header. 63801e04c3fSmrg * 63901e04c3fSmrg * - src0.x indicates which portion of the URB should be written to (e.g. a 64001e04c3fSmrg * vertex number) 64101e04c3fSmrg * 64201e04c3fSmrg * - src1 is an immediate multiplier which will be applied to src0 64301e04c3fSmrg * (e.g. the size of a single vertex in the URB). 64401e04c3fSmrg * 64501e04c3fSmrg * Note: the hardware will apply this offset *in addition to* the offset in 64601e04c3fSmrg * vec4_instruction::offset. 64701e04c3fSmrg */ 64801e04c3fSmrg GS_OPCODE_SET_WRITE_OFFSET, 64901e04c3fSmrg 65001e04c3fSmrg /** 65101e04c3fSmrg * Set the "GS Number of Output Vertices for Slot {0,1}" fields of a 65201e04c3fSmrg * URB_WRITE message header. 65301e04c3fSmrg * 65401e04c3fSmrg * - dst is the MRF containing the message header. 65501e04c3fSmrg * 65601e04c3fSmrg * - src0.x is the vertex count. 
The upper 16 bits will be ignored. 65701e04c3fSmrg */ 65801e04c3fSmrg GS_OPCODE_SET_VERTEX_COUNT, 65901e04c3fSmrg 66001e04c3fSmrg /** 66101e04c3fSmrg * Set DWORD 2 of dst to the value in src. 66201e04c3fSmrg */ 66301e04c3fSmrg GS_OPCODE_SET_DWORD_2, 66401e04c3fSmrg 66501e04c3fSmrg /** 66601e04c3fSmrg * Prepare the dst register for storage in the "Channel Mask" fields of a 66701e04c3fSmrg * URB_WRITE message header. 66801e04c3fSmrg * 66901e04c3fSmrg * DWORD 4 of dst is shifted left by 4 bits, so that later, 67001e04c3fSmrg * GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the 67101e04c3fSmrg * final channel mask. 67201e04c3fSmrg * 67301e04c3fSmrg * Note: since GS_OPCODE_SET_CHANNEL_MASKS ORs DWORDs 0 and 4 together to 67401e04c3fSmrg * form the final channel mask, DWORDs 0 and 4 of the dst register must not 67501e04c3fSmrg * have any extraneous bits set prior to execution of this opcode (that is, 67601e04c3fSmrg * they should be in the range 0x0 to 0xf). 67701e04c3fSmrg */ 67801e04c3fSmrg GS_OPCODE_PREPARE_CHANNEL_MASKS, 67901e04c3fSmrg 68001e04c3fSmrg /** 68101e04c3fSmrg * Set the "Channel Mask" fields of a URB_WRITE message header. 68201e04c3fSmrg * 68301e04c3fSmrg * - dst is the MRF containing the message header. 68401e04c3fSmrg * 68501e04c3fSmrg * - src.x is the channel mask, as prepared by 68601e04c3fSmrg * GS_OPCODE_PREPARE_CHANNEL_MASKS. DWORDs 0 and 4 are OR'ed together to 68701e04c3fSmrg * form the final channel mask. 68801e04c3fSmrg */ 68901e04c3fSmrg GS_OPCODE_SET_CHANNEL_MASKS, 69001e04c3fSmrg 69101e04c3fSmrg /** 69201e04c3fSmrg * Get the "Instance ID" fields from the payload. 69301e04c3fSmrg * 69401e04c3fSmrg * - dst is the GRF for gl_InvocationID. 69501e04c3fSmrg */ 69601e04c3fSmrg GS_OPCODE_GET_INSTANCE_ID, 69701e04c3fSmrg 69801e04c3fSmrg /** 6997ec681f3Smrg * Send a FF_SYNC message to allocate initial URB handles (gfx6). 70001e04c3fSmrg * 70101e04c3fSmrg * - dst will be used as the writeback register for the FF_SYNC operation. 
70201e04c3fSmrg * 70301e04c3fSmrg * - src0 is the number of primitives written. 70401e04c3fSmrg * 70501e04c3fSmrg * - src1 is the value to hold in M0.0: number of SO vertices to write 70601e04c3fSmrg * and number of SO primitives needed. Its value will be overwritten 70701e04c3fSmrg * with the SVBI values if transform feedback is enabled. 70801e04c3fSmrg * 70901e04c3fSmrg * Note: This opcode uses an implicit MRF register for the ff_sync message 71001e04c3fSmrg * header, so the caller is expected to set inst->base_mrf and initialize 71101e04c3fSmrg * that MRF register to r0. This opcode will also write to this MRF register 71201e04c3fSmrg * to include the allocated URB handle so it can then be reused directly as 71301e04c3fSmrg * the header in the URB write operation we are allocating the handle for. 71401e04c3fSmrg */ 71501e04c3fSmrg GS_OPCODE_FF_SYNC, 71601e04c3fSmrg 71701e04c3fSmrg /** 7187ec681f3Smrg * Move r0.1 (which holds PrimitiveID information in gfx6) to a separate 71901e04c3fSmrg * register. 72001e04c3fSmrg * 72101e04c3fSmrg * - dst is the GRF where PrimitiveID information will be moved. 72201e04c3fSmrg */ 72301e04c3fSmrg GS_OPCODE_SET_PRIMITIVE_ID, 72401e04c3fSmrg 72501e04c3fSmrg /** 72601e04c3fSmrg * Write transform feedback data to the SVB by sending a SVB WRITE message. 7277ec681f3Smrg * Used in gfx6. 72801e04c3fSmrg * 72901e04c3fSmrg * - dst is the MRF register containing the message header. 73001e04c3fSmrg * 73101e04c3fSmrg * - src0 is the register where the vertex data is going to be copied from. 73201e04c3fSmrg * 73301e04c3fSmrg * - src1 is the destination register when write commit occurs. 73401e04c3fSmrg */ 73501e04c3fSmrg GS_OPCODE_SVB_WRITE, 73601e04c3fSmrg 73701e04c3fSmrg /** 73801e04c3fSmrg * Set destination index in the SVB write message payload (M0.5). Used 7397ec681f3Smrg * in gfx6 for transform feedback. 74001e04c3fSmrg * 74101e04c3fSmrg * - dst is the header to save the destination indices for SVB WRITE. 
74201e04c3fSmrg * - src is the register that holds the destination indices value. 74301e04c3fSmrg */ 74401e04c3fSmrg GS_OPCODE_SVB_SET_DST_INDEX, 74501e04c3fSmrg 74601e04c3fSmrg /** 74701e04c3fSmrg * Prepare Mx.0 subregister for being used in the FF_SYNC message header. 7487ec681f3Smrg * Used in gfx6 for transform feedback. 74901e04c3fSmrg * 75001e04c3fSmrg * - dst will hold the register with the final Mx.0 value. 75101e04c3fSmrg * 75201e04c3fSmrg * - src0 has the number of vertices emitted in SO (NumSOVertsToWrite) 75301e04c3fSmrg * 75401e04c3fSmrg * - src1 has the number of needed primitives for SO (NumSOPrimsNeeded) 75501e04c3fSmrg * 75601e04c3fSmrg * - src2 is the value to hold in M0: number of SO vertices to write 75701e04c3fSmrg * and number of SO primitives needed. 75801e04c3fSmrg */ 75901e04c3fSmrg GS_OPCODE_FF_SYNC_SET_PRIMITIVES, 76001e04c3fSmrg 76101e04c3fSmrg /** 76201e04c3fSmrg * Terminate the compute shader. 76301e04c3fSmrg */ 76401e04c3fSmrg CS_OPCODE_CS_TERMINATE, 76501e04c3fSmrg 76601e04c3fSmrg /** 76701e04c3fSmrg * GLSL barrier() 76801e04c3fSmrg */ 76901e04c3fSmrg SHADER_OPCODE_BARRIER, 77001e04c3fSmrg 77101e04c3fSmrg /** 77201e04c3fSmrg * Calculate the high 32-bits of a 32x32 multiply. 77301e04c3fSmrg */ 77401e04c3fSmrg SHADER_OPCODE_MULH, 77501e04c3fSmrg 7767ec681f3Smrg /** Signed subtraction with saturation. */ 7777ec681f3Smrg SHADER_OPCODE_ISUB_SAT, 7787ec681f3Smrg 7797ec681f3Smrg /** Unsigned subtraction with saturation. */ 7807ec681f3Smrg SHADER_OPCODE_USUB_SAT, 7817ec681f3Smrg 78201e04c3fSmrg /** 78301e04c3fSmrg * A MOV that uses VxH indirect addressing. 78401e04c3fSmrg * 78501e04c3fSmrg * Source 0: A register to start from (HW_REG). 78601e04c3fSmrg * Source 1: An indirect offset (in bytes, UD GRF). 78701e04c3fSmrg * Source 2: The length of the region that could be accessed (in bytes, 78801e04c3fSmrg * UD immediate). 
78901e04c3fSmrg */ 79001e04c3fSmrg SHADER_OPCODE_MOV_INDIRECT, 79101e04c3fSmrg 7927ec681f3Smrg /** Fills out a relocatable immediate */ 7937ec681f3Smrg SHADER_OPCODE_MOV_RELOC_IMM, 7947ec681f3Smrg 79501e04c3fSmrg VEC4_OPCODE_URB_READ, 79601e04c3fSmrg TCS_OPCODE_GET_INSTANCE_ID, 79701e04c3fSmrg TCS_OPCODE_URB_WRITE, 79801e04c3fSmrg TCS_OPCODE_SET_INPUT_URB_OFFSETS, 79901e04c3fSmrg TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, 80001e04c3fSmrg TCS_OPCODE_GET_PRIMITIVE_ID, 80101e04c3fSmrg TCS_OPCODE_CREATE_BARRIER_HEADER, 80201e04c3fSmrg TCS_OPCODE_SRC0_010_IS_ZERO, 80301e04c3fSmrg TCS_OPCODE_RELEASE_INPUT, 80401e04c3fSmrg TCS_OPCODE_THREAD_END, 80501e04c3fSmrg 80601e04c3fSmrg TES_OPCODE_GET_PRIMITIVE_ID, 80701e04c3fSmrg TES_OPCODE_CREATE_INPUT_READ_HEADER, 80801e04c3fSmrg TES_OPCODE_ADD_INDIRECT_URB_OFFSET, 8097ec681f3Smrg 8107ec681f3Smrg SHADER_OPCODE_GET_DSS_ID, 8117ec681f3Smrg SHADER_OPCODE_BTD_SPAWN_LOGICAL, 8127ec681f3Smrg SHADER_OPCODE_BTD_RETIRE_LOGICAL, 8137ec681f3Smrg 8147ec681f3Smrg RT_OPCODE_TRACE_RAY_LOGICAL, 81501e04c3fSmrg}; 81601e04c3fSmrg 81701e04c3fSmrgenum brw_urb_write_flags { 81801e04c3fSmrg BRW_URB_WRITE_NO_FLAGS = 0, 81901e04c3fSmrg 82001e04c3fSmrg /** 82101e04c3fSmrg * Causes a new URB entry to be allocated, and its address stored in the 82201e04c3fSmrg * destination register (gen < 7). 82301e04c3fSmrg */ 82401e04c3fSmrg BRW_URB_WRITE_ALLOCATE = 0x1, 82501e04c3fSmrg 82601e04c3fSmrg /** 82701e04c3fSmrg * Causes the current URB entry to be deallocated (gen < 7). 82801e04c3fSmrg */ 82901e04c3fSmrg BRW_URB_WRITE_UNUSED = 0x2, 83001e04c3fSmrg 83101e04c3fSmrg /** 83201e04c3fSmrg * Causes the thread to terminate. 83301e04c3fSmrg */ 83401e04c3fSmrg BRW_URB_WRITE_EOT = 0x4, 83501e04c3fSmrg 83601e04c3fSmrg /** 83701e04c3fSmrg * Indicates that the given URB entry is complete, and may be sent further 83801e04c3fSmrg * down the 3D pipeline (gen < 7). 
83901e04c3fSmrg */ 84001e04c3fSmrg BRW_URB_WRITE_COMPLETE = 0x8, 84101e04c3fSmrg 84201e04c3fSmrg /** 84301e04c3fSmrg * Indicates that an additional offset (which may be different for the two 84401e04c3fSmrg * vec4 slots) is stored in the message header (gen == 7). 84501e04c3fSmrg */ 84601e04c3fSmrg BRW_URB_WRITE_PER_SLOT_OFFSET = 0x10, 84701e04c3fSmrg 84801e04c3fSmrg /** 84901e04c3fSmrg * Indicates that the channel masks in the URB_WRITE message header should 85001e04c3fSmrg * not be overridden to 0xff (gen == 7). 85101e04c3fSmrg */ 85201e04c3fSmrg BRW_URB_WRITE_USE_CHANNEL_MASKS = 0x20, 85301e04c3fSmrg 85401e04c3fSmrg /** 85501e04c3fSmrg * Indicates that the data should be sent to the URB using the 85601e04c3fSmrg * URB_WRITE_OWORD message rather than URB_WRITE_HWORD (gen == 7). This 85701e04c3fSmrg * causes offsets to be interpreted as multiples of an OWORD instead of an 85801e04c3fSmrg * HWORD, and only allows one OWORD to be written. 85901e04c3fSmrg */ 86001e04c3fSmrg BRW_URB_WRITE_OWORD = 0x40, 86101e04c3fSmrg 86201e04c3fSmrg /** 86301e04c3fSmrg * Convenient combination of flags: end the thread while simultaneously 86401e04c3fSmrg * marking the given URB entry as complete. 86501e04c3fSmrg */ 86601e04c3fSmrg BRW_URB_WRITE_EOT_COMPLETE = BRW_URB_WRITE_EOT | BRW_URB_WRITE_COMPLETE, 86701e04c3fSmrg 86801e04c3fSmrg /** 86901e04c3fSmrg * Convenient combination of flags: mark the given URB entry as complete 87001e04c3fSmrg * and simultaneously allocate a new one. 
87101e04c3fSmrg */ 87201e04c3fSmrg BRW_URB_WRITE_ALLOCATE_COMPLETE = 87301e04c3fSmrg BRW_URB_WRITE_ALLOCATE | BRW_URB_WRITE_COMPLETE, 87401e04c3fSmrg}; 87501e04c3fSmrg 87601e04c3fSmrgenum fb_write_logical_srcs { 87701e04c3fSmrg FB_WRITE_LOGICAL_SRC_COLOR0, /* REQUIRED */ 87801e04c3fSmrg FB_WRITE_LOGICAL_SRC_COLOR1, /* for dual source blend messages */ 87901e04c3fSmrg FB_WRITE_LOGICAL_SRC_SRC0_ALPHA, 88001e04c3fSmrg FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */ 8817ec681f3Smrg FB_WRITE_LOGICAL_SRC_DST_DEPTH, /* GFX4-5: passthrough from thread */ 88201e04c3fSmrg FB_WRITE_LOGICAL_SRC_SRC_STENCIL, /* gl_FragStencilRefARB */ 88301e04c3fSmrg FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */ 88401e04c3fSmrg FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */ 88501e04c3fSmrg FB_WRITE_LOGICAL_NUM_SRCS 88601e04c3fSmrg}; 88701e04c3fSmrg 88801e04c3fSmrgenum tex_logical_srcs { 88901e04c3fSmrg /** Texture coordinates */ 89001e04c3fSmrg TEX_LOGICAL_SRC_COORDINATE, 89101e04c3fSmrg /** Shadow comparator */ 89201e04c3fSmrg TEX_LOGICAL_SRC_SHADOW_C, 89301e04c3fSmrg /** dPdx if the operation takes explicit derivatives, otherwise LOD value */ 89401e04c3fSmrg TEX_LOGICAL_SRC_LOD, 89501e04c3fSmrg /** dPdy if the operation takes explicit derivatives */ 89601e04c3fSmrg TEX_LOGICAL_SRC_LOD2, 8979f464c52Smaya /** Min LOD */ 8989f464c52Smaya TEX_LOGICAL_SRC_MIN_LOD, 89901e04c3fSmrg /** Sample index */ 90001e04c3fSmrg TEX_LOGICAL_SRC_SAMPLE_INDEX, 90101e04c3fSmrg /** MCS data */ 90201e04c3fSmrg TEX_LOGICAL_SRC_MCS, 90301e04c3fSmrg /** REQUIRED: Texture surface index */ 90401e04c3fSmrg TEX_LOGICAL_SRC_SURFACE, 90501e04c3fSmrg /** Texture sampler index */ 90601e04c3fSmrg TEX_LOGICAL_SRC_SAMPLER, 9079f464c52Smaya /** Texture surface bindless handle */ 9089f464c52Smaya TEX_LOGICAL_SRC_SURFACE_HANDLE, 9099f464c52Smaya /** Texture sampler bindless handle */ 9109f464c52Smaya TEX_LOGICAL_SRC_SAMPLER_HANDLE, 91101e04c3fSmrg /** Texel offset for gathers */ 91201e04c3fSmrg 
TEX_LOGICAL_SRC_TG4_OFFSET, 91301e04c3fSmrg /** REQUIRED: Number of coordinate components (as UD immediate) */ 91401e04c3fSmrg TEX_LOGICAL_SRC_COORD_COMPONENTS, 91501e04c3fSmrg /** REQUIRED: Number of derivative components (as UD immediate) */ 91601e04c3fSmrg TEX_LOGICAL_SRC_GRAD_COMPONENTS, 91701e04c3fSmrg 91801e04c3fSmrg TEX_LOGICAL_NUM_SRCS, 91901e04c3fSmrg}; 92001e04c3fSmrg 9219f464c52Smayaenum surface_logical_srcs { 9229f464c52Smaya /** Surface binding table index */ 9239f464c52Smaya SURFACE_LOGICAL_SRC_SURFACE, 9249f464c52Smaya /** Surface bindless handle */ 9259f464c52Smaya SURFACE_LOGICAL_SRC_SURFACE_HANDLE, 9269f464c52Smaya /** Surface address; could be multi-dimensional for typed opcodes */ 9279f464c52Smaya SURFACE_LOGICAL_SRC_ADDRESS, 9289f464c52Smaya /** Data to be written or used in an atomic op */ 9299f464c52Smaya SURFACE_LOGICAL_SRC_DATA, 9309f464c52Smaya /** Surface number of dimensions. Affects the size of ADDRESS */ 9319f464c52Smaya SURFACE_LOGICAL_SRC_IMM_DIMS, 9329f464c52Smaya /** Per-opcode immediate argument. For atomics, this is the atomic opcode */ 9339f464c52Smaya SURFACE_LOGICAL_SRC_IMM_ARG, 9347ec681f3Smrg /** 9357ec681f3Smrg * Some instructions with side-effects should not be predicated on 9367ec681f3Smrg * sample mask, e.g. lowered stores to scratch. 9377ec681f3Smrg */ 9387ec681f3Smrg SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK, 9399f464c52Smaya 9409f464c52Smaya SURFACE_LOGICAL_NUM_SRCS 9419f464c52Smaya}; 9429f464c52Smaya 94301e04c3fSmrg#ifdef __cplusplus 94401e04c3fSmrg/** 94501e04c3fSmrg * Allow brw_urb_write_flags enums to be ORed together. 
94601e04c3fSmrg */ 94701e04c3fSmrginline brw_urb_write_flags 94801e04c3fSmrgoperator|(brw_urb_write_flags x, brw_urb_write_flags y) 94901e04c3fSmrg{ 95001e04c3fSmrg return static_cast<brw_urb_write_flags>(static_cast<int>(x) | 95101e04c3fSmrg static_cast<int>(y)); 95201e04c3fSmrg} 95301e04c3fSmrg#endif 95401e04c3fSmrg 95501e04c3fSmrgenum PACKED brw_predicate { 95601e04c3fSmrg BRW_PREDICATE_NONE = 0, 95701e04c3fSmrg BRW_PREDICATE_NORMAL = 1, 95801e04c3fSmrg BRW_PREDICATE_ALIGN1_ANYV = 2, 95901e04c3fSmrg BRW_PREDICATE_ALIGN1_ALLV = 3, 96001e04c3fSmrg BRW_PREDICATE_ALIGN1_ANY2H = 4, 96101e04c3fSmrg BRW_PREDICATE_ALIGN1_ALL2H = 5, 96201e04c3fSmrg BRW_PREDICATE_ALIGN1_ANY4H = 6, 96301e04c3fSmrg BRW_PREDICATE_ALIGN1_ALL4H = 7, 96401e04c3fSmrg BRW_PREDICATE_ALIGN1_ANY8H = 8, 96501e04c3fSmrg BRW_PREDICATE_ALIGN1_ALL8H = 9, 96601e04c3fSmrg BRW_PREDICATE_ALIGN1_ANY16H = 10, 96701e04c3fSmrg BRW_PREDICATE_ALIGN1_ALL16H = 11, 96801e04c3fSmrg BRW_PREDICATE_ALIGN1_ANY32H = 12, 96901e04c3fSmrg BRW_PREDICATE_ALIGN1_ALL32H = 13, 97001e04c3fSmrg BRW_PREDICATE_ALIGN16_REPLICATE_X = 2, 97101e04c3fSmrg BRW_PREDICATE_ALIGN16_REPLICATE_Y = 3, 97201e04c3fSmrg BRW_PREDICATE_ALIGN16_REPLICATE_Z = 4, 97301e04c3fSmrg BRW_PREDICATE_ALIGN16_REPLICATE_W = 5, 97401e04c3fSmrg BRW_PREDICATE_ALIGN16_ANY4H = 6, 97501e04c3fSmrg BRW_PREDICATE_ALIGN16_ALL4H = 7, 97601e04c3fSmrg}; 97701e04c3fSmrg 97801e04c3fSmrgenum PACKED brw_reg_file { 97901e04c3fSmrg BRW_ARCHITECTURE_REGISTER_FILE = 0, 98001e04c3fSmrg BRW_GENERAL_REGISTER_FILE = 1, 98101e04c3fSmrg BRW_MESSAGE_REGISTER_FILE = 2, 98201e04c3fSmrg BRW_IMMEDIATE_VALUE = 3, 98301e04c3fSmrg 98401e04c3fSmrg ARF = BRW_ARCHITECTURE_REGISTER_FILE, 98501e04c3fSmrg FIXED_GRF = BRW_GENERAL_REGISTER_FILE, 98601e04c3fSmrg MRF = BRW_MESSAGE_REGISTER_FILE, 98701e04c3fSmrg IMM = BRW_IMMEDIATE_VALUE, 98801e04c3fSmrg 98901e04c3fSmrg /* These are not hardware values */ 99001e04c3fSmrg VGRF, 99101e04c3fSmrg ATTR, 99201e04c3fSmrg UNIFORM, /* prog_data->params[reg] */ 
99301e04c3fSmrg BAD_FILE, 99401e04c3fSmrg}; 99501e04c3fSmrg 9967ec681f3Smrgenum PACKED gfx10_align1_3src_reg_file { 99701e04c3fSmrg BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE = 0, 99801e04c3fSmrg BRW_ALIGN1_3SRC_IMMEDIATE_VALUE = 1, /* src0, src2 */ 99901e04c3fSmrg BRW_ALIGN1_3SRC_ACCUMULATOR = 1, /* dest, src1 */ 100001e04c3fSmrg}; 100101e04c3fSmrg 100201e04c3fSmrg/* CNL adds Align1 support for 3-src instructions. Bit 35 of the instruction 100301e04c3fSmrg * word is "Execution Datatype" which controls whether the instruction operates 100401e04c3fSmrg * on float or integer types. The register arguments have fields that offer 100501e04c3fSmrg * more fine control their respective types. 100601e04c3fSmrg */ 10077ec681f3Smrgenum PACKED gfx10_align1_3src_exec_type { 100801e04c3fSmrg BRW_ALIGN1_3SRC_EXEC_TYPE_INT = 0, 100901e04c3fSmrg BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT = 1, 101001e04c3fSmrg}; 101101e04c3fSmrg 101201e04c3fSmrg#define BRW_ARF_NULL 0x00 101301e04c3fSmrg#define BRW_ARF_ADDRESS 0x10 101401e04c3fSmrg#define BRW_ARF_ACCUMULATOR 0x20 101501e04c3fSmrg#define BRW_ARF_FLAG 0x30 101601e04c3fSmrg#define BRW_ARF_MASK 0x40 101701e04c3fSmrg#define BRW_ARF_MASK_STACK 0x50 101801e04c3fSmrg#define BRW_ARF_MASK_STACK_DEPTH 0x60 101901e04c3fSmrg#define BRW_ARF_STATE 0x70 102001e04c3fSmrg#define BRW_ARF_CONTROL 0x80 102101e04c3fSmrg#define BRW_ARF_NOTIFICATION_COUNT 0x90 102201e04c3fSmrg#define BRW_ARF_IP 0xA0 102301e04c3fSmrg#define BRW_ARF_TDR 0xB0 102401e04c3fSmrg#define BRW_ARF_TIMESTAMP 0xC0 102501e04c3fSmrg 102601e04c3fSmrg#define BRW_MRF_COMPR4 (1 << 7) 102701e04c3fSmrg 102801e04c3fSmrg#define BRW_AMASK 0 102901e04c3fSmrg#define BRW_IMASK 1 103001e04c3fSmrg#define BRW_LMASK 2 103101e04c3fSmrg#define BRW_CMASK 3 103201e04c3fSmrg 103301e04c3fSmrg 103401e04c3fSmrg 103501e04c3fSmrg#define BRW_THREAD_NORMAL 0 103601e04c3fSmrg#define BRW_THREAD_ATOMIC 1 103701e04c3fSmrg#define BRW_THREAD_SWITCH 2 103801e04c3fSmrg 103901e04c3fSmrgenum PACKED brw_vertical_stride { 104001e04c3fSmrg 
BRW_VERTICAL_STRIDE_0 = 0, 104101e04c3fSmrg BRW_VERTICAL_STRIDE_1 = 1, 104201e04c3fSmrg BRW_VERTICAL_STRIDE_2 = 2, 104301e04c3fSmrg BRW_VERTICAL_STRIDE_4 = 3, 104401e04c3fSmrg BRW_VERTICAL_STRIDE_8 = 4, 104501e04c3fSmrg BRW_VERTICAL_STRIDE_16 = 5, 104601e04c3fSmrg BRW_VERTICAL_STRIDE_32 = 6, 104701e04c3fSmrg BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL = 0xF, 104801e04c3fSmrg}; 104901e04c3fSmrg 10507ec681f3Smrgenum PACKED gfx10_align1_3src_vertical_stride { 105101e04c3fSmrg BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0 = 0, 10527ec681f3Smrg BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1 = 1, 105301e04c3fSmrg BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2 = 1, 105401e04c3fSmrg BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4 = 2, 105501e04c3fSmrg BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8 = 3, 105601e04c3fSmrg}; 105701e04c3fSmrg 105801e04c3fSmrgenum PACKED brw_width { 105901e04c3fSmrg BRW_WIDTH_1 = 0, 106001e04c3fSmrg BRW_WIDTH_2 = 1, 106101e04c3fSmrg BRW_WIDTH_4 = 2, 106201e04c3fSmrg BRW_WIDTH_8 = 3, 106301e04c3fSmrg BRW_WIDTH_16 = 4, 106401e04c3fSmrg}; 106501e04c3fSmrg 10667ec681f3Smrg/** 10677ec681f3Smrg * Gfx12+ SWSB SBID synchronization mode. 10687ec681f3Smrg * 10697ec681f3Smrg * This is represented as a bitmask including any required SBID token 10707ec681f3Smrg * synchronization modes, used to synchronize out-of-order instructions. Only 10717ec681f3Smrg * the strongest mode of the mask will be provided to the hardware in the SWSB 10727ec681f3Smrg * field of an actual hardware instruction, but virtual instructions may be 10737ec681f3Smrg * able to take into account multiple of them. 10747ec681f3Smrg */ 10757ec681f3Smrgenum tgl_sbid_mode { 10767ec681f3Smrg TGL_SBID_NULL = 0, 10777ec681f3Smrg TGL_SBID_SRC = 1, 10787ec681f3Smrg TGL_SBID_DST = 2, 10797ec681f3Smrg TGL_SBID_SET = 4 10807ec681f3Smrg}; 10817ec681f3Smrg 10827ec681f3Smrg#ifdef __cplusplus 10837ec681f3Smrg/** 10847ec681f3Smrg * Allow bitwise arithmetic of tgl_sbid_mode enums. 
10857ec681f3Smrg */ 10867ec681f3Smrginline tgl_sbid_mode 10877ec681f3Smrgoperator|(tgl_sbid_mode x, tgl_sbid_mode y) 10887ec681f3Smrg{ 10897ec681f3Smrg return tgl_sbid_mode(unsigned(x) | unsigned(y)); 10907ec681f3Smrg} 10917ec681f3Smrg 10927ec681f3Smrginline tgl_sbid_mode 10937ec681f3Smrgoperator&(tgl_sbid_mode x, tgl_sbid_mode y) 10947ec681f3Smrg{ 10957ec681f3Smrg return tgl_sbid_mode(unsigned(x) & unsigned(y)); 10967ec681f3Smrg} 10977ec681f3Smrg 10987ec681f3Smrginline tgl_sbid_mode & 10997ec681f3Smrgoperator|=(tgl_sbid_mode &x, tgl_sbid_mode y) 11007ec681f3Smrg{ 11017ec681f3Smrg return x = x | y; 11027ec681f3Smrg} 11037ec681f3Smrg 11047ec681f3Smrg#endif 11057ec681f3Smrg 11067ec681f3Smrg/** 11077ec681f3Smrg * TGL+ SWSB RegDist synchronization pipeline. 11087ec681f3Smrg * 11097ec681f3Smrg * On TGL all instructions that use the RegDist synchronization mechanism are 11107ec681f3Smrg * considered to be executed as a single in-order pipeline, therefore only the 11117ec681f3Smrg * TGL_PIPE_FLOAT pipeline is applicable. On XeHP+ platforms there are two 11127ec681f3Smrg * additional asynchronous ALU pipelines (which still execute instructions 11137ec681f3Smrg * in-order and use the RegDist synchronization mechanism). TGL_PIPE_NONE 11147ec681f3Smrg * doesn't provide any RegDist pipeline synchronization information and allows 11157ec681f3Smrg * the hardware to infer the pipeline based on the source types of the 11167ec681f3Smrg * instruction. TGL_PIPE_ALL can be used when synchronization with all ALU 11177ec681f3Smrg * pipelines is intended. 11187ec681f3Smrg */ 11197ec681f3Smrgenum tgl_pipe { 11207ec681f3Smrg TGL_PIPE_NONE = 0, 11217ec681f3Smrg TGL_PIPE_FLOAT, 11227ec681f3Smrg TGL_PIPE_INT, 11237ec681f3Smrg TGL_PIPE_LONG, 11247ec681f3Smrg TGL_PIPE_ALL 11257ec681f3Smrg}; 11267ec681f3Smrg 11277ec681f3Smrg/** 11287ec681f3Smrg * Logical representation of the SWSB scheduling information of a hardware 11297ec681f3Smrg * instruction. 
The binary representation is slightly more compact. 11307ec681f3Smrg */ 11317ec681f3Smrgstruct tgl_swsb { 11327ec681f3Smrg unsigned regdist : 3; 11337ec681f3Smrg enum tgl_pipe pipe : 3; 11347ec681f3Smrg unsigned sbid : 4; 11357ec681f3Smrg enum tgl_sbid_mode mode : 3; 11367ec681f3Smrg}; 11377ec681f3Smrg 11387ec681f3Smrg/** 11397ec681f3Smrg * Construct a scheduling annotation with a single RegDist dependency. This 11407ec681f3Smrg * synchronizes with the completion of the d-th previous in-order instruction. 11417ec681f3Smrg * The index is one-based, zero causes a no-op tgl_swsb to be constructed. 11427ec681f3Smrg */ 11437ec681f3Smrgstatic inline struct tgl_swsb 11447ec681f3Smrgtgl_swsb_regdist(unsigned d) 11457ec681f3Smrg{ 11467ec681f3Smrg const struct tgl_swsb swsb = { d, d ? TGL_PIPE_ALL : TGL_PIPE_NONE }; 11477ec681f3Smrg assert(swsb.regdist == d); 11487ec681f3Smrg return swsb; 11497ec681f3Smrg} 11507ec681f3Smrg 11517ec681f3Smrg/** 11527ec681f3Smrg * Construct a scheduling annotation that synchronizes with the specified SBID 11537ec681f3Smrg * token. 11547ec681f3Smrg */ 11557ec681f3Smrgstatic inline struct tgl_swsb 11567ec681f3Smrgtgl_swsb_sbid(enum tgl_sbid_mode mode, unsigned sbid) 11577ec681f3Smrg{ 11587ec681f3Smrg const struct tgl_swsb swsb = { 0, TGL_PIPE_NONE, sbid, mode }; 11597ec681f3Smrg assert(swsb.sbid == sbid); 11607ec681f3Smrg return swsb; 11617ec681f3Smrg} 11627ec681f3Smrg 11637ec681f3Smrg/** 11647ec681f3Smrg * Construct a no-op scheduling annotation. 11657ec681f3Smrg */ 11667ec681f3Smrgstatic inline struct tgl_swsb 11677ec681f3Smrgtgl_swsb_null(void) 11687ec681f3Smrg{ 11697ec681f3Smrg return tgl_swsb_regdist(0); 11707ec681f3Smrg} 11717ec681f3Smrg 11727ec681f3Smrg/** 11737ec681f3Smrg * Return a scheduling annotation that allocates the same SBID synchronization 11747ec681f3Smrg * token as \p swsb. In addition it will synchronize against a previous 11757ec681f3Smrg * in-order instruction if \p regdist is non-zero. 
11767ec681f3Smrg */ 11777ec681f3Smrgstatic inline struct tgl_swsb 11787ec681f3Smrgtgl_swsb_dst_dep(struct tgl_swsb swsb, unsigned regdist) 11797ec681f3Smrg{ 11807ec681f3Smrg swsb.regdist = regdist; 11817ec681f3Smrg swsb.mode = swsb.mode & TGL_SBID_SET; 11827ec681f3Smrg swsb.pipe = (regdist ? TGL_PIPE_ALL : TGL_PIPE_NONE); 11837ec681f3Smrg return swsb; 11847ec681f3Smrg} 11857ec681f3Smrg 11867ec681f3Smrg/** 11877ec681f3Smrg * Return a scheduling annotation that synchronizes against the same SBID and 11887ec681f3Smrg * RegDist dependencies as \p swsb, but doesn't allocate any SBID token. 11897ec681f3Smrg */ 11907ec681f3Smrgstatic inline struct tgl_swsb 11917ec681f3Smrgtgl_swsb_src_dep(struct tgl_swsb swsb) 11927ec681f3Smrg{ 11937ec681f3Smrg swsb.mode = swsb.mode & (TGL_SBID_SRC | TGL_SBID_DST); 11947ec681f3Smrg return swsb; 11957ec681f3Smrg} 11967ec681f3Smrg 11977ec681f3Smrg/** 11987ec681f3Smrg * Convert the provided tgl_swsb to the hardware's binary representation of an 11997ec681f3Smrg * SWSB annotation. 12007ec681f3Smrg */ 12017ec681f3Smrgstatic inline uint8_t 12027ec681f3Smrgtgl_swsb_encode(const struct intel_device_info *devinfo, struct tgl_swsb swsb) 12037ec681f3Smrg{ 12047ec681f3Smrg if (!swsb.mode) { 12057ec681f3Smrg const unsigned pipe = devinfo->verx10 < 125 ? 0 : 12067ec681f3Smrg swsb.pipe == TGL_PIPE_FLOAT ? 0x10 : 12077ec681f3Smrg swsb.pipe == TGL_PIPE_INT ? 0x18 : 12087ec681f3Smrg swsb.pipe == TGL_PIPE_LONG ? 0x50 : 12097ec681f3Smrg swsb.pipe == TGL_PIPE_ALL ? 0x8 : 0; 12107ec681f3Smrg return pipe | swsb.regdist; 12117ec681f3Smrg } else if (swsb.regdist) { 12127ec681f3Smrg return 0x80 | swsb.regdist << 4 | swsb.sbid; 12137ec681f3Smrg } else { 12147ec681f3Smrg return swsb.sbid | (swsb.mode & TGL_SBID_SET ? 0x40 : 12157ec681f3Smrg swsb.mode & TGL_SBID_DST ? 
0x20 : 0x30); 12167ec681f3Smrg } 12177ec681f3Smrg} 12187ec681f3Smrg 12197ec681f3Smrg/** 12207ec681f3Smrg * Convert the provided binary representation of an SWSB annotation to a 12217ec681f3Smrg * tgl_swsb. 12227ec681f3Smrg */ 12237ec681f3Smrgstatic inline struct tgl_swsb 12247ec681f3Smrgtgl_swsb_decode(const struct intel_device_info *devinfo, const enum opcode opcode, 12257ec681f3Smrg const uint8_t x) 12267ec681f3Smrg{ 12277ec681f3Smrg if (x & 0x80) { 12287ec681f3Smrg const struct tgl_swsb swsb = { (x & 0x70u) >> 4, TGL_PIPE_NONE, 12297ec681f3Smrg x & 0xfu, 12307ec681f3Smrg (opcode == BRW_OPCODE_SEND || 12317ec681f3Smrg opcode == BRW_OPCODE_SENDC || 12327ec681f3Smrg opcode == BRW_OPCODE_MATH) ? 12337ec681f3Smrg TGL_SBID_SET : TGL_SBID_DST }; 12347ec681f3Smrg return swsb; 12357ec681f3Smrg } else if ((x & 0x70) == 0x20) { 12367ec681f3Smrg return tgl_swsb_sbid(TGL_SBID_DST, x & 0xfu); 12377ec681f3Smrg } else if ((x & 0x70) == 0x30) { 12387ec681f3Smrg return tgl_swsb_sbid(TGL_SBID_SRC, x & 0xfu); 12397ec681f3Smrg } else if ((x & 0x70) == 0x40) { 12407ec681f3Smrg return tgl_swsb_sbid(TGL_SBID_SET, x & 0xfu); 12417ec681f3Smrg } else { 12427ec681f3Smrg const struct tgl_swsb swsb = { x & 0x7u, 12437ec681f3Smrg ((x & 0x78) == 0x10 ? TGL_PIPE_FLOAT : 12447ec681f3Smrg (x & 0x78) == 0x18 ? TGL_PIPE_INT : 12457ec681f3Smrg (x & 0x78) == 0x50 ? TGL_PIPE_LONG : 12467ec681f3Smrg (x & 0x78) == 0x8 ? TGL_PIPE_ALL : 12477ec681f3Smrg TGL_PIPE_NONE) }; 12487ec681f3Smrg assert(devinfo->verx10 >= 125 || swsb.pipe == TGL_PIPE_NONE); 12497ec681f3Smrg return swsb; 12507ec681f3Smrg } 12517ec681f3Smrg} 12527ec681f3Smrg 12537ec681f3Smrgenum tgl_sync_function { 12547ec681f3Smrg TGL_SYNC_NOP = 0x0, 12557ec681f3Smrg TGL_SYNC_ALLRD = 0x2, 12567ec681f3Smrg TGL_SYNC_ALLWR = 0x3, 12577ec681f3Smrg TGL_SYNC_BAR = 0xe, 12587ec681f3Smrg TGL_SYNC_HOST = 0xf 12597ec681f3Smrg}; 12607ec681f3Smrg 126101e04c3fSmrg/** 126201e04c3fSmrg * Message target: Shared Function ID for where to SEND a message. 
126301e04c3fSmrg * 126401e04c3fSmrg * These are enumerated in the ISA reference under "send - Send Message". 126501e04c3fSmrg * In particular, see the following tables: 126601e04c3fSmrg * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition" 126701e04c3fSmrg * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor" 126801e04c3fSmrg * - Ivybridge PRM, Volume 1 Part 1, section 3.2.7 "GPE Function IDs" 126901e04c3fSmrg */ 127001e04c3fSmrgenum brw_message_target { 127101e04c3fSmrg BRW_SFID_NULL = 0, 12727ec681f3Smrg BRW_SFID_MATH = 1, /* Only valid on Gfx4-5 */ 127301e04c3fSmrg BRW_SFID_SAMPLER = 2, 127401e04c3fSmrg BRW_SFID_MESSAGE_GATEWAY = 3, 127501e04c3fSmrg BRW_SFID_DATAPORT_READ = 4, 127601e04c3fSmrg BRW_SFID_DATAPORT_WRITE = 5, 127701e04c3fSmrg BRW_SFID_URB = 6, 127801e04c3fSmrg BRW_SFID_THREAD_SPAWNER = 7, 127901e04c3fSmrg BRW_SFID_VME = 8, 128001e04c3fSmrg 12817ec681f3Smrg GFX6_SFID_DATAPORT_SAMPLER_CACHE = 4, 12827ec681f3Smrg GFX6_SFID_DATAPORT_RENDER_CACHE = 5, 12837ec681f3Smrg GFX6_SFID_DATAPORT_CONSTANT_CACHE = 9, 128401e04c3fSmrg 12857ec681f3Smrg GFX7_SFID_DATAPORT_DATA_CACHE = 10, 12867ec681f3Smrg GFX7_SFID_PIXEL_INTERPOLATOR = 11, 128701e04c3fSmrg HSW_SFID_DATAPORT_DATA_CACHE_1 = 12, 128801e04c3fSmrg HSW_SFID_CRE = 13, 12897ec681f3Smrg 12907ec681f3Smrg GFX12_SFID_TGM = 13, /* Typed Global Memory */ 12917ec681f3Smrg GFX12_SFID_SLM = 14, /* Shared Local Memory */ 12927ec681f3Smrg GFX12_SFID_UGM = 15, /* Untyped Global Memory */ 12937ec681f3Smrg 12947ec681f3Smrg GEN_RT_SFID_BINDLESS_THREAD_DISPATCH = 7, 12957ec681f3Smrg GEN_RT_SFID_RAY_TRACE_ACCELERATOR = 8, 129601e04c3fSmrg}; 129701e04c3fSmrg 12987ec681f3Smrg#define GFX7_MESSAGE_TARGET_DP_DATA_CACHE 10 129901e04c3fSmrg 130001e04c3fSmrg#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 130101e04c3fSmrg#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 130201e04c3fSmrg#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 130301e04c3fSmrg 130401e04c3fSmrg#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3

/* Sampler message types; the prefix names the generation that introduced
 * each define.
 */
#define GFX5_SAMPLER_MESSAGE_SAMPLE              0
#define GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS         1
#define GFX5_SAMPLER_MESSAGE_SAMPLE_LOD          2
#define GFX5_SAMPLER_MESSAGE_SAMPLE_COMPARE      3
#define GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS       4
#define GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE  6
#define GFX5_SAMPLER_MESSAGE_SAMPLE_LD           7
#define GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4      8
#define GFX5_SAMPLER_MESSAGE_LOD                 9
#define GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10
#define GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO   11
#define GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C    16
#define GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO   17
#define GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
#define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20
#define GFX9_SAMPLER_MESSAGE_SAMPLE_LZ           24
#define GFX9_SAMPLER_MESSAGE_SAMPLE_C_LZ         25
#define GFX9_SAMPLER_MESSAGE_SAMPLE_LD_LZ        26
#define GFX9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W     28
#define GFX7_SAMPLER_MESSAGE_SAMPLE_LD_MCS       29
#define GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DMS       30
#define GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DSS       31

/* for GFX5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
#define BRW_SAMPLER_SIMD_MODE_SIMD8                     1
#define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3

/* GFX9 changes SIMD mode 0 to mean SIMD8D, but lets us get the SIMD4x2
 * behavior by setting bit 22 of dword 2 in the message header. */
#define GFX9_SAMPLER_SIMD_MODE_SIMD8D                   0
#define GFX9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2        (1 << 22)

#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4
#define GFX12_DATAPORT_OWORD_BLOCK_16_OWORDS  5

/* Map a block size of n OWORDs (1, 2, 4, 8 or 16) to its encoding.  Any
 * other n calls abort().  n is evaluated more than once, so pass an
 * expression without side effects.
 */
#define BRW_DATAPORT_OWORD_BLOCK_OWORDS(n)              \
   ((n) == 1 ? BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW :    \
    (n) == 2 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS :      \
    (n) == 4 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS :      \
    (n) == 8 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS :      \
    (n) == 16 ? GFX12_DATAPORT_OWORD_BLOCK_16_OWORDS :  \
    (abort(), ~0))

/* Same mapping with the block size given as n DWORDs (4, 8, 16 or 32);
 * unlike the OWORD variant there is no 16-OWORD (64-DWORD) case here.
 */
#define BRW_DATAPORT_OWORD_BLOCK_DWORDS(n)              \
   ((n) == 4 ? BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW :    \
    (n) == 8 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS :      \
    (n) == 16 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS :     \
    (n) == 32 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS :     \
    (abort(), ~0))

#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2

#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3

/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
/* GFX4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
/* G45, GFX5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ         1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ      3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6
/* GFX6 */
#define GFX6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ        1
#define GFX6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ    2
#define GFX6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ         4
#define GFX6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
#define GFX6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ     6

#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2

#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4

#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE                2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7

/* GFX6 */
#define GFX6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE              7
#define GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE               8
#define GFX6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          9
#define GFX6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE               10
#define GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE           11
#define GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE             12
#define GFX6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE               13
#define GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE       14

/* GFX7 */
#define GFX7_DATAPORT_RC_MEDIA_BLOCK_READ                           4
#define GFX7_DATAPORT_RC_TYPED_SURFACE_READ                         5
#define GFX7_DATAPORT_RC_TYPED_ATOMIC_OP                            6
#define GFX7_DATAPORT_RC_MEMORY_FENCE                           7
#define GFX7_DATAPORT_RC_MEDIA_BLOCK_WRITE                      10
#define GFX7_DATAPORT_RC_RENDER_TARGET_WRITE                    12
#define GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE                    13
#define GFX7_DATAPORT_DC_OWORD_BLOCK_READ                       0
#define GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ             1
#define GFX7_DATAPORT_DC_OWORD_DUAL_BLOCK_READ                  2
#define GFX7_DATAPORT_DC_DWORD_SCATTERED_READ                   3
#define GFX7_DATAPORT_DC_BYTE_SCATTERED_READ                    4
#define GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ                   5
#define GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP                      6
#define GFX7_DATAPORT_DC_MEMORY_FENCE                           7
#define GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE                      8
#define GFX7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE                 10
#define GFX7_DATAPORT_DC_DWORD_SCATTERED_WRITE                  11
#define GFX7_DATAPORT_DC_BYTE_SCATTERED_WRITE                   12
#define GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE                  13

/* Both scratch values set bit 18; bit 17 is clear for a read and set for a
 * write.  The "(0 << 17)" term contributes nothing to the value and only
 * spells out that bit for symmetry with the write definition.
 */
#define GFX7_DATAPORT_SCRATCH_READ                     ((1 << 18) | \
                                                        (0 << 17))
#define GFX7_DATAPORT_SCRATCH_WRITE                    ((1 << 18) | \
                                                        (1 << 17))
#define GFX7_DATAPORT_SCRATCH_NUM_REGS_SHIFT                    12

#define GFX7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET               0
#define GFX7_PIXEL_INTERPOLATOR_LOC_SAMPLE                      1
#define GFX7_PIXEL_INTERPOLATOR_LOC_CENTROID                    2
#define GFX7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET             3

/* HSW */
#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_READ                  0
#define HSW_DATAPORT_DC_PORT0_UNALIGNED_OWORD_BLOCK_READ        1
#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_READ             2
#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_READ              3
#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ               4
#define HSW_DATAPORT_DC_PORT0_MEMORY_FENCE                      7
#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_WRITE                 8
#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_WRITE            10
#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_WRITE             11
#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE              12

#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ              1
#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP                 2
#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2         3
#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_READ                  4
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ                5
#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP                   6
#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2           7
#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE             9
#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_WRITE                 10
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP                 11
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2         12
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE               13
#define GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ               0x10
#define GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ         0x11
#define GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP            0x12
#define GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP  0x13
#define GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ             0x14
#define GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE            0x15
#define GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE        0x19
#define GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE              0x1a
#define GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP          0x1b
#define GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP      0x1d
14987ec681f3Smrg#define GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP 0x1e 14997ec681f3Smrg 15007ec681f3Smrg/* GFX9 */ 15017ec681f3Smrg#define GFX9_DATAPORT_RC_RENDER_TARGET_WRITE 12 15027ec681f3Smrg#define GFX9_DATAPORT_RC_RENDER_TARGET_READ 13 150301e04c3fSmrg 15049f464c52Smaya/* A64 scattered message subtype */ 15057ec681f3Smrg#define GFX8_A64_SCATTERED_SUBTYPE_BYTE 0 15067ec681f3Smrg#define GFX8_A64_SCATTERED_SUBTYPE_DWORD 1 15077ec681f3Smrg#define GFX8_A64_SCATTERED_SUBTYPE_QWORD 2 15087ec681f3Smrg#define GFX8_A64_SCATTERED_SUBTYPE_HWORD 3 15099f464c52Smaya 151001e04c3fSmrg/* Dataport special binding table indices: */ 151101e04c3fSmrg#define BRW_BTI_STATELESS 255 15127ec681f3Smrg#define GFX7_BTI_SLM 254 15137ec681f3Smrg 15147ec681f3Smrg#define HSW_BTI_STATELESS_LOCALLY_COHERENT 255 15157ec681f3Smrg#define HSW_BTI_STATELESS_NON_COHERENT 253 15167ec681f3Smrg#define HSW_BTI_STATELESS_GLOBALLY_COHERENT 252 15177ec681f3Smrg#define HSW_BTI_STATELESS_LLC_COHERENT 251 15187ec681f3Smrg#define HSW_BTI_STATELESS_L3_UNCACHED 250 15197ec681f3Smrg 15207ec681f3Smrg/* The hardware docs are a bit contradictory here. On Haswell, where they 15217ec681f3Smrg * first added cache ability control, there were 5 different cache modes (see 15227ec681f3Smrg * HSW_BTI_STATELESS_* above). On Broadwell, they reduced to two: 15237ec681f3Smrg * 15247ec681f3Smrg * - IA-Coherent (BTI=255): Coherent within Gen and coherent within the 15257ec681f3Smrg * entire IA cache memory hierarchy. 15267ec681f3Smrg * 15277ec681f3Smrg * - Non-Coherent (BTI=253): Coherent within Gen, same cache type. 15287ec681f3Smrg * 15297ec681f3Smrg * Information about stateless cache coherency can be found in the "A32 15307ec681f3Smrg * Stateless" section of the "3D Media GPGPU" volume of the PRM for each 15317ec681f3Smrg * hardware generation. 
15327ec681f3Smrg * 15337ec681f3Smrg * Unfortunately, the docs for MDC_STATELESS appear to have been copied and 15347ec681f3Smrg * pasted from Haswell and give the Haswell definitions for the BTI values of 15357ec681f3Smrg * 255 and 253 including a warning about accessing 253 surfaces from multiple 15367ec681f3Smrg * threads. This seems to be a copy+paste error and the definitions from the 15377ec681f3Smrg * "A32 Stateless" section should be trusted instead. 15387ec681f3Smrg * 15397ec681f3Smrg * Note that because the DRM sets bit 4 of HDC_CHICKEN0 on BDW, CHV and at 15407ec681f3Smrg * least some pre-production steppings of SKL due to WaForceEnableNonCoherent, 15417ec681f3Smrg * HDC memory access may have been overridden by the kernel to be non-coherent 15427ec681f3Smrg * (matching the behavior of the same BTI on pre-Gfx8 hardware) and BTI 255 15437ec681f3Smrg * may actually be an alias for BTI 253. 154401e04c3fSmrg */ 15457ec681f3Smrg#define GFX8_BTI_STATELESS_IA_COHERENT 255 15467ec681f3Smrg#define GFX8_BTI_STATELESS_NON_COHERENT 253 15477ec681f3Smrg#define GFX9_BTI_BINDLESS 252 154801e04c3fSmrg 154901e04c3fSmrg/* Dataport atomic operations for Untyped Atomic Integer Operation message 155001e04c3fSmrg * (and others). 155101e04c3fSmrg */ 155201e04c3fSmrg#define BRW_AOP_AND 1 155301e04c3fSmrg#define BRW_AOP_OR 2 155401e04c3fSmrg#define BRW_AOP_XOR 3 155501e04c3fSmrg#define BRW_AOP_MOV 4 155601e04c3fSmrg#define BRW_AOP_INC 5 155701e04c3fSmrg#define BRW_AOP_DEC 6 155801e04c3fSmrg#define BRW_AOP_ADD 7 155901e04c3fSmrg#define BRW_AOP_SUB 8 156001e04c3fSmrg#define BRW_AOP_REVSUB 9 156101e04c3fSmrg#define BRW_AOP_IMAX 10 156201e04c3fSmrg#define BRW_AOP_IMIN 11 156301e04c3fSmrg#define BRW_AOP_UMAX 12 156401e04c3fSmrg#define BRW_AOP_UMIN 13 156501e04c3fSmrg#define BRW_AOP_CMPWR 14 156601e04c3fSmrg#define BRW_AOP_PREDEC 15 156701e04c3fSmrg 156801e04c3fSmrg/* Dataport atomic operations for Untyped Atomic Float Operation message. 
*/ 156901e04c3fSmrg#define BRW_AOP_FMAX 1 157001e04c3fSmrg#define BRW_AOP_FMIN 2 157101e04c3fSmrg#define BRW_AOP_FCMPWR 3 15727ec681f3Smrg#define BRW_AOP_FADD 4 157301e04c3fSmrg 157401e04c3fSmrg#define BRW_MATH_FUNCTION_INV 1 157501e04c3fSmrg#define BRW_MATH_FUNCTION_LOG 2 157601e04c3fSmrg#define BRW_MATH_FUNCTION_EXP 3 157701e04c3fSmrg#define BRW_MATH_FUNCTION_SQRT 4 157801e04c3fSmrg#define BRW_MATH_FUNCTION_RSQ 5 157901e04c3fSmrg#define BRW_MATH_FUNCTION_SIN 6 158001e04c3fSmrg#define BRW_MATH_FUNCTION_COS 7 15817ec681f3Smrg#define BRW_MATH_FUNCTION_SINCOS 8 /* gfx4, gfx5 */ 15827ec681f3Smrg#define BRW_MATH_FUNCTION_FDIV 9 /* gfx6+ */ 158301e04c3fSmrg#define BRW_MATH_FUNCTION_POW 10 158401e04c3fSmrg#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 158501e04c3fSmrg#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 158601e04c3fSmrg#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 15877ec681f3Smrg#define GFX8_MATH_FUNCTION_INVM 14 15887ec681f3Smrg#define GFX8_MATH_FUNCTION_RSQRTM 15 158901e04c3fSmrg 159001e04c3fSmrg#define BRW_MATH_INTEGER_UNSIGNED 0 159101e04c3fSmrg#define BRW_MATH_INTEGER_SIGNED 1 159201e04c3fSmrg 159301e04c3fSmrg#define BRW_MATH_PRECISION_FULL 0 159401e04c3fSmrg#define BRW_MATH_PRECISION_PARTIAL 1 159501e04c3fSmrg 159601e04c3fSmrg#define BRW_MATH_SATURATE_NONE 0 159701e04c3fSmrg#define BRW_MATH_SATURATE_SATURATE 1 159801e04c3fSmrg 159901e04c3fSmrg#define BRW_MATH_DATA_VECTOR 0 160001e04c3fSmrg#define BRW_MATH_DATA_SCALAR 1 160101e04c3fSmrg 160201e04c3fSmrg#define BRW_URB_OPCODE_WRITE_HWORD 0 160301e04c3fSmrg#define BRW_URB_OPCODE_WRITE_OWORD 1 160401e04c3fSmrg#define BRW_URB_OPCODE_READ_HWORD 2 160501e04c3fSmrg#define BRW_URB_OPCODE_READ_OWORD 3 16067ec681f3Smrg#define GFX7_URB_OPCODE_ATOMIC_MOV 4 16077ec681f3Smrg#define GFX7_URB_OPCODE_ATOMIC_INC 5 16087ec681f3Smrg#define GFX8_URB_OPCODE_ATOMIC_ADD 6 16097ec681f3Smrg#define GFX8_URB_OPCODE_SIMD8_WRITE 7 16107ec681f3Smrg#define GFX8_URB_OPCODE_SIMD8_READ 8 16117ec681f3Smrg#define 
GFX125_URB_OPCODE_FENCE 9 161201e04c3fSmrg 161301e04c3fSmrg#define BRW_URB_SWIZZLE_NONE 0 161401e04c3fSmrg#define BRW_URB_SWIZZLE_INTERLEAVE 1 161501e04c3fSmrg#define BRW_URB_SWIZZLE_TRANSPOSE 2 161601e04c3fSmrg 161701e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_1K 0 161801e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_2K 1 161901e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_4K 2 162001e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_8K 3 162101e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_16K 4 162201e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_32K 5 162301e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_64K 6 162401e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_128K 7 162501e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_256K 8 162601e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_512K 9 162701e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_1M 10 162801e04c3fSmrg#define BRW_SCRATCH_SPACE_SIZE_2M 11 162901e04c3fSmrg 163001e04c3fSmrg#define BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY 0 163101e04c3fSmrg#define BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY 1 163201e04c3fSmrg#define BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG 2 163301e04c3fSmrg#define BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP 3 163401e04c3fSmrg#define BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG 4 163501e04c3fSmrg#define BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE 5 163601e04c3fSmrg#define BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE 6 163701e04c3fSmrg 163801e04c3fSmrg 16397ec681f3Smrg/* Gfx7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size 164001e04c3fSmrg * is 2^9, or 512. It's counted in multiples of 64 bytes. 164101e04c3fSmrg * 164201e04c3fSmrg * Identical for VS, DS, and HS. 
164301e04c3fSmrg */ 16447ec681f3Smrg#define GFX7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64) 16457ec681f3Smrg#define GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES (512*64) 16467ec681f3Smrg#define GFX7_MAX_HS_URB_ENTRY_SIZE_BYTES (512*64) 16477ec681f3Smrg#define GFX7_MAX_VS_URB_ENTRY_SIZE_BYTES (512*64) 16487ec681f3Smrg 16497ec681f3Smrg#define BRW_GS_EDGE_INDICATOR_0 (1 << 8) 16507ec681f3Smrg#define BRW_GS_EDGE_INDICATOR_1 (1 << 9) 165101e04c3fSmrg 16527ec681f3Smrg/* Gfx6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit 165301e04c3fSmrg * (128 bytes) URB rows and the maximum allowed value is 5 rows. 165401e04c3fSmrg */ 16557ec681f3Smrg#define GFX6_MAX_GS_URB_ENTRY_SIZE_BYTES (5*128) 165601e04c3fSmrg 165701e04c3fSmrg/* GS Thread Payload 165801e04c3fSmrg */ 165901e04c3fSmrg 166001e04c3fSmrg/* 3DSTATE_GS "Output Vertex Size" has an effective maximum of 62. It's 166101e04c3fSmrg * counted in multiples of 16 bytes. 166201e04c3fSmrg */ 16637ec681f3Smrg#define GFX7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES (62*16) 166401e04c3fSmrg 166501e04c3fSmrg 166601e04c3fSmrg/* R0 */ 16677ec681f3Smrg# define GFX7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27 166801e04c3fSmrg 166901e04c3fSmrg/* CR0.0[5:4] Floating-Point Rounding Modes 167001e04c3fSmrg * Skylake PRM, Volume 7 Part 1, "Control Register", page 756 167101e04c3fSmrg */ 167201e04c3fSmrg 167301e04c3fSmrg#define BRW_CR0_RND_MODE_MASK 0x30 167401e04c3fSmrg#define BRW_CR0_RND_MODE_SHIFT 4 167501e04c3fSmrg 167601e04c3fSmrgenum PACKED brw_rnd_mode { 167701e04c3fSmrg BRW_RND_MODE_RTNE = 0, /* Round to Nearest or Even */ 167801e04c3fSmrg BRW_RND_MODE_RU = 1, /* Round Up, toward +inf */ 167901e04c3fSmrg BRW_RND_MODE_RD = 2, /* Round Down, toward -inf */ 168001e04c3fSmrg BRW_RND_MODE_RTZ = 3, /* Round Toward Zero */ 168101e04c3fSmrg BRW_RND_MODE_UNSPECIFIED, /* Unspecified rounding mode */ 168201e04c3fSmrg}; 168301e04c3fSmrg 16847ec681f3Smrg#define BRW_CR0_FP64_DENORM_PRESERVE (1 << 6) 16857ec681f3Smrg#define BRW_CR0_FP32_DENORM_PRESERVE (1 << 7) 
16867ec681f3Smrg#define BRW_CR0_FP16_DENORM_PRESERVE (1 << 10) 16877ec681f3Smrg 16887ec681f3Smrg#define BRW_CR0_FP_MODE_MASK (BRW_CR0_FP64_DENORM_PRESERVE | \ 16897ec681f3Smrg BRW_CR0_FP32_DENORM_PRESERVE | \ 16907ec681f3Smrg BRW_CR0_FP16_DENORM_PRESERVE | \ 16917ec681f3Smrg BRW_CR0_RND_MODE_MASK) 16927ec681f3Smrg 169301e04c3fSmrg/* MDC_DS - Data Size Message Descriptor Control Field 169401e04c3fSmrg * Skylake PRM, Volume 2d, page 129 169501e04c3fSmrg * 169601e04c3fSmrg * Specifies the number of Bytes to be read or written per Dword used at 169701e04c3fSmrg * byte_scattered read/write and byte_scaled read/write messages. 169801e04c3fSmrg */ 16997ec681f3Smrg#define GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE 0 17007ec681f3Smrg#define GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD 1 17017ec681f3Smrg#define GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD 2 17027ec681f3Smrg 17037ec681f3Smrg#define GEN_RT_BTD_MESSAGE_SPAWN 1 17047ec681f3Smrg 17057ec681f3Smrg#define GEN_RT_TRACE_RAY_INITAL 0 17067ec681f3Smrg#define GEN_RT_TRACE_RAY_INSTANCE 1 17077ec681f3Smrg#define GEN_RT_TRACE_RAY_COMMIT 2 17087ec681f3Smrg#define GEN_RT_TRACE_RAY_CONTINUE 3 17097ec681f3Smrg 17107ec681f3Smrg#define GEN_RT_BTD_SHADER_TYPE_ANY_HIT 0 17117ec681f3Smrg#define GEN_RT_BTD_SHADER_TYPE_CLOSEST_HIT 1 17127ec681f3Smrg#define GEN_RT_BTD_SHADER_TYPE_MISS 2 17137ec681f3Smrg#define GEN_RT_BTD_SHADER_TYPE_INTERSECTION 3 17147ec681f3Smrg 17157ec681f3Smrg/* Starting with Xe-HPG, the old dataport was massively reworked dataport. 17167ec681f3Smrg * The new thing, called Load/Store Cache or LSC, has a significantly improved 17177ec681f3Smrg * interface. Instead of bespoke messages for every case, there's basically 17187ec681f3Smrg * one or two messages with different bits to control things like address 17197ec681f3Smrg * size, how much data is read/written, etc. It's way nicer but also means we 17207ec681f3Smrg * get to rewrite all our dataport encoding/decoding code. 
This patch kicks 17217ec681f3Smrg * off the party with all of the new enums. 17227ec681f3Smrg */ 17237ec681f3Smrgenum lsc_opcode { 17247ec681f3Smrg LSC_OP_LOAD = 0, 17257ec681f3Smrg LSC_OP_LOAD_CMASK = 2, 17267ec681f3Smrg LSC_OP_STORE = 4, 17277ec681f3Smrg LSC_OP_STORE_CMASK = 6, 17287ec681f3Smrg LSC_OP_ATOMIC_INC = 8, 17297ec681f3Smrg LSC_OP_ATOMIC_DEC = 9, 17307ec681f3Smrg LSC_OP_ATOMIC_LOAD = 10, 17317ec681f3Smrg LSC_OP_ATOMIC_STORE = 11, 17327ec681f3Smrg LSC_OP_ATOMIC_ADD = 12, 17337ec681f3Smrg LSC_OP_ATOMIC_SUB = 13, 17347ec681f3Smrg LSC_OP_ATOMIC_MIN = 14, 17357ec681f3Smrg LSC_OP_ATOMIC_MAX = 15, 17367ec681f3Smrg LSC_OP_ATOMIC_UMIN = 16, 17377ec681f3Smrg LSC_OP_ATOMIC_UMAX = 17, 17387ec681f3Smrg LSC_OP_ATOMIC_CMPXCHG = 18, 17397ec681f3Smrg LSC_OP_ATOMIC_FADD = 19, 17407ec681f3Smrg LSC_OP_ATOMIC_FSUB = 20, 17417ec681f3Smrg LSC_OP_ATOMIC_FMIN = 21, 17427ec681f3Smrg LSC_OP_ATOMIC_FMAX = 22, 17437ec681f3Smrg LSC_OP_ATOMIC_FCMPXCHG = 23, 17447ec681f3Smrg LSC_OP_ATOMIC_AND = 24, 17457ec681f3Smrg LSC_OP_ATOMIC_OR = 25, 17467ec681f3Smrg LSC_OP_ATOMIC_XOR = 26, 17477ec681f3Smrg LSC_OP_FENCE = 31 17487ec681f3Smrg}; 17497ec681f3Smrg 17507ec681f3Smrg/* 17517ec681f3Smrg * Specifies the size of the dataport address payload in registers. 17527ec681f3Smrg */ 17537ec681f3Smrgenum PACKED lsc_addr_reg_size { 17547ec681f3Smrg LSC_ADDR_REG_SIZE_1 = 1, 17557ec681f3Smrg LSC_ADDR_REG_SIZE_2 = 2, 17567ec681f3Smrg LSC_ADDR_REG_SIZE_3 = 3, 17577ec681f3Smrg LSC_ADDR_REG_SIZE_4 = 4, 17587ec681f3Smrg LSC_ADDR_REG_SIZE_6 = 6, 17597ec681f3Smrg LSC_ADDR_REG_SIZE_8 = 8, 17607ec681f3Smrg}; 17617ec681f3Smrg 17627ec681f3Smrg/* 17637ec681f3Smrg * Specifies the size of the address payload item in a dataport message. 
17647ec681f3Smrg */ 17657ec681f3Smrgenum PACKED lsc_addr_size { 17667ec681f3Smrg LSC_ADDR_SIZE_A16 = 1, /* 16-bit address offset */ 17677ec681f3Smrg LSC_ADDR_SIZE_A32 = 2, /* 32-bit address offset */ 17687ec681f3Smrg LSC_ADDR_SIZE_A64 = 3, /* 64-bit address offset */ 17697ec681f3Smrg}; 17707ec681f3Smrg 17717ec681f3Smrg/* 17727ec681f3Smrg * Specifies the type of the address payload item in a dataport message. The 17737ec681f3Smrg * address type specifies how the dataport message decodes the Extended 17747ec681f3Smrg * Descriptor for the surface attributes and address calculation. 17757ec681f3Smrg */ 17767ec681f3Smrgenum PACKED lsc_addr_surface_type { 17777ec681f3Smrg LSC_ADDR_SURFTYPE_FLAT = 0, /* Flat */ 17787ec681f3Smrg LSC_ADDR_SURFTYPE_BSS = 1, /* Bindless surface state */ 17797ec681f3Smrg LSC_ADDR_SURFTYPE_SS = 2, /* Surface state */ 17807ec681f3Smrg LSC_ADDR_SURFTYPE_BTI = 3, /* Binding table index */ 17817ec681f3Smrg}; 17827ec681f3Smrg 17837ec681f3Smrg/* 17847ec681f3Smrg * Specifies the dataport message override to the default L1 and L3 memory 17857ec681f3Smrg * cache policies. Dataport L1 cache policies are uncached (UC), cached (C), 17867ec681f3Smrg * cache streaming (S) and invalidate-after-read (IAR). Dataport L3 cache 17877ec681f3Smrg * policies are uncached (UC) and cached (C). 17887ec681f3Smrg */ 17897ec681f3Smrgenum lsc_cache_load { 17907ec681f3Smrg /* No override. Use the non-pipelined state or surface state cache settings 17917ec681f3Smrg * for L1 and L3. 
17927ec681f3Smrg */ 17937ec681f3Smrg LSC_CACHE_LOAD_L1STATE_L3MOCS = 0, 17947ec681f3Smrg /* Override to L1 uncached and L3 uncached */ 17957ec681f3Smrg LSC_CACHE_LOAD_L1UC_L3UC = 1, 17967ec681f3Smrg /* Override to L1 uncached and L3 cached */ 17977ec681f3Smrg LSC_CACHE_LOAD_L1UC_L3C = 2, 17987ec681f3Smrg /* Override to L1 cached and L3 uncached */ 17997ec681f3Smrg LSC_CACHE_LOAD_L1C_L3UC = 3, 18007ec681f3Smrg /* Override to cache at both L1 and L3 */ 18017ec681f3Smrg LSC_CACHE_LOAD_L1C_L3C = 4, 18027ec681f3Smrg /* Override to L1 streaming load and L3 uncached */ 18037ec681f3Smrg LSC_CACHE_LOAD_L1S_L3UC = 5, 18047ec681f3Smrg /* Override to L1 streaming load and L3 cached */ 18057ec681f3Smrg LSC_CACHE_LOAD_L1S_L3C = 6, 18067ec681f3Smrg /* For load messages, override to L1 invalidate-after-read, and L3 cached. */ 18077ec681f3Smrg LSC_CACHE_LOAD_L1IAR_L3C = 7, 18087ec681f3Smrg}; 18097ec681f3Smrg 18107ec681f3Smrg/* 18117ec681f3Smrg * Specifies the dataport message override to the default L1 and L3 memory 18127ec681f3Smrg * cache policies. Dataport L1 cache policies are uncached (UC), write-through 18137ec681f3Smrg * (WT), write-back (WB) and streaming (S). Dataport L3 cache policies are 18147ec681f3Smrg * uncached (UC) and cached (WB). 18157ec681f3Smrg */ 18167ec681f3Smrgenum PACKED lsc_cache_store { 18177ec681f3Smrg /* No override. Use the non-pipelined or surface state cache settings for L1 18187ec681f3Smrg * and L3. 
18197ec681f3Smrg */ 18207ec681f3Smrg LSC_CACHE_STORE_L1STATE_L3MOCS = 0, 18217ec681f3Smrg /* Override to L1 uncached and L3 uncached */ 18227ec681f3Smrg LSC_CACHE_STORE_L1UC_L3UC = 1, 18237ec681f3Smrg /* Override to L1 uncached and L3 cached */ 18247ec681f3Smrg LSC_CACHE_STORE_L1UC_L3WB = 2, 18257ec681f3Smrg /* Override to L1 write-through and L3 uncached */ 18267ec681f3Smrg LSC_CACHE_STORE_L1WT_L3UC = 3, 18277ec681f3Smrg /* Override to L1 write-through and L3 cached */ 18287ec681f3Smrg LSC_CACHE_STORE_L1WT_L3WB = 4, 18297ec681f3Smrg /* Override to L1 streaming and L3 uncached */ 18307ec681f3Smrg LSC_CACHE_STORE_L1S_L3UC = 5, 18317ec681f3Smrg /* Override to L1 streaming and L3 cached */ 18327ec681f3Smrg LSC_CACHE_STORE_L1S_L3WB = 6, 18337ec681f3Smrg /* Override to L1 write-back, and L3 cached */ 18347ec681f3Smrg LSC_CACHE_STORE_L1WB_L3WB = 7, 18357ec681f3Smrg 18367ec681f3Smrg}; 18377ec681f3Smrg 18387ec681f3Smrg/* 18397ec681f3Smrg * Specifies which components of the data payload 4-element vector (X,Y,Z,W) is 18407ec681f3Smrg * packed into the register payload. 18417ec681f3Smrg */ 18427ec681f3Smrgenum PACKED lsc_cmask { 18437ec681f3Smrg LSC_CMASK_X = 0x1, 18447ec681f3Smrg LSC_CMASK_Y = 0x2, 18457ec681f3Smrg LSC_CMASK_XY = 0x3, 18467ec681f3Smrg LSC_CMASK_Z = 0x4, 18477ec681f3Smrg LSC_CMASK_XZ = 0x5, 18487ec681f3Smrg LSC_CMASK_YZ = 0x6, 18497ec681f3Smrg LSC_CMASK_XYZ = 0x7, 18507ec681f3Smrg LSC_CMASK_W = 0x8, 18517ec681f3Smrg LSC_CMASK_XW = 0x9, 18527ec681f3Smrg LSC_CMASK_YW = 0xa, 18537ec681f3Smrg LSC_CMASK_XYW = 0xb, 18547ec681f3Smrg LSC_CMASK_ZW = 0xc, 18557ec681f3Smrg LSC_CMASK_XZW = 0xd, 18567ec681f3Smrg LSC_CMASK_YZW = 0xe, 18577ec681f3Smrg LSC_CMASK_XYZW = 0xf, 18587ec681f3Smrg}; 18597ec681f3Smrg 18607ec681f3Smrg/* 18617ec681f3Smrg * Specifies the size of the data payload item in a dataport message. 
18627ec681f3Smrg */ 18637ec681f3Smrgenum PACKED lsc_data_size { 18647ec681f3Smrg /* 8-bit scalar data value in memory, packed into a 8-bit data value in 18657ec681f3Smrg * register. 18667ec681f3Smrg */ 18677ec681f3Smrg LSC_DATA_SIZE_D8 = 0, 18687ec681f3Smrg /* 16-bit scalar data value in memory, packed into a 16-bit data value in 18697ec681f3Smrg * register. 18707ec681f3Smrg */ 18717ec681f3Smrg LSC_DATA_SIZE_D16 = 1, 18727ec681f3Smrg /* 32-bit scalar data value in memory, packed into 32-bit data value in 18737ec681f3Smrg * register. 18747ec681f3Smrg */ 18757ec681f3Smrg LSC_DATA_SIZE_D32 = 2, 18767ec681f3Smrg /* 64-bit scalar data value in memory, packed into 64-bit data value in 18777ec681f3Smrg * register. 18787ec681f3Smrg */ 18797ec681f3Smrg LSC_DATA_SIZE_D64 = 3, 18807ec681f3Smrg /* 8-bit scalar data value in memory, packed into 32-bit unsigned data value 18817ec681f3Smrg * in register. 18827ec681f3Smrg */ 18837ec681f3Smrg LSC_DATA_SIZE_D8U32 = 4, 18847ec681f3Smrg /* 16-bit scalar data value in memory, packed into 32-bit unsigned data 18857ec681f3Smrg * value in register. 18867ec681f3Smrg */ 18877ec681f3Smrg LSC_DATA_SIZE_D16U32 = 5, 18887ec681f3Smrg /* 16-bit scalar BigFloat data value in memory, packed into 32-bit float 18897ec681f3Smrg * value in register. 18907ec681f3Smrg */ 18917ec681f3Smrg LSC_DATA_SIZE_D16BF32 = 6, 18927ec681f3Smrg}; 18937ec681f3Smrg 18947ec681f3Smrg/* 18957ec681f3Smrg * Enum specifies the scope of the fence. 18967ec681f3Smrg */ 18977ec681f3Smrgenum PACKED lsc_fence_scope { 18987ec681f3Smrg /* Wait until all previous memory transactions from this thread are observed 18997ec681f3Smrg * within the local thread-group. 19007ec681f3Smrg */ 19017ec681f3Smrg LSC_FENCE_THREADGROUP = 0, 19027ec681f3Smrg /* Wait until all previous memory transactions from this thread are observed 19037ec681f3Smrg * within the local sub-slice. 
19047ec681f3Smrg */ 19057ec681f3Smrg LSC_FENCE_LOCAL = 1, 19067ec681f3Smrg /* Wait until all previous memory transactions from this thread are observed 19077ec681f3Smrg * in the local tile. 19087ec681f3Smrg */ 19097ec681f3Smrg LSC_FENCE_TILE = 2, 19107ec681f3Smrg /* Wait until all previous memory transactions from this thread are observed 19117ec681f3Smrg * in the local GPU. 19127ec681f3Smrg */ 19137ec681f3Smrg LSC_FENCE_GPU = 3, 19147ec681f3Smrg /* Wait until all previous memory transactions from this thread are observed 19157ec681f3Smrg * across all GPUs in the system. 19167ec681f3Smrg */ 19177ec681f3Smrg LSC_FENCE_ALL_GPU = 4, 19187ec681f3Smrg /* Wait until all previous memory transactions from this thread are observed 19197ec681f3Smrg * at the "system" level. 19207ec681f3Smrg */ 19217ec681f3Smrg LSC_FENCE_SYSTEM_RELEASE = 5, 19227ec681f3Smrg /* For GPUs that do not follow PCIe Write ordering for downstream writes 19237ec681f3Smrg * targeting device memory, a fence message with scope=System_Acquire will 19247ec681f3Smrg * commit to device memory all downstream and peer writes that have reached 19257ec681f3Smrg * the device. 19267ec681f3Smrg */ 19277ec681f3Smrg LSC_FENCE_SYSTEM_ACQUIRE = 6, 19287ec681f3Smrg}; 19297ec681f3Smrg 19307ec681f3Smrg/* 19317ec681f3Smrg * Specifies the type of cache flush operation to perform after a fence is 19327ec681f3Smrg * complete. 19337ec681f3Smrg */ 19347ec681f3Smrgenum PACKED lsc_flush_type { 19357ec681f3Smrg LSC_FLUSH_TYPE_NONE = 0, 19367ec681f3Smrg /* 19377ec681f3Smrg * For a R/W cache, evict dirty lines (M to I state) and invalidate clean 19387ec681f3Smrg * lines. For a RO cache, invalidate clean lines. 19397ec681f3Smrg */ 19407ec681f3Smrg LSC_FLUSH_TYPE_EVICT = 1, 19417ec681f3Smrg /* 19427ec681f3Smrg * For both R/W and RO cache, invalidate clean lines in the cache. 
19437ec681f3Smrg */ 19447ec681f3Smrg LSC_FLUSH_TYPE_INVALIDATE = 2, 19457ec681f3Smrg /* 19467ec681f3Smrg * For a R/W cache, invalidate dirty lines (M to I state), without 19477ec681f3Smrg * write-back to next level. This opcode does nothing for a RO cache. 19487ec681f3Smrg */ 19497ec681f3Smrg LSC_FLUSH_TYPE_DISCARD = 3, 19507ec681f3Smrg /* 19517ec681f3Smrg * For a R/W cache, write-back dirty lines to the next level, but kept in 19527ec681f3Smrg * the cache as "clean" (M to V state). This opcode does nothing for a RO 19537ec681f3Smrg * cache. 19547ec681f3Smrg */ 19557ec681f3Smrg LSC_FLUSH_TYPE_CLEAN = 4, 19567ec681f3Smrg /* 19577ec681f3Smrg * Flush "RW" section of the L3 cache, but leave L1 and L2 caches untouched. 19587ec681f3Smrg */ 19597ec681f3Smrg LSC_FLUSH_TYPE_L3ONLY = 5, 19607ec681f3Smrg}; 19617ec681f3Smrg 19627ec681f3Smrgenum PACKED lsc_backup_fence_routing { 19637ec681f3Smrg /* Normal routing: UGM fence is routed to UGM pipeline. */ 19647ec681f3Smrg LSC_NORMAL_ROUTING, 19657ec681f3Smrg /* Route UGM fence to LSC unit. */ 19667ec681f3Smrg LSC_ROUTE_TO_LSC, 19677ec681f3Smrg}; 19687ec681f3Smrg 19697ec681f3Smrg/* 19707ec681f3Smrg * Specifies the size of the vector in a dataport message. 19717ec681f3Smrg */ 19727ec681f3Smrgenum PACKED lsc_vect_size { 19737ec681f3Smrg LSC_VECT_SIZE_V1 = 0, /* vector length 1 */ 19747ec681f3Smrg LSC_VECT_SIZE_V2 = 1, /* vector length 2 */ 19757ec681f3Smrg LSC_VECT_SIZE_V3 = 2, /* Vector length 3 */ 19767ec681f3Smrg LSC_VECT_SIZE_V4 = 3, /* Vector length 4 */ 19777ec681f3Smrg LSC_VECT_SIZE_V8 = 4, /* Vector length 8 */ 19787ec681f3Smrg LSC_VECT_SIZE_V16 = 5, /* Vector length 16 */ 19797ec681f3Smrg LSC_VECT_SIZE_V32 = 6, /* Vector length 32 */ 19807ec681f3Smrg LSC_VECT_SIZE_V64 = 7, /* Vector length 64 */ 19817ec681f3Smrg}; 19827ec681f3Smrg 19837ec681f3Smrg#define LSC_ONE_ADDR_REG 1 198401e04c3fSmrg 198501e04c3fSmrg#endif /* BRW_EU_DEFINES_H */ 1986