1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27/* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32#ifndef BRW_EU_H 33#define BRW_EU_H 34 35#include <stdbool.h> 36#include <stdint.h> 37#include <stdio.h> 38 39#include <assert.h> 40 41#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) 42#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) 43 44#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) 45#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) 46#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) 47#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) 48#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) 49#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) 50#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) 51 52#define WRITEMASK_X 0x1 53#define WRITEMASK_Y 0x2 54#define WRITEMASK_Z 0x4 55#define WRITEMASK_W 0x8 56 57#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y) 58#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z) 59#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W) 60 61/** Number of general purpose registers (VS, WM, etc) */ 62#define BRW_MAX_GRF 128 63 64/** Number of message register file registers */ 65#define BRW_MAX_MRF 16 66 67 68#define BRW_ALIGN_1 0 69#define BRW_ALIGN_16 1 70 71#define BRW_ADDRESS_DIRECT 0 72#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 73 74#define BRW_CHANNEL_X 0 75#define BRW_CHANNEL_Y 1 76#define BRW_CHANNEL_Z 2 77#define BRW_CHANNEL_W 3 78 79enum brw_compression { 80 BRW_COMPRESSION_NONE, 81 BRW_COMPRESSION_2NDHALF, 82 BRW_COMPRESSION_COMPRESSED, 83}; 84 85#define GEN6_COMPRESSION_1Q 0 86#define GEN6_COMPRESSION_2Q 1 87#define GEN6_COMPRESSION_3Q 2 88#define GEN6_COMPRESSION_4Q 3 89#define GEN6_COMPRESSION_1H 0 90#define GEN6_COMPRESSION_2H 2 91 92#define BRW_CONDITIONAL_NONE 0 93#define BRW_CONDITIONAL_Z 1 94#define BRW_CONDITIONAL_NZ 2 95#define BRW_CONDITIONAL_EQ 1 /* Z */ 96#define BRW_CONDITIONAL_NEQ 2 /* NZ */ 97#define 
/* BRW_CONDITIONAL_* encodings 3 through 9. */
#define BRW_CONDITIONAL_G     3
#define BRW_CONDITIONAL_GE    4
#define BRW_CONDITIONAL_L     5
#define BRW_CONDITIONAL_LE    6
#define BRW_CONDITIONAL_R     7
#define BRW_CONDITIONAL_O     8
#define BRW_CONDITIONAL_U     9

#define BRW_DEBUG_NONE        0
#define BRW_DEBUG_BREAKPOINT  1

#define BRW_DEPENDENCY_NORMAL         0
#define BRW_DEPENDENCY_NOTCLEARED     1
#define BRW_DEPENDENCY_NOTCHECKED     2
#define BRW_DEPENDENCY_DISABLE        3

/* The numeric suffix is the name of the setting, not the encoded value. */
#define BRW_EXECUTE_1     0
#define BRW_EXECUTE_2     1
#define BRW_EXECUTE_4     2
#define BRW_EXECUTE_8     3
#define BRW_EXECUTE_16    4
#define BRW_EXECUTE_32    5

#define BRW_HORIZONTAL_STRIDE_0   0
#define BRW_HORIZONTAL_STRIDE_1   1
#define BRW_HORIZONTAL_STRIDE_2   2
#define BRW_HORIZONTAL_STRIDE_4   3

#define BRW_INSTRUCTION_NORMAL    0
#define BRW_INSTRUCTION_SATURATE  1

#define BRW_MASK_ENABLE   0
#define BRW_MASK_DISABLE  1

/** @{
 *
 * Gen6 has replaced "mask enable/disable" with WECtrl, which is
 * effectively the same but much simpler to think about.  Now, there
 * are two contributors ANDed together to whether channels are
 * executed: The predication on the instruction, and the channel write
 * enable.
 */
/**
 * This is the default value.  It means that a channel's write enable is set
 * if the per-channel IP is pointing at this instruction.
 */
#define BRW_WE_NORMAL		0
/**
 * This is used like BRW_MASK_DISABLE, and causes all channels to have
 * their write enable set.  Note that predication still contributes to
 * whether the channel actually gets written.
 */
#define BRW_WE_ALL		1
/** @} */

/**
 * Instruction opcodes.
 *
 * The first group carries explicit values, the largest being
 * BRW_OPCODE_NOP (126).  The second group starts at 128 and relies on
 * implicit consecutive numbering, so inserting or reordering entries there
 * changes the value of every enumerator that follows.
 */
enum opcode {
	/* These are the actual hardware opcodes. */
	BRW_OPCODE_MOV =	1,
	BRW_OPCODE_SEL =	2,
	BRW_OPCODE_NOT =	4,
	BRW_OPCODE_AND =	5,
	BRW_OPCODE_OR =	6,
	BRW_OPCODE_XOR =	7,
	BRW_OPCODE_SHR =	8,
	BRW_OPCODE_SHL =	9,
	BRW_OPCODE_RSR =	10,
	BRW_OPCODE_RSL =	11,
	BRW_OPCODE_ASR =	12,
	BRW_OPCODE_CMP =	16,
	BRW_OPCODE_CMPN =	17,
	BRW_OPCODE_JMPI =	32,
	BRW_OPCODE_IF =	34,
	BRW_OPCODE_IFF =	35,
	BRW_OPCODE_ELSE =	36,
	BRW_OPCODE_ENDIF =	37,
	BRW_OPCODE_DO =	38,
	BRW_OPCODE_WHILE =	39,
	BRW_OPCODE_BREAK =	40,
	BRW_OPCODE_CONTINUE = 41,
	BRW_OPCODE_HALT =	42,
	BRW_OPCODE_MSAVE =	44,
	BRW_OPCODE_MRESTORE = 45,
	BRW_OPCODE_PUSH =	46,
	BRW_OPCODE_POP =	47,
	BRW_OPCODE_WAIT =	48,
	BRW_OPCODE_SEND =	49,
	BRW_OPCODE_SENDC =	50,
	BRW_OPCODE_MATH =	56,
	BRW_OPCODE_ADD =	64,
	BRW_OPCODE_MUL =	65,
	BRW_OPCODE_AVG =	66,
	BRW_OPCODE_FRC =	67,
	BRW_OPCODE_RNDU =	68,
	BRW_OPCODE_RNDD =	69,
	BRW_OPCODE_RNDE =	70,
	BRW_OPCODE_RNDZ =	71,
	BRW_OPCODE_MAC =	72,
	BRW_OPCODE_MACH =	73,
	BRW_OPCODE_LZD =	74,
	BRW_OPCODE_SAD2 =	80,
	BRW_OPCODE_SADA2 =	81,
	BRW_OPCODE_DP4 =	84,
	BRW_OPCODE_DPH =	85,
	BRW_OPCODE_DP3 =	86,
	BRW_OPCODE_DP2 =	87,
	BRW_OPCODE_DPA2 =	88,
	BRW_OPCODE_LINE =	89,
	BRW_OPCODE_PLN =	90,
	BRW_OPCODE_NOP =	126,

	/* These are compiler backend opcodes that get translated into other
	 * instructions.
	 */
	FS_OPCODE_FB_WRITE = 128,
	SHADER_OPCODE_RCP,
	SHADER_OPCODE_RSQ,
	SHADER_OPCODE_SQRT,
	SHADER_OPCODE_EXP2,
	SHADER_OPCODE_LOG2,
	SHADER_OPCODE_POW,
	SHADER_OPCODE_SIN,
	SHADER_OPCODE_COS,
	FS_OPCODE_DDX,
	FS_OPCODE_DDY,
	FS_OPCODE_PIXEL_X,
	FS_OPCODE_PIXEL_Y,
	FS_OPCODE_CINTERP,
	FS_OPCODE_LINTERP,
	FS_OPCODE_TEX,
	FS_OPCODE_TXB,
	FS_OPCODE_TXD,
	FS_OPCODE_TXF,
	FS_OPCODE_TXL,
	FS_OPCODE_TXS,
	FS_OPCODE_DISCARD,
	FS_OPCODE_SPILL,
	FS_OPCODE_UNSPILL,
	FS_OPCODE_PULL_CONSTANT_LOAD,

	VS_OPCODE_URB_WRITE,
	VS_OPCODE_SCRATCH_READ,
	VS_OPCODE_SCRATCH_WRITE,
	VS_OPCODE_PULL_CONSTANT_LOAD,
};

/* The ALIGN1 and ALIGN16 predicate names reuse the same values (2..7). */
#define BRW_PREDICATE_NONE             0
#define BRW_PREDICATE_NORMAL           1
#define BRW_PREDICATE_ALIGN1_ANYV             2
#define BRW_PREDICATE_ALIGN1_ALLV             3
#define BRW_PREDICATE_ALIGN1_ANY2H            4
#define BRW_PREDICATE_ALIGN1_ALL2H            5
#define BRW_PREDICATE_ALIGN1_ANY4H            6
#define BRW_PREDICATE_ALIGN1_ALL4H            7
#define BRW_PREDICATE_ALIGN1_ANY8H            8
#define BRW_PREDICATE_ALIGN1_ALL8H            9
#define BRW_PREDICATE_ALIGN1_ANY16H           10
#define BRW_PREDICATE_ALIGN1_ALL16H           11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
#define BRW_PREDICATE_ALIGN16_ANY4H           6
#define BRW_PREDICATE_ALIGN16_ALL4H           7

#define BRW_ARCHITECTURE_REGISTER_FILE    0
#define BRW_GENERAL_REGISTER_FILE         1
#define BRW_MESSAGE_REGISTER_FILE         2
#define BRW_IMMEDIATE_VALUE               3

/* B and VF share the value 5. */
#define BRW_REGISTER_TYPE_UD  0
#define BRW_REGISTER_TYPE_D   1
#define BRW_REGISTER_TYPE_UW  2
#define BRW_REGISTER_TYPE_W   3
#define BRW_REGISTER_TYPE_UB  4
#define BRW_REGISTER_TYPE_B   5
#define BRW_REGISTER_TYPE_VF  5	/* packed float vector, immediates only? */
/* BRW_REGISTER_TYPE_* encodings 6 and 7; HF and V share the value 6. */
#define BRW_REGISTER_TYPE_HF  6
#define BRW_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F   7

#define BRW_ARF_NULL                  0x00
#define BRW_ARF_ADDRESS               0x10
#define BRW_ARF_ACCUMULATOR           0x20
#define BRW_ARF_FLAG                  0x30
#define BRW_ARF_MASK                  0x40
#define BRW_ARF_MASK_STACK            0x50
#define BRW_ARF_MASK_STACK_DEPTH      0x60
#define BRW_ARF_STATE                 0x70
#define BRW_ARF_CONTROL               0x80
#define BRW_ARF_NOTIFICATION_COUNT    0x90
#define BRW_ARF_IP                    0xA0

#define BRW_MRF_COMPR4			(1 << 7)

#define BRW_AMASK   0
#define BRW_IMASK   1
#define BRW_LMASK   2
#define BRW_CMASK   3

#define BRW_THREAD_NORMAL     0
#define BRW_THREAD_ATOMIC     1
#define BRW_THREAD_SWITCH     2

/* The numeric suffix is the name of the setting, not the encoded value. */
#define BRW_VERTICAL_STRIDE_0                 0
#define BRW_VERTICAL_STRIDE_1                 1
#define BRW_VERTICAL_STRIDE_2                 2
#define BRW_VERTICAL_STRIDE_4                 3
#define BRW_VERTICAL_STRIDE_8                 4
#define BRW_VERTICAL_STRIDE_16                5
#define BRW_VERTICAL_STRIDE_32                6
#define BRW_VERTICAL_STRIDE_64                7
#define BRW_VERTICAL_STRIDE_128               8
#define BRW_VERTICAL_STRIDE_256               9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF

#define BRW_WIDTH_1       0
#define BRW_WIDTH_2       1
#define BRW_WIDTH_4       2
#define BRW_WIDTH_8       3
#define BRW_WIDTH_16      4

#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11

#define BRW_POLYGON_FACING_FRONT      0
#define BRW_POLYGON_FACING_BACK       1

#define BRW_MESSAGE_TARGET_NULL               0
#define BRW_MESSAGE_TARGET_MATH               1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER            2
#define BRW_MESSAGE_TARGET_GATEWAY            3
#define BRW_MESSAGE_TARGET_DATAPORT_READ      4
#define BRW_MESSAGE_TARGET_DATAPORT_WRITE     5
#define BRW_MESSAGE_TARGET_URB                6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER     7

#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE  4
#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE   5
#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE    9

#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3

/* Several names below intentionally share one numeric value. */
#define BRW_SAMPLER_MESSAGE_SAMPLE	              0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3

#define GEN5_SAMPLER_MESSAGE_SAMPLE              0
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS         1
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD          2
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE      3
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS       4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE  6
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD           7
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10

/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
#define BRW_SAMPLER_SIMD_MODE_SIMD8                     1
#define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3

#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4

#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2

#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3

/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
/* G45, GEN5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ	    1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ	    3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6
/* GEN6 */
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ	    1
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ  5
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6

#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2

#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4

/**
 * Message target: Shared Function ID for where to SEND a message.
 *
 * These are enumerated in the ISA reference under "send - Send Message".
 * In particular, see the following tables:
 * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
 * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
 * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
 *   Overview / GPE Function IDs
 *
 * The GEN6 sampler-cache and render-cache IDs reuse the values 4 and 5 of
 * BRW_SFID_DATAPORT_READ and BRW_SFID_DATAPORT_WRITE, so those enumerators
 * compare equal and cannot both appear as case labels in one switch.
 */
enum brw_message_target {
	BRW_SFID_NULL                     = 0,
	BRW_SFID_MATH                     = 1, /* Only valid on Gen4-5 */
	BRW_SFID_SAMPLER                  = 2,
	BRW_SFID_MESSAGE_GATEWAY          = 3,
	BRW_SFID_DATAPORT_READ            = 4,
	BRW_SFID_DATAPORT_WRITE           = 5,
	BRW_SFID_URB                      = 6,
	BRW_SFID_THREAD_SPAWNER           = 7,

	GEN6_SFID_DATAPORT_SAMPLER_CACHE  = 4,
	GEN6_SFID_DATAPORT_RENDER_CACHE   = 5,
	GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,

	GEN7_SFID_DATAPORT_DATA_CACHE     = 10,
};

#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE     10

#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE                2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7

/* GEN6 */
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE              7
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE               8
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          9
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE               10
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE           11
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE             12
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE               13
GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13 472#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14 473 474#define BRW_MATH_FUNCTION_INV 1 475#define BRW_MATH_FUNCTION_LOG 2 476#define BRW_MATH_FUNCTION_EXP 3 477#define BRW_MATH_FUNCTION_SQRT 4 478#define BRW_MATH_FUNCTION_RSQ 5 479#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ 480#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ 481#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ 482#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ 483#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ 484#define BRW_MATH_FUNCTION_POW 10 485#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 486#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 487#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 488 489#define BRW_MATH_INTEGER_UNSIGNED 0 490#define BRW_MATH_INTEGER_SIGNED 1 491 492#define BRW_MATH_PRECISION_FULL 0 493#define BRW_MATH_PRECISION_PARTIAL 1 494 495#define BRW_MATH_SATURATE_NONE 0 496#define BRW_MATH_SATURATE_SATURATE 1 497 498#define BRW_MATH_DATA_VECTOR 0 499#define BRW_MATH_DATA_SCALAR 1 500 501#define BRW_URB_OPCODE_WRITE 0 502 503#define BRW_URB_SWIZZLE_NONE 0 504#define BRW_URB_SWIZZLE_INTERLEAVE 1 505#define BRW_URB_SWIZZLE_TRANSPOSE 2 506 507#define BRW_SCRATCH_SPACE_SIZE_1K 0 508#define BRW_SCRATCH_SPACE_SIZE_2K 1 509#define BRW_SCRATCH_SPACE_SIZE_4K 2 510#define BRW_SCRATCH_SPACE_SIZE_8K 3 511#define BRW_SCRATCH_SPACE_SIZE_16K 4 512#define BRW_SCRATCH_SPACE_SIZE_32K 5 513#define BRW_SCRATCH_SPACE_SIZE_64K 6 514#define BRW_SCRATCH_SPACE_SIZE_128K 7 515#define BRW_SCRATCH_SPACE_SIZE_256K 8 516#define BRW_SCRATCH_SPACE_SIZE_512K 9 517#define BRW_SCRATCH_SPACE_SIZE_1M 10 518#define BRW_SCRATCH_SPACE_SIZE_2M 11 519 520#define REG_SIZE (8*4) 521 522struct brw_instruction { 523 struct { 524 unsigned opcode:7; 525 unsigned pad:1; 526 unsigned access_mode:1; 527 unsigned mask_control:1; 528 unsigned dependency_control:2; 529 unsigned compression_control:2; /* gen6: quater control */ 530 unsigned thread_control:2; 531 
unsigned predicate_control:4; 532 unsigned predicate_inverse:1; 533 unsigned execution_size:3; 534 /** 535 * Conditional Modifier for most instructions. On Gen6+, this is also 536 * used for the SEND instruction's Message Target/SFID. 537 */ 538 unsigned destreg__conditionalmod:4; 539 unsigned acc_wr_control:1; 540 unsigned cmpt_control:1; 541 unsigned debug_control:1; 542 unsigned saturate:1; 543 } header; 544 545 union { 546 struct { 547 unsigned dest_reg_file:2; 548 unsigned dest_reg_type:3; 549 unsigned src0_reg_file:2; 550 unsigned src0_reg_type:3; 551 unsigned src1_reg_file:2; 552 unsigned src1_reg_type:3; 553 unsigned pad:1; 554 unsigned dest_subreg_nr:5; 555 unsigned dest_reg_nr:8; 556 unsigned dest_horiz_stride:2; 557 unsigned dest_address_mode:1; 558 } da1; 559 560 struct { 561 unsigned dest_reg_file:2; 562 unsigned dest_reg_type:3; 563 unsigned src0_reg_file:2; 564 unsigned src0_reg_type:3; 565 unsigned src1_reg_file:2; /* 0x00000c00 */ 566 unsigned src1_reg_type:3; /* 0x00007000 */ 567 unsigned pad:1; 568 int dest_indirect_offset:10; /* offset against the deref'd address reg */ 569 unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ 570 unsigned dest_horiz_stride:2; 571 unsigned dest_address_mode:1; 572 } ia1; 573 574 struct { 575 unsigned dest_reg_file:2; 576 unsigned dest_reg_type:3; 577 unsigned src0_reg_file:2; 578 unsigned src0_reg_type:3; 579 unsigned src1_reg_file:2; 580 unsigned src1_reg_type:3; 581 unsigned pad:1; 582 unsigned dest_writemask:4; 583 unsigned dest_subreg_nr:1; 584 unsigned dest_reg_nr:8; 585 unsigned dest_horiz_stride:2; 586 unsigned dest_address_mode:1; 587 } da16; 588 589 struct { 590 unsigned dest_reg_file:2; 591 unsigned dest_reg_type:3; 592 unsigned src0_reg_file:2; 593 unsigned src0_reg_type:3; 594 unsigned pad0:6; 595 unsigned dest_writemask:4; 596 int dest_indirect_offset:6; 597 unsigned dest_subreg_nr:3; 598 unsigned dest_horiz_stride:2; 599 unsigned dest_address_mode:1; 600 } ia16; 601 602 struct { 603 
unsigned dest_reg_file:2; 604 unsigned dest_reg_type:3; 605 unsigned src0_reg_file:2; 606 unsigned src0_reg_type:3; 607 unsigned src1_reg_file:2; 608 unsigned src1_reg_type:3; 609 unsigned pad:1; 610 611 int jump_count:16; 612 } branch_gen6; 613 614 struct { 615 unsigned dest_reg_file:1; 616 unsigned flag_subreg_num:1; 617 unsigned pad0:2; 618 unsigned src0_abs:1; 619 unsigned src0_negate:1; 620 unsigned src1_abs:1; 621 unsigned src1_negate:1; 622 unsigned src2_abs:1; 623 unsigned src2_negate:1; 624 unsigned pad1:7; 625 unsigned dest_writemask:4; 626 unsigned dest_subreg_nr:3; 627 unsigned dest_reg_nr:8; 628 } da3src; 629 } bits1; 630 631 632 union { 633 struct { 634 unsigned src0_subreg_nr:5; 635 unsigned src0_reg_nr:8; 636 unsigned src0_abs:1; 637 unsigned src0_negate:1; 638 unsigned src0_address_mode:1; 639 unsigned src0_horiz_stride:2; 640 unsigned src0_width:3; 641 unsigned src0_vert_stride:4; 642 unsigned flag_subreg_nr:1; 643 unsigned flag_reg_nr:1; 644 unsigned pad:5; 645 } da1; 646 647 struct { 648 int src0_indirect_offset:10; 649 unsigned src0_subreg_nr:3; 650 unsigned src0_abs:1; 651 unsigned src0_negate:1; 652 unsigned src0_address_mode:1; 653 unsigned src0_horiz_stride:2; 654 unsigned src0_width:3; 655 unsigned src0_vert_stride:4; 656 unsigned flag_subreg_nr:1; 657 unsigned flag_reg_nr:1; 658 unsigned pad:5; 659 } ia1; 660 661 struct { 662 unsigned src0_swz_x:2; 663 unsigned src0_swz_y:2; 664 unsigned src0_subreg_nr:1; 665 unsigned src0_reg_nr:8; 666 unsigned src0_abs:1; 667 unsigned src0_negate:1; 668 unsigned src0_address_mode:1; 669 unsigned src0_swz_z:2; 670 unsigned src0_swz_w:2; 671 unsigned pad0:1; 672 unsigned src0_vert_stride:4; 673 unsigned flag_subreg_nr:1; 674 unsigned flag_reg_nr:1; 675 unsigned pad1:5; 676 } da16; 677 678 struct { 679 unsigned src0_swz_x:2; 680 unsigned src0_swz_y:2; 681 int src0_indirect_offset:6; 682 unsigned src0_subreg_nr:3; 683 unsigned src0_abs:1; 684 unsigned src0_negate:1; 685 unsigned src0_address_mode:1; 686 
unsigned src0_swz_z:2; 687 unsigned src0_swz_w:2; 688 unsigned pad0:1; 689 unsigned src0_vert_stride:4; 690 unsigned flag_subreg_nr:1; 691 unsigned flag_reg_nr:1; 692 unsigned pad1:5; 693 } ia16; 694 695 /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction. 696 * 697 * Does not apply to Gen6+. The SFID/message target moved to bits 698 * 27:24 of the header (destreg__conditionalmod); EOT is in bits3. 699 */ 700 struct { 701 unsigned pad:26; 702 unsigned end_of_thread:1; 703 unsigned pad1:1; 704 unsigned sfid:4; 705 } send_gen5; /* for Ironlake only */ 706 707 struct { 708 unsigned src0_rep_ctrl:1; 709 unsigned src0_swizzle:8; 710 unsigned src0_subreg_nr:3; 711 unsigned src0_reg_nr:8; 712 unsigned pad0:1; 713 unsigned src1_rep_ctrl:1; 714 unsigned src1_swizzle:8; 715 unsigned src1_subreg_nr_low:2; 716 } da3src; 717 } bits2; 718 719 union { 720 struct { 721 unsigned src1_subreg_nr:5; 722 unsigned src1_reg_nr:8; 723 unsigned src1_abs:1; 724 unsigned src1_negate:1; 725 unsigned src1_address_mode:1; 726 unsigned src1_horiz_stride:2; 727 unsigned src1_width:3; 728 unsigned src1_vert_stride:4; 729 unsigned pad0:7; 730 } da1; 731 732 struct { 733 unsigned src1_swz_x:2; 734 unsigned src1_swz_y:2; 735 unsigned src1_subreg_nr:1; 736 unsigned src1_reg_nr:8; 737 unsigned src1_abs:1; 738 unsigned src1_negate:1; 739 unsigned src1_address_mode:1; 740 unsigned src1_swz_z:2; 741 unsigned src1_swz_w:2; 742 unsigned pad1:1; 743 unsigned src1_vert_stride:4; 744 unsigned pad2:7; 745 } da16; 746 747 struct { 748 int src1_indirect_offset:10; 749 unsigned src1_subreg_nr:3; 750 unsigned src1_abs:1; 751 unsigned src1_negate:1; 752 unsigned src1_address_mode:1; 753 unsigned src1_horiz_stride:2; 754 unsigned src1_width:3; 755 unsigned src1_vert_stride:4; 756 unsigned flag_subreg_nr:1; 757 unsigned flag_reg_nr:1; 758 unsigned pad1:5; 759 } ia1; 760 761 struct { 762 unsigned src1_swz_x:2; 763 unsigned src1_swz_y:2; 764 int src1_indirect_offset:6; 765 unsigned src1_subreg_nr:3; 766 
unsigned src1_abs:1; 767 unsigned src1_negate:1; 768 unsigned pad0:1; 769 unsigned src1_swz_z:2; 770 unsigned src1_swz_w:2; 771 unsigned pad1:1; 772 unsigned src1_vert_stride:4; 773 unsigned flag_subreg_nr:1; 774 unsigned flag_reg_nr:1; 775 unsigned pad2:5; 776 } ia16; 777 778 struct { 779 int jump_count:16; /* note: signed */ 780 unsigned pop_count:4; 781 unsigned pad0:12; 782 } if_else; 783 784 /* This is also used for gen7 IF/ELSE instructions */ 785 struct { 786 /* Signed jump distance to the ip to jump to if all channels 787 * are disabled after the break or continue. It should point 788 * to the end of the innermost control flow block, as that's 789 * where some channel could get re-enabled. 790 */ 791 int jip:16; 792 793 /* Signed jump distance to the location to resume execution 794 * of this channel if it's enabled for the break or continue. 795 */ 796 int uip:16; 797 } break_cont; 798 799 /** 800 * \defgroup SEND instructions / Message Descriptors 801 * 802 * @{ 803 */ 804 805 /** 806 * Generic Message Descriptor for Gen4 SEND instructions. The structs 807 * below expand function_control to something specific for their 808 * message. Due to struct packing issues, they duplicate these bits. 809 * 810 * See the G45 PRM, Volume 4, Table 14-15. 811 */ 812 struct { 813 unsigned function_control:16; 814 unsigned response_length:4; 815 unsigned msg_length:4; 816 unsigned msg_target:4; 817 unsigned pad1:3; 818 unsigned end_of_thread:1; 819 } generic; 820 821 /** 822 * Generic Message Descriptor for Gen5-7 SEND instructions. 823 * 824 * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most 825 * of the information on the SEND instruction is missing from the public 826 * Ironlake PRM.) 827 * 828 * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies. 
829 * According to the SEND instruction description: 830 * "The MSb of the message description, the EOT field, always comes from 831 * bit 127 of the instruction word"...which is bit 31 of this field. 832 */ 833 struct { 834 unsigned function_control:19; 835 unsigned header_present:1; 836 unsigned response_length:5; 837 unsigned msg_length:4; 838 unsigned pad1:2; 839 unsigned end_of_thread:1; 840 } generic_gen5; 841 842 /** G45 PRM, Volume 4, Section 6.1.1.1 */ 843 struct { 844 unsigned function:4; 845 unsigned int_type:1; 846 unsigned precision:1; 847 unsigned saturate:1; 848 unsigned data_type:1; 849 unsigned pad0:8; 850 unsigned response_length:4; 851 unsigned msg_length:4; 852 unsigned msg_target:4; 853 unsigned pad1:3; 854 unsigned end_of_thread:1; 855 } math; 856 857 /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */ 858 struct { 859 unsigned function:4; 860 unsigned int_type:1; 861 unsigned precision:1; 862 unsigned saturate:1; 863 unsigned data_type:1; 864 unsigned snapshot:1; 865 unsigned pad0:10; 866 unsigned header_present:1; 867 unsigned response_length:5; 868 unsigned msg_length:4; 869 unsigned pad1:2; 870 unsigned end_of_thread:1; 871 } math_gen5; 872 873 /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */ 874 struct { 875 unsigned binding_table_index:8; 876 unsigned sampler:4; 877 unsigned return_format:2; 878 unsigned msg_type:2; 879 unsigned response_length:4; 880 unsigned msg_length:4; 881 unsigned msg_target:4; 882 unsigned pad1:3; 883 unsigned end_of_thread:1; 884 } sampler; 885 886 /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */ 887 struct { 888 unsigned binding_table_index:8; 889 unsigned sampler:4; 890 unsigned msg_type:4; 891 unsigned response_length:4; 892 unsigned msg_length:4; 893 unsigned msg_target:4; 894 unsigned pad1:3; 895 unsigned end_of_thread:1; 896 } sampler_g4x; 897 898 /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */ 899 struct { 900 unsigned binding_table_index:8; 901 unsigned sampler:4; 902 
unsigned msg_type:4; 903 unsigned simd_mode:2; 904 unsigned pad0:1; 905 unsigned header_present:1; 906 unsigned response_length:5; 907 unsigned msg_length:4; 908 unsigned pad1:2; 909 unsigned end_of_thread:1; 910 } sampler_gen5; 911 912 struct { 913 unsigned binding_table_index:8; 914 unsigned sampler:4; 915 unsigned msg_type:5; 916 unsigned simd_mode:2; 917 unsigned header_present:1; 918 unsigned response_length:5; 919 unsigned msg_length:4; 920 unsigned pad1:2; 921 unsigned end_of_thread:1; 922 } sampler_gen7; 923 924 struct brw_urb_immediate { 925 unsigned opcode:4; 926 unsigned offset:6; 927 unsigned swizzle_control:2; 928 unsigned pad:1; 929 unsigned allocate:1; 930 unsigned used:1; 931 unsigned complete:1; 932 unsigned response_length:4; 933 unsigned msg_length:4; 934 unsigned msg_target:4; 935 unsigned pad1:3; 936 unsigned end_of_thread:1; 937 } urb; 938 939 struct { 940 unsigned opcode:4; 941 unsigned offset:6; 942 unsigned swizzle_control:2; 943 unsigned pad:1; 944 unsigned allocate:1; 945 unsigned used:1; 946 unsigned complete:1; 947 unsigned pad0:3; 948 unsigned header_present:1; 949 unsigned response_length:5; 950 unsigned msg_length:4; 951 unsigned pad1:2; 952 unsigned end_of_thread:1; 953 } urb_gen5; 954 955 struct { 956 unsigned opcode:3; 957 unsigned offset:11; 958 unsigned swizzle_control:1; 959 unsigned complete:1; 960 unsigned per_slot_offset:1; 961 unsigned pad0:2; 962 unsigned header_present:1; 963 unsigned response_length:5; 964 unsigned msg_length:4; 965 unsigned pad1:2; 966 unsigned end_of_thread:1; 967 } urb_gen7; 968 969 /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */ 970 struct { 971 unsigned binding_table_index:8; 972 unsigned msg_control:4; 973 unsigned msg_type:2; 974 unsigned target_cache:2; 975 unsigned response_length:4; 976 unsigned msg_length:4; 977 unsigned msg_target:4; 978 unsigned pad1:3; 979 unsigned end_of_thread:1; 980 } dp_read; 981 982 /** G45 PRM, Volume 4, Section 5.10.1.1.2 */ 983 struct { 984 unsigned 
binding_table_index:8; 985 unsigned msg_control:3; 986 unsigned msg_type:3; 987 unsigned target_cache:2; 988 unsigned response_length:4; 989 unsigned msg_length:4; 990 unsigned msg_target:4; 991 unsigned pad1:3; 992 unsigned end_of_thread:1; 993 } dp_read_g4x; 994 995 /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ 996 struct { 997 unsigned binding_table_index:8; 998 unsigned msg_control:3; 999 unsigned msg_type:3; 1000 unsigned target_cache:2; 1001 unsigned pad0:3; 1002 unsigned header_present:1; 1003 unsigned response_length:5; 1004 unsigned msg_length:4; 1005 unsigned pad1:2; 1006 unsigned end_of_thread:1; 1007 } dp_read_gen5; 1008 1009 /** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */ 1010 struct { 1011 unsigned binding_table_index:8; 1012 unsigned msg_control:3; 1013 unsigned last_render_target:1; 1014 unsigned msg_type:3; 1015 unsigned send_commit_msg:1; 1016 unsigned response_length:4; 1017 unsigned msg_length:4; 1018 unsigned msg_target:4; 1019 unsigned pad1:3; 1020 unsigned end_of_thread:1; 1021 } dp_write; 1022 1023 /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ 1024 struct { 1025 unsigned binding_table_index:8; 1026 unsigned msg_control:3; 1027 unsigned last_render_target:1; 1028 unsigned msg_type:3; 1029 unsigned send_commit_msg:1; 1030 unsigned pad0:3; 1031 unsigned header_present:1; 1032 unsigned response_length:5; 1033 unsigned msg_length:4; 1034 unsigned pad1:2; 1035 unsigned end_of_thread:1; 1036 } dp_write_gen5; 1037 1038 /** 1039 * Message for the Sandybridge Sampler Cache or Constant Cache Data Port. 1040 * 1041 * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1. 
1042 **/ 1043 struct { 1044 unsigned binding_table_index:8; 1045 unsigned msg_control:5; 1046 unsigned msg_type:3; 1047 unsigned pad0:3; 1048 unsigned header_present:1; 1049 unsigned response_length:5; 1050 unsigned msg_length:4; 1051 unsigned pad1:2; 1052 unsigned end_of_thread:1; 1053 } gen6_dp_sampler_const_cache; 1054 1055 /** 1056 * Message for the Sandybridge Render Cache Data Port. 1057 * 1058 * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1, 1059 * Section 3.9.2.1.1: Message Descriptor. 1060 * 1061 * "Slot Group Select" and "Last Render Target" are part of the 1062 * 5-bit message control for Render Target Write messages. See 1063 * Section 3.9.9.2.1 of the same volume. 1064 */ 1065 struct { 1066 unsigned binding_table_index:8; 1067 unsigned msg_control:3; 1068 unsigned slot_group_select:1; 1069 unsigned last_render_target:1; 1070 unsigned msg_type:4; 1071 unsigned send_commit_msg:1; 1072 unsigned pad0:1; 1073 unsigned header_present:1; 1074 unsigned response_length:5; 1075 unsigned msg_length:4; 1076 unsigned pad1:2; 1077 unsigned end_of_thread:1; 1078 } gen6_dp; 1079 1080 /** 1081 * Message for any of the Gen7 Data Port caches. 1082 * 1083 * Most fields are defined in BSpec volume 5c.2 Data Port / Messages / 1084 * Data Port Messages / Message Descriptor. Once again, "Slot Group 1085 * Select" and "Last Render Target" are part of the 6-bit message 1086 * control for Render Target Writes. 
1087 */ 1088 struct { 1089 unsigned binding_table_index:8; 1090 unsigned msg_control:3; 1091 unsigned slot_group_select:1; 1092 unsigned last_render_target:1; 1093 unsigned msg_control_pad:1; 1094 unsigned msg_type:4; 1095 unsigned pad1:1; 1096 unsigned header_present:1; 1097 unsigned response_length:5; 1098 unsigned msg_length:4; 1099 unsigned pad2:2; 1100 unsigned end_of_thread:1; 1101 } gen7_dp; 1102 /** @} */ 1103 1104 struct { 1105 unsigned src1_subreg_nr_high:1; 1106 unsigned src1_reg_nr:8; 1107 unsigned pad0:1; 1108 unsigned src2_rep_ctrl:1; 1109 unsigned src2_swizzle:8; 1110 unsigned src2_subreg_nr:3; 1111 unsigned src2_reg_nr:8; 1112 unsigned pad1:2; 1113 } da3src; 1114 1115 int d; 1116 unsigned ud; 1117 float f; 1118 } bits3; 1119}; 1120 1121 1122/* These aren't hardware structs, just something useful for us to pass around: 1123 * 1124 * Align1 operation has a lot of control over input ranges. Used in 1125 * WM programs to implement shaders decomposed into "channel serial" 1126 * or "structure of array" form: 1127 */ 1128struct brw_reg { 1129 unsigned type:4; 1130 unsigned file:2; 1131 unsigned nr:8; 1132 unsigned subnr:5; /* :1 in align16 */ 1133 unsigned negate:1; /* source only */ 1134 unsigned abs:1; /* source only */ 1135 unsigned vstride:4; /* source only */ 1136 unsigned width:3; /* src only, align1 only */ 1137 unsigned hstride:2; /* align1 only */ 1138 unsigned address_mode:1; /* relative addressing, hopefully! 
*/ 1139 unsigned pad0:1; 1140 1141 union { 1142 struct { 1143 unsigned swizzle:8; /* src only, align16 only */ 1144 unsigned writemask:4; /* dest only, align16 only */ 1145 int indirect_offset:10; /* relative addressing offset */ 1146 unsigned pad1:10; /* two dwords total */ 1147 } bits; 1148 1149 float f; 1150 int d; 1151 unsigned ud; 1152 } dw1; 1153}; 1154 1155struct brw_indirect { 1156 unsigned addr_subnr:4; 1157 int addr_offset:10; 1158 unsigned pad:18; 1159}; 1160 1161#define BRW_EU_MAX_INSN_STACK 5 1162#define BRW_EU_MAX_INSN 10000 1163 1164struct brw_compile { 1165 struct brw_instruction *store; 1166 unsigned nr_insn; 1167 1168 int gen; 1169 1170 /* Allow clients to push/pop instruction state: 1171 */ 1172 struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; 1173 bool compressed_stack[BRW_EU_MAX_INSN_STACK]; 1174 struct brw_instruction *current; 1175 1176 unsigned flag_value; 1177 bool single_program_flow; 1178 bool compressed; 1179 1180 /* Control flow stacks: 1181 * - if_stack contains IF and ELSE instructions which must be patched 1182 * (and popped) once the matching ENDIF instruction is encountered. 1183 */ 1184 struct brw_instruction **if_stack; 1185 int if_stack_depth; 1186 int if_stack_array_size; 1187}; 1188 1189static inline int type_sz(unsigned type) 1190{ 1191 switch (type) { 1192 case BRW_REGISTER_TYPE_UD: 1193 case BRW_REGISTER_TYPE_D: 1194 case BRW_REGISTER_TYPE_F: 1195 return 4; 1196 case BRW_REGISTER_TYPE_HF: 1197 case BRW_REGISTER_TYPE_UW: 1198 case BRW_REGISTER_TYPE_W: 1199 return 2; 1200 case BRW_REGISTER_TYPE_UB: 1201 case BRW_REGISTER_TYPE_B: 1202 return 1; 1203 default: 1204 return 0; 1205 } 1206} 1207 1208/** 1209 * Construct a brw_reg. 
1210 * \param file one of the BRW_x_REGISTER_FILE values 1211 * \param nr register number/index 1212 * \param subnr register sub number 1213 * \param type one of BRW_REGISTER_TYPE_x 1214 * \param vstride one of BRW_VERTICAL_STRIDE_x 1215 * \param width one of BRW_WIDTH_x 1216 * \param hstride one of BRW_HORIZONTAL_STRIDE_x 1217 * \param swizzle one of BRW_SWIZZLE_x 1218 * \param writemask WRITEMASK_X/Y/Z/W bitfield 1219 */ 1220static inline struct brw_reg brw_reg(unsigned file, 1221 unsigned nr, 1222 unsigned subnr, 1223 unsigned type, 1224 unsigned vstride, 1225 unsigned width, 1226 unsigned hstride, 1227 unsigned swizzle, 1228 unsigned writemask) 1229{ 1230 struct brw_reg reg; 1231 if (file == BRW_GENERAL_REGISTER_FILE) 1232 assert(nr < BRW_MAX_GRF); 1233 else if (file == BRW_MESSAGE_REGISTER_FILE) 1234 assert((nr & ~(1 << 7)) < BRW_MAX_MRF); 1235 else if (file == BRW_ARCHITECTURE_REGISTER_FILE) 1236 assert(nr <= BRW_ARF_IP); 1237 1238 reg.type = type; 1239 reg.file = file; 1240 reg.nr = nr; 1241 reg.subnr = subnr * type_sz(type); 1242 reg.negate = 0; 1243 reg.abs = 0; 1244 reg.vstride = vstride; 1245 reg.width = width; 1246 reg.hstride = hstride; 1247 reg.address_mode = BRW_ADDRESS_DIRECT; 1248 reg.pad0 = 0; 1249 1250 /* Could do better: If the reg is r5.3<0;1,0>, we probably want to 1251 * set swizzle and writemask to W, as the lower bits of subnr will 1252 * be lost when converted to align16. This is probably too much to 1253 * keep track of as you'd want it adjusted by suboffset(), etc. 1254 * Perhaps fix up when converting to align16? 
1255 */ 1256 reg.dw1.bits.swizzle = swizzle; 1257 reg.dw1.bits.writemask = writemask; 1258 reg.dw1.bits.indirect_offset = 0; 1259 reg.dw1.bits.pad1 = 0; 1260 return reg; 1261} 1262 1263/** Construct float[16] register */ 1264static inline struct brw_reg brw_vec16_reg(unsigned file, 1265 unsigned nr, 1266 unsigned subnr) 1267{ 1268 return brw_reg(file, 1269 nr, 1270 subnr, 1271 BRW_REGISTER_TYPE_F, 1272 BRW_VERTICAL_STRIDE_16, 1273 BRW_WIDTH_16, 1274 BRW_HORIZONTAL_STRIDE_1, 1275 BRW_SWIZZLE_XYZW, 1276 WRITEMASK_XYZW); 1277} 1278 1279/** Construct float[8] register */ 1280static inline struct brw_reg brw_vec8_reg(unsigned file, 1281 unsigned nr, 1282 unsigned subnr) 1283{ 1284 return brw_reg(file, 1285 nr, 1286 subnr, 1287 BRW_REGISTER_TYPE_F, 1288 BRW_VERTICAL_STRIDE_8, 1289 BRW_WIDTH_8, 1290 BRW_HORIZONTAL_STRIDE_1, 1291 BRW_SWIZZLE_XYZW, 1292 WRITEMASK_XYZW); 1293} 1294 1295/** Construct float[4] register */ 1296static inline struct brw_reg brw_vec4_reg(unsigned file, 1297 unsigned nr, 1298 unsigned subnr) 1299{ 1300 return brw_reg(file, 1301 nr, 1302 subnr, 1303 BRW_REGISTER_TYPE_F, 1304 BRW_VERTICAL_STRIDE_4, 1305 BRW_WIDTH_4, 1306 BRW_HORIZONTAL_STRIDE_1, 1307 BRW_SWIZZLE_XYZW, 1308 WRITEMASK_XYZW); 1309} 1310 1311/** Construct float[2] register */ 1312static inline struct brw_reg brw_vec2_reg(unsigned file, 1313 unsigned nr, 1314 unsigned subnr) 1315{ 1316 return brw_reg(file, 1317 nr, 1318 subnr, 1319 BRW_REGISTER_TYPE_F, 1320 BRW_VERTICAL_STRIDE_2, 1321 BRW_WIDTH_2, 1322 BRW_HORIZONTAL_STRIDE_1, 1323 BRW_SWIZZLE_XYXY, 1324 WRITEMASK_XY); 1325} 1326 1327/** Construct float[1] register */ 1328static inline struct brw_reg brw_vec1_reg(unsigned file, 1329 unsigned nr, 1330 unsigned subnr) 1331{ 1332 return brw_reg(file, 1333 nr, 1334 subnr, 1335 BRW_REGISTER_TYPE_F, 1336 BRW_VERTICAL_STRIDE_0, 1337 BRW_WIDTH_1, 1338 BRW_HORIZONTAL_STRIDE_0, 1339 BRW_SWIZZLE_XXXX, 1340 WRITEMASK_X); 1341} 1342 1343 1344static inline struct brw_reg __retype(struct brw_reg reg, 
1345 unsigned type) 1346{ 1347 reg.type = type; 1348 return reg; 1349} 1350 1351static inline struct brw_reg __retype_d(struct brw_reg reg) 1352{ 1353 return __retype(reg, BRW_REGISTER_TYPE_D); 1354} 1355 1356static inline struct brw_reg __retype_ud(struct brw_reg reg) 1357{ 1358 return __retype(reg, BRW_REGISTER_TYPE_UD); 1359} 1360 1361static inline struct brw_reg __retype_uw(struct brw_reg reg) 1362{ 1363 return __retype(reg, BRW_REGISTER_TYPE_UW); 1364} 1365 1366static inline struct brw_reg __sechalf(struct brw_reg reg) 1367{ 1368 if (reg.vstride) 1369 reg.nr++; 1370 return reg; 1371} 1372 1373static inline struct brw_reg __suboffset(struct brw_reg reg, 1374 unsigned delta) 1375{ 1376 reg.subnr += delta * type_sz(reg.type); 1377 return reg; 1378} 1379 1380static inline struct brw_reg __offset(struct brw_reg reg, 1381 unsigned delta) 1382{ 1383 reg.nr += delta; 1384 return reg; 1385} 1386 1387static inline struct brw_reg byte_offset(struct brw_reg reg, 1388 unsigned bytes) 1389{ 1390 unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; 1391 reg.nr = newoffset / REG_SIZE; 1392 reg.subnr = newoffset % REG_SIZE; 1393 return reg; 1394} 1395 1396 1397/** Construct unsigned word[16] register */ 1398static inline struct brw_reg brw_uw16_reg(unsigned file, 1399 unsigned nr, 1400 unsigned subnr) 1401{ 1402 return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); 1403} 1404 1405/** Construct unsigned word[8] register */ 1406static inline struct brw_reg brw_uw8_reg(unsigned file, 1407 unsigned nr, 1408 unsigned subnr) 1409{ 1410 return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); 1411} 1412 1413/** Construct unsigned word[1] register */ 1414static inline struct brw_reg brw_uw1_reg(unsigned file, 1415 unsigned nr, 1416 unsigned subnr) 1417{ 1418 return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); 1419} 1420 1421static inline struct brw_reg brw_imm_reg(unsigned type) 
1422{ 1423 return brw_reg( BRW_IMMEDIATE_VALUE, 1424 0, 1425 0, 1426 type, 1427 BRW_VERTICAL_STRIDE_0, 1428 BRW_WIDTH_1, 1429 BRW_HORIZONTAL_STRIDE_0, 1430 0, 1431 0); 1432} 1433 1434/** Construct float immediate register */ 1435static inline struct brw_reg brw_imm_f(float f) 1436{ 1437 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); 1438 imm.dw1.f = f; 1439 return imm; 1440} 1441 1442/** Construct integer immediate register */ 1443static inline struct brw_reg brw_imm_d(int d) 1444{ 1445 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); 1446 imm.dw1.d = d; 1447 return imm; 1448} 1449 1450/** Construct uint immediate register */ 1451static inline struct brw_reg brw_imm_ud(unsigned ud) 1452{ 1453 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); 1454 imm.dw1.ud = ud; 1455 return imm; 1456} 1457 1458/** Construct ushort immediate register */ 1459static inline struct brw_reg brw_imm_uw(uint16_t uw) 1460{ 1461 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); 1462 imm.dw1.ud = uw | (uw << 16); 1463 return imm; 1464} 1465 1466/** Construct short immediate register */ 1467static inline struct brw_reg brw_imm_w(int16_t w) 1468{ 1469 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); 1470 imm.dw1.d = w | (w << 16); 1471 return imm; 1472} 1473 1474/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type 1475 * numbers alias with _V and _VF below: 1476 */ 1477 1478/** Construct vector of eight signed half-byte values */ 1479static inline struct brw_reg brw_imm_v(unsigned v) 1480{ 1481 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); 1482 imm.vstride = BRW_VERTICAL_STRIDE_0; 1483 imm.width = BRW_WIDTH_8; 1484 imm.hstride = BRW_HORIZONTAL_STRIDE_1; 1485 imm.dw1.ud = v; 1486 return imm; 1487} 1488 1489/** Construct vector of four 8-bit float values */ 1490static inline struct brw_reg brw_imm_vf(unsigned v) 1491{ 1492 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); 1493 imm.vstride = BRW_VERTICAL_STRIDE_0; 1494 imm.width = 
BRW_WIDTH_4; 1495 imm.hstride = BRW_HORIZONTAL_STRIDE_1; 1496 imm.dw1.ud = v; 1497 return imm; 1498} 1499 1500#define VF_ZERO 0x0 1501#define VF_ONE 0x30 1502#define VF_NEG (1<<7) 1503 1504static inline struct brw_reg brw_imm_vf4(unsigned v0, 1505 unsigned v1, 1506 unsigned v2, 1507 unsigned v3) 1508{ 1509 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); 1510 imm.vstride = BRW_VERTICAL_STRIDE_0; 1511 imm.width = BRW_WIDTH_4; 1512 imm.hstride = BRW_HORIZONTAL_STRIDE_1; 1513 imm.dw1.ud = ((v0 << 0) | 1514 (v1 << 8) | 1515 (v2 << 16) | 1516 (v3 << 24)); 1517 return imm; 1518} 1519 1520static inline struct brw_reg brw_address(struct brw_reg reg) 1521{ 1522 return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); 1523} 1524 1525/** Construct float[1] general-purpose register */ 1526static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr) 1527{ 1528 return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 1529} 1530 1531/** Construct float[2] general-purpose register */ 1532static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr) 1533{ 1534 return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 1535} 1536 1537/** Construct float[4] general-purpose register */ 1538static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr) 1539{ 1540 return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 1541} 1542 1543/** Construct float[8] general-purpose register */ 1544static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr) 1545{ 1546 return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 1547} 1548 1549static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr) 1550{ 1551 return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 1552} 1553 1554static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr) 1555{ 1556 return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 1557} 1558 1559/** Construct null register (usually used for setting condition codes) */ 1560static inline struct brw_reg 
brw_null_reg(void) 1561{ 1562 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, 1563 BRW_ARF_NULL, 1564 0); 1565} 1566 1567static inline struct brw_reg brw_address_reg(unsigned subnr) 1568{ 1569 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, 1570 BRW_ARF_ADDRESS, 1571 subnr); 1572} 1573 1574/* If/else instructions break in align16 mode if writemask & swizzle 1575 * aren't xyzw. This goes against the convention for other scalar 1576 * regs: 1577 */ 1578static inline struct brw_reg brw_ip_reg(void) 1579{ 1580 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, 1581 BRW_ARF_IP, 1582 0, 1583 BRW_REGISTER_TYPE_UD, 1584 BRW_VERTICAL_STRIDE_4, /* ? */ 1585 BRW_WIDTH_1, 1586 BRW_HORIZONTAL_STRIDE_0, 1587 BRW_SWIZZLE_XYZW, /* NOTE! */ 1588 WRITEMASK_XYZW); /* NOTE! */ 1589} 1590 1591static inline struct brw_reg brw_acc_reg(void) 1592{ 1593 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, 1594 BRW_ARF_ACCUMULATOR, 1595 0); 1596} 1597 1598static inline struct brw_reg brw_notification_1_reg(void) 1599{ 1600 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, 1601 BRW_ARF_NOTIFICATION_COUNT, 1602 1, 1603 BRW_REGISTER_TYPE_UD, 1604 BRW_VERTICAL_STRIDE_0, 1605 BRW_WIDTH_1, 1606 BRW_HORIZONTAL_STRIDE_0, 1607 BRW_SWIZZLE_XXXX, 1608 WRITEMASK_X); 1609} 1610 1611static inline struct brw_reg brw_flag_reg(void) 1612{ 1613 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, 1614 BRW_ARF_FLAG, 1615 0); 1616} 1617 1618static inline struct brw_reg brw_mask_reg(unsigned subnr) 1619{ 1620 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, 1621 BRW_ARF_MASK, 1622 subnr); 1623} 1624 1625static inline struct brw_reg brw_message_reg(unsigned nr) 1626{ 1627 assert((nr & ~(1 << 7)) < BRW_MAX_MRF); 1628 return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); 1629} 1630 1631static inline struct brw_reg brw_message4_reg(unsigned nr, 1632 int subnr) 1633{ 1634 assert((nr & ~(1 << 7)) < BRW_MAX_MRF); 1635 return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr); 1636} 1637 1638/* This is almost always 
called with a numeric constant argument, so 1639 * make things easy to evaluate at compile time: 1640 */ 1641static inline unsigned cvt(unsigned val) 1642{ 1643 switch (val) { 1644 case 0: return 0; 1645 case 1: return 1; 1646 case 2: return 2; 1647 case 4: return 3; 1648 case 8: return 4; 1649 case 16: return 5; 1650 case 32: return 6; 1651 } 1652 return 0; 1653} 1654 1655static inline struct brw_reg __stride(struct brw_reg reg, 1656 unsigned vstride, 1657 unsigned width, 1658 unsigned hstride) 1659{ 1660 reg.vstride = cvt(vstride); 1661 reg.width = cvt(width) - 1; 1662 reg.hstride = cvt(hstride); 1663 return reg; 1664} 1665 1666static inline struct brw_reg vec16(struct brw_reg reg) 1667{ 1668 return __stride(reg, 16,16,1); 1669} 1670 1671static inline struct brw_reg vec8(struct brw_reg reg) 1672{ 1673 return __stride(reg, 8,8,1); 1674} 1675 1676static inline struct brw_reg vec4(struct brw_reg reg) 1677{ 1678 return __stride(reg, 4,4,1); 1679} 1680 1681static inline struct brw_reg vec2(struct brw_reg reg) 1682{ 1683 return __stride(reg, 2,2,1); 1684} 1685 1686static inline struct brw_reg vec1(struct brw_reg reg) 1687{ 1688 return __stride(reg, 0,1,0); 1689} 1690 1691static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt) 1692{ 1693 return vec1(__suboffset(reg, elt)); 1694} 1695 1696static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt) 1697{ 1698 return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt)); 1699} 1700 1701static inline struct brw_reg brw_swizzle(struct brw_reg reg, 1702 unsigned x, 1703 unsigned y, 1704 unsigned z, 1705 unsigned w) 1706{ 1707 assert(reg.file != BRW_IMMEDIATE_VALUE); 1708 1709 reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), 1710 BRW_GET_SWZ(reg.dw1.bits.swizzle, y), 1711 BRW_GET_SWZ(reg.dw1.bits.swizzle, z), 1712 BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); 1713 return reg; 1714} 1715 1716static inline struct brw_reg brw_swizzle1(struct brw_reg reg, 1717 unsigned 
x) 1718{ 1719 return brw_swizzle(reg, x, x, x, x); 1720} 1721 1722static inline struct brw_reg brw_writemask(struct brw_reg reg, 1723 unsigned mask) 1724{ 1725 assert(reg.file != BRW_IMMEDIATE_VALUE); 1726 reg.dw1.bits.writemask &= mask; 1727 return reg; 1728} 1729 1730static inline struct brw_reg brw_set_writemask(struct brw_reg reg, 1731 unsigned mask) 1732{ 1733 assert(reg.file != BRW_IMMEDIATE_VALUE); 1734 reg.dw1.bits.writemask = mask; 1735 return reg; 1736} 1737 1738static inline struct brw_reg brw_negate(struct brw_reg reg) 1739{ 1740 reg.negate ^= 1; 1741 return reg; 1742} 1743 1744static inline struct brw_reg brw_abs(struct brw_reg reg) 1745{ 1746 reg.abs = 1; 1747 return reg; 1748} 1749 1750/*********************************************************************** 1751*/ 1752static inline struct brw_reg brw_vec4_indirect(unsigned subnr, 1753 int offset) 1754{ 1755 struct brw_reg reg = brw_vec4_grf(0, 0); 1756 reg.subnr = subnr; 1757 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; 1758 reg.dw1.bits.indirect_offset = offset; 1759 return reg; 1760} 1761 1762static inline struct brw_reg brw_vec1_indirect(unsigned subnr, 1763 int offset) 1764{ 1765 struct brw_reg reg = brw_vec1_grf(0, 0); 1766 reg.subnr = subnr; 1767 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; 1768 reg.dw1.bits.indirect_offset = offset; 1769 return reg; 1770} 1771 1772static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset) 1773{ 1774 return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); 1775} 1776 1777static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) 1778{ 1779 return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); 1780} 1781 1782static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) 1783{ 1784 return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); 1785} 1786 1787static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) 1788{ 1789 return 
__retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); 1790} 1791 1792static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset) 1793{ 1794 return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); 1795} 1796 1797static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) 1798{ 1799 return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); 1800} 1801 1802static inline struct brw_reg get_addr_reg(struct brw_indirect ptr) 1803{ 1804 return brw_address_reg(ptr.addr_subnr); 1805} 1806 1807static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset) 1808{ 1809 ptr.addr_offset += offset; 1810 return ptr; 1811} 1812 1813static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset) 1814{ 1815 struct brw_indirect ptr; 1816 ptr.addr_subnr = addr_subnr; 1817 ptr.addr_offset = offset; 1818 ptr.pad = 0; 1819 return ptr; 1820} 1821 1822/** Do two brw_regs refer to the same register? */ 1823static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2) 1824{ 1825 return r1.file == r2.file && r1.nr == r2.nr; 1826} 1827 1828static inline struct brw_instruction *current_insn( struct brw_compile *p) 1829{ 1830 return &p->store[p->nr_insn]; 1831} 1832 1833static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) 1834{ 1835 p->current->header.predicate_control = pc; 1836} 1837 1838static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse) 1839{ 1840 p->current->header.predicate_inverse = predicate_inverse; 1841} 1842 1843static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) 1844{ 1845 p->current->header.destreg__conditionalmod = conditional; 1846} 1847 1848static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode) 1849{ 1850 p->current->header.access_mode = access_mode; 1851} 1852 1853static inline void brw_set_mask_control(struct brw_compile *p, unsigned value) 1854{ 1855 
p->current->header.mask_control = value; 1856} 1857 1858static inline void brw_set_saturate(struct brw_compile *p, unsigned value) 1859{ 1860 p->current->header.saturate = value; 1861} 1862 1863static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value) 1864{ 1865 if (p->gen >= 060) 1866 p->current->header.acc_wr_control = value; 1867} 1868 1869void brw_pop_insn_state(struct brw_compile *p); 1870void brw_push_insn_state(struct brw_compile *p); 1871void brw_set_compression_control(struct brw_compile *p, enum brw_compression control); 1872void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); 1873 1874void brw_compile_init(struct brw_compile *p, int gen, void *store); 1875 1876void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, 1877 struct brw_reg dest); 1878void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, 1879 struct brw_reg reg); 1880void brw_set_src1(struct brw_compile *p, 1881 struct brw_instruction *insn, 1882 struct brw_reg reg); 1883 1884void gen6_resolve_implied_move(struct brw_compile *p, 1885 struct brw_reg *src, 1886 unsigned msg_reg_nr); 1887 1888static inline struct brw_instruction * 1889brw_next_insn(struct brw_compile *p, unsigned opcode) 1890{ 1891 struct brw_instruction *insn; 1892 1893 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 1894 1895 insn = &p->store[p->nr_insn++]; 1896 *insn = *p->current; 1897 1898 if (p->current->header.destreg__conditionalmod) { 1899 p->current->header.destreg__conditionalmod = 0; 1900 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 1901 } 1902 1903 insn->header.opcode = opcode; 1904 return insn; 1905} 1906 1907/* Helpers for regular instructions: */ 1908#define ALU1(OP) \ 1909static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \ 1910 struct brw_reg dest, \ 1911 struct brw_reg src0) \ 1912{ \ 1913 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 1914} 1915 1916#define ALU2(OP) \ 1917static inline 
struct brw_instruction *brw_##OP(struct brw_compile *p, \ 1918 struct brw_reg dest, \ 1919 struct brw_reg src0, \ 1920 struct brw_reg src1) \ 1921{ \ 1922 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 1923} 1924 1925/* Rounding operations (other than RNDD) require two instructions - the first 1926 * stores a rounded value (possibly the wrong way) in the dest register, but 1927 * also sets a per-channel "increment bit" in the flag register. A predicated 1928 * add of 1.0 fixes dest to contain the desired result. 1929 * 1930 * Sandybridge and later appear to round correctly without an ADD. 1931 */ 1932#define ROUND(OP) \ 1933static inline void brw_##OP(struct brw_compile *p, \ 1934 struct brw_reg dest, \ 1935 struct brw_reg src) \ 1936{ \ 1937 struct brw_instruction *rnd, *add; \ 1938 rnd = brw_next_insn(p, BRW_OPCODE_##OP); \ 1939 brw_set_dest(p, rnd, dest); \ 1940 brw_set_src0(p, rnd, src); \ 1941 if (p->gen < 060) { \ 1942 /* turn on round-increments */ \ 1943 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ 1944 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ 1945 add->header.predicate_control = BRW_PREDICATE_NORMAL; \ 1946 } \ 1947} 1948 1949static inline struct brw_instruction *brw_alu1(struct brw_compile *p, 1950 unsigned opcode, 1951 struct brw_reg dest, 1952 struct brw_reg src) 1953{ 1954 struct brw_instruction *insn = brw_next_insn(p, opcode); 1955 brw_set_dest(p, insn, dest); 1956 brw_set_src0(p, insn, src); 1957 return insn; 1958} 1959 1960static inline struct brw_instruction *brw_alu2(struct brw_compile *p, 1961 unsigned opcode, 1962 struct brw_reg dest, 1963 struct brw_reg src0, 1964 struct brw_reg src1 ) 1965{ 1966 struct brw_instruction *insn = brw_next_insn(p, opcode); 1967 brw_set_dest(p, insn, dest); 1968 brw_set_src0(p, insn, src0); 1969 brw_set_src1(p, insn, src1); 1970 return insn; 1971} 1972 1973static inline struct brw_instruction *brw_ADD(struct brw_compile *p, 1974 struct brw_reg dest, 1975 struct brw_reg src0, 1976 
struct brw_reg src1) 1977{ 1978 /* 6.2.2: add */ 1979 if (src0.type == BRW_REGISTER_TYPE_F || 1980 (src0.file == BRW_IMMEDIATE_VALUE && 1981 src0.type == BRW_REGISTER_TYPE_VF)) { 1982 assert(src1.type != BRW_REGISTER_TYPE_UD); 1983 assert(src1.type != BRW_REGISTER_TYPE_D); 1984 } 1985 1986 if (src1.type == BRW_REGISTER_TYPE_F || 1987 (src1.file == BRW_IMMEDIATE_VALUE && 1988 src1.type == BRW_REGISTER_TYPE_VF)) { 1989 assert(src0.type != BRW_REGISTER_TYPE_UD); 1990 assert(src0.type != BRW_REGISTER_TYPE_D); 1991 } 1992 1993 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 1994} 1995 1996static inline struct brw_instruction *brw_MUL(struct brw_compile *p, 1997 struct brw_reg dest, 1998 struct brw_reg src0, 1999 struct brw_reg src1) 2000{ 2001 /* 6.32.38: mul */ 2002 if (src0.type == BRW_REGISTER_TYPE_D || 2003 src0.type == BRW_REGISTER_TYPE_UD || 2004 src1.type == BRW_REGISTER_TYPE_D || 2005 src1.type == BRW_REGISTER_TYPE_UD) { 2006 assert(dest.type != BRW_REGISTER_TYPE_F); 2007 } 2008 2009 if (src0.type == BRW_REGISTER_TYPE_F || 2010 (src0.file == BRW_IMMEDIATE_VALUE && 2011 src0.type == BRW_REGISTER_TYPE_VF)) { 2012 assert(src1.type != BRW_REGISTER_TYPE_UD); 2013 assert(src1.type != BRW_REGISTER_TYPE_D); 2014 } 2015 2016 if (src1.type == BRW_REGISTER_TYPE_F || 2017 (src1.file == BRW_IMMEDIATE_VALUE && 2018 src1.type == BRW_REGISTER_TYPE_VF)) { 2019 assert(src0.type != BRW_REGISTER_TYPE_UD); 2020 assert(src0.type != BRW_REGISTER_TYPE_D); 2021 } 2022 2023 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 2024 src0.nr != BRW_ARF_ACCUMULATOR); 2025 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 2026 src1.nr != BRW_ARF_ACCUMULATOR); 2027 2028 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 2029} 2030 2031static inline struct brw_instruction *brw_JMPI(struct brw_compile *p, 2032 struct brw_reg dest, 2033 struct brw_reg src0, 2034 struct brw_reg src1) 2035{ 2036 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 2037 2038 
insn->header.execution_size = 1; 2039 insn->header.compression_control = BRW_COMPRESSION_NONE; 2040 insn->header.mask_control = BRW_MASK_DISABLE; 2041 2042 p->current->header.predicate_control = BRW_PREDICATE_NONE; 2043 2044 return insn; 2045} 2046 2047 2048ALU1(MOV); 2049ALU2(SEL); 2050ALU1(NOT); 2051ALU2(AND); 2052ALU2(OR); 2053ALU2(XOR); 2054ALU2(SHR); 2055ALU2(SHL); 2056ALU2(RSR); 2057ALU2(RSL); 2058ALU2(ASR); 2059ALU1(FRC); 2060ALU1(RNDD); 2061ALU2(MAC); 2062ALU2(MACH); 2063ALU1(LZD); 2064ALU2(DP4); 2065ALU2(DPH); 2066ALU2(DP3); 2067ALU2(DP2); 2068ALU2(LINE); 2069ALU2(PLN); 2070 2071ROUND(RNDZ); 2072ROUND(RNDE); 2073 2074#undef ALU1 2075#undef ALU2 2076#undef ROUND 2077 2078/* Helpers for SEND instruction */ 2079void brw_set_dp_read_message(struct brw_compile *p, 2080 struct brw_instruction *insn, 2081 unsigned binding_table_index, 2082 unsigned msg_control, 2083 unsigned msg_type, 2084 unsigned target_cache, 2085 unsigned msg_length, 2086 unsigned response_length); 2087 2088void brw_set_dp_write_message(struct brw_compile *p, 2089 struct brw_instruction *insn, 2090 unsigned binding_table_index, 2091 unsigned msg_control, 2092 unsigned msg_type, 2093 unsigned msg_length, 2094 bool header_present, 2095 bool last_render_target, 2096 unsigned response_length, 2097 bool end_of_thread, 2098 bool send_commit_msg); 2099 2100void brw_urb_WRITE(struct brw_compile *p, 2101 struct brw_reg dest, 2102 unsigned msg_reg_nr, 2103 struct brw_reg src0, 2104 bool allocate, 2105 bool used, 2106 unsigned msg_length, 2107 unsigned response_length, 2108 bool eot, 2109 bool writes_complete, 2110 unsigned offset, 2111 unsigned swizzle); 2112 2113void brw_ff_sync(struct brw_compile *p, 2114 struct brw_reg dest, 2115 unsigned msg_reg_nr, 2116 struct brw_reg src0, 2117 bool allocate, 2118 unsigned response_length, 2119 bool eot); 2120 2121void brw_fb_WRITE(struct brw_compile *p, 2122 int dispatch_width, 2123 unsigned msg_reg_nr, 2124 struct brw_reg src0, 2125 unsigned msg_control, 2126 
unsigned binding_table_index, 2127 unsigned msg_length, 2128 unsigned response_length, 2129 bool eot, 2130 bool header_present); 2131 2132void brw_SAMPLE(struct brw_compile *p, 2133 struct brw_reg dest, 2134 unsigned msg_reg_nr, 2135 struct brw_reg src0, 2136 unsigned binding_table_index, 2137 unsigned sampler, 2138 unsigned writemask, 2139 unsigned msg_type, 2140 unsigned response_length, 2141 unsigned msg_length, 2142 bool header_present, 2143 unsigned simd_mode); 2144 2145void brw_math_16(struct brw_compile *p, 2146 struct brw_reg dest, 2147 unsigned function, 2148 unsigned saturate, 2149 unsigned msg_reg_nr, 2150 struct brw_reg src, 2151 unsigned precision); 2152 2153void brw_math(struct brw_compile *p, 2154 struct brw_reg dest, 2155 unsigned function, 2156 unsigned saturate, 2157 unsigned msg_reg_nr, 2158 struct brw_reg src, 2159 unsigned data_type, 2160 unsigned precision); 2161 2162void brw_math2(struct brw_compile *p, 2163 struct brw_reg dest, 2164 unsigned function, 2165 struct brw_reg src0, 2166 struct brw_reg src1); 2167 2168void brw_oword_block_read(struct brw_compile *p, 2169 struct brw_reg dest, 2170 struct brw_reg mrf, 2171 uint32_t offset, 2172 uint32_t bind_table_index); 2173 2174void brw_oword_block_read_scratch(struct brw_compile *p, 2175 struct brw_reg dest, 2176 struct brw_reg mrf, 2177 int num_regs, 2178 unsigned offset); 2179 2180void brw_oword_block_write_scratch(struct brw_compile *p, 2181 struct brw_reg mrf, 2182 int num_regs, 2183 unsigned offset); 2184 2185void brw_dword_scattered_read(struct brw_compile *p, 2186 struct brw_reg dest, 2187 struct brw_reg mrf, 2188 uint32_t bind_table_index); 2189 2190void brw_dp_READ_4_vs(struct brw_compile *p, 2191 struct brw_reg dest, 2192 unsigned location, 2193 unsigned bind_table_index); 2194 2195void brw_dp_READ_4_vs_relative(struct brw_compile *p, 2196 struct brw_reg dest, 2197 struct brw_reg addrReg, 2198 unsigned offset, 2199 unsigned bind_table_index); 2200 2201/* If/else/endif. 
Works by manipulating the execution flags on each 2202 * channel. 2203 */ 2204struct brw_instruction *brw_IF(struct brw_compile *p, 2205 unsigned execute_size); 2206struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional, 2207 struct brw_reg src0, struct brw_reg src1); 2208 2209void brw_ELSE(struct brw_compile *p); 2210void brw_ENDIF(struct brw_compile *p); 2211 2212/* DO/WHILE loops: 2213*/ 2214struct brw_instruction *brw_DO(struct brw_compile *p, 2215 unsigned execute_size); 2216 2217struct brw_instruction *brw_WHILE(struct brw_compile *p, 2218 struct brw_instruction *patch_insn); 2219 2220struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); 2221struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); 2222struct brw_instruction *gen6_CONT(struct brw_compile *p, 2223 struct brw_instruction *do_insn); 2224/* Forward jumps: 2225*/ 2226void brw_land_fwd_jump(struct brw_compile *p, 2227 struct brw_instruction *jmp_insn); 2228 2229void brw_NOP(struct brw_compile *p); 2230 2231void brw_WAIT(struct brw_compile *p); 2232 2233/* Special case: there is never a destination, execution size will be 2234 * taken from src0: 2235 */ 2236void brw_CMP(struct brw_compile *p, 2237 struct brw_reg dest, 2238 unsigned conditional, 2239 struct brw_reg src0, 2240 struct brw_reg src1); 2241 2242static inline void brw_math_invert(struct brw_compile *p, 2243 struct brw_reg dst, 2244 struct brw_reg src) 2245{ 2246 brw_math(p, 2247 dst, 2248 BRW_MATH_FUNCTION_INV, 2249 BRW_MATH_SATURATE_NONE, 2250 0, 2251 src, 2252 BRW_MATH_PRECISION_FULL, 2253 BRW_MATH_DATA_VECTOR); 2254} 2255 2256void brw_set_uip_jip(struct brw_compile *p); 2257 2258uint32_t brw_swap_cmod(uint32_t cmod); 2259 2260void brw_disasm(FILE *file, 2261 const struct brw_instruction *inst, 2262 int gen); 2263 2264#endif 2265