1/*
2   Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3   Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4   develop this 3D driver.
5
6   Permission is hereby granted, free of charge, to any person obtaining
7   a copy of this software and associated documentation files (the
8   "Software"), to deal in the Software without restriction, including
9   without limitation the rights to use, copy, modify, merge, publish,
10   distribute, sublicense, and/or sell copies of the Software, and to
11   permit persons to whom the Software is furnished to do so, subject to
12   the following conditions:
13
14   The above copyright notice and this permission notice (including the
15   next paragraph) shall be included in all copies or substantial
16   portions of the Software.
17
18   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21   IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27/*
28 * Authors:
29 *   Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32#ifndef BRW_EU_H
33#define BRW_EU_H
34
35#include <stdbool.h>
36#include <stdint.h>
37#include <stdio.h>
38
39#include <assert.h>
40
/*
 * Source swizzles.
 *
 * A swizzle packs four 2-bit channel selectors into one byte; the selector
 * for component 0 occupies the lowest two bits and the selector for
 * component 3 the highest two.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))

/* Extract the 2-bit selector of component idx (0..3) from a packed swizzle. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Commonly used swizzles; NOOP and XYZW are the same identity mapping. */
#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
51
/* Destination write masks: one bit per component, X in bit 0 through W in bit 3. */
#define WRITEMASK_X 0x1
#define WRITEMASK_Y 0x2
#define WRITEMASK_Z 0x4
#define WRITEMASK_W 0x8

/* Convenience combinations of the single-component masks. */
#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y)
#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z)
#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
60
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128

/** Number of message register file registers */
#define BRW_MAX_MRF 16
66
67
/* Access (alignment) mode selectors. */
#define BRW_ALIGN_1   0
#define BRW_ALIGN_16  1

/* Register addressing modes: direct, or indirect through a register. */
#define BRW_ADDRESS_DIRECT                        0
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER    1

/* Channel indices; these match the 2-bit selectors used by BRW_SWIZZLE4(). */
#define BRW_CHANNEL_X     0
#define BRW_CHANNEL_Y     1
#define BRW_CHANNEL_Z     2
#define BRW_CHANNEL_W     3
78
/*
 * Instruction compression control.  The enumerators rely on the implicit
 * numbering (NONE = 0, 2NDHALF = 1, COMPRESSED = 2), so their order must
 * not change.
 */
enum brw_compression {
	BRW_COMPRESSION_NONE,
	BRW_COMPRESSION_2NDHALF,
	BRW_COMPRESSION_COMPRESSED,
};
84
/*
 * Gen6 compression control values: quarter (1Q..4Q) and half (1H/2H)
 * selectors.  1H shares its encoding with 1Q, and 2H with 3Q.
 */
#define GEN6_COMPRESSION_1Q		0
#define GEN6_COMPRESSION_2Q		1
#define GEN6_COMPRESSION_3Q		2
#define GEN6_COMPRESSION_4Q		3
#define GEN6_COMPRESSION_1H		0
#define GEN6_COMPRESSION_2H		2
91
/*
 * Conditional modifiers.  EQ and NEQ are aliases for Z and NZ and share
 * their values.
 */
#define BRW_CONDITIONAL_NONE  0
#define BRW_CONDITIONAL_Z     1
#define BRW_CONDITIONAL_NZ    2
#define BRW_CONDITIONAL_EQ    1	/* Z */
#define BRW_CONDITIONAL_NEQ   2	/* NZ */
#define BRW_CONDITIONAL_G     3
#define BRW_CONDITIONAL_GE    4
#define BRW_CONDITIONAL_L     5
#define BRW_CONDITIONAL_LE    6
#define BRW_CONDITIONAL_R     7
#define BRW_CONDITIONAL_O     8
#define BRW_CONDITIONAL_U     9
104
/* Debug control values. */
#define BRW_DEBUG_NONE        0
#define BRW_DEBUG_BREAKPOINT  1

/* Dependency control values. */
#define BRW_DEPENDENCY_NORMAL         0
#define BRW_DEPENDENCY_NOTCLEARED     1
#define BRW_DEPENDENCY_NOTCHECKED     2
#define BRW_DEPENDENCY_DISABLE        3

/* Execution sizes: the encoded value is log2 of the channel count. */
#define BRW_EXECUTE_1     0
#define BRW_EXECUTE_2     1
#define BRW_EXECUTE_4     2
#define BRW_EXECUTE_8     3
#define BRW_EXECUTE_16    4
#define BRW_EXECUTE_32    5

/* Horizontal stride encodings for strides of 0, 1, 2 and 4. */
#define BRW_HORIZONTAL_STRIDE_0   0
#define BRW_HORIZONTAL_STRIDE_1   1
#define BRW_HORIZONTAL_STRIDE_2   2
#define BRW_HORIZONTAL_STRIDE_4   3

/* Saturate control values. */
#define BRW_INSTRUCTION_NORMAL    0
#define BRW_INSTRUCTION_SATURATE  1

/* Mask control values; note that 0 means "enabled". */
#define BRW_MASK_ENABLE   0
#define BRW_MASK_DISABLE  1
130
/** @{
 *
 * Gen6 has replaced "mask enable/disable" with WECtrl, which is
 * effectively the same but much simpler to think about.  Now, there
 * are two contributors ANDed together to whether channels are
 * executed: The predication on the instruction, and the channel write
 * enable.
 */
/**
 * This is the default value.  It means that a channel's write enable is set
 * if the per-channel IP is pointing at this instruction.
 */
#define BRW_WE_NORMAL		0
/**
 * This is used like BRW_MASK_DISABLE, and causes all channels to have
 * their write enable set.  Note that predication still contributes to
 * whether the channel actually gets written.
 */
#define BRW_WE_ALL		1
/** @} */
151
/*
 * Instruction opcodes.
 *
 * The explicitly numbered BRW_OPCODE_* entries (1..126) are hardware
 * opcodes.  Entries from FS_OPCODE_FB_WRITE (128) onward are compiler
 * backend pseudo-opcodes; they rely on implicit sequential numbering, so
 * inserting or reordering them changes the values of everything after.
 */
enum opcode {
	/* These are the actual hardware opcodes. */
	BRW_OPCODE_MOV =	1,
	BRW_OPCODE_SEL =	2,
	BRW_OPCODE_NOT =	4,
	BRW_OPCODE_AND =	5,
	BRW_OPCODE_OR =	6,
	BRW_OPCODE_XOR =	7,
	BRW_OPCODE_SHR =	8,
	BRW_OPCODE_SHL =	9,
	BRW_OPCODE_RSR =	10,
	BRW_OPCODE_RSL =	11,
	BRW_OPCODE_ASR =	12,
	BRW_OPCODE_CMP =	16,
	BRW_OPCODE_CMPN =	17,
	BRW_OPCODE_JMPI =	32,
	BRW_OPCODE_IF =	34,
	BRW_OPCODE_IFF =	35,
	BRW_OPCODE_ELSE =	36,
	BRW_OPCODE_ENDIF =	37,
	BRW_OPCODE_DO =	38,
	BRW_OPCODE_WHILE =	39,
	BRW_OPCODE_BREAK =	40,
	BRW_OPCODE_CONTINUE =	41,
	BRW_OPCODE_HALT =	42,
	BRW_OPCODE_MSAVE =	44,
	BRW_OPCODE_MRESTORE =	45,
	BRW_OPCODE_PUSH =	46,
	BRW_OPCODE_POP =	47,
	BRW_OPCODE_WAIT =	48,
	BRW_OPCODE_SEND =	49,
	BRW_OPCODE_SENDC =	50,
	BRW_OPCODE_MATH =	56,
	BRW_OPCODE_ADD =	64,
	BRW_OPCODE_MUL =	65,
	BRW_OPCODE_AVG =	66,
	BRW_OPCODE_FRC =	67,
	BRW_OPCODE_RNDU =	68,
	BRW_OPCODE_RNDD =	69,
	BRW_OPCODE_RNDE =	70,
	BRW_OPCODE_RNDZ =	71,
	BRW_OPCODE_MAC =	72,
	BRW_OPCODE_MACH =	73,
	BRW_OPCODE_LZD =	74,
	BRW_OPCODE_SAD2 =	80,
	BRW_OPCODE_SADA2 =	81,
	BRW_OPCODE_DP4 =	84,
	BRW_OPCODE_DPH =	85,
	BRW_OPCODE_DP3 =	86,
	BRW_OPCODE_DP2 =	87,
	BRW_OPCODE_DPA2 =	88,
	BRW_OPCODE_LINE =	89,
	BRW_OPCODE_PLN =	90,
	BRW_OPCODE_NOP =	126,

	/* These are compiler backend opcodes that get translated into other
	 * instructions.
	 */
	FS_OPCODE_FB_WRITE = 128,
	SHADER_OPCODE_RCP,
	SHADER_OPCODE_RSQ,
	SHADER_OPCODE_SQRT,
	SHADER_OPCODE_EXP2,
	SHADER_OPCODE_LOG2,
	SHADER_OPCODE_POW,
	SHADER_OPCODE_SIN,
	SHADER_OPCODE_COS,
	FS_OPCODE_DDX,
	FS_OPCODE_DDY,
	FS_OPCODE_PIXEL_X,
	FS_OPCODE_PIXEL_Y,
	FS_OPCODE_CINTERP,
	FS_OPCODE_LINTERP,
	FS_OPCODE_TEX,
	FS_OPCODE_TXB,
	FS_OPCODE_TXD,
	FS_OPCODE_TXF,
	FS_OPCODE_TXL,
	FS_OPCODE_TXS,
	FS_OPCODE_DISCARD,
	FS_OPCODE_SPILL,
	FS_OPCODE_UNSPILL,
	FS_OPCODE_PULL_CONSTANT_LOAD,

	VS_OPCODE_URB_WRITE,
	VS_OPCODE_SCRATCH_READ,
	VS_OPCODE_SCRATCH_WRITE,
	VS_OPCODE_PULL_CONSTANT_LOAD,
};
241
/*
 * Predicate control values.  NONE and NORMAL are common to both access
 * modes; the ALIGN1_* and ALIGN16_* groups reuse the same numeric range
 * starting at 2, so a value is only meaningful together with the access mode.
 */
#define BRW_PREDICATE_NONE             0
#define BRW_PREDICATE_NORMAL           1
#define BRW_PREDICATE_ALIGN1_ANYV             2
#define BRW_PREDICATE_ALIGN1_ALLV             3
#define BRW_PREDICATE_ALIGN1_ANY2H            4
#define BRW_PREDICATE_ALIGN1_ALL2H            5
#define BRW_PREDICATE_ALIGN1_ANY4H            6
#define BRW_PREDICATE_ALIGN1_ALL4H            7
#define BRW_PREDICATE_ALIGN1_ANY8H            8
#define BRW_PREDICATE_ALIGN1_ALL8H            9
#define BRW_PREDICATE_ALIGN1_ANY16H           10
#define BRW_PREDICATE_ALIGN1_ALL16H           11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
#define BRW_PREDICATE_ALIGN16_ANY4H           6
#define BRW_PREDICATE_ALIGN16_ALL4H           7
260
/* Register file selectors. */
#define BRW_ARCHITECTURE_REGISTER_FILE    0
#define BRW_GENERAL_REGISTER_FILE         1
#define BRW_MESSAGE_REGISTER_FILE         2
#define BRW_IMMEDIATE_VALUE               3

/*
 * Register data types.  VF shares its value with B, and V shares its value
 * with HF; per the notes on VF and V those two are immediate-only types.
 */
#define BRW_REGISTER_TYPE_UD  0
#define BRW_REGISTER_TYPE_D   1
#define BRW_REGISTER_TYPE_UW  2
#define BRW_REGISTER_TYPE_W   3
#define BRW_REGISTER_TYPE_UB  4
#define BRW_REGISTER_TYPE_B   5
#define BRW_REGISTER_TYPE_VF  5	/* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF  6
#define BRW_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F   7
276
/*
 * Architecture register identifiers.  Each register kind is placed in the
 * high nibble, leaving the low nibble free.
 */
#define BRW_ARF_NULL                  0x00
#define BRW_ARF_ADDRESS               0x10
#define BRW_ARF_ACCUMULATOR           0x20
#define BRW_ARF_FLAG                  0x30
#define BRW_ARF_MASK                  0x40
#define BRW_ARF_MASK_STACK            0x50
#define BRW_ARF_MASK_STACK_DEPTH      0x60
#define BRW_ARF_STATE                 0x70
#define BRW_ARF_CONTROL               0x80
#define BRW_ARF_NOTIFICATION_COUNT    0x90
#define BRW_ARF_IP                    0xA0

/* Flag bit 7, for OR-ing into a message register number. */
#define BRW_MRF_COMPR4			(1 << 7)

/* Mask register selectors. */
#define BRW_AMASK   0
#define BRW_IMASK   1
#define BRW_LMASK   2
#define BRW_CMASK   3

/* Thread control values. */
#define BRW_THREAD_NORMAL     0
#define BRW_THREAD_ATOMIC     1
#define BRW_THREAD_SWITCH     2
299
/*
 * Vertical stride encodings.  Stride 0 encodes as 0; a power-of-two stride
 * of N encodes as log2(N) + 1.  0xF selects the one-dimensional form.
 */
#define BRW_VERTICAL_STRIDE_0                 0
#define BRW_VERTICAL_STRIDE_1                 1
#define BRW_VERTICAL_STRIDE_2                 2
#define BRW_VERTICAL_STRIDE_4                 3
#define BRW_VERTICAL_STRIDE_8                 4
#define BRW_VERTICAL_STRIDE_16                5
#define BRW_VERTICAL_STRIDE_32                6
#define BRW_VERTICAL_STRIDE_64                7
#define BRW_VERTICAL_STRIDE_128               8
#define BRW_VERTICAL_STRIDE_256               9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF

/* Region width encodings: the value is log2 of the width. */
#define BRW_WIDTH_1       0
#define BRW_WIDTH_2       1
#define BRW_WIDTH_4       2
#define BRW_WIDTH_8       3
#define BRW_WIDTH_16      4
317
/*
 * Stateless buffer boundary encodings: the value is log2 of the size in
 * KiB (1K -> 0 ... 2M -> 11).
 */
#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11

/* Polygon facing selectors. */
#define BRW_POLYGON_FACING_FRONT      0
#define BRW_POLYGON_FACING_BACK       1
333
/* Message target identifiers. */
#define BRW_MESSAGE_TARGET_NULL               0
#define BRW_MESSAGE_TARGET_MATH               1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER            2
#define BRW_MESSAGE_TARGET_GATEWAY            3
#define BRW_MESSAGE_TARGET_DATAPORT_READ      4
#define BRW_MESSAGE_TARGET_DATAPORT_WRITE     5
#define BRW_MESSAGE_TARGET_URB                6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER     7

/*
 * Gen6 data port targets.  Values 4 and 5 coincide with
 * BRW_MESSAGE_TARGET_DATAPORT_READ and _WRITE above.
 */
#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE  4
#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE   5
#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE    9
346
/* Sampler return formats (value 1 is not defined here). */
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3

/*
 * Sampler message types.  Several names share a numeric value, so a value
 * alone does not identify a message; the SIMD mode named in the macro is
 * part of its meaning.
 */
#define BRW_SAMPLER_MESSAGE_SAMPLE                    0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3

/* GEN5-prefixed sampler message types; each name has a distinct value. */
#define GEN5_SAMPLER_MESSAGE_SAMPLE              0
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS         1
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD          2
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE      3
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS       4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE  6
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD           7
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10

/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
#define BRW_SAMPLER_SIMD_MODE_SIMD8                     1
#define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3
385
/* OWord block size selectors. */
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4

/* OWord dual-block size selectors (value 1 is not defined here). */
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2

/* DWord scattered block size selectors. */
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3

/*
 * Data port read message types.  The numbering differs between the
 * BRW_ (GEN4), G45_ and GEN6_ groups, so use the group that matches
 * the target generation.
 */
/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
/* G45, GEN5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ         1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ      3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6
/* GEN6 */
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ         1
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ  5
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6

/* Target cache selectors for data port reads. */
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2

/* Render target write message sub-types. */
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4
426
/**
 * Message target: Shared Function ID for where to SEND a message.
 *
 * These are enumerated in the ISA reference under "send - Send Message".
 * In particular, see the following tables:
 * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
 * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
 * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
 *   Overview / GPE Function IDs
 *
 * The GEN6 sampler-cache and render-cache IDs deliberately share the
 * values of BRW_SFID_DATAPORT_READ and BRW_SFID_DATAPORT_WRITE.
 */
enum brw_message_target {
   BRW_SFID_NULL                     = 0,
   BRW_SFID_MATH                     = 1, /* Only valid on Gen4-5 */
   BRW_SFID_SAMPLER                  = 2,
   BRW_SFID_MESSAGE_GATEWAY          = 3,
   BRW_SFID_DATAPORT_READ            = 4,
   BRW_SFID_DATAPORT_WRITE           = 5,
   BRW_SFID_URB                      = 6,
   BRW_SFID_THREAD_SPAWNER           = 7,

   GEN6_SFID_DATAPORT_SAMPLER_CACHE  = 4,
   GEN6_SFID_DATAPORT_RENDER_CACHE   = 5,
   GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,

   GEN7_SFID_DATAPORT_DATA_CACHE     = 10,
};
453
/* Gen7 data cache data port message target. */
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE     10

/*
 * Data port write message types.  The unprefixed BRW_ group and the GEN6_
 * group use different numbering; value 7 means FLUSH_RENDER_CACHE in the
 * former and DWORD_ATOMIC_WRITE in the latter.
 */
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE                2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7

/* GEN6 */
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE              7
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE               8
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          9
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE               10
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE           11
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE             12
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE               13
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE       14
473
/*
 * Math function selectors.  Value 9 is TAN on gen4 and FDIV on gen6+,
 * as noted on those two entries.
 */
#define BRW_MATH_FUNCTION_INV                              1
#define BRW_MATH_FUNCTION_LOG                              2
#define BRW_MATH_FUNCTION_EXP                              3
#define BRW_MATH_FUNCTION_SQRT                             4
#define BRW_MATH_FUNCTION_RSQ                              5
#define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
#define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
#define BRW_MATH_FUNCTION_TAN                              9 /* gen4 */
#define BRW_MATH_FUNCTION_FDIV                             9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW                              10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13

/* Integer signedness selector for math messages. */
#define BRW_MATH_INTEGER_UNSIGNED     0
#define BRW_MATH_INTEGER_SIGNED       1

/* Math precision selector. */
#define BRW_MATH_PRECISION_FULL        0
#define BRW_MATH_PRECISION_PARTIAL     1

/* Math saturate selector. */
#define BRW_MATH_SATURATE_NONE         0
#define BRW_MATH_SATURATE_SATURATE     1

/* Math data type selector: vector or scalar operand. */
#define BRW_MATH_DATA_VECTOR  0
#define BRW_MATH_DATA_SCALAR  1
500
/* URB message opcode. */
#define BRW_URB_OPCODE_WRITE  0

/* URB swizzle control values. */
#define BRW_URB_SWIZZLE_NONE          0
#define BRW_URB_SWIZZLE_INTERLEAVE    1
#define BRW_URB_SWIZZLE_TRANSPOSE     2

/*
 * Scratch space size encodings: the value is log2 of the size in KiB
 * (1K -> 0 ... 2M -> 11).
 */
#define BRW_SCRATCH_SPACE_SIZE_1K     0
#define BRW_SCRATCH_SPACE_SIZE_2K     1
#define BRW_SCRATCH_SPACE_SIZE_4K     2
#define BRW_SCRATCH_SPACE_SIZE_8K     3
#define BRW_SCRATCH_SPACE_SIZE_16K    4
#define BRW_SCRATCH_SPACE_SIZE_32K    5
#define BRW_SCRATCH_SPACE_SIZE_64K    6
#define BRW_SCRATCH_SPACE_SIZE_128K   7
#define BRW_SCRATCH_SPACE_SIZE_256K   8
#define BRW_SCRATCH_SPACE_SIZE_512K   9
#define BRW_SCRATCH_SPACE_SIZE_1M     10
#define BRW_SCRATCH_SPACE_SIZE_2M     11

/* Evaluates to 32; presumably one register = 8 channels x 4 bytes -- TODO confirm against users. */
#define REG_SIZE (8*4)
521
522struct brw_instruction {
523	struct {
524		unsigned opcode:7;
525		unsigned pad:1;
526		unsigned access_mode:1;
527		unsigned mask_control:1;
528		unsigned dependency_control:2;
529		unsigned compression_control:2; /* gen6: quater control */
530		unsigned thread_control:2;
531		unsigned predicate_control:4;
532		unsigned predicate_inverse:1;
533		unsigned execution_size:3;
534		/**
535		 * Conditional Modifier for most instructions.  On Gen6+, this is also
536		 * used for the SEND instruction's Message Target/SFID.
537		 */
538		unsigned destreg__conditionalmod:4;
539		unsigned acc_wr_control:1;
540		unsigned cmpt_control:1;
541		unsigned debug_control:1;
542		unsigned saturate:1;
543	} header;
544
545	union {
546		struct {
547			unsigned dest_reg_file:2;
548			unsigned dest_reg_type:3;
549			unsigned src0_reg_file:2;
550			unsigned src0_reg_type:3;
551			unsigned src1_reg_file:2;
552			unsigned src1_reg_type:3;
553			unsigned pad:1;
554			unsigned dest_subreg_nr:5;
555			unsigned dest_reg_nr:8;
556			unsigned dest_horiz_stride:2;
557			unsigned dest_address_mode:1;
558		} da1;
559
560		struct {
561			unsigned dest_reg_file:2;
562			unsigned dest_reg_type:3;
563			unsigned src0_reg_file:2;
564			unsigned src0_reg_type:3;
565			unsigned src1_reg_file:2;        /* 0x00000c00 */
566			unsigned src1_reg_type:3;        /* 0x00007000 */
567			unsigned pad:1;
568			int dest_indirect_offset:10;	/* offset against the deref'd address reg */
569			unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */
570			unsigned dest_horiz_stride:2;
571			unsigned dest_address_mode:1;
572		} ia1;
573
574		struct {
575			unsigned dest_reg_file:2;
576			unsigned dest_reg_type:3;
577			unsigned src0_reg_file:2;
578			unsigned src0_reg_type:3;
579			unsigned src1_reg_file:2;
580			unsigned src1_reg_type:3;
581			unsigned pad:1;
582			unsigned dest_writemask:4;
583			unsigned dest_subreg_nr:1;
584			unsigned dest_reg_nr:8;
585			unsigned dest_horiz_stride:2;
586			unsigned dest_address_mode:1;
587		} da16;
588
589		struct {
590			unsigned dest_reg_file:2;
591			unsigned dest_reg_type:3;
592			unsigned src0_reg_file:2;
593			unsigned src0_reg_type:3;
594			unsigned pad0:6;
595			unsigned dest_writemask:4;
596			int dest_indirect_offset:6;
597			unsigned dest_subreg_nr:3;
598			unsigned dest_horiz_stride:2;
599			unsigned dest_address_mode:1;
600		} ia16;
601
602		struct {
603			unsigned dest_reg_file:2;
604			unsigned dest_reg_type:3;
605			unsigned src0_reg_file:2;
606			unsigned src0_reg_type:3;
607			unsigned src1_reg_file:2;
608			unsigned src1_reg_type:3;
609			unsigned pad:1;
610
611			int jump_count:16;
612		} branch_gen6;
613
614		struct {
615			unsigned dest_reg_file:1;
616			unsigned flag_subreg_num:1;
617			unsigned pad0:2;
618			unsigned src0_abs:1;
619			unsigned src0_negate:1;
620			unsigned src1_abs:1;
621			unsigned src1_negate:1;
622			unsigned src2_abs:1;
623			unsigned src2_negate:1;
624			unsigned pad1:7;
625			unsigned dest_writemask:4;
626			unsigned dest_subreg_nr:3;
627			unsigned dest_reg_nr:8;
628		} da3src;
629	} bits1;
630
631
632	union {
633		struct {
634			unsigned src0_subreg_nr:5;
635			unsigned src0_reg_nr:8;
636			unsigned src0_abs:1;
637			unsigned src0_negate:1;
638			unsigned src0_address_mode:1;
639			unsigned src0_horiz_stride:2;
640			unsigned src0_width:3;
641			unsigned src0_vert_stride:4;
642			unsigned flag_subreg_nr:1;
643			unsigned flag_reg_nr:1;
644			unsigned pad:5;
645		} da1;
646
647		struct {
648			int src0_indirect_offset:10;
649			unsigned src0_subreg_nr:3;
650			unsigned src0_abs:1;
651			unsigned src0_negate:1;
652			unsigned src0_address_mode:1;
653			unsigned src0_horiz_stride:2;
654			unsigned src0_width:3;
655			unsigned src0_vert_stride:4;
656			unsigned flag_subreg_nr:1;
657			unsigned flag_reg_nr:1;
658			unsigned pad:5;
659		} ia1;
660
661		struct {
662			unsigned src0_swz_x:2;
663			unsigned src0_swz_y:2;
664			unsigned src0_subreg_nr:1;
665			unsigned src0_reg_nr:8;
666			unsigned src0_abs:1;
667			unsigned src0_negate:1;
668			unsigned src0_address_mode:1;
669			unsigned src0_swz_z:2;
670			unsigned src0_swz_w:2;
671			unsigned pad0:1;
672			unsigned src0_vert_stride:4;
673			unsigned flag_subreg_nr:1;
674			unsigned flag_reg_nr:1;
675			unsigned pad1:5;
676		} da16;
677
678		struct {
679			unsigned src0_swz_x:2;
680			unsigned src0_swz_y:2;
681			int src0_indirect_offset:6;
682			unsigned src0_subreg_nr:3;
683			unsigned src0_abs:1;
684			unsigned src0_negate:1;
685			unsigned src0_address_mode:1;
686			unsigned src0_swz_z:2;
687			unsigned src0_swz_w:2;
688			unsigned pad0:1;
689			unsigned src0_vert_stride:4;
690			unsigned flag_subreg_nr:1;
691			unsigned flag_reg_nr:1;
692			unsigned pad1:5;
693		} ia16;
694
695		/* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
696		 *
697		 * Does not apply to Gen6+.  The SFID/message target moved to bits
698		 * 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
699		 */
700		struct {
701			unsigned pad:26;
702			unsigned end_of_thread:1;
703			unsigned pad1:1;
704			unsigned sfid:4;
705		} send_gen5;  /* for Ironlake only */
706
707		struct {
708			unsigned src0_rep_ctrl:1;
709			unsigned src0_swizzle:8;
710			unsigned src0_subreg_nr:3;
711			unsigned src0_reg_nr:8;
712			unsigned pad0:1;
713			unsigned src1_rep_ctrl:1;
714			unsigned src1_swizzle:8;
715			unsigned src1_subreg_nr_low:2;
716		} da3src;
717	} bits2;
718
719	union {
720		struct {
721			unsigned src1_subreg_nr:5;
722			unsigned src1_reg_nr:8;
723			unsigned src1_abs:1;
724			unsigned src1_negate:1;
725			unsigned src1_address_mode:1;
726			unsigned src1_horiz_stride:2;
727			unsigned src1_width:3;
728			unsigned src1_vert_stride:4;
729			unsigned pad0:7;
730		} da1;
731
732		struct {
733			unsigned src1_swz_x:2;
734			unsigned src1_swz_y:2;
735			unsigned src1_subreg_nr:1;
736			unsigned src1_reg_nr:8;
737			unsigned src1_abs:1;
738			unsigned src1_negate:1;
739			unsigned src1_address_mode:1;
740			unsigned src1_swz_z:2;
741			unsigned src1_swz_w:2;
742			unsigned pad1:1;
743			unsigned src1_vert_stride:4;
744			unsigned pad2:7;
745		} da16;
746
747		struct {
748			int src1_indirect_offset:10;
749			unsigned src1_subreg_nr:3;
750			unsigned src1_abs:1;
751			unsigned src1_negate:1;
752			unsigned src1_address_mode:1;
753			unsigned src1_horiz_stride:2;
754			unsigned src1_width:3;
755			unsigned src1_vert_stride:4;
756			unsigned flag_subreg_nr:1;
757			unsigned flag_reg_nr:1;
758			unsigned pad1:5;
759		} ia1;
760
761		struct {
762			unsigned src1_swz_x:2;
763			unsigned src1_swz_y:2;
764			int  src1_indirect_offset:6;
765			unsigned src1_subreg_nr:3;
766			unsigned src1_abs:1;
767			unsigned src1_negate:1;
768			unsigned pad0:1;
769			unsigned src1_swz_z:2;
770			unsigned src1_swz_w:2;
771			unsigned pad1:1;
772			unsigned src1_vert_stride:4;
773			unsigned flag_subreg_nr:1;
774			unsigned flag_reg_nr:1;
775			unsigned pad2:5;
776		} ia16;
777
778		struct {
779			int jump_count:16;	/* note: signed */
780			unsigned pop_count:4;
781			unsigned pad0:12;
782		} if_else;
783
784		/* This is also used for gen7 IF/ELSE instructions */
785		struct {
786			/* Signed jump distance to the ip to jump to if all channels
787			 * are disabled after the break or continue.  It should point
788			 * to the end of the innermost control flow block, as that's
789			 * where some channel could get re-enabled.
790			 */
791			int jip:16;
792
793			/* Signed jump distance to the location to resume execution
794			 * of this channel if it's enabled for the break or continue.
795			 */
796			int uip:16;
797		} break_cont;
798
799		/**
800		 * \defgroup SEND instructions / Message Descriptors
801		 *
802		 * @{
803		 */
804
805		/**
806		 * Generic Message Descriptor for Gen4 SEND instructions.  The structs
807		 * below expand function_control to something specific for their
808		 * message.  Due to struct packing issues, they duplicate these bits.
809		 *
810		 * See the G45 PRM, Volume 4, Table 14-15.
811		 */
812		struct {
813			unsigned function_control:16;
814			unsigned response_length:4;
815			unsigned msg_length:4;
816			unsigned msg_target:4;
817			unsigned pad1:3;
818			unsigned end_of_thread:1;
819		} generic;
820
821		/**
822		 * Generic Message Descriptor for Gen5-7 SEND instructions.
823		 *
824		 * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15.  (Sadly, most
825		 * of the information on the SEND instruction is missing from the public
826		 * Ironlake PRM.)
827		 *
828		 * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
829		 * According to the SEND instruction description:
830		 * "The MSb of the message description, the EOT field, always comes from
831		 *  bit 127 of the instruction word"...which is bit 31 of this field.
832		 */
833		struct {
834			unsigned function_control:19;
835			unsigned header_present:1;
836			unsigned response_length:5;
837			unsigned msg_length:4;
838			unsigned pad1:2;
839			unsigned end_of_thread:1;
840		} generic_gen5;
841
842		/** G45 PRM, Volume 4, Section 6.1.1.1 */
843		struct {
844			unsigned function:4;
845			unsigned int_type:1;
846			unsigned precision:1;
847			unsigned saturate:1;
848			unsigned data_type:1;
849			unsigned pad0:8;
850			unsigned response_length:4;
851			unsigned msg_length:4;
852			unsigned msg_target:4;
853			unsigned pad1:3;
854			unsigned end_of_thread:1;
855		} math;
856
857		/** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
858		struct {
859			unsigned function:4;
860			unsigned int_type:1;
861			unsigned precision:1;
862			unsigned saturate:1;
863			unsigned data_type:1;
864			unsigned snapshot:1;
865			unsigned pad0:10;
866			unsigned header_present:1;
867			unsigned response_length:5;
868			unsigned msg_length:4;
869			unsigned pad1:2;
870			unsigned end_of_thread:1;
871		} math_gen5;
872
873		/** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
874		struct {
875			unsigned binding_table_index:8;
876			unsigned sampler:4;
877			unsigned return_format:2;
878			unsigned msg_type:2;
879			unsigned response_length:4;
880			unsigned msg_length:4;
881			unsigned msg_target:4;
882			unsigned pad1:3;
883			unsigned end_of_thread:1;
884		} sampler;
885
886		/** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
887		struct {
888			unsigned binding_table_index:8;
889			unsigned sampler:4;
890			unsigned msg_type:4;
891			unsigned response_length:4;
892			unsigned msg_length:4;
893			unsigned msg_target:4;
894			unsigned pad1:3;
895			unsigned end_of_thread:1;
896		} sampler_g4x;
897
898		/** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
899		struct {
900			unsigned binding_table_index:8;
901			unsigned sampler:4;
902			unsigned msg_type:4;
903			unsigned simd_mode:2;
904			unsigned pad0:1;
905			unsigned header_present:1;
906			unsigned response_length:5;
907			unsigned msg_length:4;
908			unsigned pad1:2;
909			unsigned end_of_thread:1;
910		} sampler_gen5;
911
912		struct {
913			unsigned binding_table_index:8;
914			unsigned sampler:4;
915			unsigned msg_type:5;
916			unsigned simd_mode:2;
917			unsigned header_present:1;
918			unsigned response_length:5;
919			unsigned msg_length:4;
920			unsigned pad1:2;
921			unsigned end_of_thread:1;
922		} sampler_gen7;
923
924		struct brw_urb_immediate {
925			unsigned opcode:4;
926			unsigned offset:6;
927			unsigned swizzle_control:2;
928			unsigned pad:1;
929			unsigned allocate:1;
930			unsigned used:1;
931			unsigned complete:1;
932			unsigned response_length:4;
933			unsigned msg_length:4;
934			unsigned msg_target:4;
935			unsigned pad1:3;
936			unsigned end_of_thread:1;
937		} urb;
938
939		struct {
940			unsigned opcode:4;
941			unsigned offset:6;
942			unsigned swizzle_control:2;
943			unsigned pad:1;
944			unsigned allocate:1;
945			unsigned used:1;
946			unsigned complete:1;
947			unsigned pad0:3;
948			unsigned header_present:1;
949			unsigned response_length:5;
950			unsigned msg_length:4;
951			unsigned pad1:2;
952			unsigned end_of_thread:1;
953		} urb_gen5;
954
955		struct {
956			unsigned opcode:3;
957			unsigned offset:11;
958			unsigned swizzle_control:1;
959			unsigned complete:1;
960			unsigned per_slot_offset:1;
961			unsigned pad0:2;
962			unsigned header_present:1;
963			unsigned response_length:5;
964			unsigned msg_length:4;
965			unsigned pad1:2;
966			unsigned end_of_thread:1;
967		} urb_gen7;
968
969		/** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
970		struct {
971			unsigned binding_table_index:8;
972			unsigned msg_control:4;
973			unsigned msg_type:2;
974			unsigned target_cache:2;
975			unsigned response_length:4;
976			unsigned msg_length:4;
977			unsigned msg_target:4;
978			unsigned pad1:3;
979			unsigned end_of_thread:1;
980		} dp_read;
981
982		/** G45 PRM, Volume 4, Section 5.10.1.1.2 */
983		struct {
984			unsigned binding_table_index:8;
985			unsigned msg_control:3;
986			unsigned msg_type:3;
987			unsigned target_cache:2;
988			unsigned response_length:4;
989			unsigned msg_length:4;
990			unsigned msg_target:4;
991			unsigned pad1:3;
992			unsigned end_of_thread:1;
993		} dp_read_g4x;
994
995		/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
996		struct {
997			unsigned binding_table_index:8;
998			unsigned msg_control:3;
999			unsigned msg_type:3;
1000			unsigned target_cache:2;
1001			unsigned pad0:3;
1002			unsigned header_present:1;
1003			unsigned response_length:5;
1004			unsigned msg_length:4;
1005			unsigned pad1:2;
1006			unsigned end_of_thread:1;
1007		} dp_read_gen5;
1008
1009		/** G45 PRM, Volume 4, Section 5.10.1.1.2.  For both Gen4 and G45. */
1010		struct {
1011			unsigned binding_table_index:8;
1012			unsigned msg_control:3;
1013			unsigned last_render_target:1;
1014			unsigned msg_type:3;
1015			unsigned send_commit_msg:1;
1016			unsigned response_length:4;
1017			unsigned msg_length:4;
1018			unsigned msg_target:4;
1019			unsigned pad1:3;
1020			unsigned end_of_thread:1;
1021		} dp_write;
1022
1023		/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
1024		struct {
1025			unsigned binding_table_index:8;
1026			unsigned msg_control:3;
1027			unsigned last_render_target:1;
1028			unsigned msg_type:3;
1029			unsigned send_commit_msg:1;
1030			unsigned pad0:3;
1031			unsigned header_present:1;
1032			unsigned response_length:5;
1033			unsigned msg_length:4;
1034			unsigned pad1:2;
1035			unsigned end_of_thread:1;
1036		} dp_write_gen5;
1037
1038		/**
1039		 * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
1040		 *
1041		 * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
1042		 **/
1043		struct {
1044			unsigned binding_table_index:8;
1045			unsigned msg_control:5;
1046			unsigned msg_type:3;
1047			unsigned pad0:3;
1048			unsigned header_present:1;
1049			unsigned response_length:5;
1050			unsigned msg_length:4;
1051			unsigned pad1:2;
1052			unsigned end_of_thread:1;
1053		} gen6_dp_sampler_const_cache;
1054
1055		/**
1056		 * Message for the Sandybridge Render Cache Data Port.
1057		 *
1058		 * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
1059		 * Section 3.9.2.1.1: Message Descriptor.
1060		 *
1061		 * "Slot Group Select" and "Last Render Target" are part of the
1062		 * 5-bit message control for Render Target Write messages.  See
1063		 * Section 3.9.9.2.1 of the same volume.
1064		 */
1065		struct {
1066			unsigned binding_table_index:8;
1067			unsigned msg_control:3;
1068			unsigned slot_group_select:1;
1069			unsigned last_render_target:1;
1070			unsigned msg_type:4;
1071			unsigned send_commit_msg:1;
1072			unsigned pad0:1;
1073			unsigned header_present:1;
1074			unsigned response_length:5;
1075			unsigned msg_length:4;
1076			unsigned pad1:2;
1077			unsigned end_of_thread:1;
1078		} gen6_dp;
1079
1080		/**
1081		 * Message for any of the Gen7 Data Port caches.
1082		 *
1083		 * Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
1084		 * Data Port Messages / Message Descriptor.  Once again, "Slot Group
1085		 * Select" and "Last Render Target" are part of the 6-bit message
1086		 * control for Render Target Writes.
1087		 */
1088		struct {
1089			unsigned binding_table_index:8;
1090			unsigned msg_control:3;
1091			unsigned slot_group_select:1;
1092			unsigned last_render_target:1;
1093			unsigned msg_control_pad:1;
1094			unsigned msg_type:4;
1095			unsigned pad1:1;
1096			unsigned header_present:1;
1097			unsigned response_length:5;
1098			unsigned msg_length:4;
1099			unsigned pad2:2;
1100			unsigned end_of_thread:1;
1101		} gen7_dp;
1102		/** @} */
1103
1104		struct {
1105			unsigned src1_subreg_nr_high:1;
1106			unsigned src1_reg_nr:8;
1107			unsigned pad0:1;
1108			unsigned src2_rep_ctrl:1;
1109			unsigned src2_swizzle:8;
1110			unsigned src2_subreg_nr:3;
1111			unsigned src2_reg_nr:8;
1112			unsigned pad1:2;
1113		} da3src;
1114
1115		int d;
1116		unsigned ud;
1117		float f;
1118	} bits3;
1119};
1120
1121
/* These aren't hardware structs, just something useful for us to pass around:
 *
 * Align1 operation has a lot of control over input ranges.  Used in
 * WM programs to implement shaders decomposed into "channel serial"
 * or "structure of array" form:
 *
 * The declared widths of the leading bitfields add up to 32 bits and the
 * dw1 union adds another 32, i.e. two dwords in total.  dw1 carries either
 * the swizzle/writemask/indirect-offset bits or, for immediate operands,
 * the raw immediate value (f, d or ud).
 */
struct brw_reg {
	unsigned type:4;		/* one of BRW_REGISTER_TYPE_x */
	unsigned file:2;		/* one of BRW_x_REGISTER_FILE */
	unsigned nr:8;			/* register number within the file */
	unsigned subnr:5;		/* byte offset within the register; :1 in align16 */
	unsigned negate:1;		/* source only */
	unsigned abs:1;		/* source only */
	unsigned vstride:4;		/* source only */
	unsigned width:3;		/* src only, align1 only */
	unsigned hstride:2;   		/* align1 only */
	unsigned address_mode:1;	/* relative addressing, hopefully! */
	unsigned pad0:1;

	union {
		struct {
			unsigned swizzle:8;		/* src only, align16 only */
			unsigned writemask:4;		/* dest only, align16 only */
			int  indirect_offset:10;	/* relative addressing offset */
			unsigned pad1:10;		/* two dwords total */
		} bits;

		/* Immediate payload, interpreted according to 'type'. */
		float f;
		int   d;
		unsigned ud;
	} dw1;
};
1154
/**
 * Compact description of a register-indirect pointer: which address
 * sub-register to use plus a signed offset to apply to it.  The declared
 * field widths add up to 32 bits.
 */
struct brw_indirect {
	unsigned addr_subnr:4;	/* sub-register of the address register */
	int addr_offset:10;	/* signed offset added on dereference */
	unsigned pad:18;
};
1160
/* Depth of the push/pop instruction-state stack in struct brw_compile. */
#define BRW_EU_MAX_INSN_STACK 5
/* Upper bound on emitted instructions, asserted in brw_next_insn(). */
#define BRW_EU_MAX_INSN 10000
1163
/**
 * State of one EU program being assembled.
 *
 * brw_next_insn() appends instructions to 'store', initialising each one
 * as a copy of the template instruction that 'current' points at.
 */
struct brw_compile {
	struct brw_instruction *store;	/* output instruction array */
	unsigned nr_insn;		/* number of instructions emitted so far */

	/* Hardware generation; this header compares it against octal
	 * constants such as 060.
	 */
	int gen;

	/* Allow clients to push/pop instruction state:
	*/
	struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
	bool compressed_stack[BRW_EU_MAX_INSN_STACK];
	struct brw_instruction *current;	/* template for new instructions */

	/* NOTE(review): presumably the flag value last programmed for
	 * predication and the current compression state - confirm against
	 * the out-of-line implementation.
	 */
	unsigned flag_value;
	bool single_program_flow;
	bool compressed;

	/* Control flow stacks:
	 * - if_stack contains IF and ELSE instructions which must be patched
	 *   (and popped) once the matching ENDIF instruction is encountered.
	 */
	struct brw_instruction **if_stack;
	int if_stack_depth;
	int if_stack_array_size;
};
1188
1189static inline int type_sz(unsigned type)
1190{
1191	switch (type) {
1192	case BRW_REGISTER_TYPE_UD:
1193	case BRW_REGISTER_TYPE_D:
1194	case BRW_REGISTER_TYPE_F:
1195		return 4;
1196	case BRW_REGISTER_TYPE_HF:
1197	case BRW_REGISTER_TYPE_UW:
1198	case BRW_REGISTER_TYPE_W:
1199		return 2;
1200	case BRW_REGISTER_TYPE_UB:
1201	case BRW_REGISTER_TYPE_B:
1202		return 1;
1203	default:
1204		return 0;
1205	}
1206}
1207
1208/**
1209 * Construct a brw_reg.
1210 * \param file  one of the BRW_x_REGISTER_FILE values
1211 * \param nr  register number/index
1212 * \param subnr  register sub number
1213 * \param type  one of BRW_REGISTER_TYPE_x
1214 * \param vstride  one of BRW_VERTICAL_STRIDE_x
1215 * \param width  one of BRW_WIDTH_x
1216 * \param hstride  one of BRW_HORIZONTAL_STRIDE_x
1217 * \param swizzle  one of BRW_SWIZZLE_x
1218 * \param writemask  WRITEMASK_X/Y/Z/W bitfield
1219 */
1220static inline struct brw_reg brw_reg(unsigned file,
1221				     unsigned nr,
1222				     unsigned subnr,
1223				     unsigned type,
1224				     unsigned vstride,
1225				     unsigned width,
1226				     unsigned hstride,
1227				     unsigned swizzle,
1228				     unsigned writemask)
1229{
1230	struct brw_reg reg;
1231	if (file == BRW_GENERAL_REGISTER_FILE)
1232		assert(nr < BRW_MAX_GRF);
1233	else if (file == BRW_MESSAGE_REGISTER_FILE)
1234		assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1235	else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
1236		assert(nr <= BRW_ARF_IP);
1237
1238	reg.type = type;
1239	reg.file = file;
1240	reg.nr = nr;
1241	reg.subnr = subnr * type_sz(type);
1242	reg.negate = 0;
1243	reg.abs = 0;
1244	reg.vstride = vstride;
1245	reg.width = width;
1246	reg.hstride = hstride;
1247	reg.address_mode = BRW_ADDRESS_DIRECT;
1248	reg.pad0 = 0;
1249
1250	/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
1251	 * set swizzle and writemask to W, as the lower bits of subnr will
1252	 * be lost when converted to align16.  This is probably too much to
1253	 * keep track of as you'd want it adjusted by suboffset(), etc.
1254	 * Perhaps fix up when converting to align16?
1255	 */
1256	reg.dw1.bits.swizzle = swizzle;
1257	reg.dw1.bits.writemask = writemask;
1258	reg.dw1.bits.indirect_offset = 0;
1259	reg.dw1.bits.pad1 = 0;
1260	return reg;
1261}
1262
1263/** Construct float[16] register */
1264static inline struct brw_reg brw_vec16_reg(unsigned file,
1265					   unsigned nr,
1266					   unsigned subnr)
1267{
1268	return brw_reg(file,
1269		       nr,
1270		       subnr,
1271		       BRW_REGISTER_TYPE_F,
1272		       BRW_VERTICAL_STRIDE_16,
1273		       BRW_WIDTH_16,
1274		       BRW_HORIZONTAL_STRIDE_1,
1275		       BRW_SWIZZLE_XYZW,
1276		       WRITEMASK_XYZW);
1277}
1278
1279/** Construct float[8] register */
1280static inline struct brw_reg brw_vec8_reg(unsigned file,
1281					  unsigned nr,
1282					  unsigned subnr)
1283{
1284	return brw_reg(file,
1285		       nr,
1286		       subnr,
1287		       BRW_REGISTER_TYPE_F,
1288		       BRW_VERTICAL_STRIDE_8,
1289		       BRW_WIDTH_8,
1290		       BRW_HORIZONTAL_STRIDE_1,
1291		       BRW_SWIZZLE_XYZW,
1292		       WRITEMASK_XYZW);
1293}
1294
1295/** Construct float[4] register */
1296static inline struct brw_reg brw_vec4_reg(unsigned file,
1297					  unsigned nr,
1298					  unsigned subnr)
1299{
1300	return brw_reg(file,
1301		       nr,
1302		       subnr,
1303		       BRW_REGISTER_TYPE_F,
1304		       BRW_VERTICAL_STRIDE_4,
1305		       BRW_WIDTH_4,
1306		       BRW_HORIZONTAL_STRIDE_1,
1307		       BRW_SWIZZLE_XYZW,
1308		       WRITEMASK_XYZW);
1309}
1310
1311/** Construct float[2] register */
1312static inline struct brw_reg brw_vec2_reg(unsigned file,
1313					  unsigned nr,
1314					  unsigned subnr)
1315{
1316	return brw_reg(file,
1317		       nr,
1318		       subnr,
1319		       BRW_REGISTER_TYPE_F,
1320		       BRW_VERTICAL_STRIDE_2,
1321		       BRW_WIDTH_2,
1322		       BRW_HORIZONTAL_STRIDE_1,
1323		       BRW_SWIZZLE_XYXY,
1324		       WRITEMASK_XY);
1325}
1326
1327/** Construct float[1] register */
1328static inline struct brw_reg brw_vec1_reg(unsigned file,
1329					  unsigned nr,
1330					  unsigned subnr)
1331{
1332	return brw_reg(file,
1333		       nr,
1334		       subnr,
1335		       BRW_REGISTER_TYPE_F,
1336		       BRW_VERTICAL_STRIDE_0,
1337		       BRW_WIDTH_1,
1338		       BRW_HORIZONTAL_STRIDE_0,
1339		       BRW_SWIZZLE_XXXX,
1340		       WRITEMASK_X);
1341}
1342
1343
1344static inline struct brw_reg __retype(struct brw_reg reg,
1345				      unsigned type)
1346{
1347	reg.type = type;
1348	return reg;
1349}
1350
1351static inline struct brw_reg __retype_d(struct brw_reg reg)
1352{
1353	return __retype(reg, BRW_REGISTER_TYPE_D);
1354}
1355
1356static inline struct brw_reg __retype_ud(struct brw_reg reg)
1357{
1358	return __retype(reg, BRW_REGISTER_TYPE_UD);
1359}
1360
1361static inline struct brw_reg __retype_uw(struct brw_reg reg)
1362{
1363	return __retype(reg, BRW_REGISTER_TYPE_UW);
1364}
1365
1366static inline struct brw_reg __sechalf(struct brw_reg reg)
1367{
1368	if (reg.vstride)
1369		reg.nr++;
1370	return reg;
1371}
1372
1373static inline struct brw_reg __suboffset(struct brw_reg reg,
1374					 unsigned delta)
1375{
1376	reg.subnr += delta * type_sz(reg.type);
1377	return reg;
1378}
1379
1380static inline struct brw_reg __offset(struct brw_reg reg,
1381				      unsigned delta)
1382{
1383	reg.nr += delta;
1384	return reg;
1385}
1386
1387static inline struct brw_reg byte_offset(struct brw_reg reg,
1388					 unsigned bytes)
1389{
1390	unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
1391	reg.nr = newoffset / REG_SIZE;
1392	reg.subnr = newoffset % REG_SIZE;
1393	return reg;
1394}
1395
1396
1397/** Construct unsigned word[16] register */
1398static inline struct brw_reg brw_uw16_reg(unsigned file,
1399					  unsigned nr,
1400					  unsigned subnr)
1401{
1402	return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1403}
1404
1405/** Construct unsigned word[8] register */
1406static inline struct brw_reg brw_uw8_reg(unsigned file,
1407					 unsigned nr,
1408					 unsigned subnr)
1409{
1410	return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1411}
1412
1413/** Construct unsigned word[1] register */
1414static inline struct brw_reg brw_uw1_reg(unsigned file,
1415					 unsigned nr,
1416					 unsigned subnr)
1417{
1418	return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1419}
1420
1421static inline struct brw_reg brw_imm_reg(unsigned type)
1422{
1423	return brw_reg( BRW_IMMEDIATE_VALUE,
1424			0,
1425			0,
1426			type,
1427			BRW_VERTICAL_STRIDE_0,
1428			BRW_WIDTH_1,
1429			BRW_HORIZONTAL_STRIDE_0,
1430			0,
1431			0);
1432}
1433
1434/** Construct float immediate register */
1435static inline struct brw_reg brw_imm_f(float f)
1436{
1437	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
1438	imm.dw1.f = f;
1439	return imm;
1440}
1441
1442/** Construct integer immediate register */
1443static inline struct brw_reg brw_imm_d(int d)
1444{
1445	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
1446	imm.dw1.d = d;
1447	return imm;
1448}
1449
1450/** Construct uint immediate register */
1451static inline struct brw_reg brw_imm_ud(unsigned ud)
1452{
1453	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
1454	imm.dw1.ud = ud;
1455	return imm;
1456}
1457
1458/** Construct ushort immediate register */
1459static inline struct brw_reg brw_imm_uw(uint16_t uw)
1460{
1461	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
1462	imm.dw1.ud = uw | (uw << 16);
1463	return imm;
1464}
1465
1466/** Construct short immediate register */
1467static inline struct brw_reg brw_imm_w(int16_t w)
1468{
1469	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
1470	imm.dw1.d = w | (w << 16);
1471	return imm;
1472}
1473
1474/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
1475 * numbers alias with _V and _VF below:
1476 */
1477
1478/** Construct vector of eight signed half-byte values */
1479static inline struct brw_reg brw_imm_v(unsigned v)
1480{
1481	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
1482	imm.vstride = BRW_VERTICAL_STRIDE_0;
1483	imm.width = BRW_WIDTH_8;
1484	imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1485	imm.dw1.ud = v;
1486	return imm;
1487}
1488
1489/** Construct vector of four 8-bit float values */
1490static inline struct brw_reg brw_imm_vf(unsigned v)
1491{
1492	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
1493	imm.vstride = BRW_VERTICAL_STRIDE_0;
1494	imm.width = BRW_WIDTH_4;
1495	imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1496	imm.dw1.ud = v;
1497	return imm;
1498}
1499
/* Building blocks for the 8-bit components passed to brw_imm_vf4().
 * NOTE(review): presumably the VF encodings of 0.0 and 1.0 plus the sign
 * bit of a component - confirm against the hardware documentation.
 */
#define VF_ZERO 0x0
#define VF_ONE  0x30
#define VF_NEG  (1<<7)
1503
1504static inline struct brw_reg brw_imm_vf4(unsigned v0,
1505					 unsigned v1,
1506					 unsigned v2,
1507					 unsigned v3)
1508{
1509	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
1510	imm.vstride = BRW_VERTICAL_STRIDE_0;
1511	imm.width = BRW_WIDTH_4;
1512	imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1513	imm.dw1.ud = ((v0 << 0) |
1514		      (v1 << 8) |
1515		      (v2 << 16) |
1516		      (v3 << 24));
1517	return imm;
1518}
1519
1520static inline struct brw_reg brw_address(struct brw_reg reg)
1521{
1522	return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
1523}
1524
1525/** Construct float[1] general-purpose register */
1526static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr)
1527{
1528	return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1529}
1530
1531/** Construct float[2] general-purpose register */
1532static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr)
1533{
1534	return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1535}
1536
1537/** Construct float[4] general-purpose register */
1538static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr)
1539{
1540	return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1541}
1542
1543/** Construct float[8] general-purpose register */
1544static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr)
1545{
1546	return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1547}
1548
1549static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr)
1550{
1551	return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1552}
1553
1554static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr)
1555{
1556	return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1557}
1558
1559/** Construct null register (usually used for setting condition codes) */
1560static inline struct brw_reg brw_null_reg(void)
1561{
1562	return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1563			    BRW_ARF_NULL,
1564			    0);
1565}
1566
1567static inline struct brw_reg brw_address_reg(unsigned subnr)
1568{
1569	return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1570			   BRW_ARF_ADDRESS,
1571			   subnr);
1572}
1573
1574/* If/else instructions break in align16 mode if writemask & swizzle
1575 * aren't xyzw.  This goes against the convention for other scalar
1576 * regs:
1577 */
1578static inline struct brw_reg brw_ip_reg(void)
1579{
1580	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1581		       BRW_ARF_IP,
1582		       0,
1583		       BRW_REGISTER_TYPE_UD,
1584		       BRW_VERTICAL_STRIDE_4, /* ? */
1585		       BRW_WIDTH_1,
1586		       BRW_HORIZONTAL_STRIDE_0,
1587		       BRW_SWIZZLE_XYZW, /* NOTE! */
1588		       WRITEMASK_XYZW); /* NOTE! */
1589}
1590
1591static inline struct brw_reg brw_acc_reg(void)
1592{
1593	return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1594			    BRW_ARF_ACCUMULATOR,
1595			    0);
1596}
1597
1598static inline struct brw_reg brw_notification_1_reg(void)
1599{
1600	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1601		       BRW_ARF_NOTIFICATION_COUNT,
1602		       1,
1603		       BRW_REGISTER_TYPE_UD,
1604		       BRW_VERTICAL_STRIDE_0,
1605		       BRW_WIDTH_1,
1606		       BRW_HORIZONTAL_STRIDE_0,
1607		       BRW_SWIZZLE_XXXX,
1608		       WRITEMASK_X);
1609}
1610
1611static inline struct brw_reg brw_flag_reg(void)
1612{
1613	return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1614			   BRW_ARF_FLAG,
1615			   0);
1616}
1617
1618static inline struct brw_reg brw_mask_reg(unsigned subnr)
1619{
1620	return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1621			   BRW_ARF_MASK,
1622			   subnr);
1623}
1624
1625static inline struct brw_reg brw_message_reg(unsigned nr)
1626{
1627	assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1628	return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
1629}
1630
1631static inline struct brw_reg brw_message4_reg(unsigned nr,
1632					      int subnr)
1633{
1634	assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1635	return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr);
1636}
1637
/* Map an element count (0, 1, 2, 4, 8, 16 or 32) to its encoded value
 * (0..6); any other input yields 0.
 *
 * This is almost always called with a numeric constant argument, so
 * keep it trivially foldable at compile time.
 */
static inline unsigned cvt(unsigned val)
{
	unsigned encoded = 0;

	switch (val) {
	case 1:
		encoded = 1;
		break;
	case 2:
		encoded = 2;
		break;
	case 4:
		encoded = 3;
		break;
	case 8:
		encoded = 4;
		break;
	case 16:
		encoded = 5;
		break;
	case 32:
		encoded = 6;
		break;
	default:
		/* 0 and every unsupported count encode as 0 */
		break;
	}

	return encoded;
}
1654
1655static inline struct brw_reg __stride(struct brw_reg reg,
1656				    unsigned vstride,
1657				    unsigned width,
1658				    unsigned hstride)
1659{
1660	reg.vstride = cvt(vstride);
1661	reg.width = cvt(width) - 1;
1662	reg.hstride = cvt(hstride);
1663	return reg;
1664}
1665
1666static inline struct brw_reg vec16(struct brw_reg reg)
1667{
1668	return __stride(reg, 16,16,1);
1669}
1670
1671static inline struct brw_reg vec8(struct brw_reg reg)
1672{
1673	return __stride(reg, 8,8,1);
1674}
1675
1676static inline struct brw_reg vec4(struct brw_reg reg)
1677{
1678	return __stride(reg, 4,4,1);
1679}
1680
1681static inline struct brw_reg vec2(struct brw_reg reg)
1682{
1683	return __stride(reg, 2,2,1);
1684}
1685
1686static inline struct brw_reg vec1(struct brw_reg reg)
1687{
1688	return __stride(reg, 0,1,0);
1689}
1690
1691static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt)
1692{
1693	return vec1(__suboffset(reg, elt));
1694}
1695
1696static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt)
1697{
1698	return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt));
1699}
1700
1701static inline struct brw_reg brw_swizzle(struct brw_reg reg,
1702					 unsigned x,
1703					 unsigned y,
1704					 unsigned z,
1705					 unsigned w)
1706{
1707	assert(reg.file != BRW_IMMEDIATE_VALUE);
1708
1709	reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
1710					    BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
1711					    BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
1712					    BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
1713	return reg;
1714}
1715
1716static inline struct brw_reg brw_swizzle1(struct brw_reg reg,
1717					  unsigned x)
1718{
1719	return brw_swizzle(reg, x, x, x, x);
1720}
1721
1722static inline struct brw_reg brw_writemask(struct brw_reg reg,
1723					   unsigned mask)
1724{
1725	assert(reg.file != BRW_IMMEDIATE_VALUE);
1726	reg.dw1.bits.writemask &= mask;
1727	return reg;
1728}
1729
1730static inline struct brw_reg brw_set_writemask(struct brw_reg reg,
1731					       unsigned mask)
1732{
1733	assert(reg.file != BRW_IMMEDIATE_VALUE);
1734	reg.dw1.bits.writemask = mask;
1735	return reg;
1736}
1737
1738static inline struct brw_reg brw_negate(struct brw_reg reg)
1739{
1740	reg.negate ^= 1;
1741	return reg;
1742}
1743
1744static inline struct brw_reg brw_abs(struct brw_reg reg)
1745{
1746	reg.abs = 1;
1747	return reg;
1748}
1749
1750/***********************************************************************
1751*/
1752static inline struct brw_reg brw_vec4_indirect(unsigned subnr,
1753					       int offset)
1754{
1755	struct brw_reg reg =  brw_vec4_grf(0, 0);
1756	reg.subnr = subnr;
1757	reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1758	reg.dw1.bits.indirect_offset = offset;
1759	return reg;
1760}
1761
1762static inline struct brw_reg brw_vec1_indirect(unsigned subnr,
1763					       int offset)
1764{
1765	struct brw_reg reg =  brw_vec1_grf(0, 0);
1766	reg.subnr = subnr;
1767	reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1768	reg.dw1.bits.indirect_offset = offset;
1769	return reg;
1770}
1771
1772static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
1773{
1774	return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1775}
1776
1777static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
1778{
1779	return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1780}
1781
1782static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
1783{
1784	return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
1785}
1786
1787static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
1788{
1789	return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
1790}
1791
1792static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset)
1793{
1794	return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
1795}
1796
1797static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
1798{
1799	return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
1800}
1801
1802static inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
1803{
1804	return brw_address_reg(ptr.addr_subnr);
1805}
1806
1807static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset)
1808{
1809	ptr.addr_offset += offset;
1810	return ptr;
1811}
1812
1813static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset)
1814{
1815	struct brw_indirect ptr;
1816	ptr.addr_subnr = addr_subnr;
1817	ptr.addr_offset = offset;
1818	ptr.pad = 0;
1819	return ptr;
1820}
1821
1822/** Do two brw_regs refer to the same register? */
1823static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2)
1824{
1825	return r1.file == r2.file && r1.nr == r2.nr;
1826}
1827
1828static inline struct brw_instruction *current_insn( struct brw_compile *p)
1829{
1830	return &p->store[p->nr_insn];
1831}
1832
1833static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc )
1834{
1835	p->current->header.predicate_control = pc;
1836}
1837
1838static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
1839{
1840	p->current->header.predicate_inverse = predicate_inverse;
1841}
1842
1843static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional )
1844{
1845	p->current->header.destreg__conditionalmod = conditional;
1846}
1847
1848static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode)
1849{
1850	p->current->header.access_mode = access_mode;
1851}
1852
1853static inline void brw_set_mask_control(struct brw_compile *p, unsigned value)
1854{
1855	p->current->header.mask_control = value;
1856}
1857
1858static inline void brw_set_saturate(struct brw_compile *p, unsigned value)
1859{
1860	p->current->header.saturate = value;
1861}
1862
1863static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value)
1864{
1865	if (p->gen >= 060)
1866		p->current->header.acc_wr_control = value;
1867}
1868
/* Out-of-line entry points; only the prototypes live in this header.
 * NOTE(review): the descriptions below are inferred from names and from
 * the call sites in this file - confirm against the implementations.
 */

/* Presumably restore/save the template instruction using the state stack
 * in struct brw_compile.
 */
void brw_pop_insn_state(struct brw_compile *p);
void brw_push_insn_state(struct brw_compile *p);
void brw_set_compression_control(struct brw_compile *p, enum brw_compression control);
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );

/* Initialise a compile context for hardware generation 'gen', emitting
 * into the caller-provided 'store'.
 */
void brw_compile_init(struct brw_compile *p, int gen, void *store);

/* Operand encoders: the inline emitters in this header call these right
 * after brw_next_insn(), in the order dest, src0, src1.
 */
void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
		  struct brw_reg dest);
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
		  struct brw_reg reg);
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg);

void gen6_resolve_implied_move(struct brw_compile *p,
			       struct brw_reg *src,
			       unsigned msg_reg_nr);
1887
/**
 * Append a new instruction to the store and return it.
 *
 * The new instruction starts out as a copy of the template p->current and
 * then has its opcode set to \p opcode.  If the template carried a
 * conditional modifier, the template (not the new instruction) has that
 * modifier cleared and its predicate control set to BRW_PREDICATE_NORMAL,
 * so the change only affects instructions emitted afterwards.
 *
 * \param p  compile context
 * \param opcode  value stored in the new instruction's opcode field
 * \return pointer to the newly appended instruction inside p->store
 */
static inline struct brw_instruction *
brw_next_insn(struct brw_compile *p, unsigned opcode)
{
	struct brw_instruction *insn;

	assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

	/* Claim the next slot and seed it from the template. */
	insn = &p->store[p->nr_insn++];
	*insn = *p->current;

	/* The copy above already captured the conditional modifier; reset
	 * the template afterwards.
	 */
	if (p->current->header.destreg__conditionalmod) {
		p->current->header.destreg__conditionalmod = 0;
		p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
	}

	insn->header.opcode = opcode;
	return insn;
}
1906
/* Helpers for regular instructions: */

/* ALU1(OP) defines brw_OP(p, dest, src0), a thin wrapper that forwards to
 * brw_alu1() with opcode BRW_OPCODE_OP.  The macro expands to a complete
 * function definition.
 */
#define ALU1(OP)							\
static inline struct brw_instruction *brw_##OP(struct brw_compile *p,	\
					       struct brw_reg dest,	\
					       struct brw_reg src0)	\
{									\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);			\
}
1915
/* ALU2(OP) defines brw_OP(p, dest, src0, src1), a thin wrapper that
 * forwards to brw_alu2() with opcode BRW_OPCODE_OP.  The macro expands to
 * a complete function definition.
 */
#define ALU2(OP)							\
static inline struct brw_instruction *brw_##OP(struct brw_compile *p,	\
					       struct brw_reg dest,	\
					       struct brw_reg src0,	\
						struct brw_reg src1)	\
{									\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);		\
}
1924
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 *
 * ROUND(OP) defines void brw_OP(p, dest, src).  It always emits the OP
 * instruction itself; only when p->gen is below 060 (octal) does it also
 * set BRW_CONDITIONAL_R on that instruction and emit the follow-up ADD of
 * 1.0f with BRW_PREDICATE_NORMAL.
 */
#define ROUND(OP)							\
static inline void brw_##OP(struct brw_compile *p,			\
			    struct brw_reg dest,			\
			    struct brw_reg src)				\
{									\
	struct brw_instruction *rnd, *add;				\
	rnd = brw_next_insn(p, BRW_OPCODE_##OP);			\
	brw_set_dest(p, rnd, dest);					\
	brw_set_src0(p, rnd, src);					\
	if (p->gen < 060) {						\
		/* turn on round-increments */				\
		rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
		add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));		\
		add->header.predicate_control = BRW_PREDICATE_NORMAL;	\
	}								\
}
1948
1949static inline struct brw_instruction *brw_alu1(struct brw_compile *p,
1950					       unsigned opcode,
1951					       struct brw_reg dest,
1952					       struct brw_reg src)
1953{
1954	struct brw_instruction *insn = brw_next_insn(p, opcode);
1955	brw_set_dest(p, insn, dest);
1956	brw_set_src0(p, insn, src);
1957	return insn;
1958}
1959
1960static inline struct brw_instruction *brw_alu2(struct brw_compile *p,
1961					       unsigned opcode,
1962					       struct brw_reg dest,
1963					       struct brw_reg src0,
1964					       struct brw_reg src1 )
1965{
1966	struct brw_instruction *insn = brw_next_insn(p, opcode);
1967	brw_set_dest(p, insn, dest);
1968	brw_set_src0(p, insn, src0);
1969	brw_set_src1(p, insn, src1);
1970	return insn;
1971}
1972
1973static inline struct brw_instruction *brw_ADD(struct brw_compile *p,
1974					      struct brw_reg dest,
1975					      struct brw_reg src0,
1976					      struct brw_reg src1)
1977{
1978	/* 6.2.2: add */
1979	if (src0.type == BRW_REGISTER_TYPE_F ||
1980	    (src0.file == BRW_IMMEDIATE_VALUE &&
1981	     src0.type == BRW_REGISTER_TYPE_VF)) {
1982		assert(src1.type != BRW_REGISTER_TYPE_UD);
1983		assert(src1.type != BRW_REGISTER_TYPE_D);
1984	}
1985
1986	if (src1.type == BRW_REGISTER_TYPE_F ||
1987	    (src1.file == BRW_IMMEDIATE_VALUE &&
1988	     src1.type == BRW_REGISTER_TYPE_VF)) {
1989		assert(src0.type != BRW_REGISTER_TYPE_UD);
1990		assert(src0.type != BRW_REGISTER_TYPE_D);
1991	}
1992
1993	return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1994}
1995
1996static inline struct brw_instruction *brw_MUL(struct brw_compile *p,
1997					      struct brw_reg dest,
1998					      struct brw_reg src0,
1999					      struct brw_reg src1)
2000{
2001	/* 6.32.38: mul */
2002	if (src0.type == BRW_REGISTER_TYPE_D ||
2003	    src0.type == BRW_REGISTER_TYPE_UD ||
2004	    src1.type == BRW_REGISTER_TYPE_D ||
2005	    src1.type == BRW_REGISTER_TYPE_UD) {
2006		assert(dest.type != BRW_REGISTER_TYPE_F);
2007	}
2008
2009	if (src0.type == BRW_REGISTER_TYPE_F ||
2010	    (src0.file == BRW_IMMEDIATE_VALUE &&
2011	     src0.type == BRW_REGISTER_TYPE_VF)) {
2012		assert(src1.type != BRW_REGISTER_TYPE_UD);
2013		assert(src1.type != BRW_REGISTER_TYPE_D);
2014	}
2015
2016	if (src1.type == BRW_REGISTER_TYPE_F ||
2017	    (src1.file == BRW_IMMEDIATE_VALUE &&
2018	     src1.type == BRW_REGISTER_TYPE_VF)) {
2019		assert(src0.type != BRW_REGISTER_TYPE_UD);
2020		assert(src0.type != BRW_REGISTER_TYPE_D);
2021	}
2022
2023	assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
2024	       src0.nr != BRW_ARF_ACCUMULATOR);
2025	assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
2026	       src1.nr != BRW_ARF_ACCUMULATOR);
2027
2028	return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
2029}
2030
2031static inline struct brw_instruction *brw_JMPI(struct brw_compile *p,
2032					       struct brw_reg dest,
2033					       struct brw_reg src0,
2034					       struct brw_reg src1)
2035{
2036	struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
2037
2038	insn->header.execution_size = 1;
2039	insn->header.compression_control = BRW_COMPRESSION_NONE;
2040	insn->header.mask_control = BRW_MASK_DISABLE;
2041
2042	p->current->header.predicate_control = BRW_PREDICATE_NONE;
2043
2044	return insn;
2045}
2046
2047
/* Instantiate the generic per-opcode entry points.  ALU1, ALU2 and ROUND
 * are generator macros defined earlier in this header (outside this
 * section); presumably each expands to a brw_<OP>() emitter taking one or
 * two sources -- confirm against the macro definitions.  The order below is
 * left untouched in case an expansion refers to an earlier one.
 */
ALU1(MOV);
ALU2(SEL);
ALU1(NOT);
ALU2(AND);
ALU2(OR);
ALU2(XOR);
ALU2(SHR);
ALU2(SHL);
ALU2(RSR);
ALU2(RSL);
ALU2(ASR);
ALU1(FRC);
ALU1(RNDD);
ALU2(MAC);
ALU2(MACH);
ALU1(LZD);
ALU2(DP4);
ALU2(DPH);
ALU2(DP3);
ALU2(DP2);
ALU2(LINE);
ALU2(PLN);

/* Rounding opcodes go through their own generator macro. */
ROUND(RNDZ);
ROUND(RNDE);

/* The generator macros are dropped here so they do not leak to includers. */
#undef ALU1
#undef ALU2
#undef ROUND
2077
/* Helpers for SEND instruction */

/* Data-port read message setup for @insn.  Prototype only; the meaning of
 * the individual fields is fixed by the definition, which is not visible
 * in this part of the header.
 */
void brw_set_dp_read_message(struct brw_compile *p,
			     struct brw_instruction *insn,
			     unsigned binding_table_index,
			     unsigned msg_control, unsigned msg_type,
			     unsigned target_cache,
			     unsigned msg_length, unsigned response_length);

/* Data-port write message setup for @insn.  Prototype only; see the
 * definition for how the flags and lengths are encoded.
 */
void brw_set_dp_write_message(struct brw_compile *p,
			      struct brw_instruction *insn,
			      unsigned binding_table_index,
			      unsigned msg_control, unsigned msg_type,
			      unsigned msg_length,
			      bool header_present, bool last_render_target,
			      unsigned response_length,
			      bool end_of_thread, bool send_commit_msg);
2099
/* URB write.  Prototype only; by its parameters it takes a destination, a
 * message register number and a source plus message flags and lengths.
 */
void brw_urb_WRITE(struct brw_compile *p, struct brw_reg dest,
		   unsigned msg_reg_nr, struct brw_reg src0,
		   bool allocate, bool used,
		   unsigned msg_length, unsigned response_length,
		   bool eot, bool writes_complete,
		   unsigned offset, unsigned swizzle);

/* FF sync message.  Prototype only. */
void brw_ff_sync(struct brw_compile *p, struct brw_reg dest,
		 unsigned msg_reg_nr, struct brw_reg src0,
		 bool allocate, unsigned response_length, bool eot);

/* Framebuffer write.  Prototype only; @dispatch_width is a plain int. */
void brw_fb_WRITE(struct brw_compile *p, int dispatch_width,
		  unsigned msg_reg_nr, struct brw_reg src0,
		  unsigned msg_control, unsigned binding_table_index,
		  unsigned msg_length, unsigned response_length,
		  bool eot, bool header_present);
2131
/* Sampler message.  Prototype only; the parameter list carries the
 * destination, message register, source, binding-table and sampler
 * indices, a writemask, the message type, both lengths, a header flag and
 * the SIMD mode.
 */
void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest,
		unsigned msg_reg_nr, struct brw_reg src0,
		unsigned binding_table_index, unsigned sampler,
		unsigned writemask, unsigned msg_type,
		unsigned response_length, unsigned msg_length,
		bool header_present, unsigned simd_mode);
2144
/* Math helper variant without a data_type argument; presumably the
 * 16-wide form, going by its name -- confirm against the definition.
 */
void brw_math_16(struct brw_compile *p, struct brw_reg dest,
		 unsigned function, unsigned saturate,
		 unsigned msg_reg_nr, struct brw_reg src,
		 unsigned precision);

/* Single-source math helper.  Note the trailing argument order:
 * @data_type comes before @precision.
 */
void brw_math(struct brw_compile *p, struct brw_reg dest,
	      unsigned function, unsigned saturate,
	      unsigned msg_reg_nr, struct brw_reg src,
	      unsigned data_type, unsigned precision);

/* Two-source math helper; takes no saturate, message register, data type
 * or precision arguments.
 */
void brw_math2(struct brw_compile *p, struct brw_reg dest,
	       unsigned function,
	       struct brw_reg src0, struct brw_reg src1);
2167
/* OWord block read addressed by @offset and a binding-table index.
 * Prototype only.
 */
void brw_oword_block_read(struct brw_compile *p, struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset, uint32_t bind_table_index);

/* Scratch-space OWord block read/write pair; both take a register count
 * and an offset.  Prototypes only.
 */
void brw_oword_block_read_scratch(struct brw_compile *p, struct brw_reg dest,
				  struct brw_reg mrf,
				  int num_regs, unsigned offset);

void brw_oword_block_write_scratch(struct brw_compile *p, struct brw_reg mrf,
				   int num_regs, unsigned offset);

/* DWord scattered read through a binding-table index.  Prototype only. */
void brw_dword_scattered_read(struct brw_compile *p, struct brw_reg dest,
			      struct brw_reg mrf, uint32_t bind_table_index);

/* Data-port reads named for the vertex shader ("vs"); the _relative form
 * additionally takes an address register.  Prototypes only.
 */
void brw_dp_READ_4_vs(struct brw_compile *p, struct brw_reg dest,
		      unsigned location, unsigned bind_table_index);

void brw_dp_READ_4_vs_relative(struct brw_compile *p, struct brw_reg dest,
			       struct brw_reg addrReg, unsigned offset,
			       unsigned bind_table_index);
2200
/* If/else/endif.  Works by manipulating the execution flags on each
 * channel.
 *
 * brw_IF() and gen6_IF() hand back an instruction pointer; brw_ELSE() and
 * brw_ENDIF() return nothing.
 */
struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size);
struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
				struct brw_reg src0, struct brw_reg src1);

void brw_ELSE(struct brw_compile *p);
void brw_ENDIF(struct brw_compile *p);

/* DO/WHILE loops:
 *
 * brw_WHILE() takes an instruction pointer (@patch_insn) and gen6_CONT()
 * takes @do_insn; brw_BREAK() and brw_CONT() take a pop count instead.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size);

struct brw_instruction *brw_WHILE(struct brw_compile *p,
				  struct brw_instruction *patch_insn);

struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
struct brw_instruction *gen6_CONT(struct brw_compile *p,
				  struct brw_instruction *do_insn);
/* Forward jumps:
 *
 * Takes the jump instruction as @jmp_insn.  Prototype only.
 */
void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *jmp_insn);

/* Single-argument emitters: only the compile context is passed in. */
void brw_NOP(struct brw_compile *p);

void brw_WAIT(struct brw_compile *p);

/* Special case: there is never a destination, execution size will be
 * taken from src0:
 */
void brw_CMP(struct brw_compile *p, struct brw_reg dest, unsigned conditional,
	     struct brw_reg src0, struct brw_reg src1);
2241
2242static inline void brw_math_invert(struct brw_compile *p,
2243				   struct brw_reg dst,
2244				   struct brw_reg src)
2245{
2246	brw_math(p,
2247		 dst,
2248		 BRW_MATH_FUNCTION_INV,
2249		 BRW_MATH_SATURATE_NONE,
2250		 0,
2251		 src,
2252		 BRW_MATH_PRECISION_FULL,
2253		 BRW_MATH_DATA_VECTOR);
2254}
2255
/* Post-pass over the compile context; presumably resolves UIP/JIP jump
 * fields, going by the name -- confirm against the definition.
 */
void brw_set_uip_jip(struct brw_compile *p);

/* Maps one conditional-modifier value to another.  Prototype only. */
uint32_t brw_swap_cmod(uint32_t cmod);

/* Disassembler entry point: takes the output stream, a read-only
 * instruction and an integer hardware generation.
 */
void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen);
2263
2264#endif
2265