1/*
2 * Southern Islands Register documentation
3 *
4 * Copyright (C) 2011  Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
20 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24#ifndef SID_H
25#define SID_H
26
27#include "amdgfxregs.h"
28
/* si values: each register space is described by an [OFFSET, END) pair. */
#define SI_CONFIG_REG_OFFSET       0x00008000
#define SI_CONFIG_REG_END          0x0000B000

#define SI_SH_REG_OFFSET           0x0000B000
#define SI_SH_REG_END              0x0000C000

#define SI_CONTEXT_REG_OFFSET      0x00028000
#define SI_CONTEXT_REG_END         0x00030000

#define CIK_UCONFIG_REG_OFFSET     0x00030000
#define CIK_UCONFIG_REG_END        0x00040000

/* This range lies inside the CIK_UCONFIG range defined just above. */
#define SI_UCONFIG_PERF_REG_OFFSET 0x00034000
#define SI_UCONFIG_PERF_REG_END    0x00038000

/* For register shadowing: size of each register space (END - OFFSET). */
#define SI_SH_REG_SPACE_SIZE           (SI_SH_REG_END - SI_SH_REG_OFFSET)
#define SI_CONTEXT_REG_SPACE_SIZE      (SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET)
#define SI_UCONFIG_REG_SPACE_SIZE      (CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET)
#define SI_UCONFIG_PERF_REG_SPACE_SIZE (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET)

/* Layout of the shadowed register buffer: the SH space comes first, the
 * CONTEXT space follows it, and the UCONFIG space comes last. Each offset is
 * the previous offset plus the size of the previous space.
 */
#define SI_SHADOWED_SH_REG_OFFSET      0
#define SI_SHADOWED_CONTEXT_REG_OFFSET (SI_SHADOWED_SH_REG_OFFSET + SI_SH_REG_SPACE_SIZE)
#define SI_SHADOWED_UCONFIG_REG_OFFSET                                                             \
   (SI_SHADOWED_CONTEXT_REG_OFFSET + SI_CONTEXT_REG_SPACE_SIZE)
#define SI_SHADOWED_REG_BUFFER_SIZE                                                                \
   (SI_SHADOWED_UCONFIG_REG_OFFSET + SI_UCONFIG_REG_SPACE_SIZE)
52
/* Event type codes, to be encoded with EVENT_TYPE(). */
#define EVENT_TYPE_CACHE_FLUSH                  0x06
#define EVENT_TYPE_PS_PARTIAL_FLUSH             0x10
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
#define EVENT_TYPE_ZPASS_DONE                   0x15
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT    0x16
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH        0x1F
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS        0x20

/* The event type field starts at bit 0. */
#define EVENT_TYPE(x) ((x) << 0)

/* The event index field starts at bit 8. Index values:
 * 0 - any non-TS event
 * 1 - ZPASS_DONE
 * 2 - SAMPLE_PIPELINESTAT
 * 3 - SAMPLE_STREAMOUTSTAT*
 * 4 - *S_PARTIAL_FLUSH
 * 5 - TS events
 */
#define EVENT_INDEX(x) ((x) << 8)
69
/* EVENT_WRITE_EOP (SI-VI) & RELEASE_MEM (GFX9): single-bit action enables. */
#define EVENT_TCL1_VOL_ACTION_ENA 0x00001000 /* bit 12 */
#define EVENT_TC_VOL_ACTION_ENA   0x00002000 /* bit 13 */
#define EVENT_TC_WB_ACTION_ENA    0x00008000 /* bit 15 */
#define EVENT_TCL1_ACTION_ENA     0x00010000 /* bit 16 */
#define EVENT_TC_ACTION_ENA       0x00020000 /* bit 17 */
#define EVENT_TC_NC_ACTION_ENA    0x00080000 /* bit 19, GFX9+ */
#define EVENT_TC_WC_ACTION_ENA    0x00100000 /* bit 20, GFX9+ */
#define EVENT_TC_MD_ACTION_ENA    0x00200000 /* bit 21, GFX9+ */
79
/* Predication operation codes; PRED_OP() places one at bit 16. */
#define PREDICATION_OP_CLEAR     0x0
#define PREDICATION_OP_ZPASS     0x1
#define PREDICATION_OP_PRIMCOUNT 0x2
#define PREDICATION_OP_BOOL64    0x3
#define PREDICATION_OP_BOOL32    0x4

#define PRED_OP(x) ((x) << 16)

/* Bit 31. The constant is unsigned on purpose: (1 << 31) shifts into the sign
 * bit of a signed int, which is undefined behavior in C and yields a negative
 * value that sign-extends when widened to 64 bits.
 */
#define PREDICATION_CONTINUE (1u << 31)

/* Bit 12: hint selection. */
#define PREDICATION_HINT_WAIT        (0 << 12)
#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)

/* Bit 8: visibility selection. */
#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
#define PREDICATION_DRAW_VISIBLE     (1 << 8)

#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
97
/* None of the "registers" defined in this packet section exist in hardware.
 * These definitions only describe the packet encoding that the IB parser
 * understands, and serve as accurate documentation.
 */
#define PKT3_NOP               0x10
#define PKT3_SET_BASE          0x11
#define PKT3_CLEAR_STATE       0x12
#define PKT3_INDEX_BUFFER_SIZE 0x13
#define PKT3_DISPATCH_DIRECT   0x15
#define PKT3_DISPATCH_INDIRECT 0x16
#define PKT3_OCCLUSION_QUERY   0x1F /* new for CIK */
#define PKT3_SET_PREDICATION   0x20
#define PKT3_COND_EXEC         0x22
#define PKT3_PRED_EXEC         0x23
#define PKT3_DRAW_INDIRECT     0x24
#define PKT3_DRAW_INDEX_INDIRECT 0x25
#define PKT3_INDEX_BASE        0x26
#define PKT3_DRAW_INDEX_2      0x27
#define PKT3_CONTEXT_CONTROL   0x28

/* CONTEXT_CONTROL "CC0" single-bit fields (load enables). Each macro keeps
 * only bit 0 of its argument and shifts it to the field position.
 */
#define CC0_LOAD_GLOBAL_CONFIG(x)     (((unsigned)(x) & 0x1) << 0)
#define CC0_LOAD_PER_CONTEXT_STATE(x) (((unsigned)(x) & 0x1) << 1)
#define CC0_LOAD_GLOBAL_UCONFIG(x)    (((unsigned)(x) & 0x1) << 15)
#define CC0_LOAD_GFX_SH_REGS(x)       (((unsigned)(x) & 0x1) << 16)
#define CC0_LOAD_CS_SH_REGS(x)        (((unsigned)(x) & 0x1) << 24)
#define CC0_LOAD_CE_RAM(x)            (((unsigned)(x) & 0x1) << 28)
#define CC0_UPDATE_LOAD_ENABLES(x)    (((unsigned)(x) & 0x1) << 31)

/* CONTEXT_CONTROL "CC1" single-bit fields (shadow enables), same encoding. */
#define CC1_SHADOW_GLOBAL_CONFIG(x)     (((unsigned)(x) & 0x1) << 0)
#define CC1_SHADOW_PER_CONTEXT_STATE(x) (((unsigned)(x) & 0x1) << 1)
#define CC1_SHADOW_GLOBAL_UCONFIG(x)    (((unsigned)(x) & 0x1) << 15)
#define CC1_SHADOW_GFX_SH_REGS(x)       (((unsigned)(x) & 0x1) << 16)
#define CC1_SHADOW_CS_SH_REGS(x)        (((unsigned)(x) & 0x1) << 24)
#define CC1_UPDATE_SHADOW_ENABLES(x)    (((unsigned)(x) & 0x1) << 31)
#define PKT3_INDEX_TYPE          0x2A /* not on GFX9 */
#define PKT3_DRAW_INDIRECT_MULTI 0x2C

/* DRAW_INDEX_LOC pseudo-register and its two single-bit enables (bits 30, 31). */
#define R_2C3_DRAW_INDEX_LOC           0x2C3
#define S_2C3_COUNT_INDIRECT_ENABLE(x) (((unsigned)(x) & 0x1) << 30)
#define S_2C3_DRAW_INDEX_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)

#define PKT3_DRAW_INDEX_AUTO       0x2D
#define PKT3_DRAW_INDEX_IMMD       0x2E /* not on CIK */
#define PKT3_NUM_INSTANCES         0x2F
#define PKT3_DRAW_INDEX_MULTI_AUTO 0x30
#define PKT3_INDIRECT_BUFFER_SI    0x32 /* not on CIK */
#define PKT3_INDIRECT_BUFFER_CONST 0x33

/* STRMOUT_BUFFER_UPDATE and its fields. */
#define PKT3_STRMOUT_BUFFER_UPDATE       0x34
#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
/* 2-bit field at bit 1; takes one of the STRMOUT_OFFSET_* values below. */
#define STRMOUT_OFFSET_SOURCE(x)            (((unsigned)(x) & 0x3) << 1)
#define STRMOUT_OFFSET_FROM_PACKET          0
#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
#define STRMOUT_OFFSET_FROM_MEM             2
#define STRMOUT_OFFSET_NONE                 3
/* 1-bit field at bit 7 and 2-bit field at bit 8. */
#define STRMOUT_DATA_TYPE(x)     (((unsigned)(x) & 0x1) << 7)
#define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x) & 0x3) << 8)

#define PKT3_DRAW_INDEX_OFFSET_2       0x35
#define PKT3_WRITE_DATA                0x37
#define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PKT3_MEM_SEMAPHORE             0x39
#define PKT3_MPEG_INDEX                0x3A /* not on CIK */

/* WAIT_REG_MEM and its fields. */
#define PKT3_WAIT_REG_MEM             0x3C
#define WAIT_REG_MEM_EQUAL            3
#define WAIT_REG_MEM_NOT_EQUAL        4
#define WAIT_REG_MEM_GREATER_OR_EQUAL 5
#define WAIT_REG_MEM_MEM_SPACE(x)     (((unsigned)(x) & 0x3) << 4)
#define WAIT_REG_MEM_PFP              (1 << 8)

#define PKT3_MEM_WRITE           0x3D /* not on CIK */
#define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */
163
#define PKT3_COPY_DATA                         0x40
/* Source selector: 4-bit field at bit 0. */
#define COPY_DATA_SRC_SEL(x)                   ((x)&0xf)
#define COPY_DATA_REG                          0
#define COPY_DATA_SRC_MEM                      1 /* only valid as source */
#define COPY_DATA_TC_L2                        2
#define COPY_DATA_GDS                          3
#define COPY_DATA_PERF                         4
#define COPY_DATA_IMM                          5
#define COPY_DATA_TIMESTAMP                    9
/* Destination selector: 4-bit field at bit 8. COPY_DATA_TC_L2, COPY_DATA_GDS
 * and COPY_DATA_PERF (values 2-4) are the same for source and destination and
 * are therefore defined only once, in the list above.
 */
#define COPY_DATA_DST_SEL(x)                   (((unsigned)(x)&0xf) << 8)
#define COPY_DATA_DST_MEM_GRBM                 1 /* sync across GRBM, deprecated */
#define COPY_DATA_DST_MEM                      5
#define COPY_DATA_COUNT_SEL                    (1 << 16)
#define COPY_DATA_WR_CONFIRM                   (1 << 20)
#define COPY_DATA_ENGINE_PFP                   (1 << 30)
#define PKT3_PFP_SYNC_ME                       0x42
#define PKT3_SURFACE_SYNC                      0x43 /* deprecated on CIK, use ACQUIRE_MEM */
#define PKT3_ME_INITIALIZE                     0x44 /* not on CIK */
#define PKT3_COND_WRITE                        0x45
#define PKT3_EVENT_WRITE                       0x46
#define PKT3_EVENT_WRITE_EOP                   0x47 /* not on GFX9 */
#define PKT3_EVENT_WRITE_EOS                   0x48 /* not on GFX9 */

/* EOP field encoders. The argument is converted to unsigned before shifting,
 * like the other field macros in this header: EOP_DATA_SEL(EOP_DATA_SEL_GDS)
 * is 5 << 29, which does not fit in a signed int and would otherwise be
 * undefined behavior.
 */
#define EOP_DST_SEL(x)                         ((unsigned)(x) << 16)
#define EOP_DST_SEL_MEM                        0
#define EOP_DST_SEL_TC_L2                      1
#define EOP_INT_SEL(x)                         ((unsigned)(x) << 24)
#define EOP_INT_SEL_NONE                       0
#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
#define EOP_DATA_SEL(x)                        ((unsigned)(x) << 29)
#define EOP_DATA_SEL_DISCARD                   0
#define EOP_DATA_SEL_VALUE_32BIT               1
#define EOP_DATA_SEL_VALUE_64BIT               2
#define EOP_DATA_SEL_TIMESTAMP                 3
#define EOP_DATA_SEL_GDS                       5
/* Packs a GDS dword offset (low bits) with a dword count (from bit 16 up). */
#define EOP_DATA_GDS(dw_offset, num_dwords)    ((dw_offset) | ((unsigned)(num_dwords) << 16))

/* EOS data selector, encoded at bit 29 like EOP_DATA_SEL. */
#define EOS_DATA_SEL(x)                        ((unsigned)(x) << 29)
#define EOS_DATA_SEL_APPEND_COUNT              0
#define EOS_DATA_SEL_GDS                       1
#define EOS_DATA_SEL_VALUE_32BIT               2

208/* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
209 * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
210 * DST_SEL=MC. Only CIK chips are affected.
211 */
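/* NOTE(review): PKT3_EVENT_WRITE_EOS (0x48, not on GFX9) is defined earlier in
 * this header, so the restriction above applies to any code emitting EOS
 * packets on CIK. A disabled duplicate of that define ("fix CP DMA before
 * uncommenting") used to sit here; confirm CP DMA users honor the restriction.
 */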
/* Memory release/acquire and miscellaneous packets. */
#define PKT3_RELEASE_MEM     0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
#define PKT3_CONTEXT_REG_RMW 0x51 /* older firmware versions on older chips don't have this */
#define PKT3_ONE_REG_WRITE   0x57 /* not on CIK */
#define PKT3_ACQUIRE_MEM     0x58 /* new for CIK */
#define PKT3_REWIND          0x59 /* VI+ [any ring] or CIK [compute ring only] */

/* Register load packets. */
#define PKT3_LOAD_UCONFIG_REG 0x5E /* GFX7+ */
#define PKT3_LOAD_SH_REG      0x5F
#define PKT3_LOAD_CONTEXT_REG 0x61

/* Register set packets. */
#define PKT3_SET_CONFIG_REG        0x68
#define PKT3_SET_CONTEXT_REG       0x69
#define PKT3_SET_SH_REG            0x76
#define PKT3_SET_SH_REG_OFFSET     0x77
#define PKT3_SET_UCONFIG_REG       0x79 /* new for CIK */
#define PKT3_SET_UCONFIG_REG_INDEX 0x7A /* new for GFX9, CP ucode version >= 26 */

/* Constant RAM and CE/DE counter packets. */
#define PKT3_LOAD_CONST_RAM       0x80
#define PKT3_WRITE_CONST_RAM      0x81
#define PKT3_DUMP_CONST_RAM       0x83
#define PKT3_INCREMENT_CE_COUNTER 0x84
#define PKT3_INCREMENT_DE_COUNTER 0x85
#define PKT3_WAIT_ON_CE_COUNTER   0x86

/* Indexed variants. */
#define PKT3_SET_SH_REG_INDEX       0x9B
#define PKT3_LOAD_CONTEXT_REG_INDEX 0x9F /* new for VI */
236
/* Packet header fields. Naming convention:
 *   *_S(x)  masks a value and shifts it into its field,
 *   *_G(x)  extracts the field from a header dword,
 *   *_C     is the header mask with that field's bits cleared.
 */

/* Packet type: bits 31:30. */
#define PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30)
#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define PKT_TYPE_C    0x3FFFFFFF

/* Count: bits 29:16. */
#define PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16)
#define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
#define PKT_COUNT_C    0xC000FFFF

/* Type-0 base index: bits 15:0. */
#define PKT0_BASE_INDEX_S(x) ((unsigned)(x) & 0xFFFF)
#define PKT0_BASE_INDEX_G(x) ((x) & 0xFFFF)
#define PKT0_BASE_INDEX_C    0xFFFF0000

/* Type-3 opcode: bits 15:8. */
#define PKT3_IT_OPCODE_S(x) (((unsigned)(x) & 0xFF) << 8)
#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
#define PKT3_IT_OPCODE_C    0xFFFF00FF

/* Type-3 predicate (bit 0) and shader type (bit 1). */
#define PKT3_PREDICATE(x)     ((x) & 0x1)
#define PKT3_SHADER_TYPE_S(x) (((unsigned)(x) & 0x1) << 1)

/* Build a complete type-0 header from a base index and a count. */
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT_COUNT_S(count) | PKT0_BASE_INDEX_S(index))

/* Build a complete type-3 header from an opcode, a count and a predicate bit. */
#define PKT3(op, count, predicate)                                                                 \
   (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate))

/* Padding packets. */
#define PKT2_NOP_PAD PKT_TYPE_S(2)
#define PKT3_NOP_PAD PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */
257
/* CP_DMA packet layout, one entry per dword:
 * 1. header
 * 2. SRC_ADDR_LO [31:0] or DATA [31:0]
 * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | SRC_ADDR_HI [15:0]
 * 4. DST_ADDR_LO [31:0]
 * 5. DST_ADDR_HI [15:0]
 * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
 */
#define PKT3_CP_DMA   0x41

/* DMA_DATA packet layout, one entry per dword:
 * 1. header
 * 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0]
 * 3. SRC_ADDR_LO [31:0] or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [31:0]
 * 7. COMMAND [29:22] | BYTE_COUNT [20:0]
 */
#define PKT3_DMA_DATA 0x50 /* new for CIK */
276
/* SI async DMA packets.
 * Header layout built by SI_DMA_PACKET(): cmd in bits 31:28, sub_cmd in
 * bits 27:20 and n in bits 19:0. Each argument is masked to its field width.
 */
#define SI_DMA_PACKET(cmd, sub_cmd, n)                                                             \
   (((unsigned)(n) & 0xFFFFF) | (((unsigned)(sub_cmd) & 0xFF) << 20) |                             \
    (((unsigned)(cmd) & 0xF) << 28))

/* SI async DMA Packet types */
#define SI_DMA_PACKET_WRITE           0x2
#define SI_DMA_PACKET_COPY            0x3
#define SI_DMA_PACKET_INDIRECT_BUFFER 0x4
#define SI_DMA_PACKET_SEMAPHORE       0x5
#define SI_DMA_PACKET_FENCE           0x6
#define SI_DMA_PACKET_TRAP            0x7
#define SI_DMA_PACKET_SRBM_WRITE      0x9
#define SI_DMA_PACKET_CONSTANT_FILL   0xd
#define SI_DMA_PACKET_NOP             0xf

/* Copy size limits and copy variants. */
#define SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE  0xfffe0
/* The documentation says 0xffff8 is the maximum size in dwords, which is
 * 0x3fffe0 in bytes. */
#define SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE 0x3fffe0
#define SI_DMA_COPY_DWORD_ALIGNED          0x00
#define SI_DMA_COPY_BYTE_ALIGNED           0x40
#define SI_DMA_COPY_TILED                  0x8
298
/* CIK async DMA packets.
 * Header layout built by CIK_SDMA_PACKET(): op in bits 7:0, sub_op in
 * bits 15:8 and n in bits 31:16. Each argument is masked to its field width.
 */
#define CIK_SDMA_PACKET(op, sub_op, n)                                                             \
   ((((unsigned)(n)&0xFFFF) << 16) | (((unsigned)(sub_op)&0xFF) << 8) |                            \
    (((unsigned)(op)&0xFF) << 0))
/* CIK async DMA packet types */
#define CIK_SDMA_OPCODE_NOP                        0x0
#define CIK_SDMA_OPCODE_COPY                       0x1
#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR            0x0
#define CIK_SDMA_COPY_SUB_OPCODE_TILED             0x1
#define CIK_SDMA_COPY_SUB_OPCODE_SOA               0x3
#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
#define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW  0x5
#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW    0x6
#define CIK_SDMA_OPCODE_WRITE                      0x2
#define SDMA_WRITE_SUB_OPCODE_LINEAR               0x0
#define SDMA_WRITE_SUB_OPCODE_TILED                0x1
/* Historical misspelling ("WRTIE"); kept as an alias so existing users of the
 * old name keep compiling. New code should use SDMA_WRITE_SUB_OPCODE_TILED.
 */
#define SDMA_WRTIE_SUB_OPCODE_TILED                SDMA_WRITE_SUB_OPCODE_TILED
#define CIK_SDMA_OPCODE_INDIRECT_BUFFER            0x4
#define CIK_SDMA_PACKET_FENCE                      0x5
#define CIK_SDMA_PACKET_TRAP                       0x6
#define CIK_SDMA_PACKET_SEMAPHORE                  0x7
#define CIK_SDMA_PACKET_CONSTANT_FILL              0xb
#define CIK_SDMA_OPCODE_TIMESTAMP                  0xd
#define SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP     0x0
#define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP     0x1
#define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP    0x2
#define CIK_SDMA_PACKET_SRBM_WRITE                 0xe
/* There is apparently an undocumented HW limitation that
   prevents the HW from copying the last 255 bytes of (1 << 22) - 1 */
#define CIK_SDMA_COPY_MAX_SIZE    0x3fff00   /* almost 4 MB */
#define GFX103_SDMA_COPY_MAX_SIZE 0x3fffff00 /* almost 1 GB */
329
/* Bit flags naming floating-point value classes; one bit per class, so
 * several classes can be OR'ed together into a single mask.
 */
enum amd_cmp_class_flags
{
   S_NAN       = 0x001, /* bit 0: signaling NaN */
   Q_NAN       = 0x002, /* bit 1: quiet NaN */
   N_INFINITY  = 0x004, /* bit 2: negative infinity */
   N_NORMAL    = 0x008, /* bit 3: negative normal */
   N_SUBNORMAL = 0x010, /* bit 4: negative subnormal */
   N_ZERO      = 0x020, /* bit 5: negative zero */
   P_ZERO      = 0x040, /* bit 6: positive zero */
   P_SUBNORMAL = 0x080, /* bit 7: positive subnormal */
   P_NORMAL    = 0x100, /* bit 8: positive normal */
   P_INFINITY  = 0x200  /* bit 9: positive infinity */
};
343
#endif /* SID_H */
345