1/*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 * Copyright 2020 Valve Corporation
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26#ifndef AC_SQTT_H
27#define AC_SQTT_H
28
29#include <stdint.h>
30#include <stdbool.h>
31
32#include <assert.h>
33#include "c11_compat.h"
34#include "ac_rgp.h"
35
36struct radeon_cmdbuf;
37struct radeon_info;
38
/**
 * Driver-side state for an SQ thread trace (SQTT) capture.
 *
 * Only the layout is defined here; the code that fills in and consumes the
 * fields is not part of this header.
 */
struct ac_thread_trace_data {
   /* Two "start" and two "stop" command streams.
    * NOTE(review): presumably one per queue type -- confirm with the users. */
   struct radeon_cmdbuf *start_cs[2];
   struct radeon_cmdbuf *stop_cs[2];
   /* struct radeon_winsys_bo or struct pb_buffer */
   void *bo;
   /* NOTE(review): presumably the CPU mapping of bo -- confirm. */
   void *ptr;
   uint32_t buffer_size;
   int start_frame;
   char *trigger_file;

   /* RGP bookkeeping records (types presumably declared in ac_rgp.h). */
   struct rgp_code_object rgp_code_object;
   struct rgp_loader_events rgp_loader_events;
   struct rgp_pso_correlation rgp_pso_correlation;
};
53
/* Shift amount of 12, i.e. a granule of 1 << 12 = 4096 bytes.
 * NOTE(review): the name suggests trace buffer addresses/sizes are expressed
 * in these units -- confirm against the users of this macro. */
#define SQTT_BUFFER_ALIGN_SHIFT 12
55
/**
 * Three 32-bit words describing one thread trace.
 *
 * The third word is exposed under two names through an anonymous union, so
 * gfx9_write_counter and gfx10_dropped_cntr alias the same storage.
 */
struct ac_thread_trace_info {
   uint32_t cur_offset;
   uint32_t trace_status;
   union {
      uint32_t gfx9_write_counter;
      uint32_t gfx10_dropped_cntr;
   };
};
64
/**
 * One trace entry: a copy of the info words, a pointer to the trace data and
 * the shader engine / compute unit identifiers it is associated with.
 */
struct ac_thread_trace_se {
   struct ac_thread_trace_info info;
   void *data_ptr;
   uint32_t shader_engine;
   uint32_t compute_unit;
};
71
/**
 * A set of trace entries together with the capture state they belong to.
 *
 * The traces array has room for 4 entries.
 * NOTE(review): num_traces is presumably the number of valid entries in
 * traces -- confirm against the code that fills this in.
 */
struct ac_thread_trace {
   struct ac_thread_trace_data *data;
   uint32_t num_traces;
   struct ac_thread_trace_se traces[4];
};
77
/**
 * Declaration only; the definition is not in this header.
 *
 * Takes an index `se` and returns a 64-bit value.
 * NOTE(review): per the name, presumably the offset of the info record for
 * shader engine `se` inside the trace buffer -- confirm in the definition.
 */
uint64_t
ac_thread_trace_get_info_offset(unsigned se);
80
/**
 * Declaration only; the definition is not in this header.
 *
 * Neither rad_info nor data is modified (both are const-qualified).
 * NOTE(review): per the name, presumably the offset of the trace data for
 * shader engine `se` inside the trace buffer -- confirm in the definition.
 */
uint64_t
ac_thread_trace_get_data_offset(const struct radeon_info *rad_info,
                                const struct ac_thread_trace_data *data, unsigned se);
/**
 * Declaration only; the definition is not in this header.
 *
 * NOTE(review): per the name, presumably `va` is the base GPU virtual address
 * of the trace buffer and the result is the address of the info record for
 * shader engine `se` -- confirm in the definition.
 */
uint64_t
ac_thread_trace_get_info_va(uint64_t va, unsigned se);
86
/**
 * Declaration only; the definition is not in this header.
 *
 * Neither rad_info nor data is modified (both are const-qualified).
 * NOTE(review): per the name, presumably `va` is the base GPU virtual address
 * of the trace buffer and the result is the address of the trace data for
 * shader engine `se` -- confirm in the definition.
 */
uint64_t
ac_thread_trace_get_data_va(const struct radeon_info *rad_info,
                            const struct ac_thread_trace_data *data, uint64_t va, unsigned se);
90
/**
 * Declaration only; the definition is not in this header.
 *
 * Returns a boolean computed from the device info, the capture state and one
 * set of trace info words. data and info are const-qualified; rad_info is
 * taken through a non-const pointer here.
 * NOTE(review): per the name, presumably true when the trace was captured in
 * full -- confirm in the definition.
 */
bool
ac_is_thread_trace_complete(struct radeon_info *rad_info,
                            const struct ac_thread_trace_data *data,
                            const struct ac_thread_trace_info *info);
95
/**
 * Declaration only; the definition is not in this header.
 *
 * Returns a 32-bit value derived from the device info and one set of trace
 * info words. info is const-qualified; rad_info is taken through a non-const
 * pointer here.
 * NOTE(review): the unit of the returned size is not visible from this
 * header -- confirm in the definition before comparing it with buffer_size.
 */
uint32_t
ac_get_expected_buffer_size(struct radeon_info *rad_info,
                            const struct ac_thread_trace_info *info);
99
/**
 * Identifiers for RGP SQ thread-tracing markers (Table 1)
 *
 * The values cover 0x0..0xF, which is exactly the range of the 4-bit
 * `identifier` bit-field found at the start of the marker structs below.
 */
enum rgp_sqtt_marker_identifier
{
   RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0,
   RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1,
   RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2,
   RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3,
   RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4,
   RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5,
   RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6,
   RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7,
   RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8,
   RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9,
   RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB,
   RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF
};
122
/**
 * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2)
 *
 * Four dwords. Each dword is a union so it can be accessed either through
 * its named field(s) or as a raw dwordNN value.
 */
struct rgp_sqtt_marker_cb_start {
   union {
      /* 4 + 3 + 20 + 5 = 32 bits, overlaid on dword01. */
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t queue : 5;
      };
      uint32_t dword01;
   };
   union {
      uint32_t device_id_low;
      uint32_t dword02;
   };
   union {
      uint32_t device_id_high;
      uint32_t dword03;
   };
   union {
      uint32_t queue_flags;
      uint32_t dword04;
   };
};

/* Compile-time check that the struct is exactly 4 dwords (16 bytes). */
static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16,
              "rgp_sqtt_marker_cb_start doesn't match RGP spec");
152
/**
 * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3)
 *
 * Three dwords. Each dword is a union so it can be accessed either through
 * its named field(s) or as a raw dwordNN value.
 */
struct rgp_sqtt_marker_cb_end {
   union {
      /* 4 + 3 + 20 + 5 = 32 bits, overlaid on dword01. */
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t reserved : 5;
      };
      uint32_t dword01;
   };
   union {
      uint32_t device_id_low;
      uint32_t dword02;
   };
   union {
      uint32_t device_id_high;
      uint32_t dword03;
   };
};

/* Compile-time check that the struct is exactly 3 dwords (12 bytes). */
static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12,
              "rgp_sqtt_marker_cb_end doesn't match RGP spec");
179
/**
 * API types used in RGP SQ thread-tracing markers for the "General API"
 * packet.
 *
 * NOTE(review): ApiInvalid is 0xffffffff, which is not representable as an
 * int; ISO C (before C23) requires enumerator values to fit in int, so this
 * constant relies on a compiler extension. It is also wider than the 20-bit
 * api_type field of struct rgp_sqtt_marker_general_api.
 */
enum rgp_sqtt_marker_general_api_type
{
   ApiCmdBindPipeline = 0,
   ApiCmdBindDescriptorSets = 1,
   ApiCmdBindIndexBuffer = 2,
   ApiCmdBindVertexBuffers = 3,
   ApiCmdDraw = 4,
   ApiCmdDrawIndexed = 5,
   ApiCmdDrawIndirect = 6,
   ApiCmdDrawIndexedIndirect = 7,
   ApiCmdDrawIndirectCountAMD = 8,
   ApiCmdDrawIndexedIndirectCountAMD = 9,
   ApiCmdDispatch = 10,
   ApiCmdDispatchIndirect = 11,
   ApiCmdCopyBuffer = 12,
   ApiCmdCopyImage = 13,
   ApiCmdBlitImage = 14,
   ApiCmdCopyBufferToImage = 15,
   ApiCmdCopyImageToBuffer = 16,
   ApiCmdUpdateBuffer = 17,
   ApiCmdFillBuffer = 18,
   ApiCmdClearColorImage = 19,
   ApiCmdClearDepthStencilImage = 20,
   ApiCmdClearAttachments = 21,
   ApiCmdResolveImage = 22,
   ApiCmdWaitEvents = 23,
   ApiCmdPipelineBarrier = 24,
   ApiCmdBeginQuery = 25,
   ApiCmdEndQuery = 26,
   ApiCmdResetQueryPool = 27,
   ApiCmdWriteTimestamp = 28,
   ApiCmdCopyQueryPoolResults = 29,
   ApiCmdPushConstants = 30,
   ApiCmdBeginRenderPass = 31,
   ApiCmdNextSubpass = 32,
   ApiCmdEndRenderPass = 33,
   ApiCmdExecuteCommands = 34,
   ApiCmdSetViewport = 35,
   ApiCmdSetScissor = 36,
   ApiCmdSetLineWidth = 37,
   ApiCmdSetDepthBias = 38,
   ApiCmdSetBlendConstants = 39,
   ApiCmdSetDepthBounds = 40,
   ApiCmdSetStencilCompareMask = 41,
   ApiCmdSetStencilWriteMask = 42,
   ApiCmdSetStencilReference = 43,
   ApiCmdDrawIndirectCount = 44,
   ApiCmdDrawIndexedIndirectCount = 45,
   ApiInvalid = 0xffffffff
};
234
/**
 * RGP SQ thread-tracing marker for a "General API" instrumentation packet.
 *
 * A single dword: the bit-fields (4 + 3 + 20 + 1 + 4 = 32 bits) are overlaid
 * on dword01.
 * NOTE(review): api_type presumably holds an
 * enum rgp_sqtt_marker_general_api_type value -- confirm against the writers.
 */
struct rgp_sqtt_marker_general_api {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t api_type : 20;
         uint32_t is_end : 1;
         uint32_t reserved : 4;
      };
      uint32_t dword01;
   };
};

/* Compile-time check that the struct is exactly 1 dword (4 bytes). */
static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4,
              "rgp_sqtt_marker_general_api doesn't match RGP spec");
253
/**
 * API types used in RGP SQ thread-tracing markers (Table 16).
 *
 * NOTE(review): EventInvalid is 0xffffffff, which is not representable as an
 * int; ISO C (before C23) requires enumerator values to fit in int, so this
 * constant relies on a compiler extension. It is also wider than the 24-bit
 * api_type field of struct rgp_sqtt_marker_event.
 */
enum rgp_sqtt_marker_event_type
{
   EventCmdDraw = 0,
   EventCmdDrawIndexed = 1,
   EventCmdDrawIndirect = 2,
   EventCmdDrawIndexedIndirect = 3,
   EventCmdDrawIndirectCountAMD = 4,
   EventCmdDrawIndexedIndirectCountAMD = 5,
   EventCmdDispatch = 6,
   EventCmdDispatchIndirect = 7,
   EventCmdCopyBuffer = 8,
   EventCmdCopyImage = 9,
   EventCmdBlitImage = 10,
   EventCmdCopyBufferToImage = 11,
   EventCmdCopyImageToBuffer = 12,
   EventCmdUpdateBuffer = 13,
   EventCmdFillBuffer = 14,
   EventCmdClearColorImage = 15,
   EventCmdClearDepthStencilImage = 16,
   EventCmdClearAttachments = 17,
   EventCmdResolveImage = 18,
   EventCmdWaitEvents = 19,
   EventCmdPipelineBarrier = 20,
   EventCmdResetQueryPool = 21,
   EventCmdCopyQueryPoolResults = 22,
   EventRenderPassColorClear = 23,
   EventRenderPassDepthStencilClear = 24,
   EventRenderPassResolve = 25,
   EventInternalUnknown = 26,
   EventCmdDrawIndirectCount = 27,
   EventCmdDrawIndexedIndirectCount = 28,
   EventInvalid = 0xffffffff
};
290
/**
 * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4)
 *
 * Three dwords. Each dword is a union so it can be accessed either through
 * its named field(s) or as a raw dwordNN value.
 */
struct rgp_sqtt_marker_event {
   union {
      /* 4 + 3 + 24 + 1 = 32 bits, overlaid on dword01. */
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t api_type : 24;
         uint32_t has_thread_dims : 1;
      };
      uint32_t dword01;
   };
   union {
      /* 20 + 4 + 4 + 4 = 32 bits, overlaid on dword02. */
      struct {
         uint32_t cb_id : 20;
         uint32_t vertex_offset_reg_idx : 4;
         uint32_t instance_offset_reg_idx : 4;
         uint32_t draw_index_reg_idx : 4;
      };
      uint32_t dword02;
   };
   union {
      uint32_t cmd_id;
      uint32_t dword03;
   };
};

/* Compile-time check that the struct is exactly 3 dwords (12 bytes). */
static_assert(sizeof(struct rgp_sqtt_marker_event) == 12,
              "rgp_sqtt_marker_event doesn't match RGP spec");
321
/**
 * Per-dispatch specific marker where workgroup dims are included.
 *
 * The 3-dword event marker followed by three more dwords, one per dimension
 * (x, y, z).
 */
struct rgp_sqtt_marker_event_with_dims {
   struct rgp_sqtt_marker_event event;
   uint32_t thread_x;
   uint32_t thread_y;
   uint32_t thread_z;
};

/* Compile-time check: 12 bytes of event + 3 dwords = 24 bytes, no padding. */
static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24,
              "rgp_sqtt_marker_event_with_dims doesn't match RGP spec");
334
/**
 * "Barrier Start" RGP SQTT instrumentation marker (Table 5)
 *
 * Two dwords. Each dword is a union so it can be accessed either through
 * its bit-fields or as a raw dwordNN value.
 */
struct rgp_sqtt_marker_barrier_start {
   union {
      /* 4 + 3 + 20 + 5 = 32 bits, overlaid on dword01. */
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t reserved : 5;
      };
      uint32_t dword01;
   };
   union {
      /* 31 + 1 = 32 bits, overlaid on dword02. */
      struct {
         uint32_t driver_reason : 31;
         uint32_t internal : 1;
      };
      uint32_t dword02;
   };
};

/* Compile-time check that the struct is exactly 2 dwords (8 bytes). */
static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8,
              "rgp_sqtt_marker_barrier_start doesn't match RGP spec");
359
/**
 * "Barrier End" RGP SQTT instrumentation marker (Table 6)
 *
 * Two dwords. Besides the common header fields, both dwords consist mostly
 * of single-bit flags plus a 16-bit num_layout_transitions counter.
 */
struct rgp_sqtt_marker_barrier_end {
   union {
      /* 4 + 3 + 20 + 5 single-bit flags = 32 bits, overlaid on dword01. */
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t wait_on_eop_ts : 1;
         uint32_t vs_partial_flush : 1;
         uint32_t ps_partial_flush : 1;
         uint32_t cs_partial_flush : 1;
         uint32_t pfp_sync_me : 1;
      };
      uint32_t dword01;
   };
   union {
      /* 10 single-bit flags + 16 + 1 + 5 = 32 bits, overlaid on dword02. */
      struct {
         uint32_t sync_cp_dma : 1;
         uint32_t inval_tcp : 1;
         uint32_t inval_sqI : 1;
         uint32_t inval_sqK : 1;
         uint32_t flush_tcc : 1;
         uint32_t inval_tcc : 1;
         uint32_t flush_cb : 1;
         uint32_t inval_cb : 1;
         uint32_t flush_db : 1;
         uint32_t inval_db : 1;
         uint32_t num_layout_transitions : 16;
         uint32_t inval_gl1 : 1;
         uint32_t reserved : 5;
      };
      uint32_t dword02;
   };
};

/* Compile-time check that the struct is exactly 2 dwords (8 bytes). */
static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8,
              "rgp_sqtt_marker_barrier_end doesn't match RGP spec");
399
/**
 * "Layout Transition" RGP SQTT instrumentation marker (Table 7)
 *
 * Two dwords. The first carries the common header fields followed by eight
 * single-bit flags; the second is entirely reserved.
 */
struct rgp_sqtt_marker_layout_transition {
   union {
      /* 4 + 3 + 8 single-bit flags + 17 = 32 bits, overlaid on dword01. */
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t depth_stencil_expand : 1;
         uint32_t htile_hiz_range_expand : 1;
         uint32_t depth_stencil_resummarize : 1;
         uint32_t dcc_decompress : 1;
         uint32_t fmask_decompress : 1;
         uint32_t fast_clear_eliminate : 1;
         uint32_t fmask_color_expand : 1;
         uint32_t init_mask_ram : 1;
         uint32_t reserved1 : 17;
      };
      uint32_t dword01;
   };
   union {
      /* The whole second dword is reserved. */
      struct {
         uint32_t reserved2 : 32;
      };
      uint32_t dword02;
   };
};

/* Compile-time check that the struct is exactly 2 dwords (8 bytes). */
static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8,
              "rgp_sqtt_marker_layout_transition doesn't match RGP spec");
430
431
/**
 * "User Event" RGP SQTT instrumentation marker (Table 8)
 *
 * A single dword: the bit-fields (4 + 8 + 8 + 12 = 32 bits) are overlaid on
 * dword01. Unlike most other markers there is no ext_dwords field.
 */
struct rgp_sqtt_marker_user_event {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t reserved0 : 8;
         uint32_t data_type : 8;
         uint32_t reserved1 : 12;
      };
      uint32_t dword01;
   };
};

/* Compile-time check that the struct is exactly 1 dword (4 bytes). */
static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4,
              "rgp_sqtt_marker_user_event doesn't match RGP spec");

/**
 * User event marker followed by one extra dword, `length`.
 *
 * NOTE(review): `length` is presumably the size of a payload emitted after
 * the marker -- confirm against the code that writes this marker.
 */
struct rgp_sqtt_marker_user_event_with_length {
   struct rgp_sqtt_marker_user_event user_event;
   uint32_t length;
};

/* The marker is written out as raw dwords, so guard against any padding or
 * accidental growth, the same way the other marker structs are guarded. */
static_assert(sizeof(struct rgp_sqtt_marker_user_event_with_length) == 8,
              "rgp_sqtt_marker_user_event_with_length must be exactly two dwords");
453
/**
 * Kinds of "User Event" markers, numbered consecutively from 0.
 */
enum rgp_sqtt_marker_user_event_type {
   UserEventTrigger = 0,
   UserEventPop = 1,
   UserEventPush = 2,
   UserEventObjectName = 3
};
461
/**
 * "Pipeline bind" RGP SQTT instrumentation marker (Table 12)
 *
 * Three dwords: a header dword followed by a two-dword hash that can be
 * accessed either as api_pso_hash[0..1] or as dword02/dword03.
 */
struct rgp_sqtt_marker_pipeline_bind {
   union {
      /* 4 + 3 + 1 + 20 + 4 = 32 bits, overlaid on dword01. */
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t bind_point : 1;
         uint32_t cb_id : 20;
         uint32_t reserved : 4;
      };
      uint32_t dword01;
   };
   union {
      uint32_t api_pso_hash[2];
      struct {
         uint32_t dword02;
         uint32_t dword03;
      };
   };
};

/* Compile-time check that the struct is exactly 3 dwords (12 bytes). */
static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12,
              "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec");
487
488
/**
 * Declaration only; the definition is not in this header.
 *
 * Takes the capture state through a non-const pointer plus a 64-bit pipeline
 * hash, and returns a boolean.
 * NOTE(review): per the name, presumably records a PSO correlation entry in
 * thread_trace_data and returns false on failure -- confirm in the
 * definition.
 */
bool ac_sqtt_add_pso_correlation(struct ac_thread_trace_data *thread_trace_data,
                                 uint64_t pipeline_hash);
491
/**
 * Declaration only; the definition is not in this header.
 *
 * Takes the capture state through a non-const pointer plus a 64-bit pipeline
 * hash and a 64-bit base address, and returns a boolean.
 * NOTE(review): per the name, presumably records a code-object loader event
 * in thread_trace_data and returns false on failure -- confirm in the
 * definition.
 */
bool ac_sqtt_add_code_object_loader_event(struct ac_thread_trace_data *thread_trace_data,
                                          uint64_t pipeline_hash,
                                          uint64_t base_address);
495
496#endif
497