/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *  Nicolai Hähnle <nicolai.haehnle@amd.com>
 *
 */

#ifndef R600_QUERY_H
#define R600_QUERY_H

#include <stdbool.h>
#include <stdint.h>

#include "util/u_threaded_context.h"

/* pipe_* types that this header only refers to through pointers. */
struct pipe_context;
struct pipe_query;
struct pipe_resource;

/* r600 types. r600_query and r600_query_hw are defined further down in this
 * header; the others are only referred to through pointers here. */
struct r600_common_context;
struct r600_common_screen;
struct r600_query;
struct r600_query_hw;
struct r600_resource;

4301e04c3fSmrgenum {
4401e04c3fSmrg	R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
4501e04c3fSmrg	R600_QUERY_DECOMPRESS_CALLS,
4601e04c3fSmrg	R600_QUERY_MRT_DRAW_CALLS,
4701e04c3fSmrg	R600_QUERY_PRIM_RESTART_CALLS,
4801e04c3fSmrg	R600_QUERY_SPILL_DRAW_CALLS,
4901e04c3fSmrg	R600_QUERY_COMPUTE_CALLS,
5001e04c3fSmrg	R600_QUERY_SPILL_COMPUTE_CALLS,
5101e04c3fSmrg	R600_QUERY_DMA_CALLS,
5201e04c3fSmrg	R600_QUERY_CP_DMA_CALLS,
5301e04c3fSmrg	R600_QUERY_NUM_VS_FLUSHES,
5401e04c3fSmrg	R600_QUERY_NUM_PS_FLUSHES,
5501e04c3fSmrg	R600_QUERY_NUM_CS_FLUSHES,
5601e04c3fSmrg	R600_QUERY_NUM_CB_CACHE_FLUSHES,
5701e04c3fSmrg	R600_QUERY_NUM_DB_CACHE_FLUSHES,
5801e04c3fSmrg	R600_QUERY_NUM_RESIDENT_HANDLES,
5901e04c3fSmrg	R600_QUERY_TC_OFFLOADED_SLOTS,
6001e04c3fSmrg	R600_QUERY_TC_DIRECT_SLOTS,
6101e04c3fSmrg	R600_QUERY_TC_NUM_SYNCS,
6201e04c3fSmrg	R600_QUERY_CS_THREAD_BUSY,
6301e04c3fSmrg	R600_QUERY_GALLIUM_THREAD_BUSY,
6401e04c3fSmrg	R600_QUERY_REQUESTED_VRAM,
6501e04c3fSmrg	R600_QUERY_REQUESTED_GTT,
6601e04c3fSmrg	R600_QUERY_MAPPED_VRAM,
6701e04c3fSmrg	R600_QUERY_MAPPED_GTT,
6801e04c3fSmrg	R600_QUERY_BUFFER_WAIT_TIME,
6901e04c3fSmrg	R600_QUERY_NUM_MAPPED_BUFFERS,
7001e04c3fSmrg	R600_QUERY_NUM_GFX_IBS,
7101e04c3fSmrg	R600_QUERY_NUM_SDMA_IBS,
7201e04c3fSmrg	R600_QUERY_GFX_BO_LIST_SIZE,
7301e04c3fSmrg	R600_QUERY_NUM_BYTES_MOVED,
7401e04c3fSmrg	R600_QUERY_NUM_EVICTIONS,
7501e04c3fSmrg	R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS,
7601e04c3fSmrg	R600_QUERY_VRAM_USAGE,
7701e04c3fSmrg	R600_QUERY_VRAM_VIS_USAGE,
7801e04c3fSmrg	R600_QUERY_GTT_USAGE,
7901e04c3fSmrg	R600_QUERY_GPU_TEMPERATURE,
8001e04c3fSmrg	R600_QUERY_CURRENT_GPU_SCLK,
8101e04c3fSmrg	R600_QUERY_CURRENT_GPU_MCLK,
8201e04c3fSmrg	R600_QUERY_GPU_LOAD,
8301e04c3fSmrg	R600_QUERY_GPU_SHADERS_BUSY,
8401e04c3fSmrg	R600_QUERY_GPU_TA_BUSY,
8501e04c3fSmrg	R600_QUERY_GPU_GDS_BUSY,
8601e04c3fSmrg	R600_QUERY_GPU_VGT_BUSY,
8701e04c3fSmrg	R600_QUERY_GPU_IA_BUSY,
8801e04c3fSmrg	R600_QUERY_GPU_SX_BUSY,
8901e04c3fSmrg	R600_QUERY_GPU_WD_BUSY,
9001e04c3fSmrg	R600_QUERY_GPU_BCI_BUSY,
9101e04c3fSmrg	R600_QUERY_GPU_SC_BUSY,
9201e04c3fSmrg	R600_QUERY_GPU_PA_BUSY,
9301e04c3fSmrg	R600_QUERY_GPU_DB_BUSY,
9401e04c3fSmrg	R600_QUERY_GPU_CP_BUSY,
9501e04c3fSmrg	R600_QUERY_GPU_CB_BUSY,
9601e04c3fSmrg	R600_QUERY_GPU_SDMA_BUSY,
9701e04c3fSmrg	R600_QUERY_GPU_PFP_BUSY,
9801e04c3fSmrg	R600_QUERY_GPU_MEQ_BUSY,
9901e04c3fSmrg	R600_QUERY_GPU_ME_BUSY,
10001e04c3fSmrg	R600_QUERY_GPU_SURF_SYNC_BUSY,
10101e04c3fSmrg	R600_QUERY_GPU_CP_DMA_BUSY,
10201e04c3fSmrg	R600_QUERY_GPU_SCRATCH_RAM_BUSY,
10301e04c3fSmrg	R600_QUERY_NUM_COMPILATIONS,
10401e04c3fSmrg	R600_QUERY_NUM_SHADERS_CREATED,
10501e04c3fSmrg	R600_QUERY_NUM_SHADER_CACHE_HITS,
10601e04c3fSmrg	R600_QUERY_GPIN_ASIC_ID,
10701e04c3fSmrg	R600_QUERY_GPIN_NUM_SIMD,
10801e04c3fSmrg	R600_QUERY_GPIN_NUM_RB,
10901e04c3fSmrg	R600_QUERY_GPIN_NUM_SPI,
11001e04c3fSmrg	R600_QUERY_GPIN_NUM_SE,
11101e04c3fSmrg
11201e04c3fSmrg	R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100,
11301e04c3fSmrg};
11401e04c3fSmrg
/* Software query groups.
 *
 * R600_NUM_SW_QUERY_GROUPS must stay last: its value is the number of groups
 * listed before it (currently 1).
 */
enum {
	R600_QUERY_GROUP_GPIN = 0,
	R600_NUM_SW_QUERY_GROUPS
};

/* Table of callbacks for one kind of query. struct r600_query holds a pointer
 * to such a table in its "ops" member. */
struct r600_query_ops {
	/* Takes the screen rather than a context, unlike the other callbacks. */
	void (*destroy)(struct r600_common_screen *, struct r600_query *);
	/* begin/end report a bool status. */
	bool (*begin)(struct r600_common_context *, struct r600_query *);
	bool (*end)(struct r600_common_context *, struct r600_query *);
	/* Writes the outcome through "result"; "wait" is a caller-supplied
	 * flag. NOTE(review): presumably the return value says whether the
	 * result was available -- confirm against the implementations. */
	bool (*get_result)(struct r600_common_context *,
			   struct r600_query *, bool wait,
			   union pipe_query_result *result);
	/* Variant that targets a pipe_resource at a given offset instead of
	 * a CPU-side union; it has no return value. */
	void (*get_result_resource)(struct r600_common_context *,
				    struct r600_query *, bool wait,
				    enum pipe_query_value_type result_type,
				    int index,
				    struct pipe_resource *resource,
				    unsigned offset);
};

13501e04c3fSmrgstruct r600_query {
13601e04c3fSmrg	struct threaded_query b;
13701e04c3fSmrg	struct r600_query_ops *ops;
13801e04c3fSmrg
13901e04c3fSmrg	/* The type of query */
14001e04c3fSmrg	unsigned type;
14101e04c3fSmrg};
14201e04c3fSmrg
/* Bit flags for hardware queries.
 * NOTE(review): presumably stored in r600_query_hw::flags -- confirm. */
enum {
	R600_QUERY_HW_FLAG_NO_START = (1 << 0),
	/* gap: bit 1 is not assigned */
	/* whether begin_query doesn't clear the result */
	R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
};

/* Callbacks specific to hardware queries. struct r600_query_hw holds a
 * pointer to such a table in its "ops" member. */
struct r600_query_hw_ops {
	/* Given the screen, the query and a buffer resource; reports a bool
	 * status. */
	bool (*prepare_buffer)(struct r600_common_screen *,
			       struct r600_query_hw *,
			       struct r600_resource *);
	/* emit_start/emit_stop receive the buffer and a 64-bit "va".
	 * NOTE(review): presumably the GPU virtual address to write the
	 * sample to -- confirm. */
	void (*emit_start)(struct r600_common_context *,
			   struct r600_query_hw *,
			   struct r600_resource *buffer, uint64_t va);
	void (*emit_stop)(struct r600_common_context *,
			  struct r600_query_hw *,
			  struct r600_resource *buffer, uint64_t va);
	/* clear_result and add_result both operate on a caller-provided
	 * pipe_query_result; add_result additionally gets a raw buffer
	 * pointer to read from. */
	void (*clear_result)(struct r600_query_hw *, union pipe_query_result *);
	void (*add_result)(struct r600_common_screen *screen,
			   struct r600_query_hw *, void *buffer,
			   union pipe_query_result *result);
};

/* One node in a singly linked chain of result buffers, newest first. */
struct r600_query_buffer {
	/* The buffer where query results are stored. */
	struct r600_resource		*buf;
	/* Offset of the next free result after current query data */
	unsigned			results_end;
	/* If a query buffer is full, a new buffer is created and the old one
	 * is put in here. When we calculate the result, we sum up the samples
	 * from all buffers. */
	struct r600_query_buffer	*previous;
};

17701e04c3fSmrgstruct r600_query_hw {
17801e04c3fSmrg	struct r600_query b;
17901e04c3fSmrg	struct r600_query_hw_ops *ops;
18001e04c3fSmrg	unsigned flags;
18101e04c3fSmrg
18201e04c3fSmrg	/* The query buffer and how many results are in it. */
18301e04c3fSmrg	struct r600_query_buffer buffer;
18401e04c3fSmrg	/* Size of the result in memory for both begin_query and end_query,
18501e04c3fSmrg	 * this can be one or two numbers, or it could even be a size of a structure. */
18601e04c3fSmrg	unsigned result_size;
18701e04c3fSmrg	/* The number of dwords for begin_query or end_query. */
18801e04c3fSmrg	unsigned num_cs_dw_begin;
18901e04c3fSmrg	unsigned num_cs_dw_end;
19001e04c3fSmrg	/* Linked list of queries */
19101e04c3fSmrg	struct list_head list;
19201e04c3fSmrg	/* For transform feedback: which stream the query is for */
19301e04c3fSmrg	unsigned stream;
19401e04c3fSmrg};
19501e04c3fSmrg
/* Entry points for hardware queries.
 *
 * r600_query_hw_init takes the screen and a r600_query_hw and reports a bool
 * status. The other four have exactly the signatures of the destroy, begin,
 * end and get_result members of struct r600_query_ops, so they can be stored
 * in such a table directly.
 */
bool r600_query_hw_init(struct r600_common_screen *rscreen,
			struct r600_query_hw *query);
void r600_query_hw_destroy(struct r600_common_screen *rscreen,
			   struct r600_query *rquery);
bool r600_query_hw_begin(struct r600_common_context *rctx,
			 struct r600_query *rquery);
bool r600_query_hw_end(struct r600_common_context *rctx,
		       struct r600_query *rquery);
bool r600_query_hw_get_result(struct r600_common_context *rctx,
			      struct r600_query *rquery,
			      bool wait,
			      union pipe_query_result *result);

/* Performance counters */

/* Bit flags describing a performance counter block.
 * NOTE(review): presumably stored in r600_perfcounter_block::flags --
 * confirm. */
enum {
	/* This block is part of the shader engine */
	R600_PC_BLOCK_SE = (1 << 0),

	/* Expose per-instance groups instead of summing all instances (within
	 * an SE). */
	R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),

	/* Expose per-SE groups instead of summing instances across SEs. */
	R600_PC_BLOCK_SE_GROUPS = (1 << 2),

	/* Shader block */
	R600_PC_BLOCK_SHADER = (1 << 3),

	/* Non-shader block with perfcounters windowed by shaders. */
	R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
};

/* Describes a hardware block with performance counters. Multiple instances of
 * each block, possibly per-SE, may exist on the chip. Depending on the block
 * and on the user's configuration, we either
 *  (a) expose every instance as a performance counter group,
 *  (b) expose a single performance counter group that reports the sum over all
 *      instances, or
 *  (c) expose one performance counter group per instance, but summed over all
 *      shader engines.
 */
struct r600_perfcounter_block {
	const char *basename;
	/* NOTE(review): presumably a mask of R600_PC_BLOCK_* -- confirm. */
	unsigned flags;
	unsigned num_counters;
	unsigned num_selectors;
	unsigned num_instances;

	unsigned num_groups;
	/* NOTE(review): group_names and selector_names are paired with a
	 * stride; presumably each is one allocation holding fixed-size name
	 * slots that many bytes apart -- confirm against the code that fills
	 * them in. */
	char *group_names;
	unsigned group_name_stride;

	char *selector_names;
	unsigned selector_name_stride;

	/* Opaque pointer; this header attaches no meaning to it. */
	void *data;
};

/* Performance counter state: the list of blocks, command-stream size figures,
 * shader-type tables and a set of callbacks. */
struct r600_perfcounters {
	unsigned num_groups;
	unsigned num_blocks;
	/* NOTE(review): presumably an array of num_blocks entries -- confirm. */
	struct r600_perfcounter_block *blocks;

	/* Command-stream sizes in dwords, going by the field names. */
	unsigned num_start_cs_dwords;
	unsigned num_stop_cs_dwords;
	unsigned num_instance_cs_dwords;
	unsigned num_shaders_cs_dwords;

	/* NOTE(review): presumably both tables have num_shader_types
	 * entries -- confirm. */
	unsigned num_shader_types;
	const char * const *shader_type_suffixes;
	const unsigned *shader_type_bits;

	/* Reports two dword counts through the output pointers for "count"
	 * selectors of a block. */
	void (*get_size)(struct r600_perfcounter_block *,
			 unsigned count, unsigned *selectors,
			 unsigned *num_select_dw, unsigned *num_read_dw);

	/* Callbacks that take the context; the buffer/va pairs follow the same
	 * convention as in struct r600_query_hw_ops. */
	void (*emit_instance)(struct r600_common_context *,
			      int se, int instance);
	void (*emit_shaders)(struct r600_common_context *, unsigned shaders);
	void (*emit_select)(struct r600_common_context *,
			    struct r600_perfcounter_block *,
			    unsigned count, unsigned *selectors);
	void (*emit_start)(struct r600_common_context *,
			   struct r600_resource *buffer, uint64_t va);
	void (*emit_stop)(struct r600_common_context *,
			  struct r600_resource *buffer, uint64_t va);
	void (*emit_read)(struct r600_common_context *,
			  struct r600_perfcounter_block *,
			  unsigned count, unsigned *selectors,
			  struct r600_resource *buffer, uint64_t va);

	/* Takes the screen, not a context. */
	void (*cleanup)(struct r600_common_screen *);

	bool separate_se;
	bool separate_instance;
};

/* Creates a query from a list of query types and returns it as a pipe_query.
 * NOTE(review): presumably query_types has num_queries entries and NULL is
 * returned on failure -- confirm against the implementation. */
struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
					   unsigned num_queries,
					   unsigned *query_types);

/* Look up the performance counter / counter group at "index" and describe it
 * through "info".
 * NOTE(review): the meaning of the int return value (and of a NULL "info")
 * is not visible from this header -- confirm against the implementation. */
int r600_get_perfcounter_info(struct r600_common_screen *,
			      unsigned index,
			      struct pipe_driver_query_info *info);
int r600_get_perfcounter_group_info(struct r600_common_screen *,
				    unsigned index,
				    struct pipe_driver_query_group_info *info);

/* Management of struct r600_perfcounters.
 *
 * r600_perfcounters_init takes the number of blocks and reports a bool status.
 * r600_perfcounters_add_block's arguments correspond by name to the basename,
 * flags, num_counters, num_selectors, num_instances and data members of
 * struct r600_perfcounter_block.
 * NOTE(review): presumably init must precede add_block, and do_destroy
 * releases what they allocated -- confirm against the implementation.
 */
bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
void r600_perfcounters_add_block(struct r600_common_screen *,
				 struct r600_perfcounters *,
				 const char *name, unsigned flags,
				 unsigned counters, unsigned selectors,
				 unsigned instances, void *data);
void r600_perfcounters_do_destroy(struct r600_perfcounters *);

/* Hardware query helper taking the context and the query whose buffers are to
 * be reset. */
void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
				 struct r600_query_hw *query);

31401e04c3fSmrgstruct r600_qbo_state {
31501e04c3fSmrg	void *saved_compute;
31601e04c3fSmrg	struct pipe_constant_buffer saved_const0;
31701e04c3fSmrg	struct pipe_shader_buffer saved_ssbo[3];
31801e04c3fSmrg};
31901e04c3fSmrg
#endif /* R600_QUERY_H */