101e04c3fSmrg/* 201e04c3fSmrg * Copyright 2015 Advanced Micro Devices, Inc. 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 2001e04c3fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 2101e04c3fSmrg * SOFTWARE. 2201e04c3fSmrg * 2301e04c3fSmrg * Authors: 2401e04c3fSmrg * Nicolai Hähnle <nicolai.haehnle@amd.com> 2501e04c3fSmrg * 2601e04c3fSmrg */ 2701e04c3fSmrg 2801e04c3fSmrg#ifndef R600_QUERY_H 2901e04c3fSmrg#define R600_QUERY_H 3001e04c3fSmrg 3101e04c3fSmrg#include "util/u_threaded_context.h" 3201e04c3fSmrg 3301e04c3fSmrgstruct pipe_context; 3401e04c3fSmrgstruct pipe_query; 3501e04c3fSmrgstruct pipe_resource; 3601e04c3fSmrg 3701e04c3fSmrgstruct r600_common_context; 3801e04c3fSmrgstruct r600_common_screen; 3901e04c3fSmrgstruct r600_query; 4001e04c3fSmrgstruct r600_query_hw; 4101e04c3fSmrgstruct r600_resource; 4201e04c3fSmrg 4301e04c3fSmrgenum { 4401e04c3fSmrg R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, 4501e04c3fSmrg R600_QUERY_DECOMPRESS_CALLS, 4601e04c3fSmrg R600_QUERY_MRT_DRAW_CALLS, 4701e04c3fSmrg R600_QUERY_PRIM_RESTART_CALLS, 4801e04c3fSmrg R600_QUERY_SPILL_DRAW_CALLS, 4901e04c3fSmrg R600_QUERY_COMPUTE_CALLS, 5001e04c3fSmrg R600_QUERY_SPILL_COMPUTE_CALLS, 5101e04c3fSmrg R600_QUERY_DMA_CALLS, 5201e04c3fSmrg R600_QUERY_CP_DMA_CALLS, 5301e04c3fSmrg R600_QUERY_NUM_VS_FLUSHES, 5401e04c3fSmrg R600_QUERY_NUM_PS_FLUSHES, 5501e04c3fSmrg R600_QUERY_NUM_CS_FLUSHES, 5601e04c3fSmrg R600_QUERY_NUM_CB_CACHE_FLUSHES, 5701e04c3fSmrg R600_QUERY_NUM_DB_CACHE_FLUSHES, 5801e04c3fSmrg R600_QUERY_NUM_RESIDENT_HANDLES, 5901e04c3fSmrg R600_QUERY_TC_OFFLOADED_SLOTS, 6001e04c3fSmrg R600_QUERY_TC_DIRECT_SLOTS, 6101e04c3fSmrg R600_QUERY_TC_NUM_SYNCS, 6201e04c3fSmrg R600_QUERY_CS_THREAD_BUSY, 6301e04c3fSmrg R600_QUERY_GALLIUM_THREAD_BUSY, 6401e04c3fSmrg R600_QUERY_REQUESTED_VRAM, 6501e04c3fSmrg R600_QUERY_REQUESTED_GTT, 6601e04c3fSmrg R600_QUERY_MAPPED_VRAM, 6701e04c3fSmrg R600_QUERY_MAPPED_GTT, 6801e04c3fSmrg R600_QUERY_BUFFER_WAIT_TIME, 6901e04c3fSmrg R600_QUERY_NUM_MAPPED_BUFFERS, 7001e04c3fSmrg R600_QUERY_NUM_GFX_IBS, 7101e04c3fSmrg R600_QUERY_NUM_SDMA_IBS, 7201e04c3fSmrg R600_QUERY_GFX_BO_LIST_SIZE, 7301e04c3fSmrg R600_QUERY_NUM_BYTES_MOVED, 7401e04c3fSmrg R600_QUERY_NUM_EVICTIONS, 7501e04c3fSmrg R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS, 7601e04c3fSmrg R600_QUERY_VRAM_USAGE, 7701e04c3fSmrg R600_QUERY_VRAM_VIS_USAGE, 7801e04c3fSmrg R600_QUERY_GTT_USAGE, 7901e04c3fSmrg R600_QUERY_GPU_TEMPERATURE, 8001e04c3fSmrg R600_QUERY_CURRENT_GPU_SCLK, 8101e04c3fSmrg R600_QUERY_CURRENT_GPU_MCLK, 8201e04c3fSmrg R600_QUERY_GPU_LOAD, 8301e04c3fSmrg R600_QUERY_GPU_SHADERS_BUSY, 8401e04c3fSmrg R600_QUERY_GPU_TA_BUSY, 8501e04c3fSmrg R600_QUERY_GPU_GDS_BUSY, 8601e04c3fSmrg R600_QUERY_GPU_VGT_BUSY, 8701e04c3fSmrg R600_QUERY_GPU_IA_BUSY, 8801e04c3fSmrg R600_QUERY_GPU_SX_BUSY, 8901e04c3fSmrg R600_QUERY_GPU_WD_BUSY, 9001e04c3fSmrg R600_QUERY_GPU_BCI_BUSY, 9101e04c3fSmrg R600_QUERY_GPU_SC_BUSY, 9201e04c3fSmrg R600_QUERY_GPU_PA_BUSY, 9301e04c3fSmrg R600_QUERY_GPU_DB_BUSY, 9401e04c3fSmrg R600_QUERY_GPU_CP_BUSY, 9501e04c3fSmrg R600_QUERY_GPU_CB_BUSY, 9601e04c3fSmrg R600_QUERY_GPU_SDMA_BUSY, 9701e04c3fSmrg R600_QUERY_GPU_PFP_BUSY, 9801e04c3fSmrg R600_QUERY_GPU_MEQ_BUSY, 9901e04c3fSmrg R600_QUERY_GPU_ME_BUSY, 10001e04c3fSmrg R600_QUERY_GPU_SURF_SYNC_BUSY, 10101e04c3fSmrg R600_QUERY_GPU_CP_DMA_BUSY, 10201e04c3fSmrg R600_QUERY_GPU_SCRATCH_RAM_BUSY, 10301e04c3fSmrg R600_QUERY_NUM_COMPILATIONS, 10401e04c3fSmrg R600_QUERY_NUM_SHADERS_CREATED, 10501e04c3fSmrg R600_QUERY_NUM_SHADER_CACHE_HITS, 10601e04c3fSmrg R600_QUERY_GPIN_ASIC_ID, 10701e04c3fSmrg R600_QUERY_GPIN_NUM_SIMD, 10801e04c3fSmrg R600_QUERY_GPIN_NUM_RB, 10901e04c3fSmrg R600_QUERY_GPIN_NUM_SPI, 11001e04c3fSmrg R600_QUERY_GPIN_NUM_SE, 11101e04c3fSmrg 11201e04c3fSmrg R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100, 11301e04c3fSmrg}; 11401e04c3fSmrg 11501e04c3fSmrgenum { 11601e04c3fSmrg R600_QUERY_GROUP_GPIN = 0, 11701e04c3fSmrg R600_NUM_SW_QUERY_GROUPS 11801e04c3fSmrg}; 11901e04c3fSmrg 12001e04c3fSmrgstruct r600_query_ops { 12101e04c3fSmrg void (*destroy)(struct r600_common_screen *, struct r600_query *); 12201e04c3fSmrg bool (*begin)(struct r600_common_context *, struct r600_query *); 12301e04c3fSmrg bool (*end)(struct r600_common_context *, struct r600_query *); 12401e04c3fSmrg bool (*get_result)(struct r600_common_context *, 12501e04c3fSmrg struct r600_query *, bool wait, 12601e04c3fSmrg union pipe_query_result *result); 12701e04c3fSmrg void (*get_result_resource)(struct r600_common_context *, 12801e04c3fSmrg struct r600_query *, bool wait, 12901e04c3fSmrg enum pipe_query_value_type result_type, 13001e04c3fSmrg int index, 13101e04c3fSmrg struct pipe_resource *resource, 13201e04c3fSmrg unsigned offset); 13301e04c3fSmrg}; 13401e04c3fSmrg 13501e04c3fSmrgstruct r600_query { 13601e04c3fSmrg struct threaded_query b; 13701e04c3fSmrg struct r600_query_ops *ops; 13801e04c3fSmrg 13901e04c3fSmrg /* The type of query */ 14001e04c3fSmrg unsigned type; 14101e04c3fSmrg}; 14201e04c3fSmrg 14301e04c3fSmrgenum { 14401e04c3fSmrg R600_QUERY_HW_FLAG_NO_START = (1 << 0), 14501e04c3fSmrg /* gap */ 14601e04c3fSmrg /* whether begin_query doesn't clear the result */ 14701e04c3fSmrg R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2), 14801e04c3fSmrg}; 14901e04c3fSmrg 15001e04c3fSmrgstruct r600_query_hw_ops { 15101e04c3fSmrg bool (*prepare_buffer)(struct r600_common_screen *, 15201e04c3fSmrg struct r600_query_hw *, 15301e04c3fSmrg struct r600_resource *); 15401e04c3fSmrg void (*emit_start)(struct r600_common_context *, 15501e04c3fSmrg struct r600_query_hw *, 15601e04c3fSmrg struct r600_resource *buffer, uint64_t va); 15701e04c3fSmrg void (*emit_stop)(struct r600_common_context *, 15801e04c3fSmrg struct r600_query_hw *, 15901e04c3fSmrg struct r600_resource *buffer, uint64_t va); 16001e04c3fSmrg void (*clear_result)(struct r600_query_hw *, union pipe_query_result *); 16101e04c3fSmrg void (*add_result)(struct r600_common_screen *screen, 16201e04c3fSmrg struct r600_query_hw *, void *buffer, 16301e04c3fSmrg union pipe_query_result *result); 16401e04c3fSmrg}; 16501e04c3fSmrg 16601e04c3fSmrgstruct r600_query_buffer { 16701e04c3fSmrg /* The buffer where query results are stored. */ 16801e04c3fSmrg struct r600_resource *buf; 16901e04c3fSmrg /* Offset of the next free result after current query data */ 17001e04c3fSmrg unsigned results_end; 17101e04c3fSmrg /* If a query buffer is full, a new buffer is created and the old one 17201e04c3fSmrg * is put in here. When we calculate the result, we sum up the samples 17301e04c3fSmrg * from all buffers. */ 17401e04c3fSmrg struct r600_query_buffer *previous; 17501e04c3fSmrg}; 17601e04c3fSmrg 17701e04c3fSmrgstruct r600_query_hw { 17801e04c3fSmrg struct r600_query b; 17901e04c3fSmrg struct r600_query_hw_ops *ops; 18001e04c3fSmrg unsigned flags; 18101e04c3fSmrg 18201e04c3fSmrg /* The query buffer and how many results are in it. */ 18301e04c3fSmrg struct r600_query_buffer buffer; 18401e04c3fSmrg /* Size of the result in memory for both begin_query and end_query, 18501e04c3fSmrg * this can be one or two numbers, or it could even be a size of a structure. */ 18601e04c3fSmrg unsigned result_size; 18701e04c3fSmrg /* The number of dwords for begin_query or end_query. */ 18801e04c3fSmrg unsigned num_cs_dw_begin; 18901e04c3fSmrg unsigned num_cs_dw_end; 19001e04c3fSmrg /* Linked list of queries */ 19101e04c3fSmrg struct list_head list; 19201e04c3fSmrg /* For transform feedback: which stream the query is for */ 19301e04c3fSmrg unsigned stream; 19401e04c3fSmrg}; 19501e04c3fSmrg 19601e04c3fSmrgbool r600_query_hw_init(struct r600_common_screen *rscreen, 19701e04c3fSmrg struct r600_query_hw *query); 19801e04c3fSmrgvoid r600_query_hw_destroy(struct r600_common_screen *rscreen, 19901e04c3fSmrg struct r600_query *rquery); 20001e04c3fSmrgbool r600_query_hw_begin(struct r600_common_context *rctx, 20101e04c3fSmrg struct r600_query *rquery); 20201e04c3fSmrgbool r600_query_hw_end(struct r600_common_context *rctx, 20301e04c3fSmrg struct r600_query *rquery); 20401e04c3fSmrgbool r600_query_hw_get_result(struct r600_common_context *rctx, 20501e04c3fSmrg struct r600_query *rquery, 20601e04c3fSmrg bool wait, 20701e04c3fSmrg union pipe_query_result *result); 20801e04c3fSmrg 20901e04c3fSmrg/* Performance counters */ 21001e04c3fSmrgenum { 21101e04c3fSmrg /* This block is part of the shader engine */ 21201e04c3fSmrg R600_PC_BLOCK_SE = (1 << 0), 21301e04c3fSmrg 21401e04c3fSmrg /* Expose per-instance groups instead of summing all instances (within 21501e04c3fSmrg * an SE). */ 21601e04c3fSmrg R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), 21701e04c3fSmrg 21801e04c3fSmrg /* Expose per-SE groups instead of summing instances across SEs. */ 21901e04c3fSmrg R600_PC_BLOCK_SE_GROUPS = (1 << 2), 22001e04c3fSmrg 22101e04c3fSmrg /* Shader block */ 22201e04c3fSmrg R600_PC_BLOCK_SHADER = (1 << 3), 22301e04c3fSmrg 22401e04c3fSmrg /* Non-shader block with perfcounters windowed by shaders. */ 22501e04c3fSmrg R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4), 22601e04c3fSmrg}; 22701e04c3fSmrg 22801e04c3fSmrg/* Describes a hardware block with performance counters. Multiple instances of 22901e04c3fSmrg * each block, possibly per-SE, may exist on the chip. Depending on the block 23001e04c3fSmrg * and on the user's configuration, we either 23101e04c3fSmrg * (a) expose every instance as a performance counter group, 23201e04c3fSmrg * (b) expose a single performance counter group that reports the sum over all 23301e04c3fSmrg * instances, or 23401e04c3fSmrg * (c) expose one performance counter group per instance, but summed over all 23501e04c3fSmrg * shader engines. 23601e04c3fSmrg */ 23701e04c3fSmrgstruct r600_perfcounter_block { 23801e04c3fSmrg const char *basename; 23901e04c3fSmrg unsigned flags; 24001e04c3fSmrg unsigned num_counters; 24101e04c3fSmrg unsigned num_selectors; 24201e04c3fSmrg unsigned num_instances; 24301e04c3fSmrg 24401e04c3fSmrg unsigned num_groups; 24501e04c3fSmrg char *group_names; 24601e04c3fSmrg unsigned group_name_stride; 24701e04c3fSmrg 24801e04c3fSmrg char *selector_names; 24901e04c3fSmrg unsigned selector_name_stride; 25001e04c3fSmrg 25101e04c3fSmrg void *data; 25201e04c3fSmrg}; 25301e04c3fSmrg 25401e04c3fSmrgstruct r600_perfcounters { 25501e04c3fSmrg unsigned num_groups; 25601e04c3fSmrg unsigned num_blocks; 25701e04c3fSmrg struct r600_perfcounter_block *blocks; 25801e04c3fSmrg 25901e04c3fSmrg unsigned num_start_cs_dwords; 26001e04c3fSmrg unsigned num_stop_cs_dwords; 26101e04c3fSmrg unsigned num_instance_cs_dwords; 26201e04c3fSmrg unsigned num_shaders_cs_dwords; 26301e04c3fSmrg 26401e04c3fSmrg unsigned num_shader_types; 26501e04c3fSmrg const char * const *shader_type_suffixes; 26601e04c3fSmrg const unsigned *shader_type_bits; 26701e04c3fSmrg 26801e04c3fSmrg void (*get_size)(struct r600_perfcounter_block *, 26901e04c3fSmrg unsigned count, unsigned *selectors, 27001e04c3fSmrg unsigned *num_select_dw, unsigned *num_read_dw); 27101e04c3fSmrg 27201e04c3fSmrg void (*emit_instance)(struct r600_common_context *, 27301e04c3fSmrg int se, int instance); 27401e04c3fSmrg void (*emit_shaders)(struct r600_common_context *, unsigned shaders); 27501e04c3fSmrg void (*emit_select)(struct r600_common_context *, 27601e04c3fSmrg struct r600_perfcounter_block *, 27701e04c3fSmrg unsigned count, unsigned *selectors); 27801e04c3fSmrg void (*emit_start)(struct r600_common_context *, 27901e04c3fSmrg struct r600_resource *buffer, uint64_t va); 28001e04c3fSmrg void (*emit_stop)(struct r600_common_context *, 28101e04c3fSmrg struct r600_resource *buffer, uint64_t va); 28201e04c3fSmrg void (*emit_read)(struct r600_common_context *, 28301e04c3fSmrg struct r600_perfcounter_block *, 28401e04c3fSmrg unsigned count, unsigned *selectors, 28501e04c3fSmrg struct r600_resource *buffer, uint64_t va); 28601e04c3fSmrg 28701e04c3fSmrg void (*cleanup)(struct r600_common_screen *); 28801e04c3fSmrg 28901e04c3fSmrg bool separate_se; 29001e04c3fSmrg bool separate_instance; 29101e04c3fSmrg}; 29201e04c3fSmrg 29301e04c3fSmrgstruct pipe_query *r600_create_batch_query(struct pipe_context *ctx, 29401e04c3fSmrg unsigned num_queries, 29501e04c3fSmrg unsigned *query_types); 29601e04c3fSmrg 29701e04c3fSmrgint r600_get_perfcounter_info(struct r600_common_screen *, 29801e04c3fSmrg unsigned index, 29901e04c3fSmrg struct pipe_driver_query_info *info); 30001e04c3fSmrgint r600_get_perfcounter_group_info(struct r600_common_screen *, 30101e04c3fSmrg unsigned index, 30201e04c3fSmrg struct pipe_driver_query_group_info *info); 30301e04c3fSmrg 30401e04c3fSmrgbool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks); 30501e04c3fSmrgvoid r600_perfcounters_add_block(struct r600_common_screen *, 30601e04c3fSmrg struct r600_perfcounters *, 30701e04c3fSmrg const char *name, unsigned flags, 30801e04c3fSmrg unsigned counters, unsigned selectors, 30901e04c3fSmrg unsigned instances, void *data); 31001e04c3fSmrgvoid r600_perfcounters_do_destroy(struct r600_perfcounters *); 31101e04c3fSmrgvoid r600_query_hw_reset_buffers(struct r600_common_context *rctx, 31201e04c3fSmrg struct r600_query_hw *query); 31301e04c3fSmrg 31401e04c3fSmrgstruct r600_qbo_state { 31501e04c3fSmrg void *saved_compute; 31601e04c3fSmrg struct pipe_constant_buffer saved_const0; 31701e04c3fSmrg struct pipe_shader_buffer saved_ssbo[3]; 31801e04c3fSmrg}; 31901e04c3fSmrg 32001e04c3fSmrg#endif /* R600_QUERY_H */ 321