101e04c3fSmrg/* 201e04c3fSmrg * Copyright 2015 Advanced Micro Devices, Inc. 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 2001e04c3fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 2101e04c3fSmrg * SOFTWARE. 2201e04c3fSmrg * 2301e04c3fSmrg * Authors: 2401e04c3fSmrg * Nicolai Hähnle <nicolai.haehnle@amd.com> 2501e04c3fSmrg * 2601e04c3fSmrg */ 2701e04c3fSmrg 2801e04c3fSmrg#include "util/u_memory.h" 2901e04c3fSmrg#include "r600_query.h" 3001e04c3fSmrg#include "r600_pipe_common.h" 3101e04c3fSmrg#include "r600d_common.h" 3201e04c3fSmrg 3301e04c3fSmrg/* Max counters per HW block */ 3401e04c3fSmrg#define R600_QUERY_MAX_COUNTERS 16 3501e04c3fSmrg 3601e04c3fSmrgstatic struct r600_perfcounter_block * 3701e04c3fSmrglookup_counter(struct r600_perfcounters *pc, unsigned index, 3801e04c3fSmrg unsigned *base_gid, unsigned *sub_index) 3901e04c3fSmrg{ 4001e04c3fSmrg struct r600_perfcounter_block *block = pc->blocks; 4101e04c3fSmrg unsigned bid; 4201e04c3fSmrg 4301e04c3fSmrg *base_gid = 0; 4401e04c3fSmrg for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { 4501e04c3fSmrg unsigned total = block->num_groups * block->num_selectors; 4601e04c3fSmrg 4701e04c3fSmrg if (index < total) { 4801e04c3fSmrg *sub_index = index; 4901e04c3fSmrg return block; 5001e04c3fSmrg } 5101e04c3fSmrg 5201e04c3fSmrg index -= total; 5301e04c3fSmrg *base_gid += block->num_groups; 5401e04c3fSmrg } 5501e04c3fSmrg 5601e04c3fSmrg return NULL; 5701e04c3fSmrg} 5801e04c3fSmrg 5901e04c3fSmrgstatic struct r600_perfcounter_block * 6001e04c3fSmrglookup_group(struct r600_perfcounters *pc, unsigned *index) 6101e04c3fSmrg{ 6201e04c3fSmrg unsigned bid; 6301e04c3fSmrg struct r600_perfcounter_block *block = pc->blocks; 6401e04c3fSmrg 6501e04c3fSmrg for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { 6601e04c3fSmrg if (*index < block->num_groups) 6701e04c3fSmrg return block; 6801e04c3fSmrg *index -= block->num_groups; 6901e04c3fSmrg } 7001e04c3fSmrg 7101e04c3fSmrg return NULL; 7201e04c3fSmrg} 7301e04c3fSmrg 7401e04c3fSmrgstruct r600_pc_group { 7501e04c3fSmrg struct r600_pc_group *next; 7601e04c3fSmrg struct r600_perfcounter_block *block; 7701e04c3fSmrg unsigned sub_gid; /* only used during init */ 7801e04c3fSmrg unsigned result_base; /* only used during init */ 7901e04c3fSmrg int se; 8001e04c3fSmrg int instance; 8101e04c3fSmrg unsigned num_counters; 8201e04c3fSmrg unsigned selectors[R600_QUERY_MAX_COUNTERS]; 8301e04c3fSmrg}; 8401e04c3fSmrg 8501e04c3fSmrgstruct r600_pc_counter { 8601e04c3fSmrg unsigned base; 8701e04c3fSmrg unsigned qwords; 8801e04c3fSmrg unsigned stride; /* in uint64s */ 8901e04c3fSmrg}; 9001e04c3fSmrg 9101e04c3fSmrg#define R600_PC_SHADERS_WINDOWING (1 << 31) 9201e04c3fSmrg 9301e04c3fSmrgstruct r600_query_pc { 9401e04c3fSmrg struct r600_query_hw b; 9501e04c3fSmrg 9601e04c3fSmrg unsigned shaders; 9701e04c3fSmrg unsigned num_counters; 9801e04c3fSmrg struct r600_pc_counter *counters; 9901e04c3fSmrg struct r600_pc_group *groups; 10001e04c3fSmrg}; 10101e04c3fSmrg 10201e04c3fSmrgstatic void r600_pc_query_destroy(struct r600_common_screen *rscreen, 10301e04c3fSmrg struct r600_query *rquery) 10401e04c3fSmrg{ 10501e04c3fSmrg struct r600_query_pc *query = (struct r600_query_pc *)rquery; 10601e04c3fSmrg 10701e04c3fSmrg while (query->groups) { 10801e04c3fSmrg struct r600_pc_group *group = query->groups; 10901e04c3fSmrg query->groups = group->next; 11001e04c3fSmrg FREE(group); 11101e04c3fSmrg } 11201e04c3fSmrg 11301e04c3fSmrg FREE(query->counters); 11401e04c3fSmrg 11501e04c3fSmrg r600_query_hw_destroy(rscreen, rquery); 11601e04c3fSmrg} 11701e04c3fSmrg 11801e04c3fSmrgstatic bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen, 11901e04c3fSmrg struct r600_query_hw *hwquery, 12001e04c3fSmrg struct r600_resource *buffer) 12101e04c3fSmrg{ 12201e04c3fSmrg /* no-op */ 12301e04c3fSmrg return true; 12401e04c3fSmrg} 12501e04c3fSmrg 12601e04c3fSmrgstatic void r600_pc_query_emit_start(struct r600_common_context *ctx, 12701e04c3fSmrg struct r600_query_hw *hwquery, 12801e04c3fSmrg struct r600_resource *buffer, uint64_t va) 12901e04c3fSmrg{ 13001e04c3fSmrg struct r600_perfcounters *pc = ctx->screen->perfcounters; 13101e04c3fSmrg struct r600_query_pc *query = (struct r600_query_pc *)hwquery; 13201e04c3fSmrg struct r600_pc_group *group; 13301e04c3fSmrg int current_se = -1; 13401e04c3fSmrg int current_instance = -1; 13501e04c3fSmrg 13601e04c3fSmrg if (query->shaders) 13701e04c3fSmrg pc->emit_shaders(ctx, query->shaders); 13801e04c3fSmrg 13901e04c3fSmrg for (group = query->groups; group; group = group->next) { 14001e04c3fSmrg struct r600_perfcounter_block *block = group->block; 14101e04c3fSmrg 14201e04c3fSmrg if (group->se != current_se || group->instance != current_instance) { 14301e04c3fSmrg current_se = group->se; 14401e04c3fSmrg current_instance = group->instance; 14501e04c3fSmrg pc->emit_instance(ctx, group->se, group->instance); 14601e04c3fSmrg } 14701e04c3fSmrg 14801e04c3fSmrg pc->emit_select(ctx, block, group->num_counters, group->selectors); 14901e04c3fSmrg } 15001e04c3fSmrg 15101e04c3fSmrg if (current_se != -1 || current_instance != -1) 15201e04c3fSmrg pc->emit_instance(ctx, -1, -1); 15301e04c3fSmrg 15401e04c3fSmrg pc->emit_start(ctx, buffer, va); 15501e04c3fSmrg} 15601e04c3fSmrg 15701e04c3fSmrgstatic void r600_pc_query_emit_stop(struct r600_common_context *ctx, 15801e04c3fSmrg struct r600_query_hw *hwquery, 15901e04c3fSmrg struct r600_resource *buffer, uint64_t va) 16001e04c3fSmrg{ 16101e04c3fSmrg struct r600_perfcounters *pc = ctx->screen->perfcounters; 16201e04c3fSmrg struct r600_query_pc *query = (struct r600_query_pc *)hwquery; 16301e04c3fSmrg struct r600_pc_group *group; 16401e04c3fSmrg 16501e04c3fSmrg pc->emit_stop(ctx, buffer, va); 16601e04c3fSmrg 16701e04c3fSmrg for (group = query->groups; group; group = group->next) { 16801e04c3fSmrg struct r600_perfcounter_block *block = group->block; 16901e04c3fSmrg unsigned se = group->se >= 0 ? group->se : 0; 17001e04c3fSmrg unsigned se_end = se + 1; 17101e04c3fSmrg 17201e04c3fSmrg if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0)) 17301e04c3fSmrg se_end = ctx->screen->info.max_se; 17401e04c3fSmrg 17501e04c3fSmrg do { 17601e04c3fSmrg unsigned instance = group->instance >= 0 ? group->instance : 0; 17701e04c3fSmrg 17801e04c3fSmrg do { 17901e04c3fSmrg pc->emit_instance(ctx, se, instance); 18001e04c3fSmrg pc->emit_read(ctx, block, 18101e04c3fSmrg group->num_counters, group->selectors, 18201e04c3fSmrg buffer, va); 18301e04c3fSmrg va += sizeof(uint64_t) * group->num_counters; 18401e04c3fSmrg } while (group->instance < 0 && ++instance < block->num_instances); 18501e04c3fSmrg } while (++se < se_end); 18601e04c3fSmrg } 18701e04c3fSmrg 18801e04c3fSmrg pc->emit_instance(ctx, -1, -1); 18901e04c3fSmrg} 19001e04c3fSmrg 19101e04c3fSmrgstatic void r600_pc_query_clear_result(struct r600_query_hw *hwquery, 19201e04c3fSmrg union pipe_query_result *result) 19301e04c3fSmrg{ 19401e04c3fSmrg struct r600_query_pc *query = (struct r600_query_pc *)hwquery; 19501e04c3fSmrg 19601e04c3fSmrg memset(result, 0, sizeof(result->batch[0]) * query->num_counters); 19701e04c3fSmrg} 19801e04c3fSmrg 19901e04c3fSmrgstatic void r600_pc_query_add_result(struct r600_common_screen *rscreen, 20001e04c3fSmrg struct r600_query_hw *hwquery, 20101e04c3fSmrg void *buffer, 20201e04c3fSmrg union pipe_query_result *result) 20301e04c3fSmrg{ 20401e04c3fSmrg struct r600_query_pc *query = (struct r600_query_pc *)hwquery; 20501e04c3fSmrg uint64_t *results = buffer; 20601e04c3fSmrg unsigned i, j; 20701e04c3fSmrg 20801e04c3fSmrg for (i = 0; i < query->num_counters; ++i) { 20901e04c3fSmrg struct r600_pc_counter *counter = &query->counters[i]; 21001e04c3fSmrg 21101e04c3fSmrg for (j = 0; j < counter->qwords; ++j) { 21201e04c3fSmrg uint32_t value = results[counter->base + j * counter->stride]; 21301e04c3fSmrg result->batch[i].u64 += value; 21401e04c3fSmrg } 21501e04c3fSmrg } 21601e04c3fSmrg} 21701e04c3fSmrg 21801e04c3fSmrgstatic struct r600_query_ops batch_query_ops = { 21901e04c3fSmrg .destroy = r600_pc_query_destroy, 22001e04c3fSmrg .begin = r600_query_hw_begin, 22101e04c3fSmrg .end = r600_query_hw_end, 22201e04c3fSmrg .get_result = r600_query_hw_get_result 22301e04c3fSmrg}; 22401e04c3fSmrg 22501e04c3fSmrgstatic struct r600_query_hw_ops batch_query_hw_ops = { 22601e04c3fSmrg .prepare_buffer = r600_pc_query_prepare_buffer, 22701e04c3fSmrg .emit_start = r600_pc_query_emit_start, 22801e04c3fSmrg .emit_stop = r600_pc_query_emit_stop, 22901e04c3fSmrg .clear_result = r600_pc_query_clear_result, 23001e04c3fSmrg .add_result = r600_pc_query_add_result, 23101e04c3fSmrg}; 23201e04c3fSmrg 23301e04c3fSmrgstatic struct r600_pc_group *get_group_state(struct r600_common_screen *screen, 23401e04c3fSmrg struct r600_query_pc *query, 23501e04c3fSmrg struct r600_perfcounter_block *block, 23601e04c3fSmrg unsigned sub_gid) 23701e04c3fSmrg{ 23801e04c3fSmrg struct r600_pc_group *group = query->groups; 23901e04c3fSmrg 24001e04c3fSmrg while (group) { 24101e04c3fSmrg if (group->block == block && group->sub_gid == sub_gid) 24201e04c3fSmrg return group; 24301e04c3fSmrg group = group->next; 24401e04c3fSmrg } 24501e04c3fSmrg 24601e04c3fSmrg group = CALLOC_STRUCT(r600_pc_group); 24701e04c3fSmrg if (!group) 24801e04c3fSmrg return NULL; 24901e04c3fSmrg 25001e04c3fSmrg group->block = block; 25101e04c3fSmrg group->sub_gid = sub_gid; 25201e04c3fSmrg 25301e04c3fSmrg if (block->flags & R600_PC_BLOCK_SHADER) { 25401e04c3fSmrg unsigned sub_gids = block->num_instances; 25501e04c3fSmrg unsigned shader_id; 25601e04c3fSmrg unsigned shaders; 25701e04c3fSmrg unsigned query_shaders; 25801e04c3fSmrg 25901e04c3fSmrg if (block->flags & R600_PC_BLOCK_SE_GROUPS) 26001e04c3fSmrg sub_gids = sub_gids * screen->info.max_se; 26101e04c3fSmrg shader_id = sub_gid / sub_gids; 26201e04c3fSmrg sub_gid = sub_gid % sub_gids; 26301e04c3fSmrg 26401e04c3fSmrg shaders = screen->perfcounters->shader_type_bits[shader_id]; 26501e04c3fSmrg 26601e04c3fSmrg query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING; 26701e04c3fSmrg if (query_shaders && query_shaders != shaders) { 26801e04c3fSmrg fprintf(stderr, "r600_perfcounter: incompatible shader groups\n"); 26901e04c3fSmrg FREE(group); 27001e04c3fSmrg return NULL; 27101e04c3fSmrg } 27201e04c3fSmrg query->shaders = shaders; 27301e04c3fSmrg } 27401e04c3fSmrg 27501e04c3fSmrg if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { 27601e04c3fSmrg // A non-zero value in query->shaders ensures that the shader 27701e04c3fSmrg // masking is reset unless the user explicitly requests one. 27801e04c3fSmrg query->shaders = R600_PC_SHADERS_WINDOWING; 27901e04c3fSmrg } 28001e04c3fSmrg 28101e04c3fSmrg if (block->flags & R600_PC_BLOCK_SE_GROUPS) { 28201e04c3fSmrg group->se = sub_gid / block->num_instances; 28301e04c3fSmrg sub_gid = sub_gid % block->num_instances; 28401e04c3fSmrg } else { 28501e04c3fSmrg group->se = -1; 28601e04c3fSmrg } 28701e04c3fSmrg 28801e04c3fSmrg if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { 28901e04c3fSmrg group->instance = sub_gid; 29001e04c3fSmrg } else { 29101e04c3fSmrg group->instance = -1; 29201e04c3fSmrg } 29301e04c3fSmrg 29401e04c3fSmrg group->next = query->groups; 29501e04c3fSmrg query->groups = group; 29601e04c3fSmrg 29701e04c3fSmrg return group; 29801e04c3fSmrg} 29901e04c3fSmrg 30001e04c3fSmrgstruct pipe_query *r600_create_batch_query(struct pipe_context *ctx, 30101e04c3fSmrg unsigned num_queries, 30201e04c3fSmrg unsigned *query_types) 30301e04c3fSmrg{ 30401e04c3fSmrg struct r600_common_screen *screen = 30501e04c3fSmrg (struct r600_common_screen *)ctx->screen; 30601e04c3fSmrg struct r600_perfcounters *pc = screen->perfcounters; 30701e04c3fSmrg struct r600_perfcounter_block *block; 30801e04c3fSmrg struct r600_pc_group *group; 30901e04c3fSmrg struct r600_query_pc *query; 31001e04c3fSmrg unsigned base_gid, sub_gid, sub_index; 31101e04c3fSmrg unsigned i, j; 31201e04c3fSmrg 31301e04c3fSmrg if (!pc) 31401e04c3fSmrg return NULL; 31501e04c3fSmrg 31601e04c3fSmrg query = CALLOC_STRUCT(r600_query_pc); 31701e04c3fSmrg if (!query) 31801e04c3fSmrg return NULL; 31901e04c3fSmrg 32001e04c3fSmrg query->b.b.ops = &batch_query_ops; 32101e04c3fSmrg query->b.ops = &batch_query_hw_ops; 32201e04c3fSmrg 32301e04c3fSmrg query->num_counters = num_queries; 32401e04c3fSmrg 32501e04c3fSmrg /* Collect selectors per group */ 32601e04c3fSmrg for (i = 0; i < num_queries; ++i) { 32701e04c3fSmrg unsigned sub_gid; 32801e04c3fSmrg 32901e04c3fSmrg if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER) 33001e04c3fSmrg goto error; 33101e04c3fSmrg 33201e04c3fSmrg block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER, 33301e04c3fSmrg &base_gid, &sub_index); 33401e04c3fSmrg if (!block) 33501e04c3fSmrg goto error; 33601e04c3fSmrg 33701e04c3fSmrg sub_gid = sub_index / block->num_selectors; 33801e04c3fSmrg sub_index = sub_index % block->num_selectors; 33901e04c3fSmrg 34001e04c3fSmrg group = get_group_state(screen, query, block, sub_gid); 34101e04c3fSmrg if (!group) 34201e04c3fSmrg goto error; 34301e04c3fSmrg 34401e04c3fSmrg if (group->num_counters >= block->num_counters) { 34501e04c3fSmrg fprintf(stderr, 34601e04c3fSmrg "perfcounter group %s: too many selected\n", 34701e04c3fSmrg block->basename); 34801e04c3fSmrg goto error; 34901e04c3fSmrg } 35001e04c3fSmrg group->selectors[group->num_counters] = sub_index; 35101e04c3fSmrg ++group->num_counters; 35201e04c3fSmrg } 35301e04c3fSmrg 35401e04c3fSmrg /* Compute result bases and CS size per group */ 35501e04c3fSmrg query->b.num_cs_dw_begin = pc->num_start_cs_dwords; 35601e04c3fSmrg query->b.num_cs_dw_end = pc->num_stop_cs_dwords; 35701e04c3fSmrg 35801e04c3fSmrg query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */ 35901e04c3fSmrg query->b.num_cs_dw_end += pc->num_instance_cs_dwords; 36001e04c3fSmrg 36101e04c3fSmrg i = 0; 36201e04c3fSmrg for (group = query->groups; group; group = group->next) { 36301e04c3fSmrg struct r600_perfcounter_block *block = group->block; 36401e04c3fSmrg unsigned select_dw, read_dw; 36501e04c3fSmrg unsigned instances = 1; 36601e04c3fSmrg 36701e04c3fSmrg if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0) 36801e04c3fSmrg instances = screen->info.max_se; 36901e04c3fSmrg if (group->instance < 0) 37001e04c3fSmrg instances *= block->num_instances; 37101e04c3fSmrg 37201e04c3fSmrg group->result_base = i; 37301e04c3fSmrg query->b.result_size += sizeof(uint64_t) * instances * group->num_counters; 37401e04c3fSmrg i += instances * group->num_counters; 37501e04c3fSmrg 37601e04c3fSmrg pc->get_size(block, group->num_counters, group->selectors, 37701e04c3fSmrg &select_dw, &read_dw); 37801e04c3fSmrg query->b.num_cs_dw_begin += select_dw; 37901e04c3fSmrg query->b.num_cs_dw_end += instances * read_dw; 38001e04c3fSmrg query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */ 38101e04c3fSmrg query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords; 38201e04c3fSmrg } 38301e04c3fSmrg 38401e04c3fSmrg if (query->shaders) { 38501e04c3fSmrg if (query->shaders == R600_PC_SHADERS_WINDOWING) 38601e04c3fSmrg query->shaders = 0xffffffff; 38701e04c3fSmrg query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords; 38801e04c3fSmrg } 38901e04c3fSmrg 39001e04c3fSmrg /* Map user-supplied query array to result indices */ 39101e04c3fSmrg query->counters = CALLOC(num_queries, sizeof(*query->counters)); 39201e04c3fSmrg for (i = 0; i < num_queries; ++i) { 39301e04c3fSmrg struct r600_pc_counter *counter = &query->counters[i]; 39401e04c3fSmrg struct r600_perfcounter_block *block; 39501e04c3fSmrg 39601e04c3fSmrg block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER, 39701e04c3fSmrg &base_gid, &sub_index); 39801e04c3fSmrg 39901e04c3fSmrg sub_gid = sub_index / block->num_selectors; 40001e04c3fSmrg sub_index = sub_index % block->num_selectors; 40101e04c3fSmrg 40201e04c3fSmrg group = get_group_state(screen, query, block, sub_gid); 40301e04c3fSmrg assert(group != NULL); 40401e04c3fSmrg 40501e04c3fSmrg for (j = 0; j < group->num_counters; ++j) { 40601e04c3fSmrg if (group->selectors[j] == sub_index) 40701e04c3fSmrg break; 40801e04c3fSmrg } 40901e04c3fSmrg 41001e04c3fSmrg counter->base = group->result_base + j; 41101e04c3fSmrg counter->stride = group->num_counters; 41201e04c3fSmrg 41301e04c3fSmrg counter->qwords = 1; 41401e04c3fSmrg if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0) 41501e04c3fSmrg counter->qwords = screen->info.max_se; 41601e04c3fSmrg if (group->instance < 0) 41701e04c3fSmrg counter->qwords *= block->num_instances; 41801e04c3fSmrg } 41901e04c3fSmrg 42001e04c3fSmrg if (!r600_query_hw_init(screen, &query->b)) 42101e04c3fSmrg goto error; 42201e04c3fSmrg 42301e04c3fSmrg return (struct pipe_query *)query; 42401e04c3fSmrg 42501e04c3fSmrgerror: 42601e04c3fSmrg r600_pc_query_destroy(screen, &query->b.b); 42701e04c3fSmrg return NULL; 42801e04c3fSmrg} 42901e04c3fSmrg 43001e04c3fSmrgstatic bool r600_init_block_names(struct r600_common_screen *screen, 43101e04c3fSmrg struct r600_perfcounter_block *block) 43201e04c3fSmrg{ 43301e04c3fSmrg unsigned i, j, k; 43401e04c3fSmrg unsigned groups_shader = 1, groups_se = 1, groups_instance = 1; 43501e04c3fSmrg unsigned namelen; 43601e04c3fSmrg char *groupname; 43701e04c3fSmrg char *p; 43801e04c3fSmrg 43901e04c3fSmrg if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) 44001e04c3fSmrg groups_instance = block->num_instances; 44101e04c3fSmrg if (block->flags & R600_PC_BLOCK_SE_GROUPS) 44201e04c3fSmrg groups_se = screen->info.max_se; 44301e04c3fSmrg if (block->flags & R600_PC_BLOCK_SHADER) 44401e04c3fSmrg groups_shader = screen->perfcounters->num_shader_types; 44501e04c3fSmrg 44601e04c3fSmrg namelen = strlen(block->basename); 44701e04c3fSmrg block->group_name_stride = namelen + 1; 44801e04c3fSmrg if (block->flags & R600_PC_BLOCK_SHADER) 44901e04c3fSmrg block->group_name_stride += 3; 45001e04c3fSmrg if (block->flags & R600_PC_BLOCK_SE_GROUPS) { 45101e04c3fSmrg assert(groups_se <= 10); 45201e04c3fSmrg block->group_name_stride += 1; 45301e04c3fSmrg 45401e04c3fSmrg if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) 45501e04c3fSmrg block->group_name_stride += 1; 45601e04c3fSmrg } 45701e04c3fSmrg if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { 45801e04c3fSmrg assert(groups_instance <= 100); 45901e04c3fSmrg block->group_name_stride += 2; 46001e04c3fSmrg } 46101e04c3fSmrg 46201e04c3fSmrg block->group_names = MALLOC(block->num_groups * block->group_name_stride); 46301e04c3fSmrg if (!block->group_names) 46401e04c3fSmrg return false; 46501e04c3fSmrg 46601e04c3fSmrg groupname = block->group_names; 46701e04c3fSmrg for (i = 0; i < groups_shader; ++i) { 46801e04c3fSmrg const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i]; 46901e04c3fSmrg unsigned shaderlen = strlen(shader_suffix); 47001e04c3fSmrg for (j = 0; j < groups_se; ++j) { 47101e04c3fSmrg for (k = 0; k < groups_instance; ++k) { 47201e04c3fSmrg strcpy(groupname, block->basename); 47301e04c3fSmrg p = groupname + namelen; 47401e04c3fSmrg 47501e04c3fSmrg if (block->flags & R600_PC_BLOCK_SHADER) { 47601e04c3fSmrg strcpy(p, shader_suffix); 47701e04c3fSmrg p += shaderlen; 47801e04c3fSmrg } 47901e04c3fSmrg 48001e04c3fSmrg if (block->flags & R600_PC_BLOCK_SE_GROUPS) { 48101e04c3fSmrg p += sprintf(p, "%d", j); 48201e04c3fSmrg if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) 48301e04c3fSmrg *p++ = '_'; 48401e04c3fSmrg } 48501e04c3fSmrg 48601e04c3fSmrg if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) 48701e04c3fSmrg p += sprintf(p, "%d", k); 48801e04c3fSmrg 48901e04c3fSmrg groupname += block->group_name_stride; 49001e04c3fSmrg } 49101e04c3fSmrg } 49201e04c3fSmrg } 49301e04c3fSmrg 49401e04c3fSmrg assert(block->num_selectors <= 1000); 49501e04c3fSmrg block->selector_name_stride = block->group_name_stride + 4; 49601e04c3fSmrg block->selector_names = MALLOC(block->num_groups * block->num_selectors * 49701e04c3fSmrg block->selector_name_stride); 49801e04c3fSmrg if (!block->selector_names) 49901e04c3fSmrg return false; 50001e04c3fSmrg 50101e04c3fSmrg groupname = block->group_names; 50201e04c3fSmrg p = block->selector_names; 50301e04c3fSmrg for (i = 0; i < block->num_groups; ++i) { 50401e04c3fSmrg for (j = 0; j < block->num_selectors; ++j) { 50501e04c3fSmrg sprintf(p, "%s_%03d", groupname, j); 50601e04c3fSmrg p += block->selector_name_stride; 50701e04c3fSmrg } 50801e04c3fSmrg groupname += block->group_name_stride; 50901e04c3fSmrg } 51001e04c3fSmrg 51101e04c3fSmrg return true; 51201e04c3fSmrg} 51301e04c3fSmrg 51401e04c3fSmrgint r600_get_perfcounter_info(struct r600_common_screen *screen, 51501e04c3fSmrg unsigned index, 51601e04c3fSmrg struct pipe_driver_query_info *info) 51701e04c3fSmrg{ 51801e04c3fSmrg struct r600_perfcounters *pc = screen->perfcounters; 51901e04c3fSmrg struct r600_perfcounter_block *block; 52001e04c3fSmrg unsigned base_gid, sub; 52101e04c3fSmrg 52201e04c3fSmrg if (!pc) 52301e04c3fSmrg return 0; 52401e04c3fSmrg 52501e04c3fSmrg if (!info) { 52601e04c3fSmrg unsigned bid, num_queries = 0; 52701e04c3fSmrg 52801e04c3fSmrg for (bid = 0; bid < pc->num_blocks; ++bid) { 52901e04c3fSmrg num_queries += pc->blocks[bid].num_selectors * 53001e04c3fSmrg pc->blocks[bid].num_groups; 53101e04c3fSmrg } 53201e04c3fSmrg 53301e04c3fSmrg return num_queries; 53401e04c3fSmrg } 53501e04c3fSmrg 53601e04c3fSmrg block = lookup_counter(pc, index, &base_gid, &sub); 53701e04c3fSmrg if (!block) 53801e04c3fSmrg return 0; 53901e04c3fSmrg 54001e04c3fSmrg if (!block->selector_names) { 54101e04c3fSmrg if (!r600_init_block_names(screen, block)) 54201e04c3fSmrg return 0; 54301e04c3fSmrg } 54401e04c3fSmrg info->name = block->selector_names + sub * block->selector_name_stride; 54501e04c3fSmrg info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index; 54601e04c3fSmrg info->max_value.u64 = 0; 54701e04c3fSmrg info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; 54801e04c3fSmrg info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE; 54901e04c3fSmrg info->group_id = base_gid + sub / block->num_selectors; 55001e04c3fSmrg info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; 55101e04c3fSmrg if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups) 55201e04c3fSmrg info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST; 55301e04c3fSmrg return 1; 55401e04c3fSmrg} 55501e04c3fSmrg 55601e04c3fSmrgint r600_get_perfcounter_group_info(struct r600_common_screen *screen, 55701e04c3fSmrg unsigned index, 55801e04c3fSmrg struct pipe_driver_query_group_info *info) 55901e04c3fSmrg{ 56001e04c3fSmrg struct r600_perfcounters *pc = screen->perfcounters; 56101e04c3fSmrg struct r600_perfcounter_block *block; 56201e04c3fSmrg 56301e04c3fSmrg if (!pc) 56401e04c3fSmrg return 0; 56501e04c3fSmrg 56601e04c3fSmrg if (!info) 56701e04c3fSmrg return pc->num_groups; 56801e04c3fSmrg 56901e04c3fSmrg block = lookup_group(pc, &index); 57001e04c3fSmrg if (!block) 57101e04c3fSmrg return 0; 57201e04c3fSmrg 57301e04c3fSmrg if (!block->group_names) { 57401e04c3fSmrg if (!r600_init_block_names(screen, block)) 57501e04c3fSmrg return 0; 57601e04c3fSmrg } 57701e04c3fSmrg info->name = block->group_names + index * block->group_name_stride; 57801e04c3fSmrg info->num_queries = block->num_selectors; 57901e04c3fSmrg info->max_active_queries = block->num_counters; 58001e04c3fSmrg return 1; 58101e04c3fSmrg} 58201e04c3fSmrg 58301e04c3fSmrgvoid r600_perfcounters_destroy(struct r600_common_screen *rscreen) 58401e04c3fSmrg{ 58501e04c3fSmrg if (rscreen->perfcounters) 58601e04c3fSmrg rscreen->perfcounters->cleanup(rscreen); 58701e04c3fSmrg} 58801e04c3fSmrg 58901e04c3fSmrgbool r600_perfcounters_init(struct r600_perfcounters *pc, 59001e04c3fSmrg unsigned num_blocks) 59101e04c3fSmrg{ 59201e04c3fSmrg pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block)); 59301e04c3fSmrg if (!pc->blocks) 59401e04c3fSmrg return false; 59501e04c3fSmrg 59601e04c3fSmrg pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false); 59701e04c3fSmrg pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false); 59801e04c3fSmrg 59901e04c3fSmrg return true; 60001e04c3fSmrg} 60101e04c3fSmrg 60201e04c3fSmrgvoid r600_perfcounters_add_block(struct r600_common_screen *rscreen, 60301e04c3fSmrg struct r600_perfcounters *pc, 60401e04c3fSmrg const char *name, unsigned flags, 60501e04c3fSmrg unsigned counters, unsigned selectors, 60601e04c3fSmrg unsigned instances, void *data) 60701e04c3fSmrg{ 60801e04c3fSmrg struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks]; 60901e04c3fSmrg 61001e04c3fSmrg assert(counters <= R600_QUERY_MAX_COUNTERS); 61101e04c3fSmrg 61201e04c3fSmrg block->basename = name; 61301e04c3fSmrg block->flags = flags; 61401e04c3fSmrg block->num_counters = counters; 61501e04c3fSmrg block->num_selectors = selectors; 61601e04c3fSmrg block->num_instances = MAX2(instances, 1); 61701e04c3fSmrg block->data = data; 61801e04c3fSmrg 61901e04c3fSmrg if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE)) 62001e04c3fSmrg block->flags |= R600_PC_BLOCK_SE_GROUPS; 62101e04c3fSmrg if (pc->separate_instance && block->num_instances > 1) 62201e04c3fSmrg block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS; 62301e04c3fSmrg 62401e04c3fSmrg if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { 62501e04c3fSmrg block->num_groups = block->num_instances; 62601e04c3fSmrg } else { 62701e04c3fSmrg block->num_groups = 1; 62801e04c3fSmrg } 62901e04c3fSmrg 63001e04c3fSmrg if (block->flags & R600_PC_BLOCK_SE_GROUPS) 63101e04c3fSmrg block->num_groups *= rscreen->info.max_se; 63201e04c3fSmrg if (block->flags & R600_PC_BLOCK_SHADER) 63301e04c3fSmrg block->num_groups *= pc->num_shader_types; 63401e04c3fSmrg 63501e04c3fSmrg ++pc->num_blocks; 63601e04c3fSmrg pc->num_groups += block->num_groups; 63701e04c3fSmrg} 63801e04c3fSmrg 63901e04c3fSmrgvoid r600_perfcounters_do_destroy(struct r600_perfcounters *pc) 64001e04c3fSmrg{ 64101e04c3fSmrg unsigned i; 64201e04c3fSmrg 64301e04c3fSmrg for (i = 0; i < pc->num_blocks; ++i) { 64401e04c3fSmrg FREE(pc->blocks[i].group_names); 64501e04c3fSmrg FREE(pc->blocks[i].selector_names); 64601e04c3fSmrg } 64701e04c3fSmrg FREE(pc->blocks); 64801e04c3fSmrg FREE(pc); 64901e04c3fSmrg} 650