/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *  Nicolai Hähnle <nicolai.haehnle@amd.com>
 *
 */
2701e04c3fSmrg
2801e04c3fSmrg#include "util/u_memory.h"
2901e04c3fSmrg#include "r600_query.h"
3001e04c3fSmrg#include "r600_pipe_common.h"
3101e04c3fSmrg#include "r600d_common.h"
3201e04c3fSmrg
/*
 * Max counters per HW block.
 *
 * This is also the capacity of the selectors[] array in struct
 * r600_pc_group, and r600_perfcounters_add_block() asserts that a block
 * never declares more counters than this.
 */
#define R600_QUERY_MAX_COUNTERS 16
3501e04c3fSmrg
3601e04c3fSmrgstatic struct r600_perfcounter_block *
3701e04c3fSmrglookup_counter(struct r600_perfcounters *pc, unsigned index,
3801e04c3fSmrg	       unsigned *base_gid, unsigned *sub_index)
3901e04c3fSmrg{
4001e04c3fSmrg	struct r600_perfcounter_block *block = pc->blocks;
4101e04c3fSmrg	unsigned bid;
4201e04c3fSmrg
4301e04c3fSmrg	*base_gid = 0;
4401e04c3fSmrg	for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
4501e04c3fSmrg		unsigned total = block->num_groups * block->num_selectors;
4601e04c3fSmrg
4701e04c3fSmrg		if (index < total) {
4801e04c3fSmrg			*sub_index = index;
4901e04c3fSmrg			return block;
5001e04c3fSmrg		}
5101e04c3fSmrg
5201e04c3fSmrg		index -= total;
5301e04c3fSmrg		*base_gid += block->num_groups;
5401e04c3fSmrg	}
5501e04c3fSmrg
5601e04c3fSmrg	return NULL;
5701e04c3fSmrg}
5801e04c3fSmrg
5901e04c3fSmrgstatic struct r600_perfcounter_block *
6001e04c3fSmrglookup_group(struct r600_perfcounters *pc, unsigned *index)
6101e04c3fSmrg{
6201e04c3fSmrg	unsigned bid;
6301e04c3fSmrg	struct r600_perfcounter_block *block = pc->blocks;
6401e04c3fSmrg
6501e04c3fSmrg	for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
6601e04c3fSmrg		if (*index < block->num_groups)
6701e04c3fSmrg			return block;
6801e04c3fSmrg		*index -= block->num_groups;
6901e04c3fSmrg	}
7001e04c3fSmrg
7101e04c3fSmrg	return NULL;
7201e04c3fSmrg}
7301e04c3fSmrg
7401e04c3fSmrgstruct r600_pc_group {
7501e04c3fSmrg	struct r600_pc_group *next;
7601e04c3fSmrg	struct r600_perfcounter_block *block;
7701e04c3fSmrg	unsigned sub_gid; /* only used during init */
7801e04c3fSmrg	unsigned result_base; /* only used during init */
7901e04c3fSmrg	int se;
8001e04c3fSmrg	int instance;
8101e04c3fSmrg	unsigned num_counters;
8201e04c3fSmrg	unsigned selectors[R600_QUERY_MAX_COUNTERS];
8301e04c3fSmrg};
8401e04c3fSmrg
/*
 * Where the samples of one user-requested counter live in the result
 * buffer, which is addressed as an array of uint64_t.
 */
struct r600_pc_counter {
	/* Index of the first sample. */
	unsigned base;
	/* Number of samples that are summed into the final value. */
	unsigned qwords;
	/* Distance between consecutive samples, in uint64s. */
	unsigned stride;
};
9001e04c3fSmrg
/*
 * Marker bit stored in r600_query_pc::shaders.  The constant is unsigned
 * because shifting a signed 1 into bit 31 is undefined behaviour in C.
 */
#define R600_PC_SHADERS_WINDOWING (1u << 31)
9201e04c3fSmrg
9301e04c3fSmrgstruct r600_query_pc {
9401e04c3fSmrg	struct r600_query_hw b;
9501e04c3fSmrg
9601e04c3fSmrg	unsigned shaders;
9701e04c3fSmrg	unsigned num_counters;
9801e04c3fSmrg	struct r600_pc_counter *counters;
9901e04c3fSmrg	struct r600_pc_group *groups;
10001e04c3fSmrg};
10101e04c3fSmrg
10201e04c3fSmrgstatic void r600_pc_query_destroy(struct r600_common_screen *rscreen,
10301e04c3fSmrg				  struct r600_query *rquery)
10401e04c3fSmrg{
10501e04c3fSmrg	struct r600_query_pc *query = (struct r600_query_pc *)rquery;
10601e04c3fSmrg
10701e04c3fSmrg	while (query->groups) {
10801e04c3fSmrg		struct r600_pc_group *group = query->groups;
10901e04c3fSmrg		query->groups = group->next;
11001e04c3fSmrg		FREE(group);
11101e04c3fSmrg	}
11201e04c3fSmrg
11301e04c3fSmrg	FREE(query->counters);
11401e04c3fSmrg
11501e04c3fSmrg	r600_query_hw_destroy(rscreen, rquery);
11601e04c3fSmrg}
11701e04c3fSmrg
/*
 * prepare_buffer hook for perfcounter queries.
 *
 * Does nothing with the buffer and always reports success.
 */
static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
					 struct r600_query_hw *hwquery,
					 struct r600_resource *buffer)
{
	(void)screen;
	(void)hwquery;
	(void)buffer;

	return true;
}
12501e04c3fSmrg
12601e04c3fSmrgstatic void r600_pc_query_emit_start(struct r600_common_context *ctx,
12701e04c3fSmrg				     struct r600_query_hw *hwquery,
12801e04c3fSmrg				     struct r600_resource *buffer, uint64_t va)
12901e04c3fSmrg{
13001e04c3fSmrg	struct r600_perfcounters *pc = ctx->screen->perfcounters;
13101e04c3fSmrg	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
13201e04c3fSmrg	struct r600_pc_group *group;
13301e04c3fSmrg	int current_se = -1;
13401e04c3fSmrg	int current_instance = -1;
13501e04c3fSmrg
13601e04c3fSmrg	if (query->shaders)
13701e04c3fSmrg		pc->emit_shaders(ctx, query->shaders);
13801e04c3fSmrg
13901e04c3fSmrg	for (group = query->groups; group; group = group->next) {
14001e04c3fSmrg		struct r600_perfcounter_block *block = group->block;
14101e04c3fSmrg
14201e04c3fSmrg		if (group->se != current_se || group->instance != current_instance) {
14301e04c3fSmrg			current_se = group->se;
14401e04c3fSmrg			current_instance = group->instance;
14501e04c3fSmrg			pc->emit_instance(ctx, group->se, group->instance);
14601e04c3fSmrg		}
14701e04c3fSmrg
14801e04c3fSmrg		pc->emit_select(ctx, block, group->num_counters, group->selectors);
14901e04c3fSmrg	}
15001e04c3fSmrg
15101e04c3fSmrg	if (current_se != -1 || current_instance != -1)
15201e04c3fSmrg		pc->emit_instance(ctx, -1, -1);
15301e04c3fSmrg
15401e04c3fSmrg	pc->emit_start(ctx, buffer, va);
15501e04c3fSmrg}
15601e04c3fSmrg
15701e04c3fSmrgstatic void r600_pc_query_emit_stop(struct r600_common_context *ctx,
15801e04c3fSmrg				    struct r600_query_hw *hwquery,
15901e04c3fSmrg				    struct r600_resource *buffer, uint64_t va)
16001e04c3fSmrg{
16101e04c3fSmrg	struct r600_perfcounters *pc = ctx->screen->perfcounters;
16201e04c3fSmrg	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
16301e04c3fSmrg	struct r600_pc_group *group;
16401e04c3fSmrg
16501e04c3fSmrg	pc->emit_stop(ctx, buffer, va);
16601e04c3fSmrg
16701e04c3fSmrg	for (group = query->groups; group; group = group->next) {
16801e04c3fSmrg		struct r600_perfcounter_block *block = group->block;
16901e04c3fSmrg		unsigned se = group->se >= 0 ? group->se : 0;
17001e04c3fSmrg		unsigned se_end = se + 1;
17101e04c3fSmrg
17201e04c3fSmrg		if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
17301e04c3fSmrg			se_end = ctx->screen->info.max_se;
17401e04c3fSmrg
17501e04c3fSmrg		do {
17601e04c3fSmrg			unsigned instance = group->instance >= 0 ? group->instance : 0;
17701e04c3fSmrg
17801e04c3fSmrg			do {
17901e04c3fSmrg				pc->emit_instance(ctx, se, instance);
18001e04c3fSmrg				pc->emit_read(ctx, block,
18101e04c3fSmrg					      group->num_counters, group->selectors,
18201e04c3fSmrg					      buffer, va);
18301e04c3fSmrg				va += sizeof(uint64_t) * group->num_counters;
18401e04c3fSmrg			} while (group->instance < 0 && ++instance < block->num_instances);
18501e04c3fSmrg		} while (++se < se_end);
18601e04c3fSmrg	}
18701e04c3fSmrg
18801e04c3fSmrg	pc->emit_instance(ctx, -1, -1);
18901e04c3fSmrg}
19001e04c3fSmrg
19101e04c3fSmrgstatic void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
19201e04c3fSmrg				       union pipe_query_result *result)
19301e04c3fSmrg{
19401e04c3fSmrg	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
19501e04c3fSmrg
19601e04c3fSmrg	memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
19701e04c3fSmrg}
19801e04c3fSmrg
19901e04c3fSmrgstatic void r600_pc_query_add_result(struct r600_common_screen *rscreen,
20001e04c3fSmrg				     struct r600_query_hw *hwquery,
20101e04c3fSmrg				     void *buffer,
20201e04c3fSmrg				     union pipe_query_result *result)
20301e04c3fSmrg{
20401e04c3fSmrg	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
20501e04c3fSmrg	uint64_t *results = buffer;
20601e04c3fSmrg	unsigned i, j;
20701e04c3fSmrg
20801e04c3fSmrg	for (i = 0; i < query->num_counters; ++i) {
20901e04c3fSmrg		struct r600_pc_counter *counter = &query->counters[i];
21001e04c3fSmrg
21101e04c3fSmrg		for (j = 0; j < counter->qwords; ++j) {
21201e04c3fSmrg			uint32_t value = results[counter->base + j * counter->stride];
21301e04c3fSmrg			result->batch[i].u64 += value;
21401e04c3fSmrg		}
21501e04c3fSmrg	}
21601e04c3fSmrg}
21701e04c3fSmrg
21801e04c3fSmrgstatic struct r600_query_ops batch_query_ops = {
21901e04c3fSmrg	.destroy = r600_pc_query_destroy,
22001e04c3fSmrg	.begin = r600_query_hw_begin,
22101e04c3fSmrg	.end = r600_query_hw_end,
22201e04c3fSmrg	.get_result = r600_query_hw_get_result
22301e04c3fSmrg};
22401e04c3fSmrg
22501e04c3fSmrgstatic struct r600_query_hw_ops batch_query_hw_ops = {
22601e04c3fSmrg	.prepare_buffer = r600_pc_query_prepare_buffer,
22701e04c3fSmrg	.emit_start = r600_pc_query_emit_start,
22801e04c3fSmrg	.emit_stop = r600_pc_query_emit_stop,
22901e04c3fSmrg	.clear_result = r600_pc_query_clear_result,
23001e04c3fSmrg	.add_result = r600_pc_query_add_result,
23101e04c3fSmrg};
23201e04c3fSmrg
23301e04c3fSmrgstatic struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
23401e04c3fSmrg					     struct r600_query_pc *query,
23501e04c3fSmrg					     struct r600_perfcounter_block *block,
23601e04c3fSmrg					     unsigned sub_gid)
23701e04c3fSmrg{
23801e04c3fSmrg	struct r600_pc_group *group = query->groups;
23901e04c3fSmrg
24001e04c3fSmrg	while (group) {
24101e04c3fSmrg		if (group->block == block && group->sub_gid == sub_gid)
24201e04c3fSmrg			return group;
24301e04c3fSmrg		group = group->next;
24401e04c3fSmrg	}
24501e04c3fSmrg
24601e04c3fSmrg	group = CALLOC_STRUCT(r600_pc_group);
24701e04c3fSmrg	if (!group)
24801e04c3fSmrg		return NULL;
24901e04c3fSmrg
25001e04c3fSmrg	group->block = block;
25101e04c3fSmrg	group->sub_gid = sub_gid;
25201e04c3fSmrg
25301e04c3fSmrg	if (block->flags & R600_PC_BLOCK_SHADER) {
25401e04c3fSmrg		unsigned sub_gids = block->num_instances;
25501e04c3fSmrg		unsigned shader_id;
25601e04c3fSmrg		unsigned shaders;
25701e04c3fSmrg		unsigned query_shaders;
25801e04c3fSmrg
25901e04c3fSmrg		if (block->flags & R600_PC_BLOCK_SE_GROUPS)
26001e04c3fSmrg			sub_gids = sub_gids * screen->info.max_se;
26101e04c3fSmrg		shader_id = sub_gid / sub_gids;
26201e04c3fSmrg		sub_gid = sub_gid % sub_gids;
26301e04c3fSmrg
26401e04c3fSmrg		shaders = screen->perfcounters->shader_type_bits[shader_id];
26501e04c3fSmrg
26601e04c3fSmrg		query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
26701e04c3fSmrg		if (query_shaders && query_shaders != shaders) {
26801e04c3fSmrg			fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
26901e04c3fSmrg			FREE(group);
27001e04c3fSmrg			return NULL;
27101e04c3fSmrg		}
27201e04c3fSmrg		query->shaders = shaders;
27301e04c3fSmrg	}
27401e04c3fSmrg
27501e04c3fSmrg	if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
27601e04c3fSmrg		// A non-zero value in query->shaders ensures that the shader
27701e04c3fSmrg		// masking is reset unless the user explicitly requests one.
27801e04c3fSmrg		query->shaders = R600_PC_SHADERS_WINDOWING;
27901e04c3fSmrg	}
28001e04c3fSmrg
28101e04c3fSmrg	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
28201e04c3fSmrg		group->se = sub_gid / block->num_instances;
28301e04c3fSmrg		sub_gid = sub_gid % block->num_instances;
28401e04c3fSmrg	} else {
28501e04c3fSmrg		group->se = -1;
28601e04c3fSmrg	}
28701e04c3fSmrg
28801e04c3fSmrg	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
28901e04c3fSmrg		group->instance = sub_gid;
29001e04c3fSmrg	} else {
29101e04c3fSmrg		group->instance = -1;
29201e04c3fSmrg	}
29301e04c3fSmrg
29401e04c3fSmrg	group->next = query->groups;
29501e04c3fSmrg	query->groups = group;
29601e04c3fSmrg
29701e04c3fSmrg	return group;
29801e04c3fSmrg}
29901e04c3fSmrg
30001e04c3fSmrgstruct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
30101e04c3fSmrg					   unsigned num_queries,
30201e04c3fSmrg					   unsigned *query_types)
30301e04c3fSmrg{
30401e04c3fSmrg	struct r600_common_screen *screen =
30501e04c3fSmrg		(struct r600_common_screen *)ctx->screen;
30601e04c3fSmrg	struct r600_perfcounters *pc = screen->perfcounters;
30701e04c3fSmrg	struct r600_perfcounter_block *block;
30801e04c3fSmrg	struct r600_pc_group *group;
30901e04c3fSmrg	struct r600_query_pc *query;
31001e04c3fSmrg	unsigned base_gid, sub_gid, sub_index;
31101e04c3fSmrg	unsigned i, j;
31201e04c3fSmrg
31301e04c3fSmrg	if (!pc)
31401e04c3fSmrg		return NULL;
31501e04c3fSmrg
31601e04c3fSmrg	query = CALLOC_STRUCT(r600_query_pc);
31701e04c3fSmrg	if (!query)
31801e04c3fSmrg		return NULL;
31901e04c3fSmrg
32001e04c3fSmrg	query->b.b.ops = &batch_query_ops;
32101e04c3fSmrg	query->b.ops = &batch_query_hw_ops;
32201e04c3fSmrg
32301e04c3fSmrg	query->num_counters = num_queries;
32401e04c3fSmrg
32501e04c3fSmrg	/* Collect selectors per group */
32601e04c3fSmrg	for (i = 0; i < num_queries; ++i) {
32701e04c3fSmrg		unsigned sub_gid;
32801e04c3fSmrg
32901e04c3fSmrg		if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
33001e04c3fSmrg			goto error;
33101e04c3fSmrg
33201e04c3fSmrg		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
33301e04c3fSmrg				       &base_gid, &sub_index);
33401e04c3fSmrg		if (!block)
33501e04c3fSmrg			goto error;
33601e04c3fSmrg
33701e04c3fSmrg		sub_gid = sub_index / block->num_selectors;
33801e04c3fSmrg		sub_index = sub_index % block->num_selectors;
33901e04c3fSmrg
34001e04c3fSmrg		group = get_group_state(screen, query, block, sub_gid);
34101e04c3fSmrg		if (!group)
34201e04c3fSmrg			goto error;
34301e04c3fSmrg
34401e04c3fSmrg		if (group->num_counters >= block->num_counters) {
34501e04c3fSmrg			fprintf(stderr,
34601e04c3fSmrg				"perfcounter group %s: too many selected\n",
34701e04c3fSmrg				block->basename);
34801e04c3fSmrg			goto error;
34901e04c3fSmrg		}
35001e04c3fSmrg		group->selectors[group->num_counters] = sub_index;
35101e04c3fSmrg		++group->num_counters;
35201e04c3fSmrg	}
35301e04c3fSmrg
35401e04c3fSmrg	/* Compute result bases and CS size per group */
35501e04c3fSmrg	query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
35601e04c3fSmrg	query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
35701e04c3fSmrg
35801e04c3fSmrg	query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
35901e04c3fSmrg	query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
36001e04c3fSmrg
36101e04c3fSmrg	i = 0;
36201e04c3fSmrg	for (group = query->groups; group; group = group->next) {
36301e04c3fSmrg		struct r600_perfcounter_block *block = group->block;
36401e04c3fSmrg		unsigned select_dw, read_dw;
36501e04c3fSmrg		unsigned instances = 1;
36601e04c3fSmrg
36701e04c3fSmrg		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
36801e04c3fSmrg			instances = screen->info.max_se;
36901e04c3fSmrg		if (group->instance < 0)
37001e04c3fSmrg			instances *= block->num_instances;
37101e04c3fSmrg
37201e04c3fSmrg		group->result_base = i;
37301e04c3fSmrg		query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
37401e04c3fSmrg		i += instances * group->num_counters;
37501e04c3fSmrg
37601e04c3fSmrg		pc->get_size(block, group->num_counters, group->selectors,
37701e04c3fSmrg			     &select_dw, &read_dw);
37801e04c3fSmrg		query->b.num_cs_dw_begin += select_dw;
37901e04c3fSmrg		query->b.num_cs_dw_end += instances * read_dw;
38001e04c3fSmrg		query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
38101e04c3fSmrg		query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
38201e04c3fSmrg	}
38301e04c3fSmrg
38401e04c3fSmrg	if (query->shaders) {
38501e04c3fSmrg		if (query->shaders == R600_PC_SHADERS_WINDOWING)
38601e04c3fSmrg			query->shaders = 0xffffffff;
38701e04c3fSmrg		query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
38801e04c3fSmrg	}
38901e04c3fSmrg
39001e04c3fSmrg	/* Map user-supplied query array to result indices */
39101e04c3fSmrg	query->counters = CALLOC(num_queries, sizeof(*query->counters));
39201e04c3fSmrg	for (i = 0; i < num_queries; ++i) {
39301e04c3fSmrg		struct r600_pc_counter *counter = &query->counters[i];
39401e04c3fSmrg		struct r600_perfcounter_block *block;
39501e04c3fSmrg
39601e04c3fSmrg		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
39701e04c3fSmrg				       &base_gid, &sub_index);
39801e04c3fSmrg
39901e04c3fSmrg		sub_gid = sub_index / block->num_selectors;
40001e04c3fSmrg		sub_index = sub_index % block->num_selectors;
40101e04c3fSmrg
40201e04c3fSmrg		group = get_group_state(screen, query, block, sub_gid);
40301e04c3fSmrg		assert(group != NULL);
40401e04c3fSmrg
40501e04c3fSmrg		for (j = 0; j < group->num_counters; ++j) {
40601e04c3fSmrg			if (group->selectors[j] == sub_index)
40701e04c3fSmrg				break;
40801e04c3fSmrg		}
40901e04c3fSmrg
41001e04c3fSmrg		counter->base = group->result_base + j;
41101e04c3fSmrg		counter->stride = group->num_counters;
41201e04c3fSmrg
41301e04c3fSmrg		counter->qwords = 1;
41401e04c3fSmrg		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
41501e04c3fSmrg			counter->qwords = screen->info.max_se;
41601e04c3fSmrg		if (group->instance < 0)
41701e04c3fSmrg			counter->qwords *= block->num_instances;
41801e04c3fSmrg	}
41901e04c3fSmrg
42001e04c3fSmrg	if (!r600_query_hw_init(screen, &query->b))
42101e04c3fSmrg		goto error;
42201e04c3fSmrg
42301e04c3fSmrg	return (struct pipe_query *)query;
42401e04c3fSmrg
42501e04c3fSmrgerror:
42601e04c3fSmrg	r600_pc_query_destroy(screen, &query->b.b);
42701e04c3fSmrg	return NULL;
42801e04c3fSmrg}
42901e04c3fSmrg
43001e04c3fSmrgstatic bool r600_init_block_names(struct r600_common_screen *screen,
43101e04c3fSmrg				  struct r600_perfcounter_block *block)
43201e04c3fSmrg{
43301e04c3fSmrg	unsigned i, j, k;
43401e04c3fSmrg	unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
43501e04c3fSmrg	unsigned namelen;
43601e04c3fSmrg	char *groupname;
43701e04c3fSmrg	char *p;
43801e04c3fSmrg
43901e04c3fSmrg	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
44001e04c3fSmrg		groups_instance = block->num_instances;
44101e04c3fSmrg	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
44201e04c3fSmrg		groups_se = screen->info.max_se;
44301e04c3fSmrg	if (block->flags & R600_PC_BLOCK_SHADER)
44401e04c3fSmrg		groups_shader = screen->perfcounters->num_shader_types;
44501e04c3fSmrg
44601e04c3fSmrg	namelen = strlen(block->basename);
44701e04c3fSmrg	block->group_name_stride = namelen + 1;
44801e04c3fSmrg	if (block->flags & R600_PC_BLOCK_SHADER)
44901e04c3fSmrg		block->group_name_stride += 3;
45001e04c3fSmrg	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
45101e04c3fSmrg		assert(groups_se <= 10);
45201e04c3fSmrg		block->group_name_stride += 1;
45301e04c3fSmrg
45401e04c3fSmrg		if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
45501e04c3fSmrg			block->group_name_stride += 1;
45601e04c3fSmrg	}
45701e04c3fSmrg	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
45801e04c3fSmrg		assert(groups_instance <= 100);
45901e04c3fSmrg		block->group_name_stride += 2;
46001e04c3fSmrg	}
46101e04c3fSmrg
46201e04c3fSmrg	block->group_names = MALLOC(block->num_groups * block->group_name_stride);
46301e04c3fSmrg	if (!block->group_names)
46401e04c3fSmrg		return false;
46501e04c3fSmrg
46601e04c3fSmrg	groupname = block->group_names;
46701e04c3fSmrg	for (i = 0; i < groups_shader; ++i) {
46801e04c3fSmrg		const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
46901e04c3fSmrg		unsigned shaderlen = strlen(shader_suffix);
47001e04c3fSmrg		for (j = 0; j < groups_se; ++j) {
47101e04c3fSmrg			for (k = 0; k < groups_instance; ++k) {
47201e04c3fSmrg				strcpy(groupname, block->basename);
47301e04c3fSmrg				p = groupname + namelen;
47401e04c3fSmrg
47501e04c3fSmrg				if (block->flags & R600_PC_BLOCK_SHADER) {
47601e04c3fSmrg					strcpy(p, shader_suffix);
47701e04c3fSmrg					p += shaderlen;
47801e04c3fSmrg				}
47901e04c3fSmrg
48001e04c3fSmrg				if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
48101e04c3fSmrg					p += sprintf(p, "%d", j);
48201e04c3fSmrg					if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
48301e04c3fSmrg						*p++ = '_';
48401e04c3fSmrg				}
48501e04c3fSmrg
48601e04c3fSmrg				if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
48701e04c3fSmrg					p += sprintf(p, "%d", k);
48801e04c3fSmrg
48901e04c3fSmrg				groupname += block->group_name_stride;
49001e04c3fSmrg			}
49101e04c3fSmrg		}
49201e04c3fSmrg	}
49301e04c3fSmrg
49401e04c3fSmrg	assert(block->num_selectors <= 1000);
49501e04c3fSmrg	block->selector_name_stride = block->group_name_stride + 4;
49601e04c3fSmrg	block->selector_names = MALLOC(block->num_groups * block->num_selectors *
49701e04c3fSmrg				       block->selector_name_stride);
49801e04c3fSmrg	if (!block->selector_names)
49901e04c3fSmrg		return false;
50001e04c3fSmrg
50101e04c3fSmrg	groupname = block->group_names;
50201e04c3fSmrg	p = block->selector_names;
50301e04c3fSmrg	for (i = 0; i < block->num_groups; ++i) {
50401e04c3fSmrg		for (j = 0; j < block->num_selectors; ++j) {
50501e04c3fSmrg			sprintf(p, "%s_%03d", groupname, j);
50601e04c3fSmrg			p += block->selector_name_stride;
50701e04c3fSmrg		}
50801e04c3fSmrg		groupname += block->group_name_stride;
50901e04c3fSmrg	}
51001e04c3fSmrg
51101e04c3fSmrg	return true;
51201e04c3fSmrg}
51301e04c3fSmrg
51401e04c3fSmrgint r600_get_perfcounter_info(struct r600_common_screen *screen,
51501e04c3fSmrg			      unsigned index,
51601e04c3fSmrg			      struct pipe_driver_query_info *info)
51701e04c3fSmrg{
51801e04c3fSmrg	struct r600_perfcounters *pc = screen->perfcounters;
51901e04c3fSmrg	struct r600_perfcounter_block *block;
52001e04c3fSmrg	unsigned base_gid, sub;
52101e04c3fSmrg
52201e04c3fSmrg	if (!pc)
52301e04c3fSmrg		return 0;
52401e04c3fSmrg
52501e04c3fSmrg	if (!info) {
52601e04c3fSmrg		unsigned bid, num_queries = 0;
52701e04c3fSmrg
52801e04c3fSmrg		for (bid = 0; bid < pc->num_blocks; ++bid) {
52901e04c3fSmrg			num_queries += pc->blocks[bid].num_selectors *
53001e04c3fSmrg				       pc->blocks[bid].num_groups;
53101e04c3fSmrg		}
53201e04c3fSmrg
53301e04c3fSmrg		return num_queries;
53401e04c3fSmrg	}
53501e04c3fSmrg
53601e04c3fSmrg	block = lookup_counter(pc, index, &base_gid, &sub);
53701e04c3fSmrg	if (!block)
53801e04c3fSmrg		return 0;
53901e04c3fSmrg
54001e04c3fSmrg	if (!block->selector_names) {
54101e04c3fSmrg		if (!r600_init_block_names(screen, block))
54201e04c3fSmrg			return 0;
54301e04c3fSmrg	}
54401e04c3fSmrg	info->name = block->selector_names + sub * block->selector_name_stride;
54501e04c3fSmrg	info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
54601e04c3fSmrg	info->max_value.u64 = 0;
54701e04c3fSmrg	info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
54801e04c3fSmrg	info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
54901e04c3fSmrg	info->group_id = base_gid + sub / block->num_selectors;
55001e04c3fSmrg	info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
55101e04c3fSmrg	if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
55201e04c3fSmrg		info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
55301e04c3fSmrg	return 1;
55401e04c3fSmrg}
55501e04c3fSmrg
55601e04c3fSmrgint r600_get_perfcounter_group_info(struct r600_common_screen *screen,
55701e04c3fSmrg				    unsigned index,
55801e04c3fSmrg				    struct pipe_driver_query_group_info *info)
55901e04c3fSmrg{
56001e04c3fSmrg	struct r600_perfcounters *pc = screen->perfcounters;
56101e04c3fSmrg	struct r600_perfcounter_block *block;
56201e04c3fSmrg
56301e04c3fSmrg	if (!pc)
56401e04c3fSmrg		return 0;
56501e04c3fSmrg
56601e04c3fSmrg	if (!info)
56701e04c3fSmrg		return pc->num_groups;
56801e04c3fSmrg
56901e04c3fSmrg	block = lookup_group(pc, &index);
57001e04c3fSmrg	if (!block)
57101e04c3fSmrg		return 0;
57201e04c3fSmrg
57301e04c3fSmrg	if (!block->group_names) {
57401e04c3fSmrg		if (!r600_init_block_names(screen, block))
57501e04c3fSmrg			return 0;
57601e04c3fSmrg	}
57701e04c3fSmrg	info->name = block->group_names + index * block->group_name_stride;
57801e04c3fSmrg	info->num_queries = block->num_selectors;
57901e04c3fSmrg	info->max_active_queries = block->num_counters;
58001e04c3fSmrg	return 1;
58101e04c3fSmrg}
58201e04c3fSmrg
58301e04c3fSmrgvoid r600_perfcounters_destroy(struct r600_common_screen *rscreen)
58401e04c3fSmrg{
58501e04c3fSmrg	if (rscreen->perfcounters)
58601e04c3fSmrg		rscreen->perfcounters->cleanup(rscreen);
58701e04c3fSmrg}
58801e04c3fSmrg
58901e04c3fSmrgbool r600_perfcounters_init(struct r600_perfcounters *pc,
59001e04c3fSmrg			    unsigned num_blocks)
59101e04c3fSmrg{
59201e04c3fSmrg	pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
59301e04c3fSmrg	if (!pc->blocks)
59401e04c3fSmrg		return false;
59501e04c3fSmrg
59601e04c3fSmrg	pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
59701e04c3fSmrg	pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
59801e04c3fSmrg
59901e04c3fSmrg	return true;
60001e04c3fSmrg}
60101e04c3fSmrg
60201e04c3fSmrgvoid r600_perfcounters_add_block(struct r600_common_screen *rscreen,
60301e04c3fSmrg				 struct r600_perfcounters *pc,
60401e04c3fSmrg				 const char *name, unsigned flags,
60501e04c3fSmrg				 unsigned counters, unsigned selectors,
60601e04c3fSmrg				 unsigned instances, void *data)
60701e04c3fSmrg{
60801e04c3fSmrg	struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
60901e04c3fSmrg
61001e04c3fSmrg	assert(counters <= R600_QUERY_MAX_COUNTERS);
61101e04c3fSmrg
61201e04c3fSmrg	block->basename = name;
61301e04c3fSmrg	block->flags = flags;
61401e04c3fSmrg	block->num_counters = counters;
61501e04c3fSmrg	block->num_selectors = selectors;
61601e04c3fSmrg	block->num_instances = MAX2(instances, 1);
61701e04c3fSmrg	block->data = data;
61801e04c3fSmrg
61901e04c3fSmrg	if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
62001e04c3fSmrg		block->flags |= R600_PC_BLOCK_SE_GROUPS;
62101e04c3fSmrg	if (pc->separate_instance && block->num_instances > 1)
62201e04c3fSmrg		block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;
62301e04c3fSmrg
62401e04c3fSmrg	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
62501e04c3fSmrg		block->num_groups = block->num_instances;
62601e04c3fSmrg	} else {
62701e04c3fSmrg		block->num_groups = 1;
62801e04c3fSmrg	}
62901e04c3fSmrg
63001e04c3fSmrg	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
63101e04c3fSmrg		block->num_groups *= rscreen->info.max_se;
63201e04c3fSmrg	if (block->flags & R600_PC_BLOCK_SHADER)
63301e04c3fSmrg		block->num_groups *= pc->num_shader_types;
63401e04c3fSmrg
63501e04c3fSmrg	++pc->num_blocks;
63601e04c3fSmrg	pc->num_groups += block->num_groups;
63701e04c3fSmrg}
63801e04c3fSmrg
63901e04c3fSmrgvoid r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
64001e04c3fSmrg{
64101e04c3fSmrg	unsigned i;
64201e04c3fSmrg
64301e04c3fSmrg	for (i = 0; i < pc->num_blocks; ++i) {
64401e04c3fSmrg		FREE(pc->blocks[i].group_names);
64501e04c3fSmrg		FREE(pc->blocks[i].selector_names);
64601e04c3fSmrg	}
64701e04c3fSmrg	FREE(pc->blocks);
64801e04c3fSmrg	FREE(pc);
64901e04c3fSmrg}
650