/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
2401e04c3fSmrg
2501e04c3fSmrg#include "si_build_pm4.h"
2601e04c3fSmrg#include "si_query.h"
2701e04c3fSmrg#include "util/u_memory.h"
2801e04c3fSmrg
297ec681f3Smrg#include "ac_perfcounter.h"
309f464c52Smaya
319f464c52Smayastruct si_query_group {
327ec681f3Smrg   struct si_query_group *next;
337ec681f3Smrg   struct ac_pc_block *block;
347ec681f3Smrg   unsigned sub_gid;     /* only used during init */
357ec681f3Smrg   unsigned result_base; /* only used during init */
367ec681f3Smrg   int se;
377ec681f3Smrg   int instance;
387ec681f3Smrg   unsigned num_counters;
397ec681f3Smrg   unsigned selectors[AC_QUERY_MAX_COUNTERS];
409f464c52Smaya};
419f464c52Smaya
/* Describes where the samples of one user-requested counter live inside a
 * result slot, viewed as an array of uint64_t.
 */
struct si_query_counter {
   unsigned base;   /* index of the first sample */
   unsigned qwords; /* number of samples that are summed up */
   unsigned stride; /* distance between consecutive samples, in uint64s */
};
479f464c52Smaya
489f464c52Smayastruct si_query_pc {
497ec681f3Smrg   struct si_query b;
507ec681f3Smrg   struct si_query_buffer buffer;
5101e04c3fSmrg
527ec681f3Smrg   /* Size of the results in memory, in bytes. */
537ec681f3Smrg   unsigned result_size;
5401e04c3fSmrg
557ec681f3Smrg   unsigned shaders;
567ec681f3Smrg   unsigned num_counters;
577ec681f3Smrg   struct si_query_counter *counters;
587ec681f3Smrg   struct si_query_group *groups;
5901e04c3fSmrg};
6001e04c3fSmrg
617ec681f3Smrgstatic void si_pc_emit_instance(struct si_context *sctx, int se, int instance)
629f464c52Smaya{
637ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
647ec681f3Smrg   unsigned value = S_030800_SH_BROADCAST_WRITES(1);
657ec681f3Smrg
667ec681f3Smrg   if (se >= 0) {
677ec681f3Smrg      value |= S_030800_SE_INDEX(se);
687ec681f3Smrg   } else {
697ec681f3Smrg      value |= S_030800_SE_BROADCAST_WRITES(1);
707ec681f3Smrg   }
717ec681f3Smrg
727ec681f3Smrg   if (sctx->chip_class >= GFX10) {
737ec681f3Smrg      /* TODO: Expose counters from each shader array separately if needed. */
747ec681f3Smrg      value |= S_030800_SA_BROADCAST_WRITES(1);
757ec681f3Smrg   }
767ec681f3Smrg
777ec681f3Smrg   if (instance >= 0) {
787ec681f3Smrg      value |= S_030800_INSTANCE_INDEX(instance);
797ec681f3Smrg   } else {
807ec681f3Smrg      value |= S_030800_INSTANCE_BROADCAST_WRITES(1);
817ec681f3Smrg   }
827ec681f3Smrg
837ec681f3Smrg   radeon_begin(cs);
847ec681f3Smrg   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, value);
857ec681f3Smrg   radeon_end();
869f464c52Smaya}
879f464c52Smaya
887ec681f3Smrgstatic void si_pc_emit_shaders(struct si_context *sctx, unsigned shaders)
899f464c52Smaya{
907ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
919f464c52Smaya
927ec681f3Smrg   radeon_begin(cs);
937ec681f3Smrg   radeon_set_uconfig_reg_seq(R_036780_SQ_PERFCOUNTER_CTRL, 2, false);
947ec681f3Smrg   radeon_emit(shaders & 0x7f);
957ec681f3Smrg   radeon_emit(0xffffffff);
967ec681f3Smrg   radeon_end();
979f464c52Smaya}
989f464c52Smaya
997ec681f3Smrgstatic void si_pc_emit_select(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
1007ec681f3Smrg                              unsigned *selectors)
1019f464c52Smaya{
1027ec681f3Smrg   struct ac_pc_block_base *regs = block->b->b;
1037ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
1047ec681f3Smrg   unsigned idx;
1059f464c52Smaya
1067ec681f3Smrg   assert(count <= regs->num_counters);
1079f464c52Smaya
1087ec681f3Smrg   /* Fake counters. */
1097ec681f3Smrg   if (!regs->select0)
1107ec681f3Smrg      return;
1119f464c52Smaya
1127ec681f3Smrg   radeon_begin(cs);
11301e04c3fSmrg
1147ec681f3Smrg   for (idx = 0; idx < count; ++idx) {
1157ec681f3Smrg      radeon_set_uconfig_reg_seq(regs->select0[idx], 1, false);
1167ec681f3Smrg      radeon_emit(selectors[idx] | regs->select_or);
1177ec681f3Smrg   }
11801e04c3fSmrg
1197ec681f3Smrg   for (idx = 0; idx < regs->num_spm_counters; idx++) {
1207ec681f3Smrg      radeon_set_uconfig_reg_seq(regs->select1[idx], 1, false);
1217ec681f3Smrg      radeon_emit(0);
1227ec681f3Smrg   }
12301e04c3fSmrg
1247ec681f3Smrg   radeon_end();
12501e04c3fSmrg}
12601e04c3fSmrg
1277ec681f3Smrgstatic void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
12801e04c3fSmrg{
1297ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
1307ec681f3Smrg
1317ec681f3Smrg   si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address,
1327ec681f3Smrg                   COPY_DATA_IMM, NULL, 1);
1337ec681f3Smrg
1347ec681f3Smrg   radeon_begin(cs);
1357ec681f3Smrg   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
1367ec681f3Smrg                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
1377ec681f3Smrg   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
1387ec681f3Smrg   radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
1397ec681f3Smrg   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
1407ec681f3Smrg                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
1417ec681f3Smrg   radeon_end();
14201e04c3fSmrg}
14301e04c3fSmrg
14401e04c3fSmrg/* Note: The buffer was already added in si_pc_emit_start, so we don't have to
14501e04c3fSmrg * do it again in here. */
1467ec681f3Smrgstatic void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
14701e04c3fSmrg{
1487ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
1497ec681f3Smrg
1507ec681f3Smrg   si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
1517ec681f3Smrg                     EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY);
1527ec681f3Smrg   si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL);
1537ec681f3Smrg
1547ec681f3Smrg   radeon_begin(cs);
1557ec681f3Smrg   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
1567ec681f3Smrg   radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
1577ec681f3Smrg   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
1587ec681f3Smrg   radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
1597ec681f3Smrg   radeon_set_uconfig_reg(
1607ec681f3Smrg      R_036020_CP_PERFMON_CNTL,
1617ec681f3Smrg      S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1));
1627ec681f3Smrg   radeon_end();
16301e04c3fSmrg}
16401e04c3fSmrg
1657ec681f3Smrgstatic void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
1667ec681f3Smrg                            uint64_t va)
16701e04c3fSmrg{
1687ec681f3Smrg   struct ac_pc_block_base *regs = block->b->b;
1697ec681f3Smrg   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
1707ec681f3Smrg   unsigned idx;
1717ec681f3Smrg   unsigned reg = regs->counter0_lo;
1727ec681f3Smrg   unsigned reg_delta = 8;
1737ec681f3Smrg
1747ec681f3Smrg   radeon_begin(cs);
1757ec681f3Smrg
1767ec681f3Smrg   if (regs->select0) {
1777ec681f3Smrg      for (idx = 0; idx < count; ++idx) {
1787ec681f3Smrg         if (regs->counters)
1797ec681f3Smrg            reg = regs->counters[idx];
1807ec681f3Smrg
1817ec681f3Smrg         radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
1827ec681f3Smrg         radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
1837ec681f3Smrg                            COPY_DATA_COUNT_SEL); /* 64 bits */
1847ec681f3Smrg         radeon_emit(reg >> 2);
1857ec681f3Smrg         radeon_emit(0); /* unused */
1867ec681f3Smrg         radeon_emit(va);
1877ec681f3Smrg         radeon_emit(va >> 32);
1887ec681f3Smrg         va += sizeof(uint64_t);
1897ec681f3Smrg         reg += reg_delta;
1907ec681f3Smrg      }
1917ec681f3Smrg   } else {
1927ec681f3Smrg      /* Fake counters. */
1937ec681f3Smrg      for (idx = 0; idx < count; ++idx) {
1947ec681f3Smrg         radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
1957ec681f3Smrg         radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
1967ec681f3Smrg                     COPY_DATA_COUNT_SEL);
1977ec681f3Smrg         radeon_emit(0); /* immediate */
1987ec681f3Smrg         radeon_emit(0);
1997ec681f3Smrg         radeon_emit(va);
2007ec681f3Smrg         radeon_emit(va >> 32);
2017ec681f3Smrg         va += sizeof(uint64_t);
2027ec681f3Smrg      }
2037ec681f3Smrg   }
2047ec681f3Smrg   radeon_end();
20501e04c3fSmrg}
20601e04c3fSmrg
2077ec681f3Smrgstatic void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery)
20801e04c3fSmrg{
2097ec681f3Smrg   struct si_query_pc *query = (struct si_query_pc *)squery;
2109f464c52Smaya
2117ec681f3Smrg   while (query->groups) {
2127ec681f3Smrg      struct si_query_group *group = query->groups;
2137ec681f3Smrg      query->groups = group->next;
2147ec681f3Smrg      FREE(group);
2157ec681f3Smrg   }
2169f464c52Smaya
2177ec681f3Smrg   FREE(query->counters);
2189f464c52Smaya
2197ec681f3Smrg   si_query_buffer_destroy(sctx->screen, &query->buffer);
2207ec681f3Smrg   FREE(query);
2217ec681f3Smrg}
2227ec681f3Smrg
2237ec681f3Smrgvoid si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit)
2247ec681f3Smrg{
2257ec681f3Smrg   radeon_begin(&sctx->gfx_cs);
2267ec681f3Smrg
2277ec681f3Smrg   if (sctx->chip_class >= GFX10) {
2287ec681f3Smrg      radeon_set_uconfig_reg(R_037390_RLC_PERFMON_CLK_CNTL,
2297ec681f3Smrg                             S_037390_PERFMON_CLOCK_STATE(inhibit));
2307ec681f3Smrg   } else if (sctx->chip_class >= GFX8) {
2317ec681f3Smrg      radeon_set_uconfig_reg(R_0372FC_RLC_PERFMON_CLK_CNTL,
2327ec681f3Smrg                             S_0372FC_PERFMON_CLOCK_STATE(inhibit));
2337ec681f3Smrg   }
2347ec681f3Smrg   radeon_end();
2359f464c52Smaya}
2369f464c52Smaya
2379f464c52Smayastatic void si_pc_query_resume(struct si_context *sctx, struct si_query *squery)
2389f464c52Smaya/*
2397ec681f3Smrg                                   struct si_query_hw *hwquery,
2407ec681f3Smrg                                   struct si_resource *buffer, uint64_t va)*/
2419f464c52Smaya{
2427ec681f3Smrg   struct si_query_pc *query = (struct si_query_pc *)squery;
2437ec681f3Smrg   int current_se = -1;
2447ec681f3Smrg   int current_instance = -1;
2457ec681f3Smrg
2467ec681f3Smrg   if (!si_query_buffer_alloc(sctx, &query->buffer, NULL, query->result_size))
2477ec681f3Smrg      return;
2487ec681f3Smrg   si_need_gfx_cs_space(sctx, 0);
2499f464c52Smaya
2507ec681f3Smrg   if (query->shaders)
2517ec681f3Smrg      si_pc_emit_shaders(sctx, query->shaders);
2529f464c52Smaya
2537ec681f3Smrg   si_inhibit_clockgating(sctx, &sctx->gfx_cs, true);
2549f464c52Smaya
2557ec681f3Smrg   for (struct si_query_group *group = query->groups; group; group = group->next) {
2567ec681f3Smrg      struct ac_pc_block *block = group->block;
2579f464c52Smaya
2587ec681f3Smrg      if (group->se != current_se || group->instance != current_instance) {
2597ec681f3Smrg         current_se = group->se;
2607ec681f3Smrg         current_instance = group->instance;
2617ec681f3Smrg         si_pc_emit_instance(sctx, group->se, group->instance);
2627ec681f3Smrg      }
2639f464c52Smaya
2647ec681f3Smrg      si_pc_emit_select(sctx, block, group->num_counters, group->selectors);
2657ec681f3Smrg   }
2669f464c52Smaya
2677ec681f3Smrg   if (current_se != -1 || current_instance != -1)
2687ec681f3Smrg      si_pc_emit_instance(sctx, -1, -1);
2699f464c52Smaya
2707ec681f3Smrg   uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end;
2717ec681f3Smrg   si_pc_emit_start(sctx, query->buffer.buf, va);
2729f464c52Smaya}
2739f464c52Smaya
2749f464c52Smayastatic void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery)
2759f464c52Smaya{
2767ec681f3Smrg   struct si_query_pc *query = (struct si_query_pc *)squery;
2779f464c52Smaya
2787ec681f3Smrg   if (!query->buffer.buf)
2797ec681f3Smrg      return;
2809f464c52Smaya
2817ec681f3Smrg   uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end;
2827ec681f3Smrg   query->buffer.results_end += query->result_size;
2839f464c52Smaya
2847ec681f3Smrg   si_pc_emit_stop(sctx, query->buffer.buf, va);
2859f464c52Smaya
2867ec681f3Smrg   for (struct si_query_group *group = query->groups; group; group = group->next) {
2877ec681f3Smrg      struct ac_pc_block *block = group->block;
2887ec681f3Smrg      unsigned se = group->se >= 0 ? group->se : 0;
2897ec681f3Smrg      unsigned se_end = se + 1;
2909f464c52Smaya
2917ec681f3Smrg      if ((block->b->b->flags & AC_PC_BLOCK_SE) && (group->se < 0))
2927ec681f3Smrg         se_end = sctx->screen->info.max_se;
2939f464c52Smaya
2947ec681f3Smrg      do {
2957ec681f3Smrg         unsigned instance = group->instance >= 0 ? group->instance : 0;
2969f464c52Smaya
2977ec681f3Smrg         do {
2987ec681f3Smrg            si_pc_emit_instance(sctx, se, instance);
2997ec681f3Smrg            si_pc_emit_read(sctx, block, group->num_counters, va);
3007ec681f3Smrg            va += sizeof(uint64_t) * group->num_counters;
3017ec681f3Smrg         } while (group->instance < 0 && ++instance < block->num_instances);
3027ec681f3Smrg      } while (++se < se_end);
3037ec681f3Smrg   }
3049f464c52Smaya
3057ec681f3Smrg   si_pc_emit_instance(sctx, -1, -1);
3067ec681f3Smrg
3077ec681f3Smrg   si_inhibit_clockgating(sctx, &sctx->gfx_cs, false);
3089f464c52Smaya}
3099f464c52Smaya
3109f464c52Smayastatic bool si_pc_query_begin(struct si_context *ctx, struct si_query *squery)
3119f464c52Smaya{
3127ec681f3Smrg   struct si_query_pc *query = (struct si_query_pc *)squery;
3139f464c52Smaya
3147ec681f3Smrg   si_query_buffer_reset(ctx, &query->buffer);
3159f464c52Smaya
3167ec681f3Smrg   list_addtail(&query->b.active_list, &ctx->active_queries);
3177ec681f3Smrg   ctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
3189f464c52Smaya
3197ec681f3Smrg   si_pc_query_resume(ctx, squery);
3209f464c52Smaya
3217ec681f3Smrg   return true;
3229f464c52Smaya}
3239f464c52Smaya
3249f464c52Smayastatic bool si_pc_query_end(struct si_context *ctx, struct si_query *squery)
3259f464c52Smaya{
3267ec681f3Smrg   struct si_query_pc *query = (struct si_query_pc *)squery;
3279f464c52Smaya
3287ec681f3Smrg   si_pc_query_suspend(ctx, squery);
3299f464c52Smaya
3307ec681f3Smrg   list_del(&squery->active_list);
3317ec681f3Smrg   ctx->num_cs_dw_queries_suspend -= squery->num_cs_dw_suspend;
3329f464c52Smaya
3337ec681f3Smrg   return query->buffer.buf != NULL;
3349f464c52Smaya}
3359f464c52Smaya
3367ec681f3Smrgstatic void si_pc_query_add_result(struct si_query_pc *query, void *buffer,
3377ec681f3Smrg                                   union pipe_query_result *result)
3389f464c52Smaya{
3397ec681f3Smrg   uint64_t *results = buffer;
3407ec681f3Smrg   unsigned i, j;
3419f464c52Smaya
3427ec681f3Smrg   for (i = 0; i < query->num_counters; ++i) {
3437ec681f3Smrg      struct si_query_counter *counter = &query->counters[i];
3449f464c52Smaya
3457ec681f3Smrg      for (j = 0; j < counter->qwords; ++j) {
3467ec681f3Smrg         uint32_t value = results[counter->base + j * counter->stride];
3477ec681f3Smrg         result->batch[i].u64 += value;
3487ec681f3Smrg      }
3497ec681f3Smrg   }
3509f464c52Smaya}
3519f464c52Smaya
3527ec681f3Smrgstatic bool si_pc_query_get_result(struct si_context *sctx, struct si_query *squery, bool wait,
3537ec681f3Smrg                                   union pipe_query_result *result)
3549f464c52Smaya{
3557ec681f3Smrg   struct si_query_pc *query = (struct si_query_pc *)squery;
3569f464c52Smaya
3577ec681f3Smrg   memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
3589f464c52Smaya
3597ec681f3Smrg   for (struct si_query_buffer *qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
3607ec681f3Smrg      unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
3617ec681f3Smrg      unsigned results_base = 0;
3627ec681f3Smrg      void *map;
3639f464c52Smaya
3647ec681f3Smrg      if (squery->b.flushed)
3657ec681f3Smrg         map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);
3667ec681f3Smrg      else
3677ec681f3Smrg         map = si_buffer_map(sctx, qbuf->buf, usage);
3689f464c52Smaya
3697ec681f3Smrg      if (!map)
3707ec681f3Smrg         return false;
3719f464c52Smaya
3727ec681f3Smrg      while (results_base != qbuf->results_end) {
3737ec681f3Smrg         si_pc_query_add_result(query, map + results_base, result);
3747ec681f3Smrg         results_base += query->result_size;
3757ec681f3Smrg      }
3767ec681f3Smrg   }
3779f464c52Smaya
3787ec681f3Smrg   return true;
3799f464c52Smaya}
3809f464c52Smaya
3819f464c52Smayastatic const struct si_query_ops batch_query_ops = {
3827ec681f3Smrg   .destroy = si_pc_query_destroy,
3837ec681f3Smrg   .begin = si_pc_query_begin,
3847ec681f3Smrg   .end = si_pc_query_end,
3857ec681f3Smrg   .get_result = si_pc_query_get_result,
3869f464c52Smaya
3877ec681f3Smrg   .suspend = si_pc_query_suspend,
3887ec681f3Smrg   .resume = si_pc_query_resume,
3899f464c52Smaya};
3909f464c52Smaya
3917ec681f3Smrgstatic struct si_query_group *get_group_state(struct si_screen *screen, struct si_query_pc *query,
3927ec681f3Smrg                                              struct ac_pc_block *block, unsigned sub_gid)
3939f464c52Smaya{
3947ec681f3Smrg   struct si_perfcounters *pc = screen->perfcounters;
3957ec681f3Smrg   struct si_query_group *group = query->groups;
3967ec681f3Smrg
3977ec681f3Smrg   while (group) {
3987ec681f3Smrg      if (group->block == block && group->sub_gid == sub_gid)
3997ec681f3Smrg         return group;
4007ec681f3Smrg      group = group->next;
4017ec681f3Smrg   }
4027ec681f3Smrg
4037ec681f3Smrg   group = CALLOC_STRUCT(si_query_group);
4047ec681f3Smrg   if (!group)
4057ec681f3Smrg      return NULL;
4067ec681f3Smrg
4077ec681f3Smrg   group->block = block;
4087ec681f3Smrg   group->sub_gid = sub_gid;
4097ec681f3Smrg
4107ec681f3Smrg   if (block->b->b->flags & AC_PC_BLOCK_SHADER) {
4117ec681f3Smrg      unsigned sub_gids = block->num_instances;
4127ec681f3Smrg      unsigned shader_id;
4137ec681f3Smrg      unsigned shaders;
4147ec681f3Smrg      unsigned query_shaders;
4157ec681f3Smrg
4167ec681f3Smrg      if (ac_pc_block_has_per_se_groups(&pc->base, block))
4177ec681f3Smrg         sub_gids = sub_gids * screen->info.max_se;
4187ec681f3Smrg      shader_id = sub_gid / sub_gids;
4197ec681f3Smrg      sub_gid = sub_gid % sub_gids;
4207ec681f3Smrg
4217ec681f3Smrg      shaders = ac_pc_shader_type_bits[shader_id];
4227ec681f3Smrg
4237ec681f3Smrg      query_shaders = query->shaders & ~AC_PC_SHADERS_WINDOWING;
4247ec681f3Smrg      if (query_shaders && query_shaders != shaders) {
4257ec681f3Smrg         fprintf(stderr, "si_perfcounter: incompatible shader groups\n");
4267ec681f3Smrg         FREE(group);
4277ec681f3Smrg         return NULL;
4287ec681f3Smrg      }
4297ec681f3Smrg      query->shaders = shaders;
4307ec681f3Smrg   }
4317ec681f3Smrg
4327ec681f3Smrg   if (block->b->b->flags & AC_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
4337ec681f3Smrg      // A non-zero value in query->shaders ensures that the shader
4347ec681f3Smrg      // masking is reset unless the user explicitly requests one.
4357ec681f3Smrg      query->shaders = AC_PC_SHADERS_WINDOWING;
4367ec681f3Smrg   }
4377ec681f3Smrg
4387ec681f3Smrg   if (ac_pc_block_has_per_se_groups(&pc->base, block)) {
4397ec681f3Smrg      group->se = sub_gid / block->num_instances;
4407ec681f3Smrg      sub_gid = sub_gid % block->num_instances;
4417ec681f3Smrg   } else {
4427ec681f3Smrg      group->se = -1;
4437ec681f3Smrg   }
4447ec681f3Smrg
4457ec681f3Smrg   if (ac_pc_block_has_per_instance_groups(&pc->base, block)) {
4467ec681f3Smrg      group->instance = sub_gid;
4477ec681f3Smrg   } else {
4487ec681f3Smrg      group->instance = -1;
4497ec681f3Smrg   }
4507ec681f3Smrg
4517ec681f3Smrg   group->next = query->groups;
4527ec681f3Smrg   query->groups = group;
4537ec681f3Smrg
4547ec681f3Smrg   return group;
4559f464c52Smaya}
4569f464c52Smaya
4577ec681f3Smrgstruct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_queries,
4587ec681f3Smrg                                         unsigned *query_types)
4599f464c52Smaya{
4607ec681f3Smrg   struct si_screen *screen = (struct si_screen *)ctx->screen;
4617ec681f3Smrg   struct si_perfcounters *pc = screen->perfcounters;
4627ec681f3Smrg   struct ac_pc_block *block;
4637ec681f3Smrg   struct si_query_group *group;
4647ec681f3Smrg   struct si_query_pc *query;
4657ec681f3Smrg   unsigned base_gid, sub_gid, sub_index;
4667ec681f3Smrg   unsigned i, j;
4677ec681f3Smrg
4687ec681f3Smrg   if (!pc)
4697ec681f3Smrg      return NULL;
4707ec681f3Smrg
4717ec681f3Smrg   query = CALLOC_STRUCT(si_query_pc);
4727ec681f3Smrg   if (!query)
4737ec681f3Smrg      return NULL;
4747ec681f3Smrg
4757ec681f3Smrg   query->b.ops = &batch_query_ops;
4767ec681f3Smrg
4777ec681f3Smrg   query->num_counters = num_queries;
4787ec681f3Smrg
4797ec681f3Smrg   /* Collect selectors per group */
4807ec681f3Smrg   for (i = 0; i < num_queries; ++i) {
4817ec681f3Smrg      unsigned sub_gid;
4827ec681f3Smrg
4837ec681f3Smrg      if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER)
4847ec681f3Smrg         goto error;
4857ec681f3Smrg
4867ec681f3Smrg      block =
4877ec681f3Smrg         ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index);
4887ec681f3Smrg      if (!block)
4897ec681f3Smrg         goto error;
4907ec681f3Smrg
4917ec681f3Smrg      sub_gid = sub_index / block->b->selectors;
4927ec681f3Smrg      sub_index = sub_index % block->b->selectors;
4937ec681f3Smrg
4947ec681f3Smrg      group = get_group_state(screen, query, block, sub_gid);
4957ec681f3Smrg      if (!group)
4967ec681f3Smrg         goto error;
4977ec681f3Smrg
4987ec681f3Smrg      if (group->num_counters >= block->b->b->num_counters) {
4997ec681f3Smrg         fprintf(stderr, "perfcounter group %s: too many selected\n", block->b->b->name);
5007ec681f3Smrg         goto error;
5017ec681f3Smrg      }
5027ec681f3Smrg      group->selectors[group->num_counters] = sub_index;
5037ec681f3Smrg      ++group->num_counters;
5047ec681f3Smrg   }
5057ec681f3Smrg
5067ec681f3Smrg   /* Compute result bases and CS size per group */
5077ec681f3Smrg   query->b.num_cs_dw_suspend = pc->num_stop_cs_dwords;
5087ec681f3Smrg   query->b.num_cs_dw_suspend += pc->num_instance_cs_dwords;
5097ec681f3Smrg
5107ec681f3Smrg   i = 0;
5117ec681f3Smrg   for (group = query->groups; group; group = group->next) {
5127ec681f3Smrg      struct ac_pc_block *block = group->block;
5137ec681f3Smrg      unsigned read_dw;
5147ec681f3Smrg      unsigned instances = 1;
5157ec681f3Smrg
5167ec681f3Smrg      if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
5177ec681f3Smrg         instances = screen->info.max_se;
5187ec681f3Smrg      if (group->instance < 0)
5197ec681f3Smrg         instances *= block->num_instances;
5207ec681f3Smrg
5217ec681f3Smrg      group->result_base = i;
5227ec681f3Smrg      query->result_size += sizeof(uint64_t) * instances * group->num_counters;
5237ec681f3Smrg      i += instances * group->num_counters;
5247ec681f3Smrg
5257ec681f3Smrg      read_dw = 6 * group->num_counters;
5267ec681f3Smrg      query->b.num_cs_dw_suspend += instances * read_dw;
5277ec681f3Smrg      query->b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords;
5287ec681f3Smrg   }
5297ec681f3Smrg
5307ec681f3Smrg   if (query->shaders) {
5317ec681f3Smrg      if (query->shaders == AC_PC_SHADERS_WINDOWING)
5327ec681f3Smrg         query->shaders = 0xffffffff;
5337ec681f3Smrg   }
5347ec681f3Smrg
5357ec681f3Smrg   /* Map user-supplied query array to result indices */
5367ec681f3Smrg   query->counters = CALLOC(num_queries, sizeof(*query->counters));
5377ec681f3Smrg   for (i = 0; i < num_queries; ++i) {
5387ec681f3Smrg      struct si_query_counter *counter = &query->counters[i];
5397ec681f3Smrg      struct ac_pc_block *block;
5407ec681f3Smrg
5417ec681f3Smrg      block =
5427ec681f3Smrg         ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index);
5437ec681f3Smrg
5447ec681f3Smrg      sub_gid = sub_index / block->b->selectors;
5457ec681f3Smrg      sub_index = sub_index % block->b->selectors;
5467ec681f3Smrg
5477ec681f3Smrg      group = get_group_state(screen, query, block, sub_gid);
5487ec681f3Smrg      assert(group != NULL);
5497ec681f3Smrg
5507ec681f3Smrg      for (j = 0; j < group->num_counters; ++j) {
5517ec681f3Smrg         if (group->selectors[j] == sub_index)
5527ec681f3Smrg            break;
5537ec681f3Smrg      }
5547ec681f3Smrg
5557ec681f3Smrg      counter->base = group->result_base + j;
5567ec681f3Smrg      counter->stride = group->num_counters;
5577ec681f3Smrg
5587ec681f3Smrg      counter->qwords = 1;
5597ec681f3Smrg      if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
5607ec681f3Smrg         counter->qwords = screen->info.max_se;
5617ec681f3Smrg      if (group->instance < 0)
5627ec681f3Smrg         counter->qwords *= block->num_instances;
5637ec681f3Smrg   }
5649f464c52Smaya
5657ec681f3Smrg   return (struct pipe_query *)query;
5669f464c52Smaya
5679f464c52Smayaerror:
5687ec681f3Smrg   si_pc_query_destroy((struct si_context *)ctx, &query->b);
5697ec681f3Smrg   return NULL;
5709f464c52Smaya}
5719f464c52Smaya
5727ec681f3Smrgint si_get_perfcounter_info(struct si_screen *screen, unsigned index,
5737ec681f3Smrg                            struct pipe_driver_query_info *info)
5749f464c52Smaya{
5757ec681f3Smrg   struct si_perfcounters *pc = screen->perfcounters;
5767ec681f3Smrg   struct ac_pc_block *block;
5777ec681f3Smrg   unsigned base_gid, sub;
5787ec681f3Smrg
5797ec681f3Smrg   if (!pc)
5807ec681f3Smrg      return 0;
5817ec681f3Smrg
5827ec681f3Smrg   if (!info) {
5837ec681f3Smrg      unsigned bid, num_queries = 0;
5847ec681f3Smrg
5857ec681f3Smrg      for (bid = 0; bid < pc->base.num_blocks; ++bid) {
5867ec681f3Smrg         num_queries += pc->base.blocks[bid].b->selectors * pc->base.blocks[bid].num_groups;
5877ec681f3Smrg      }
5887ec681f3Smrg
5897ec681f3Smrg      return num_queries;
5907ec681f3Smrg   }
5917ec681f3Smrg
5927ec681f3Smrg   block = ac_lookup_counter(&pc->base, index, &base_gid, &sub);
5937ec681f3Smrg   if (!block)
5947ec681f3Smrg      return 0;
5957ec681f3Smrg
5967ec681f3Smrg   if (!block->selector_names) {
5977ec681f3Smrg      if (!ac_init_block_names(&screen->info, &pc->base, block))
5987ec681f3Smrg         return 0;
5997ec681f3Smrg   }
6007ec681f3Smrg   info->name = block->selector_names + sub * block->selector_name_stride;
6017ec681f3Smrg   info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index;
6027ec681f3Smrg   info->max_value.u64 = 0;
6037ec681f3Smrg   info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
6047ec681f3Smrg   info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
6057ec681f3Smrg   info->group_id = base_gid + sub / block->b->selectors;
6067ec681f3Smrg   info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
6077ec681f3Smrg   if (sub > 0 && sub + 1 < block->b->selectors * block->num_groups)
6087ec681f3Smrg      info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
6097ec681f3Smrg   return 1;
6109f464c52Smaya}
6119f464c52Smaya
6127ec681f3Smrgint si_get_perfcounter_group_info(struct si_screen *screen, unsigned index,
6137ec681f3Smrg                                  struct pipe_driver_query_group_info *info)
6149f464c52Smaya{
6157ec681f3Smrg   struct si_perfcounters *pc = screen->perfcounters;
6167ec681f3Smrg   struct ac_pc_block *block;
6177ec681f3Smrg
6187ec681f3Smrg   if (!pc)
6197ec681f3Smrg      return 0;
6207ec681f3Smrg
6217ec681f3Smrg   if (!info)
6227ec681f3Smrg      return pc->base.num_groups;
6237ec681f3Smrg
6247ec681f3Smrg   block = ac_lookup_group(&pc->base, &index);
6257ec681f3Smrg   if (!block)
6267ec681f3Smrg      return 0;
6277ec681f3Smrg
6287ec681f3Smrg   if (!block->group_names) {
6297ec681f3Smrg      if (!ac_init_block_names(&screen->info, &pc->base, block))
6307ec681f3Smrg         return 0;
6317ec681f3Smrg   }
6327ec681f3Smrg   info->name = block->group_names + index * block->group_name_stride;
6337ec681f3Smrg   info->num_queries = block->b->selectors;
6347ec681f3Smrg   info->max_active_queries = block->b->b->num_counters;
6357ec681f3Smrg   return 1;
6369f464c52Smaya}
6379f464c52Smaya
6389f464c52Smayavoid si_destroy_perfcounters(struct si_screen *screen)
6399f464c52Smaya{
6407ec681f3Smrg   struct si_perfcounters *pc = screen->perfcounters;
6417ec681f3Smrg
6427ec681f3Smrg   if (!pc)
6437ec681f3Smrg      return;
6447ec681f3Smrg
6457ec681f3Smrg   ac_destroy_perfcounters(&pc->base);
6467ec681f3Smrg   FREE(pc);
6477ec681f3Smrg   screen->perfcounters = NULL;
64801e04c3fSmrg}
64901e04c3fSmrg
65001e04c3fSmrgvoid si_init_perfcounters(struct si_screen *screen)
65101e04c3fSmrg{
6527ec681f3Smrg   bool separate_se, separate_instance;
65301e04c3fSmrg
6547ec681f3Smrg   separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
6557ec681f3Smrg   separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
6567ec681f3Smrg
6577ec681f3Smrg   screen->perfcounters = CALLOC_STRUCT(si_perfcounters);
6587ec681f3Smrg   if (!screen->perfcounters)
6597ec681f3Smrg      return;
6607ec681f3Smrg
6617ec681f3Smrg   screen->perfcounters->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(screen);
6627ec681f3Smrg   screen->perfcounters->num_instance_cs_dwords = 3;
6637ec681f3Smrg
6647ec681f3Smrg   if (!ac_init_perfcounters(&screen->info, separate_se, separate_instance,
6657ec681f3Smrg                             &screen->perfcounters->base)) {
6667ec681f3Smrg      si_destroy_perfcounters(screen);
6677ec681f3Smrg   }
66801e04c3fSmrg}
669