17ec681f3Smrg/*
27ec681f3Smrg * Copyright 2018 Advanced Micro Devices, Inc.
37ec681f3Smrg * All Rights Reserved.
47ec681f3Smrg *
57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
67ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
77ec681f3Smrg * to deal in the Software without restriction, including without limitation
87ec681f3Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub
97ec681f3Smrg * license, and/or sell copies of the Software, and to permit persons to whom
107ec681f3Smrg * the Software is furnished to do so, subject to the following conditions:
117ec681f3Smrg *
127ec681f3Smrg * The above copyright notice and this permission notice (including the next
137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
147ec681f3Smrg * Software.
157ec681f3Smrg *
167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
197ec681f3Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
207ec681f3Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
217ec681f3Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
227ec681f3Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
237ec681f3Smrg */
247ec681f3Smrg
257ec681f3Smrg#include "si_pipe.h"
267ec681f3Smrg#include "si_query.h"
277ec681f3Smrg#include "sid.h"
287ec681f3Smrg#include "util/u_memory.h"
297ec681f3Smrg#include "util/u_suballoc.h"
307ec681f3Smrg
317ec681f3Smrg#include <stddef.h>
327ec681f3Smrg
337ec681f3Smrgstatic void emit_shader_query(struct si_context *sctx)
347ec681f3Smrg{
357ec681f3Smrg   assert(!list_is_empty(&sctx->shader_query_buffers));
367ec681f3Smrg
377ec681f3Smrg   struct gfx10_sh_query_buffer *qbuf =
387ec681f3Smrg      list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
397ec681f3Smrg   qbuf->head += sizeof(struct gfx10_sh_query_buffer_mem);
407ec681f3Smrg}
417ec681f3Smrg
427ec681f3Smrgstatic void gfx10_release_query_buffers(struct si_context *sctx,
437ec681f3Smrg                                        struct gfx10_sh_query_buffer *first,
447ec681f3Smrg                                        struct gfx10_sh_query_buffer *last)
457ec681f3Smrg{
467ec681f3Smrg   while (first) {
477ec681f3Smrg      struct gfx10_sh_query_buffer *qbuf = first;
487ec681f3Smrg      if (first != last)
497ec681f3Smrg         first = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list);
507ec681f3Smrg      else
517ec681f3Smrg         first = NULL;
527ec681f3Smrg
537ec681f3Smrg      qbuf->refcount--;
547ec681f3Smrg      if (qbuf->refcount)
557ec681f3Smrg         continue;
567ec681f3Smrg
577ec681f3Smrg      if (qbuf->list.next == &sctx->shader_query_buffers)
587ec681f3Smrg         continue; /* keep the most recent buffer; it may not be full yet */
597ec681f3Smrg      if (qbuf->list.prev == &sctx->shader_query_buffers)
607ec681f3Smrg         continue; /* keep the oldest buffer for recycling */
617ec681f3Smrg
627ec681f3Smrg      list_del(&qbuf->list);
637ec681f3Smrg      si_resource_reference(&qbuf->buf, NULL);
647ec681f3Smrg      FREE(qbuf);
657ec681f3Smrg   }
667ec681f3Smrg}
677ec681f3Smrg
/* Make sure a query buffer with room for one more record is bound as the
 * GS query shader buffer, allocating or recycling a buffer if needed.
 * Returns false only on out-of-memory. */
static bool gfx10_alloc_query_buffer(struct si_context *sctx)
{
   /* If the atom is already dirty, a suitable buffer was prepared earlier
    * and is still bound — nothing to do. */
   if (si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query))
      return true;

   struct gfx10_sh_query_buffer *qbuf = NULL;

   if (!list_is_empty(&sctx->shader_query_buffers)) {
      /* Fast path: the newest buffer still has room for one record. */
      qbuf = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
      if (qbuf->head + sizeof(struct gfx10_sh_query_buffer_mem) <= qbuf->buf->b.b.width0)
         goto success;

      /* Try to recycle the oldest buffer: only if no query still references
       * it, it is not in the current CS, and the GPU is idle on it. */
      qbuf = list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
      if (!qbuf->refcount &&
          !si_cs_is_buffer_referenced(sctx, qbuf->buf->buf, RADEON_USAGE_READWRITE) &&
          sctx->ws->buffer_wait(sctx->ws, qbuf->buf->buf, 0, RADEON_USAGE_READWRITE)) {
         /* Can immediately re-use the oldest buffer */
         list_del(&qbuf->list);
      } else {
         qbuf = NULL;
      }
   }

   if (!qbuf) {
      /* No recyclable buffer: allocate a fresh one. */
      qbuf = CALLOC_STRUCT(gfx10_sh_query_buffer);
      if (unlikely(!qbuf))
         return false;

      struct si_screen *screen = sctx->screen;
      unsigned buf_size =
         MAX2(sizeof(struct gfx10_sh_query_buffer_mem), screen->info.min_alloc_size);
      qbuf->buf = si_resource(pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
      if (unlikely(!qbuf->buf)) {
         FREE(qbuf);
         return false;
      }
   }

   /* The buffer is currently unused by the GPU. Initialize it.
    *
    * We need to set the high bit of all the primitive counters for
    * compatibility with the SET_PREDICATION packet.
    */
   uint64_t *results = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL,
                                            PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
   assert(results);

   /* Each record holds 16 counters (4 streams x 4 values) plus a fence
    * dword; pre-set bit 63 of every counter and clear the fence. */
   for (unsigned i = 0, e = qbuf->buf->b.b.width0 / sizeof(struct gfx10_sh_query_buffer_mem); i < e;
        ++i) {
      for (unsigned j = 0; j < 16; ++j)
         results[32 * i + j] = (uint64_t)1 << 63;
      results[32 * i + 16] = 0;
   }

   list_addtail(&qbuf->list, &sctx->shader_query_buffers);
   qbuf->head = 0;
   /* Every currently active query implicitly references the new buffer. */
   qbuf->refcount = sctx->num_active_shader_queries;

success:;
   /* Bind the record window [head, head + record size) to the shader. */
   struct pipe_shader_buffer sbuf;
   sbuf.buffer = &qbuf->buf->b.b;
   sbuf.buffer_offset = qbuf->head;
   sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem);
   si_set_internal_shader_buffer(sctx, GFX10_GS_QUERY_BUF, &sbuf);
   sctx->current_vs_state |= S_VS_STATE_STREAMOUT_QUERY_ENABLED(1);

   si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query);
   return true;
}
1377ec681f3Smrg
1387ec681f3Smrgstatic void gfx10_sh_query_destroy(struct si_context *sctx, struct si_query *rquery)
1397ec681f3Smrg{
1407ec681f3Smrg   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
1417ec681f3Smrg   gfx10_release_query_buffers(sctx, query->first, query->last);
1427ec681f3Smrg   FREE(query);
1437ec681f3Smrg}
1447ec681f3Smrg
1457ec681f3Smrgstatic bool gfx10_sh_query_begin(struct si_context *sctx, struct si_query *rquery)
1467ec681f3Smrg{
1477ec681f3Smrg   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
1487ec681f3Smrg
1497ec681f3Smrg   gfx10_release_query_buffers(sctx, query->first, query->last);
1507ec681f3Smrg   query->first = query->last = NULL;
1517ec681f3Smrg
1527ec681f3Smrg   if (unlikely(!gfx10_alloc_query_buffer(sctx)))
1537ec681f3Smrg      return false;
1547ec681f3Smrg
1557ec681f3Smrg   query->first = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
1567ec681f3Smrg   query->first_begin = query->first->head;
1577ec681f3Smrg
1587ec681f3Smrg   sctx->num_active_shader_queries++;
1597ec681f3Smrg   query->first->refcount++;
1607ec681f3Smrg
1617ec681f3Smrg   return true;
1627ec681f3Smrg}
1637ec681f3Smrg
/* Stop the query: record the end position, emit a bottom-of-pipe fence for
 * the last written record, and unbind the query buffer if this was the
 * last active shader query. Returns false if begin previously failed. */
static bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery)
{
   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;

   if (unlikely(!query->first))
      return false; /* earlier out of memory error */

   query->last = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
   query->last_end = query->last->head;

   /* Signal the fence of the previous chunk */
   if (query->last_end != 0) {
      /* Fence lives inside the most recently completed record, i.e. one
       * record size below the current head. */
      uint64_t fence_va = query->last->buf->gpu_address;
      fence_va += query->last_end - sizeof(struct gfx10_sh_query_buffer_mem);
      fence_va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
      si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
                        EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->last->buf, fence_va,
                        0xffffffff, PIPE_QUERY_GPU_FINISHED);
   }

   sctx->num_active_shader_queries--;

   if (sctx->num_active_shader_queries <= 0 || !si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) {
      si_set_internal_shader_buffer(sctx, GFX10_GS_QUERY_BUF, NULL);
      sctx->current_vs_state &= C_VS_STATE_STREAMOUT_QUERY_ENABLED;

      /* If a query_begin is followed by a query_end without a draw
       * in-between, we need to clear the atom to ensure that the
       * next query_begin will re-initialize the shader buffer. */
      si_set_atom_dirty(sctx, &sctx->atoms.s.shader_query, false);
   }

   return true;
}
1987ec681f3Smrg
1997ec681f3Smrgstatic void gfx10_sh_query_add_result(struct gfx10_sh_query *query,
2007ec681f3Smrg                                      struct gfx10_sh_query_buffer_mem *qmem,
2017ec681f3Smrg                                      union pipe_query_result *result)
2027ec681f3Smrg{
2037ec681f3Smrg   static const uint64_t mask = ((uint64_t)1 << 63) - 1;
2047ec681f3Smrg
2057ec681f3Smrg   switch (query->b.type) {
2067ec681f3Smrg   case PIPE_QUERY_PRIMITIVES_EMITTED:
2077ec681f3Smrg      result->u64 += qmem->stream[query->stream].emitted_primitives & mask;
2087ec681f3Smrg      break;
2097ec681f3Smrg   case PIPE_QUERY_PRIMITIVES_GENERATED:
2107ec681f3Smrg      result->u64 += qmem->stream[query->stream].generated_primitives & mask;
2117ec681f3Smrg      break;
2127ec681f3Smrg   case PIPE_QUERY_SO_STATISTICS:
2137ec681f3Smrg      result->so_statistics.num_primitives_written +=
2147ec681f3Smrg         qmem->stream[query->stream].emitted_primitives & mask;
2157ec681f3Smrg      result->so_statistics.primitives_storage_needed +=
2167ec681f3Smrg         qmem->stream[query->stream].generated_primitives & mask;
2177ec681f3Smrg      break;
2187ec681f3Smrg   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
2197ec681f3Smrg      result->b |= qmem->stream[query->stream].emitted_primitives !=
2207ec681f3Smrg                   qmem->stream[query->stream].generated_primitives;
2217ec681f3Smrg      break;
2227ec681f3Smrg   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
2237ec681f3Smrg      for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) {
2247ec681f3Smrg         result->b |= qmem->stream[stream].emitted_primitives !=
2257ec681f3Smrg                      qmem->stream[stream].generated_primitives;
2267ec681f3Smrg      }
2277ec681f3Smrg      break;
2287ec681f3Smrg   default:
2297ec681f3Smrg      assert(0);
2307ec681f3Smrg   }
2317ec681f3Smrg}
2327ec681f3Smrg
2337ec681f3Smrgstatic bool gfx10_sh_query_get_result(struct si_context *sctx, struct si_query *rquery, bool wait,
2347ec681f3Smrg                                      union pipe_query_result *result)
2357ec681f3Smrg{
2367ec681f3Smrg   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
2377ec681f3Smrg
2387ec681f3Smrg   util_query_clear_result(result, query->b.type);
2397ec681f3Smrg
2407ec681f3Smrg   if (unlikely(!query->first))
2417ec681f3Smrg      return false; /* earlier out of memory error */
2427ec681f3Smrg   assert(query->last);
2437ec681f3Smrg
2447ec681f3Smrg   for (struct gfx10_sh_query_buffer *qbuf = query->last;;
2457ec681f3Smrg        qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.prev, list)) {
2467ec681f3Smrg      unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
2477ec681f3Smrg      void *map;
2487ec681f3Smrg
2497ec681f3Smrg      if (rquery->b.flushed)
2507ec681f3Smrg         map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);
2517ec681f3Smrg      else
2527ec681f3Smrg         map = si_buffer_map(sctx, qbuf->buf, usage);
2537ec681f3Smrg
2547ec681f3Smrg      if (!map)
2557ec681f3Smrg         return false;
2567ec681f3Smrg
2577ec681f3Smrg      unsigned results_begin = 0;
2587ec681f3Smrg      unsigned results_end = qbuf->head;
2597ec681f3Smrg      if (qbuf == query->first)
2607ec681f3Smrg         results_begin = query->first_begin;
2617ec681f3Smrg      if (qbuf == query->last)
2627ec681f3Smrg         results_end = query->last_end;
2637ec681f3Smrg
2647ec681f3Smrg      while (results_begin != results_end) {
2657ec681f3Smrg         struct gfx10_sh_query_buffer_mem *qmem = map + results_begin;
2667ec681f3Smrg         results_begin += sizeof(*qmem);
2677ec681f3Smrg
2687ec681f3Smrg         gfx10_sh_query_add_result(query, qmem, result);
2697ec681f3Smrg      }
2707ec681f3Smrg
2717ec681f3Smrg      if (qbuf == query->first)
2727ec681f3Smrg         break;
2737ec681f3Smrg   }
2747ec681f3Smrg
2757ec681f3Smrg   return true;
2767ec681f3Smrg}
2777ec681f3Smrg
2787ec681f3Smrgstatic void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct si_query *rquery,
2797ec681f3Smrg                                               bool wait, enum pipe_query_value_type result_type,
2807ec681f3Smrg                                               int index, struct pipe_resource *resource,
2817ec681f3Smrg                                               unsigned offset)
2827ec681f3Smrg{
2837ec681f3Smrg   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
2847ec681f3Smrg   struct si_qbo_state saved_state = {};
2857ec681f3Smrg   struct pipe_resource *tmp_buffer = NULL;
2867ec681f3Smrg   unsigned tmp_buffer_offset = 0;
2877ec681f3Smrg
2887ec681f3Smrg   if (!sctx->sh_query_result_shader) {
2897ec681f3Smrg      sctx->sh_query_result_shader = gfx10_create_sh_query_result_cs(sctx);
2907ec681f3Smrg      if (!sctx->sh_query_result_shader)
2917ec681f3Smrg         return;
2927ec681f3Smrg   }
2937ec681f3Smrg
2947ec681f3Smrg   if (query->first != query->last) {
2957ec681f3Smrg      u_suballocator_alloc(&sctx->allocator_zeroed_memory, 16, 16, &tmp_buffer_offset, &tmp_buffer);
2967ec681f3Smrg      if (!tmp_buffer)
2977ec681f3Smrg         return;
2987ec681f3Smrg   }
2997ec681f3Smrg
3007ec681f3Smrg   si_save_qbo_state(sctx, &saved_state);
3017ec681f3Smrg
3027ec681f3Smrg   /* Pre-fill the constants configuring the shader behavior. */
3037ec681f3Smrg   struct {
3047ec681f3Smrg      uint32_t config;
3057ec681f3Smrg      uint32_t offset;
3067ec681f3Smrg      uint32_t chain;
3077ec681f3Smrg      uint32_t result_count;
3087ec681f3Smrg   } consts;
3097ec681f3Smrg   struct pipe_constant_buffer constant_buffer = {};
3107ec681f3Smrg
3117ec681f3Smrg   if (index >= 0) {
3127ec681f3Smrg      switch (query->b.type) {
3137ec681f3Smrg      case PIPE_QUERY_PRIMITIVES_GENERATED:
3147ec681f3Smrg         consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t);
3157ec681f3Smrg         consts.config = 0;
3167ec681f3Smrg         break;
3177ec681f3Smrg      case PIPE_QUERY_PRIMITIVES_EMITTED:
3187ec681f3Smrg         consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t);
3197ec681f3Smrg         consts.config = 0;
3207ec681f3Smrg         break;
3217ec681f3Smrg      case PIPE_QUERY_SO_STATISTICS:
3227ec681f3Smrg         consts.offset = sizeof(uint32_t) * (4 * index + query->stream);
3237ec681f3Smrg         consts.config = 0;
3247ec681f3Smrg         break;
3257ec681f3Smrg      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
3267ec681f3Smrg         consts.offset = 4 * sizeof(uint64_t) * query->stream;
3277ec681f3Smrg         consts.config = 2;
3287ec681f3Smrg         break;
3297ec681f3Smrg      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
3307ec681f3Smrg         consts.offset = 0;
3317ec681f3Smrg         consts.config = 3;
3327ec681f3Smrg         break;
3337ec681f3Smrg      default:
3347ec681f3Smrg         unreachable("bad query type");
3357ec681f3Smrg      }
3367ec681f3Smrg   } else {
3377ec681f3Smrg      /* Check result availability. */
3387ec681f3Smrg      consts.offset = 0;
3397ec681f3Smrg      consts.config = 1;
3407ec681f3Smrg   }
3417ec681f3Smrg
3427ec681f3Smrg   if (result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64)
3437ec681f3Smrg      consts.config |= 8;
3447ec681f3Smrg
3457ec681f3Smrg   constant_buffer.buffer_size = sizeof(consts);
3467ec681f3Smrg   constant_buffer.user_buffer = &consts;
3477ec681f3Smrg
3487ec681f3Smrg   /* Pre-fill the SSBOs and grid. */
3497ec681f3Smrg   struct pipe_shader_buffer ssbo[3];
3507ec681f3Smrg   struct pipe_grid_info grid = {};
3517ec681f3Smrg
3527ec681f3Smrg   ssbo[1].buffer = tmp_buffer;
3537ec681f3Smrg   ssbo[1].buffer_offset = tmp_buffer_offset;
3547ec681f3Smrg   ssbo[1].buffer_size = 16;
3557ec681f3Smrg
3567ec681f3Smrg   ssbo[2] = ssbo[1];
3577ec681f3Smrg
3587ec681f3Smrg   grid.block[0] = 1;
3597ec681f3Smrg   grid.block[1] = 1;
3607ec681f3Smrg   grid.block[2] = 1;
3617ec681f3Smrg   grid.grid[0] = 1;
3627ec681f3Smrg   grid.grid[1] = 1;
3637ec681f3Smrg   grid.grid[2] = 1;
3647ec681f3Smrg
3657ec681f3Smrg   struct gfx10_sh_query_buffer *qbuf = query->first;
3667ec681f3Smrg   for (;;) {
3677ec681f3Smrg      unsigned begin = qbuf == query->first ? query->first_begin : 0;
3687ec681f3Smrg      unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0;
3697ec681f3Smrg      if (!end)
3707ec681f3Smrg         continue;
3717ec681f3Smrg
3727ec681f3Smrg      ssbo[0].buffer = &qbuf->buf->b.b;
3737ec681f3Smrg      ssbo[0].buffer_offset = begin;
3747ec681f3Smrg      ssbo[0].buffer_size = end - begin;
3757ec681f3Smrg
3767ec681f3Smrg      consts.result_count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);
3777ec681f3Smrg      consts.chain = 0;
3787ec681f3Smrg      if (qbuf != query->first)
3797ec681f3Smrg         consts.chain |= 1;
3807ec681f3Smrg      if (qbuf != query->last)
3817ec681f3Smrg         consts.chain |= 2;
3827ec681f3Smrg
3837ec681f3Smrg      if (qbuf == query->last) {
3847ec681f3Smrg         ssbo[2].buffer = resource;
3857ec681f3Smrg         ssbo[2].buffer_offset = offset;
3867ec681f3Smrg         ssbo[2].buffer_size = 8;
3877ec681f3Smrg      }
3887ec681f3Smrg
3897ec681f3Smrg      sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, false, &constant_buffer);
3907ec681f3Smrg
3917ec681f3Smrg      if (wait) {
3927ec681f3Smrg         uint64_t va;
3937ec681f3Smrg
3947ec681f3Smrg         /* Wait for result availability. Wait only for readiness
3957ec681f3Smrg          * of the last entry, since the fence writes should be
3967ec681f3Smrg          * serialized in the CP.
3977ec681f3Smrg          */
3987ec681f3Smrg         va = qbuf->buf->gpu_address;
3997ec681f3Smrg         va += end - sizeof(struct gfx10_sh_query_buffer_mem);
4007ec681f3Smrg         va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
4017ec681f3Smrg
4027ec681f3Smrg         si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
4037ec681f3Smrg      }
4047ec681f3Smrg
4057ec681f3Smrg      si_launch_grid_internal_ssbos(sctx, &grid, sctx->sh_query_result_shader,
4067ec681f3Smrg                                    SI_OP_SYNC_PS_BEFORE | SI_OP_SYNC_AFTER, SI_COHERENCY_SHADER,
4077ec681f3Smrg                                    3, ssbo, 0x6);
4087ec681f3Smrg
4097ec681f3Smrg      if (qbuf == query->last)
4107ec681f3Smrg         break;
4117ec681f3Smrg      qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list);
4127ec681f3Smrg   }
4137ec681f3Smrg
4147ec681f3Smrg   si_restore_qbo_state(sctx, &saved_state);
4157ec681f3Smrg   pipe_resource_reference(&tmp_buffer, NULL);
4167ec681f3Smrg}
4177ec681f3Smrg
/* Dispatch table for gfx10 shader (streamout/primitive) queries. */
static const struct si_query_ops gfx10_sh_query_ops = {
   .destroy = gfx10_sh_query_destroy,
   .begin = gfx10_sh_query_begin,
   .end = gfx10_sh_query_end,
   .get_result = gfx10_sh_query_get_result,
   .get_result_resource = gfx10_sh_query_get_result_resource,
};
4257ec681f3Smrg
4267ec681f3Smrgstruct pipe_query *gfx10_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type,
4277ec681f3Smrg                                         unsigned index)
4287ec681f3Smrg{
4297ec681f3Smrg   struct gfx10_sh_query *query = CALLOC_STRUCT(gfx10_sh_query);
4307ec681f3Smrg   if (unlikely(!query))
4317ec681f3Smrg      return NULL;
4327ec681f3Smrg
4337ec681f3Smrg   query->b.ops = &gfx10_sh_query_ops;
4347ec681f3Smrg   query->b.type = query_type;
4357ec681f3Smrg   query->stream = index;
4367ec681f3Smrg
4377ec681f3Smrg   return (struct pipe_query *)query;
4387ec681f3Smrg}
4397ec681f3Smrg
/* Per-context initialization: empty buffer list and atom emit hook. */
void gfx10_init_query(struct si_context *sctx)
{
   list_inithead(&sctx->shader_query_buffers);
   sctx->atoms.s.shader_query.emit = emit_shader_query;
}
4457ec681f3Smrg
4467ec681f3Smrgvoid gfx10_destroy_query(struct si_context *sctx)
4477ec681f3Smrg{
4487ec681f3Smrg   while (!list_is_empty(&sctx->shader_query_buffers)) {
4497ec681f3Smrg      struct gfx10_sh_query_buffer *qbuf =
4507ec681f3Smrg         list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
4517ec681f3Smrg      list_del(&qbuf->list);
4527ec681f3Smrg
4537ec681f3Smrg      assert(!qbuf->refcount);
4547ec681f3Smrg      si_resource_reference(&qbuf->buf, NULL);
4557ec681f3Smrg      FREE(qbuf);
4567ec681f3Smrg   }
4577ec681f3Smrg}
458