1#include "zink_query.h"
2
3#include "zink_context.h"
4#include "zink_fence.h"
5#include "zink_resource.h"
6#include "zink_screen.h"
7
8#include "util/hash_table.h"
9#include "util/set.h"
10#include "util/u_dump.h"
11#include "util/u_inlines.h"
12#include "util/u_memory.h"
13
14#if defined(PIPE_ARCH_X86_64) || defined(PIPE_ARCH_PPC_64) || defined(PIPE_ARCH_AARCH64) || defined(PIPE_ARCH_MIPS64)
15#define NUM_QUERIES 5000
16#else
17#define NUM_QUERIES 500
18#endif
19
/* one link in a query's chain of readback buffers: pool results are copied
 * into these so they survive pool resets and can be mapped for CPU reads
 */
struct zink_query_buffer {
   struct list_head list; /* link in zink_query::buffers */
   unsigned num_results; /* number of result entries copied into 'buffer' so far */
   struct pipe_resource *buffer; /* readback storage for the base query pool */
   struct pipe_resource *xfb_buffers[PIPE_MAX_VERTEX_STREAMS - 1]; /* per-stream readback storage; mirrors zink_query::xfb_query_pool */
};
26
struct zink_query {
   struct threaded_query base; /* must be first: threaded context casts pipe_query to this */
   enum pipe_query_type type; /* gallium query type this object implements */

   VkQueryPool query_pool; /* base pool (stream 0 / non-xfb results) */
   VkQueryPool xfb_query_pool[PIPE_MAX_VERTEX_STREAMS - 1]; //stream 0 is in the base pool
   unsigned curr_query, last_start; /* next free pool slot; first slot of the current begin..end range */

   VkQueryType vkqtype; /* vulkan query type backing 'type' */
   unsigned index; /* statistics index (PIPELINE_STATISTICS_SINGLE) or vertex stream (xfb queries) */
   bool precise; /* VK_QUERY_CONTROL_PRECISE_BIT requested (occlusion counters only) */
   bool xfb_running; /* an xfb-pool query has been begun and not yet ended */
   bool xfb_overflow; /* NOTE(review): never read/written in this chunk — confirm use elsewhere */

   bool active; /* query is considered active by vk */
   bool needs_reset; /* query is considered active by vk and cannot be destroyed */
   bool dead; /* query should be destroyed when its fence finishes */
   bool needs_update; /* query needs to update its qbos */

   struct list_head active_list; /* link in ctx->suspended_queries while suspended */

   struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */
   bool have_gs[NUM_QUERIES]; /* geometry shaders use GEOMETRY_SHADER_PRIMITIVES_BIT */
   bool have_xfb[NUM_QUERIES]; /* xfb was active during this query */

   struct zink_batch_usage *batch_id; //batch that the query was started in

   struct list_head buffers; /* chain of zink_query_buffer readback buffers */
   union {
      struct zink_query_buffer *curr_qbo;
      struct pipe_fence_handle *fence; //PIPE_QUERY_GPU_FINISHED
   };

   struct zink_resource *predicate; /* buffer used for conditional rendering */
   bool predicate_dirty; /* predicate buffer must be refreshed before next use */
};
63
/* forward declarations: update_qbo and reset_pool call into each other's users below */
static void
update_qbo(struct zink_context *ctx, struct zink_query *q);
static void
reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q);
68
69static inline unsigned
70get_num_results(enum pipe_query_type query_type)
71{
72   switch (query_type) {
73   case PIPE_QUERY_OCCLUSION_COUNTER:
74   case PIPE_QUERY_OCCLUSION_PREDICATE:
75   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
76   case PIPE_QUERY_TIME_ELAPSED:
77   case PIPE_QUERY_TIMESTAMP:
78   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
79      return 1;
80   case PIPE_QUERY_PRIMITIVES_GENERATED:
81   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
82   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
83   case PIPE_QUERY_PRIMITIVES_EMITTED:
84      return 2;
85   default:
86      debug_printf("unknown query: %s\n",
87                   util_str_query_type(query_type, true));
88      unreachable("zink: unknown query type");
89   }
90}
91
92static VkQueryPipelineStatisticFlags
93pipeline_statistic_convert(enum pipe_statistics_query_index idx)
94{
95   unsigned map[] = {
96      [PIPE_STAT_QUERY_IA_VERTICES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT,
97      [PIPE_STAT_QUERY_IA_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
98      [PIPE_STAT_QUERY_VS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT,
99      [PIPE_STAT_QUERY_GS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT,
100      [PIPE_STAT_QUERY_GS_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT,
101      [PIPE_STAT_QUERY_C_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
102      [PIPE_STAT_QUERY_C_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
103      [PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
104      [PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
105      [PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
106      [PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT
107   };
108   assert(idx < ARRAY_SIZE(map));
109   return map[idx];
110}
111
112static void
113timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
114{
115   /* The number of valid bits in a timestamp value is determined by
116    * the VkQueueFamilyProperties::timestampValidBits property of the queue on which the timestamp is written.
117    * - 17.5. Timestamp Queries
118    */
119   if (screen->timestamp_valid_bits < 64)
120      *timestamp &= (1ull << screen->timestamp_valid_bits) - 1;
121
122   /* The number of nanoseconds it takes for a timestamp value to be incremented by 1
123    * can be obtained from VkPhysicalDeviceLimits::timestampPeriod
124    * - 17.5. Timestamp Queries
125    */
126   *timestamp *= screen->info.props.limits.timestampPeriod;
127}
128
129static VkQueryType
130convert_query_type(unsigned query_type, bool *precise)
131{
132   *precise = false;
133   switch (query_type) {
134   case PIPE_QUERY_OCCLUSION_COUNTER:
135      *precise = true;
136      FALLTHROUGH;
137   case PIPE_QUERY_OCCLUSION_PREDICATE:
138   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
139      return VK_QUERY_TYPE_OCCLUSION;
140   case PIPE_QUERY_TIME_ELAPSED:
141   case PIPE_QUERY_TIMESTAMP:
142      return VK_QUERY_TYPE_TIMESTAMP;
143   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
144   case PIPE_QUERY_PRIMITIVES_GENERATED:
145      return VK_QUERY_TYPE_PIPELINE_STATISTICS;
146   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
147   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
148   case PIPE_QUERY_PRIMITIVES_EMITTED:
149      return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
150   default:
151      debug_printf("unknown query: %s\n",
152                   util_str_query_type(query_type, true));
153      unreachable("zink: unknown query type");
154   }
155}
156
157static bool
158needs_stats_list(struct zink_query *query)
159{
160   return query->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
161          query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
162          query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
163}
164
165static bool
166is_time_query(struct zink_query *query)
167{
168   return query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED;
169}
170
171static bool
172is_so_overflow_query(struct zink_query *query)
173{
174   return query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
175}
176
177static bool
178is_bool_query(struct zink_query *query)
179{
180   return is_so_overflow_query(query) ||
181          query->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
182          query->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
183          query->type == PIPE_QUERY_GPU_FINISHED;
184}
185
/* Ensure the query has a spare readback buffer available for results.
 * Allocates a new zink_query_buffer (plus per-stream xfb buffers where the
 * query type needs them) and appends it to query->buffers.
 * Returns false on allocation failure (all partial allocations released).
 */
static bool
qbo_append(struct pipe_screen *screen, struct zink_query *query)
{
   /* NOTE(review): curr_qbo->list.next non-NULL is used as "a later buffer
    * already exists in the chain" — confirm against the list implementation
    */
   if (query->curr_qbo && query->curr_qbo->list.next)
      return true;
   struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer);
   if (!qbo)
      return false;
   qbo->buffer = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                  PIPE_USAGE_STAGING,
                                  /* this is the maximum possible size of the results in a given buffer */
                                  NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
   if (!qbo->buffer)
      goto fail;
   if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) {
      /* need separate xfb buffer */
      qbo->xfb_buffers[0] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                     PIPE_USAGE_STAGING,
                                     /* this is the maximum possible size of the results in a given buffer */
                                     NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
      if (!qbo->xfb_buffers[0])
         goto fail;
   } else if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* need to monitor all xfb streams */
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++) {
         /* need separate xfb buffer */
         qbo->xfb_buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                        PIPE_USAGE_STAGING,
                                        /* this is the maximum possible size of the results in a given buffer */
                                        NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
         if (!qbo->xfb_buffers[i])
            goto fail;
      }
   }
   list_addtail(&qbo->list, &query->buffers);

   return true;
fail:
   /* release whatever was allocated before the failure; unallocated slots are
    * NULL from CALLOC and pipe_resource_reference(NULL) is a no-op
    */
   pipe_resource_reference(&qbo->buffer, NULL);
   for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++)
      pipe_resource_reference(&qbo->xfb_buffers[i], NULL);
   FREE(qbo);
   return false;
}
230
/* Free all vk and gallium objects owned by the query, then the query itself.
 * Must only be called once the query's last batch has completed (asserted).
 */
static void
destroy_query(struct zink_screen *screen, struct zink_query *query)
{
   assert(zink_screen_usage_check_completion(screen, query->batch_id));
   if (query->query_pool)
      VKSCR(DestroyQueryPool)(screen->dev, query->query_pool, NULL);
   /* _SAFE variant: each qbo is freed while iterating */
   struct zink_query_buffer *qbo, *next;
   LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) {
      pipe_resource_reference(&qbo->buffer, NULL);
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++)
         pipe_resource_reference(&qbo->xfb_buffers[i], NULL);
      FREE(qbo);
   }
   for (unsigned i = 0; i < ARRAY_SIZE(query->xfb_query_pool); i++) {
      if (query->xfb_query_pool[i])
         VKSCR(DestroyQueryPool)(screen->dev, query->xfb_query_pool[i], NULL);
   }
   /* drops this query's reference on the predicate buffer, if any */
   pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL);
   FREE(query);
}
251
/* Rewind the query to its first readback buffer and discard any stored results.
 * Only valid for non-GPU_FINISHED queries (curr_qbo is unioned with 'fence').
 */
static void
reset_qbo(struct zink_query *q)
{
   q->curr_qbo = list_first_entry(&q->buffers, struct zink_query_buffer, list);
   q->curr_qbo->num_results = 0;
}
258
259static struct pipe_query *
260zink_create_query(struct pipe_context *pctx,
261                  unsigned query_type, unsigned index)
262{
263   struct zink_screen *screen = zink_screen(pctx->screen);
264   struct zink_query *query = CALLOC_STRUCT(zink_query);
265   VkQueryPoolCreateInfo pool_create = {0};
266
267   if (!query)
268      return NULL;
269   list_inithead(&query->buffers);
270
271   query->index = index;
272   query->type = query_type;
273   if (query->type == PIPE_QUERY_GPU_FINISHED)
274      return (struct pipe_query *)query;
275   query->vkqtype = convert_query_type(query_type, &query->precise);
276   if (query->vkqtype == -1)
277      return NULL;
278
279   assert(!query->precise || query->vkqtype == VK_QUERY_TYPE_OCCLUSION);
280
281   query->curr_query = 0;
282
283   pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
284   pool_create.queryType = query->vkqtype;
285   pool_create.queryCount = NUM_QUERIES;
286   if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED)
287     pool_create.pipelineStatistics = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
288                                      VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT;
289   else if (query_type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
290      pool_create.pipelineStatistics = pipeline_statistic_convert(index);
291
292   VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->query_pool);
293   if (status != VK_SUCCESS)
294      goto fail;
295   if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
296      /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
297      pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
298      pool_create.queryType = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
299      pool_create.queryCount = NUM_QUERIES;
300
301      status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->xfb_query_pool[0]);
302      if (status != VK_SUCCESS)
303         goto fail;
304   } else if (query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
305      /* need to monitor all xfb streams */
306      for (unsigned i = 0; i < ARRAY_SIZE(query->xfb_query_pool); i++) {
307         status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->xfb_query_pool[i]);
308         if (status != VK_SUCCESS)
309            goto fail;
310      }
311   }
312   if (!qbo_append(pctx->screen, query))
313      goto fail;
314   struct zink_batch *batch = &zink_context(pctx)->batch;
315   batch->has_work = true;
316   query->needs_reset = true;
317   if (query->type == PIPE_QUERY_TIMESTAMP) {
318      query->active = true;
319      /* defer pool reset until end_query since we're guaranteed to be threadsafe then */
320      reset_qbo(query);
321   }
322   return (struct pipe_query *)query;
323fail:
324   destroy_query(screen, query);
325   return NULL;
326}
327
/* pipe_context::destroy_query implementation.
 * If the query is still referenced by an in-flight batch, destruction is
 * deferred: the query is marked dead and freed later by zink_prune_query()
 * when its batch completes.
 */
static void
zink_destroy_query(struct pipe_context *pctx,
                   struct pipe_query *q)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;

   /* only destroy if this query isn't active on any batches,
    * otherwise just mark dead and wait
    */
   if (query->batch_id) {
      /* atomic: zink_prune_query may read 'dead' from the flush thread */
      p_atomic_set(&query->dead, true);
      return;
   }

   destroy_query(screen, query);
}
345
346void
347zink_prune_query(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_query *query)
348{
349   if (!zink_batch_usage_matches(query->batch_id, bs))
350      return;
351   query->batch_id = NULL;
352   if (p_atomic_read(&query->dead))
353      destroy_query(screen, query);
354}
355
/* Accumulate raw 64-bit pool results into a gallium query result.
 * 'results' holds num_results entries of get_num_results(type) values each;
 * 'xfb_results' is the parallel xfb-pool data (PRIMITIVES_GENERATED only).
 */
static void
check_query_results(struct zink_query *query, union pipe_query_result *result,
                    int num_results, uint64_t *results, uint64_t *xfb_results)
{
   uint64_t last_val = 0;
   int result_size = get_num_results(query->type);
   /* i steps by result_size, so i indexes the first value of each entry */
   for (int i = 0; i < num_results * result_size; i += result_size) {
      switch (query->type) {
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      case PIPE_QUERY_GPU_FINISHED:
         /* boolean: any nonzero sample count passes */
         result->b |= results[i] != 0;
         break;

      case PIPE_QUERY_TIME_ELAPSED:
      case PIPE_QUERY_TIMESTAMP:
         /* the application can sum the differences between all N queries to determine the total execution time.
          * - 17.5. Timestamp Queries
          */
         if (query->type != PIPE_QUERY_TIME_ELAPSED || i)
            result->u64 += results[i] - last_val;
         last_val = results[i];
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
         result->u64 += results[i];
         break;
      case PIPE_QUERY_PRIMITIVES_GENERATED:
         /* i / 2 maps the entry index back to a query slot (result_size is 2 here) */
         if (query->have_xfb[query->last_start + i / 2] || query->index)
            result->u64 += xfb_results[i + 1];
         else
            /* if a given draw had a geometry shader, we need to use the second result */
            result->u64 += results[i + query->have_gs[query->last_start + i / 2]];
         break;
      case PIPE_QUERY_PRIMITIVES_EMITTED:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         result->u64 += results[i];
         break;
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         /* overflow means more primitives were needed than were written */
         if (query->have_xfb[query->last_start + i / 2])
            result->b |= results[i] != results[i + 1];
         break;
      case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
         result->u64 += results[i];
         break;

      default:
         debug_printf("unhandled query type: %s\n",
                      util_str_query_type(query->type, true));
         unreachable("unexpected query type");
      }
   }
}
418
/* Read back and accumulate the query's results from its readback buffers.
 * Returns false if a non-blocking map would have stalled (wait == false) or a
 * map failed; true once 'result' holds the accumulated value.
 */
static bool
get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;
   unsigned flags = PIPE_MAP_READ;

   if (!wait)
      flags |= PIPE_MAP_DONTBLOCK;
   if (query->base.flushed)
      /* this is not a context-safe operation; ensure map doesn't use slab alloc */
      flags |= PIPE_MAP_THREAD_SAFE;

   util_query_clear_result(result, query->type);

   int num_results = query->curr_query - query->last_start;
   int result_size = get_num_results(query->type) * sizeof(uint64_t);

   struct zink_query_buffer *qbo;
   struct pipe_transfer *xfer;
   /* results may be spread across multiple qbos if the pool was reset mid-query */
   LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) {
      uint64_t *xfb_results = NULL;
      uint64_t *results;
      /* timestamp qbos only ever hold a single result at offset 0 */
      bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT;
      if (!qbo->num_results)
         continue;
      results = pipe_buffer_map_range(pctx, qbo->buffer, 0,
                                      (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer);
      if (!results) {
         if (wait)
            debug_printf("zink: qbo read failed!");
         return false;
      }
      struct pipe_transfer *xfb_xfer = NULL;
      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) {
         /* PRIMITIVES_GENERATED also needs the parallel xfb-pool data */
         xfb_results = pipe_buffer_map_range(pctx, qbo->xfb_buffers[0], 0,
                                         qbo->num_results * result_size, flags, &xfb_xfer);
         if (!xfb_results) {
            if (wait)
               debug_printf("zink: xfb qbo read failed!");
            pipe_buffer_unmap(pctx, xfer);
            return false;
         }
      }
      check_query_results(query, result, is_timestamp ? 1 : qbo->num_results, results, xfb_results);
      pipe_buffer_unmap(pctx, xfer);
      if (xfb_xfer)
         pipe_buffer_unmap(pctx, xfb_xfer);
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
         /* check every stream's buffer until an overflow is found */
         for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers) && !result->b; i++) {
            uint64_t *results = pipe_buffer_map_range(pctx, qbo->xfb_buffers[i],
                                              0,
                                              qbo->num_results * result_size, flags, &xfer);
            if (!results) {
               if (wait)
                  debug_printf("zink: qbo read failed!");
               return false;
            }
            /* NOTE(review): uses the function-level num_results here rather than
             * qbo->num_results as above — confirm this is intentional
             */
            check_query_results(query, result, num_results, results, xfb_results);
            pipe_buffer_unmap(pctx, xfer);
         }
         /* if overflow is detected we can stop */
         if (result->b)
            break;
      }
   }

   if (is_time_query(query))
      timestamp_to_nanoseconds(screen, &result->u64);

   return true;
}
494
495static void
496force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_query_value_type result_type, struct pipe_resource *pres, unsigned offset)
497{
498   struct pipe_context *pctx = &ctx->base;
499   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
500   struct zink_query *query = (struct zink_query*)pquery;
501   union pipe_query_result result;
502
503   if (query->needs_update)
504      update_qbo(ctx, query);
505
506   bool success = get_query_result(pctx, pquery, true, &result);
507   if (!success) {
508      debug_printf("zink: getting query result failed\n");
509      return;
510   }
511
512   if (result_type <= PIPE_QUERY_TYPE_U32) {
513      uint32_t u32;
514      uint32_t limit;
515      if (result_type == PIPE_QUERY_TYPE_I32)
516         limit = INT_MAX;
517      else
518         limit = UINT_MAX;
519      if (is_bool_query(query))
520         u32 = result.b;
521      else
522         u32 = MIN2(limit, result.u64);
523      pipe_buffer_write(pctx, pres, offset, result_size, &u32);
524   } else {
525      uint64_t u64;
526      if (is_bool_query(query))
527         u64 = result.b;
528      else
529         u64 = result.u64;
530      pipe_buffer_write(pctx, pres, offset, result_size, &u64);
531   }
532}
533
/* Record a GPU-side copy of 'num_results' results from 'pool' (starting at
 * 'query_id') into 'res' at 'offset' via vkCmdCopyQueryPoolResults.
 */
static void
copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, VkQueryPool pool,
                            unsigned query_id, struct zink_resource *res, unsigned offset,
                            int num_results, VkQueryResultFlags flags)
{
   struct zink_batch *batch = &ctx->batch;
   unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? sizeof(uint64_t) : sizeof(uint32_t);
   unsigned base_result_size = get_num_results(query->type) * type_size;
   unsigned result_size = base_result_size * num_results;
   /* availability adds one extra value per query at the end of each entry */
   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      result_size += type_size;
   /* vkCmdCopyQueryPoolResults must be recorded outside a render pass */
   zink_batch_no_rp(ctx);
   /* if it's a single query that doesn't need special handling, we can copy it and be done */
   zink_batch_reference_resource_rw(batch, res, true);
   zink_resource_buffer_barrier(ctx, res, VK_ACCESS_TRANSFER_WRITE_BIT, 0);
   /* mark the written range as holding valid data */
   util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size);
   assert(query_id < NUM_QUERIES);
   /* stride between entries == entry size: results are packed tightly */
   VKCTX(CmdCopyQueryPoolResults)(batch->state->cmdbuf, pool, query_id, num_results, res->obj->buffer,
                             offset, base_result_size, flags);
}
554
555static void
556copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res, unsigned offset, int num_results, VkQueryResultFlags flags)
557{
558   copy_pool_results_to_buffer(ctx, query, query->query_pool, query->last_start, res, offset, num_results, flags);
559}
560
/* Reset all of the query's vulkan pools and rewind its bookkeeping.
 * Any pending qbo copies are flushed first so no results are lost.
 */
static void
reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   /* This command must only be called outside of a render pass instance
    *
    * - vkCmdResetQueryPool spec
    */
   zink_batch_no_rp(ctx);
   if (q->needs_update)
      update_qbo(ctx, q);

   VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->query_pool, 0, NUM_QUERIES);
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED)
      VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->xfb_query_pool[0], 0, NUM_QUERIES);
   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++)
         VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->xfb_query_pool[i], 0, NUM_QUERIES);
   }
   /* per-slot draw metadata is only meaningful for the slots just reset */
   memset(q->have_gs, 0, sizeof(q->have_gs));
   memset(q->have_xfb, 0, sizeof(q->have_xfb));
   q->last_start = q->curr_query = 0;
   q->needs_reset = false;
   /* create new qbo for non-timestamp queries:
    * timestamp queries should never need more than 2 entries in the qbo
    */
   if (q->type == PIPE_QUERY_TIMESTAMP)
      return;
   if (qbo_append(ctx->base.screen, q))
      reset_qbo(q);
   else
      debug_printf("zink: qbo alloc failed on reset!");
}
593
594static inline unsigned
595get_buffer_offset(struct zink_query *q, struct pipe_resource *pres, unsigned query_id)
596{
597   return (query_id - q->last_start) * get_num_results(q->type) * sizeof(uint64_t);
598}
599
/* Record GPU copies of the most recent query result (slot curr_query - 1)
 * from the vk pool(s) into the current readback buffer(s), then bump the
 * qbo's result count. Clears q->needs_update.
 */
static void
update_qbo(struct zink_context *ctx, struct zink_query *q)
{
   struct zink_query_buffer *qbo = q->curr_qbo;
   unsigned offset = 0;
   /* copy the last-completed slot */
   uint32_t query_id = q->curr_query - 1;
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP || q->type == PIPE_QUERY_TIMESTAMP_DISJOINT;
   /* timestamp queries just write to offset 0 always */
   if (!is_timestamp)
      offset = get_buffer_offset(q, qbo->buffer, query_id);
   copy_pool_results_to_buffer(ctx, q, q->query_pool, query_id, zink_resource(qbo->buffer),
                          offset,
                          1, VK_QUERY_RESULT_64_BIT);

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       q->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* these types may use an xfb pool/buffer; fall back to the base
       * pool/buffer when the xfb variant wasn't allocated
       */
      copy_pool_results_to_buffer(ctx, q,
                                  q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool,
                                  query_id,
                                  zink_resource(qbo->xfb_buffers[0] ? qbo->xfb_buffers[0] : qbo->buffer),
                             get_buffer_offset(q, qbo->xfb_buffers[0] ? qbo->xfb_buffers[0] : qbo->buffer, query_id),
                             1, VK_QUERY_RESULT_64_BIT);
   }

   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* every stream's pool must be copied to its matching buffer */
      for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) {
         copy_pool_results_to_buffer(ctx, q, q->xfb_query_pool[i], query_id, zink_resource(qbo->xfb_buffers[i]),
                                get_buffer_offset(q, qbo->xfb_buffers[i], query_id),
                                1, VK_QUERY_RESULT_64_BIT);
      }
   }

   /* timestamps overwrite offset 0, so the qbo only ever has one result */
   if (!is_timestamp)
      q->curr_qbo->num_results++;
   else
      q->curr_qbo->num_results = 1;
   q->needs_update = false;
}
639
/* Record the vk commands that start this query on the given batch and track
 * the query as active on the batch state.
 */
static void
begin_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   VkQueryControlFlags flags = 0;

   q->predicate_dirty = true;
   /* deferred reset from pool exhaustion or first use */
   if (q->needs_reset)
      reset_pool(ctx, batch, q);
   assert(q->curr_query < NUM_QUERIES);
   q->active = true;
   batch->has_work = true;
   if (q->type == PIPE_QUERY_TIME_ELAPSED) {
      /* TIME_ELAPSED writes its start timestamp here; the end timestamp comes
       * from a later slot, and check_query_results sums the differences
       */
      VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, q->query_pool, q->curr_query);
      q->curr_query++;
      update_qbo(ctx, q);
      zink_batch_usage_set(&q->batch_id, batch->state);
      _mesa_set_add(batch->state->active_queries, q);
   }
   /* ignore the rest of begin_query for timestamps */
   if (is_time_query(q))
      return;
   if (q->precise)
      flags |= VK_QUERY_CONTROL_PRECISE_BIT;
   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       q->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* indexed begin on the query's vertex stream; base pool is the
       * fallback when no dedicated xfb pool exists
       */
      VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf,
                                     q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool,
                                     q->curr_query,
                                     flags,
                                     q->index);
      q->xfb_running = true;
   } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* stream 0 uses the base pool; streams 1..N use the xfb pools */
      VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf,
                                     q->query_pool,
                                     q->curr_query,
                                     flags,
                                     0);
      for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++)
         VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf,
                                        q->xfb_query_pool[i],
                                        q->curr_query,
                                        flags,
                                        i + 1);
      q->xfb_running = true;
   }
   /* non-xfb vk query types use the plain begin */
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
      VKCTX(CmdBeginQuery)(batch->state->cmdbuf, q->query_pool, q->curr_query, flags);
   if (needs_stats_list(q))
      list_addtail(&q->stats_list, &ctx->primitives_generated_queries);
   zink_batch_usage_set(&q->batch_id, batch->state);
   _mesa_set_add(batch->state->active_queries, q);
}
693
694static bool
695zink_begin_query(struct pipe_context *pctx,
696                 struct pipe_query *q)
697{
698   struct zink_query *query = (struct zink_query *)q;
699   struct zink_context *ctx = zink_context(pctx);
700   struct zink_batch *batch = &ctx->batch;
701
702   query->last_start = query->curr_query;
703   /* drop all past results */
704   reset_qbo(query);
705
706   begin_query(ctx, batch, query);
707
708   return true;
709}
710
711static void
712update_query_id(struct zink_context *ctx, struct zink_query *q)
713{
714   if (++q->curr_query == NUM_QUERIES) {
715      /* always reset on start; this ensures we can actually submit the batch that the current query is on */
716      q->needs_reset = true;
717   }
718   ctx->batch.has_work = true;
719
720   if (ctx->batch.in_rp)
721      q->needs_update = true;
722   else
723      update_qbo(ctx, q);
724}
725
/* Record the vk commands that end this (non-timestamp) query and advance to
 * the next pool slot.
 */
static void
end_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   ASSERTED struct zink_query_buffer *qbo = q->curr_qbo;
   assert(qbo);
   assert(!is_time_query(q));
   q->active = false;
   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
            q->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
            q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* mirror of begin_query: indexed end on the same pool/stream */
      VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf,
                                   q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool,
                                   q->curr_query, q->index);
   }

   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* stream 0 from the base pool, streams 1..N from the xfb pools */
      VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf, q->query_pool, q->curr_query, 0);
      for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) {
         VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf, q->xfb_query_pool[i], q->curr_query, i + 1);
      }
   }
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && !is_time_query(q))
      VKCTX(CmdEndQuery)(batch->state->cmdbuf, q->query_pool, q->curr_query);

   /* no longer active: stop tracking on the statistics list */
   if (needs_stats_list(q))
      list_delinit(&q->stats_list);

   update_query_id(ctx, q);
}
755
/* pipe_context::end_query implementation.
 * GPU_FINISHED queries just capture a deferred fence; timestamp queries write
 * their value here (begin is a no-op for them); everything else records the
 * matching vk end commands.
 */
static bool
zink_end_query(struct pipe_context *pctx,
               struct pipe_query *q)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)q;
   struct zink_batch *batch = &ctx->batch;

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      /* deferred flush: the fence is resolved in zink_get_query_result */
      pctx->flush(pctx, &query->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   /* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */
   threaded_context_unwrap_sync(pctx);

   if (needs_stats_list(query))
      list_delinit(&query->stats_list);
   if (is_time_query(query)) {
      if (query->needs_reset)
         reset_pool(ctx, batch, query);
      /* BOTTOM_OF_PIPE: timestamp is written after all prior work completes */
      VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                          query->query_pool, query->curr_query);
      zink_batch_usage_set(&query->batch_id, batch->state);
      _mesa_set_add(batch->state->active_queries, query);
      update_query_id(ctx, query);
   } else if (query->active)
      end_query(ctx, batch, query);

   return true;
}
787
/* pipe_context::get_query_result implementation: flush/wait as needed, then
 * read the accumulated result back from the qbo chain.
 */
static bool
zink_get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_query *query = (void*)q;
   struct zink_context *ctx = zink_context(pctx);

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = pctx->screen;

      /* pass the context only if the deferred flush hasn't completed yet */
      result->b = screen->fence_finish(screen, query->base.flushed ? NULL : pctx,
                                        query->fence, wait ? PIPE_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   /* flush any pending qbo copies before mapping the buffers */
   if (query->needs_update)
      update_qbo(ctx, query);

   if (zink_batch_usage_is_unflushed(query->batch_id)) {
      /* results can't exist until the query's batch is submitted */
      if (!threaded_query(q)->flushed)
         pctx->flush(pctx, NULL, 0);
      if (!wait)
         return false;
   } else if (!threaded_query(q)->flushed &&
              /* timeline drivers can wait during buffer map */
              !zink_screen(pctx->screen)->info.have_KHR_timeline_semaphore)
      zink_batch_usage_check_completion(ctx, query->batch_id);

   return get_query_result(pctx, q, wait, result);
}
820
/* Suspend all queries active on a batch that is about to end.
 *
 * Each still-active (non-timestamp) query is vk-ended and moved onto
 * ctx->suspended_queries so zink_resume_queries() can restart it on the next
 * batch.  Pending qbo updates are flushed, and pools that have consumed more
 * than half their query ids get reset to avoid running out.
 */
void
zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch)
{
   set_foreach(batch->state->active_queries, entry) {
      struct zink_query *query = (void*)entry->key;
      /* if a query isn't active here then we don't need to reactivate it on the next batch */
      if (query->active && !is_time_query(query)) {
         end_query(ctx, batch, query);
         /* the fence is going to steal the set off the batch, so we have to copy
          * the active queries onto a list
          */
         list_addtail(&query->active_list, &ctx->suspended_queries);
      }
      if (query->needs_update)
         update_qbo(ctx, query);
      /* recycle pool space before curr_query can overflow NUM_QUERIES */
      if (query->last_start && query->curr_query > NUM_QUERIES / 2)
         reset_pool(ctx, batch, query);
   }
}
840
841void
842zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch)
843{
844   struct zink_query *query, *next;
845   LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
846      begin_query(ctx, batch, query);
847      list_delinit(&query->active_list);
848   }
849}
850
851void
852zink_query_update_gs_states(struct zink_context *ctx)
853{
854   struct zink_query *query;
855   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
856      assert(query->curr_query < ARRAY_SIZE(query->have_gs));
857      assert(query->active);
858      query->have_gs[query->curr_query] = !!ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
859      query->have_xfb[query->curr_query] = !!ctx->num_so_targets;
860   }
861}
862
863static void
864zink_set_active_query_state(struct pipe_context *pctx, bool enable)
865{
866   struct zink_context *ctx = zink_context(pctx);
867   ctx->queries_disabled = !enable;
868
869   struct zink_batch *batch = &ctx->batch;
870   if (ctx->queries_disabled)
871      zink_suspend_queries(ctx, batch);
872   else
873      zink_resume_queries(ctx, batch);
874}
875
876void
877zink_start_conditional_render(struct zink_context *ctx)
878{
879   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering))
880      return;
881   struct zink_batch *batch = &ctx->batch;
882   VkConditionalRenderingFlagsEXT begin_flags = 0;
883   if (ctx->render_condition.inverted)
884      begin_flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
885   VkConditionalRenderingBeginInfoEXT begin_info = {0};
886   begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
887   begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer;
888   begin_info.flags = begin_flags;
889   VKCTX(CmdBeginConditionalRenderingEXT)(batch->state->cmdbuf, &begin_info);
890   zink_batch_reference_resource_rw(batch, ctx->render_condition.query->predicate, false);
891}
892
893void
894zink_stop_conditional_render(struct zink_context *ctx)
895{
896   struct zink_batch *batch = &ctx->batch;
897   zink_clear_apply_conditionals(ctx);
898   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering))
899      return;
900   VKCTX(CmdEndConditionalRenderingEXT)(batch->state->cmdbuf);
901}
902
903bool
904zink_check_conditional_render(struct zink_context *ctx)
905{
906   if (!ctx->render_condition_active)
907      return true;
908   assert(ctx->render_condition.query);
909
910   union pipe_query_result result;
911   zink_get_query_result(&ctx->base, (struct pipe_query*)ctx->render_condition.query, true, &result);
912   return is_bool_query(ctx->render_condition.query) ?
913          ctx->render_condition.inverted != result.b :
914          ctx->render_condition.inverted != !!result.u64;
915}
916
/* pipe_context::render_condition hook: bind or unbind a query as the GPU-side
 * render condition.
 *
 * Unbinding (query == NULL) flushes any pending conditional clears and stops
 * conditional rendering if a renderpass is active.  Binding lazily creates a
 * small buffer to hold the predicate value, (re)fills it from the query when
 * dirty, and starts conditional rendering immediately if already inside a
 * renderpass; otherwise zink_start_conditional_render() picks it up when the
 * next renderpass begins.
 */
static void
zink_render_condition(struct pipe_context *pctx,
                      struct pipe_query *pquery,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)pquery;
   zink_batch_no_rp(ctx);
   VkQueryResultFlagBits flags = 0;

   if (query == NULL) {
      /* force conditional clears if they exist */
      if (ctx->clears_enabled && !ctx->batch.in_rp)
         zink_batch_rp(ctx);
      if (ctx->batch.in_rp)
         zink_stop_conditional_render(ctx);
      ctx->render_condition_active = false;
      ctx->render_condition.query = NULL;
      return;
   }

   if (!query->predicate) {
      struct pipe_resource *pres;

      /* need to create a vulkan buffer to copy the data into */
      pres = pipe_buffer_create(pctx->screen, PIPE_BIND_QUERY_BUFFER, PIPE_USAGE_DEFAULT, sizeof(uint64_t));
      if (!pres)
         return;

      query->predicate = zink_resource(pres);
   }
   if (query->predicate_dirty) {
      struct zink_resource *res = query->predicate;

      /* WAIT modes require the predicate value to be resolved before use */
      if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT)
         flags |= VK_QUERY_RESULT_WAIT_BIT;

      flags |= VK_QUERY_RESULT_64_BIT;
      int num_results = query->curr_query - query->last_start;
      if (query->type != PIPE_QUERY_PRIMITIVES_GENERATED &&
          !is_so_overflow_query(query)) {
         /* fast path: copy raw pool results straight into the predicate buffer */
         copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
      } else {
         /* these need special handling */
         force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
      }
      query->predicate_dirty = false;
   }
   ctx->render_condition.inverted = condition;
   ctx->render_condition_active = true;
   ctx->render_condition.query = query;
   if (ctx->batch.in_rp)
      zink_start_conditional_render(ctx);
}
972
/* pipe_context::get_query_result_resource hook: write a query result into a
 * buffer resource at the given offset.
 *
 * Three paths:
 *  - index == -1 (availability only): read availability via
 *    vkGetQueryPoolResults when the batch has completed, else copy through a
 *    staging buffer on the GPU.
 *  - single-query, non-accumulating types: direct GPU copy from the internal
 *    qbo (64bit) or a fresh pool copy (32bit).
 *  - everything else: fall back to a CPU read that accumulates and writes the
 *    result manually.
 */
static void
zink_get_query_result_resource(struct pipe_context *pctx,
                               struct pipe_query *pquery,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *pres,
                               unsigned offset)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query*)pquery;
   struct zink_resource *res = zink_resource(pres);
   /* caller picks a 32bit or 64bit destination slot */
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT;
   unsigned num_queries = query->curr_query - query->last_start;
   unsigned query_id = query->last_start;

   if (index == -1) {
      /* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data
       * in addition to the availability result, which is a problem if we're just trying to get availability data
       *
       * if we know that there's no valid buffer data in the preceding buffer range, then we can just
       * stomp on it with a glorious queued buffer copy instead of forcing a stall to manually write to the
       * buffer
       */

      /* timestamps cannot use PARTIAL_BIT; see Vulkan spec for vkGetQueryPoolResults */
      VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
      /* the availability word follows the per-query result words */
      unsigned src_offset = result_size * get_num_results(query->type);
      if (zink_batch_usage_check_completion(ctx, query->batch_id)) {
         uint64_t u64[4] = {0};
         if (VKCTX(GetQueryPoolResults)(screen->dev, query->query_pool, query_id, 1, sizeof(u64), u64,
                                   0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag) == VK_SUCCESS) {
            pipe_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
            return;
         }
      }
      /* batch not done (or readback failed): do the whole thing on the GPU via staging */
      struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
      copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
      zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query->type), result_size);
      pipe_resource_reference(&staging, NULL);
      return;
   }

   if (!is_time_query(query) && !is_bool_query(query)) {
      /* single-id queries whose results need no accumulation can be copied directly */
      if (num_queries == 1 && query->type != PIPE_QUERY_PRIMITIVES_GENERATED &&
                              query->type != PIPE_QUERY_PRIMITIVES_EMITTED &&
                              !is_bool_query(query)) {
         if (size_flags == VK_QUERY_RESULT_64_BIT) {
            if (query->needs_update)
               update_qbo(ctx, query);
            /* internal qbo always writes 64bit value so we can just direct copy */
            zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffer), offset,
                             get_buffer_offset(query, query->curr_qbo->buffer, query->last_start),
                             result_size);
         } else
            /* have to do a new copy for 32bit */
            copy_results_to_buffer(ctx, query, res, offset, 1, size_flags);
         return;
      }
   }

   /* TODO: use CS to aggregate results */

   /* unfortunately, there's no way to accumulate results from multiple queries on the gpu without either
    * clobbering all but the last result or writing the results sequentially, so we have to manually write the result
    */
   force_cpu_read(ctx, pquery, result_type, pres, offset);
}
1042
1043static uint64_t
1044zink_get_timestamp(struct pipe_context *pctx)
1045{
1046   struct zink_screen *screen = zink_screen(pctx->screen);
1047   uint64_t timestamp, deviation;
1048   assert(screen->info.have_EXT_calibrated_timestamps);
1049   VkCalibratedTimestampInfoEXT cti = {0};
1050   cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
1051   cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
1052   VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, &timestamp, &deviation);
1053   timestamp_to_nanoseconds(screen, &timestamp);
1054   return timestamp;
1055}
1056
1057void
1058zink_context_query_init(struct pipe_context *pctx)
1059{
1060   struct zink_context *ctx = zink_context(pctx);
1061   list_inithead(&ctx->suspended_queries);
1062   list_inithead(&ctx->primitives_generated_queries);
1063
1064   pctx->create_query = zink_create_query;
1065   pctx->destroy_query = zink_destroy_query;
1066   pctx->begin_query = zink_begin_query;
1067   pctx->end_query = zink_end_query;
1068   pctx->get_query_result = zink_get_query_result;
1069   pctx->get_query_result_resource = zink_get_query_result_resource;
1070   pctx->set_active_query_state = zink_set_active_query_state;
1071   pctx->render_condition = zink_render_condition;
1072   pctx->get_timestamp = zink_get_timestamp;
1073}
1074