d3d12_query.cpp revision 7ec681f3
1/*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "d3d12_query.h"
25#include "d3d12_context.h"
26#include "d3d12_resource.h"
27#include "d3d12_screen.h"
28
29#include "util/u_dump.h"
30#include "util/u_inlines.h"
31#include "util/u_memory.h"
32
33#include <dxguids/dxguids.h>
34
35struct d3d12_query {
36   enum pipe_query_type type;
37
38   ID3D12QueryHeap *query_heap;
39   unsigned curr_query, num_queries;
40   size_t query_size;
41   struct d3d12_query *subquery;
42
43   D3D12_QUERY_TYPE d3d12qtype;
44
45   pipe_resource *buffer;
46   unsigned buffer_offset;
47   uint64_t fence_value;
48
49   struct list_head active_list;
50   struct d3d12_resource *predicate;
51};
52
53static D3D12_QUERY_HEAP_TYPE
54d3d12_query_heap_type(unsigned query_type)
55{
56   switch (query_type) {
57   case PIPE_QUERY_OCCLUSION_COUNTER:
58   case PIPE_QUERY_OCCLUSION_PREDICATE:
59   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
60      return D3D12_QUERY_HEAP_TYPE_OCCLUSION;
61   case PIPE_QUERY_PIPELINE_STATISTICS:
62      return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
63   case PIPE_QUERY_PRIMITIVES_GENERATED:
64   case PIPE_QUERY_PRIMITIVES_EMITTED:
65   case PIPE_QUERY_SO_STATISTICS:
66      return D3D12_QUERY_HEAP_TYPE_SO_STATISTICS;
67   case PIPE_QUERY_TIMESTAMP:
68   case PIPE_QUERY_TIME_ELAPSED:
69      return D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
70
71   default:
72      debug_printf("unknown query: %s\n",
73                   util_str_query_type(query_type, true));
74      unreachable("d3d12: unknown query type");
75   }
76}
77
78static D3D12_QUERY_TYPE
79d3d12_query_type(unsigned query_type)
80{
81   switch (query_type) {
82   case PIPE_QUERY_OCCLUSION_COUNTER:
83      return D3D12_QUERY_TYPE_OCCLUSION;
84   case PIPE_QUERY_OCCLUSION_PREDICATE:
85   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
86      return D3D12_QUERY_TYPE_BINARY_OCCLUSION;
87   case PIPE_QUERY_PIPELINE_STATISTICS:
88      return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
89   case PIPE_QUERY_PRIMITIVES_GENERATED:
90   case PIPE_QUERY_PRIMITIVES_EMITTED:
91   case PIPE_QUERY_SO_STATISTICS:
92      return D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0;
93   case PIPE_QUERY_TIMESTAMP:
94   case PIPE_QUERY_TIME_ELAPSED:
95      return D3D12_QUERY_TYPE_TIMESTAMP;
96   default:
97      debug_printf("unknown query: %s\n",
98                   util_str_query_type(query_type, true));
99      unreachable("d3d12: unknown query type");
100   }
101}
102
103static struct pipe_query *
104d3d12_create_query(struct pipe_context *pctx,
105                   unsigned query_type, unsigned index)
106{
107   struct d3d12_context *ctx = d3d12_context(pctx);
108   struct d3d12_screen *screen = d3d12_screen(pctx->screen);
109   struct d3d12_query *query = CALLOC_STRUCT(d3d12_query);
110   D3D12_QUERY_HEAP_DESC desc = {};
111
112   if (!query)
113      return NULL;
114
115   query->type = (pipe_query_type)query_type;
116   query->d3d12qtype = d3d12_query_type(query_type);
117   query->num_queries = 16;
118
119   /* With timer queries we want a few more queries, especially since we need two slots
120    * per query for TIME_ELAPSED queries */
121   if (unlikely(query->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP))
122      query->num_queries = 64;
123
124   query->curr_query = 0;
125
126   switch (query->d3d12qtype) {
127   case D3D12_QUERY_TYPE_PIPELINE_STATISTICS:
128      query->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);
129      break;
130   case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0:
131      query->query_size = sizeof(D3D12_QUERY_DATA_SO_STATISTICS);
132      break;
133   default:
134      query->query_size = sizeof(uint64_t);
135      break;
136   }
137
138   desc.Count = query->num_queries;
139   desc.Type = d3d12_query_heap_type(query_type);
140   if (FAILED(screen->dev->CreateQueryHeap(&desc,
141                                           IID_PPV_ARGS(&query->query_heap)))) {
142      FREE(query);
143      return NULL;
144   }
145
146   /* Query result goes into a readback buffer */
147   size_t buffer_size = query->query_size * query->num_queries;
148   u_suballocator_alloc(&ctx->query_allocator, buffer_size, 256,
149                        &query->buffer_offset, &query->buffer);
150
151   return (struct pipe_query *)query;
152}
153
154static void
155d3d12_destroy_query(struct pipe_context *pctx,
156                    struct pipe_query *q)
157{
158   struct d3d12_query *query = (struct d3d12_query *)q;
159   pipe_resource *predicate = &query->predicate->base;
160   if (query->subquery)
161      d3d12_destroy_query(pctx, (struct pipe_query *)query->subquery);
162   pipe_resource_reference(&predicate, NULL);
163   query->query_heap->Release();
164   FREE(query);
165}
166
167static bool
168accumulate_result(struct d3d12_context *ctx, struct d3d12_query *q,
169                  union pipe_query_result *result, bool write)
170{
171   struct pipe_transfer *transfer = NULL;
172   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
173   unsigned access = PIPE_MAP_READ;
174   void *results;
175
176   if (write)
177      access |= PIPE_MAP_WRITE;
178   results = pipe_buffer_map_range(&ctx->base, q->buffer, q->buffer_offset,
179                                   q->num_queries * q->query_size,
180                                   access, &transfer);
181
182   if (results == NULL)
183      return false;
184
185   uint64_t *results_u64 = (uint64_t *)results;
186   D3D12_QUERY_DATA_PIPELINE_STATISTICS *results_stats = (D3D12_QUERY_DATA_PIPELINE_STATISTICS *)results;
187   D3D12_QUERY_DATA_SO_STATISTICS *results_so = (D3D12_QUERY_DATA_SO_STATISTICS *)results;
188
189   util_query_clear_result(result, q->type);
190   for (unsigned i = 0; i < q->curr_query; ++i) {
191      switch (q->type) {
192      case PIPE_QUERY_OCCLUSION_PREDICATE:
193      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
194         result->b |= results_u64[i] != 0;
195         break;
196
197      case PIPE_QUERY_OCCLUSION_COUNTER:
198         result->u64 += results_u64[i];
199         break;
200      case PIPE_QUERY_TIMESTAMP:
201         result->u64 = results_u64[i];
202         break;
203
204      case PIPE_QUERY_PIPELINE_STATISTICS:
205         result->pipeline_statistics.ia_vertices += results_stats[i].IAVertices;
206         result->pipeline_statistics.ia_primitives += results_stats[i].IAPrimitives;
207         result->pipeline_statistics.vs_invocations += results_stats[i].VSInvocations;
208         result->pipeline_statistics.gs_invocations += results_stats[i].GSInvocations;
209         result->pipeline_statistics.gs_primitives += results_stats[i].GSPrimitives;
210         result->pipeline_statistics.c_invocations += results_stats[i].CInvocations;
211         result->pipeline_statistics.c_primitives += results_stats[i].CPrimitives;
212         result->pipeline_statistics.ps_invocations += results_stats[i].PSInvocations;
213         result->pipeline_statistics.hs_invocations += results_stats[i].HSInvocations;
214         result->pipeline_statistics.ds_invocations += results_stats[i].DSInvocations;
215         result->pipeline_statistics.cs_invocations += results_stats[i].CSInvocations;
216         break;
217
218      case PIPE_QUERY_PRIMITIVES_GENERATED:
219         result->u64 += results_so[i].PrimitivesStorageNeeded;
220         break;
221
222      case PIPE_QUERY_PRIMITIVES_EMITTED:
223         result->u64 += results_so[i].NumPrimitivesWritten;
224         break;
225
226      case PIPE_QUERY_TIME_ELAPSED:
227         result->u64 += results_u64[2 * i + 1] - results_u64[2 * i];
228         break;
229
230      case PIPE_QUERY_SO_STATISTICS:
231         result->so_statistics.num_primitives_written += results_so[i].NumPrimitivesWritten;
232         result->so_statistics.primitives_storage_needed += results_so[i].PrimitivesStorageNeeded;
233         break;
234
235      default:
236         debug_printf("unsupported query type: %s\n",
237                      util_str_query_type(q->type, true));
238         unreachable("unexpected query type");
239      }
240   }
241
242   if (q->subquery) {
243      union pipe_query_result subresult;
244
245      accumulate_result(ctx, q->subquery, &subresult, false);
246      q->subquery->curr_query = 0;
247      if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED)
248         result->u64 += subresult.pipeline_statistics.ia_primitives;
249   }
250
251   if (write) {
252      if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
253         results_stats[0].IAVertices = result->pipeline_statistics.ia_vertices;
254         results_stats[0].IAPrimitives = result->pipeline_statistics.ia_primitives;
255         results_stats[0].VSInvocations = result->pipeline_statistics.vs_invocations;
256         results_stats[0].GSInvocations = result->pipeline_statistics.gs_invocations;
257         results_stats[0].GSPrimitives = result->pipeline_statistics.gs_primitives;
258         results_stats[0].CInvocations = result->pipeline_statistics.c_invocations;
259         results_stats[0].CPrimitives = result->pipeline_statistics.c_primitives;
260         results_stats[0].PSInvocations = result->pipeline_statistics.ps_invocations;
261         results_stats[0].HSInvocations = result->pipeline_statistics.hs_invocations;
262         results_stats[0].DSInvocations = result->pipeline_statistics.ds_invocations;
263         results_stats[0].CSInvocations = result->pipeline_statistics.cs_invocations;
264      } else if (q->type == PIPE_QUERY_SO_STATISTICS) {
265         results_so[0].NumPrimitivesWritten = result->so_statistics.num_primitives_written;
266         results_so[0].PrimitivesStorageNeeded = result->so_statistics.primitives_storage_needed;
267      } else {
268         if (unlikely(q->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP)) {
269            results_u64[0] = 0;
270            results_u64[1] = result->u64;
271         } else {
272            results_u64[0] = result->u64;
273         }
274      }
275   }
276
277   pipe_buffer_unmap(&ctx->base, transfer);
278
279   if (q->type == PIPE_QUERY_TIME_ELAPSED ||
280       q->type == PIPE_QUERY_TIMESTAMP)
281      result->u64 = static_cast<uint64_t>(screen->timestamp_multiplier * result->u64);
282
283   return true;
284}
285
286static void
287begin_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart)
288{
289   if (restart) {
290      q->curr_query = 0;
291   } else if (q->curr_query == q->num_queries) {
292      union pipe_query_result result;
293
294      /* Accumulate current results and store in first slot */
295      d3d12_flush_cmdlist_and_wait(ctx);
296      accumulate_result(ctx, q, &result, true);
297      q->curr_query = 1;
298   }
299
300   if (q->subquery)
301      begin_query(ctx, q->subquery, restart);
302
303   ctx->cmdlist->BeginQuery(q->query_heap, q->d3d12qtype, q->curr_query);
304}
305
306
307static void
308begin_timer_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart)
309{
310   /* For PIPE_QUERY_TIME_ELAPSED we record one time with BeginQuery and one in
311    * EndQuery, so we need two query slots */
312   unsigned query_index = 2 * q->curr_query;
313
314   if (restart) {
315      q->curr_query = 0;
316      query_index = 0;
317   } else if (query_index == q->num_queries) {
318      union pipe_query_result result;
319
320      /* Accumulate current results and store in first slot */
321      d3d12_flush_cmdlist_and_wait(ctx);
322      accumulate_result(ctx, q, &result, true);
323      q->curr_query = 2;
324   }
325
326   ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, query_index);
327}
328
329static bool
330d3d12_begin_query(struct pipe_context *pctx,
331                  struct pipe_query *q)
332{
333   struct d3d12_context *ctx = d3d12_context(pctx);
334   struct d3d12_query *query = (struct d3d12_query *)q;
335
336   assert(query->type != PIPE_QUERY_TIMESTAMP);
337
338   if (unlikely(query->type == PIPE_QUERY_TIME_ELAPSED))
339      begin_timer_query(ctx, query, true);
340   else {
341      begin_query(ctx, query, true);
342      list_addtail(&query->active_list, &ctx->active_queries);
343   }
344
345   return true;
346}
347
348static void
349end_query(struct d3d12_context *ctx, struct d3d12_query *q)
350{
351   uint64_t offset = 0;
352   struct d3d12_batch *batch = d3d12_current_batch(ctx);
353   struct d3d12_resource *res = (struct d3d12_resource *)q->buffer;
354   ID3D12Resource *d3d12_res = d3d12_resource_underlying(res, &offset);
355
356   /* End subquery first so that we can use fence value from parent */
357   if (q->subquery)
358      end_query(ctx, q->subquery);
359
360   /* With QUERY_TIME_ELAPSED we have recorded one value at
361    * (2 * q->curr_query), and now we record a value at (2 * q->curr_query + 1)
362    * and when resolving the query we subtract the latter from the former */
363
364   unsigned resolve_count = q->type == PIPE_QUERY_TIME_ELAPSED ? 2 : 1;
365   unsigned resolve_index = resolve_count * q->curr_query;
366   unsigned end_index = resolve_index + resolve_count - 1;
367
368   offset += q->buffer_offset + resolve_index * q->query_size;
369   ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, end_index);
370   d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_FULL);
371   d3d12_apply_resource_states(ctx);
372   ctx->cmdlist->ResolveQueryData(q->query_heap, q->d3d12qtype, resolve_index,
373                                  resolve_count, d3d12_res, offset);
374
375   d3d12_batch_reference_object(batch, q->query_heap);
376   d3d12_batch_reference_resource(batch, res);
377
378   assert(q->curr_query < q->num_queries);
379   q->curr_query++;
380}
381
382static bool
383d3d12_end_query(struct pipe_context *pctx,
384               struct pipe_query *q)
385{
386   struct d3d12_context *ctx = d3d12_context(pctx);
387   struct d3d12_query *query = (struct d3d12_query *)q;
388
389   end_query(ctx, query);
390
391   if (query->type != PIPE_QUERY_TIMESTAMP &&
392       query->type != PIPE_QUERY_TIME_ELAPSED)
393      list_delinit(&query->active_list);
394
395   query->fence_value = ctx->fence_value;
396   return true;
397}
398
399static bool
400d3d12_get_query_result(struct pipe_context *pctx,
401                      struct pipe_query *q,
402                      bool wait,
403                      union pipe_query_result *result)
404{
405   struct d3d12_context *ctx = d3d12_context(pctx);
406   struct d3d12_query *query = (struct d3d12_query *)q;
407
408   if (ctx->cmdqueue_fence->GetCompletedValue() < query->fence_value) {
409      if (!wait)
410         return false;
411      d3d12_flush_cmdlist_and_wait(ctx);
412   }
413
414   return accumulate_result(ctx, query, result, false);
415}
416
417void
418d3d12_suspend_queries(struct d3d12_context *ctx)
419{
420   list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
421      end_query(ctx, query);
422   }
423}
424
425void
426d3d12_resume_queries(struct d3d12_context *ctx)
427{
428   list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
429      begin_query(ctx, query, false);
430   }
431}
432
433void
434d3d12_validate_queries(struct d3d12_context *ctx)
435{
436   bool have_xfb = !!ctx->gfx_pipeline_state.num_so_targets;
437
438   list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
439      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED && !have_xfb && !query->subquery) {
440         struct pipe_query *subquery = d3d12_create_query(&ctx->base, PIPE_QUERY_PIPELINE_STATISTICS, 0);
441         query->subquery = (struct d3d12_query *)subquery;
442         if (!ctx->queries_disabled)
443            begin_query(ctx, query->subquery, true);
444      }
445   }
446}
447
448static void
449d3d12_set_active_query_state(struct pipe_context *pctx, bool enable)
450{
451   struct d3d12_context *ctx = d3d12_context(pctx);
452   ctx->queries_disabled = !enable;
453
454   if (enable)
455      d3d12_resume_queries(ctx);
456   else
457      d3d12_suspend_queries(ctx);
458}
459
460static void
461d3d12_render_condition(struct pipe_context *pctx,
462                       struct pipe_query *pquery,
463                       bool condition,
464                       enum pipe_render_cond_flag mode)
465{
466   struct d3d12_context *ctx = d3d12_context(pctx);
467   struct d3d12_query *query = (struct d3d12_query *)pquery;
468
469   if (query == nullptr) {
470      ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
471      ctx->current_predication = nullptr;
472      return;
473   }
474
475   if (!query->predicate)
476      query->predicate = d3d12_resource(pipe_buffer_create(pctx->screen, 0,
477                                                           PIPE_USAGE_DEFAULT, sizeof(uint64_t)));
478
479   if (mode == PIPE_RENDER_COND_WAIT) {
480      d3d12_flush_cmdlist_and_wait(ctx);
481      union pipe_query_result result;
482      accumulate_result(ctx, (d3d12_query *)pquery, &result, true);
483   }
484
485   struct d3d12_resource *res = (struct d3d12_resource *)query->buffer;
486   d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_BIND_INVALIDATE_FULL);
487   d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_NONE);
488   d3d12_apply_resource_states(ctx);
489   ctx->cmdlist->CopyBufferRegion(d3d12_resource_resource(query->predicate), 0,
490                                  d3d12_resource_resource(res), 0,
491                                  sizeof(uint64_t));
492
493   d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_PREDICATION, D3D12_BIND_INVALIDATE_NONE);
494   d3d12_apply_resource_states(ctx);
495
496   ctx->current_predication = query->predicate;
497   /* documentation of ID3D12GraphicsCommandList::SetPredication method:
498    * "resource manipulation commands are _not_ actually performed
499    *  if the resulting predicate data of the predicate is equal to
500    *  the operation specified."
501    */
502   ctx->cmdlist->SetPredication(d3d12_resource_resource(query->predicate), 0,
503                                condition ? D3D12_PREDICATION_OP_NOT_EQUAL_ZERO :
504                                D3D12_PREDICATION_OP_EQUAL_ZERO);
505}
506
507void
508d3d12_context_query_init(struct pipe_context *pctx)
509{
510   struct d3d12_context *ctx = d3d12_context(pctx);
511   list_inithead(&ctx->active_queries);
512
513   u_suballocator_init(&ctx->query_allocator, &ctx->base, 4096, 0, PIPE_USAGE_STAGING,
514                         0, true);
515
516   pctx->create_query = d3d12_create_query;
517   pctx->destroy_query = d3d12_destroy_query;
518   pctx->begin_query = d3d12_begin_query;
519   pctx->end_query = d3d12_end_query;
520   pctx->get_query_result = d3d12_get_query_result;
521   pctx->set_active_query_state = d3d12_set_active_query_state;
522   pctx->render_condition = d3d12_render_condition;
523}
524