/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23b8e80941Smrg
24b8e80941Smrg#include <assert.h>
25b8e80941Smrg#include <stdbool.h>
26b8e80941Smrg#include <string.h>
27b8e80941Smrg#include <unistd.h>
28b8e80941Smrg#include <fcntl.h>
29b8e80941Smrg
30b8e80941Smrg#include "anv_private.h"
31b8e80941Smrg
32b8e80941Smrg#include "genxml/gen_macros.h"
33b8e80941Smrg#include "genxml/genX_pack.h"
34b8e80941Smrg
35b8e80941Smrg/* We reserve GPR 14 and 15 for conditional rendering */
36b8e80941Smrg#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 14
37b8e80941Smrg#define __gen_get_batch_dwords anv_batch_emit_dwords
38b8e80941Smrg#define __gen_address_offset anv_address_add
39b8e80941Smrg#include "common/gen_mi_builder.h"
40b8e80941Smrg
/* Create a query pool backed by a single GEM BO that is CPU-mapped for the
 * lifetime of the pool.  Each query occupies one fixed-size "slot" in the
 * BO; the slot layout is described below.
 */
VkResult genX(CreateQueryPool)(
    VkDevice                                    _device,
    const VkQueryPoolCreateInfo*                pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkQueryPool*                                pQueryPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   const struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct anv_query_pool *pool;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);

   /* Query pool slots are made up of some number of 64-bit values packed
    * tightly together.  The first 64-bit value is always the "available" bit
    * which is 0 when the query is unavailable and 1 when it is available.
    * The 64-bit values that follow are determined by the type of query.
    */
   uint32_t uint64s_per_slot = 1;

   VkQueryPipelineStatisticFlags pipeline_statistics = 0;
   switch (pCreateInfo->queryType) {
   case VK_QUERY_TYPE_OCCLUSION:
      /* Occlusion queries have two values: begin and end. */
      uint64s_per_slot += 2;
      break;
   case VK_QUERY_TYPE_TIMESTAMP:
      /* Timestamps just have the one timestamp value */
      uint64s_per_slot += 1;
      break;
   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
      pipeline_statistics = pCreateInfo->pipelineStatistics;
      /* We're going to trust this field implicitly so we need to ensure that
       * no unhandled extension bits leak in.
       */
      pipeline_statistics &= ANV_PIPELINE_STATISTICS_MASK;

      /* Statistics queries store a begin and an end counter snapshot for
       * every requested statistic; the result is computed as end - begin.
       */
      uint64s_per_slot += 2 * util_bitcount(pipeline_statistics);
      break;
   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
      /* Transform feedback queries are 4 values, begin/end for
       * written/available.
       */
      uint64s_per_slot += 4;
      break;
   default:
      assert(!"Invalid query type");
   }

   pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pool == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pool->type = pCreateInfo->queryType;
   pool->pipeline_statistics = pipeline_statistics;
   pool->stride = uint64s_per_slot * sizeof(uint64_t);
   pool->slots = pCreateInfo->queryCount;

   uint64_t size = pool->slots * pool->stride;
   result = anv_bo_init_new(&pool->bo, device, size);
   if (result != VK_SUCCESS)
      goto fail;

   /* Propagate the device's execbuf placement capabilities to the BO. */
   if (pdevice->supports_48bit_addresses)
      pool->bo.flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;

   if (pdevice->use_softpin)
      pool->bo.flags |= EXEC_OBJECT_PINNED;

   if (pdevice->has_exec_async)
      pool->bo.flags |= EXEC_OBJECT_ASYNC;

   anv_vma_alloc(device, &pool->bo);

   /* For query pools, we set the caching mode to I915_CACHING_CACHED.  On LLC
    * platforms, this does nothing.  On non-LLC platforms, this means snooping
    * which comes at a slight cost.  However, the buffers aren't big, won't be
    * written frequently, and trying to handle the flushing manually without
    * doing too much flushing is extremely painful.
    */
   anv_gem_set_caching(device, pool->bo.gem_handle, I915_CACHING_CACHED);

   /* NOTE(review): the mmap result is not checked here; on failure later
    * CPU-side reads of pool->bo.map would fault — confirm whether
    * anv_gem_mmap can fail in practice and whether a fail path is needed.
    */
   pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);

   *pQueryPool = anv_query_pool_to_handle(pool);

   return VK_SUCCESS;

 fail:
   vk_free2(&device->alloc, pAllocator, pool);

   return result;
}
136b8e80941Smrg
137b8e80941Smrgvoid genX(DestroyQueryPool)(
138b8e80941Smrg    VkDevice                                    _device,
139b8e80941Smrg    VkQueryPool                                 _pool,
140b8e80941Smrg    const VkAllocationCallbacks*                pAllocator)
141b8e80941Smrg{
142b8e80941Smrg   ANV_FROM_HANDLE(anv_device, device, _device);
143b8e80941Smrg   ANV_FROM_HANDLE(anv_query_pool, pool, _pool);
144b8e80941Smrg
145b8e80941Smrg   if (!pool)
146b8e80941Smrg      return;
147b8e80941Smrg
148b8e80941Smrg   anv_gem_munmap(pool->bo.map, pool->bo.size);
149b8e80941Smrg   anv_vma_free(device, &pool->bo);
150b8e80941Smrg   anv_gem_close(device, pool->bo.gem_handle);
151b8e80941Smrg   vk_free2(&device->alloc, pAllocator, pool);
152b8e80941Smrg}
153b8e80941Smrg
154b8e80941Smrgstatic struct anv_address
155b8e80941Smrganv_query_address(struct anv_query_pool *pool, uint32_t query)
156b8e80941Smrg{
157b8e80941Smrg   return (struct anv_address) {
158b8e80941Smrg      .bo = &pool->bo,
159b8e80941Smrg      .offset = query * pool->stride,
160b8e80941Smrg   };
161b8e80941Smrg}
162b8e80941Smrg
163b8e80941Smrgstatic void
164b8e80941Smrgcpu_write_query_result(void *dst_slot, VkQueryResultFlags flags,
165b8e80941Smrg                       uint32_t value_index, uint64_t result)
166b8e80941Smrg{
167b8e80941Smrg   if (flags & VK_QUERY_RESULT_64_BIT) {
168b8e80941Smrg      uint64_t *dst64 = dst_slot;
169b8e80941Smrg      dst64[value_index] = result;
170b8e80941Smrg   } else {
171b8e80941Smrg      uint32_t *dst32 = dst_slot;
172b8e80941Smrg      dst32[value_index] = result;
173b8e80941Smrg   }
174b8e80941Smrg}
175b8e80941Smrg
/* Check a slot's availability qword (the first 64 bits of the slot).
 * The read is done through a volatile pointer so the compiler re-loads the
 * value on every call; the GPU updates this memory behind our back.
 */
static bool
query_is_available(uint64_t *slot)
{
   volatile uint64_t *avail = (volatile uint64_t *)slot;
   return *avail != 0;
}
181b8e80941Smrg
/* Busy-wait until a query slot becomes available.
 *
 * Returns VK_SUCCESS once availability is observed, VK_NOT_READY if the BO
 * has gone idle without the query ever becoming available (i.e. the client
 * queried a slot that was never submitted), or an error if the device is
 * lost.  NOTE(review): this spins on the CPU without yielding while the BO
 * is busy — acceptable for short GPU work, but worth confirming there is no
 * pathological case.
 */
static VkResult
wait_for_available(struct anv_device *device,
                   struct anv_query_pool *pool, uint64_t *slot)
{
   while (true) {
      if (query_is_available(slot))
         return VK_SUCCESS;

      int ret = anv_gem_busy(device, pool->bo.gem_handle);
      if (ret == 1) {
         /* The BO is still busy, keep waiting. */
         continue;
      } else if (ret == -1) {
         /* We don't know the real error. */
         return anv_device_set_lost(device, "gem wait failed: %m");
      } else {
         assert(ret == 0);
         /* The BO is no longer busy. */
         if (query_is_available(slot)) {
            return VK_SUCCESS;
         } else {
            VkResult status = anv_device_query_status(device);
            if (status != VK_SUCCESS)
               return status;

            /* If we haven't seen availability yet, then we never will.  This
             * can only happen if we have a client error where they call
             * GetQueryPoolResults on a query that they haven't submitted to
             * the GPU yet.  The spec allows us to do anything in this case,
             * but returning VK_SUCCESS doesn't seem right and we shouldn't
             * just keep spinning.
             */
            return VK_NOT_READY;
         }
      }
   }
}
219b8e80941Smrg
/* Copy query results from the CPU-mapped pool BO into the client buffer.
 *
 * For each query in [firstQuery, firstQuery + queryCount) this reads the
 * slot through the persistent mapping, optionally waits for availability
 * (WAIT_BIT), and writes the per-type result values followed, if requested,
 * by the availability value (WITH_AVAILABILITY_BIT).  Returns VK_NOT_READY
 * if any unavailable query was skipped without PARTIAL_BIT.
 */
VkResult genX(GetQueryPoolResults)(
    VkDevice                                    _device,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount,
    size_t                                      dataSize,
    void*                                       pData,
    VkDeviceSize                                stride,
    VkQueryResultFlags                          flags)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
          pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
          pool->type == VK_QUERY_TYPE_TIMESTAMP ||
          pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT);

   if (anv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   if (pData == NULL)
      return VK_SUCCESS;

   void *data_end = pData + dataSize;

   VkResult status = VK_SUCCESS;
   for (uint32_t i = 0; i < queryCount; i++) {
      /* slot[0] is availability; the payload qwords that follow are laid
       * out per query type (see genX(CreateQueryPool)).
       */
      uint64_t *slot = pool->bo.map + (firstQuery + i) * pool->stride;

      /* Availability is always at the start of the slot */
      bool available = slot[0];

      if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) {
         status = wait_for_available(device, pool, slot);
         if (status != VK_SUCCESS)
            return status;

         available = true;
      }

      /* From the Vulkan 1.0.42 spec:
       *
       *    "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
       *    both not set then no result values are written to pData for
       *    queries that are in the unavailable state at the time of the call,
       *    and vkGetQueryPoolResults returns VK_NOT_READY. However,
       *    availability state is still written to pData for those queries if
       *    VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."
       */
      bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);

      uint32_t idx = 0;
      switch (pool->type) {
      case VK_QUERY_TYPE_OCCLUSION:
         /* slot[1]/slot[2] are the begin/end depth-count snapshots. */
         if (write_results)
            cpu_write_query_result(pData, flags, idx, slot[2] - slot[1]);
         idx++;
         break;

      case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
         uint32_t statistics = pool->pipeline_statistics;
         while (statistics) {
            uint32_t stat = u_bit_scan(&statistics);
            if (write_results) {
               /* Begin/end snapshots for statistic 'idx' are interleaved
                * starting at slot[1].
                */
               uint64_t result = slot[idx * 2 + 2] - slot[idx * 2 + 1];

               /* WaDividePSInvocationCountBy4:HSW,BDW */
               if ((device->info.gen == 8 || device->info.is_haswell) &&
                   (1 << stat) == VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT)
                  result >>= 2;

               cpu_write_query_result(pData, flags, idx, result);
            }
            idx++;
         }
         assert(idx == util_bitcount(pool->pipeline_statistics));
         break;
      }

      case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
         /* Primitives written: end (slot[2]) minus begin (slot[1]). */
         if (write_results)
            cpu_write_query_result(pData, flags, idx, slot[2] - slot[1]);
         idx++;
         /* Primitives needed: end (slot[4]) minus begin (slot[3]). */
         if (write_results)
            cpu_write_query_result(pData, flags, idx, slot[4] - slot[3]);
         idx++;
         break;

      case VK_QUERY_TYPE_TIMESTAMP:
         if (write_results)
            cpu_write_query_result(pData, flags, idx, slot[1]);
         idx++;
         break;

      default:
         unreachable("invalid pool type");
      }

      if (!write_results)
         status = VK_NOT_READY;

      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
         cpu_write_query_result(pData, flags, idx, available);

      /* NOTE(review): writes above are not bounds-checked against data_end;
       * this relies on the valid-usage rule that dataSize covers every query
       * written — confirm callers are validated upstream.
       */
      pData += stride;
      if (pData >= data_end)
         break;
   }

   return status;
}
332b8e80941Smrg
/* Emit a PIPE_CONTROL whose post-sync operation writes the PS depth count
 * (the occlusion counter) to 'addr', with a depth stall so the snapshot is
 * taken after prior depth work.
 */
static void
emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_address addr)
{
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DestinationAddressType  = DAT_PPGTT;
      pc.PostSyncOperation       = WritePSDepthCount;
      pc.DepthStallEnable        = true;
      pc.Address                 = addr;

      /* NOTE(review): extra CS stall only on gen9 GT4 — presumably a
       * hardware workaround; confirm against the workaround database.
       */
      if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
         pc.CommandStreamerStallEnable = true;
   }
}
347b8e80941Smrg
348b8e80941Smrgstatic void
349b8e80941Smrgemit_query_mi_availability(struct gen_mi_builder *b,
350b8e80941Smrg                           struct anv_address addr,
351b8e80941Smrg                           bool available)
352b8e80941Smrg{
353b8e80941Smrg   gen_mi_store(b, gen_mi_mem64(addr), gen_mi_imm(available));
354b8e80941Smrg}
355b8e80941Smrg
/* Set or clear a query slot's availability value with a PIPE_CONTROL
 * post-sync immediate write (as opposed to an MI store from the command
 * streamer — see emit_query_mi_availability).
 */
static void
emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer,
                           struct anv_address addr,
                           bool available)
{
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DestinationAddressType  = DAT_PPGTT;
      pc.PostSyncOperation       = WriteImmediateData;
      pc.Address                 = addr;
      pc.ImmediateData           = available;
   }
}
368b8e80941Smrg
/**
 * Goes through a series of consecutive query indices in the given pool
 * setting all element values to 0 and emitting them as available.
 *
 * The MI builder 'b' is only used for the pipeline-statistics/XFB path;
 * occlusion and timestamp slots are written with PIPE_CONTROLs instead.
 */
static void
emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
                  struct gen_mi_builder *b, struct anv_query_pool *pool,
                  uint32_t first_index, uint32_t num_queries)
{
   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
   case VK_QUERY_TYPE_TIMESTAMP:
      /* These queries are written with a PIPE_CONTROL so clear them using the
       * PIPE_CONTROL as well so we don't have to synchronize between 2 types
       * of operations.
       */
      assert((pool->stride % 8) == 0);
      for (uint32_t i = 0; i < num_queries; i++) {
         struct anv_address slot_addr =
            anv_query_address(pool, first_index + i);

         /* Zero the payload qwords first, then mark the slot available
          * last so a reader never sees "available" with stale payload.
          */
         for (uint32_t qword = 1; qword < (pool->stride / 8); qword++) {
            emit_query_pc_availability(cmd_buffer,
                                       anv_address_add(slot_addr, qword * 8),
                                       false);
         }
         emit_query_pc_availability(cmd_buffer, slot_addr, true);
      }
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
      /* Zero the payload (everything after the availability qword) in one
       * MI memset, then mark the slot available.
       */
      for (uint32_t i = 0; i < num_queries; i++) {
         struct anv_address slot_addr =
            anv_query_address(pool, first_index + i);
         gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
         emit_query_mi_availability(b, slot_addr, true);
      }
      break;

   default:
      unreachable("Unsupported query type");
   }
}
413b8e80941Smrg
414b8e80941Smrgvoid genX(CmdResetQueryPool)(
415b8e80941Smrg    VkCommandBuffer                             commandBuffer,
416b8e80941Smrg    VkQueryPool                                 queryPool,
417b8e80941Smrg    uint32_t                                    firstQuery,
418b8e80941Smrg    uint32_t                                    queryCount)
419b8e80941Smrg{
420b8e80941Smrg   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
421b8e80941Smrg   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
422b8e80941Smrg
423b8e80941Smrg   switch (pool->type) {
424b8e80941Smrg   case VK_QUERY_TYPE_OCCLUSION:
425b8e80941Smrg   case VK_QUERY_TYPE_TIMESTAMP:
426b8e80941Smrg      for (uint32_t i = 0; i < queryCount; i++) {
427b8e80941Smrg         emit_query_pc_availability(cmd_buffer,
428b8e80941Smrg                                    anv_query_address(pool, firstQuery + i),
429b8e80941Smrg                                    false);
430b8e80941Smrg      }
431b8e80941Smrg      break;
432b8e80941Smrg
433b8e80941Smrg   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
434b8e80941Smrg   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
435b8e80941Smrg      struct gen_mi_builder b;
436b8e80941Smrg      gen_mi_builder_init(&b, &cmd_buffer->batch);
437b8e80941Smrg
438b8e80941Smrg      for (uint32_t i = 0; i < queryCount; i++)
439b8e80941Smrg         emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), false);
440b8e80941Smrg      break;
441b8e80941Smrg   }
442b8e80941Smrg
443b8e80941Smrg   default:
444b8e80941Smrg      unreachable("Unsupported query type");
445b8e80941Smrg   }
446b8e80941Smrg}
447b8e80941Smrg
448b8e80941Smrgvoid genX(ResetQueryPoolEXT)(
449b8e80941Smrg    VkDevice                                    _device,
450b8e80941Smrg    VkQueryPool                                 queryPool,
451b8e80941Smrg    uint32_t                                    firstQuery,
452b8e80941Smrg    uint32_t                                    queryCount)
453b8e80941Smrg{
454b8e80941Smrg   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
455b8e80941Smrg
456b8e80941Smrg   for (uint32_t i = 0; i < queryCount; i++) {
457b8e80941Smrg      uint64_t *slot = pool->bo.map + (firstQuery + i) * pool->stride;
458b8e80941Smrg      *slot = 0;
459b8e80941Smrg   }
460b8e80941Smrg}
461b8e80941Smrg
/* Maps the bit position of each VkQueryPipelineStatisticFlagBits flag to
 * the hardware counter register holding that statistic.  Indexed by the
 * result of u_bit_scan() in emit_pipeline_stat(); the entry order must
 * therefore match the Vulkan flag-bit order (see the STATIC_ASSERT there).
 */
static const uint32_t vk_pipeline_stat_to_reg[] = {
   GENX(IA_VERTICES_COUNT_num),
   GENX(IA_PRIMITIVES_COUNT_num),
   GENX(VS_INVOCATION_COUNT_num),
   GENX(GS_INVOCATION_COUNT_num),
   GENX(GS_PRIMITIVES_COUNT_num),
   GENX(CL_INVOCATION_COUNT_num),
   GENX(CL_PRIMITIVES_COUNT_num),
   GENX(PS_INVOCATION_COUNT_num),
   GENX(HS_INVOCATION_COUNT_num),
   GENX(DS_INVOCATION_COUNT_num),
   GENX(CS_INVOCATION_COUNT_num),
};
475b8e80941Smrg
/* Snapshot one pipeline-statistics counter register into 'addr' as a
 * 64-bit value.  'stat' is the bit position of the statistic within
 * VkQueryPipelineStatisticFlagBits.
 */
static void
emit_pipeline_stat(struct gen_mi_builder *b, uint32_t stat,
                   struct anv_address addr)
{
   /* The register table must cover exactly the statistics bits we accept. */
   STATIC_ASSERT(ANV_PIPELINE_STATISTICS_MASK ==
                 (1 << ARRAY_SIZE(vk_pipeline_stat_to_reg)) - 1);

   assert(stat < ARRAY_SIZE(vk_pipeline_stat_to_reg));
   gen_mi_store(b, gen_mi_mem64(addr),
                gen_mi_reg64(vk_pipeline_stat_to_reg[stat]));
}
487b8e80941Smrg
/* Snapshot the two transform-feedback counters for 'stream' as 64-bit
 * values: primitives written at addr+0 and primitive storage needed at
 * addr+16 (each counter pair is 8 bytes of register spaced per-stream).
 */
static void
emit_xfb_query(struct gen_mi_builder *b, uint32_t stream,
               struct anv_address addr)
{
   assert(stream < MAX_XFB_STREAMS);

   gen_mi_store(b, gen_mi_mem64(anv_address_add(addr, 0)),
                gen_mi_reg64(GENX(SO_NUM_PRIMS_WRITTEN0_num) + stream * 8));
   gen_mi_store(b, gen_mi_mem64(anv_address_add(addr, 16)),
                gen_mi_reg64(GENX(SO_PRIM_STORAGE_NEEDED0_num) + stream * 8));
}
499b8e80941Smrg
/* Non-indexed begin is just the indexed variant with index 0. */
void genX(CmdBeginQuery)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    query,
    VkQueryControlFlags                         flags)
{
   genX(CmdBeginQueryIndexedEXT)(commandBuffer, queryPool, query, flags, 0);
}
508b8e80941Smrg
/* Record the "begin" snapshots for a query.  Begin values are written
 * starting at slot offset 8, right after the availability qword; the
 * matching "end" snapshots are written by genX(CmdEndQueryIndexedEXT).
 */
void genX(CmdBeginQueryIndexedEXT)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    query,
    VkQueryControlFlags                         flags,
    uint32_t                                    index)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   struct anv_address query_addr = anv_query_address(pool, query);

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, &cmd_buffer->batch);

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      /* Begin depth-count snapshot at slot offset 8. */
      emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 8));
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
      /* TODO: This might only be necessary for certain stats */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.CommandStreamerStallEnable = true;
         pc.StallAtPixelScoreboard = true;
      }

      /* Begin snapshots are interleaved with the end ones: statistic k's
       * begin value lives at offset 8 + 16*k.
       */
      uint32_t statistics = pool->pipeline_statistics;
      uint32_t offset = 8;
      while (statistics) {
         uint32_t stat = u_bit_scan(&statistics);
         emit_pipeline_stat(&b, stat, anv_address_add(query_addr, offset));
         offset += 16;
      }
      break;
   }

   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.CommandStreamerStallEnable = true;
         pc.StallAtPixelScoreboard = true;
      }
      /* Begin XFB counter snapshots at offsets 8 and 24. */
      emit_xfb_query(&b, index, anv_address_add(query_addr, 8));
      break;

   default:
      unreachable("");
   }
}
557b8e80941Smrg
/* Non-indexed end is just the indexed variant with index 0. */
void genX(CmdEndQuery)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    query)
{
   genX(CmdEndQueryIndexedEXT)(commandBuffer, queryPool, query, 0);
}
565b8e80941Smrg
/* Record the "end" snapshots for a query and mark the slot available.
 * End values interleave with the begin ones written by
 * genX(CmdBeginQueryIndexedEXT): for each counter the end snapshot lands 8
 * bytes after its begin snapshot.
 */
void genX(CmdEndQueryIndexedEXT)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    query,
    uint32_t                                    index)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   struct anv_address query_addr = anv_query_address(pool, query);

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, &cmd_buffer->batch);

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      /* End depth-count snapshot at offset 16, then availability. */
      emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 16));
      emit_query_pc_availability(cmd_buffer, query_addr, true);
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
      /* TODO: This might only be necessary for certain stats */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.CommandStreamerStallEnable = true;
         pc.StallAtPixelScoreboard = true;
      }

      /* End snapshots: statistic k's end value lives at offset 16 + 16*k. */
      uint32_t statistics = pool->pipeline_statistics;
      uint32_t offset = 16;
      while (statistics) {
         uint32_t stat = u_bit_scan(&statistics);
         emit_pipeline_stat(&b, stat, anv_address_add(query_addr, offset));
         offset += 16;
      }

      emit_query_mi_availability(&b, query_addr, true);
      break;
   }

   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.CommandStreamerStallEnable = true;
         pc.StallAtPixelScoreboard = true;
      }

      /* End XFB counter snapshots at offsets 16 and 32, then availability. */
      emit_xfb_query(&b, index, anv_address_add(query_addr, 16));
      emit_query_mi_availability(&b, query_addr, true);
      break;

   default:
      unreachable("");
   }

   /* When multiview is active the spec requires that N consecutive query
    * indices are used, where N is the number of active views in the subpass.
    * The spec allows that we only write the results to one of the queries
    * but we still need to manage result availability for all the query indices.
    * Since we only emit a single query for all active views in the
    * first index, mark the other query indices as being already available
    * with result 0.
    */
   if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) {
      const uint32_t num_queries =
         util_bitcount(cmd_buffer->state.subpass->view_mask);
      if (num_queries > 1)
         emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
   }
}
633b8e80941Smrg
634b8e80941Smrg#define TIMESTAMP 0x2358
635b8e80941Smrg
/* Write a GPU timestamp into the query's slot (offset 8) and mark the slot
 * available.  TOP_OF_PIPE reads the TIMESTAMP register directly from the
 * command streamer; every other stage uses a bottom-of-pipe PIPE_CONTROL
 * post-sync timestamp write.
 */
void genX(CmdWriteTimestamp)(
    VkCommandBuffer                             commandBuffer,
    VkPipelineStageFlagBits                     pipelineStage,
    VkQueryPool                                 queryPool,
    uint32_t                                    query)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   struct anv_address query_addr = anv_query_address(pool, query);

   assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, &cmd_buffer->batch);

   switch (pipelineStage) {
   case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
      gen_mi_store(&b, gen_mi_mem64(anv_address_add(query_addr, 8)),
                       gen_mi_reg64(TIMESTAMP));
      break;

   default:
      /* Everything else is bottom-of-pipe */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.DestinationAddressType  = DAT_PPGTT;
         pc.PostSyncOperation       = WriteTimestamp;
         pc.Address                 = anv_address_add(query_addr, 8);

         /* NOTE(review): extra CS stall only on gen9 GT4 — presumably a
          * hardware workaround; confirm against the workaround database.
          */
         if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
            pc.CommandStreamerStallEnable = true;
      }
      break;
   }

   emit_query_pc_availability(cmd_buffer, query_addr, true);

   /* When multiview is active the spec requires that N consecutive query
    * indices are used, where N is the number of active views in the subpass.
    * The spec allows that we only write the results to one of the queries
    * but we still need to manage result availability for all the query indices.
    * Since we only emit a single query for all active views in the
    * first index, mark the other query indices as being already available
    * with result 0.
    */
   if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) {
      const uint32_t num_queries =
         util_bitcount(cmd_buffer->state.subpass->view_mask);
      if (num_queries > 1)
         emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
   }
}
687b8e80941Smrg
688b8e80941Smrg#if GEN_GEN > 7 || GEN_IS_HASWELL
689b8e80941Smrg
690b8e80941Smrgstatic void
691b8e80941Smrggpu_write_query_result(struct gen_mi_builder *b,
692b8e80941Smrg                       struct anv_address dst_addr,
693b8e80941Smrg                       VkQueryResultFlags flags,
694b8e80941Smrg                       uint32_t value_index,
695b8e80941Smrg                       struct gen_mi_value query_result)
696b8e80941Smrg{
697b8e80941Smrg   if (flags & VK_QUERY_RESULT_64_BIT) {
698b8e80941Smrg      struct anv_address res_addr = anv_address_add(dst_addr, value_index * 8);
699b8e80941Smrg      gen_mi_store(b, gen_mi_mem64(res_addr), query_result);
700b8e80941Smrg   } else {
701b8e80941Smrg      struct anv_address res_addr = anv_address_add(dst_addr, value_index * 4);
702b8e80941Smrg      gen_mi_store(b, gen_mi_mem32(res_addr), query_result);
703b8e80941Smrg   }
704b8e80941Smrg}
705b8e80941Smrg
706b8e80941Smrgstatic struct gen_mi_value
707b8e80941Smrgcompute_query_result(struct gen_mi_builder *b, struct anv_address addr)
708b8e80941Smrg{
709b8e80941Smrg   return gen_mi_isub(b, gen_mi_mem64(anv_address_add(addr, 8)),
710b8e80941Smrg                         gen_mi_mem64(anv_address_add(addr, 0)));
711b8e80941Smrg}
712b8e80941Smrg
713b8e80941Smrgvoid genX(CmdCopyQueryPoolResults)(
714b8e80941Smrg    VkCommandBuffer                             commandBuffer,
715b8e80941Smrg    VkQueryPool                                 queryPool,
716b8e80941Smrg    uint32_t                                    firstQuery,
717b8e80941Smrg    uint32_t                                    queryCount,
718b8e80941Smrg    VkBuffer                                    destBuffer,
719b8e80941Smrg    VkDeviceSize                                destOffset,
720b8e80941Smrg    VkDeviceSize                                destStride,
721b8e80941Smrg    VkQueryResultFlags                          flags)
722b8e80941Smrg{
723b8e80941Smrg   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
724b8e80941Smrg   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
725b8e80941Smrg   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
726b8e80941Smrg
727b8e80941Smrg   struct gen_mi_builder b;
728b8e80941Smrg   gen_mi_builder_init(&b, &cmd_buffer->batch);
729b8e80941Smrg   struct gen_mi_value result;
730b8e80941Smrg
731b8e80941Smrg   /* If render target writes are ongoing, request a render target cache flush
732b8e80941Smrg    * to ensure proper ordering of the commands from the 3d pipe and the
733b8e80941Smrg    * command streamer.
734b8e80941Smrg    */
735b8e80941Smrg   if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_BUFFER_WRITES) {
736b8e80941Smrg      cmd_buffer->state.pending_pipe_bits |=
737b8e80941Smrg         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
738b8e80941Smrg   }
739b8e80941Smrg
740b8e80941Smrg   if ((flags & VK_QUERY_RESULT_WAIT_BIT) ||
741b8e80941Smrg       (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) ||
742b8e80941Smrg       /* Occlusion & timestamp queries are written using a PIPE_CONTROL and
743b8e80941Smrg        * because we're about to copy values from MI commands, we need to
744b8e80941Smrg        * stall the command streamer to make sure the PIPE_CONTROL values have
745b8e80941Smrg        * landed, otherwise we could see inconsistent values & availability.
746b8e80941Smrg        *
747b8e80941Smrg        *  From the vulkan spec:
748b8e80941Smrg        *
749b8e80941Smrg        *     "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
750b8e80941Smrg        *     previous uses of vkCmdResetQueryPool in the same queue, without
751b8e80941Smrg        *     any additional synchronization."
752b8e80941Smrg        */
753b8e80941Smrg       pool->type == VK_QUERY_TYPE_OCCLUSION ||
754b8e80941Smrg       pool->type == VK_QUERY_TYPE_TIMESTAMP) {
755b8e80941Smrg      cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
756b8e80941Smrg      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
757b8e80941Smrg   }
758b8e80941Smrg
759b8e80941Smrg   struct anv_address dest_addr = anv_address_add(buffer->address, destOffset);
760b8e80941Smrg   for (uint32_t i = 0; i < queryCount; i++) {
761b8e80941Smrg      struct anv_address query_addr = anv_query_address(pool, firstQuery + i);
762b8e80941Smrg      uint32_t idx = 0;
763b8e80941Smrg      switch (pool->type) {
764b8e80941Smrg      case VK_QUERY_TYPE_OCCLUSION:
765b8e80941Smrg         result = compute_query_result(&b, anv_address_add(query_addr, 8));
766b8e80941Smrg         gpu_write_query_result(&b, dest_addr, flags, idx++, result);
767b8e80941Smrg         break;
768b8e80941Smrg
769b8e80941Smrg      case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
770b8e80941Smrg         uint32_t statistics = pool->pipeline_statistics;
771b8e80941Smrg         while (statistics) {
772b8e80941Smrg            uint32_t stat = u_bit_scan(&statistics);
773b8e80941Smrg
774b8e80941Smrg            result = compute_query_result(&b, anv_address_add(query_addr,
775b8e80941Smrg                                                              idx * 16 + 8));
776b8e80941Smrg
777b8e80941Smrg            /* WaDividePSInvocationCountBy4:HSW,BDW */
778b8e80941Smrg            if ((cmd_buffer->device->info.gen == 8 ||
779b8e80941Smrg                 cmd_buffer->device->info.is_haswell) &&
780b8e80941Smrg                (1 << stat) == VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT) {
781b8e80941Smrg               result = gen_mi_ushr32_imm(&b, result, 2);
782b8e80941Smrg            }
783b8e80941Smrg
784b8e80941Smrg            gpu_write_query_result(&b, dest_addr, flags, idx++, result);
785b8e80941Smrg         }
786b8e80941Smrg         assert(idx == util_bitcount(pool->pipeline_statistics));
787b8e80941Smrg         break;
788b8e80941Smrg      }
789b8e80941Smrg
790b8e80941Smrg      case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
791b8e80941Smrg         result = compute_query_result(&b, anv_address_add(query_addr, 8));
792b8e80941Smrg         gpu_write_query_result(&b, dest_addr, flags, idx++, result);
793b8e80941Smrg         result = compute_query_result(&b, anv_address_add(query_addr, 24));
794b8e80941Smrg         gpu_write_query_result(&b, dest_addr, flags, idx++, result);
795b8e80941Smrg         break;
796b8e80941Smrg
797b8e80941Smrg      case VK_QUERY_TYPE_TIMESTAMP:
798b8e80941Smrg         result = gen_mi_mem64(anv_address_add(query_addr, 8));
799b8e80941Smrg         gpu_write_query_result(&b, dest_addr, flags, 0, result);
800b8e80941Smrg         break;
801b8e80941Smrg
802b8e80941Smrg      default:
803b8e80941Smrg         unreachable("unhandled query type");
804b8e80941Smrg      }
805b8e80941Smrg
806b8e80941Smrg      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
807b8e80941Smrg         gpu_write_query_result(&b, dest_addr, flags, idx,
808b8e80941Smrg                                gen_mi_mem64(query_addr));
809b8e80941Smrg      }
810b8e80941Smrg
811b8e80941Smrg      dest_addr = anv_address_add(dest_addr, destStride);
812b8e80941Smrg   }
813b8e80941Smrg}
814b8e80941Smrg
815b8e80941Smrg#else
816b8e80941Smrgvoid genX(CmdCopyQueryPoolResults)(
817b8e80941Smrg    VkCommandBuffer                             commandBuffer,
818b8e80941Smrg    VkQueryPool                                 queryPool,
819b8e80941Smrg    uint32_t                                    firstQuery,
820b8e80941Smrg    uint32_t                                    queryCount,
821b8e80941Smrg    VkBuffer                                    destBuffer,
822b8e80941Smrg    VkDeviceSize                                destOffset,
823b8e80941Smrg    VkDeviceSize                                destStride,
824b8e80941Smrg    VkQueryResultFlags                          flags)
825b8e80941Smrg{
826b8e80941Smrg   anv_finishme("Queries not yet supported on Ivy Bridge");
827b8e80941Smrg}
828b8e80941Smrg#endif
829