/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"

#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_resource.h"
#include "freedreno_util.h"

struct fd_hw_sample_period {
   struct fd_hw_sample *start, *end;
   struct list_head list;
};

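/* Get a sample for the given query type in the current batch.  Samples
 * are cached per-provider, so queries of the same type that pause or
 * resume at the same point in the batch share a single sample (the
 * cache is invalidated in fd_hw_query_update_batch()):
 */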
static struct fd_hw_sample *
get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring,
           unsigned query_type) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_hw_sample *samp = NULL;
   int idx = pidx(query_type);

   assume(idx >= 0); /* query never would have been created otherwise */

   if (!batch->sample_cache[idx]) {
      struct fd_hw_sample *new_samp =
         ctx->hw_sample_providers[idx]->get_sample(batch, ring);
      fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
      util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
      fd_batch_needs_flush(batch);
   }

   fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);

   return samp;
}

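/* Drop the cached per-provider samples, so the next get_sample() call
 * for each provider allocates a fresh sample:
 */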
static void
clear_sample_cache(struct fd_batch *batch)
{
   int i;

   for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
      fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
}

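/* Check whether the query's provider has an open sample period in this
 * batch:
 */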
static bool
query_active_in_batch(struct fd_batch *batch, struct fd_hw_query *hq)
{
   int idx = pidx(hq->provider->query_type);
   return batch->query_providers_active & (1 << idx);
}

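/* Open a new sample period for the query, recording its "start" sample
 * in the given ring:
 */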
static void
resume_query(struct fd_batch *batch, struct fd_hw_query *hq,
             struct fd_ringbuffer *ring) assert_dt
{
   int idx = pidx(hq->provider->query_type);
   DBG("%p", hq);
   assert(idx >= 0); /* query never would have been created otherwise */
   assert(!hq->period);
   batch->query_providers_used |= (1 << idx);
   batch->query_providers_active |= (1 << idx);
   hq->period = slab_alloc_st(&batch->ctx->sample_period_pool);
   list_inithead(&hq->period->list);
   hq->period->start = get_sample(batch, ring, hq->base.type);
   /* NOTE: slab_alloc_st() does not zero out the buffer: */
   hq->period->end = NULL;
}

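/* Close the query's open sample period, recording the "end" sample and
 * moving the completed period onto the query's list:
 */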
static void
pause_query(struct fd_batch *batch, struct fd_hw_query *hq,
            struct fd_ringbuffer *ring) assert_dt
{
   ASSERTED int idx = pidx(hq->provider->query_type);
   DBG("%p", hq);
   assert(idx >= 0); /* query never would have been created otherwise */
   assert(hq->period && !hq->period->end);
   assert(query_active_in_batch(batch, hq));
   batch->query_providers_active &= ~(1 << idx);
   hq->period->end = get_sample(batch, ring, hq->base.type);
   list_addtail(&hq->period->list, &hq->periods);
   hq->period = NULL;
}

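/* Release all of the query's accumulated sample periods, dropping the
 * sample references they hold:
 */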
static void
destroy_periods(struct fd_context *ctx, struct fd_hw_query *hq)
{
   struct fd_hw_sample_period *period, *s;
   LIST_FOR_EACH_ENTRY_SAFE (period, s, &hq->periods, list) {
      fd_hw_sample_reference(ctx, &period->start, NULL);
      fd_hw_sample_reference(ctx, &period->end, NULL);
      list_del(&period->list);
      slab_free_st(&ctx->sample_period_pool, period);
   }
}

static void
fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
{
   struct fd_hw_query *hq = fd_hw_query(q);

   DBG("%p", q);

   destroy_periods(ctx, hq);
   list_del(&hq->list);

   free(hq);
}

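/* Start the query: discard results from any previous begin/end, open a
 * sample period in the current batch (if any), and put the query on the
 * context's active list:
 */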
static void
fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
   struct fd_batch *batch = fd_context_batch_locked(ctx);
   struct fd_hw_query *hq = fd_hw_query(q);

   DBG("%p", q);

   /* begin_query() should clear previous results: */
   destroy_periods(ctx, hq);

   if (batch && (ctx->active_queries || hq->provider->always))
      resume_query(batch, hq, batch->draw);

   /* add to active list: */
   assert(list_is_empty(&hq->list));
   list_addtail(&hq->list, &ctx->hw_active_queries);

   fd_batch_unlock_submit(batch);
   fd_batch_reference(&batch, NULL);
}

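/* Stop the query: close the open sample period (if any) and take the
 * query off the active list:
 */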
static void
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
   struct fd_batch *batch = fd_context_batch_locked(ctx);
   struct fd_hw_query *hq = fd_hw_query(q);

   DBG("%p", q);

   if (batch && (ctx->active_queries || hq->provider->always))
      pause_query(batch, hq, batch->draw);

   /* remove from active list: */
   list_delinit(&hq->list);

   fd_batch_unlock_submit(batch);
   fd_batch_reference(&batch, NULL);
}

/* helper to get ptr to specified sample: */
static void *
sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
{
   return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
}

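/* Accumulate the query result from the per-tile start/end samples of
 * each sample period.  In the no-wait case, bail with false if the
 * backing buffer is still busy:
 */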
static bool
fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q, bool wait,
                       union pipe_query_result *result)
{
   struct fd_hw_query *hq = fd_hw_query(q);
   const struct fd_hw_sample_provider *p = hq->provider;
   struct fd_hw_sample_period *period, *tmp;

   DBG("%p: wait=%d", q, wait);

   if (list_is_empty(&hq->periods))
      return true;

   assert(list_is_empty(&hq->list));
   assert(!hq->period);

   /* sum the result across all sample periods.  Start with the last period
    * so that no-wait will bail quickly.
    */
   LIST_FOR_EACH_ENTRY_SAFE_REV (period, tmp, &hq->periods, list) {
      struct fd_hw_sample *start = period->start;
      ASSERTED struct fd_hw_sample *end = period->end;
      unsigned i;

      /* start and end samples should be from same batch: */
      assert(start->prsc == end->prsc);
      assert(start->num_tiles == end->num_tiles);

      struct fd_resource *rsc = fd_resource(start->prsc);

      /* ARB_occlusion_query says:
       *
       *     "Querying the state for a given occlusion query forces that
       *      occlusion query to complete within a finite amount of time."
       *
       * So, regardless of whether we are supposed to wait or not, we do need to
       * flush now.
       */
      if (fd_get_query_result_in_driver_thread(q)) {
         tc_assert_driver_thread(ctx->tc);
         fd_context_access_begin(ctx);
         fd_bc_flush_writer(ctx, rsc);
         fd_context_access_end(ctx);
      }

      /* some piglit tests at least do query with no draws, I guess: */
      if (!rsc->bo)
         continue;

      if (!wait) {
         int ret = fd_resource_wait(
            ctx, rsc, FD_BO_PREP_READ | FD_BO_PREP_NOSYNC | FD_BO_PREP_FLUSH);
         if (ret)
            return false;
      } else {
         fd_resource_wait(ctx, rsc, FD_BO_PREP_READ);
      }

      void *ptr = fd_bo_map(rsc->bo);

      for (i = 0; i < start->num_tiles; i++) {
         p->accumulate_result(ctx, sampptr(period->start, i, ptr),
                              sampptr(period->end, i, ptr), result);
      }

      fd_bo_cpu_fini(rsc->bo);
   }

   return true;
}

static const struct fd_query_funcs hw_query_funcs = {
   .destroy_query = fd_hw_destroy_query,
   .begin_query = fd_hw_begin_query,
   .end_query = fd_hw_end_query,
   .get_query_result = fd_hw_get_query_result,
};

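/* Create a hw query, if a sample provider has been registered for the
 * requested query type:
 */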
struct fd_query *
fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
{
   struct fd_hw_query *hq;
   struct fd_query *q;
   int idx = pidx(query_type);

   if ((idx < 0) || !ctx->hw_sample_providers[idx])
      return NULL;

   hq = CALLOC_STRUCT(fd_hw_query);
   if (!hq)
      return NULL;

   DBG("%p: query_type=%u", hq, query_type);

   hq->provider = ctx->hw_sample_providers[idx];

   list_inithead(&hq->periods);
   list_inithead(&hq->list);

   q = &hq->base;
   q->funcs = &hw_query_funcs;
   q->type = query_type;
   q->index = index;

   return q;
}

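/* Allocate a sample, assigning it the next (aligned) offset within the
 * per-tile sample region of the batch's query buffer.  The buffer itself
 * starts out zero-sized and is resized in fd_hw_query_prepare() once the
 * number of tiles is known:
 */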
struct fd_hw_sample *
fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
{
   struct fd_hw_sample *samp = slab_alloc_st(&batch->ctx->sample_pool);
   pipe_reference_init(&samp->reference, 1);
   samp->size = size;
   debug_assert(util_is_power_of_two_or_zero(size));
   batch->next_sample_offset = align(batch->next_sample_offset, size);
   samp->offset = batch->next_sample_offset;
   /* NOTE: slab_alloc_st() does not zero out the buffer: */
   samp->prsc = NULL;
   samp->num_tiles = 0;
   samp->tile_stride = 0;
   batch->next_sample_offset += size;

   if (!batch->query_buf) {
      struct pipe_screen *pscreen = &batch->ctx->screen->base;
      struct pipe_resource templ = {
         .target = PIPE_BUFFER,
         .format = PIPE_FORMAT_R8_UNORM,
         .bind = PIPE_BIND_QUERY_BUFFER,
         .width0 = 0, /* create initially zero size buffer */
         .height0 = 1,
         .depth0 = 1,
         .array_size = 1,
         .last_level = 0,
         .nr_samples = 1,
      };
      batch->query_buf = pscreen->resource_create(pscreen, &templ);
   }

   pipe_resource_reference(&samp->prsc, batch->query_buf);

   return samp;
}

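/* Called when the sample's refcount drops to zero (see
 * fd_hw_sample_reference()):
 */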
void
__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
{
   pipe_resource_reference(&samp->prsc, NULL);
   slab_free_st(&ctx->sample_pool, samp);
}

/* called from gmem code once total storage requirements are known (i.e.
 * number of samples times number of tiles)
 */
void
fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
{
   uint32_t tile_stride = batch->next_sample_offset;

   if (tile_stride > 0)
      fd_resource_resize(batch->query_buf, tile_stride * num_tiles);

   batch->query_tile_stride = tile_stride;

   while (batch->samples.size > 0) {
      struct fd_hw_sample *samp =
         util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
      samp->num_tiles = num_tiles;
      samp->tile_stride = tile_stride;
      fd_hw_sample_reference(batch->ctx, &samp, NULL);
   }

   /* reset things for next batch: */
   batch->next_sample_offset = 0;
}

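/* Called per-tile from the gmem code, to point the hw at the slice of
 * the query buffer belonging to tile 'n':
 */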
void
fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
                         struct fd_ringbuffer *ring)
{
   uint32_t tile_stride = batch->query_tile_stride;
   uint32_t offset = tile_stride * n;

   /* bail if no queries: */
   if (tile_stride == 0)
      return;

   fd_wfi(batch, ring);
   OUT_PKT0(ring, HW_QUERY_BASE_REG, 1);
   OUT_RELOC(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
}

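/* Reconcile the batch's open sample periods with the context's list of
 * active queries, pausing/resuming queries whose state changed.  With
 * disable_all, any open periods are closed unconditionally.  The sample
 * cache is always invalidated so the next sample point gets fresh
 * samples:
 */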
void
fd_hw_query_update_batch(struct fd_batch *batch, bool disable_all)
{
   struct fd_context *ctx = batch->ctx;

   if (disable_all || ctx->update_active_queries) {
      struct fd_hw_query *hq;
      LIST_FOR_EACH_ENTRY (hq, &batch->ctx->hw_active_queries, list) {
         bool was_active = query_active_in_batch(batch, hq);
         bool now_active =
            !disable_all && (ctx->active_queries || hq->provider->always);

         if (now_active && !was_active)
            resume_query(batch, hq, batch->draw);
         else if (was_active && !now_active)
            pause_query(batch, hq, batch->draw);
      }
   }
   clear_sample_cache(batch);
}

/* call the provider->enable() for all the hw queries that were active
 * in the current batch.  This sets up perfctr selector regs statically
 * for the duration of the batch.
 */
void
fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct fd_context *ctx = batch->ctx;
   for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
      if (batch->query_providers_used & (1 << idx)) {
         assert(ctx->hw_sample_providers[idx]);
         if (ctx->hw_sample_providers[idx]->enable)
            ctx->hw_sample_providers[idx]->enable(ctx, ring);
      }
   }
}

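/* Register the sample provider for a given query type.  There can be
 * only one provider per type:
 */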
void
fd_hw_query_register_provider(struct pipe_context *pctx,
                              const struct fd_hw_sample_provider *provider)
{
   struct fd_context *ctx = fd_context(pctx);
   int idx = pidx(provider->query_type);

   assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
   assert(!ctx->hw_sample_providers[idx]);

   ctx->hw_sample_providers[idx] = provider;
}

void
fd_hw_query_init(struct pipe_context *pctx)
{
   struct fd_context *ctx = fd_context(pctx);

   slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample), 16);
   slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
               16);
}

void
fd_hw_query_fini(struct pipe_context *pctx)
{
   struct fd_context *ctx = fd_context(pctx);

   slab_destroy(&ctx->sample_pool);
   slab_destroy(&ctx->sample_period_pool);
}