1/*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "pipe/p_state.h"
28#include "util/u_memory.h"
29#include "util/u_inlines.h"
30
31#include "freedreno_query_hw.h"
32#include "freedreno_context.h"
33#include "freedreno_resource.h"
34#include "freedreno_util.h"
35
36struct fd_hw_sample_period {
37	struct fd_hw_sample *start, *end;
38	struct list_head list;
39};
40
41static struct fd_hw_sample *
42get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring,
43		unsigned query_type)
44{
45	struct fd_context *ctx = batch->ctx;
46	struct fd_hw_sample *samp = NULL;
47	int idx = pidx(query_type);
48
49	assume(idx >= 0);   /* query never would have been created otherwise */
50
51	if (!batch->sample_cache[idx]) {
52		struct fd_hw_sample *new_samp =
53			ctx->hw_sample_providers[idx]->get_sample(batch, ring);
54		fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
55		util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
56		batch->needs_flush = true;
57	}
58
59	fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);
60
61	return samp;
62}
63
64static void
65clear_sample_cache(struct fd_batch *batch)
66{
67	int i;
68
69	for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
70		fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
71}
72
73static bool
74is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
75{
76	return !!(hq->provider->active & stage);
77}
78
79
80static void
81resume_query(struct fd_batch *batch, struct fd_hw_query *hq,
82		struct fd_ringbuffer *ring)
83{
84	int idx = pidx(hq->provider->query_type);
85	DBG("%p", hq);
86	assert(idx >= 0);   /* query never would have been created otherwise */
87	assert(!hq->period);
88	batch->active_providers |= (1 << idx);
89	hq->period = slab_alloc_st(&batch->ctx->sample_period_pool);
90	list_inithead(&hq->period->list);
91	hq->period->start = get_sample(batch, ring, hq->base.type);
92	/* NOTE: slab_alloc_st() does not zero out the buffer: */
93	hq->period->end = NULL;
94}
95
96static void
97pause_query(struct fd_batch *batch, struct fd_hw_query *hq,
98		struct fd_ringbuffer *ring)
99{
100	int idx = pidx(hq->provider->query_type);
101	DBG("%p", hq);
102	assert(idx >= 0);   /* query never would have been created otherwise */
103	assert(hq->period && !hq->period->end);
104	assert(batch->active_providers & (1 << idx));
105	hq->period->end = get_sample(batch, ring, hq->base.type);
106	list_addtail(&hq->period->list, &hq->periods);
107	hq->period = NULL;
108}
109
110static void
111destroy_periods(struct fd_context *ctx, struct fd_hw_query *hq)
112{
113	struct fd_hw_sample_period *period, *s;
114	LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->periods, list) {
115		fd_hw_sample_reference(ctx, &period->start, NULL);
116		fd_hw_sample_reference(ctx, &period->end, NULL);
117		list_del(&period->list);
118		slab_free_st(&ctx->sample_period_pool, period);
119	}
120}
121
122static void
123fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
124{
125	struct fd_hw_query *hq = fd_hw_query(q);
126
127	DBG("%p: active=%d", q, q->active);
128
129	destroy_periods(ctx, hq);
130	list_del(&hq->list);
131
132	free(hq);
133}
134
135static boolean
136fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
137{
138	struct fd_batch *batch = fd_context_batch(ctx);
139	struct fd_hw_query *hq = fd_hw_query(q);
140
141	DBG("%p: active=%d", q, q->active);
142
143	/* begin_query() should clear previous results: */
144	destroy_periods(ctx, hq);
145
146	if (batch && is_active(hq, batch->stage))
147		resume_query(batch, hq, batch->draw);
148
149	/* add to active list: */
150	assert(list_empty(&hq->list));
151	list_addtail(&hq->list, &ctx->hw_active_queries);
152
153	return true;
154}
155
156static void
157fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
158{
159	struct fd_batch *batch = fd_context_batch(ctx);
160	struct fd_hw_query *hq = fd_hw_query(q);
161
162	DBG("%p: active=%d", q, q->active);
163
164	if (batch && is_active(hq, batch->stage))
165		pause_query(batch, hq, batch->draw);
166
167	/* remove from active list: */
168	list_delinit(&hq->list);
169}
170
171/* helper to get ptr to specified sample: */
172static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
173{
174	return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
175}
176
177static boolean
178fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
179		boolean wait, union pipe_query_result *result)
180{
181	struct fd_hw_query *hq = fd_hw_query(q);
182	const struct fd_hw_sample_provider *p = hq->provider;
183	struct fd_hw_sample_period *period;
184
185	DBG("%p: wait=%d, active=%d", q, wait, q->active);
186
187	if (LIST_IS_EMPTY(&hq->periods))
188		return true;
189
190	assert(LIST_IS_EMPTY(&hq->list));
191	assert(!hq->period);
192
193	/* if !wait, then check the last sample (the one most likely to
194	 * not be ready yet) and bail if it is not ready:
195	 */
196	if (!wait) {
197		int ret;
198
199		period = LIST_ENTRY(struct fd_hw_sample_period,
200				hq->periods.prev, list);
201
202		struct fd_resource *rsc = fd_resource(period->end->prsc);
203
204		if (pending(rsc, false)) {
205			/* piglit spec@arb_occlusion_query@occlusion_query_conform
206			 * test, and silly apps perhaps, get stuck in a loop trying
207			 * to get  query result forever with wait==false..  we don't
208			 * wait to flush unnecessarily but we also don't want to
209			 * spin forever:
210			 */
211			if (hq->no_wait_cnt++ > 5)
212				fd_batch_flush(rsc->write_batch, false, false);
213			return false;
214		}
215
216		if (!rsc->bo)
217			return false;
218
219		ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe,
220				DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
221		if (ret)
222			return false;
223
224		fd_bo_cpu_fini(rsc->bo);
225	}
226
227	/* sum the result across all sample periods: */
228	LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
229		struct fd_hw_sample *start = period->start;
230		struct fd_hw_sample *end = period->end;
231		unsigned i;
232
233		/* start and end samples should be from same batch: */
234		assert(start->prsc == end->prsc);
235		assert(start->num_tiles == end->num_tiles);
236
237		struct fd_resource *rsc = fd_resource(start->prsc);
238
239		if (rsc->write_batch)
240			fd_batch_flush(rsc->write_batch, true, false);
241
242		/* some piglit tests at least do query with no draws, I guess: */
243		if (!rsc->bo)
244			continue;
245
246		fd_bo_cpu_prep(rsc->bo, ctx->pipe, DRM_FREEDRENO_PREP_READ);
247
248		void *ptr = fd_bo_map(rsc->bo);
249
250		for (i = 0; i < start->num_tiles; i++) {
251			p->accumulate_result(ctx, sampptr(period->start, i, ptr),
252					sampptr(period->end, i, ptr), result);
253		}
254
255		fd_bo_cpu_fini(rsc->bo);
256	}
257
258	return true;
259}
260
261static const struct fd_query_funcs hw_query_funcs = {
262		.destroy_query    = fd_hw_destroy_query,
263		.begin_query      = fd_hw_begin_query,
264		.end_query        = fd_hw_end_query,
265		.get_query_result = fd_hw_get_query_result,
266};
267
268struct fd_query *
269fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
270{
271	struct fd_hw_query *hq;
272	struct fd_query *q;
273	int idx = pidx(query_type);
274
275	if ((idx < 0) || !ctx->hw_sample_providers[idx])
276		return NULL;
277
278	hq = CALLOC_STRUCT(fd_hw_query);
279	if (!hq)
280		return NULL;
281
282	DBG("%p: query_type=%u", hq, query_type);
283
284	hq->provider = ctx->hw_sample_providers[idx];
285
286	list_inithead(&hq->periods);
287	list_inithead(&hq->list);
288
289	q = &hq->base;
290	q->funcs = &hw_query_funcs;
291	q->type = query_type;
292
293	return q;
294}
295
296struct fd_hw_sample *
297fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
298{
299	struct fd_hw_sample *samp = slab_alloc_st(&batch->ctx->sample_pool);
300	pipe_reference_init(&samp->reference, 1);
301	samp->size = size;
302	debug_assert(util_is_power_of_two_or_zero(size));
303	batch->next_sample_offset = align(batch->next_sample_offset, size);
304	samp->offset = batch->next_sample_offset;
305	/* NOTE: slab_alloc_st() does not zero out the buffer: */
306	samp->prsc = NULL;
307	samp->num_tiles = 0;
308	samp->tile_stride = 0;
309	batch->next_sample_offset += size;
310
311	if (!batch->query_buf) {
312		struct pipe_screen *pscreen = &batch->ctx->screen->base;
313		struct pipe_resource templ = {
314			.target  = PIPE_BUFFER,
315			.format  = PIPE_FORMAT_R8_UNORM,
316			.bind    = PIPE_BIND_QUERY_BUFFER,
317			.width0  = 0,    /* create initially zero size buffer */
318			.height0 = 1,
319			.depth0  = 1,
320			.array_size = 1,
321			.last_level = 0,
322			.nr_samples = 1,
323		};
324		batch->query_buf = pscreen->resource_create(pscreen, &templ);
325	}
326
327	pipe_resource_reference(&samp->prsc, batch->query_buf);
328
329	return samp;
330}
331
332void
333__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
334{
335	pipe_resource_reference(&samp->prsc, NULL);
336	slab_free_st(&ctx->sample_pool, samp);
337}
338
339/* called from gmem code once total storage requirements are known (ie.
340 * number of samples times number of tiles)
341 */
342void
343fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
344{
345	uint32_t tile_stride = batch->next_sample_offset;
346
347	if (tile_stride > 0)
348		fd_resource_resize(batch->query_buf, tile_stride * num_tiles);
349
350	batch->query_tile_stride = tile_stride;
351
352	while (batch->samples.size > 0) {
353		struct fd_hw_sample *samp =
354			util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
355		samp->num_tiles = num_tiles;
356		samp->tile_stride = tile_stride;
357		fd_hw_sample_reference(batch->ctx, &samp, NULL);
358	}
359
360	/* reset things for next batch: */
361	batch->next_sample_offset = 0;
362}
363
364void
365fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
366		struct fd_ringbuffer *ring)
367{
368	uint32_t tile_stride = batch->query_tile_stride;
369	uint32_t offset = tile_stride * n;
370
371	/* bail if no queries: */
372	if (tile_stride == 0)
373		return;
374
375	fd_wfi(batch, ring);
376	OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
377	OUT_RELOCW(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
378}
379
380void
381fd_hw_query_set_stage(struct fd_batch *batch, enum fd_render_stage stage)
382{
383	if (stage != batch->stage) {
384		struct fd_hw_query *hq;
385		LIST_FOR_EACH_ENTRY(hq, &batch->ctx->hw_active_queries, list) {
386			bool was_active = is_active(hq, batch->stage);
387			bool now_active = is_active(hq, stage);
388
389			if (now_active && !was_active)
390				resume_query(batch, hq, batch->draw);
391			else if (was_active && !now_active)
392				pause_query(batch, hq, batch->draw);
393		}
394	}
395	clear_sample_cache(batch);
396}
397
398/* call the provider->enable() for all the hw queries that were active
399 * in the current batch.  This sets up perfctr selector regs statically
400 * for the duration of the batch.
401 */
402void
403fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring)
404{
405	struct fd_context *ctx = batch->ctx;
406	for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
407		if (batch->active_providers & (1 << idx)) {
408			assert(ctx->hw_sample_providers[idx]);
409			if (ctx->hw_sample_providers[idx]->enable)
410				ctx->hw_sample_providers[idx]->enable(ctx, ring);
411		}
412	}
413	batch->active_providers = 0;  /* clear it for next frame */
414}
415
416void
417fd_hw_query_register_provider(struct pipe_context *pctx,
418		const struct fd_hw_sample_provider *provider)
419{
420	struct fd_context *ctx = fd_context(pctx);
421	int idx = pidx(provider->query_type);
422
423	assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
424	assert(!ctx->hw_sample_providers[idx]);
425
426	ctx->hw_sample_providers[idx] = provider;
427}
428
429void
430fd_hw_query_init(struct pipe_context *pctx)
431{
432	struct fd_context *ctx = fd_context(pctx);
433
434	slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
435			16);
436	slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
437			16);
438}
439
440void
441fd_hw_query_fini(struct pipe_context *pctx)
442{
443	struct fd_context *ctx = fd_context(pctx);
444
445	slab_destroy(&ctx->sample_pool);
446	slab_destroy(&ctx->sample_period_pool);
447}
448