1848b8605Smrg/*
2848b8605Smrg * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3848b8605Smrg *
4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5848b8605Smrg * copy of this software and associated documentation files (the "Software"),
6848b8605Smrg * to deal in the Software without restriction, including without limitation
7848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the
9848b8605Smrg * Software is furnished to do so, subject to the following conditions:
10848b8605Smrg *
11848b8605Smrg * The above copyright notice and this permission notice (including the next
12848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the
13848b8605Smrg * Software.
14848b8605Smrg *
15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19848b8605Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20848b8605Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21848b8605Smrg * SOFTWARE.
22848b8605Smrg *
23848b8605Smrg * Authors:
24848b8605Smrg *    Rob Clark <robclark@freedesktop.org>
25848b8605Smrg */
26848b8605Smrg
27848b8605Smrg#include "pipe/p_state.h"
28848b8605Smrg#include "util/u_string.h"
29848b8605Smrg#include "util/u_memory.h"
30848b8605Smrg#include "util/u_inlines.h"
31848b8605Smrg#include "util/u_format.h"
32848b8605Smrg
33848b8605Smrg#include "freedreno_gmem.h"
34848b8605Smrg#include "freedreno_context.h"
35b8e80941Smrg#include "freedreno_fence.h"
36848b8605Smrg#include "freedreno_resource.h"
37848b8605Smrg#include "freedreno_query_hw.h"
38848b8605Smrg#include "freedreno_util.h"
39848b8605Smrg
40848b8605Smrg/*
41848b8605Smrg * GMEM is the small (ie. 256KiB for a200, 512KiB for a220, etc) tile buffer
42848b8605Smrg * inside the GPU.  All rendering happens to GMEM.  Larger render targets
43848b8605Smrg * are split into tiles that are small enough for the color (and depth and/or
44848b8605Smrg * stencil, if enabled) buffers to fit within GMEM.  Before rendering a tile,
 * if there was not a clear invalidating the previous tile contents, we need
 * to restore the previous tile's contents (system mem -> GMEM), and after all
47848b8605Smrg * the draw calls, before moving to the next tile, we need to save the tile
48848b8605Smrg * contents (GMEM -> system mem).
49848b8605Smrg *
50848b8605Smrg * The code in this file handles dealing with GMEM and tiling.
51848b8605Smrg *
52848b8605Smrg * The structure of the ringbuffer ends up being:
53848b8605Smrg *
54848b8605Smrg *     +--<---<-- IB ---<---+---<---+---<---<---<--+
55848b8605Smrg *     |                    |       |              |
56848b8605Smrg *     v                    ^       ^              ^
57848b8605Smrg *   ------------------------------------------------------
58848b8605Smrg *     | clear/draw cmds | Tile0 | Tile1 | .... | TileN |
59848b8605Smrg *   ------------------------------------------------------
60848b8605Smrg *                       ^
61848b8605Smrg *                       |
62848b8605Smrg *                       address submitted in issueibcmds
63848b8605Smrg *
64848b8605Smrg * Where the per-tile section handles scissor setup, mem2gmem restore (if
65848b8605Smrg * needed), IB to draw cmds earlier in the ringbuffer, and then gmem2mem
66848b8605Smrg * resolve.
67848b8605Smrg */
68848b8605Smrg
/*
 * Maximum bin width, in pixels, allowed for the GPU generation of 'screen':
 * 1024 on a4xx/a5xx/a6xx, 992 on a3xx, and 512 on anything else.
 */
static uint32_t bin_width(struct fd_screen *screen)
{
	const bool a4xx_or_newer =
		is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);

	if (a4xx_or_newer)
		return 1024;

	return is_a3xx(screen) ? 992 : 512;
}
77848b8605Smrg
78b8e80941Smrgstatic uint32_t
79b8e80941Smrgtotal_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2],
80b8e80941Smrg		   uint32_t bin_w, uint32_t bin_h, uint32_t gmem_align,
81b8e80941Smrg		   struct fd_gmem_stateobj *gmem)
82b8e80941Smrg{
83b8e80941Smrg	uint32_t total = 0, i;
84b8e80941Smrg
85b8e80941Smrg	for (i = 0; i < MAX_RENDER_TARGETS; i++) {
86b8e80941Smrg		if (cbuf_cpp[i]) {
87b8e80941Smrg			gmem->cbuf_base[i] = align(total, gmem_align);
88b8e80941Smrg			total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h;
89b8e80941Smrg		}
90b8e80941Smrg	}
91b8e80941Smrg
92b8e80941Smrg	if (zsbuf_cpp[0]) {
93b8e80941Smrg		gmem->zsbuf_base[0] = align(total, gmem_align);
94b8e80941Smrg		total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h;
95b8e80941Smrg	}
96b8e80941Smrg
97b8e80941Smrg	if (zsbuf_cpp[1]) {
98b8e80941Smrg		gmem->zsbuf_base[1] = align(total, gmem_align);
99b8e80941Smrg		total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h;
100b8e80941Smrg	}
101b8e80941Smrg
102b8e80941Smrg	return total;
103b8e80941Smrg}
104b8e80941Smrg
105848b8605Smrgstatic void
106b8e80941Smrgcalculate_tiles(struct fd_batch *batch)
107848b8605Smrg{
108b8e80941Smrg	struct fd_context *ctx = batch->ctx;
109b8e80941Smrg	struct fd_screen *screen = ctx->screen;
110848b8605Smrg	struct fd_gmem_stateobj *gmem = &ctx->gmem;
111b8e80941Smrg	struct pipe_scissor_state *scissor = &batch->max_scissor;
112b8e80941Smrg	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
113b8e80941Smrg	const uint32_t gmem_alignw = screen->gmem_alignw;
114b8e80941Smrg	const uint32_t gmem_alignh = screen->gmem_alignh;
115b8e80941Smrg	const unsigned npipes = screen->num_vsc_pipes;
116b8e80941Smrg	const uint32_t gmem_size = screen->gmemsize_bytes;
117848b8605Smrg	uint32_t minx, miny, width, height;
118848b8605Smrg	uint32_t nbins_x = 1, nbins_y = 1;
119848b8605Smrg	uint32_t bin_w, bin_h;
120b8e80941Smrg	uint32_t gmem_align = 0x4000;
121b8e80941Smrg	uint32_t max_width = bin_width(screen);
122b8e80941Smrg	uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0};
123848b8605Smrg	uint32_t i, j, t, xoff, yoff;
124848b8605Smrg	uint32_t tpp_x, tpp_y;
125b8e80941Smrg	bool has_zs = !!(batch->gmem_reason & (FD_GMEM_DEPTH_ENABLED |
126b8e80941Smrg		FD_GMEM_STENCIL_ENABLED | FD_GMEM_CLEARS_DEPTH_STENCIL));
127b8e80941Smrg	int tile_n[npipes];
128848b8605Smrg
129b8e80941Smrg	if (has_zs) {
130b8e80941Smrg		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
131b8e80941Smrg		zsbuf_cpp[0] = rsc->cpp;
132b8e80941Smrg		if (rsc->stencil)
133b8e80941Smrg			zsbuf_cpp[1] = rsc->stencil->cpp;
134b8e80941Smrg	} else {
135b8e80941Smrg		/* we might have a zsbuf, but it isn't used */
136b8e80941Smrg		batch->restore &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
137b8e80941Smrg		batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
138b8e80941Smrg	}
139b8e80941Smrg	for (i = 0; i < pfb->nr_cbufs; i++) {
140b8e80941Smrg		if (pfb->cbufs[i])
141b8e80941Smrg			cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format);
142b8e80941Smrg		else
143b8e80941Smrg			cbuf_cpp[i] = 4;
144b8e80941Smrg		/* if MSAA, color buffers are super-sampled in GMEM: */
145b8e80941Smrg		cbuf_cpp[i] *= pfb->samples;
146b8e80941Smrg	}
147848b8605Smrg
148b8e80941Smrg	if (!memcmp(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)) &&
149b8e80941Smrg		!memcmp(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)) &&
150b8e80941Smrg		!memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) {
151848b8605Smrg		/* everything is up-to-date */
152848b8605Smrg		return;
153848b8605Smrg	}
154848b8605Smrg
155b8e80941Smrg	if (fd_mesa_debug & FD_DBG_NOSCIS) {
156848b8605Smrg		minx = 0;
157848b8605Smrg		miny = 0;
158848b8605Smrg		width = pfb->width;
159848b8605Smrg		height = pfb->height;
160848b8605Smrg	} else {
161b8e80941Smrg		/* round down to multiple of alignment: */
162b8e80941Smrg		minx = scissor->minx & ~(gmem_alignw - 1);
163b8e80941Smrg		miny = scissor->miny & ~(gmem_alignh - 1);
164848b8605Smrg		width = scissor->maxx - minx;
165848b8605Smrg		height = scissor->maxy - miny;
166848b8605Smrg	}
167848b8605Smrg
168b8e80941Smrg	bin_w = align(width, gmem_alignw);
169b8e80941Smrg	bin_h = align(height, gmem_alignh);
170848b8605Smrg
171848b8605Smrg	/* first, find a bin width that satisfies the maximum width
172848b8605Smrg	 * restrictions:
173848b8605Smrg	 */
174848b8605Smrg	while (bin_w > max_width) {
175848b8605Smrg		nbins_x++;
176b8e80941Smrg		bin_w = align(width / nbins_x, gmem_alignw);
177b8e80941Smrg	}
178b8e80941Smrg
179b8e80941Smrg	if (fd_mesa_debug & FD_DBG_MSGS) {
180b8e80941Smrg		debug_printf("binning input: cbuf cpp:");
181b8e80941Smrg		for (i = 0; i < pfb->nr_cbufs; i++)
182b8e80941Smrg			debug_printf(" %d", cbuf_cpp[i]);
183b8e80941Smrg		debug_printf(", zsbuf cpp: %d; %dx%d\n",
184b8e80941Smrg				zsbuf_cpp[0], width, height);
185b8e80941Smrg	}
186b8e80941Smrg
187b8e80941Smrg	if (is_a20x(screen) && batch->cleared) {
188b8e80941Smrg		/* under normal circumstances the requirement would be 4K
189b8e80941Smrg		 * but the fast clear path requires an alignment of 32K
190b8e80941Smrg		 */
191b8e80941Smrg		gmem_align = 0x8000;
192848b8605Smrg	}
193848b8605Smrg
194848b8605Smrg	/* then find a bin width/height that satisfies the memory
195848b8605Smrg	 * constraints:
196848b8605Smrg	 */
197b8e80941Smrg	while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem_align, gmem) >
198b8e80941Smrg		   gmem_size) {
199848b8605Smrg		if (bin_w > bin_h) {
200848b8605Smrg			nbins_x++;
201b8e80941Smrg			bin_w = align(width / nbins_x, gmem_alignw);
202848b8605Smrg		} else {
203848b8605Smrg			nbins_y++;
204b8e80941Smrg			bin_h = align(height / nbins_y, gmem_alignh);
205848b8605Smrg		}
206848b8605Smrg	}
207848b8605Smrg
208848b8605Smrg	DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h);
209848b8605Smrg
210848b8605Smrg	gmem->scissor = *scissor;
211b8e80941Smrg	memcpy(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp));
212b8e80941Smrg	memcpy(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp));
213848b8605Smrg	gmem->bin_h = bin_h;
214848b8605Smrg	gmem->bin_w = bin_w;
215848b8605Smrg	gmem->nbins_x = nbins_x;
216848b8605Smrg	gmem->nbins_y = nbins_y;
217848b8605Smrg	gmem->minx = minx;
218848b8605Smrg	gmem->miny = miny;
219848b8605Smrg	gmem->width = width;
220848b8605Smrg	gmem->height = height;
221848b8605Smrg
222848b8605Smrg	/*
223848b8605Smrg	 * Assign tiles and pipes:
224848b8605Smrg	 *
225848b8605Smrg	 * At some point it might be worth playing with different
226848b8605Smrg	 * strategies and seeing if that makes much impact on
227848b8605Smrg	 * performance.
228848b8605Smrg	 */
229848b8605Smrg
230848b8605Smrg#define div_round_up(v, a)  (((v) + (a) - 1) / (a))
231848b8605Smrg	/* figure out number of tiles per pipe: */
232b8e80941Smrg	if (is_a20x(ctx->screen)) {
233b8e80941Smrg		/* for a20x we want to minimize the number of "pipes"
234b8e80941Smrg		 * binning data has 3 bits for x/y (8x8) but the edges are used to
235b8e80941Smrg		 * cull off-screen vertices with hw binning, so we have 6x6 pipes
236b8e80941Smrg		 */
237b8e80941Smrg		tpp_x = 6;
238b8e80941Smrg		tpp_y = 6;
239b8e80941Smrg	} else {
240b8e80941Smrg		tpp_x = tpp_y = 1;
241b8e80941Smrg		while (div_round_up(nbins_y, tpp_y) > screen->num_vsc_pipes)
242b8e80941Smrg			tpp_y += 2;
243b8e80941Smrg		while ((div_round_up(nbins_y, tpp_y) *
244b8e80941Smrg				div_round_up(nbins_x, tpp_x)) > screen->num_vsc_pipes)
245b8e80941Smrg			tpp_x += 1;
246b8e80941Smrg	}
247b8e80941Smrg
248b8e80941Smrg	gmem->maxpw = tpp_x;
249b8e80941Smrg	gmem->maxph = tpp_y;
250848b8605Smrg
251848b8605Smrg	/* configure pipes: */
252848b8605Smrg	xoff = yoff = 0;
253b8e80941Smrg	for (i = 0; i < npipes; i++) {
254b8e80941Smrg		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
255848b8605Smrg
256848b8605Smrg		if (xoff >= nbins_x) {
257848b8605Smrg			xoff = 0;
258848b8605Smrg			yoff += tpp_y;
259848b8605Smrg		}
260848b8605Smrg
261848b8605Smrg		if (yoff >= nbins_y) {
262848b8605Smrg			break;
263848b8605Smrg		}
264848b8605Smrg
265848b8605Smrg		pipe->x = xoff;
266848b8605Smrg		pipe->y = yoff;
267848b8605Smrg		pipe->w = MIN2(tpp_x, nbins_x - xoff);
268848b8605Smrg		pipe->h = MIN2(tpp_y, nbins_y - yoff);
269848b8605Smrg
270848b8605Smrg		xoff += tpp_x;
271848b8605Smrg	}
272848b8605Smrg
273b8e80941Smrg	/* number of pipes to use for a20x */
274b8e80941Smrg	gmem->num_vsc_pipes = MAX2(1, i);
275b8e80941Smrg
276b8e80941Smrg	for (; i < npipes; i++) {
277b8e80941Smrg		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
278848b8605Smrg		pipe->x = pipe->y = pipe->w = pipe->h = 0;
279848b8605Smrg	}
280848b8605Smrg
281848b8605Smrg#if 0 /* debug */
282848b8605Smrg	printf("%dx%d ... tpp=%dx%d\n", nbins_x, nbins_y, tpp_x, tpp_y);
283848b8605Smrg	for (i = 0; i < 8; i++) {
284848b8605Smrg		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
285848b8605Smrg		printf("pipe[%d]: %ux%u @ %u,%u\n", i,
286848b8605Smrg				pipe->w, pipe->h, pipe->x, pipe->y);
287848b8605Smrg	}
288848b8605Smrg#endif
289848b8605Smrg
290848b8605Smrg	/* configure tiles: */
291848b8605Smrg	t = 0;
292848b8605Smrg	yoff = miny;
293b8e80941Smrg	memset(tile_n, 0, sizeof(tile_n));
294848b8605Smrg	for (i = 0; i < nbins_y; i++) {
295848b8605Smrg		uint32_t bw, bh;
296848b8605Smrg
297848b8605Smrg		xoff = minx;
298848b8605Smrg
299848b8605Smrg		/* clip bin height: */
300848b8605Smrg		bh = MIN2(bin_h, miny + height - yoff);
301848b8605Smrg
302848b8605Smrg		for (j = 0; j < nbins_x; j++) {
303848b8605Smrg			struct fd_tile *tile = &ctx->tile[t];
304b8e80941Smrg			uint32_t p;
305848b8605Smrg
306848b8605Smrg			assert(t < ARRAY_SIZE(ctx->tile));
307848b8605Smrg
308848b8605Smrg			/* pipe number: */
309848b8605Smrg			p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);
310b8e80941Smrg			assert(p < gmem->num_vsc_pipes);
311848b8605Smrg
312848b8605Smrg			/* clip bin width: */
313848b8605Smrg			bw = MIN2(bin_w, minx + width - xoff);
314b8e80941Smrg			tile->n = !is_a20x(ctx->screen) ? tile_n[p]++ :
315b8e80941Smrg				((i % tpp_y + 1) << 3 | (j % tpp_x + 1));
316848b8605Smrg			tile->p = p;
317848b8605Smrg			tile->bin_w = bw;
318848b8605Smrg			tile->bin_h = bh;
319848b8605Smrg			tile->xoff = xoff;
320848b8605Smrg			tile->yoff = yoff;
321848b8605Smrg
322848b8605Smrg			t++;
323848b8605Smrg
324848b8605Smrg			xoff += bw;
325848b8605Smrg		}
326848b8605Smrg
327848b8605Smrg		yoff += bh;
328848b8605Smrg	}
329848b8605Smrg
330848b8605Smrg#if 0 /* debug */
331848b8605Smrg	t = 0;
332848b8605Smrg	for (i = 0; i < nbins_y; i++) {
333848b8605Smrg		for (j = 0; j < nbins_x; j++) {
334848b8605Smrg			struct fd_tile *tile = &ctx->tile[t++];
335848b8605Smrg			printf("|p:%u n:%u|", tile->p, tile->n);
336848b8605Smrg		}
337848b8605Smrg		printf("\n");
338848b8605Smrg	}
339848b8605Smrg#endif
340848b8605Smrg}
341848b8605Smrg
342848b8605Smrgstatic void
343b8e80941Smrgrender_tiles(struct fd_batch *batch)
344848b8605Smrg{
345b8e80941Smrg	struct fd_context *ctx = batch->ctx;
346848b8605Smrg	struct fd_gmem_stateobj *gmem = &ctx->gmem;
347848b8605Smrg	int i;
348848b8605Smrg
349b8e80941Smrg	ctx->emit_tile_init(batch);
350848b8605Smrg
351b8e80941Smrg	if (batch->restore)
352848b8605Smrg		ctx->stats.batch_restore++;
353848b8605Smrg
354848b8605Smrg	for (i = 0; i < (gmem->nbins_x * gmem->nbins_y); i++) {
355848b8605Smrg		struct fd_tile *tile = &ctx->tile[i];
356848b8605Smrg
357848b8605Smrg		DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d",
358848b8605Smrg			tile->bin_h, tile->yoff, tile->bin_w, tile->xoff);
359848b8605Smrg
360b8e80941Smrg		ctx->emit_tile_prep(batch, tile);
361848b8605Smrg
362b8e80941Smrg		if (batch->restore) {
363b8e80941Smrg			ctx->emit_tile_mem2gmem(batch, tile);
364848b8605Smrg		}
365848b8605Smrg
366b8e80941Smrg		ctx->emit_tile_renderprep(batch, tile);
367848b8605Smrg
368b8e80941Smrg		if (ctx->query_prepare_tile)
369b8e80941Smrg			ctx->query_prepare_tile(batch, i, batch->gmem);
370848b8605Smrg
371848b8605Smrg		/* emit IB to drawcmds: */
372b8e80941Smrg		ctx->emit_ib(batch->gmem, batch->draw);
373b8e80941Smrg		fd_reset_wfi(batch);
374848b8605Smrg
375848b8605Smrg		/* emit gmem2mem to transfer tile back to system memory: */
376b8e80941Smrg		ctx->emit_tile_gmem2mem(batch, tile);
377848b8605Smrg	}
378b8e80941Smrg
379b8e80941Smrg	if (ctx->emit_tile_fini)
380b8e80941Smrg		ctx->emit_tile_fini(batch);
381848b8605Smrg}
382848b8605Smrg
383848b8605Smrgstatic void
384b8e80941Smrgrender_sysmem(struct fd_batch *batch)
385848b8605Smrg{
386b8e80941Smrg	struct fd_context *ctx = batch->ctx;
387b8e80941Smrg
388b8e80941Smrg	ctx->emit_sysmem_prep(batch);
389848b8605Smrg
390b8e80941Smrg	if (ctx->query_prepare_tile)
391b8e80941Smrg		ctx->query_prepare_tile(batch, 0, batch->gmem);
392848b8605Smrg
393848b8605Smrg	/* emit IB to drawcmds: */
394b8e80941Smrg	ctx->emit_ib(batch->gmem, batch->draw);
395b8e80941Smrg	fd_reset_wfi(batch);
396b8e80941Smrg
397b8e80941Smrg	if (ctx->emit_sysmem_fini)
398b8e80941Smrg		ctx->emit_sysmem_fini(batch);
399b8e80941Smrg}
400b8e80941Smrg
401b8e80941Smrgstatic void
402b8e80941Smrgflush_ring(struct fd_batch *batch)
403b8e80941Smrg{
404b8e80941Smrg	uint32_t timestamp;
405b8e80941Smrg	int out_fence_fd = -1;
406b8e80941Smrg
407b8e80941Smrg	fd_submit_flush(batch->submit, batch->in_fence_fd,
408b8e80941Smrg			batch->needs_out_fence_fd ? &out_fence_fd : NULL,
409b8e80941Smrg			&timestamp);
410b8e80941Smrg
411b8e80941Smrg	fd_fence_populate(batch->fence, timestamp, out_fence_fd);
412848b8605Smrg}
413848b8605Smrg
414848b8605Smrgvoid
415b8e80941Smrgfd_gmem_render_tiles(struct fd_batch *batch)
416848b8605Smrg{
417b8e80941Smrg	struct fd_context *ctx = batch->ctx;
418b8e80941Smrg	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
419848b8605Smrg	bool sysmem = false;
420848b8605Smrg
421b8e80941Smrg	if (ctx->emit_sysmem_prep && !batch->nondraw) {
422b8e80941Smrg		if (batch->cleared || batch->gmem_reason ||
423b8e80941Smrg				((batch->num_draws > 5) && !batch->blit) ||
424b8e80941Smrg				(pfb->samples > 1)) {
425b8e80941Smrg			DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u, samples=%u",
426b8e80941Smrg				batch->cleared, batch->gmem_reason, batch->num_draws,
427b8e80941Smrg				pfb->samples);
428b8e80941Smrg		} else if (!(fd_mesa_debug & FD_DBG_NOBYPASS)) {
429848b8605Smrg			sysmem = true;
430848b8605Smrg		}
431848b8605Smrg
432b8e80941Smrg		/* For ARB_framebuffer_no_attachments: */
433b8e80941Smrg		if ((pfb->nr_cbufs == 0) && !pfb->zsbuf) {
434b8e80941Smrg			sysmem = true;
435b8e80941Smrg		}
436b8e80941Smrg	}
437848b8605Smrg
438b8e80941Smrg	fd_reset_wfi(batch);
439848b8605Smrg
440848b8605Smrg	ctx->stats.batch_total++;
441848b8605Smrg
442b8e80941Smrg	if (batch->nondraw) {
443b8e80941Smrg		DBG("%p: rendering non-draw", batch);
444b8e80941Smrg		ctx->stats.batch_nondraw++;
445b8e80941Smrg	} else if (sysmem) {
446b8e80941Smrg		DBG("%p: rendering sysmem %ux%u (%s/%s), num_draws=%u",
447b8e80941Smrg			batch, pfb->width, pfb->height,
448848b8605Smrg			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
449b8e80941Smrg			util_format_short_name(pipe_surface_format(pfb->zsbuf)),
450b8e80941Smrg			batch->num_draws);
451b8e80941Smrg		if (ctx->query_prepare)
452b8e80941Smrg			ctx->query_prepare(batch, 1);
453b8e80941Smrg		render_sysmem(batch);
454848b8605Smrg		ctx->stats.batch_sysmem++;
455848b8605Smrg	} else {
456848b8605Smrg		struct fd_gmem_stateobj *gmem = &ctx->gmem;
457b8e80941Smrg		calculate_tiles(batch);
458b8e80941Smrg		DBG("%p: rendering %dx%d tiles %ux%u (%s/%s)",
459b8e80941Smrg			batch, pfb->width, pfb->height, gmem->nbins_x, gmem->nbins_y,
460848b8605Smrg			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
461848b8605Smrg			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
462b8e80941Smrg		if (ctx->query_prepare)
463b8e80941Smrg			ctx->query_prepare(batch, gmem->nbins_x * gmem->nbins_y);
464b8e80941Smrg		render_tiles(batch);
465848b8605Smrg		ctx->stats.batch_gmem++;
466848b8605Smrg	}
467848b8605Smrg
468b8e80941Smrg	flush_ring(batch);
469848b8605Smrg}
470848b8605Smrg
471848b8605Smrg/* When deciding whether a tile needs mem2gmem, we need to take into
472848b8605Smrg * account the scissor rect(s) that were cleared.  To simplify we only
473848b8605Smrg * consider the last scissor rect for each buffer, since the common
474848b8605Smrg * case would be a single clear.
475848b8605Smrg */
476848b8605Smrgbool
477b8e80941Smrgfd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile,
478848b8605Smrg		uint32_t buffers)
479848b8605Smrg{
480b8e80941Smrg	if (!(batch->restore & buffers))
481848b8605Smrg		return false;
482848b8605Smrg
483848b8605Smrg	return true;
484848b8605Smrg}
485