1/*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "pipe/p_state.h"
28#include "util/u_string.h"
29#include "util/u_memory.h"
30#include "util/u_inlines.h"
31#include "util/u_format.h"
32
33#include "freedreno_draw.h"
34#include "freedreno_state.h"
35#include "freedreno_resource.h"
36
37#include "fd5_gmem.h"
38#include "fd5_context.h"
39#include "fd5_draw.h"
40#include "fd5_emit.h"
41#include "fd5_program.h"
42#include "fd5_format.h"
43#include "fd5_zsa.h"
44
/* Emit color (MRT) buffer state for all A5XX_MAX_RENDER_TARGETS slots.
 *
 * Used in two modes:
 *  - gmem != NULL: targets live in on-chip tile memory; base addresses are
 *    gmem offsets (gmem->cbuf_base[i]) and pitch is bin_w * cpp.
 *  - gmem == NULL: targets live in system memory; base addresses are
 *    relocations against the resource's bo and pitch comes from the slice.
 *
 * Unused slots are programmed with format 0 / zero pitch so stale state
 * from a previous batch cannot leak through.
 */
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
		struct pipe_surface **bufs, struct fd_gmem_stateobj *gmem)
{
	enum a5xx_tile_mode tile_mode;
	unsigned i;

	for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
		enum a5xx_color_fmt format = 0;
		enum a3xx_color_swap swap = WZYX;
		bool srgb = false, sint = false, uint = false;
		struct fd_resource *rsc = NULL;
		struct fd_resource_slice *slice = NULL;
		uint32_t stride = 0;
		uint32_t size = 0;
		uint32_t base = 0;
		uint32_t offset = 0;

		/* gmem contents are always tiled; sysmem defaults to linear but may
		 * be overridden below from the resource's own tile mode.
		 */
		if (gmem) {
			tile_mode = TILE5_2;
		} else {
			tile_mode = TILE5_LINEAR;
		}

		if ((i < nr_bufs) && bufs[i]) {
			struct pipe_surface *psurf = bufs[i];
			enum pipe_format pformat = psurf->format;

			rsc = fd_resource(psurf->texture);

			slice = fd_resource_slice(rsc, psurf->u.tex.level);
			format = fd5_pipe2color(pformat);
			swap = fd5_pipe2swap(pformat);
			srgb = util_format_is_srgb(pformat);
			sint = util_format_is_pure_sint(pformat);
			uint = util_format_is_pure_uint(pformat);

			/* only single-layer render targets are handled here */
			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

			offset = fd_resource_offset(rsc, psurf->u.tex.level,
					psurf->u.tex.first_layer);

			if (gmem) {
				stride = gmem->bin_w * gmem->cbuf_cpp[i];
				size = stride * gmem->bin_h;
				base = gmem->cbuf_base[i];
			} else {
				stride = slice->pitch * rsc->cpp;
				size = slice->size0;

				/* mip levels that were laid out linear stay linear even if
				 * the resource is otherwise tiled:
				 */
				if (!fd_resource_level_linear(psurf->texture, psurf->u.tex.level))
					tile_mode = rsc->tile_mode;
			}
		}

		OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
		OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
				A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
				A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
				COND(gmem, 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
				COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
		OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
		OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
		/* gmem addresses (and unused slots) are plain offsets; sysmem
		 * addresses need a relocation against the buffer object:
		 */
		if (gmem || (i >= nr_bufs) || !bufs[i]) {
			OUT_RING(ring, base);           /* RB_MRT[i].BASE_LO */
			OUT_RING(ring, 0x00000000);     /* RB_MRT[i].BASE_HI */
		} else {
			debug_assert((offset + size) <= fd_bo_size(rsc->bo));
			OUT_RELOCW(ring, rsc->bo, offset, 0, 0);  /* BASE_LO/HI */
		}

		OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
		OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
				COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
				COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
				COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

		/* when we support UBWC, these would be the system memory
		 * addr/pitch/etc:
		 */
		OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
		OUT_RING(ring, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
		OUT_RING(ring, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
		OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
		OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
	}
}
132
133static void
134emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
135		struct fd_gmem_stateobj *gmem)
136{
137	if (zsbuf) {
138		struct fd_resource *rsc = fd_resource(zsbuf->texture);
139		enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
140		uint32_t cpp = rsc->cpp;
141		uint32_t stride = 0;
142		uint32_t size = 0;
143
144		if (gmem) {
145			stride = cpp * gmem->bin_w;
146			size = stride * gmem->bin_h;
147		} else {
148			struct fd_resource_slice *slice = fd_resource_slice(rsc, 0);
149			stride = slice->pitch * rsc->cpp;
150			size = slice->size0;
151		}
152
153		OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
154		OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
155		if (gmem) {
156			OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
157			OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
158		} else {
159			OUT_RELOCW(ring, rsc->bo, 0, 0, 0);  /* RB_DEPTH_BUFFER_BASE_LO/HI */
160		}
161		OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
162		OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));
163
164		OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
165		OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
166
167		OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
168		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
169		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
170		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_PITCH */
171
172		if (rsc->lrz) {
173			OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
174			OUT_RELOCW(ring, rsc->lrz, 0x1000, 0, 0);
175			OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));
176
177			OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
178			OUT_RELOCW(ring, rsc->lrz, 0, 0, 0);
179		} else {
180			OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
181			OUT_RING(ring, 0x00000000);
182			OUT_RING(ring, 0x00000000);
183			OUT_RING(ring, 0x00000000);     /* GRAS_LRZ_BUFFER_PITCH */
184
185			OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
186			OUT_RING(ring, 0x00000000);
187			OUT_RING(ring, 0x00000000);
188		}
189
190		if (rsc->stencil) {
191			if (gmem) {
192				stride = 1 * gmem->bin_w;
193				size = stride * gmem->bin_h;
194			} else {
195				struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0);
196				stride = slice->pitch * rsc->cpp;
197				size = slice->size0;
198			}
199
200			OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
201			OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
202			if (gmem) {
203				OUT_RING(ring, gmem->zsbuf_base[1]);  /* RB_STENCIL_BASE_LO */
204				OUT_RING(ring, 0x00000000);           /* RB_STENCIL_BASE_HI */
205			} else {
206				OUT_RELOCW(ring, rsc->stencil->bo, 0, 0, 0);  /* RB_STENCIL_BASE_LO/HI */
207			}
208			OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
209			OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
210		} else {
211			OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
212			OUT_RING(ring, 0x00000000);     /* RB_STENCIL_INFO */
213		}
214	} else {
215		OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
216		OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
217		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_BASE_LO */
218		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_BASE_HI */
219		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_PITCH */
220		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_ARRAY_PITCH */
221
222		OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
223		OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
224
225		OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
226		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
227		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
228		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_PITCH */
229
230		OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
231		OUT_RING(ring, 0x00000000);     /* RB_STENCIL_INFO */
232	}
233}
234
235static bool
236use_hw_binning(struct fd_batch *batch)
237{
238	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
239
240	if ((gmem->maxpw * gmem->maxph) > 32)
241		return false;
242
243	if ((gmem->maxpw > 15) || (gmem->maxph > 15))
244		return false;
245
246	return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
247			(batch->num_draws > 0);
248}
249
250static void
251patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
252{
253	unsigned i;
254	for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
255		struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
256		*patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
257	}
258	util_dynarray_resize(&batch->draw_patches, 0);
259}
260
/* Program the VSC (visibility stream compressor) state: bin size, the
 * per-pipe config (x/y/w/h in bins), and a data buffer + length for each
 * of the 16 pipes.  Pipe data buffers are allocated lazily and reused
 * across batches.
 */
static void
update_vsc_pipe(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd5_context *fd5_ctx = fd5_context(ctx);
	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
	struct fd_ringbuffer *ring = batch->gmem;
	int i;

	OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
	OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
			A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
	OUT_RELOCW(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */

	OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
	OUT_RING(ring, 0x00000000);   /* UNKNOWN_0BC5 */
	OUT_RING(ring, 0x00000000);   /* UNKNOWN_0BC6 */

	OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
	for (i = 0; i < 16; i++) {
		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
		OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
				A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
				A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
				A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
	}

	OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
	for (i = 0; i < 16; i++) {
		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
		/* lazily allocate the per-pipe visibility stream buffer: */
		if (!pipe->bo) {
			pipe->bo = fd_bo_new(ctx->dev, 0x20000,
					DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
		}
		OUT_RELOCW(ring, pipe->bo, 0, 0, 0);     /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
	}

	OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
	for (i = 0; i < 16; i++) {
		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
		OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
	}
}
304
/* Emit the binning pass: replay the batch's draws in BINNING mode so the
 * hw fills the VSC visibility streams, which the per-tile rendering pass
 * later consumes via CP_SET_BIN_DATA5.
 */
static void
emit_binning_pass(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_ringbuffer *ring = batch->gmem;
	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;

	/* full gmem-area scissor (binning covers all tiles at once): */
	uint32_t x1 = gmem->minx;
	uint32_t y1 = gmem->miny;
	uint32_t x2 = gmem->minx + gmem->width - 1;
	uint32_t y2 = gmem->miny + gmem->height - 1;

	fd5_set_render_mode(batch->ctx, ring, BINNING);

	OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
	OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
			A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

	OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

	OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) |
			A5XX_RB_RESOLVE_CNTL_1_Y(y1));
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) |
			A5XX_RB_RESOLVE_CNTL_2_Y(y2));

	update_vsc_pipe(batch);

	OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
	OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, UNK_2C);

	OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
	OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) |
			A5XX_RB_WINDOW_OFFSET_Y(0));

	/* emit IB to binning drawcmds: */
	ctx->emit_ib(ring, batch->binning);

	fd_reset_wfi(batch);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, UNK_2D);

	/* flush caches with a timestamp write so the visibility data is
	 * observable before the rendering pass reads it:
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RELOCW(ring, fd5_context(ctx)->blit_mem, 0, 0, 0);  /* ADDR_LO/HI */
	OUT_RING(ring, 0x00000000);

	// TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)

	fd_wfi(batch, ring);

	/* leave binning mode: */
	OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
	OUT_RING(ring, 0x0);
}
367
368/* before first tile */
/* before first tile */
/* Per-batch setup for GMEM rendering: restore invariant state, run the
 * optional LRZ clear IB, program power/CCU state, set up render targets
 * with their gmem layout, optionally run the binning pass, and patch the
 * recorded draws with the chosen visibility mode.
 */
static void
fd5_emit_tile_init(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	fd5_emit_restore(batch, ring);

	if (batch->lrz_clear)
		ctx->emit_ib(ring, batch->lrz_clear);

	fd5_emit_lrz_flush(ring);

	OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
	OUT_RING(ring, 0x00000080);   /* GRAS_CL_CNTL */

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* PC_POWER_CNTL */

	OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* VFD_POWER_CNTL */

	/* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
	fd_wfi(batch, ring);
	OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
	OUT_RING(ring, 0x7c13c080);   /* RB_CCU_CNTL */

	/* program zs/mrt with gmem (tile memory) base addresses: */
	emit_zs(ring, pfb->zsbuf, &ctx->gmem);
	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, &ctx->gmem);

	if (use_hw_binning(batch)) {
		emit_binning_pass(batch);
		fd5_emit_lrz_flush(ring);
		patch_draws(batch, USE_VISIBILITY);
	} else {
		patch_draws(batch, IGNORE_VISIBILITY);
	}

	fd5_set_render_mode(batch->ctx, ring, GMEM);
}
413
414/* before mem2gmem */
/* before mem2gmem */
/* Per-tile setup: program the tile's scissor/resolve window, hook up the
 * tile's visibility stream (when hw binning is used) or force visibility
 * override, and set the window offset so drawing lands in this tile.
 */
static void
fd5_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_context *ctx = batch->ctx;
	struct fd5_context *fd5_ctx = fd5_context(ctx);
	struct fd_ringbuffer *ring = batch->gmem;

	/* screen-space bounds of this tile (inclusive): */
	uint32_t x1 = tile->xoff;
	uint32_t y1 = tile->yoff;
	uint32_t x2 = tile->xoff + tile->bin_w - 1;
	uint32_t y2 = tile->yoff + tile->bin_h - 1;

	OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

	OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) |
			A5XX_RB_RESOLVE_CNTL_1_Y(y1));
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) |
			A5XX_RB_RESOLVE_CNTL_2_Y(y2));

	if (use_hw_binning(batch)) {
		/* bind the visibility stream produced by the binning pass for
		 * this tile's pipe:
		 */
		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p];

		OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

		OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
		OUT_RING(ring, 0x0);

		OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
		OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
				CP_SET_BIN_DATA5_0_VSC_N(tile->n));
		OUT_RELOC(ring, pipe->bo, 0, 0, 0);      /* VSC_PIPE[p].DATA_ADDRESS */
		OUT_RELOC(ring, fd5_ctx->vsc_size_mem,   /* VSC_SIZE_ADDRESS + (p * 4) */
				(tile->p * 4), 0, 0);
	} else {
		/* no binning data; draw everything in every tile: */
		OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
		OUT_RING(ring, 0x1);
	}

	OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
	OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) |
			A5XX_RB_WINDOW_OFFSET_Y(y1));
}
462
463
464/*
465 * transfer from system memory to gmem
466 */
467
/* Restore (blit) one surface from system memory into its gmem location
 * at 'base'.  'buf' selects which gmem buffer slot the blit targets.
 */
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
		struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
	struct fd_ringbuffer *ring = batch->gmem;
	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
	struct fd_resource *rsc = fd_resource(psurf->texture);
	uint32_t stride, size;

	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

	/* separate-stencil data lives in its own resource: */
	if (buf == BLIT_S)
		rsc = rsc->stencil;

	if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
		// XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
		// know otherwise how to go from linear in sysmem to tiled in gmem.
		// possibly we want to flip this around gmem2mem and keep depth
		// tiled in sysmem (and fixup sampler state to assume tiled).. this
		// might be required for doing depth/stencil in bypass mode?
		struct fd_resource_slice *slice = fd_resource_slice(rsc, 0);
		enum a5xx_color_fmt format =
			fd5_pipe2color(fd_gmem_restore_format(rsc->base.format));

		OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
		OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
				A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->tile_mode) |
				A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
		OUT_RING(ring, A5XX_RB_MRT_PITCH(slice->pitch * rsc->cpp));
		OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0));
		OUT_RELOC(ring, rsc->bo, 0, 0, 0);  /* BASE_LO/HI */

		buf = BLIT_MRT0;
	}

	/* destination pitch in gmem (one bin wide): */
	stride = gmem->bin_w * rsc->cpp;
	size = stride * gmem->bin_h;

	OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_LO */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_HI */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_PITCH */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

	OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
	OUT_RING(ring, 0x00000000);   /* RB_RESOLVE_CNTL_3 */
	OUT_RING(ring, base);         /* RB_BLIT_DST_LO */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_DST_HI */
	OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
	OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));

	OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
	OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

	fd5_emit_blit(batch->ctx, ring);
}
524
/* Restore tile contents from system memory into gmem for every buffer
 * that this tile needs preserved (batch->restore), before rendering.
 */
static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_ringbuffer *ring = batch->gmem;
	struct fd_context *ctx = batch->ctx;
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	/*
	 * setup mrt and zs with system memory base addresses:
	 */

	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
//	emit_zs(ring, pfb->zsbuf, NULL);

	OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
	OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
			A5XX_RB_CNTL_HEIGHT(gmem->bin_h) |
			A5XX_RB_CNTL_BYPASS);

	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
		unsigned i;
		for (i = 0; i < pfb->nr_cbufs; i++) {
			if (!pfb->cbufs[i])
				continue;
			/* only restore cbufs that this batch actually needs: */
			if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
				continue;
			emit_mem2gmem_surf(batch, gmem->cbuf_base[i],
					pfb->cbufs[i], BLIT_MRT0 + i);
		}
	}

	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

		/* with interleaved depth/stencil, BLIT_ZS restores both; a
		 * separate stencil resource gets its own BLIT_S restore:
		 */
		if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
			emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
		if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
			emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
	}
}
566
567
568/* before IB to rendering cmds: */
/* before IB to rendering cmds: */
/* Re-program render targets with their gmem addresses (mem2gmem switched
 * them to sysmem) and set MSAA state before replaying the draw cmdstream
 * for this tile.
 */
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_ringbuffer *ring = batch->gmem;
	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
	OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
			A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

	emit_zs(ring, pfb->zsbuf, gmem);
	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);

	enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

	/* same sample count programmed into TPL1, RB, and GRAS_SC blocks,
	 * with MSAA explicitly disabled for the single-sample case:
	 */
	OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
	OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
			COND(samples == MSAA_ONE, A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

	OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
	OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
			COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));


	OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
	OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
			COND(samples == MSAA_ONE, A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
}
601
602
603/*
604 * transfer from gmem to system memory (ie. normal RAM)
605 */
606
/* Resolve (blit) one surface from its gmem location at 'base' out to
 * system memory.  Skipped entirely if the resource has no valid
 * contents to write back.
 */
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
		struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
	struct fd_ringbuffer *ring = batch->gmem;
	struct fd_resource *rsc = fd_resource(psurf->texture);
	struct fd_resource_slice *slice;
	bool tiled;
	uint32_t offset;

	if (!rsc->valid)
		return;

	/* separate-stencil data lives in its own resource: */
	if (buf == BLIT_S)
		rsc = rsc->stencil;

	slice = fd_resource_slice(rsc, psurf->u.tex.level);
	offset = fd_resource_offset(rsc, psurf->u.tex.level,
			psurf->u.tex.first_layer);

	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

	OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_LO */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_HI */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_PITCH */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

	/* destination is tiled unless this mip level was laid out linear: */
	tiled = rsc->tile_mode &&
		!fd_resource_level_linear(psurf->texture, psurf->u.tex.level);

	OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
	OUT_RING(ring, 0x00000004 |   /* XXX RB_RESOLVE_CNTL_3 */
			COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
	OUT_RELOCW(ring, rsc->bo, offset, 0, 0);     /* RB_BLIT_DST_LO/HI */
	OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(slice->pitch * rsc->cpp));
	OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));

	OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
	OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

//	bool msaa_resolve = pfb->samples > 1;
	bool msaa_resolve = false;
	OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
	OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));

	fd5_emit_blit(batch->ctx, ring);
}
655
656static void
657fd5_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
658{
659	struct fd_context *ctx = batch->ctx;
660	struct fd_gmem_stateobj *gmem = &ctx->gmem;
661	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
662
663	if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
664		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
665
666		if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
667			emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
668		if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
669			emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
670	}
671
672	if (batch->resolve & FD_BUFFER_COLOR) {
673		unsigned i;
674		for (i = 0; i < pfb->nr_cbufs; i++) {
675			if (!pfb->cbufs[i])
676				continue;
677			if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
678				continue;
679			emit_gmem2mem_surf(batch, gmem->cbuf_base[i],
680					pfb->cbufs[i], BLIT_MRT0 + i);
681		}
682	}
683}
684
/* Per-batch teardown after the last tile: flush LRZ and caches and
 * return the hw to BYPASS (direct sysmem rendering) mode.
 */
static void
fd5_emit_tile_fini(struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->gmem;

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	fd5_emit_lrz_flush(ring);

	fd5_cache_flush(batch, ring);
	fd5_set_render_mode(batch->ctx, ring, BYPASS);
}
698
/* Setup for rendering directly to system memory (bypass mode, no gmem
 * tiling): full-framebuffer scissor/window, sysmem render target state,
 * and single-sample MSAA state.
 */
static void
fd5_emit_sysmem_prep(struct fd_batch *batch)
{
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	struct fd_ringbuffer *ring = batch->gmem;

	fd5_emit_restore(batch, ring);

	fd5_emit_lrz_flush(ring);

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, PC_CCU_INVALIDATE_COLOR);

	OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* PC_POWER_CNTL */

	OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* VFD_POWER_CNTL */

	/* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
	fd_wfi(batch, ring);
	OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
	OUT_RING(ring, 0x10000000);   /* RB_CCU_CNTL */

	/* full-framebuffer scissor/resolve window: */
	OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

	OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) |
			A5XX_RB_RESOLVE_CNTL_1_Y(0));
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
			A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

	OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
	OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) |
			A5XX_RB_WINDOW_OFFSET_Y(0));

	/* no binning in bypass mode; draw everything: */
	OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
	OUT_RING(ring, 0x1);

	OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
	OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) |
			A5XX_RB_CNTL_HEIGHT(0) |
			A5XX_RB_CNTL_BYPASS);

	patch_draws(batch, IGNORE_VISIBILITY);

	/* program zs/mrt with system memory base addresses: */
	emit_zs(ring, pfb->zsbuf, NULL);
	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);

	OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);

	OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);

	OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);
}
770
/* Teardown after sysmem (bypass) rendering: flush LRZ and emit a UNK_1D
 * event with a write to blit_mem so completion is observable.
 */
static void
fd5_emit_sysmem_fini(struct fd_batch *batch)
{
	struct fd5_context *fd5_ctx = fd5_context(batch->ctx);
	struct fd_ringbuffer *ring = batch->gmem;

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	fd5_emit_lrz_flush(ring);

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, UNK_1D);
	OUT_RELOCW(ring, fd5_ctx->blit_mem, 0, 0, 0);  /* ADDR_LO/HI */
	OUT_RING(ring, 0x00000000);
}
787
788void
789fd5_gmem_init(struct pipe_context *pctx)
790{
791	struct fd_context *ctx = fd_context(pctx);
792
793	ctx->emit_tile_init = fd5_emit_tile_init;
794	ctx->emit_tile_prep = fd5_emit_tile_prep;
795	ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
796	ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
797	ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
798	ctx->emit_tile_fini = fd5_emit_tile_fini;
799	ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
800	ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
801}
802