1/*
2 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "pipe/p_state.h"
28#include "util/u_string.h"
29#include "util/u_memory.h"
30#include "util/u_inlines.h"
31#include "util/u_format.h"
32
33#include "freedreno_draw.h"
34#include "freedreno_state.h"
35#include "freedreno_resource.h"
36
37#include "fd3_gmem.h"
38#include "fd3_context.h"
39#include "fd3_emit.h"
40#include "fd3_program.h"
41#include "fd3_format.h"
42#include "fd3_zsa.h"
43
44static void
45emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
46		 struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w,
47		 bool decode_srgb)
48{
49	enum a3xx_tile_mode tile_mode;
50	unsigned i;
51
52	if (bin_w) {
53		tile_mode = TILE_32X32;
54	} else {
55		tile_mode = LINEAR;
56	}
57
58	for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {
59		enum pipe_format pformat = 0;
60		enum a3xx_color_fmt format = 0;
61		enum a3xx_color_swap swap = WZYX;
62		bool srgb = false;
63		struct fd_resource *rsc = NULL;
64		struct fd_resource_slice *slice = NULL;
65		uint32_t stride = 0;
66		uint32_t base = 0;
67		uint32_t offset = 0;
68
69		if ((i < nr_bufs) && bufs[i]) {
70			struct pipe_surface *psurf = bufs[i];
71
72			rsc = fd_resource(psurf->texture);
73			pformat = psurf->format;
74			/* In case we're drawing to Z32F_S8, the "color" actually goes to
75			 * the stencil
76			 */
77			if (rsc->stencil) {
78				rsc = rsc->stencil;
79				pformat = rsc->base.format;
80				if (bases)
81					bases++;
82			}
83			slice = fd_resource_slice(rsc, psurf->u.tex.level);
84			format = fd3_pipe2color(pformat);
85			swap = fd3_pipe2swap(pformat);
86			if (decode_srgb)
87				srgb = util_format_is_srgb(pformat);
88			else
89				pformat = util_format_linear(pformat);
90
91			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
92
93			offset = fd_resource_offset(rsc, psurf->u.tex.level,
94					psurf->u.tex.first_layer);
95
96			if (bin_w) {
97				stride = bin_w * rsc->cpp;
98
99				if (bases) {
100					base = bases[i];
101				}
102			} else {
103				stride = slice->pitch * rsc->cpp;
104			}
105		} else if (i < nr_bufs && bases) {
106			base = bases[i];
107		}
108
109		OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
110		OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
111				A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
112				A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
113				A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
114				COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB));
115		if (bin_w || (i >= nr_bufs) || !bufs[i]) {
116			OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
117		} else {
118			OUT_RELOCW(ring, rsc->bo, offset, 0, -1);
119		}
120
121		OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
122		OUT_RING(ring, COND((i < nr_bufs) && bufs[i],
123							A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(
124									fd3_fs_output_format(pformat))));
125	}
126}
127
128static bool
129use_hw_binning(struct fd_batch *batch)
130{
131	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
132
133	/* workaround: combining scissor optimization and hw binning
134	 * seems problematic.  Seems like we end up with a mismatch
135	 * between binning pass and rendering pass, wrt. where the hw
136	 * thinks the vertices belong.  And the blob driver doesn't
137	 * seem to implement anything like scissor optimization, so
138	 * not entirely sure what I might be missing.
139	 *
140	 * But scissor optimization is mainly for window managers,
141	 * which don't have many vertices (and therefore doesn't
142	 * benefit much from binning pass).
143	 *
144	 * So for now just disable binning if scissor optimization is
145	 * used.
146	 */
147	if (gmem->minx || gmem->miny)
148		return false;
149
150	if ((gmem->maxpw * gmem->maxph) > 32)
151		return false;
152
153	if ((gmem->maxpw > 15) || (gmem->maxph > 15))
154		return false;
155
156	return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
157}
158
159/* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */
160static void update_vsc_pipe(struct fd_batch *batch);
161static void
162emit_binning_workaround(struct fd_batch *batch)
163{
164	struct fd_context *ctx = batch->ctx;
165	struct fd_gmem_stateobj *gmem = &ctx->gmem;
166	struct fd_ringbuffer *ring = batch->gmem;
167	struct fd3_emit emit = {
168			.debug = &ctx->debug,
169			.vtx = &ctx->solid_vbuf_state,
170			.prog = &ctx->solid_prog,
171			.key = {
172				.half_precision = true,
173			},
174	};
175
176	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
177	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
178			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
179			A3XX_RB_MODE_CONTROL_MRT(0));
180	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
181			A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
182			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
183
184	OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
185	OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
186			A3XX_RB_COPY_CONTROL_MODE(0) |
187			A3XX_RB_COPY_CONTROL_GMEM_BASE(0));
188	OUT_RELOCW(ring, fd_resource(ctx->solid_vbuf)->bo, 0x20, 0, -1);  /* RB_COPY_DEST_BASE */
189	OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));
190	OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
191			A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |
192			A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |
193			A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
194			A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));
195
196	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
197	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
198			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
199			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
200
201	fd3_program_emit(ring, &emit, 0, NULL);
202	fd3_emit_vertex_bufs(ring, &emit);
203
204	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
205	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
206			A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
207			A3XX_HLSQ_CONTROL_0_REG_RESERVED2 |
208			A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
209	OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
210			A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
211	OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
212	OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */
213
214	OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
215	OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) |
216			A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20));
217
218	OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
219	OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
220			A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
221			A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
222
223	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
224	OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
225
226	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
227	OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
228			A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
229			A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
230			A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
231			A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
232			A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
233			A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
234			A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
235
236	OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
237	OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0));
238
239	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
240	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
241	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
242	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
243	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
244
245	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
246	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
247			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
248			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
249			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
250
251	OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
252	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
253			A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1));
254	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) |
255			A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1));
256
257	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
258	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
259			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
260	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) |
261			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0));
262
263	fd_wfi(batch, ring);
264	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
265	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0));
266	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0));
267	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0));
268	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0));
269	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
270	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
271
272	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
273	OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE |
274			A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE |
275			A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE |
276			A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE |
277			A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE);
278
279	OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
280	OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
281			A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
282
283	OUT_PKT3(ring, CP_DRAW_INDX_2, 5);
284	OUT_RING(ring, 0x00000000);   /* viz query info. */
285	OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE,
286						INDEX_SIZE_32_BIT, IGNORE_VISIBILITY, 0));
287	OUT_RING(ring, 2);            /* NumIndices */
288	OUT_RING(ring, 2);
289	OUT_RING(ring, 1);
290	fd_reset_wfi(batch);
291
292	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);
293	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));
294
295	OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
296	OUT_RING(ring, 0x00000000);
297
298	fd_wfi(batch, ring);
299	OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
300	OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
301			A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
302
303	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
304	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
305			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
306			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
307
308	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
309	OUT_RING(ring, 0x00000000);
310}
311
312/* transfer from gmem to system memory (ie. normal RAM) */
313
314static void
315emit_gmem2mem_surf(struct fd_batch *batch,
316				   enum adreno_rb_copy_control_mode mode,
317				   bool stencil,
318				   uint32_t base, struct pipe_surface *psurf)
319{
320	struct fd_ringbuffer *ring = batch->gmem;
321	struct fd_resource *rsc = fd_resource(psurf->texture);
322	enum pipe_format format = psurf->format;
323
324	if (!rsc->valid)
325		return;
326
327	if (stencil) {
328		rsc = rsc->stencil;
329		format = rsc->base.format;
330	}
331
332	struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level);
333	uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level,
334			psurf->u.tex.first_layer);
335
336	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
337
338	OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
339	OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
340			A3XX_RB_COPY_CONTROL_MODE(mode) |
341			A3XX_RB_COPY_CONTROL_GMEM_BASE(base) |
342			COND(format == PIPE_FORMAT_Z32_FLOAT ||
343				 format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
344				 A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE));
345
346	OUT_RELOCW(ring, rsc->bo, offset, 0, -1);    /* RB_COPY_DEST_BASE */
347	OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
348	OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
349			A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) |
350			A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
351			A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
352			A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format)));
353
354	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
355			DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
356}
357
358static void
359fd3_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
360{
361	struct fd_context *ctx = batch->ctx;
362	struct fd_ringbuffer *ring = batch->gmem;
363	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
364	struct fd3_emit emit = {
365			.debug = &ctx->debug,
366			.vtx = &ctx->solid_vbuf_state,
367			.prog = &ctx->solid_prog,
368			.key = {
369				.half_precision = true,
370			},
371	};
372	int i;
373
374	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
375	OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
376
377	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
378	OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
379			A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
380			A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
381			A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
382			A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
383			A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
384			A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
385			A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
386
387	OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
388	OUT_RING(ring, 0xff000000 |
389			A3XX_RB_STENCILREFMASK_STENCILREF(0) |
390			A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
391			A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
392	OUT_RING(ring, 0xff000000 |
393			A3XX_RB_STENCILREFMASK_STENCILREF(0) |
394			A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
395			A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
396
397	OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
398	OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
399
400	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
401	OUT_RING(ring, 0x00000000);   /* GRAS_CL_CLIP_CNTL */
402
403	fd_wfi(batch, ring);
404	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
405	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5));
406	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0));
407	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5));
408	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0));
409	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
410	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
411
412	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
413	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
414			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
415			A3XX_RB_MODE_CONTROL_MRT(0));
416
417	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
418	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
419			A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
420			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
421			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w));
422
423	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
424	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
425			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
426			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
427
428	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
429	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
430			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
431			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
432			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
433
434	OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
435	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
436			A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
437	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
438			A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
439
440	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
441	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
442	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
443	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
444	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
445
446	fd3_program_emit(ring, &emit, 0, NULL);
447	fd3_emit_vertex_bufs(ring, &emit);
448
449	if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
450		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
451		if (!rsc->stencil || batch->resolve & FD_BUFFER_DEPTH)
452			emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, false,
453							   ctx->gmem.zsbuf_base[0], pfb->zsbuf);
454		if (rsc->stencil && batch->resolve & FD_BUFFER_STENCIL)
455			emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, true,
456							   ctx->gmem.zsbuf_base[1], pfb->zsbuf);
457	}
458
459	if (batch->resolve & FD_BUFFER_COLOR) {
460		for (i = 0; i < pfb->nr_cbufs; i++) {
461			if (!pfb->cbufs[i])
462				continue;
463			if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
464				continue;
465			emit_gmem2mem_surf(batch, RB_COPY_RESOLVE, false,
466							   ctx->gmem.cbuf_base[i], pfb->cbufs[i]);
467		}
468	}
469
470	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
471	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
472			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
473			A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
474
475	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
476	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
477			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
478			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
479}
480
481/* transfer from system memory to gmem */
482
483static void
484emit_mem2gmem_surf(struct fd_batch *batch, uint32_t bases[],
485		struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w)
486{
487	struct fd_ringbuffer *ring = batch->gmem;
488	struct pipe_surface *zsbufs[2];
489
490	assert(bufs > 0);
491
492	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
493	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
494				   A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
495				   A3XX_RB_MODE_CONTROL_MRT(bufs - 1));
496
497	emit_mrt(ring, bufs, psurf, bases, bin_w, false);
498
499	if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT ||
500					 psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
501		/* Depth is stored as unorm in gmem, so we have to write it in using a
502		 * special blit shader which writes depth.
503		 */
504		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
505		OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z |
506						A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
507						A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
508						A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE |
509						A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)));
510
511		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
512		OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) |
513				 A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32));
514		OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * batch->ctx->gmem.bin_w));
515
516		if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) {
517			OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1);
518			OUT_RING(ring, 0);
519		} else {
520			/* The gmem_restore_tex logic will put the first buffer's stencil
521			 * as color. Supply it with the proper information to make that
522			 * happen.
523			 */
524			zsbufs[0] = zsbufs[1] = psurf[0];
525			psurf = zsbufs;
526			bufs = 2;
527		}
528	} else {
529		OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
530		OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
531	}
532
533	fd3_emit_gmem_restore_tex(ring, psurf, bufs);
534
535	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
536			DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
537}
538
539static void
540fd3_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
541{
542	struct fd_context *ctx = batch->ctx;
543	struct fd_gmem_stateobj *gmem = &ctx->gmem;
544	struct fd_ringbuffer *ring = batch->gmem;
545	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
546	struct fd3_emit emit = {
547			.debug = &ctx->debug,
548			.vtx = &ctx->blit_vbuf_state,
549			.sprite_coord_enable = 1,
550			/* NOTE: They all use the same VP, this is for vtx bufs. */
551			.prog = &ctx->blit_prog[0],
552			.key = {
553				.half_precision = fd_half_precision(pfb),
554			},
555	};
556	float x0, y0, x1, y1;
557	unsigned bin_w = tile->bin_w;
558	unsigned bin_h = tile->bin_h;
559	unsigned i;
560
561	/* write texture coordinates to vertexbuf: */
562	x0 = ((float)tile->xoff) / ((float)pfb->width);
563	x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
564	y0 = ((float)tile->yoff) / ((float)pfb->height);
565	y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
566
567	OUT_PKT3(ring, CP_MEM_WRITE, 5);
568	OUT_RELOCW(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
569	OUT_RING(ring, fui(x0));
570	OUT_RING(ring, fui(y0));
571	OUT_RING(ring, fui(x1));
572	OUT_RING(ring, fui(y1));
573
574	fd3_emit_cache_flush(batch, ring);
575
576	for (i = 0; i < 4; i++) {
577		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
578		OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
579				A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
580				A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
581
582		OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
583		OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
584				A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
585				A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
586				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
587				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
588				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
589	}
590
591	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
592	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) |
593			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
594
595	fd_wfi(batch, ring);
596	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
597	OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
598
599	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
600	OUT_RING(ring, 0);
601	OUT_RING(ring, 0);
602
603	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
604	OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);   /* GRAS_CL_CLIP_CNTL */
605
606	fd_wfi(batch, ring);
607	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
608	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5));
609	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0));
610	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h/2.0 - 0.5));
611	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h/2.0));
612	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
613	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
614
615	OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
616	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
617			A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
618	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
619			A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
620
621	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
622	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
623			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
624	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
625			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
626
627	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
628	OUT_RING(ring, 0x2 |
629			A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
630			A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
631			A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
632			A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
633			A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
634			A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
635			A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
636			A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
637
638	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
639	OUT_RING(ring, 0); /* RB_STENCIL_INFO */
640	OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
641
642	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
643	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
644			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
645			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
646
647	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
648	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) |
649			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
650			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
651			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
652
653	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
654	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
655	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
656	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
657	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
658
659	fd3_emit_vertex_bufs(ring, &emit);
660
661	/* for gmem pitch/base calculations, we need to use the non-
662	 * truncated tile sizes:
663	 */
664	bin_w = gmem->bin_w;
665	bin_h = gmem->bin_h;
666
667	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
668		emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
669		emit.fp = NULL;      /* frag shader changed so clear cache */
670		fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
671		emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
672	}
673
674	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
675		if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
676			pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) {
677			/* Non-float can use a regular color write. It's split over 8-bit
678			 * components, so half precision is always sufficient.
679			 */
680			emit.prog = &ctx->blit_prog[0];
681			emit.key.half_precision = true;
682		} else {
683			/* Float depth needs special blit shader that writes depth */
684			if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT)
685				emit.prog = &ctx->blit_z;
686			else
687				emit.prog = &ctx->blit_zs;
688			emit.key.half_precision = false;
689		}
690		emit.fp = NULL;      /* frag shader changed so clear cache */
691		fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
692		emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
693	}
694
695	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
696	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
697			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
698			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
699
700	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
701	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
702				   A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
703				   A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
704}
705
706static void
707patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
708{
709	unsigned i;
710	for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
711		struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
712		*patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
713	}
714	util_dynarray_resize(&batch->draw_patches, 0);
715}
716
717static void
718patch_rbrc(struct fd_batch *batch, uint32_t val)
719{
720	unsigned i;
721	for (i = 0; i < fd_patch_num_elements(&batch->rbrc_patches); i++) {
722		struct fd_cs_patch *patch = fd_patch_element(&batch->rbrc_patches, i);
723		*patch->cs = patch->val | val;
724	}
725	util_dynarray_resize(&batch->rbrc_patches, 0);
726}
727
728/* for rendering directly to system memory: */
729static void
730fd3_emit_sysmem_prep(struct fd_batch *batch)
731{
732	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
733	struct fd_ringbuffer *ring = batch->gmem;
734	uint32_t i, pitch = 0;
735
736	for (i = 0; i < pfb->nr_cbufs; i++) {
737		struct pipe_surface *psurf = pfb->cbufs[i];
738		if (!psurf)
739			continue;
740		pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch;
741	}
742
743	fd3_emit_restore(batch, ring);
744
745	OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
746	OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
747			A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
748
749	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
750
751	/* setup scissor/offset for current tile: */
752	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
753	OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
754			A3XX_RB_WINDOW_OFFSET_Y(0));
755
756	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
757	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
758			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
759	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
760			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
761
762	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
763	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
764			A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
765			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
766			A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
767
768	patch_draws(batch, IGNORE_VISIBILITY);
769	patch_rbrc(batch, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
770}
771
772static void
773update_vsc_pipe(struct fd_batch *batch)
774{
775	struct fd_context *ctx = batch->ctx;
776	struct fd3_context *fd3_ctx = fd3_context(ctx);
777	struct fd_ringbuffer *ring = batch->gmem;
778	int i;
779
780	OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
781	OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
782
783	for (i = 0; i < 8; i++) {
784		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
785
786		if (!pipe->bo) {
787			pipe->bo = fd_bo_new(ctx->dev, 0x40000,
788					DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
789		}
790
791		OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3);
792		OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) |
793				A3XX_VSC_PIPE_CONFIG_Y(pipe->y) |
794				A3XX_VSC_PIPE_CONFIG_W(pipe->w) |
795				A3XX_VSC_PIPE_CONFIG_H(pipe->h));
796		OUT_RELOCW(ring, pipe->bo, 0, 0, 0);       /* VSC_PIPE[i].DATA_ADDRESS */
797		OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE[i].DATA_LENGTH */
798	}
799}
800
801static void
802emit_binning_pass(struct fd_batch *batch)
803{
804	struct fd_context *ctx = batch->ctx;
805	struct fd_gmem_stateobj *gmem = &ctx->gmem;
806	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
807	struct fd_ringbuffer *ring = batch->gmem;
808	int i;
809
810	uint32_t x1 = gmem->minx;
811	uint32_t y1 = gmem->miny;
812	uint32_t x2 = gmem->minx + gmem->width - 1;
813	uint32_t y2 = gmem->miny + gmem->height - 1;
814
815	if (ctx->screen->gpu_id == 320) {
816		emit_binning_workaround(batch);
817		fd_wfi(batch, ring);
818		OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
819		OUT_RING(ring, 0x00007fff);
820	}
821
822	OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
823	OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE);
824
825	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
826	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
827			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
828			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
829
830	OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
831	OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
832			A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
833
834	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
835	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
836			A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
837			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
838
839	/* setup scissor/offset for whole screen: */
840	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
841	OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(x1) |
842			A3XX_RB_WINDOW_OFFSET_Y(y1));
843
844	OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
845	OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE);
846
847	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
848	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
849			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
850	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
851			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
852
853	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
854	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
855			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
856			A3XX_RB_MODE_CONTROL_MRT(0));
857
858	for (i = 0; i < 4; i++) {
859		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
860		OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) |
861				A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
862				A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0));
863	}
864
865	OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
866	OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) |
867			A3XX_PC_VSTREAM_CONTROL_N(0));
868
869	/* emit IB to binning drawcmds: */
870	ctx->emit_ib(ring, batch->binning);
871	fd_reset_wfi(batch);
872
873	fd_wfi(batch, ring);
874
875	/* and then put stuff back the way it was: */
876
877	OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
878	OUT_RING(ring, 0x00000000);
879
880	OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
881	OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE |
882			A3XX_SP_SP_CTRL_REG_CONSTMODE(1) |
883			A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
884			A3XX_SP_SP_CTRL_REG_L0MODE(0));
885
886	OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
887	OUT_RING(ring, 0x00000000);
888
889	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
890	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
891			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
892			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
893
894	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
895	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
896			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
897			A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1));
898	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
899			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
900			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
901
902	fd_event_write(batch, ring, CACHE_FLUSH);
903	fd_wfi(batch, ring);
904
905	if (ctx->screen->gpu_id == 320) {
906		/* dummy-draw workaround: */
907		OUT_PKT3(ring, CP_DRAW_INDX, 3);
908		OUT_RING(ring, 0x00000000);
909		OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
910							INDEX_SIZE_IGN, IGNORE_VISIBILITY, 0));
911		OUT_RING(ring, 0);             /* NumIndices */
912		fd_reset_wfi(batch);
913	}
914
915	OUT_PKT3(ring, CP_NOP, 4);
916	OUT_RING(ring, 0x00000000);
917	OUT_RING(ring, 0x00000000);
918	OUT_RING(ring, 0x00000000);
919	OUT_RING(ring, 0x00000000);
920
921	fd_wfi(batch, ring);
922
923	if (ctx->screen->gpu_id == 320) {
924		emit_binning_workaround(batch);
925	}
926}
927
928/* before first tile */
929static void
930fd3_emit_tile_init(struct fd_batch *batch)
931{
932	struct fd_ringbuffer *ring = batch->gmem;
933	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
934	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
935	uint32_t rb_render_control;
936
937	fd3_emit_restore(batch, ring);
938
939	/* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated
940	 * at the right and bottom edge tiles
941	 */
942	OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
943	OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
944			A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
945
946	update_vsc_pipe(batch);
947
948	fd_wfi(batch, ring);
949	OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
950	OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
951			A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
952
953	if (use_hw_binning(batch)) {
954		/* emit hw binning pass: */
955		emit_binning_pass(batch);
956
957		patch_draws(batch, USE_VISIBILITY);
958	} else {
959		patch_draws(batch, IGNORE_VISIBILITY);
960	}
961
962	rb_render_control = A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
963			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w);
964
965	patch_rbrc(batch, rb_render_control);
966}
967
968/* before mem2gmem */
969static void
970fd3_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
971{
972	struct fd_ringbuffer *ring = batch->gmem;
973	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
974
975	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
976	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
977			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
978			A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
979}
980
981/* before IB to rendering cmds: */
982static void
983fd3_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
984{
985	struct fd_context *ctx = batch->ctx;
986	struct fd3_context *fd3_ctx = fd3_context(ctx);
987	struct fd_ringbuffer *ring = batch->gmem;
988	struct fd_gmem_stateobj *gmem = &ctx->gmem;
989	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
990
991	uint32_t x1 = tile->xoff;
992	uint32_t y1 = tile->yoff;
993	uint32_t x2 = tile->xoff + tile->bin_w - 1;
994	uint32_t y2 = tile->yoff + tile->bin_h - 1;
995
996	uint32_t reg;
997
998	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
999	reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
1000	if (pfb->zsbuf) {
1001		reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
1002	}
1003	OUT_RING(ring, reg);
1004	if (pfb->zsbuf) {
1005		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
1006		OUT_RING(ring, A3XX_RB_DEPTH_PITCH(rsc->cpp * gmem->bin_w));
1007		if (rsc->stencil) {
1008			OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
1009			OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
1010			OUT_RING(ring, A3XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w));
1011		}
1012	} else {
1013		OUT_RING(ring, 0x00000000);
1014	}
1015
1016	if (use_hw_binning(batch)) {
1017		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p];
1018
1019		assert(pipe->w * pipe->h);
1020
1021		fd_event_write(batch, ring, HLSQ_FLUSH);
1022		fd_wfi(batch, ring);
1023
1024		OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
1025		OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
1026				A3XX_PC_VSTREAM_CONTROL_N(tile->n));
1027
1028
1029		OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
1030		OUT_RELOCW(ring, pipe->bo, 0, 0, 0);    /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
1031		OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */
1032				(tile->p * 4), 0, 0);
1033	} else {
1034		OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
1035		OUT_RING(ring, 0x00000000);
1036	}
1037
1038	OUT_PKT3(ring, CP_SET_BIN, 3);
1039	OUT_RING(ring, 0x00000000);
1040	OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
1041	OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
1042
1043	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true);
1044
1045	/* setup scissor/offset for current tile: */
1046	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
1047	OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) |
1048			A3XX_RB_WINDOW_OFFSET_Y(tile->yoff));
1049
1050	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
1051	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
1052			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
1053	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
1054			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
1055}
1056
1057void
1058fd3_gmem_init(struct pipe_context *pctx)
1059{
1060	struct fd_context *ctx = fd_context(pctx);
1061
1062	ctx->emit_sysmem_prep = fd3_emit_sysmem_prep;
1063	ctx->emit_tile_init = fd3_emit_tile_init;
1064	ctx->emit_tile_prep = fd3_emit_tile_prep;
1065	ctx->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem;
1066	ctx->emit_tile_renderprep = fd3_emit_tile_renderprep;
1067	ctx->emit_tile_gmem2mem = fd3_emit_tile_gmem2mem;
1068}
1069