1/*
2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "pipe/p_state.h"
28#include "util/u_string.h"
29#include "util/u_memory.h"
30#include "util/u_inlines.h"
31
32#include "freedreno_draw.h"
33#include "freedreno_state.h"
34#include "freedreno_resource.h"
35
36#include "fd2_gmem.h"
37#include "fd2_context.h"
38#include "fd2_emit.h"
39#include "fd2_program.h"
40#include "fd2_util.h"
41#include "fd2_zsa.h"
42#include "fd2_draw.h"
43#include "instr-a2xx.h"
44
45static uint32_t fmt2swap(enum pipe_format format)
46{
47	switch (format) {
48	case PIPE_FORMAT_B8G8R8A8_UNORM:
49	case PIPE_FORMAT_B8G8R8X8_UNORM:
50	case PIPE_FORMAT_B5G6R5_UNORM:
51	case PIPE_FORMAT_B5G5R5A1_UNORM:
52	case PIPE_FORMAT_B5G5R5X1_UNORM:
53	case PIPE_FORMAT_B4G4R4A4_UNORM:
54	case PIPE_FORMAT_B4G4R4X4_UNORM:
55	/* TODO probably some more.. */
56		return 1;
57	default:
58		return 0;
59	}
60}
61
62static bool
63use_hw_binning(struct fd_batch *batch)
64{
65	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
66
67	/* we hardcoded a limit of 8 "pipes", we can increase this limit
68	 * at the cost of a slightly larger command stream
69	 * however very few cases will need more than 8
70	 * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?)
71	 */
72	if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes)
73		return false;
74
75	/* only a20x hw binning is implement
76	 * a22x is more like a3xx, but perhaps the a20x works? (TODO)
77	 */
78	if (!is_a20x(batch->ctx->screen))
79		return false;
80
81	return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
82}
83
84/* transfer from gmem to system memory (ie. normal RAM) */
85
/* Emit (into the tile_fini ringbuffer) the commands to resolve one
 * surface (color or depth/stencil) from its gmem location at 'base'
 * out to its backing resource in system memory.  The copy is triggered
 * by drawing a 3-vertex RECTLIST while RB is in EDRAM_COPY mode (set
 * up by the caller, prepare_tile_fini_ib()).
 */
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
		struct pipe_surface *psurf)
{
	struct fd_ringbuffer *ring = batch->tile_fini;
	struct fd_resource *rsc = fd_resource(psurf->texture);
	uint32_t swap = fmt2swap(psurf->format);
	struct fd_resource_slice *slice =
		fd_resource_slice(rsc, psurf->u.tex.level);
	uint32_t offset =
		fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);

	/* RB_COPY_DEST_PITCH is written as pitch >> 5 below, and the dest
	 * base must be 4k aligned, so the layout must satisfy both:
	 */
	assert((slice->pitch & 31) == 0);
	assert((offset & 0xfff) == 0);

	/* nothing has been rendered to this resource, skip the resolve */
	if (!rsc->valid)
		return;

	/* source of the copy: the surface's location in gmem */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(swap) |
			A2XX_RB_COLOR_INFO_BASE(base) |
			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)));

	/* destination of the copy: the linear resource in system memory */
	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
	OUT_RING(ring, 0x00000000);             /* RB_COPY_CONTROL */
	OUT_RELOCW(ring, rsc->bo, offset, 0, 0);     /* RB_COPY_DEST_BASE */
	OUT_RING(ring, slice->pitch >> 5); /* RB_COPY_DEST_PITCH */
	OUT_RING(ring,                          /* RB_COPY_DEST_INFO */
			A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) |
			A2XX_RB_COPY_DEST_INFO_LINEAR |
			A2XX_RB_COPY_DEST_INFO_SWAP(swap) |
			A2XX_RB_COPY_DEST_INFO_WRITE_RED |
			A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
			A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
			A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);

	/* a22x/a2xx-non-a20x needs vtx index bounds programmed (and a WFI)
	 * before the draw; a20x path skips this
	 */
	if (!is_a20x(batch->ctx->screen)) {
		OUT_WFI (ring);

		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
		OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
		OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
	}

	/* 3-vertex rect covering the tile; visibility ignored for resolve */
	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
136
/* Build batch->tile_fini: a streaming ringbuffer containing the full
 * gmem->mem resolve sequence (solid-fill program, scissor/viewport for
 * the resolve rect, EDRAM_COPY mode, and the per-surface copies).  It
 * is built once and then IB'd into the gmem ring for each tile by
 * fd2_emit_tile_gmem2mem().
 */
static void
prepare_tile_fini_ib(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd2_context *fd2_ctx = fd2_context(ctx);
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	struct fd_ringbuffer *ring;

	batch->tile_fini = fd_submit_new_ringbuffer(batch->submit, 0x1000,
			FD_RINGBUFFER_STREAMING);
	ring = batch->tile_fini;

	/* positions for the resolve rect come from the shared solid_vertexbuf */
	fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
			{ .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
		}, 1);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
	OUT_RING(ring, 0x00000000);          /* PA_SC_WINDOW_OFFSET */

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
	OUT_RING(ring, 0);

	if (!is_a20x(ctx->screen)) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
		OUT_RING(ring, 0x0000028f);
	}

	/* simple passthrough program used for the resolve draw */
	fd2_program_emit(ctx, ring, &ctx->solid_prog);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
	OUT_RING(ring, 0x0000ffff);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
	OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
	OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |  /* PA_SU_SC_MODE_CNTL */
			A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
			A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));

	/* scissor covers the whole framebuffer for the resolve */
	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
	OUT_RING(ring, xy2d(0, 0));                       /* PA_SC_WINDOW_SCISSOR_TL */
	OUT_RING(ring, xy2d(pfb->width, pfb->height));    /* PA_SC_WINDOW_SCISSOR_BR */

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
	OUT_RING(ring, 0x00000000);

	/* viewport maps [-1,1] clip space onto one bin (bin_w x bin_h) */
	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
	OUT_RING(ring, fui((float) gmem->bin_w / 2.0)); /* XSCALE */
	OUT_RING(ring, fui((float) gmem->bin_w / 2.0)); /* XOFFSET */
	OUT_RING(ring, fui((float) gmem->bin_h / 2.0)); /* YSCALE */
	OUT_RING(ring, fui((float) gmem->bin_h / 2.0)); /* YOFFSET */

	/* switch RB into copy mode for the resolve draws.. */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
	OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));

	if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
		emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);

	if (batch->resolve & FD_BUFFER_COLOR)
		emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);

	/* ..and back to normal rendering mode */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
	OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));

	if (!is_a20x(ctx->screen)) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
		OUT_RING(ring, 0x0000003b);
	}
}
220
221static void
222fd2_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
223{
224	batch->ctx->emit_ib(batch->gmem, batch->tile_fini);
225}
226
227/* transfer from system memory to gmem */
228
/* Emit (into the gmem ringbuffer) the commands to restore one surface
 * from system memory into gmem at 'base': the resource is bound as a
 * texture and blitted into gmem with a RECTLIST draw (the blit program
 * and per-tile texcoords are set up by the caller,
 * fd2_emit_tile_mem2gmem()).
 */
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
		struct pipe_surface *psurf)
{
	struct fd_ringbuffer *ring = batch->gmem;
	struct fd_resource *rsc = fd_resource(psurf->texture);
	struct fd_resource_slice *slice =
		fd_resource_slice(rsc, psurf->u.tex.level);
	uint32_t offset =
		fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
	uint32_t swiz;

	/* destination of the blit: the surface's location in gmem */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
			A2XX_RB_COLOR_INFO_BASE(base) |
			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)));

	swiz = fd2_tex_swiz(psurf->format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
			PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);

	/* emit fb as a texture: six SQ_TEX dwords, point-sampled 2D,
	 * no mipmapping
	 */
	OUT_PKT3(ring, CP_SET_CONSTANT, 7);
	OUT_RING(ring, 0x00010000);
	OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
			A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
			A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
			A2XX_SQ_TEX_0_PITCH(slice->pitch));
	OUT_RELOC(ring, rsc->bo, offset,
			fd2_pipe2surface(psurf->format) |
			A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL), 0);
	OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
			A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
	OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
			swiz |
			A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
			A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));

	/* non-a20x needs vtx index bounds programmed before the draw */
	if (!is_a20x(batch->ctx->screen)) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
		OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
		OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
	}

	/* 3-vertex rect covering the tile; visibility ignored for restore */
	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
279
/* Per-tile mem->gmem restore: set up the blit program, per-tile
 * texture coordinates, blend/scissor/viewport state, then blit each
 * surface that needs restoring (via emit_mem2gmem_surf()).
 */
static void
fd2_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_context *ctx = batch->ctx;
	struct fd2_context *fd2_ctx = fd2_context(ctx);
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	unsigned bin_w = tile->bin_w;
	unsigned bin_h = tile->bin_h;
	float x0, y0, x1, y1;

	/* positions at offset 0, texcoords at offset 36 (written below) */
	fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
			{ .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
			{ .prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36 },
		}, 2);

	/* write texture coordinates to vertexbuf: normalized coords of this
	 * tile's rect within the full framebuffer (3 verts x 2 floats = 24
	 * bytes, matching the second vertex buffer bound above)
	 */
	x0 = ((float)tile->xoff) / ((float)pfb->width);
	x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
	y0 = ((float)tile->yoff) / ((float)pfb->height);
	y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
	OUT_PKT3(ring, CP_MEM_WRITE, 7);
	OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
	OUT_RING(ring, fui(x0));
	OUT_RING(ring, fui(y0));
	OUT_RING(ring, fui(x1));
	OUT_RING(ring, fui(y0));
	OUT_RING(ring, fui(x0));
	OUT_RING(ring, fui(y1));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
	OUT_RING(ring, 0);

	/* textured blit program (samples the fb texture bound per-surface) */
	fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);

	/* invalidate texture cache so we read fresh data from sysmem */
	OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
	OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
	OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
	OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
			A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
			A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
	OUT_RING(ring, 0x0000ffff);

	/* plain copy: alpha test always passes, no blending, no dither */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
	OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
			A2XX_RB_COLORCONTROL_BLEND_DISABLE |
			A2XX_RB_COLORCONTROL_ROP_CODE(12) |
			A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
			A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));

	/* src*1 + dst*0 == straight copy of the sampled color */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
	OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
			A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
			A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
			A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
			A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
			A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));

	/* scissor covers just this bin */
	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
	OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
			xy2d(0,0));                     /* PA_SC_WINDOW_SCISSOR_TL */
	OUT_RING(ring, xy2d(bin_w, bin_h));     /* PA_SC_WINDOW_SCISSOR_BR */

	/* viewport maps [-1,1] onto the bin; note YSCALE is negated here,
	 * unlike the gmem2mem path
	 */
	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
	OUT_RING(ring, fui((float)bin_w/2.0));  /* PA_CL_VPORT_XSCALE */
	OUT_RING(ring, fui((float)bin_w/2.0));  /* PA_CL_VPORT_XOFFSET */
	OUT_RING(ring, fui(-(float)bin_h/2.0)); /* PA_CL_VPORT_YSCALE */
	OUT_RING(ring, fui((float)bin_h/2.0));  /* PA_CL_VPORT_YOFFSET */

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
	OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
			A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT |       // XXX check this???
			A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
	OUT_RING(ring, 0x00000000);

	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
		emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);

	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
		emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);

	/* restore VTE_CNTL to the state normal rendering expects */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
	OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
			A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);

	/* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
}
395
/* Patch the draw packets recorded during draw emission, now that we
 * know whether hw binning (visibility) will be used for this batch.
 *
 * Non-a20x: each patch points at the DRAW_INDX dword, which just gets
 * the final vismode OR'd in.  a20x: draws were recorded as
 * CP_DRAW_INDX_BIN; if visibility is not used they are rewritten
 * in-place into plain CP_DRAW_INDX packets.
 */
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
	unsigned i;

	if (!is_a20x(batch->ctx->screen)) {
		/* identical to a3xx */
		for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
			struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
			*patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
		}
		util_dynarray_resize(&batch->draw_patches, 0);
		return;
	}

	/* a20x: packets were recorded as CP_DRAW_INDX_BIN, which is exactly
	 * what we want when visibility is used, so nothing to patch
	 */
	if (vismode == USE_VISIBILITY)
		return;

	for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t*); i++) {
		uint32_t *ptr = *util_dynarray_element(&batch->draw_patches, uint32_t*, i);
		unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */

		/* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
		 * replace first two DWORDS with NOP and move the rest down
		 * (we don't want to have to move the idx buffer reloc)
		 */
		ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
		ptr[1] = 0x00000000;

		ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
		ptr[2] = CP_TYPE3_PKT | ((cnt-2) << 16) | (CP_DRAW_INDX << 8);
		ptr[3] = 0x00000000;
	}
}
430
/* Set up for rendering directly to system memory (bypassing gmem
 * tiling): point RB at the color buffer's linear layout, open the
 * scissor to the whole framebuffer, and patch all recorded draws to
 * ignore visibility.
 */
static void
fd2_emit_sysmem_prep(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	struct pipe_surface *psurf = pfb->cbufs[0];

	/* nothing to do without a color buffer */
	if (!psurf)
		return;

	struct fd_resource *rsc = fd_resource(psurf->texture);
	struct fd_resource_slice *slice =
		fd_resource_slice(rsc, psurf->u.tex.level);
	uint32_t offset =
		fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);

	/* same alignment requirements as the gmem2mem resolve path */
	assert((slice->pitch & 31) == 0);
	assert((offset & 0xfff) == 0);

	fd2_emit_restore(ctx, ring);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
	OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(slice->pitch));

	/* RB_COLOR_INFO base points straight at the resource (linear) */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
	OUT_RELOCW(ring, rsc->bo, offset, A2XX_RB_COLOR_INFO_LINEAR |
		A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
		A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)), 0);

	/* full-framebuffer screen scissor */
	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
		A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
	OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(0) |
			A2XX_PA_SC_WINDOW_OFFSET_Y(0));

	/* no binning in sysmem mode; drop any remaining patch lists
	 * (patch_draws() only clears draw_patches on the non-a20x path)
	 */
	patch_draws(batch, IGNORE_VISIBILITY);
	util_dynarray_resize(&batch->draw_patches, 0);
	util_dynarray_resize(&batch->shader_patches, 0);
}
478
479/* before first tile */
480static void
481fd2_emit_tile_init(struct fd_batch *batch)
482{
483	struct fd_context *ctx = batch->ctx;
484	struct fd_ringbuffer *ring = batch->gmem;
485	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
486	struct fd_gmem_stateobj *gmem = &ctx->gmem;
487	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
488	uint32_t reg;
489
490	fd2_emit_restore(ctx, ring);
491
492	prepare_tile_fini_ib(batch);
493
494	OUT_PKT3(ring, CP_SET_CONSTANT, 4);
495	OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
496	OUT_RING(ring, gmem->bin_w);                 /* RB_SURFACE_INFO */
497	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
498			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
499	reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
500	if (pfb->zsbuf)
501		reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
502	OUT_RING(ring, reg);                         /* RB_DEPTH_INFO */
503
504	/* fast clear patches */
505	int depth_size = -1;
506	int color_size = -1;
507
508	if (pfb->cbufs[0])
509		color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;
510
511	if (pfb->zsbuf)
512		depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;
513
514	for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
515		struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
516		uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
517		uint32_t size, lines;
518
519		/* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
520		switch (patch->val) {
521		case GMEM_PATCH_FASTCLEAR_COLOR:
522			size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000);
523			lines = size / 1024;
524			depth_base = size / 2;
525			break;
526		case GMEM_PATCH_FASTCLEAR_DEPTH:
527			size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000);
528			lines = size / 1024;
529			color_base = depth_base;
530			depth_base = depth_base + size / 2;
531			break;
532		case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
533			lines = align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024;
534			break;
535		case GMEM_PATCH_RESTORE_INFO:
536			patch->cs[0] = gmem->bin_w;
537			patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
538					A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
539			patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
540			if (pfb->zsbuf)
541				patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
542			continue;
543		default:
544			continue;
545		}
546
547		patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
548			A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
549		patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
550			A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
551		patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
552			A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
553	}
554	util_dynarray_resize(&batch->gmem_patches, 0);
555
556	/* set to zero, for some reason hardware doesn't like certain values */
557	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
558	OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
559	OUT_RING(ring, 0);
560
561	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
562	OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
563	OUT_RING(ring, 0);
564
565	if (use_hw_binning(batch)) {
566		/* patch out unneeded memory exports by changing EXEC CF to EXEC_END
567		 *
568		 * in the shader compiler, we guarantee that the shader ends with
569		 * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
570		 *
571		 * the since patches point only to dwords and CFs are 1.5 dwords
572		 * the patch is aligned and might point to a ALLOC CF
573		 */
574		for (int i = 0; i < batch->shader_patches.size / sizeof(void*); i++) {
575			instr_cf_t *cf =
576				*util_dynarray_element(&batch->shader_patches, instr_cf_t*, i);
577			if (cf->opc == ALLOC)
578				cf++;
579			assert(cf->opc == EXEC);
580			assert(cf[ctx->screen->num_vsc_pipes*2-2].opc == EXEC_END);
581			cf[2*(gmem->num_vsc_pipes-1)].opc = EXEC_END;
582		}
583
584		patch_draws(batch, USE_VISIBILITY);
585
586		/* initialize shader constants for the binning memexport */
587		OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
588		OUT_RING(ring, 0x0000000C);
589
590		for (int i = 0; i < gmem->num_vsc_pipes; i++) {
591			struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
592
593			/* XXX we know how large this needs to be..
594			 * should do some sort of realloc
595			 * it should be ctx->batch->num_vertices bytes large
596			 * with this size it will break with more than 256k vertices..
597			 */
598			if (!pipe->bo) {
599				pipe->bo = fd_bo_new(ctx->dev, 0x40000,
600						DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
601			}
602
603			/* memory export address (export32):
604			 * .x: (base_address >> 2) | 0x40000000 (?)
605			 * .y: index (float) - set by shader
606			 * .z: 0x4B00D000 (?)
607			 * .w: 0x4B000000 (?) | max_index (?)
608			*/
609			OUT_RELOCW(ring, pipe->bo, 0, 0x40000000, -2);
610			OUT_RING(ring, 0x00000000);
611			OUT_RING(ring, 0x4B00D000);
612			OUT_RING(ring, 0x4B000000 | 0x40000);
613		}
614
615		OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
616		OUT_RING(ring, 0x0000018C);
617
618		for (int i = 0; i < gmem->num_vsc_pipes; i++) {
619			struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
620			float off_x, off_y, mul_x, mul_y;
621
622			/* const to tranform from [-1,1] to bin coordinates for this pipe
623			 * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
624			 * 8 possible values on x/y axis,
625			 * to clip at binning stage: only use center 6x6
626			 * TODO: set the z parameters too so that hw binning
627			 * can clip primitives in Z too
628			 */
629
630			mul_x = 1.0f / (float) (gmem->bin_w * 8);
631			mul_y = 1.0f / (float) (gmem->bin_h * 8);
632			off_x = -pipe->x * (1.0/8.0f) + 0.125f - mul_x * gmem->minx;
633			off_y = -pipe->y * (1.0/8.0f) + 0.125f - mul_y * gmem->miny;
634
635			OUT_RING(ring, fui(off_x * (256.0f/255.0f)));
636			OUT_RING(ring, fui(off_y * (256.0f/255.0f)));
637			OUT_RING(ring, 0x3f000000);
638			OUT_RING(ring, fui(0.0f));
639
640			OUT_RING(ring, fui(mul_x * (256.0f/255.0f)));
641			OUT_RING(ring, fui(mul_y * (256.0f/255.0f)));
642			OUT_RING(ring, fui(0.0f));
643			OUT_RING(ring, fui(0.0f));
644		}
645
646		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
647		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
648		OUT_RING(ring, 0);
649
650		ctx->emit_ib(ring, batch->binning);
651
652		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
653		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
654		OUT_RING(ring, 0x00000002);
655	} else {
656		patch_draws(batch, IGNORE_VISIBILITY);
657	}
658
659	util_dynarray_resize(&batch->draw_patches, 0);
660	util_dynarray_resize(&batch->shader_patches, 0);
661}
662
663/* before mem2gmem */
/* before mem2gmem */
static void
fd2_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);

	/* NOTE(review): SWAP is hardcoded to 1 here rather than
	 * fmt2swap(format); the per-format swap is re-emitted in
	 * fd2_emit_tile_renderprep() before actual rendering — confirm
	 * this value is intentional for the mem2gmem phase
	 */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

	/* setup screen scissor for current tile (same for mem2gmem): */
	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
			A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
			A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
}
684
685/* before IB to rendering cmds: */
/* before IB to rendering cmds: */
static void
fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_context *ctx = batch->ctx;
	struct fd2_context *fd2_ctx = fd2_context(ctx);
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);

	/* program the real per-format swap (tile_prep used a fixed value) */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

	/* setup window scissor and offset for current tile (different
	 * from mem2gmem):
	 */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
	OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
			A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));

	/* write SCISSOR_BR to memory so fast clear path can restore from it */
	OUT_PKT3(ring, CP_MEM_WRITE, 2);
	OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
			A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));

	/* set the copy offset for gmem2mem */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
	OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
			A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));

	/* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
	if (is_a20x(ctx->screen)) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 5);
		OUT_RING(ring, 0x00000580);
		OUT_RING(ring, fui(tile->xoff));
		OUT_RING(ring, fui(tile->yoff));
		OUT_RING(ring, fui(0.0f));
		OUT_RING(ring, fui(0.0f));
	}

	if (use_hw_binning(batch)) {
		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p];

		/* select this tile's bin id so only its visible draws execute */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
		OUT_RING(ring, tile->n);

		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
		OUT_RING(ring, tile->n);

		/* TODO only emit this when tile->p changes */
		OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
		OUT_RELOC(ring, pipe->bo, 0, 0, 0);
	}
}
746
747void
748fd2_gmem_init(struct pipe_context *pctx)
749{
750	struct fd_context *ctx = fd_context(pctx);
751
752	ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
753	ctx->emit_tile_init = fd2_emit_tile_init;
754	ctx->emit_tile_prep = fd2_emit_tile_prep;
755	ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
756	ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
757	ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
758}
759