1/* 2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#include "pipe/p_state.h" 28#include "util/u_string.h" 29#include "util/u_memory.h" 30#include "util/u_inlines.h" 31 32#include "freedreno_draw.h" 33#include "freedreno_state.h" 34#include "freedreno_resource.h" 35 36#include "fd2_gmem.h" 37#include "fd2_context.h" 38#include "fd2_emit.h" 39#include "fd2_program.h" 40#include "fd2_util.h" 41#include "fd2_zsa.h" 42#include "fd2_draw.h" 43#include "instr-a2xx.h" 44 45static uint32_t fmt2swap(enum pipe_format format) 46{ 47 switch (format) { 48 case PIPE_FORMAT_B8G8R8A8_UNORM: 49 case PIPE_FORMAT_B8G8R8X8_UNORM: 50 case PIPE_FORMAT_B5G6R5_UNORM: 51 case PIPE_FORMAT_B5G5R5A1_UNORM: 52 case PIPE_FORMAT_B5G5R5X1_UNORM: 53 case PIPE_FORMAT_B4G4R4A4_UNORM: 54 case PIPE_FORMAT_B4G4R4X4_UNORM: 55 /* TODO probably some more.. */ 56 return 1; 57 default: 58 return 0; 59 } 60} 61 62static bool 63use_hw_binning(struct fd_batch *batch) 64{ 65 struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; 66 67 /* we hardcoded a limit of 8 "pipes", we can increase this limit 68 * at the cost of a slightly larger command stream 69 * however very few cases will need more than 8 70 * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?) 71 */ 72 if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes) 73 return false; 74 75 /* only a20x hw binning is implement 76 * a22x is more like a3xx, but perhaps the a20x works? (TODO) 77 */ 78 if (!is_a20x(batch->ctx->screen)) 79 return false; 80 81 return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2); 82} 83 84/* transfer from gmem to system memory (ie. 
normal RAM) */

/*
 * Append to batch->tile_fini the commands that copy one surface out of gmem
 * into its backing buffer object.
 *
 *   batch - batch whose tile_fini ring is appended to
 *   base  - value for the BASE field of RB_COLOR_INFO; callers in this file
 *           pass gmem->cbuf_base[0] or gmem->zsbuf_base[0]
 *   psurf - surface to write back; its texture supplies the destination bo,
 *           pitch and offset
 *
 * Nothing is emitted when the resource is not flagged valid.
 */
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
		struct pipe_surface *psurf)
{
	struct fd_ringbuffer *ring = batch->tile_fini;
	struct fd_resource *rsc = fd_resource(psurf->texture);
	uint32_t swap = fmt2swap(psurf->format);
	struct fd_resource_slice *slice =
		fd_resource_slice(rsc, psurf->u.tex.level);
	uint32_t offset =
		fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);

	/* the pitch is programmed as (pitch >> 5) below, so its low 5 bits must
	 * be zero.  NOTE(review): the 4K alignment of the offset is presumably a
	 * requirement of RB_COPY_DEST_BASE -- confirm
	 */
	assert((slice->pitch & 31) == 0);
	assert((offset & 0xfff) == 0);

	if (!rsc->valid)
		return;

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(swap) |
			A2XX_RB_COLOR_INFO_BASE(base) |
			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)));

	/* four consecutive registers starting at RB_COPY_CONTROL: */
	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
	OUT_RING(ring, 0x00000000);             /* RB_COPY_CONTROL */
	OUT_RELOCW(ring, rsc->bo, offset, 0, 0);     /* RB_COPY_DEST_BASE */
	OUT_RING(ring, slice->pitch >> 5);      /* RB_COPY_DEST_PITCH */
	OUT_RING(ring,                          /* RB_COPY_DEST_INFO */
			A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) |
			A2XX_RB_COPY_DEST_INFO_LINEAR |
			A2XX_RB_COPY_DEST_INFO_SWAP(swap) |
			A2XX_RB_COPY_DEST_INFO_WRITE_RED |
			A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
			A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
			A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);

	/* non-a20x only: wait-for-idle, then set the vertex index range to
	 * [0, 3] for the draw below
	 */
	if (!is_a20x(batch->ctx->screen)) {
		OUT_WFI (ring);

		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
		OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
		OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
	}

	/* 3-vertex auto-indexed RECTLIST; the caller has put RB_MODECONTROL
	 * into EDRAM_COPY mode before calling us
	 */
	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}

/*
 * Create batch->tile_fini (a new streaming ringbuffer) and fill it with the
 * state setup plus one gmem->mem copy per buffer selected in batch->resolve.
 * fd2_emit_tile_gmem2mem() later hands this ring to ctx->emit_ib.
 *
 * No tile is passed in, so nothing here depends on a particular tile.
 * NOTE(review): the per-tile destination presumably comes from
 * RB_COPY_DEST_OFFSET, which fd2_emit_tile_renderprep() programs -- confirm.
 */
static void
prepare_tile_fini_ib(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd2_context *fd2_ctx = fd2_context(ctx);
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	struct fd_ringbuffer *ring;

	batch->tile_fini = fd_submit_new_ringbuffer(batch->submit, 0x1000,
			FD_RINGBUFFER_STREAMING);
	ring = batch->tile_fini;

	/* one vertex buffer: the first 36 bytes of solid_vertexbuf */
	fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
			{ .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
		}, 1);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
	OUT_RING(ring, 0x00000000);          /* PA_SC_WINDOW_OFFSET */

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
	OUT_RING(ring, 0);

	/* non-a20x only; a different value (0x0000003b) is written again at
	 * the end of this function
	 */
	if (!is_a20x(ctx->screen)) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
		OUT_RING(ring, 0x0000028f);
	}

	fd2_program_emit(ctx, ring, &ctx->solid_prog);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
	OUT_RING(ring, 0x0000ffff);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
	OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
	OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |  /* PA_SU_SC_MODE_CNTL */
			A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
			A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));

	/* window scissor covers the whole framebuffer */
	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
	OUT_RING(ring, xy2d(0, 0));                       /* PA_SC_WINDOW_SCISSOR_TL */
	OUT_RING(ring, xy2d(pfb->width, pfb->height));    /* PA_SC_WINDOW_SCISSOR_BR */

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
	OUT_RING(ring, 0x00000000);

	/* viewport scale and offset are both half the bin size in x and y */
	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
	OUT_RING(ring, fui((float) gmem->bin_w / 2.0)); /* XSCALE */
	OUT_RING(ring, fui((float) gmem->bin_w / 2.0)); /* XOFFSET */
	OUT_RING(ring, fui((float) gmem->bin_h / 2.0)); /* YSCALE */
	OUT_RING(ring, fui((float) gmem->bin_h / 2.0)); /* YOFFSET */

	/* switch to EDRAM_COPY mode for the resolve draws ... */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
	OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));

	if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
		emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);

	if (batch->resolve & FD_BUFFER_COLOR)
		emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);

	/* ... and back to COLOR_DEPTH mode afterwards */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
	OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));

	if (!is_a20x(ctx->screen)) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
		OUT_RING(ring, 0x0000003b);
	}
}

/*
 * ctx->emit_tile_gmem2mem hook: passes the ring built by
 * prepare_tile_fini_ib() to ctx->emit_ib with batch->gmem as the target
 * ring.  The tile argument is not used.
 */
static void
fd2_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
{
	batch->ctx->emit_ib(batch->gmem, batch->tile_fini);
}

/* transfer from system memory to gmem */

/*
 * Append to batch->gmem the commands that draw one surface's memory contents
 * into gmem: RB_COLOR_INFO is pointed at `base`, the surface is described as
 * a texture, and a 3-vertex RECTLIST is drawn.
 *
 *   batch - batch whose gmem ring is appended to
 *   base  - value for the BASE field of RB_COLOR_INFO; callers in this file
 *           pass gmem->cbuf_base[0] or gmem->zsbuf_base[0]
 *   psurf - surface to read from
 *
 * The shader program and remaining state are set up by the caller
 * (fd2_emit_tile_mem2gmem) before this is called.
 */
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
		struct pipe_surface *psurf)
{
	struct fd_ringbuffer *ring = batch->gmem;
	struct fd_resource *rsc = fd_resource(psurf->texture);
	struct fd_resource_slice *slice =
		fd_resource_slice(rsc, psurf->u.tex.level);
	uint32_t offset =
		fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
	uint32_t swiz;

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
			A2XX_RB_COLOR_INFO_BASE(base) |
			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)));

	/* identity swizzle request; fd2_tex_swiz() produces the SQ_TEX_3 bits */
	swiz = fd2_tex_swiz(psurf->format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
			PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);

	/* emit fb as a texture: */
	OUT_PKT3(ring, CP_SET_CONSTANT, 7);
	OUT_RING(ring, 0x00010000);
	OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
			A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
			A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
			A2XX_SQ_TEX_0_PITCH(slice->pitch));
	OUT_RELOC(ring, rsc->bo, offset,
			fd2_pipe2surface(psurf->format) |
			A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL), 0);
	OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
			A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
	OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
			swiz |
			A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
			A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));

	/* non-a20x only: vertex index range [0, 3] for the draw below */
	if (!is_a20x(batch->ctx->screen)) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
		OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
		OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
	}

	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
			DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}

/*
 * ctx->emit_tile_mem2gmem hook: set up blit state for `tile` in batch->gmem
 * and, for each buffer fd_gmem_needs_restore() reports, draw its memory
 * contents into gmem via emit_mem2gmem_surf().
 *
 * The state setup is emitted unconditionally, even if neither buffer ends
 * up needing a restore.
 */
static void
fd2_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_context *ctx = batch->ctx;
	struct fd2_context *fd2_ctx = fd2_context(ctx);
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	unsigned bin_w = tile->bin_w;
	unsigned bin_h = tile->bin_h;
	float x0, y0, x1, y1;

	/* two vertex buffers, both inside solid_vertexbuf: bytes [0, 36) and
	 * bytes [36, 60); the second range is filled in just below
	 */
	fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
			{ .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
			{ .prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36 },
		}, 2);

	/* write texture coordinates to vertexbuf: */
	/* the tile rectangle, normalized by the framebuffer dimensions */
	x0 = ((float)tile->xoff) / ((float)pfb->width);
	x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
	y0 = ((float)tile->yoff) / ((float)pfb->height);
	y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
	/* destination address followed by three (x, y) pairs, written at
	 * byte offset 36 of solid_vertexbuf
	 */
	OUT_PKT3(ring, CP_MEM_WRITE, 7);
	OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
	OUT_RING(ring, fui(x0));
	OUT_RING(ring, fui(y0));
	OUT_RING(ring, fui(x1));
	OUT_RING(ring, fui(y0));
	OUT_RING(ring, fui(x0));
	OUT_RING(ring, fui(y1));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
	OUT_RING(ring, 0);

	fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);

	OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
	OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
	OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
	OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
			A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
			A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
	OUT_RING(ring, 0x0000ffff);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
	OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
			A2XX_RB_COLORCONTROL_BLEND_DISABLE |
			A2XX_RB_COLORCONTROL_ROP_CODE(12) |
			A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
			A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
	OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
			A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
			A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
			A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
			A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
			A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));

	/* window scissor is (0,0)-(bin_w,bin_h) with the window offset
	 * disabled
	 */
	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
	OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
			xy2d(0,0));                       /* PA_SC_WINDOW_SCISSOR_TL */
	OUT_RING(ring, xy2d(bin_w, bin_h));    /* PA_SC_WINDOW_SCISSOR_BR */

	/* note the negated Y scale, unlike the viewport that
	 * prepare_tile_fini_ib() programs for the gmem2mem direction
	 */
	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
	OUT_RING(ring, fui((float)bin_w/2.0));  /* PA_CL_VPORT_XSCALE */
	OUT_RING(ring, fui((float)bin_w/2.0));  /* PA_CL_VPORT_XOFFSET */
	OUT_RING(ring, fui(-(float)bin_h/2.0)); /* PA_CL_VPORT_YSCALE */
	OUT_RING(ring, fui((float)bin_h/2.0));  /* PA_CL_VPORT_YOFFSET */

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
	OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
			A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT |         // XXX check this???
			A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
	OUT_RING(ring, 0x00000000);

	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
		emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);

	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
		emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);

	/* reprogram PA_CL_VTE_CNTL with a different flag set (W0_FMT plus
	 * Z scale/offset) for whatever follows the restore.
	 * NOTE(review): presumably the setting normal rendering expects -- confirm
	 */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
	OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
			A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
			A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);

	/* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
}

/*
 * Rewrite the draw packets recorded in batch->draw_patches for the given
 * visibility mode.
 *
 * non-a20x: each entry is a struct fd_cs_patch; the patched dword becomes
 *           patch->val | DRAW(0, 0, 0, vismode, 0) and the patch list is
 *           emptied here.
 * a20x:     each entry is a pointer to a recorded draw packet.  With
 *           USE_VISIBILITY the packets are left untouched; otherwise each
 *           one is rewritten in place.  The patch list is NOT emptied on
 *           this path; the callers in this file do that themselves.
 */
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
	unsigned i;

	if (!is_a20x(batch->ctx->screen)) {
		/* identical to a3xx */
		for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
			struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
			*patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
		}
		util_dynarray_resize(&batch->draw_patches, 0);
		return;
	}

	if (vismode == USE_VISIBILITY)
		return;

	for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t*); i++) {
		uint32_t *ptr = *util_dynarray_element(&batch->draw_patches, uint32_t*, i);
		/* 12-bit field taken from bits 16..27 of the packet header */
		unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */

		/* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
		 * replace first two DWORDS with NOP and move the rest down
		 * (we don't want to have to move the idx buffer reloc)
		 */
		ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
		ptr[1] = 0x00000000;

		/* ptr[4] must be written before ptr[2] is overwritten with the
		 * new packet header
		 */
		ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
		ptr[2] = CP_TYPE3_PKT | ((cnt-2) << 16) | (CP_DRAW_INDX << 8);
		ptr[3] = 0x00000000;
	}
}

/*
 * ctx->emit_sysmem_prep hook: emit into batch->gmem the state for rendering
 * to color buffer 0 directly in memory (RB_COLOR_INFO is given a reloc to
 * the resource's bo and marked LINEAR), then patch all recorded draws to
 * IGNORE_VISIBILITY and empty the draw/shader patch lists.
 *
 * NOTE(review): when there is no color buffer 0 this returns before
 * emitting anything, and also before the draws are patched and the patch
 * lists are emptied -- confirm that is intended.
 */
static void
fd2_emit_sysmem_prep(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	struct pipe_surface *psurf = pfb->cbufs[0];

	if (!psurf)
		return;

	struct fd_resource *rsc = fd_resource(psurf->texture);
	struct fd_resource_slice *slice =
		fd_resource_slice(rsc, psurf->u.tex.level);
	uint32_t offset =
		fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);

	/* same alignment checks as emit_gmem2mem_surf() */
	assert((slice->pitch & 31) == 0);
	assert((offset & 0xfff) == 0);

	fd2_emit_restore(ctx, ring);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
	OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(slice->pitch));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
	OUT_RELOCW(ring, rsc->bo, offset, A2XX_RB_COLOR_INFO_LINEAR |
			A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)), 0);

	/* screen scissor covers the whole framebuffer */
	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
			A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
	OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(0) |
			A2XX_PA_SC_WINDOW_OFFSET_Y(0));

	patch_draws(batch, IGNORE_VISIBILITY);
	util_dynarray_resize(&batch->draw_patches, 0);
	util_dynarray_resize(&batch->shader_patches, 0);
}

/* before first tile */
/*
 * ctx->emit_tile_init hook.  In order, this:
 *  - emits the restore state and builds the tile_fini ring,
 *  - programs RB_SURFACE_INFO and the two registers that follow it,
 *  - fills in the dwords recorded in batch->gmem_patches (fast clear and
 *    restore-info patches) now that the gmem layout is known,
 *  - zeroes VGT_CURRENT_BIN_ID_MIN/MAX,
 *  - if use_hw_binning(): patches the shaders and draws for binning, emits
 *    the per-pipe shader constants and runs batch->binning as an IB;
 *    otherwise patches all draws to IGNORE_VISIBILITY,
 *  - empties the draw/shader patch lists.
 */
static void
fd2_emit_tile_init(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	/* NOTE(review): cbufs[0] may be NULL (it is NULL-checked further down);
	 * presumably pipe_surface_format() copes with that -- confirm
	 */
	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
	uint32_t reg;

	fd2_emit_restore(ctx, ring);

	prepare_tile_fini_ib(batch);

	/* three consecutive registers starting at RB_SURFACE_INFO; the second
	 * value is built from the RB_COLOR_INFO field macros
	 */
	OUT_PKT3(ring, CP_SET_CONSTANT, 4);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
	OUT_RING(ring, gmem->bin_w);                 /* RB_SURFACE_INFO */
	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
	reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
	if (pfb->zsbuf)
		reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
	OUT_RING(ring, reg);                         /* RB_DEPTH_INFO */

	/* fast clear patches */
	/* per-pixel size factors (4 or 2); they stay -1 when the
	 * corresponding buffer is absent
	 */
	int depth_size = -1;
	int color_size = -1;

	if (pfb->cbufs[0])
		color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;

	if (pfb->zsbuf)
		depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;

	for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
		struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
		uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
		uint32_t size, lines;

		/* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
		switch (patch->val) {
		case GMEM_PATCH_FASTCLEAR_COLOR:
			/* color_base stays 0; depth_base is placed half-way
			 * through the (0x8000-aligned) area being cleared
			 */
			size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000);
			lines = size / 1024;
			depth_base = size / 2;
			break;
		case GMEM_PATCH_FASTCLEAR_DEPTH:
			/* same scheme, but starting at the depth buffer's base */
			size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000);
			lines = size / 1024;
			color_base = depth_base;
			depth_base = depth_base + size / 2;
			break;
		case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
			/* color_base/depth_base keep their initial values; `size`
			 * is not set here and is not read after the switch
			 */
			lines = align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024;
			break;
		case GMEM_PATCH_RESTORE_INFO:
			/* the same three values as the CP_SET_CONSTANT packet
			 * emitted at the top of this function
			 */
			patch->cs[0] = gmem->bin_w;
			patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
					A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
			patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
			if (pfb->zsbuf)
				patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
			continue;
		default:
			continue;
		}

		/* only the three fast clear cases reach this point */
		patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
			A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
		patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
			A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
		patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
			A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
	}
	util_dynarray_resize(&batch->gmem_patches, 0);

	/* set to zero, for some reason hardware doesn't like certain values */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
	OUT_RING(ring, 0);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
	OUT_RING(ring, 0);

	if (use_hw_binning(batch)) {
		/* patch out unneeded memory exports by changing EXEC CF to EXEC_END
		 *
		 * in the shader compiler, we guarantee that the shader ends with
		 * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
		 *
		 * since the patches point only to dwords and CFs are 1.5 dwords
		 * the patch is aligned and might point to an ALLOC CF
		 */
		for (int i = 0; i < batch->shader_patches.size / sizeof(void*); i++) {
			instr_cf_t *cf =
				*util_dynarray_element(&batch->shader_patches, instr_cf_t*, i);
			if (cf->opc == ALLOC)
				cf++;
			assert(cf->opc == EXEC);
			assert(cf[ctx->screen->num_vsc_pipes*2-2].opc == EXEC_END);
			/* end the shader after the EXEC of the last pipe in use */
			cf[2*(gmem->num_vsc_pipes-1)].opc = EXEC_END;
		}

		patch_draws(batch, USE_VISIBILITY);

		/* initialize shader constants for the binning memexport */
		/* four dwords per pipe */
		OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
		OUT_RING(ring, 0x0000000C);

		for (int i = 0; i < gmem->num_vsc_pipes; i++) {
			struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];

			/* XXX we know how large this needs to be..
			 * should do some sort of realloc
			 * it should be ctx->batch->num_vertices bytes large
			 * with this size it will break with more than 256k vertices..
			 */
			/* allocated on first use only.
			 * NOTE(review): the result of fd_bo_new() is not checked
			 */
			if (!pipe->bo) {
				pipe->bo = fd_bo_new(ctx->dev, 0x40000,
					DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
			}

			/* memory export address (export32):
			 * .x: (base_address >> 2) | 0x40000000 (?)
			 * .y: index (float) - set by shader
			 * .z: 0x4B00D000 (?)
			 * .w: 0x4B000000 (?) | max_index (?)
			 */
			OUT_RELOCW(ring, pipe->bo, 0, 0x40000000, -2);
			OUT_RING(ring, 0x00000000);
			OUT_RING(ring, 0x4B00D000);
			/* 0x40000 is also the bo size allocated above */
			OUT_RING(ring, 0x4B000000 | 0x40000);
		}

		/* eight dwords per pipe */
		OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
		OUT_RING(ring, 0x0000018C);

		for (int i = 0; i < gmem->num_vsc_pipes; i++) {
			struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
			float off_x, off_y, mul_x, mul_y;

			/* const to transform from [-1,1] to bin coordinates for this pipe
			 * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
			 * 8 possible values on x/y axis,
			 * to clip at binning stage: only use center 6x6
			 * TODO: set the z parameters too so that hw binning
			 * can clip primitives in Z too
			 */

			mul_x = 1.0f / (float) (gmem->bin_w * 8);
			mul_y = 1.0f / (float) (gmem->bin_h * 8);
			off_x = -pipe->x * (1.0/8.0f) + 0.125f - mul_x * gmem->minx;
			off_y = -pipe->y * (1.0/8.0f) + 0.125f - mul_y * gmem->miny;

			/* first vec4: x/y offsets */
			OUT_RING(ring, fui(off_x * (256.0f/255.0f)));
			OUT_RING(ring, fui(off_y * (256.0f/255.0f)));
			OUT_RING(ring, 0x3f000000);
			OUT_RING(ring, fui(0.0f));

			/* second vec4: x/y multipliers */
			OUT_RING(ring, fui(mul_x * (256.0f/255.0f)));
			OUT_RING(ring, fui(mul_y * (256.0f/255.0f)));
			OUT_RING(ring, fui(0.0f));
			OUT_RING(ring, fui(0.0f));
		}

		/* VGT_VERTEX_REUSE_BLOCK_CNTL is 0 while the binning IB runs and
		 * is set to 2 afterwards
		 */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
		OUT_RING(ring, 0);

		ctx->emit_ib(ring, batch->binning);

		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
		OUT_RING(ring, 0x00000002);
	} else {
		patch_draws(batch, IGNORE_VISIBILITY);
	}

	util_dynarray_resize(&batch->draw_patches, 0);
	util_dynarray_resize(&batch->shader_patches, 0);
}

/* before mem2gmem */
/*
 * ctx->emit_tile_prep hook: program RB_COLOR_INFO for color buffer 0's
 * format and set the screen scissor to (0,0)-(bin_w,bin_h) of `tile`.
 *
 * NOTE(review): SWAP is hard-coded to 1 here, whereas the other functions
 * in this file use fmt2swap(format) -- confirm that is intentional.
 */
static void
fd2_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

	/* setup screen scissor for current tile (same for mem2gmem): */
	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
			A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
			A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
}

/* before IB to rendering cmds: */
/*
 * ctx->emit_tile_renderprep hook: emit the per-tile state for `tile` into
 * batch->gmem -- color info, window offset, the saved scissor value for the
 * fast clear path, the gmem2mem copy offset, the a20x tile-offset shader
 * constant and, when hw binning is used, the bin id and the pipe's buffer.
 */
static void
fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_context *ctx = batch->ctx;
	struct fd2_context *fd2_ctx = fd2_context(ctx);
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);

	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

	/* setup window scissor and offset for current tile (different
	 * from mem2gmem):
	 */
	/* the window offset is the negated tile origin */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
	OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
			A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));

	/* write SCISSOR_BR to memory so fast clear path can restore from it */
	/* byte offset 60 of solid_vertexbuf, i.e. directly after the 36 + 24
	 * bytes that fd2_emit_tile_mem2gmem() binds as vertex data
	 */
	OUT_PKT3(ring, CP_MEM_WRITE, 2);
	OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
			A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));

	/* set the copy offset for gmem2mem */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
	OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
			A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));

	/* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
	if (is_a20x(ctx->screen)) {
		OUT_PKT3(ring, CP_SET_CONSTANT, 5);
		OUT_RING(ring, 0x00000580);
		OUT_RING(ring, fui(tile->xoff));
		OUT_RING(ring, fui(tile->yoff));
		OUT_RING(ring, fui(0.0f));
		OUT_RING(ring, fui(0.0f));
	}

	if (use_hw_binning(batch)) {
		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p];

		/* both min and max bin id are set to tile->n */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
		OUT_RING(ring, tile->n);

		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
		OUT_RING(ring, tile->n);

		/* TODO only emit this when tile->p changes */
		/* points at the bo that fd2_emit_tile_init() set up for this
		 * tile's pipe
		 */
		OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
		OUT_RELOC(ring, pipe->bo, 0, 0, 0);
	}
}

/*
 * Install the a2xx gmem/tiling callbacks defined in this file into the
 * fd_context behind `pctx`.
 */
void
fd2_gmem_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
	ctx->emit_tile_init = fd2_emit_tile_init;
	ctx->emit_tile_prep = fd2_emit_tile_prep;
	ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
	ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
	ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
}