1/* 2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_gmem.h"
#include "fd5_program.h"
#include "fd5_zsa.h"

/* Emit color render-target (MRT) state for all A5XX_MAX_RENDER_TARGETS
 * slots.  Slots beyond nr_bufs (or with a NULL surface) are programmed
 * with zero/disabled values.
 *
 * When 'gmem' is non-NULL the MRTs are addressed inside the on-chip tile
 * buffer (base/stride come from the gmem layout); when it is NULL the MRTs
 * point directly at the resource BOs in system memory (bypass/sysmem, or
 * the blit source/dest setup paths).
 */
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
         struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
{
   enum a5xx_tile_mode tile_mode;
   unsigned i;

   for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
      enum a5xx_color_fmt format = 0;
      enum a3xx_color_swap swap = WZYX;
      bool srgb = false, sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      struct fdl_slice *slice = NULL;
      uint32_t stride = 0;
      uint32_t size = 0;
      uint32_t base = 0;
      uint32_t offset = 0;

      /* In gmem the color buffer is always tiled; for sysmem we default to
       * linear and override below from the resource's actual layout:
       */
      if (gmem) {
         tile_mode = TILE5_2;
      } else {
         tile_mode = TILE5_LINEAR;
      }

      if ((i < nr_bufs) && bufs[i]) {
         struct pipe_surface *psurf = bufs[i];
         enum pipe_format pformat = psurf->format;

         rsc = fd_resource(psurf->texture);

         slice = fd_resource_slice(rsc, psurf->u.tex.level);
         format = fd5_pipe2color(pformat);
         swap = fd5_pipe2swap(pformat);
         srgb = util_format_is_srgb(pformat);
         sint = util_format_is_pure_sint(pformat);
         uint = util_format_is_pure_uint(pformat);

         /* layered rendering is not handled here: */
         debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

         offset = fd_resource_offset(rsc, psurf->u.tex.level,
                                     psurf->u.tex.first_layer);

         if (gmem) {
            /* per-tile footprint inside gmem: */
            stride = gmem->bin_w * gmem->cbuf_cpp[i];
            size = stride * gmem->bin_h;
            base = gmem->cbuf_base[i];
         } else {
            stride = fd_resource_pitch(rsc, psurf->u.tex.level);
            size = slice->size0;

            tile_mode =
               fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
         }
      }

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
      OUT_RING(
         ring,
         A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
            A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
            A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
            COND(gmem,
                 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
            COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
      /* gmem addresses are plain offsets into the tile buffer; sysmem
       * addresses need a relocation against the backing BO:
       */
      if (gmem || (i >= nr_bufs) || !bufs[i]) {
         OUT_RING(ring, base);       /* RB_MRT[i].BASE_LO */
         OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
      } else {
         debug_assert((offset + size) <= fd_bo_size(rsc->bo));
         OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
      }

      OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
      OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
                        COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
                        COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
                        COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

      /* when we support UBWC, these would be the system memory
       * addr/pitch/etc:
       */
      OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
   }
}

/* Emit depth/stencil buffer state (depth buffer, LRZ buffer, and the
 * optional separate-stencil buffer).  Like emit_mrt(), a non-NULL 'gmem'
 * means the buffers live in the on-chip tile buffer at gmem->zsbuf_base[],
 * otherwise they point at the resource BOs.  A NULL zsbuf programs
 * everything to DEPTH5_NONE/disabled.
 */
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
      uint32_t cpp = rsc->layout.cpp;
      uint32_t stride = 0;
      uint32_t size = 0;

      if (gmem) {
         stride = cpp * gmem->bin_w;
         size = stride * gmem->bin_h;
      } else {
         stride = fd_resource_pitch(rsc, 0);
         size = fd_resource_slice(rsc, 0)->size0;
      }

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
      if (gmem) {
         OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
      }
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));

      /* no flag (compression) buffer support yet: */
      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      if (rsc->lrz) {
         /* LRZ data starts 0x1000 into the BO; the fast-clear buffer is at
          * offset zero (NOTE(review): inferred from the two offsets used
          * here — confirm against the resource's lrz allocation).
          */
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
         OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
      } else {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }

      if (rsc->stencil) {
         /* separate stencil is always 1 byte/pixel: */
         if (gmem) {
            stride = 1 * gmem->bin_w;
            size = stride * gmem->bin_h;
         } else {
            stride = fd_resource_pitch(rsc->stencil, 0);
            size = fd_resource_slice(rsc->stencil, 0)->size0;
         }

         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
         OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
         if (gmem) {
            OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
            OUT_RING(ring, 0x00000000);          /* RB_STENCIL_BASE_HI */
         } else {
            OUT_RELOC(ring, rsc->stencil->bo, 0, 0,
                      0); /* RB_STENCIL_BASE_LO/HI */
         }
         OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
         OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
      } else {
         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
         OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
      }
   } else {
      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
      OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
   }
}

/* Decide whether to use the hardware binning (visibility) pass for this
 * batch: requires binning to be enabled, more than two bins, and at least
 * one draw; rejects configurations exceeding what look like VSC pipe
 * limits (w*h > 32, or either dimension > 15 — NOTE(review): limits
 * presumed hardware constraints, confirm against a5xx docs).
 */
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   if ((gmem->maxpw > 15) || (gmem->maxph > 15))
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
          (batch->num_draws > 0);
}

/* Rewrite the visibility-mode field of all draw packets recorded in the
 * batch, now that we know whether the binning pass ran (USE_VISIBILITY)
 * or not (IGNORE_VISIBILITY).
 */
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
   unsigned i;
   for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
      *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
   }
   util_dynarray_clear(&batch->draw_patches);
}

/* Emit the Visibility Stream Compressor (VSC) setup: bin size, the BO that
 * receives per-pipe stream sizes, per-pipe position/extent config, and the
 * BOs that receive the per-pipe visibility stream data during binning.
 */
static void
update_vsc_pipe(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
   OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
                     A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
   OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */

   /* 16 VSC pipes, each covering a rectangle of bins: */
   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
   for (i = 0; i < 16; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   /* per-pipe visibility stream BOs, allocated lazily (0x20000 bytes
    * each) and cached on the context:
    */
   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
   for (i = 0; i < 16; i++) {
      if (!ctx->vsc_pipe_bo[i]) {
         ctx->vsc_pipe_bo[i] = fd_bo_new(
            ctx->dev, 0x20000, 0, "vsc_pipe[%u]", i);
      }
      OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
                0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
   for (i = 0; i < 16; i++) {
      OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
                        32); /* VSC_PIPE_DATA_LENGTH[i] */
   }
}

/* Run the binning pass: render the whole (unclipped) drawing area with the
 * binning shader variants to fill the per-pipe visibility streams, which
 * the per-tile rendering passes then consume via CP_SET_BIN_DATA5.
 */
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   uint32_t x1 = gmem->minx;
   uint32_t y1 = gmem->miny;
   uint32_t x2 = gmem->minx + gmem->width - 1;
   uint32_t y2 = gmem->miny + gmem->height - 1;

   fd5_set_render_mode(batch->ctx, ring, BINNING);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   /* scissor/resolve rect covers the full drawing area: */
   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);

   fd5_event_write(batch, ring, UNK_2C, false);

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   fd5_emit_ib(ring, batch->binning);

   fd_reset_wfi(batch);

   fd5_event_write(batch, ring, UNK_2D, false);

   fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);

   // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)

   fd_wfi(batch, ring);

   /* leave binning mode: */
   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0);
}

/* before first tile */
static void
fd5_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   fd5_emit_restore(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   fd5_emit_lrz_flush(batch, ring);

   OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
   OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);

   /* Enable stream output for the first pass (likely the binning). */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   if (use_hw_binning(batch)) {
      emit_binning_pass(batch);

      /* Disable stream output after binning, since each VS output should get
       * streamed out once.
       */
      OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
      OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);

      fd5_emit_lrz_flush(batch, ring);
      patch_draws(batch, USE_VISIBILITY);
   } else {
      patch_draws(batch, IGNORE_VISIBILITY);
   }

   fd5_set_render_mode(batch->ctx, ring, GMEM);

   /* XXX If we're in gmem mode but not doing HW binning, then after the first
    * tile we should disable stream output (fd6_gmem.c doesn't do that either).
    */
}

/* before mem2gmem */
static void
fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   /* screen-space rect of this tile, inclusive: */
   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   if (use_hw_binning(batch)) {
      /* point the CP at this tile's slice of the visibility stream
       * produced by the binning pass:
       */
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, pipe_bo, 0, 0, 0); /* VSC_PIPE[p].DATA_ADDRESS */
      OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */
                (tile->p * 4), 0, 0);
   } else {
      /* no binning data — draw everything in every tile: */
      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);
   }

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));
}

/*
 * transfer from system memory to gmem
 */

/* Restore ("mem2gmem") one surface into the tile buffer at gmem offset
 * 'base' by blitting from its sysmem BO.  Depth/stencil is imported via
 * the MRT0 path (see hack comment in body).
 */
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum
a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t stride, size;

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
      // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
      // know otherwise how to go from linear in sysmem to tiled in gmem.
      // possibly we want to flip this around gmem2mem and keep depth
      // tiled in sysmem (and fixup sampler state to assume tiled).. this
      // might be required for doing depth/stencil in bypass mode?
      struct fdl_slice *slice = fd_resource_slice(rsc, 0);
      enum a5xx_color_fmt format =
         fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
      OUT_RING(ring,
               A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, 0)));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0));
      OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* BASE_LO/HI */

      buf = BLIT_MRT0;
   }

   /* destination footprint inside gmem: */
   stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
   size = stride * gmem->bin_h;

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
   OUT_RING(ring, base);       /* RB_BLIT_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   fd5_emit_blit(batch, ring);
}

/* Per-tile restore: blit any color/depth/stencil contents that must
 * survive from a previous pass back into gmem before this tile's draws.
 */
static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /*
    * setup mrt and zs with system memory base addresses:
    */

   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   // emit_zs(ring, pfb->zsbuf, NULL);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
                     A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }

   if (fd_gmem_needs_restore(batch, tile,
                             FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      /* with packed z/s (no separate stencil rsc) one BLIT_ZS restore
       * covers both; separate stencil gets its own BLIT_S restore:
       */
      if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }
}

/* before IB to rendering cmds: */
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /* switch MRT/ZS addressing back to gmem for the actual draws: */
   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   emit_zs(ring, pfb->zsbuf, gmem);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);

   enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
}

/*
 * transfer from gmem to system memory (ie.
normal RAM)
 */

/* Resolve ("gmem2mem") one surface from the tile buffer (gmem offset
 * 'base') out to its sysmem BO.  No-op if the resource has no valid
 * contents yet.
 */
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   struct fdl_slice *slice;
   bool tiled;
   uint32_t offset, pitch;

   if (!rsc->valid)
      return;

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   slice = fd_resource_slice(rsc, psurf->u.tex.level);
   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   pitch = fd_resource_pitch(rsc, psurf->u.tex.level);

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
                     COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   // bool msaa_resolve = pfb->samples > 1;
   bool msaa_resolve = false;
   OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
   OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));

   fd5_emit_blit(batch, ring);
}

/* Per-tile resolve: write back whichever buffers the batch marked for
 * resolve (depth/stencil and color) from gmem to their sysmem BOs.
 */
static void
fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      /* packed z/s resolves in one BLIT_ZS; separate stencil gets its
       * own BLIT_S:
       */
      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }
}

/* After the last tile: flush caches and drop back to BYPASS mode. */
static void
fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_cache_flush(batch, ring);
   fd5_set_render_mode(batch->ctx, ring, BYPASS);
}

/* Setup for rendering directly to system memory (no tiling), also used
 * for blit/compute-only batches (which skip the framebuffer setup).
 */
static void
fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   fd5_emit_restore(batch, ring);

   fd5_emit_lrz_flush(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |
                     A5XX_RB_CNTL_BYPASS);

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /* scissor/resolve rect covers the whole framebuffer: */
   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
                     A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* Enable stream output, since there's no binning pass to put it in. */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   /* no visibility stream in sysmem mode — draw everything: */
   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   patch_draws(batch, IGNORE_VISIBILITY);

   /* NULL gmem: buffers addressed directly in sysmem: */
   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
                     A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);

   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
   OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
                     A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
                     A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);
}

/* After sysmem rendering: flush LRZ and color/depth caches. */
static void
fd5_emit_sysmem_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
}

/* Hook the a5xx tiling/sysmem emit callbacks into the context. */
void
fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd5_emit_tile_init;
   ctx->emit_tile_prep = fd5_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
   ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd5_emit_tile_fini;
   ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
}