1848b8605Smrg/* 2848b8605Smrg * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> 3848b8605Smrg * 4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5848b8605Smrg * copy of this software and associated documentation files (the "Software"), 6848b8605Smrg * to deal in the Software without restriction, including without limitation 7848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the 9848b8605Smrg * Software is furnished to do so, subject to the following conditions: 10848b8605Smrg * 11848b8605Smrg * The above copyright notice and this permission notice (including the next 12848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the 13848b8605Smrg * Software. 14848b8605Smrg * 15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19848b8605Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20848b8605Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21848b8605Smrg * SOFTWARE. 22848b8605Smrg * 23848b8605Smrg * Authors: 24848b8605Smrg * Rob Clark <robclark@freedesktop.org> 25848b8605Smrg */ 26848b8605Smrg 27848b8605Smrg#include "pipe/p_state.h" 28848b8605Smrg#include "util/u_string.h" 29848b8605Smrg#include "util/u_memory.h" 30848b8605Smrg#include "util/u_inlines.h" 31848b8605Smrg#include "util/u_format.h" 32848b8605Smrg 33848b8605Smrg#include "freedreno_gmem.h" 34848b8605Smrg#include "freedreno_context.h" 35b8e80941Smrg#include "freedreno_fence.h" 36848b8605Smrg#include "freedreno_resource.h" 37848b8605Smrg#include "freedreno_query_hw.h" 38848b8605Smrg#include "freedreno_util.h" 39848b8605Smrg 40848b8605Smrg/* 41848b8605Smrg * GMEM is the small (ie. 256KiB for a200, 512KiB for a220, etc) tile buffer 42848b8605Smrg * inside the GPU. All rendering happens to GMEM. Larger render targets 43848b8605Smrg * are split into tiles that are small enough for the color (and depth and/or 44848b8605Smrg * stencil, if enabled) buffers to fit within GMEM. Before rendering a tile, 45848b8605Smrg * if there was not a clear invalidating the previous tile contents, we need 46848b8605Smrg * to restore the previous tiles contents (system mem -> GMEM), and after all 47848b8605Smrg * the draw calls, before moving to the next tile, we need to save the tile 48848b8605Smrg * contents (GMEM -> system mem). 49848b8605Smrg * 50848b8605Smrg * The code in this file handles dealing with GMEM and tiling. 51848b8605Smrg * 52848b8605Smrg * The structure of the ringbuffer ends up being: 53848b8605Smrg * 54848b8605Smrg * +--<---<-- IB ---<---+---<---+---<---<---<--+ 55848b8605Smrg * | | | | 56848b8605Smrg * v ^ ^ ^ 57848b8605Smrg * ------------------------------------------------------ 58848b8605Smrg * | clear/draw cmds | Tile0 | Tile1 | .... | TileN | 59848b8605Smrg * ------------------------------------------------------ 60848b8605Smrg * ^ 61848b8605Smrg * | 62848b8605Smrg * address submitted in issueibcmds 63848b8605Smrg * 64848b8605Smrg * Where the per-tile section handles scissor setup, mem2gmem restore (if 65848b8605Smrg * needed), IB to draw cmds earlier in the ringbuffer, and then gmem2mem 66848b8605Smrg * resolve. 67848b8605Smrg */ 68848b8605Smrg 69b8e80941Smrgstatic uint32_t bin_width(struct fd_screen *screen) 70848b8605Smrg{ 71b8e80941Smrg if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) 72b8e80941Smrg return 1024; 73b8e80941Smrg if (is_a3xx(screen)) 74848b8605Smrg return 992; 75848b8605Smrg return 512; 76848b8605Smrg} 77848b8605Smrg 78b8e80941Smrgstatic uint32_t 79b8e80941Smrgtotal_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2], 80b8e80941Smrg uint32_t bin_w, uint32_t bin_h, uint32_t gmem_align, 81b8e80941Smrg struct fd_gmem_stateobj *gmem) 82b8e80941Smrg{ 83b8e80941Smrg uint32_t total = 0, i; 84b8e80941Smrg 85b8e80941Smrg for (i = 0; i < MAX_RENDER_TARGETS; i++) { 86b8e80941Smrg if (cbuf_cpp[i]) { 87b8e80941Smrg gmem->cbuf_base[i] = align(total, gmem_align); 88b8e80941Smrg total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h; 89b8e80941Smrg } 90b8e80941Smrg } 91b8e80941Smrg 92b8e80941Smrg if (zsbuf_cpp[0]) { 93b8e80941Smrg gmem->zsbuf_base[0] = align(total, gmem_align); 94b8e80941Smrg total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h; 95b8e80941Smrg } 96b8e80941Smrg 97b8e80941Smrg if (zsbuf_cpp[1]) { 98b8e80941Smrg gmem->zsbuf_base[1] = align(total, gmem_align); 99b8e80941Smrg total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h; 100b8e80941Smrg } 101b8e80941Smrg 102b8e80941Smrg return total; 103b8e80941Smrg} 104b8e80941Smrg 105848b8605Smrgstatic void 106b8e80941Smrgcalculate_tiles(struct fd_batch *batch) 107848b8605Smrg{ 108b8e80941Smrg struct fd_context *ctx = batch->ctx; 109b8e80941Smrg struct fd_screen *screen = ctx->screen; 110848b8605Smrg struct fd_gmem_stateobj *gmem = &ctx->gmem; 111b8e80941Smrg struct pipe_scissor_state *scissor = &batch->max_scissor; 112b8e80941Smrg struct pipe_framebuffer_state *pfb = &batch->framebuffer; 113b8e80941Smrg const uint32_t gmem_alignw = screen->gmem_alignw; 114b8e80941Smrg const uint32_t gmem_alignh = screen->gmem_alignh; 115b8e80941Smrg const unsigned npipes = screen->num_vsc_pipes; 116b8e80941Smrg const uint32_t gmem_size = screen->gmemsize_bytes; 117848b8605Smrg uint32_t minx, miny, width, height; 118848b8605Smrg uint32_t nbins_x = 1, nbins_y = 1; 119848b8605Smrg uint32_t bin_w, bin_h; 120b8e80941Smrg uint32_t gmem_align = 0x4000; 121b8e80941Smrg uint32_t max_width = bin_width(screen); 122b8e80941Smrg uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0}; 123848b8605Smrg uint32_t i, j, t, xoff, yoff; 124848b8605Smrg uint32_t tpp_x, tpp_y; 125b8e80941Smrg bool has_zs = !!(batch->gmem_reason & (FD_GMEM_DEPTH_ENABLED | 126b8e80941Smrg FD_GMEM_STENCIL_ENABLED | FD_GMEM_CLEARS_DEPTH_STENCIL)); 127b8e80941Smrg int tile_n[npipes]; 128848b8605Smrg 129b8e80941Smrg if (has_zs) { 130b8e80941Smrg struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); 131b8e80941Smrg zsbuf_cpp[0] = rsc->cpp; 132b8e80941Smrg if (rsc->stencil) 133b8e80941Smrg zsbuf_cpp[1] = rsc->stencil->cpp; 134b8e80941Smrg } else { 135b8e80941Smrg /* we might have a zsbuf, but it isn't used */ 136b8e80941Smrg batch->restore &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); 137b8e80941Smrg batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); 138b8e80941Smrg } 139b8e80941Smrg for (i = 0; i < pfb->nr_cbufs; i++) { 140b8e80941Smrg if (pfb->cbufs[i]) 141b8e80941Smrg cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format); 142b8e80941Smrg else 143b8e80941Smrg cbuf_cpp[i] = 4; 144b8e80941Smrg /* if MSAA, color buffers are super-sampled in GMEM: */ 145b8e80941Smrg cbuf_cpp[i] *= pfb->samples; 146b8e80941Smrg } 147848b8605Smrg 148b8e80941Smrg if (!memcmp(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)) && 149b8e80941Smrg !memcmp(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)) && 150b8e80941Smrg !memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) { 151848b8605Smrg /* everything is up-to-date */ 152848b8605Smrg return; 153848b8605Smrg } 154848b8605Smrg 155b8e80941Smrg if (fd_mesa_debug & FD_DBG_NOSCIS) { 156848b8605Smrg minx = 0; 157848b8605Smrg miny = 0; 158848b8605Smrg width = pfb->width; 159848b8605Smrg height = pfb->height; 160848b8605Smrg } else { 161b8e80941Smrg /* round down to multiple of alignment: */ 162b8e80941Smrg minx = scissor->minx & ~(gmem_alignw - 1); 163b8e80941Smrg miny = scissor->miny & ~(gmem_alignh - 1); 164848b8605Smrg width = scissor->maxx - minx; 165848b8605Smrg height = scissor->maxy - miny; 166848b8605Smrg } 167848b8605Smrg 168b8e80941Smrg bin_w = align(width, gmem_alignw); 169b8e80941Smrg bin_h = align(height, gmem_alignh); 170848b8605Smrg 171848b8605Smrg /* first, find a bin width that satisfies the maximum width 172848b8605Smrg * restrictions: 173848b8605Smrg */ 174848b8605Smrg while (bin_w > max_width) { 175848b8605Smrg nbins_x++; 176b8e80941Smrg bin_w = align(width / nbins_x, gmem_alignw); 177b8e80941Smrg } 178b8e80941Smrg 179b8e80941Smrg if (fd_mesa_debug & FD_DBG_MSGS) { 180b8e80941Smrg debug_printf("binning input: cbuf cpp:"); 181b8e80941Smrg for (i = 0; i < pfb->nr_cbufs; i++) 182b8e80941Smrg debug_printf(" %d", cbuf_cpp[i]); 183b8e80941Smrg debug_printf(", zsbuf cpp: %d; %dx%d\n", 184b8e80941Smrg zsbuf_cpp[0], width, height); 185b8e80941Smrg } 186b8e80941Smrg 187b8e80941Smrg if (is_a20x(screen) && batch->cleared) { 188b8e80941Smrg /* under normal circumstances the requirement would be 4K 189b8e80941Smrg * but the fast clear path requires an alignment of 32K 190b8e80941Smrg */ 191b8e80941Smrg gmem_align = 0x8000; 192848b8605Smrg } 193848b8605Smrg 194848b8605Smrg /* then find a bin width/height that satisfies the memory 195848b8605Smrg * constraints: 196848b8605Smrg */ 197b8e80941Smrg while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem_align, gmem) > 198b8e80941Smrg gmem_size) { 199848b8605Smrg if (bin_w > bin_h) { 200848b8605Smrg nbins_x++; 201b8e80941Smrg bin_w = align(width / nbins_x, gmem_alignw); 202848b8605Smrg } else { 203848b8605Smrg nbins_y++; 204b8e80941Smrg bin_h = align(height / nbins_y, gmem_alignh); 205848b8605Smrg } 206848b8605Smrg } 207848b8605Smrg 208848b8605Smrg DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h); 209848b8605Smrg 210848b8605Smrg gmem->scissor = *scissor; 211b8e80941Smrg memcpy(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)); 212b8e80941Smrg memcpy(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)); 213848b8605Smrg gmem->bin_h = bin_h; 214848b8605Smrg gmem->bin_w = bin_w; 215848b8605Smrg gmem->nbins_x = nbins_x; 216848b8605Smrg gmem->nbins_y = nbins_y; 217848b8605Smrg gmem->minx = minx; 218848b8605Smrg gmem->miny = miny; 219848b8605Smrg gmem->width = width; 220848b8605Smrg gmem->height = height; 221848b8605Smrg 222848b8605Smrg /* 223848b8605Smrg * Assign tiles and pipes: 224848b8605Smrg * 225848b8605Smrg * At some point it might be worth playing with different 226848b8605Smrg * strategies and seeing if that makes much impact on 227848b8605Smrg * performance. 228848b8605Smrg */ 229848b8605Smrg 230848b8605Smrg#define div_round_up(v, a) (((v) + (a) - 1) / (a)) 231848b8605Smrg /* figure out number of tiles per pipe: */ 232b8e80941Smrg if (is_a20x(ctx->screen)) { 233b8e80941Smrg /* for a20x we want to minimize the number of "pipes" 234b8e80941Smrg * binning data has 3 bits for x/y (8x8) but the edges are used to 235b8e80941Smrg * cull off-screen vertices with hw binning, so we have 6x6 pipes 236b8e80941Smrg */ 237b8e80941Smrg tpp_x = 6; 238b8e80941Smrg tpp_y = 6; 239b8e80941Smrg } else { 240b8e80941Smrg tpp_x = tpp_y = 1; 241b8e80941Smrg while (div_round_up(nbins_y, tpp_y) > screen->num_vsc_pipes) 242b8e80941Smrg tpp_y += 2; 243b8e80941Smrg while ((div_round_up(nbins_y, tpp_y) * 244b8e80941Smrg div_round_up(nbins_x, tpp_x)) > screen->num_vsc_pipes) 245b8e80941Smrg tpp_x += 1; 246b8e80941Smrg } 247b8e80941Smrg 248b8e80941Smrg gmem->maxpw = tpp_x; 249b8e80941Smrg gmem->maxph = tpp_y; 250848b8605Smrg 251848b8605Smrg /* configure pipes: */ 252848b8605Smrg xoff = yoff = 0; 253b8e80941Smrg for (i = 0; i < npipes; i++) { 254b8e80941Smrg struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; 255848b8605Smrg 256848b8605Smrg if (xoff >= nbins_x) { 257848b8605Smrg xoff = 0; 258848b8605Smrg yoff += tpp_y; 259848b8605Smrg } 260848b8605Smrg 261848b8605Smrg if (yoff >= nbins_y) { 262848b8605Smrg break; 263848b8605Smrg } 264848b8605Smrg 265848b8605Smrg pipe->x = xoff; 266848b8605Smrg pipe->y = yoff; 267848b8605Smrg pipe->w = MIN2(tpp_x, nbins_x - xoff); 268848b8605Smrg pipe->h = MIN2(tpp_y, nbins_y - yoff); 269848b8605Smrg 270848b8605Smrg xoff += tpp_x; 271848b8605Smrg } 272848b8605Smrg 273b8e80941Smrg /* number of pipes to use for a20x */ 274b8e80941Smrg gmem->num_vsc_pipes = MAX2(1, i); 275b8e80941Smrg 276b8e80941Smrg for (; i < npipes; i++) { 277b8e80941Smrg struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; 278848b8605Smrg pipe->x = pipe->y = pipe->w = pipe->h = 0; 279848b8605Smrg } 280848b8605Smrg 281848b8605Smrg#if 0 /* debug */ 282848b8605Smrg printf("%dx%d ... tpp=%dx%d\n", nbins_x, nbins_y, tpp_x, tpp_y); 283848b8605Smrg for (i = 0; i < 8; i++) { 284848b8605Smrg struct fd_vsc_pipe *pipe = &ctx->pipe[i]; 285848b8605Smrg printf("pipe[%d]: %ux%u @ %u,%u\n", i, 286848b8605Smrg pipe->w, pipe->h, pipe->x, pipe->y); 287848b8605Smrg } 288848b8605Smrg#endif 289848b8605Smrg 290848b8605Smrg /* configure tiles: */ 291848b8605Smrg t = 0; 292848b8605Smrg yoff = miny; 293b8e80941Smrg memset(tile_n, 0, sizeof(tile_n)); 294848b8605Smrg for (i = 0; i < nbins_y; i++) { 295848b8605Smrg uint32_t bw, bh; 296848b8605Smrg 297848b8605Smrg xoff = minx; 298848b8605Smrg 299848b8605Smrg /* clip bin height: */ 300848b8605Smrg bh = MIN2(bin_h, miny + height - yoff); 301848b8605Smrg 302848b8605Smrg for (j = 0; j < nbins_x; j++) { 303848b8605Smrg struct fd_tile *tile = &ctx->tile[t]; 304b8e80941Smrg uint32_t p; 305848b8605Smrg 306848b8605Smrg assert(t < ARRAY_SIZE(ctx->tile)); 307848b8605Smrg 308848b8605Smrg /* pipe number: */ 309848b8605Smrg p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x); 310b8e80941Smrg assert(p < gmem->num_vsc_pipes); 311848b8605Smrg 312848b8605Smrg /* clip bin width: */ 313848b8605Smrg bw = MIN2(bin_w, minx + width - xoff); 314b8e80941Smrg tile->n = !is_a20x(ctx->screen) ? tile_n[p]++ : 315b8e80941Smrg ((i % tpp_y + 1) << 3 | (j % tpp_x + 1)); 316848b8605Smrg tile->p = p; 317848b8605Smrg tile->bin_w = bw; 318848b8605Smrg tile->bin_h = bh; 319848b8605Smrg tile->xoff = xoff; 320848b8605Smrg tile->yoff = yoff; 321848b8605Smrg 322848b8605Smrg t++; 323848b8605Smrg 324848b8605Smrg xoff += bw; 325848b8605Smrg } 326848b8605Smrg 327848b8605Smrg yoff += bh; 328848b8605Smrg } 329848b8605Smrg 330848b8605Smrg#if 0 /* debug */ 331848b8605Smrg t = 0; 332848b8605Smrg for (i = 0; i < nbins_y; i++) { 333848b8605Smrg for (j = 0; j < nbins_x; j++) { 334848b8605Smrg struct fd_tile *tile = &ctx->tile[t++]; 335848b8605Smrg printf("|p:%u n:%u|", tile->p, tile->n); 336848b8605Smrg } 337848b8605Smrg printf("\n"); 338848b8605Smrg } 339848b8605Smrg#endif 340848b8605Smrg} 341848b8605Smrg 342848b8605Smrgstatic void 343b8e80941Smrgrender_tiles(struct fd_batch *batch) 344848b8605Smrg{ 345b8e80941Smrg struct fd_context *ctx = batch->ctx; 346848b8605Smrg struct fd_gmem_stateobj *gmem = &ctx->gmem; 347848b8605Smrg int i; 348848b8605Smrg 349b8e80941Smrg ctx->emit_tile_init(batch); 350848b8605Smrg 351b8e80941Smrg if (batch->restore) 352848b8605Smrg ctx->stats.batch_restore++; 353848b8605Smrg 354848b8605Smrg for (i = 0; i < (gmem->nbins_x * gmem->nbins_y); i++) { 355848b8605Smrg struct fd_tile *tile = &ctx->tile[i]; 356848b8605Smrg 357848b8605Smrg DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d", 358848b8605Smrg tile->bin_h, tile->yoff, tile->bin_w, tile->xoff); 359848b8605Smrg 360b8e80941Smrg ctx->emit_tile_prep(batch, tile); 361848b8605Smrg 362b8e80941Smrg if (batch->restore) { 363b8e80941Smrg ctx->emit_tile_mem2gmem(batch, tile); 364848b8605Smrg } 365848b8605Smrg 366b8e80941Smrg ctx->emit_tile_renderprep(batch, tile); 367848b8605Smrg 368b8e80941Smrg if (ctx->query_prepare_tile) 369b8e80941Smrg ctx->query_prepare_tile(batch, i, batch->gmem); 370848b8605Smrg 371848b8605Smrg /* emit IB to drawcmds: */ 372b8e80941Smrg ctx->emit_ib(batch->gmem, batch->draw); 373b8e80941Smrg fd_reset_wfi(batch); 374848b8605Smrg 375848b8605Smrg /* emit gmem2mem to transfer tile back to system memory: */ 376b8e80941Smrg ctx->emit_tile_gmem2mem(batch, tile); 377848b8605Smrg } 378b8e80941Smrg 379b8e80941Smrg if (ctx->emit_tile_fini) 380b8e80941Smrg ctx->emit_tile_fini(batch); 381848b8605Smrg} 382848b8605Smrg 383848b8605Smrgstatic void 384b8e80941Smrgrender_sysmem(struct fd_batch *batch) 385848b8605Smrg{ 386b8e80941Smrg struct fd_context *ctx = batch->ctx; 387b8e80941Smrg 388b8e80941Smrg ctx->emit_sysmem_prep(batch); 389848b8605Smrg 390b8e80941Smrg if (ctx->query_prepare_tile) 391b8e80941Smrg ctx->query_prepare_tile(batch, 0, batch->gmem); 392848b8605Smrg 393848b8605Smrg /* emit IB to drawcmds: */ 394b8e80941Smrg ctx->emit_ib(batch->gmem, batch->draw); 395b8e80941Smrg fd_reset_wfi(batch); 396b8e80941Smrg 397b8e80941Smrg if (ctx->emit_sysmem_fini) 398b8e80941Smrg ctx->emit_sysmem_fini(batch); 399b8e80941Smrg} 400b8e80941Smrg 401b8e80941Smrgstatic void 402b8e80941Smrgflush_ring(struct fd_batch *batch) 403b8e80941Smrg{ 404b8e80941Smrg uint32_t timestamp; 405b8e80941Smrg int out_fence_fd = -1; 406b8e80941Smrg 407b8e80941Smrg fd_submit_flush(batch->submit, batch->in_fence_fd, 408b8e80941Smrg batch->needs_out_fence_fd ? &out_fence_fd : NULL, 409b8e80941Smrg ×tamp); 410b8e80941Smrg 411b8e80941Smrg fd_fence_populate(batch->fence, timestamp, out_fence_fd); 412848b8605Smrg} 413848b8605Smrg 414848b8605Smrgvoid 415b8e80941Smrgfd_gmem_render_tiles(struct fd_batch *batch) 416848b8605Smrg{ 417b8e80941Smrg struct fd_context *ctx = batch->ctx; 418b8e80941Smrg struct pipe_framebuffer_state *pfb = &batch->framebuffer; 419848b8605Smrg bool sysmem = false; 420848b8605Smrg 421b8e80941Smrg if (ctx->emit_sysmem_prep && !batch->nondraw) { 422b8e80941Smrg if (batch->cleared || batch->gmem_reason || 423b8e80941Smrg ((batch->num_draws > 5) && !batch->blit) || 424b8e80941Smrg (pfb->samples > 1)) { 425b8e80941Smrg DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u, samples=%u", 426b8e80941Smrg batch->cleared, batch->gmem_reason, batch->num_draws, 427b8e80941Smrg pfb->samples); 428b8e80941Smrg } else if (!(fd_mesa_debug & FD_DBG_NOBYPASS)) { 429848b8605Smrg sysmem = true; 430848b8605Smrg } 431848b8605Smrg 432b8e80941Smrg /* For ARB_framebuffer_no_attachments: */ 433b8e80941Smrg if ((pfb->nr_cbufs == 0) && !pfb->zsbuf) { 434b8e80941Smrg sysmem = true; 435b8e80941Smrg } 436b8e80941Smrg } 437848b8605Smrg 438b8e80941Smrg fd_reset_wfi(batch); 439848b8605Smrg 440848b8605Smrg ctx->stats.batch_total++; 441848b8605Smrg 442b8e80941Smrg if (batch->nondraw) { 443b8e80941Smrg DBG("%p: rendering non-draw", batch); 444b8e80941Smrg ctx->stats.batch_nondraw++; 445b8e80941Smrg } else if (sysmem) { 446b8e80941Smrg DBG("%p: rendering sysmem %ux%u (%s/%s), num_draws=%u", 447b8e80941Smrg batch, pfb->width, pfb->height, 448848b8605Smrg util_format_short_name(pipe_surface_format(pfb->cbufs[0])), 449b8e80941Smrg util_format_short_name(pipe_surface_format(pfb->zsbuf)), 450b8e80941Smrg batch->num_draws); 451b8e80941Smrg if (ctx->query_prepare) 452b8e80941Smrg ctx->query_prepare(batch, 1); 453b8e80941Smrg render_sysmem(batch); 454848b8605Smrg ctx->stats.batch_sysmem++; 455848b8605Smrg } else { 456848b8605Smrg struct fd_gmem_stateobj *gmem = &ctx->gmem; 457b8e80941Smrg calculate_tiles(batch); 458b8e80941Smrg DBG("%p: rendering %dx%d tiles %ux%u (%s/%s)", 459b8e80941Smrg batch, pfb->width, pfb->height, gmem->nbins_x, gmem->nbins_y, 460848b8605Smrg util_format_short_name(pipe_surface_format(pfb->cbufs[0])), 461848b8605Smrg util_format_short_name(pipe_surface_format(pfb->zsbuf))); 462b8e80941Smrg if (ctx->query_prepare) 463b8e80941Smrg ctx->query_prepare(batch, gmem->nbins_x * gmem->nbins_y); 464b8e80941Smrg render_tiles(batch); 465848b8605Smrg ctx->stats.batch_gmem++; 466848b8605Smrg } 467848b8605Smrg 468b8e80941Smrg flush_ring(batch); 469848b8605Smrg} 470848b8605Smrg 471848b8605Smrg/* When deciding whether a tile needs mem2gmem, we need to take into 472848b8605Smrg * account the scissor rect(s) that were cleared. To simplify we only 473848b8605Smrg * consider the last scissor rect for each buffer, since the common 474848b8605Smrg * case would be a single clear. 475848b8605Smrg */ 476848b8605Smrgbool 477b8e80941Smrgfd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile, 478848b8605Smrg uint32_t buffers) 479848b8605Smrg{ 480b8e80941Smrg if (!(batch->restore & buffers)) 481848b8605Smrg return false; 482848b8605Smrg 483848b8605Smrg return true; 484848b8605Smrg} 485