1/* 2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#include "util/format/u_format.h" 28#include "util/format/u_format_rgtc.h" 29#include "util/format/u_format_zs.h" 30#include "util/set.h" 31#include "util/u_drm.h" 32#include "util/u_inlines.h" 33#include "util/u_string.h" 34#include "util/u_surface.h" 35#include "util/u_transfer.h" 36 37#include "decode/util.h" 38 39#include "freedreno_batch_cache.h" 40#include "freedreno_blitter.h" 41#include "freedreno_context.h" 42#include "freedreno_fence.h" 43#include "freedreno_query_hw.h" 44#include "freedreno_resource.h" 45#include "freedreno_screen.h" 46#include "freedreno_surface.h" 47#include "freedreno_util.h" 48 49#include <errno.h> 50#include "drm-uapi/drm_fourcc.h" 51 52/* XXX this should go away, needed for 'struct winsys_handle' */ 53#include "frontend/drm_driver.h" 54 55/* A private modifier for now, so we have a way to request tiled but not 56 * compressed. It would perhaps be good to get real modifiers for the 57 * tiled formats, but would probably need to do some work to figure out 58 * the layout(s) of the tiled modes, and whether they are the same 59 * across generations. 60 */ 61#define FD_FORMAT_MOD_QCOM_TILED fourcc_mod_code(QCOM, 0xffffffff) 62 63/** 64 * Go through the entire state and see if the resource is bound 65 * anywhere. If it is, mark the relevant state as dirty. This is 66 * called on realloc_bo to ensure the necessary state is re- 67 * emitted so the GPU looks at the new backing bo. 
 */
static void
rebind_resource_in_ctx(struct fd_context *ctx,
                       struct fd_resource *rsc) assert_dt
{
   struct pipe_resource *prsc = &rsc->b.b;

   /* Optional per-context hook, if present: */
   if (ctx->rebind_resource)
      ctx->rebind_resource(ctx, rsc);

   /* VBOs */
   if (rsc->dirty & FD_DIRTY_VTXBUF) {
      struct fd_vertexbuf_stateobj *vb = &ctx->vtx.vertexbuf;
      /* Stop scanning as soon as the dirty bit has been set: */
      for (unsigned i = 0; i < vb->count && !(ctx->dirty & FD_DIRTY_VTXBUF);
           i++) {
         if (vb->vb[i].buffer.resource == prsc)
            fd_context_dirty(ctx, FD_DIRTY_VTXBUF);
      }
   }

   const enum fd_dirty_3d_state per_stage_dirty =
      FD_DIRTY_CONST | FD_DIRTY_TEX | FD_DIRTY_IMAGE | FD_DIRTY_SSBO;

   /* Skip the per-stage scan entirely if the rsc is not bound to any
    * per-stage state:
    */
   if (!(rsc->dirty & per_stage_dirty))
      return;

   /* per-shader-stage resources: */
   for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
      /* Constbufs.. note that constbuf[0] is normal uniforms emitted in
       * cmdstream rather than by pointer..
       */
      if ((rsc->dirty & FD_DIRTY_CONST) &&
          !(ctx->dirty_shader[stage] & FD_DIRTY_CONST)) {
         struct fd_constbuf_stateobj *cb = &ctx->constbuf[stage];
         const unsigned num_ubos = util_last_bit(cb->enabled_mask);
         for (unsigned i = 1; i < num_ubos; i++) {
            if (cb->cb[i].buffer == prsc) {
               fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_CONST);
               break;
            }
         }
      }

      /* Textures */
      if ((rsc->dirty & FD_DIRTY_TEX) &&
          !(ctx->dirty_shader[stage] & FD_DIRTY_TEX)) {
         struct fd_texture_stateobj *tex = &ctx->tex[stage];
         for (unsigned i = 0; i < tex->num_textures; i++) {
            if (tex->textures[i] && (tex->textures[i]->texture == prsc)) {
               fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_TEX);
               break;
            }
         }
      }

      /* Images */
      if ((rsc->dirty & FD_DIRTY_IMAGE) &&
          !(ctx->dirty_shader[stage] & FD_DIRTY_IMAGE)) {
         struct fd_shaderimg_stateobj *si = &ctx->shaderimg[stage];
         const unsigned num_images = util_last_bit(si->enabled_mask);
         for (unsigned i = 0; i < num_images; i++) {
            if (si->si[i].resource == prsc) {
               fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_IMAGE);
               break;
            }
         }
      }

      /* SSBOs */
      if ((rsc->dirty & FD_DIRTY_SSBO) &&
          !(ctx->dirty_shader[stage] & FD_DIRTY_SSBO)) {
         struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[stage];
         const unsigned num_ssbos = util_last_bit(sb->enabled_mask);
         for (unsigned i = 0; i < num_ssbos; i++) {
            if (sb->sb[i].buffer == prsc) {
               fd_context_dirty_shader(ctx, stage, FD_DIRTY_SHADER_SSBO);
               break;
            }
         }
      }
   }
}

/* Run rebind_resource_in_ctx() for every context sharing this screen, so
 * any context that has the rsc bound re-emits state pointing at the new
 * backing bo.  Takes the screen lock (to walk the context list) and the
 * resource lock.
 */
static void
rebind_resource(struct fd_resource *rsc) assert_dt
{
   struct fd_screen *screen = fd_screen(rsc->b.b.screen);

   fd_screen_lock(screen);
   fd_resource_lock(rsc);

   if (rsc->dirty)
      list_for_each_entry (struct fd_context, ctx, &screen->context_list, node)
         rebind_resource_in_ctx(ctx, rsc);

   fd_resource_unlock(rsc);
   fd_screen_unlock(screen);
}

/* Install a (new) backing bo, bumping the rsc's seqno so that anything
 * keyed on the previous storage is not re-used:
 */
static inline void
fd_resource_set_bo(struct fd_resource *rsc, struct fd_bo *bo)
{
   struct fd_screen *screen = fd_screen(rsc->b.b.screen);

   rsc->bo = bo;
   rsc->seqno = p_atomic_inc_return(&screen->rsc_seqno);
}

/* cpu-prep the rsc's bo for the requested op, logging a perf warning
 * (attributed to 'func') if a synchronous prep stalls for too long.
 * Returns the fd_bo_cpu_prep() result.
 */
int
__fd_resource_wait(struct fd_context *ctx, struct fd_resource *rsc, unsigned op,
                   const char *func)
{
   /* NOSYNC preps cannot stall, so skip the perf-time bookkeeping: */
   if (op & FD_BO_PREP_NOSYNC)
      return fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);

   int ret;

   perf_time_ctx (ctx, 10000, "%s: a busy \"%" PRSC_FMT "\" BO stalled", func,
                  PRSC_ARGS(&rsc->b.b)) {
      ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);
   }

   return ret;
}

/* (Re)allocate the backing bo for a resource, dropping any previous
 * storage, resetting the valid range, and invalidating batch-cache
 * references to the rsc.
 */
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
   struct pipe_resource *prsc = &rsc->b.b;
   struct fd_screen *screen = fd_screen(rsc->b.b.screen);
   uint32_t flags =
      COND(prsc->usage & PIPE_USAGE_STAGING, FD_BO_CACHED_COHERENT) |
      COND(prsc->bind & PIPE_BIND_SCANOUT, FD_BO_SCANOUT);
   /* TODO other flags? */

   /* if we start using things other than write-combine,
    * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
    */

   if (rsc->bo)
      fd_bo_del(rsc->bo);

   struct fd_bo *bo =
      fd_bo_new(screen->dev, size, flags, "%ux%ux%u@%u:%x", prsc->width0,
                prsc->height0, prsc->depth0, rsc->layout.cpp, prsc->bind);
   fd_resource_set_bo(rsc, bo);

   /* Zero out the UBWC area on allocation.  This fixes intermittent failures
    * with UBWC, which I suspect are due to the HW having a hard time
    * interpreting arbitrary values populating the flags buffer when the BO
    * was recycled through the bo cache (instead of fresh allocations from
    * the kernel, which are zeroed).  sleep(1) in this spot didn't work
    * around the issue, but any memset value seems to.
    */
   if (rsc->layout.ubwc) {
      rsc->needs_ubwc_clear = true;
   }

   util_range_set_empty(&rsc->valid_buffer_range);
   fd_bc_invalidate_resource(rsc, true);
}

/* Execute a blit, falling back to a cpu copy via
 * util_resource_copy_region() if the hw blit path declines (or if the
 * caller explicitly requests the fallback):
 */
static void
do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit,
        bool fallback) assert_dt
{
   struct pipe_context *pctx = &ctx->base;

   /* blits must not recurse: */
   assert(!ctx->in_blit);
   ctx->in_blit = true;

   /* TODO size threshold too?? */
   if (fallback || !fd_blit(pctx, blit)) {
      /* do blit on cpu: */
      util_resource_copy_region(pctx, blit->dst.resource, blit->dst.level,
                                blit->dst.box.x, blit->dst.box.y,
                                blit->dst.box.z, blit->src.resource,
                                blit->src.level, &blit->src.box);
   }

   ctx->in_blit = false;
}

/**
 * Replace the storage of dst with src.  This is only used by TC in the
 * DISCARD_WHOLE_RESOURCE path, and src is a freshly allocated buffer.
 */
void
fd_replace_buffer_storage(struct pipe_context *pctx, struct pipe_resource *pdst,
                          struct pipe_resource *psrc, unsigned num_rebinds,
                          uint32_t rebind_mask, uint32_t delete_buffer_id)
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_resource *dst = fd_resource(pdst);
   struct fd_resource *src = fd_resource(psrc);

   DBG("pdst=%p, psrc=%p", pdst, psrc);

   /* This should only be called with buffers.. which side-steps some trickier
    * cases, like a rsc that is in a batch-cache key...
    */
   assert(pdst->target == PIPE_BUFFER);
   assert(psrc->target == PIPE_BUFFER);
   assert(dst->track->bc_batch_mask == 0);
   assert(src->track->bc_batch_mask == 0);
   assert(src->track->batch_mask == 0);
   assert(src->track->write_batch == NULL);
   assert(memcmp(&dst->layout, &src->layout, sizeof(dst->layout)) == 0);

   /* get rid of any references that batch-cache might have to us (which
    * should empty/destroy rsc->batches hashset)
    *
    * Note that we aren't actually destroying dst, but we are replacing
    * its storage so we want to go thru the same motions of decoupling
    * its batch connections.
    */
   fd_bc_invalidate_resource(dst, true);
   rebind_resource(dst);

   util_idalloc_mt_free(&ctx->screen->buffer_ids, delete_buffer_id);

   fd_screen_lock(ctx->screen);

   /* Swap in src's bo and tracking state: */
   fd_bo_del(dst->bo);
   dst->bo = fd_bo_ref(src->bo);

   fd_resource_tracking_reference(&dst->track, src->track);
   src->is_replacement = true;

   /* Bump seqno so anything keyed on the old storage is not re-used: */
   dst->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno);

   fd_screen_unlock(ctx->screen);
}

/* Translate pipe transfer-map usage flags into bo cpu-prep op flags: */
static unsigned
translate_usage(unsigned usage)
{
   uint32_t op = 0;

   if (usage & PIPE_MAP_READ)
      op |= FD_BO_PREP_READ;

   if (usage & PIPE_MAP_WRITE)
      op |= FD_BO_PREP_WRITE;

   return op;
}

/* Report whether mapping the resource with the given usage would stall,
 * considering both un-flushed batch references and gpu access to the bo:
 */
bool
fd_resource_busy(struct pipe_screen *pscreen, struct pipe_resource *prsc,
                 unsigned usage)
{
   struct fd_resource *rsc = fd_resource(prsc);

   if (pending(rsc, !!(usage & PIPE_MAP_WRITE)))
      return true;

   if (resource_busy(rsc, translate_usage(usage)))
      return true;

   return false;
}

static void flush_resource(struct fd_context *ctx, struct fd_resource *rsc,
                           unsigned usage);

/**
 * Helper to check if the format is something that we can blit/render
 * to.. if the format is not renderable, there is no point in trying
if the format is not renderable, there is no point in trying 337 * to do a staging blit (as it will still end up being a cpu copy) 338 */ 339static bool 340is_renderable(struct pipe_resource *prsc) 341{ 342 struct pipe_screen *pscreen = prsc->screen; 343 return pscreen->is_format_supported( 344 pscreen, prsc->format, prsc->target, prsc->nr_samples, 345 prsc->nr_storage_samples, PIPE_BIND_RENDER_TARGET); 346} 347 348/** 349 * @rsc: the resource to shadow 350 * @level: the level to discard (if box != NULL, otherwise ignored) 351 * @box: the box to discard (or NULL if none) 352 * @modifier: the modifier for the new buffer state 353 */ 354static bool 355fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc, 356 unsigned level, const struct pipe_box *box, 357 uint64_t modifier) assert_dt 358{ 359 struct pipe_context *pctx = &ctx->base; 360 struct pipe_resource *prsc = &rsc->b.b; 361 struct fd_screen *screen = fd_screen(pctx->screen); 362 struct fd_batch *batch; 363 bool fallback = false; 364 365 if (prsc->next) 366 return false; 367 368 /* Flush any pending batches writing the resource before we go mucking around 369 * in its insides. The blit would immediately cause the batch to be flushed, 370 * anyway. 371 */ 372 fd_bc_flush_writer(ctx, rsc); 373 374 /* Because IB1 ("gmem") cmdstream is built only when we flush the 375 * batch, we need to flush any batches that reference this rsc as 376 * a render target. Otherwise the framebuffer state emitted in 377 * IB1 will reference the resources new state, and not the state 378 * at the point in time that the earlier draws referenced it. 379 * 380 * Note that being in the gmem key doesn't necessarily mean the 381 * batch was considered a writer! 382 */ 383 foreach_batch (batch, &screen->batch_cache, rsc->track->bc_batch_mask) { 384 fd_batch_flush(batch); 385 } 386 387 /* TODO: somehow munge dimensions and format to copy unsupported 388 * render target format to something that is supported? 
389 */ 390 if (!is_renderable(prsc)) 391 fallback = true; 392 393 /* do shadowing back-blits on the cpu for buffers -- requires about a page of 394 * DMA to make GPU copies worth it according to robclark. Note, if you 395 * decide to do it on the GPU then you'll need to update valid_buffer_range 396 * in the swap()s below. 397 */ 398 if (prsc->target == PIPE_BUFFER) 399 fallback = true; 400 401 bool discard_whole_level = box && util_texrange_covers_whole_level( 402 prsc, level, box->x, box->y, box->z, 403 box->width, box->height, box->depth); 404 405 /* TODO need to be more clever about current level */ 406 if ((prsc->target >= PIPE_TEXTURE_2D) && box && !discard_whole_level) 407 return false; 408 409 struct pipe_resource *pshadow = pctx->screen->resource_create_with_modifiers( 410 pctx->screen, prsc, &modifier, 1); 411 412 if (!pshadow) 413 return false; 414 415 assert(!ctx->in_shadow); 416 ctx->in_shadow = true; 417 418 /* get rid of any references that batch-cache might have to us (which 419 * should empty/destroy rsc->batches hashset) 420 */ 421 fd_bc_invalidate_resource(rsc, false); 422 rebind_resource(rsc); 423 424 fd_screen_lock(ctx->screen); 425 426 /* Swap the backing bo's, so shadow becomes the old buffer, 427 * blit from shadow to new buffer. From here on out, we 428 * cannot fail. 429 * 430 * Note that we need to do it in this order, otherwise if 431 * we go down cpu blit path, the recursive transfer_map() 432 * sees the wrong status.. 433 */ 434 struct fd_resource *shadow = fd_resource(pshadow); 435 436 DBG("shadow: %p (%d, %p) -> %p (%d, %p)", rsc, rsc->b.b.reference.count, 437 rsc->track, shadow, shadow->b.b.reference.count, shadow->track); 438 439 swap(rsc->bo, shadow->bo); 440 swap(rsc->valid, shadow->valid); 441 442 /* swap() doesn't work because you can't typeof() the bitfield. 
*/ 443 bool temp = shadow->needs_ubwc_clear; 444 shadow->needs_ubwc_clear = rsc->needs_ubwc_clear; 445 rsc->needs_ubwc_clear = temp; 446 447 swap(rsc->layout, shadow->layout); 448 rsc->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno); 449 450 /* at this point, the newly created shadow buffer is not referenced 451 * by any batches, but the existing rsc (probably) is. We need to 452 * transfer those references over: 453 */ 454 debug_assert(shadow->track->batch_mask == 0); 455 foreach_batch (batch, &ctx->screen->batch_cache, rsc->track->batch_mask) { 456 struct set_entry *entry = _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc); 457 _mesa_set_remove(batch->resources, entry); 458 _mesa_set_add_pre_hashed(batch->resources, shadow->hash, shadow); 459 } 460 swap(rsc->track, shadow->track); 461 462 fd_screen_unlock(ctx->screen); 463 464 struct pipe_blit_info blit = {}; 465 blit.dst.resource = prsc; 466 blit.dst.format = prsc->format; 467 blit.src.resource = pshadow; 468 blit.src.format = pshadow->format; 469 blit.mask = util_format_get_mask(prsc->format); 470 blit.filter = PIPE_TEX_FILTER_NEAREST; 471 472#define set_box(field, val) \ 473 do { \ 474 blit.dst.field = (val); \ 475 blit.src.field = (val); \ 476 } while (0) 477 478 /* Disable occlusion queries during shadow blits. 
*/ 479 bool saved_active_queries = ctx->active_queries; 480 pctx->set_active_query_state(pctx, false); 481 482 /* blit the other levels in their entirety: */ 483 for (unsigned l = 0; l <= prsc->last_level; l++) { 484 if (box && l == level) 485 continue; 486 487 /* just blit whole level: */ 488 set_box(level, l); 489 set_box(box.width, u_minify(prsc->width0, l)); 490 set_box(box.height, u_minify(prsc->height0, l)); 491 set_box(box.depth, u_minify(prsc->depth0, l)); 492 493 for (int i = 0; i < prsc->array_size; i++) { 494 set_box(box.z, i); 495 do_blit(ctx, &blit, fallback); 496 } 497 } 498 499 /* deal w/ current level specially, since we might need to split 500 * it up into a couple blits: 501 */ 502 if (box && !discard_whole_level) { 503 set_box(level, level); 504 505 switch (prsc->target) { 506 case PIPE_BUFFER: 507 case PIPE_TEXTURE_1D: 508 set_box(box.y, 0); 509 set_box(box.z, 0); 510 set_box(box.height, 1); 511 set_box(box.depth, 1); 512 513 if (box->x > 0) { 514 set_box(box.x, 0); 515 set_box(box.width, box->x); 516 517 do_blit(ctx, &blit, fallback); 518 } 519 if ((box->x + box->width) < u_minify(prsc->width0, level)) { 520 set_box(box.x, box->x + box->width); 521 set_box(box.width, 522 u_minify(prsc->width0, level) - (box->x + box->width)); 523 524 do_blit(ctx, &blit, fallback); 525 } 526 break; 527 case PIPE_TEXTURE_2D: 528 /* TODO */ 529 default: 530 unreachable("TODO"); 531 } 532 } 533 534 pctx->set_active_query_state(pctx, saved_active_queries); 535 536 ctx->in_shadow = false; 537 538 pipe_resource_reference(&pshadow, NULL); 539 540 return true; 541} 542 543/** 544 * Uncompress an UBWC compressed buffer "in place". This works basically 545 * like resource shadowing, creating a new resource, and doing an uncompress 546 * blit, and swapping the state between shadow and original resource so it 547 * appears to the gallium frontends as if nothing changed. 
 */
void
fd_resource_uncompress(struct fd_context *ctx, struct fd_resource *rsc, bool linear)
{
   tc_assert_driver_thread(ctx->tc);

   /* linear requests a fully linear layout; otherwise tiled-but-not-UBWC: */
   uint64_t modifier = linear ? DRM_FORMAT_MOD_LINEAR : FD_FORMAT_MOD_QCOM_TILED;

   bool success = fd_try_shadow_resource(ctx, rsc, 0, NULL, modifier);

   /* shadow should not fail in any cases where we need to uncompress: */
   debug_assert(success);
}

/**
 * Debug helper to hexdump a resource.
 */
void
fd_resource_dump(struct fd_resource *rsc, const char *name)
{
   fd_bo_cpu_prep(rsc->bo, NULL, FD_BO_PREP_READ);
   printf("%s: \n", name);
   dump_hex(fd_bo_map(rsc->bo), fd_bo_size(rsc->bo));
}

/* Allocate a linear staging resource sized to cover the given box of the
 * given level, for use as a cpu-mappable proxy.  Returns NULL if staging
 * is not possible (caller falls back to mapping the rsc directly).
 */
static struct fd_resource *
fd_alloc_staging(struct fd_context *ctx, struct fd_resource *rsc,
                 unsigned level, const struct pipe_box *box)
   assert_dt
{
   struct pipe_context *pctx = &ctx->base;
   struct pipe_resource tmpl = rsc->b.b;

   /* We cannot currently do stencil export on earlier gens, and
    * u_blitter cannot do blits involving stencil otherwise:
    */
   if ((ctx->screen->gen < 6) && !ctx->blit &&
       (util_format_get_mask(tmpl.format) & PIPE_MASK_S))
      return NULL;

   tmpl.width0 = box->width;
   tmpl.height0 = box->height;
   /* for array textures, box->depth is the array_size, otherwise
    * for 3d textures, it is the depth:
    */
   if (tmpl.array_size > 1) {
      if (tmpl.target == PIPE_TEXTURE_CUBE)
         tmpl.target = PIPE_TEXTURE_2D_ARRAY;
      tmpl.array_size = box->depth;
      tmpl.depth0 = 1;
   } else {
      tmpl.array_size = 1;
      tmpl.depth0 = box->depth;
   }
   tmpl.last_level = 0;
   tmpl.bind |= PIPE_BIND_LINEAR;
   tmpl.usage = PIPE_USAGE_STAGING;

   struct pipe_resource *pstaging =
      pctx->screen->resource_create(pctx->screen, &tmpl);
   if (!pstaging)
      return NULL;

   return fd_resource(pstaging);
}

/* Blit the staging resource's contents back into the mapped region of the
 * real resource (the write-back half of a staging transfer):
 */
static void
fd_blit_from_staging(struct fd_context *ctx,
                     struct fd_transfer *trans) assert_dt
{
   DBG("");
   struct pipe_resource *dst = trans->b.b.resource;
   struct pipe_blit_info blit = {};

   blit.dst.resource = dst;
   blit.dst.format = dst->format;
   blit.dst.level = trans->b.b.level;
   blit.dst.box = trans->b.b.box;
   blit.src.resource = trans->staging_prsc;
   blit.src.format = trans->staging_prsc->format;
   blit.src.level = 0;
   blit.src.box = trans->staging_box;
   blit.mask = util_format_get_mask(trans->staging_prsc->format);
   blit.filter = PIPE_TEX_FILTER_NEAREST;

   do_blit(ctx, &blit, false);
}

/* Blit the mapped region of the real resource into the staging resource
 * (the read-back half of a staging transfer):
 */
static void
fd_blit_to_staging(struct fd_context *ctx, struct fd_transfer *trans) assert_dt
{
   DBG("");
   struct pipe_resource *src = trans->b.b.resource;
   struct pipe_blit_info blit = {};

   blit.src.resource = src;
   blit.src.format = src->format;
   blit.src.level = trans->b.b.level;
   blit.src.box = trans->b.b.box;
   blit.dst.resource = trans->staging_prsc;
   blit.dst.format = trans->staging_prsc->format;
   blit.dst.level = 0;
   blit.dst.box = trans->staging_box;
   blit.mask = util_format_get_mask(trans->staging_prsc->format);
   blit.filter = PIPE_TEX_FILTER_NEAREST;

   do_blit(ctx, &blit, false);
}

static void
fd_resource_transfer_flush_region(struct pipe_context *pctx,
                                  struct pipe_transfer *ptrans,
                                  const struct pipe_box *box)
{
   struct fd_resource *rsc = fd_resource(ptrans->resource);

   /* only buffers track a valid range; grow it to cover the flushed box: */
   if (ptrans->resource->target == PIPE_BUFFER)
      util_range_add(&rsc->b.b, &rsc->valid_buffer_range,
                     ptrans->box.x + box->x,
                     ptrans->box.x + box->x + box->width);
}

/* Flush batches touching the rsc: all readers if we are about to write
 * it, otherwise just the writer:
 */
static void
flush_resource(struct fd_context *ctx, struct fd_resource *rsc,
               unsigned usage) assert_dt
{
   if (usage & PIPE_MAP_WRITE) {
      fd_bc_flush_readers(ctx, rsc);
   } else {
      fd_bc_flush_writer(ctx, rsc);
   }
}

static void
fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
   in_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_resource *rsc = fd_resource(prsc);

   flush_resource(ctx, rsc, PIPE_MAP_READ);

   /* If we had to flush a batch, make sure it makes its way all the
    * way to the kernel:
    */
   fd_resource_wait(ctx, rsc, FD_BO_PREP_FLUSH);
}

static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
                           struct pipe_transfer *ptrans)
   in_dt /* TODO for threaded-ctx we'll need to split out unsynchronized path */
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_resource *rsc = fd_resource(ptrans->resource);
   struct fd_transfer *trans = fd_transfer(ptrans);

   /* write back any staging buffer contents before dropping it: */
   if (trans->staging_prsc) {
      if (ptrans->usage & PIPE_MAP_WRITE)
         fd_blit_from_staging(ctx, trans);
      pipe_resource_reference(&trans->staging_prsc, NULL);
   }

   if (!(ptrans->usage & PIPE_MAP_UNSYNCHRONIZED)) {
      fd_bo_cpu_fini(rsc->bo);
   }

   util_range_add(&rsc->b.b, &rsc->valid_buffer_range, ptrans->box.x,
                  ptrans->box.x + ptrans->box.width);

   pipe_resource_reference(&ptrans->resource, NULL);

   assert(trans->b.staging == NULL); /* for threaded context only */

   /* Don't use pool_transfers_unsync.  We are always in the driver
    * thread.  Freeing an object into a different pool is allowed.
    */
   slab_free(&ctx->transfer_pool, ptrans);
}

/* Orphan the current storage: if the bo is busy (or has pending batch
 * references), swap in a fresh bo of the same size, otherwise just reset
 * the valid range:
 */
static void
invalidate_resource(struct fd_resource *rsc, unsigned usage) assert_dt
{
   bool needs_flush = pending(rsc, !!(usage & PIPE_MAP_WRITE));
   unsigned op = translate_usage(usage);

   if (needs_flush || resource_busy(rsc, op)) {
      rebind_resource(rsc);
      realloc_bo(rsc, fd_bo_size(rsc->bo));
   } else {
      util_range_set_empty(&rsc->valid_buffer_range);
   }
}

/* Map the bo and compute the cpu pointer for (level, box), without any
 * synchronization against pending gpu access:
 */
static void *
resource_transfer_map_unsync(struct pipe_context *pctx,
                             struct pipe_resource *prsc, unsigned level,
                             unsigned usage, const struct pipe_box *box,
                             struct fd_transfer *trans)
{
   struct fd_resource *rsc = fd_resource(prsc);
   enum pipe_format format = prsc->format;
   uint32_t offset;
   char *buf;

   buf = fd_bo_map(rsc->bo);
   /* offset in blocks, accounting for block-compressed formats: */
   offset = box->y / util_format_get_blockheight(format) * trans->b.b.stride +
            box->x / util_format_get_blockwidth(format) * rsc->layout.cpp +
            fd_resource_offset(rsc, level, box->z);

   if (usage & PIPE_MAP_WRITE)
      rsc->valid = true;

   return buf + offset;
}

/**
 * Note, with threaded_context, resource_transfer_map() is only called
 * in driver thread, but resource_transfer_map_unsync() can be called in
 * either driver or frontend thread.
 */
static void *
resource_transfer_map(struct pipe_context *pctx, struct pipe_resource *prsc,
                      unsigned level, unsigned usage,
                      const struct pipe_box *box,
                      struct fd_transfer *trans) in_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_resource *rsc = fd_resource(prsc);
   char *buf;
   int ret = 0;

   tc_assert_driver_thread(ctx->tc);

   /* Strip the read flag if the buffer has been invalidated (or is freshly
    * created).  Avoids extra staging blits of undefined data on glTexSubImage of
    * a fresh DEPTH_COMPONENT or STENCIL_INDEX texture being stored as z24s8.
    */
   if (!rsc->valid)
      usage &= ~PIPE_MAP_READ;

   /* we always need a staging texture for tiled buffers:
    *
    * TODO we might sometimes want to *also* shadow the resource to avoid
    * splitting a batch.. for ex, mid-frame texture uploads to a tiled
    * texture.
    */
   if (rsc->layout.tile_mode) {
      struct fd_resource *staging_rsc;

      assert(prsc->target != PIPE_BUFFER);

      staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
      if (staging_rsc) {
         trans->staging_prsc = &staging_rsc->b.b;
         trans->b.b.stride = fd_resource_pitch(staging_rsc, 0);
         trans->b.b.layer_stride = fd_resource_layer_stride(staging_rsc, 0);
         trans->staging_box = *box;
         trans->staging_box.x = 0;
         trans->staging_box.y = 0;
         trans->staging_box.z = 0;

         /* read maps need the current contents blitted into the staging
          * buffer first:
          */
         if (usage & PIPE_MAP_READ) {
            fd_blit_to_staging(ctx, trans);

            fd_resource_wait(ctx, staging_rsc, FD_BO_PREP_READ);
         }

         buf = fd_bo_map(staging_rsc->bo);

         ctx->stats.staging_uploads++;

         return buf;
      }
   } else if ((usage & PIPE_MAP_READ) && !fd_bo_is_cached(rsc->bo)) {
      perf_debug_ctx(ctx, "wc readback: prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d",
                     prsc, level, usage, box->width, box->height, box->x, box->y);
   }

   if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
      invalidate_resource(rsc, usage);
   } else {
      unsigned op = translate_usage(usage);
      bool needs_flush = pending(rsc, !!(usage & PIPE_MAP_WRITE));

      /* If the GPU is writing to the resource, or if it is reading from the
       * resource and we're trying to write to it, flush the renders.
       */
      bool busy = needs_flush || resource_busy(rsc, op);

      /* if we need to flush/stall, see if we can make a shadow buffer
       * to avoid this:
       *
       * TODO we could go down this path !reorder && !busy_for_read
       * ie. we only *don't* want to go down this path if the blit
       * will trigger a flush!
       */
      if (ctx->screen->reorder && busy && !(usage & PIPE_MAP_READ) &&
          (usage & PIPE_MAP_DISCARD_RANGE)) {

         /* try shadowing only if it avoids a flush, otherwise staging would
          * be better:
          */
         if (needs_flush && fd_try_shadow_resource(ctx, rsc, level, box,
                                                   DRM_FORMAT_MOD_LINEAR)) {
            needs_flush = busy = false;
            ctx->stats.shadow_uploads++;
         } else {
            struct fd_resource *staging_rsc = NULL;

            if (needs_flush) {
               flush_resource(ctx, rsc, usage);
               needs_flush = false;
            }

            /* in this case, we don't need to shadow the whole resource,
             * since any draw that references the previous contents has
             * already had rendering flushed for all tiles.  So we can
             * use a staging buffer to do the upload.
             */
            if (is_renderable(prsc))
               staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
            if (staging_rsc) {
               trans->staging_prsc = &staging_rsc->b.b;
               trans->b.b.stride = fd_resource_pitch(staging_rsc, 0);
               trans->b.b.layer_stride =
                  fd_resource_layer_stride(staging_rsc, 0);
               trans->staging_box = *box;
               trans->staging_box.x = 0;
               trans->staging_box.y = 0;
               trans->staging_box.z = 0;
               buf = fd_bo_map(staging_rsc->bo);

               ctx->stats.staging_uploads++;

               return buf;
            }
         }
      }

      if (needs_flush) {
         flush_resource(ctx, rsc, usage);
         needs_flush = false;
      }

      /* The GPU keeps track of how the various bo's are being used, and
       * will wait if necessary for the proper operation to have
       * completed.
       */
      if (busy) {
         ret = fd_resource_wait(ctx, rsc, op);
         if (ret)
            return NULL;
      }
   }

   return resource_transfer_map_unsync(pctx, prsc, level, usage, box, trans);
}

/* Refine the caller's usage flags, promoting to unsynchronized where we
 * can prove no synchronization is needed:
 */
static unsigned
improve_transfer_map_usage(struct fd_context *ctx, struct fd_resource *rsc,
                           unsigned usage, const struct pipe_box *box)
   /* Not *strictly* true, but the access to things that must only be in driver-
    * thread are protected by !(usage & TC_TRANSFER_MAP_THREADED_UNSYNC):
    */
   in_dt
{
   if (usage & TC_TRANSFER_MAP_NO_INVALIDATE) {
      usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
   }

   if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
      usage |= PIPE_MAP_UNSYNCHRONIZED;

   if (!(usage &
         (TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED | PIPE_MAP_UNSYNCHRONIZED))) {
      if (ctx->in_shadow && !(usage & PIPE_MAP_READ)) {
         usage |= PIPE_MAP_UNSYNCHRONIZED;
      } else if ((usage & PIPE_MAP_WRITE) && (rsc->b.b.target == PIPE_BUFFER) &&
                 !util_ranges_intersect(&rsc->valid_buffer_range, box->x,
                                        box->x + box->width)) {
         /* We are trying to write to a previously uninitialized range.  No need
          * to synchronize.
          */
         usage |= PIPE_MAP_UNSYNCHRONIZED;
      }
   }

   return usage;
}

static void *
fd_resource_transfer_map(struct pipe_context *pctx, struct pipe_resource *prsc,
                         unsigned level, unsigned usage,
                         const struct pipe_box *box,
                         struct pipe_transfer **pptrans)
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_resource *rsc = fd_resource(prsc);
   struct fd_transfer *trans;
   struct pipe_transfer *ptrans;

   DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
       box->width, box->height, box->x, box->y);

   /* tiled resources can never be mapped directly: */
   if ((usage & PIPE_MAP_DIRECTLY) && rsc->layout.tile_mode) {
      DBG("CANNOT MAP DIRECTLY!\n");
      return NULL;
   }

   /* threaded-unsync maps allocate from a separate transfer pool: */
   if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC) {
      ptrans = slab_alloc(&ctx->transfer_pool_unsync);
   } else {
      ptrans = slab_alloc(&ctx->transfer_pool);
   }

   if (!ptrans)
      return NULL;

   /* slab_alloc_st() doesn't zero: */
   trans = fd_transfer(ptrans);
   memset(trans, 0, sizeof(*trans));

   usage = improve_transfer_map_usage(ctx, rsc, usage, box);

   pipe_resource_reference(&ptrans->resource, prsc);
   ptrans->level = level;
   ptrans->usage = usage;
   ptrans->box = *box;
   ptrans->stride = fd_resource_pitch(rsc, level);
   ptrans->layer_stride = fd_resource_layer_stride(rsc, level);

   void *ret;
   if (usage & PIPE_MAP_UNSYNCHRONIZED) {
      ret = resource_transfer_map_unsync(pctx, prsc, level, usage, box, trans);
   } else {
      ret = resource_transfer_map(pctx, prsc, level, usage, box, trans);
   }

   if (ret) {
      *pptrans = ptrans;
   } else {
      /* on failure, unmap releases the transfer and its references: */
      fd_resource_transfer_unmap(pctx, ptrans);
   }

   return ret;
}

static void
fd_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc)
{
   struct fd_screen *screen = fd_screen(prsc->screen);
   struct fd_resource *rsc = fd_resource(prsc);

   /* replacement storage (see fd_replace_buffer_storage()) is not in the
    * batch cache:
    */
   if (!rsc->is_replacement)
      fd_bc_invalidate_resource(rsc, true);
   if (rsc->bo)
      fd_bo_del(rsc->bo);
   if (rsc->lrz)
      fd_bo_del(rsc->lrz);
   if (rsc->scanout)
      renderonly_scanout_destroy(rsc->scanout, fd_screen(pscreen)->ro);

   if (prsc->target == PIPE_BUFFER)
      util_idalloc_mt_free(&screen->buffer_ids, rsc->b.buffer_id_unique);

   threaded_resource_deinit(prsc);

   util_range_destroy(&rsc->valid_buffer_range);
   simple_mtx_destroy(&rsc->lock);
   fd_resource_tracking_reference(&rsc->track, NULL);

   FREE(rsc);
}

/* Report the DRM format modifier describing the rsc's current layout: */
static uint64_t
fd_resource_modifier(struct fd_resource *rsc)
{
   if (!rsc->layout.tile_mode)
      return DRM_FORMAT_MOD_LINEAR;

   if (rsc->layout.ubwc_layer_size)
      return DRM_FORMAT_MOD_QCOM_COMPRESSED;

   /* TODO invent a modifier for tiled but not UBWC buffers: */
   return DRM_FORMAT_MOD_INVALID;
}

static bool
fd_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *pctx,
                       struct pipe_resource *prsc, struct winsys_handle *handle,
                       unsigned usage)
{
   struct fd_resource *rsc = fd_resource(prsc);

   /* once exported, the resource is treated as shared: */
   rsc->b.is_shared = true;

   handle->modifier = fd_resource_modifier(rsc);

   DBG("%" PRSC_FMT ", modifier=%" PRIx64, PRSC_ARGS(prsc), handle->modifier);

   return fd_screen_bo_get_handle(pscreen, rsc->bo, rsc->scanout,
                                  fd_resource_pitch(rsc, 0), handle);
}

/* special case to resize query buf after allocated..
 */
void
fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
{
   struct fd_resource *rsc = fd_resource(prsc);

   /* only legal for a query buffer before any storage has been sized: */
   debug_assert(prsc->width0 == 0);
   debug_assert(prsc->target == PIPE_BUFFER);
   debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);

   prsc->width0 = sz;
   realloc_bo(rsc, fd_screen(prsc->screen)->setup_slices(rsc));
}

/* Seed the fdl_layout from the pipe_resource template: */
static void
fd_resource_layout_init(struct pipe_resource *prsc)
{
   struct fd_resource *rsc = fd_resource(prsc);
   struct fdl_layout *layout = &rsc->layout;

   layout->format = prsc->format;

   layout->width0 = prsc->width0;
   layout->height0 = prsc->height0;
   layout->depth0 = prsc->depth0;

   /* bytes per block, scaled by sample count: */
   layout->cpp = util_format_get_blocksize(prsc->format);
   layout->cpp *= fd_resource_nr_samples(prsc);
   layout->cpp_shift = ffs(layout->cpp) - 1;
}

/* Allocate and minimally initialize the fd_resource struct (no layout or
 * bo yet).  Returns NULL on allocation failure.
 */
static struct fd_resource *
alloc_resource_struct(struct pipe_screen *pscreen,
                      const struct pipe_resource *tmpl)
{
   struct fd_screen *screen = fd_screen(pscreen);
   struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);

   if (!rsc)
      return NULL;

   struct pipe_resource *prsc = &rsc->b.b;
   *prsc = *tmpl;

   pipe_reference_init(&prsc->reference, 1);
   prsc->screen = pscreen;
   rsc->hash = _mesa_hash_pointer(rsc);

   util_range_init(&rsc->valid_buffer_range);
   simple_mtx_init(&rsc->lock, mtx_plain);

   rsc->track = CALLOC_STRUCT(fd_resource_tracking);
   if (!rsc->track) {
      free(rsc);
      return NULL;
   }

   pipe_reference_init(&rsc->track->reference, 1);

   threaded_resource_init(prsc);

   if (tmpl->target == PIPE_BUFFER)
      rsc->b.buffer_id_unique = util_idalloc_mt_alloc(&screen->buffer_ids);

   return rsc;
}

/* Layout selection result.  Note that the order matters, code below
 * tests 'layout >= TILED':
 */
enum fd_layout_type {
   ERROR,
   LINEAR,
   TILED,
   UBWC,
};

static enum fd_layout_type
get_best_layout(struct fd_screen *screen,
struct pipe_resource *prsc, 1129 const struct pipe_resource *tmpl, const uint64_t *modifiers, 1130 int count) 1131{ 1132 bool implicit_modifiers = 1133 (count == 0 || 1134 drm_find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, count)); 1135 1136 /* First, find all the conditions which would force us to linear */ 1137 if (!screen->tile_mode) 1138 return LINEAR; 1139 1140 if (!screen->tile_mode(prsc)) 1141 return LINEAR; 1142 1143 if (tmpl->target == PIPE_BUFFER) 1144 return LINEAR; 1145 1146 if (tmpl->bind & PIPE_BIND_LINEAR) { 1147 if (tmpl->usage != PIPE_USAGE_STAGING) 1148 perf_debug("%" PRSC_FMT ": forcing linear: bind flags", 1149 PRSC_ARGS(prsc)); 1150 return LINEAR; 1151 } 1152 1153 if (FD_DBG(NOTILE)) 1154 return LINEAR; 1155 1156 /* Shared resources with implicit modifiers must always be linear */ 1157 if (implicit_modifiers && (tmpl->bind & PIPE_BIND_SHARED)) { 1158 perf_debug("%" PRSC_FMT 1159 ": forcing linear: shared resource + implicit modifiers", 1160 PRSC_ARGS(prsc)); 1161 return LINEAR; 1162 } 1163 1164 bool ubwc_ok = is_a6xx(screen); 1165 if (FD_DBG(NOUBWC)) 1166 ubwc_ok = false; 1167 1168 if (ubwc_ok && !implicit_modifiers && 1169 !drm_find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count)) { 1170 perf_debug("%" PRSC_FMT 1171 ": not using UBWC: not in acceptable modifier set", 1172 PRSC_ARGS(prsc)); 1173 ubwc_ok = false; 1174 } 1175 1176 if (ubwc_ok) 1177 return UBWC; 1178 1179 /* We can't use tiled with explicit modifiers, as there is no modifier token 1180 * defined for it. But we might internally force tiled allocation using a 1181 * private modifier token. 1182 * 1183 * TODO we should probably also limit TILED in a similar way to UBWC above, 1184 * once we have a public modifier token defined. 
1185 */ 1186 if (implicit_modifiers || 1187 drm_find_modifier(FD_FORMAT_MOD_QCOM_TILED, modifiers, count)) 1188 return TILED; 1189 1190 if (!drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count)) { 1191 perf_debug("%" PRSC_FMT ": need linear but not in modifier set", 1192 PRSC_ARGS(prsc)); 1193 return ERROR; 1194 } 1195 1196 perf_debug("%" PRSC_FMT ": not using tiling: explicit modifiers and no UBWC", 1197 PRSC_ARGS(prsc)); 1198 return LINEAR; 1199} 1200 1201/** 1202 * Helper that allocates a resource and resolves its layout (but doesn't 1203 * allocate its bo). 1204 * 1205 * It returns a pipe_resource (as fd_resource_create_with_modifiers() 1206 * would do), and also bo's minimum required size as an output argument. 1207 */ 1208static struct pipe_resource * 1209fd_resource_allocate_and_resolve(struct pipe_screen *pscreen, 1210 const struct pipe_resource *tmpl, 1211 const uint64_t *modifiers, int count, 1212 uint32_t *psize) 1213{ 1214 struct fd_screen *screen = fd_screen(pscreen); 1215 struct fd_resource *rsc; 1216 struct pipe_resource *prsc; 1217 enum pipe_format format = tmpl->format; 1218 uint32_t size; 1219 1220 rsc = alloc_resource_struct(pscreen, tmpl); 1221 if (!rsc) 1222 return NULL; 1223 1224 prsc = &rsc->b.b; 1225 1226 /* Clover creates buffers with PIPE_FORMAT_NONE: */ 1227 if ((prsc->target == PIPE_BUFFER) && (format == PIPE_FORMAT_NONE)) 1228 format = prsc->format = PIPE_FORMAT_R8_UNORM; 1229 1230 DBG("%" PRSC_FMT, PRSC_ARGS(prsc)); 1231 1232 if (tmpl->bind & PIPE_BIND_SHARED) 1233 rsc->b.is_shared = true; 1234 1235 fd_resource_layout_init(prsc); 1236 1237 enum fd_layout_type layout = 1238 get_best_layout(screen, prsc, tmpl, modifiers, count); 1239 if (layout == ERROR) { 1240 free(prsc); 1241 return NULL; 1242 } 1243 1244 if (layout >= TILED) 1245 rsc->layout.tile_mode = screen->tile_mode(prsc); 1246 if (layout == UBWC) 1247 rsc->layout.ubwc = true; 1248 1249 rsc->internal_format = format; 1250 1251 if (prsc->target == PIPE_BUFFER) { 1252 
assert(prsc->format == PIPE_FORMAT_R8_UNORM); 1253 size = prsc->width0; 1254 fdl_layout_buffer(&rsc->layout, size); 1255 } else { 1256 size = screen->setup_slices(rsc); 1257 } 1258 1259 /* special case for hw-query buffer, which we need to allocate before we 1260 * know the size: 1261 */ 1262 if (size == 0) { 1263 /* note, semi-intention == instead of & */ 1264 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER); 1265 *psize = 0; 1266 return prsc; 1267 } 1268 1269 /* Set the layer size if the (non-a6xx) backend hasn't done so. */ 1270 if (rsc->layout.layer_first && !rsc->layout.layer_size) { 1271 rsc->layout.layer_size = align(size, 4096); 1272 size = rsc->layout.layer_size * prsc->array_size; 1273 } 1274 1275 if (FD_DBG(LAYOUT)) 1276 fdl_dump_layout(&rsc->layout); 1277 1278 /* Hand out the resolved size. */ 1279 if (psize) 1280 *psize = size; 1281 1282 return prsc; 1283} 1284 1285/** 1286 * Create a new texture object, using the given template info. 1287 */ 1288static struct pipe_resource * 1289fd_resource_create_with_modifiers(struct pipe_screen *pscreen, 1290 const struct pipe_resource *tmpl, 1291 const uint64_t *modifiers, int count) 1292{ 1293 struct fd_screen *screen = fd_screen(pscreen); 1294 struct fd_resource *rsc; 1295 struct pipe_resource *prsc; 1296 uint32_t size; 1297 1298 /* when using kmsro, scanout buffers are allocated on the display device 1299 * create_with_modifiers() doesn't give us usage flags, so we have to 1300 * assume that all calls with modifiers are scanout-possible 1301 */ 1302 if (screen->ro && 1303 ((tmpl->bind & PIPE_BIND_SCANOUT) || 1304 !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) { 1305 struct pipe_resource scanout_templat = *tmpl; 1306 struct renderonly_scanout *scanout; 1307 struct winsys_handle handle; 1308 1309 /* note: alignment is wrong for a6xx */ 1310 scanout_templat.width0 = align(tmpl->width0, screen->info->gmem_align_w); 1311 1312 scanout = 1313 renderonly_scanout_for_resource(&scanout_templat, screen->ro, 
&handle); 1314 if (!scanout) 1315 return NULL; 1316 1317 renderonly_scanout_destroy(scanout, screen->ro); 1318 1319 assert(handle.type == WINSYS_HANDLE_TYPE_FD); 1320 rsc = fd_resource(pscreen->resource_from_handle( 1321 pscreen, tmpl, &handle, PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE)); 1322 close(handle.handle); 1323 if (!rsc) 1324 return NULL; 1325 1326 return &rsc->b.b; 1327 } 1328 1329 prsc = 1330 fd_resource_allocate_and_resolve(pscreen, tmpl, modifiers, count, &size); 1331 if (!prsc) 1332 return NULL; 1333 rsc = fd_resource(prsc); 1334 1335 realloc_bo(rsc, size); 1336 if (!rsc->bo) 1337 goto fail; 1338 1339 return prsc; 1340fail: 1341 fd_resource_destroy(pscreen, prsc); 1342 return NULL; 1343} 1344 1345static struct pipe_resource * 1346fd_resource_create(struct pipe_screen *pscreen, 1347 const struct pipe_resource *tmpl) 1348{ 1349 const uint64_t mod = DRM_FORMAT_MOD_INVALID; 1350 return fd_resource_create_with_modifiers(pscreen, tmpl, &mod, 1); 1351} 1352 1353/** 1354 * Create a texture from a winsys_handle. The handle is often created in 1355 * another process by first creating a pipe texture and then calling 1356 * resource_get_handle. 
1357 */ 1358static struct pipe_resource * 1359fd_resource_from_handle(struct pipe_screen *pscreen, 1360 const struct pipe_resource *tmpl, 1361 struct winsys_handle *handle, unsigned usage) 1362{ 1363 struct fd_screen *screen = fd_screen(pscreen); 1364 struct fd_resource *rsc = alloc_resource_struct(pscreen, tmpl); 1365 1366 if (!rsc) 1367 return NULL; 1368 1369 struct fdl_slice *slice = fd_resource_slice(rsc, 0); 1370 struct pipe_resource *prsc = &rsc->b.b; 1371 1372 DBG("%" PRSC_FMT ", modifier=%" PRIx64, PRSC_ARGS(prsc), handle->modifier); 1373 1374 rsc->b.is_shared = true; 1375 1376 fd_resource_layout_init(prsc); 1377 1378 struct fd_bo *bo = fd_screen_bo_from_handle(pscreen, handle); 1379 if (!bo) 1380 goto fail; 1381 1382 fd_resource_set_bo(rsc, bo); 1383 1384 rsc->internal_format = tmpl->format; 1385 rsc->layout.pitch0 = handle->stride; 1386 slice->offset = handle->offset; 1387 slice->size0 = handle->stride * prsc->height0; 1388 1389 /* use a pitchalign of gmem_align_w pixels, because GMEM resolve for 1390 * lower alignments is not implemented (but possible for a6xx at least) 1391 * 1392 * for UBWC-enabled resources, layout_resource_for_modifier will further 1393 * validate the pitch and set the right pitchalign 1394 */ 1395 rsc->layout.pitchalign = 1396 fdl_cpp_shift(&rsc->layout) + util_logbase2(screen->info->gmem_align_w); 1397 1398 /* apply the minimum pitchalign (note: actually 4 for a3xx but doesn't 1399 * matter) */ 1400 if (is_a6xx(screen) || is_a5xx(screen)) 1401 rsc->layout.pitchalign = MAX2(rsc->layout.pitchalign, 6); 1402 else 1403 rsc->layout.pitchalign = MAX2(rsc->layout.pitchalign, 5); 1404 1405 if (rsc->layout.pitch0 < (prsc->width0 * rsc->layout.cpp) || 1406 fd_resource_pitch(rsc, 0) != rsc->layout.pitch0) 1407 goto fail; 1408 1409 assert(rsc->layout.cpp); 1410 1411 if (screen->layout_resource_for_modifier(rsc, handle->modifier) < 0) 1412 goto fail; 1413 1414 if (screen->ro) { 1415 rsc->scanout = 1416 
renderonly_create_gpu_import_for_resource(prsc, screen->ro, NULL); 1417 /* failure is expected in some cases.. */ 1418 } 1419 1420 rsc->valid = true; 1421 1422 return prsc; 1423 1424fail: 1425 fd_resource_destroy(pscreen, prsc); 1426 return NULL; 1427} 1428 1429bool 1430fd_render_condition_check(struct pipe_context *pctx) 1431{ 1432 struct fd_context *ctx = fd_context(pctx); 1433 1434 if (!ctx->cond_query) 1435 return true; 1436 1437 perf_debug("Implementing conditional rendering using a CPU read instaed of HW conditional rendering."); 1438 1439 union pipe_query_result res = {0}; 1440 bool wait = ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT && 1441 ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; 1442 1443 if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res)) 1444 return (bool)res.u64 != ctx->cond_cond; 1445 1446 return true; 1447} 1448 1449static void 1450fd_invalidate_resource(struct pipe_context *pctx, 1451 struct pipe_resource *prsc) in_dt 1452{ 1453 struct fd_context *ctx = fd_context(pctx); 1454 struct fd_resource *rsc = fd_resource(prsc); 1455 1456 if (prsc->target == PIPE_BUFFER) { 1457 /* Handle the glInvalidateBufferData() case: 1458 */ 1459 invalidate_resource(rsc, PIPE_MAP_READ | PIPE_MAP_WRITE); 1460 } else if (rsc->track->write_batch) { 1461 /* Handle the glInvalidateFramebuffer() case, telling us that 1462 * we can skip resolve. 
1463 */ 1464 1465 struct fd_batch *batch = rsc->track->write_batch; 1466 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 1467 1468 if (pfb->zsbuf && pfb->zsbuf->texture == prsc) { 1469 batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); 1470 fd_context_dirty(ctx, FD_DIRTY_ZSA); 1471 } 1472 1473 for (unsigned i = 0; i < pfb->nr_cbufs; i++) { 1474 if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) { 1475 batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i); 1476 fd_context_dirty(ctx, FD_DIRTY_FRAMEBUFFER); 1477 } 1478 } 1479 } 1480 1481 rsc->valid = false; 1482} 1483 1484static enum pipe_format 1485fd_resource_get_internal_format(struct pipe_resource *prsc) 1486{ 1487 return fd_resource(prsc)->internal_format; 1488} 1489 1490static void 1491fd_resource_set_stencil(struct pipe_resource *prsc, 1492 struct pipe_resource *stencil) 1493{ 1494 fd_resource(prsc)->stencil = fd_resource(stencil); 1495} 1496 1497static struct pipe_resource * 1498fd_resource_get_stencil(struct pipe_resource *prsc) 1499{ 1500 struct fd_resource *rsc = fd_resource(prsc); 1501 if (rsc->stencil) 1502 return &rsc->stencil->b.b; 1503 return NULL; 1504} 1505 1506static const struct u_transfer_vtbl transfer_vtbl = { 1507 .resource_create = fd_resource_create, 1508 .resource_destroy = fd_resource_destroy, 1509 .transfer_map = fd_resource_transfer_map, 1510 .transfer_flush_region = fd_resource_transfer_flush_region, 1511 .transfer_unmap = fd_resource_transfer_unmap, 1512 .get_internal_format = fd_resource_get_internal_format, 1513 .set_stencil = fd_resource_set_stencil, 1514 .get_stencil = fd_resource_get_stencil, 1515}; 1516 1517static const uint64_t supported_modifiers[] = { 1518 DRM_FORMAT_MOD_LINEAR, 1519}; 1520 1521static int 1522fd_layout_resource_for_modifier(struct fd_resource *rsc, uint64_t modifier) 1523{ 1524 switch (modifier) { 1525 case DRM_FORMAT_MOD_LINEAR: 1526 /* The dri gallium frontend will pass DRM_FORMAT_MOD_INVALID to us 1527 * when it's called through any of the 
non-modifier BO create entry 1528 * points. Other drivers will determine tiling from the kernel or 1529 * other legacy backchannels, but for freedreno it just means 1530 * LINEAR. */ 1531 case DRM_FORMAT_MOD_INVALID: 1532 return 0; 1533 default: 1534 return -1; 1535 } 1536} 1537 1538static struct pipe_resource * 1539fd_resource_from_memobj(struct pipe_screen *pscreen, 1540 const struct pipe_resource *tmpl, 1541 struct pipe_memory_object *pmemobj, uint64_t offset) 1542{ 1543 struct fd_screen *screen = fd_screen(pscreen); 1544 struct fd_memory_object *memobj = fd_memory_object(pmemobj); 1545 struct pipe_resource *prsc; 1546 struct fd_resource *rsc; 1547 uint32_t size; 1548 assert(memobj->bo); 1549 1550 /* We shouldn't get a scanout buffer here. */ 1551 assert(!(tmpl->bind & PIPE_BIND_SCANOUT)); 1552 1553 uint64_t modifiers = DRM_FORMAT_MOD_INVALID; 1554 if (tmpl->bind & PIPE_BIND_LINEAR) { 1555 modifiers = DRM_FORMAT_MOD_LINEAR; 1556 } else if (is_a6xx(screen) && tmpl->width0 >= FDL_MIN_UBWC_WIDTH) { 1557 modifiers = DRM_FORMAT_MOD_QCOM_COMPRESSED; 1558 } 1559 1560 /* Allocate new pipe resource. */ 1561 prsc = fd_resource_allocate_and_resolve(pscreen, tmpl, &modifiers, 1, &size); 1562 if (!prsc) 1563 return NULL; 1564 rsc = fd_resource(prsc); 1565 rsc->b.is_shared = true; 1566 1567 /* bo's size has to be large enough, otherwise cleanup resource and fail 1568 * gracefully. 1569 */ 1570 if (fd_bo_size(memobj->bo) < size) { 1571 fd_resource_destroy(pscreen, prsc); 1572 return NULL; 1573 } 1574 1575 /* Share the bo with the memory object. 
*/ 1576 fd_resource_set_bo(rsc, fd_bo_ref(memobj->bo)); 1577 1578 return prsc; 1579} 1580 1581static struct pipe_memory_object * 1582fd_memobj_create_from_handle(struct pipe_screen *pscreen, 1583 struct winsys_handle *whandle, bool dedicated) 1584{ 1585 struct fd_memory_object *memobj = CALLOC_STRUCT(fd_memory_object); 1586 if (!memobj) 1587 return NULL; 1588 1589 struct fd_bo *bo = fd_screen_bo_from_handle(pscreen, whandle); 1590 if (!bo) { 1591 free(memobj); 1592 return NULL; 1593 } 1594 1595 memobj->b.dedicated = dedicated; 1596 memobj->bo = bo; 1597 1598 return &memobj->b; 1599} 1600 1601static void 1602fd_memobj_destroy(struct pipe_screen *pscreen, 1603 struct pipe_memory_object *pmemobj) 1604{ 1605 struct fd_memory_object *memobj = fd_memory_object(pmemobj); 1606 1607 assert(memobj->bo); 1608 fd_bo_del(memobj->bo); 1609 1610 free(pmemobj); 1611} 1612 1613void 1614fd_resource_screen_init(struct pipe_screen *pscreen) 1615{ 1616 struct fd_screen *screen = fd_screen(pscreen); 1617 bool fake_rgtc = screen->gen < 4; 1618 1619 pscreen->resource_create = u_transfer_helper_resource_create; 1620 /* NOTE: u_transfer_helper does not yet support the _with_modifiers() 1621 * variant: 1622 */ 1623 pscreen->resource_create_with_modifiers = fd_resource_create_with_modifiers; 1624 pscreen->resource_from_handle = fd_resource_from_handle; 1625 pscreen->resource_get_handle = fd_resource_get_handle; 1626 pscreen->resource_destroy = u_transfer_helper_resource_destroy; 1627 1628 pscreen->transfer_helper = 1629 u_transfer_helper_create(&transfer_vtbl, true, false, fake_rgtc, true); 1630 1631 if (!screen->layout_resource_for_modifier) 1632 screen->layout_resource_for_modifier = fd_layout_resource_for_modifier; 1633 if (!screen->supported_modifiers) { 1634 screen->supported_modifiers = supported_modifiers; 1635 screen->num_supported_modifiers = ARRAY_SIZE(supported_modifiers); 1636 } 1637 1638 /* GL_EXT_memory_object */ 1639 pscreen->memobj_create_from_handle = 
fd_memobj_create_from_handle; 1640 pscreen->memobj_destroy = fd_memobj_destroy; 1641 pscreen->resource_from_memobj = fd_resource_from_memobj; 1642} 1643 1644static void 1645fd_get_sample_position(struct pipe_context *context, unsigned sample_count, 1646 unsigned sample_index, float *pos_out) 1647{ 1648 /* The following is copied from nouveau/nv50 except for position 1649 * values, which are taken from blob driver */ 1650 static const uint8_t pos1[1][2] = {{0x8, 0x8}}; 1651 static const uint8_t pos2[2][2] = {{0xc, 0xc}, {0x4, 0x4}}; 1652 static const uint8_t pos4[4][2] = {{0x6, 0x2}, 1653 {0xe, 0x6}, 1654 {0x2, 0xa}, 1655 {0xa, 0xe}}; 1656 /* TODO needs to be verified on supported hw */ 1657 static const uint8_t pos8[8][2] = {{0x9, 0x5}, {0x7, 0xb}, {0xd, 0x9}, 1658 {0x5, 0x3}, {0x3, 0xd}, {0x1, 0x7}, 1659 {0xb, 0xf}, {0xf, 0x1}}; 1660 1661 const uint8_t(*ptr)[2]; 1662 1663 switch (sample_count) { 1664 case 1: 1665 ptr = pos1; 1666 break; 1667 case 2: 1668 ptr = pos2; 1669 break; 1670 case 4: 1671 ptr = pos4; 1672 break; 1673 case 8: 1674 ptr = pos8; 1675 break; 1676 default: 1677 assert(0); 1678 return; 1679 } 1680 1681 pos_out[0] = ptr[sample_index][0] / 16.0f; 1682 pos_out[1] = ptr[sample_index][1] / 16.0f; 1683} 1684 1685static void 1686fd_blit_pipe(struct pipe_context *pctx, 1687 const struct pipe_blit_info *blit_info) in_dt 1688{ 1689 /* wrap fd_blit to return void */ 1690 fd_blit(pctx, blit_info); 1691} 1692 1693void 1694fd_resource_context_init(struct pipe_context *pctx) 1695{ 1696 pctx->buffer_map = u_transfer_helper_transfer_map; 1697 pctx->texture_map = u_transfer_helper_transfer_map; 1698 pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; 1699 pctx->buffer_unmap = u_transfer_helper_transfer_unmap; 1700 pctx->texture_unmap = u_transfer_helper_transfer_unmap; 1701 pctx->buffer_subdata = u_default_buffer_subdata; 1702 pctx->texture_subdata = u_default_texture_subdata; 1703 pctx->create_surface = fd_create_surface; 1704 
pctx->surface_destroy = fd_surface_destroy; 1705 pctx->resource_copy_region = fd_resource_copy_region; 1706 pctx->blit = fd_blit_pipe; 1707 pctx->flush_resource = fd_flush_resource; 1708 pctx->invalidate_resource = fd_invalidate_resource; 1709 pctx->get_sample_position = fd_get_sample_position; 1710} 1711