1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Jerome Glisse 25 * Corbin Simpson 26 */ 27#include "r600_pipe_common.h" 28#include "r600_cs.h" 29#include "r600_query.h" 30#include "util/format/u_format.h" 31#include "util/u_log.h" 32#include "util/u_memory.h" 33#include "util/u_pack_color.h" 34#include "util/u_surface.h" 35#include "util/os_time.h" 36#include "frontend/winsys_handle.h" 37#include <errno.h> 38#include <inttypes.h> 39 40static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, 41 struct r600_texture *rtex); 42static enum radeon_surf_mode 43r600_choose_tiling(struct r600_common_screen *rscreen, 44 const struct pipe_resource *templ); 45 46 47bool r600_prepare_for_dma_blit(struct r600_common_context *rctx, 48 struct r600_texture *rdst, 49 unsigned dst_level, unsigned dstx, 50 unsigned dsty, unsigned dstz, 51 struct r600_texture *rsrc, 52 unsigned src_level, 53 const struct pipe_box *src_box) 54{ 55 if (!rctx->dma.cs.priv) 56 return false; 57 58 if (rdst->surface.bpe != rsrc->surface.bpe) 59 return false; 60 61 /* MSAA: Blits don't exist in the real world. */ 62 if (rsrc->resource.b.b.nr_samples > 1 || 63 rdst->resource.b.b.nr_samples > 1) 64 return false; 65 66 /* Depth-stencil surfaces: 67 * When dst is linear, the DB->CB copy preserves HTILE. 68 * When dst is tiled, the 3D path must be used to update HTILE. 69 */ 70 if (rsrc->is_depth || rdst->is_depth) 71 return false; 72 73 /* CMASK as: 74 * src: Both texture and SDMA paths need decompression. Use SDMA. 75 * dst: If overwriting the whole texture, discard CMASK and use 76 * SDMA. Otherwise, use the 3D path. 77 */ 78 if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) { 79 /* The CMASK clear is only enabled for the first level. */ 80 assert(dst_level == 0); 81 if (!util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level, 82 dstx, dsty, dstz, src_box->width, 83 src_box->height, src_box->depth)) 84 return false; 85 86 r600_texture_discard_cmask(rctx->screen, rdst); 87 } 88 89 /* All requirements are met. Prepare textures for SDMA. */ 90 if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level)) 91 rctx->b.flush_resource(&rctx->b, &rsrc->resource.b.b); 92 93 assert(!(rsrc->dirty_level_mask & (1 << src_level))); 94 assert(!(rdst->dirty_level_mask & (1 << dst_level))); 95 96 return true; 97} 98 99/* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */ 100static void r600_copy_region_with_blit(struct pipe_context *pipe, 101 struct pipe_resource *dst, 102 unsigned dst_level, 103 unsigned dstx, unsigned dsty, unsigned dstz, 104 struct pipe_resource *src, 105 unsigned src_level, 106 const struct pipe_box *src_box) 107{ 108 struct pipe_blit_info blit; 109 110 memset(&blit, 0, sizeof(blit)); 111 blit.src.resource = src; 112 blit.src.format = src->format; 113 blit.src.level = src_level; 114 blit.src.box = *src_box; 115 blit.dst.resource = dst; 116 blit.dst.format = dst->format; 117 blit.dst.level = dst_level; 118 blit.dst.box.x = dstx; 119 blit.dst.box.y = dsty; 120 blit.dst.box.z = dstz; 121 blit.dst.box.width = src_box->width; 122 blit.dst.box.height = src_box->height; 123 blit.dst.box.depth = src_box->depth; 124 blit.mask = util_format_get_mask(src->format) & 125 util_format_get_mask(dst->format); 126 blit.filter = PIPE_TEX_FILTER_NEAREST; 127 128 if (blit.mask) { 129 pipe->blit(pipe, &blit); 130 } 131} 132 133/* Copy from a full GPU texture to a transfer's staging one. */ 134static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) 135{ 136 struct r600_common_context *rctx = (struct r600_common_context*)ctx; 137 struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; 138 struct pipe_resource *dst = &rtransfer->staging->b.b; 139 struct pipe_resource *src = transfer->resource; 140 141 if (src->nr_samples > 1) { 142 r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0, 143 src, transfer->level, &transfer->box); 144 return; 145 } 146 147 rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level, 148 &transfer->box); 149} 150 151/* Copy from a transfer's staging texture to a full GPU one. */ 152static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) 153{ 154 struct r600_common_context *rctx = (struct r600_common_context*)ctx; 155 struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; 156 struct pipe_resource *dst = transfer->resource; 157 struct pipe_resource *src = &rtransfer->staging->b.b; 158 struct pipe_box sbox; 159 160 u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox); 161 162 if (dst->nr_samples > 1) { 163 r600_copy_region_with_blit(ctx, dst, transfer->level, 164 transfer->box.x, transfer->box.y, transfer->box.z, 165 src, 0, &sbox); 166 return; 167 } 168 169 rctx->dma_copy(ctx, dst, transfer->level, 170 transfer->box.x, transfer->box.y, transfer->box.z, 171 src, 0, &sbox); 172} 173 174static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen, 175 struct r600_texture *rtex, unsigned level, 176 const struct pipe_box *box, 177 unsigned *stride, 178 unsigned *layer_stride) 179{ 180 *stride = rtex->surface.u.legacy.level[level].nblk_x * 181 rtex->surface.bpe; 182 assert((uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4 <= UINT_MAX); 183 *layer_stride = (uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4; 184 185 if (!box) 186 return (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256; 187 188 /* Each texture is an array of mipmap levels. Each level is 189 * an array of slices. */ 190 return (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256 + 191 box->z * (uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4 + 192 (box->y / rtex->surface.blk_h * 193 rtex->surface.u.legacy.level[level].nblk_x + 194 box->x / rtex->surface.blk_w) * rtex->surface.bpe; 195} 196 197static int r600_init_surface(struct r600_common_screen *rscreen, 198 struct radeon_surf *surface, 199 const struct pipe_resource *ptex, 200 enum radeon_surf_mode array_mode, 201 unsigned pitch_in_bytes_override, 202 unsigned offset, 203 bool is_imported, 204 bool is_scanout, 205 bool is_flushed_depth) 206{ 207 const struct util_format_description *desc = 208 util_format_description(ptex->format); 209 bool is_depth, is_stencil; 210 int r; 211 unsigned i, bpe, flags = 0; 212 213 is_depth = util_format_has_depth(desc); 214 is_stencil = util_format_has_stencil(desc); 215 216 if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth && 217 ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { 218 bpe = 4; /* stencil is allocated separately on evergreen */ 219 } else { 220 bpe = util_format_get_blocksize(ptex->format); 221 assert(util_is_power_of_two_or_zero(bpe)); 222 } 223 224 if (!is_flushed_depth && is_depth) { 225 flags |= RADEON_SURF_ZBUFFER; 226 227 if (is_stencil) 228 flags |= RADEON_SURF_SBUFFER; 229 } 230 231 if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) { 232 /* This should catch bugs in gallium users setting incorrect flags. */ 233 assert(ptex->nr_samples <= 1 && 234 ptex->array_size == 1 && 235 ptex->depth0 == 1 && 236 ptex->last_level == 0 && 237 !(flags & RADEON_SURF_Z_OR_SBUFFER)); 238 239 flags |= RADEON_SURF_SCANOUT; 240 } 241 242 if (ptex->bind & PIPE_BIND_SHARED) 243 flags |= RADEON_SURF_SHAREABLE; 244 if (is_imported) 245 flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE; 246 247 r = rscreen->ws->surface_init(rscreen->ws, ptex, 248 flags, bpe, array_mode, surface); 249 if (r) { 250 return r; 251 } 252 253 if (pitch_in_bytes_override && 254 pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) { 255 /* old ddx on evergreen over estimate alignment for 1d, only 1 level 256 * for those 257 */ 258 surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe; 259 surface->u.legacy.level[0].slice_size_dw = 260 ((uint64_t)pitch_in_bytes_override * surface->u.legacy.level[0].nblk_y) / 4; 261 } 262 263 if (offset) { 264 for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i) 265 surface->u.legacy.level[i].offset_256B += offset / 256; 266 } 267 268 return 0; 269} 270 271static void r600_texture_init_metadata(struct r600_common_screen *rscreen, 272 struct r600_texture *rtex, 273 struct radeon_bo_metadata *metadata) 274{ 275 struct radeon_surf *surface = &rtex->surface; 276 277 memset(metadata, 0, sizeof(*metadata)); 278 279 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? 280 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; 281 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? 282 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; 283 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config; 284 metadata->u.legacy.bankw = surface->u.legacy.bankw; 285 metadata->u.legacy.bankh = surface->u.legacy.bankh; 286 metadata->u.legacy.tile_split = surface->u.legacy.tile_split; 287 metadata->u.legacy.mtilea = surface->u.legacy.mtilea; 288 metadata->u.legacy.num_banks = surface->u.legacy.num_banks; 289 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe; 290 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; 291} 292 293static void r600_surface_import_metadata(struct r600_common_screen *rscreen, 294 struct radeon_surf *surf, 295 struct radeon_bo_metadata *metadata, 296 enum radeon_surf_mode *array_mode, 297 bool *is_scanout) 298{ 299 surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config; 300 surf->u.legacy.bankw = metadata->u.legacy.bankw; 301 surf->u.legacy.bankh = metadata->u.legacy.bankh; 302 surf->u.legacy.tile_split = metadata->u.legacy.tile_split; 303 surf->u.legacy.mtilea = metadata->u.legacy.mtilea; 304 surf->u.legacy.num_banks = metadata->u.legacy.num_banks; 305 306 if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED) 307 *array_mode = RADEON_SURF_MODE_2D; 308 else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED) 309 *array_mode = RADEON_SURF_MODE_1D; 310 else 311 *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; 312 313 *is_scanout = metadata->u.legacy.scanout; 314} 315 316static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx, 317 struct r600_texture *rtex) 318{ 319 struct r600_common_screen *rscreen = rctx->screen; 320 struct pipe_context *ctx = &rctx->b; 321 322 if (ctx == rscreen->aux_context) 323 mtx_lock(&rscreen->aux_context_lock); 324 325 ctx->flush_resource(ctx, &rtex->resource.b.b); 326 ctx->flush(ctx, NULL, 0); 327 328 if (ctx == rscreen->aux_context) 329 mtx_unlock(&rscreen->aux_context_lock); 330} 331 332static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, 333 struct r600_texture *rtex) 334{ 335 if (!rtex->cmask.size) 336 return; 337 338 assert(rtex->resource.b.b.nr_samples <= 1); 339 340 /* Disable CMASK. */ 341 memset(&rtex->cmask, 0, sizeof(rtex->cmask)); 342 rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8; 343 rtex->dirty_level_mask = 0; 344 345 rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1); 346 347 if (rtex->cmask_buffer != &rtex->resource) 348 r600_resource_reference(&rtex->cmask_buffer, NULL); 349 350 /* Notify all contexts about the change. */ 351 p_atomic_inc(&rscreen->dirty_tex_counter); 352 p_atomic_inc(&rscreen->compressed_colortex_counter); 353} 354 355static void r600_reallocate_texture_inplace(struct r600_common_context *rctx, 356 struct r600_texture *rtex, 357 unsigned new_bind_flag, 358 bool invalidate_storage) 359{ 360 struct pipe_screen *screen = rctx->b.screen; 361 struct r600_texture *new_tex; 362 struct pipe_resource templ = rtex->resource.b.b; 363 unsigned i; 364 365 templ.bind |= new_bind_flag; 366 367 /* r600g doesn't react to dirty_tex_descriptor_counter */ 368 if (rctx->chip_class < GFX6) 369 return; 370 371 if (rtex->resource.b.is_shared) 372 return; 373 374 if (new_bind_flag == PIPE_BIND_LINEAR) { 375 if (rtex->surface.is_linear) 376 return; 377 378 /* This fails with MSAA, depth, and compressed textures. */ 379 if (r600_choose_tiling(rctx->screen, &templ) != 380 RADEON_SURF_MODE_LINEAR_ALIGNED) 381 return; 382 } 383 384 new_tex = (struct r600_texture*)screen->resource_create(screen, &templ); 385 if (!new_tex) 386 return; 387 388 /* Copy the pixels to the new texture. */ 389 if (!invalidate_storage) { 390 for (i = 0; i <= templ.last_level; i++) { 391 struct pipe_box box; 392 393 u_box_3d(0, 0, 0, 394 u_minify(templ.width0, i), u_minify(templ.height0, i), 395 util_num_layers(&templ, i), &box); 396 397 rctx->dma_copy(&rctx->b, &new_tex->resource.b.b, i, 0, 0, 0, 398 &rtex->resource.b.b, i, &box); 399 } 400 } 401 402 if (new_bind_flag == PIPE_BIND_LINEAR) { 403 r600_texture_discard_cmask(rctx->screen, rtex); 404 } 405 406 /* Replace the structure fields of rtex. */ 407 rtex->resource.b.b.bind = templ.bind; 408 pb_reference(&rtex->resource.buf, new_tex->resource.buf); 409 rtex->resource.gpu_address = new_tex->resource.gpu_address; 410 rtex->resource.vram_usage = new_tex->resource.vram_usage; 411 rtex->resource.gart_usage = new_tex->resource.gart_usage; 412 rtex->resource.bo_size = new_tex->resource.bo_size; 413 rtex->resource.bo_alignment = new_tex->resource.bo_alignment; 414 rtex->resource.domains = new_tex->resource.domains; 415 rtex->resource.flags = new_tex->resource.flags; 416 rtex->size = new_tex->size; 417 rtex->db_render_format = new_tex->db_render_format; 418 rtex->db_compatible = new_tex->db_compatible; 419 rtex->can_sample_z = new_tex->can_sample_z; 420 rtex->can_sample_s = new_tex->can_sample_s; 421 rtex->surface = new_tex->surface; 422 rtex->fmask = new_tex->fmask; 423 rtex->cmask = new_tex->cmask; 424 rtex->cb_color_info = new_tex->cb_color_info; 425 rtex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode; 426 rtex->htile_offset = new_tex->htile_offset; 427 rtex->depth_cleared = new_tex->depth_cleared; 428 rtex->stencil_cleared = new_tex->stencil_cleared; 429 rtex->non_disp_tiling = new_tex->non_disp_tiling; 430 rtex->framebuffers_bound = new_tex->framebuffers_bound; 431 432 if (new_bind_flag == PIPE_BIND_LINEAR) { 433 assert(!rtex->htile_offset); 434 assert(!rtex->cmask.size); 435 assert(!rtex->fmask.size); 436 assert(!rtex->is_depth); 437 } 438 439 r600_texture_reference(&new_tex, NULL); 440 441 p_atomic_inc(&rctx->screen->dirty_tex_counter); 442} 443 444static void r600_texture_get_info(struct pipe_screen* screen, 445 struct pipe_resource *resource, 446 unsigned *pstride, 447 unsigned *poffset) 448{ 449 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 450 struct r600_texture *rtex = (struct r600_texture*)resource; 451 unsigned stride = 0; 452 unsigned offset = 0; 453 454 if (!rscreen || !rtex) 455 return; 456 457 if (resource->target != PIPE_BUFFER) { 458 offset = (uint64_t)rtex->surface.u.legacy.level[0].offset_256B * 256; 459 stride = rtex->surface.u.legacy.level[0].nblk_x * 460 rtex->surface.bpe; 461 } 462 463 if (pstride) 464 *pstride = stride; 465 466 if (poffset) 467 *poffset = offset; 468} 469 470static bool r600_texture_get_handle(struct pipe_screen* screen, 471 struct pipe_context *ctx, 472 struct pipe_resource *resource, 473 struct winsys_handle *whandle, 474 unsigned usage) 475{ 476 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 477 struct r600_common_context *rctx; 478 struct r600_resource *res = (struct r600_resource*)resource; 479 struct r600_texture *rtex = (struct r600_texture*)resource; 480 struct radeon_bo_metadata metadata; 481 bool update_metadata = false; 482 unsigned stride, offset, slice_size; 483 484 ctx = threaded_context_unwrap_sync(ctx); 485 rctx = (struct r600_common_context*)(ctx ? ctx : rscreen->aux_context); 486 487 if (resource->target != PIPE_BUFFER) { 488 /* This is not supported now, but it might be required for OpenCL 489 * interop in the future. 490 */ 491 if (resource->nr_samples > 1 || rtex->is_depth) 492 return false; 493 494 /* Move a suballocated texture into a non-suballocated allocation. */ 495 if (rscreen->ws->buffer_is_suballocated(res->buf) || 496 rtex->surface.tile_swizzle) { 497 assert(!res->b.is_shared); 498 r600_reallocate_texture_inplace(rctx, rtex, 499 PIPE_BIND_SHARED, false); 500 rctx->b.flush(&rctx->b, NULL, 0); 501 assert(res->b.b.bind & PIPE_BIND_SHARED); 502 assert(res->flags & RADEON_FLAG_NO_SUBALLOC); 503 assert(rtex->surface.tile_swizzle == 0); 504 } 505 506 if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && 507 rtex->cmask.size) { 508 /* Eliminate fast clear (CMASK) */ 509 r600_eliminate_fast_color_clear(rctx, rtex); 510 511 /* Disable CMASK if flush_resource isn't going 512 * to be called. 513 */ 514 if (rtex->cmask.size) 515 r600_texture_discard_cmask(rscreen, rtex); 516 } 517 518 /* Set metadata. */ 519 if (!res->b.is_shared || update_metadata) { 520 r600_texture_init_metadata(rscreen, rtex, &metadata); 521 522 rscreen->ws->buffer_set_metadata(rscreen->ws, res->buf, &metadata, NULL); 523 } 524 525 slice_size = (uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw * 4; 526 } else { 527 /* Move a suballocated buffer into a non-suballocated allocation. */ 528 if (rscreen->ws->buffer_is_suballocated(res->buf)) { 529 assert(!res->b.is_shared); 530 531 /* Allocate a new buffer with PIPE_BIND_SHARED. */ 532 struct pipe_resource templ = res->b.b; 533 templ.bind |= PIPE_BIND_SHARED; 534 535 struct pipe_resource *newb = 536 screen->resource_create(screen, &templ); 537 if (!newb) 538 return false; 539 540 /* Copy the old buffer contents to the new one. */ 541 struct pipe_box box; 542 u_box_1d(0, newb->width0, &box); 543 rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0, 544 &res->b.b, 0, &box); 545 /* Move the new buffer storage to the old pipe_resource. */ 546 r600_replace_buffer_storage(&rctx->b, &res->b.b, newb); 547 pipe_resource_reference(&newb, NULL); 548 549 assert(res->b.b.bind & PIPE_BIND_SHARED); 550 assert(res->flags & RADEON_FLAG_NO_SUBALLOC); 551 } 552 553 /* Buffers */ 554 slice_size = 0; 555 } 556 557 r600_texture_get_info(screen, resource, &stride, &offset); 558 559 if (res->b.is_shared) { 560 /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user 561 * doesn't set it. 562 */ 563 res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH; 564 if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) 565 res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH; 566 } else { 567 res->b.is_shared = true; 568 res->external_usage = usage; 569 } 570 571 whandle->stride = stride; 572 whandle->offset = offset + slice_size * whandle->layer; 573 574 return rscreen->ws->buffer_get_handle(rscreen->ws, res->buf, whandle); 575} 576 577void r600_texture_destroy(struct pipe_screen *screen, struct pipe_resource *ptex) 578{ 579 struct r600_texture *rtex = (struct r600_texture*)ptex; 580 struct r600_resource *resource = &rtex->resource; 581 582 r600_texture_reference(&rtex->flushed_depth_texture, NULL); 583 pipe_resource_reference((struct pipe_resource**)&resource->immed_buffer, NULL); 584 585 if (rtex->cmask_buffer != &rtex->resource) { 586 r600_resource_reference(&rtex->cmask_buffer, NULL); 587 } 588 pb_reference(&resource->buf, NULL); 589 FREE(rtex); 590} 591 592/* The number of samples can be specified independently of the texture. */ 593void r600_texture_get_fmask_info(struct r600_common_screen *rscreen, 594 struct r600_texture *rtex, 595 unsigned nr_samples, 596 struct r600_fmask_info *out) 597{ 598 /* FMASK is allocated like an ordinary texture. */ 599 struct pipe_resource templ = rtex->resource.b.b; 600 struct radeon_surf fmask = {}; 601 unsigned flags, bpe; 602 603 memset(out, 0, sizeof(*out)); 604 605 templ.nr_samples = 1; 606 flags = rtex->surface.flags | RADEON_SURF_FMASK; 607 608 /* Use the same parameters and tile mode. */ 609 fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw; 610 fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh; 611 fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea; 612 fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split; 613 614 if (nr_samples <= 4) 615 fmask.u.legacy.bankh = 4; 616 617 switch (nr_samples) { 618 case 2: 619 case 4: 620 bpe = 1; 621 break; 622 case 8: 623 bpe = 4; 624 break; 625 default: 626 R600_ERR("Invalid sample count for FMASK allocation.\n"); 627 return; 628 } 629 630 /* Overallocate FMASK on R600-R700 to fix colorbuffer corruption. 631 * This can be fixed by writing a separate FMASK allocator specifically 632 * for R600-R700 asics. */ 633 if (rscreen->chip_class <= R700) { 634 bpe *= 2; 635 } 636 637 if (rscreen->ws->surface_init(rscreen->ws, &templ, 638 flags, bpe, RADEON_SURF_MODE_2D, &fmask)) { 639 R600_ERR("Got error in surface_init while allocating FMASK.\n"); 640 return; 641 } 642 643 assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); 644 645 out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64; 646 if (out->slice_tile_max) 647 out->slice_tile_max -= 1; 648 649 out->tile_mode_index = fmask.u.legacy.tiling_index[0]; 650 out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x; 651 out->bank_height = fmask.u.legacy.bankh; 652 out->tile_swizzle = fmask.tile_swizzle; 653 out->alignment = MAX2(256, 1 << fmask.surf_alignment_log2); 654 out->size = fmask.surf_size; 655} 656 657static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen, 658 struct r600_texture *rtex) 659{ 660 r600_texture_get_fmask_info(rscreen, rtex, 661 rtex->resource.b.b.nr_samples, &rtex->fmask); 662 663 rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment); 664 rtex->size = rtex->fmask.offset + rtex->fmask.size; 665} 666 667void r600_texture_get_cmask_info(struct r600_common_screen *rscreen, 668 struct r600_texture *rtex, 669 struct r600_cmask_info *out) 670{ 671 unsigned cmask_tile_width = 8; 672 unsigned cmask_tile_height = 8; 673 unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height; 674 unsigned element_bits = 4; 675 unsigned cmask_cache_bits = 1024; 676 unsigned num_pipes = rscreen->info.num_tile_pipes; 677 unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; 678 679 unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes; 680 unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements; 681 unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile); 682 unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile); 683 unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width; 684 685 unsigned pitch_elements = align(rtex->resource.b.b.width0, macro_tile_width); 686 unsigned height = align(rtex->resource.b.b.height0, macro_tile_height); 687 688 unsigned base_align = num_pipes * pipe_interleave_bytes; 689 unsigned slice_bytes = 690 ((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements; 691 692 assert(macro_tile_width % 128 == 0); 693 assert(macro_tile_height % 128 == 0); 694 695 out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1; 696 out->alignment = MAX2(256, base_align); 697 out->size = util_num_layers(&rtex->resource.b.b, 0) * 698 align(slice_bytes, base_align); 699} 700 701static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen, 702 struct r600_texture *rtex) 703{ 704 r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); 705 706 rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment); 707 rtex->size = rtex->cmask.offset + rtex->cmask.size; 708 709 rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); 710} 711 712static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen, 713 struct r600_texture *rtex) 714{ 715 if (rtex->cmask_buffer) 716 return; 717 718 assert(rtex->cmask.size == 0); 719 720 r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); 721 722 rtex->cmask_buffer = (struct r600_resource *) 723 r600_aligned_buffer_create(&rscreen->b, 724 R600_RESOURCE_FLAG_UNMAPPABLE, 725 PIPE_USAGE_DEFAULT, 726 rtex->cmask.size, 727 rtex->cmask.alignment); 728 if (rtex->cmask_buffer == NULL) { 729 rtex->cmask.size = 0; 730 return; 731 } 732 733 /* update colorbuffer state bits */ 734 rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8; 735 736 rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); 737 738 p_atomic_inc(&rscreen->compressed_colortex_counter); 739} 740 741void eg_resource_alloc_immed(struct r600_common_screen *rscreen, 742 struct r600_resource *res, 743 unsigned immed_size) 744{ 745 res->immed_buffer = (struct r600_resource *) 746 pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, 747 PIPE_USAGE_DEFAULT, immed_size); 748} 749 750static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, 751 struct r600_texture *rtex) 752{ 753 unsigned cl_width, cl_height, width, height; 754 unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; 755 unsigned num_pipes = rscreen->info.num_tile_pipes; 756 757 rtex->surface.meta_size = 0; 758 759 if (rscreen->chip_class <= EVERGREEN && 760 rscreen->info.drm_minor < 26) 761 return; 762 763 /* HW bug on R6xx. */ 764 if (rscreen->chip_class == R600 && 765 (rtex->resource.b.b.width0 > 7680 || 766 rtex->resource.b.b.height0 > 7680)) 767 return; 768 769 switch (num_pipes) { 770 case 1: 771 cl_width = 32; 772 cl_height = 16; 773 break; 774 case 2: 775 cl_width = 32; 776 cl_height = 32; 777 break; 778 case 4: 779 cl_width = 64; 780 cl_height = 32; 781 break; 782 case 8: 783 cl_width = 64; 784 cl_height = 64; 785 break; 786 case 16: 787 cl_width = 128; 788 cl_height = 64; 789 break; 790 default: 791 assert(0); 792 return; 793 } 794 795 width = align(rtex->surface.u.legacy.level[0].nblk_x, cl_width * 8); 796 height = align(rtex->surface.u.legacy.level[0].nblk_y, cl_height * 8); 797 798 slice_elements = (width * height) / (8 * 8); 799 slice_bytes = slice_elements * 4; 800 801 pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; 802 base_align = num_pipes * pipe_interleave_bytes; 803 804 rtex->surface.meta_alignment_log2 = util_logbase2(base_align); 805 rtex->surface.meta_size = 806 util_num_layers(&rtex->resource.b.b, 0) * 807 align(slice_bytes, base_align); 808} 809 810static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, 811 struct r600_texture *rtex) 812{ 813 r600_texture_get_htile_size(rscreen, rtex); 814 815 if (!rtex->surface.meta_size) 816 return; 817 818 rtex->htile_offset = align(rtex->size, 1 << rtex->surface.meta_alignment_log2); 819 rtex->size = rtex->htile_offset + rtex->surface.meta_size; 820} 821 822void r600_print_texture_info(struct r600_common_screen *rscreen, 823 struct r600_texture *rtex, struct u_log_context *log) 824{ 825 int i; 826 827 /* Common parameters. */ 828 u_log_printf(log, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, " 829 "blk_h=%u, array_size=%u, last_level=%u, " 830 "bpe=%u, nsamples=%u, flags=0x%"PRIx64", %s\n", 831 rtex->resource.b.b.width0, rtex->resource.b.b.height0, 832 rtex->resource.b.b.depth0, rtex->surface.blk_w, 833 rtex->surface.blk_h, 834 rtex->resource.b.b.array_size, rtex->resource.b.b.last_level, 835 rtex->surface.bpe, rtex->resource.b.b.nr_samples, 836 rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format)); 837 838 u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, " 839 "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n", 840 rtex->surface.surf_size, 1 << rtex->surface.surf_alignment_log2, rtex->surface.u.legacy.bankw, 841 rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea, 842 rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config, 843 (rtex->surface.flags & RADEON_SURF_SCANOUT) != 0); 844 845 if (rtex->fmask.size) 846 u_log_printf(log, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, " 847 "bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n", 848 rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment, 849 rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height, 850 rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index); 851 852 if (rtex->cmask.size) 853 u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, " 854 "slice_tile_max=%u\n", 855 rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment, 856 rtex->cmask.slice_tile_max); 857 858 if (rtex->htile_offset) 859 u_log_printf(log, " HTile: offset=%"PRIu64", size=%u " 860 "alignment=%u\n", 861 rtex->htile_offset, rtex->surface.meta_size, 862 1 << rtex->surface.meta_alignment_log2); 863 864 for (i = 0; i <= rtex->resource.b.b.last_level; i++) 865 u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", " 866 "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " 867 "mode=%u, tiling_index = %u\n", 868 i, (uint64_t)rtex->surface.u.legacy.level[i].offset_256B * 256, 869 (uint64_t)rtex->surface.u.legacy.level[i].slice_size_dw * 4, 870 u_minify(rtex->resource.b.b.width0, i), 871 u_minify(rtex->resource.b.b.height0, i), 872 u_minify(rtex->resource.b.b.depth0, i), 873 rtex->surface.u.legacy.level[i].nblk_x, 874 rtex->surface.u.legacy.level[i].nblk_y, 875 rtex->surface.u.legacy.level[i].mode, 876 rtex->surface.u.legacy.tiling_index[i]); 877 878 if (rtex->surface.has_stencil) { 879 u_log_printf(log, " StencilLayout: tilesplit=%u\n", 880 rtex->surface.u.legacy.stencil_tile_split); 881 for (i = 0; i <= rtex->resource.b.b.last_level; i++) { 882 u_log_printf(log, " StencilLevel[%i]: offset=%"PRIu64", " 883 "slice_size=%"PRIu64", npix_x=%u, " 884 "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " 885 "mode=%u, tiling_index = %u\n", 886 i, (uint64_t)rtex->surface.u.legacy.zs.stencil_level[i].offset_256B * 256, 887 (uint64_t)rtex->surface.u.legacy.zs.stencil_level[i].slice_size_dw * 4, 888 u_minify(rtex->resource.b.b.width0, i), 889 u_minify(rtex->resource.b.b.height0, i), 890 u_minify(rtex->resource.b.b.depth0, i), 891 rtex->surface.u.legacy.zs.stencil_level[i].nblk_x, 892 rtex->surface.u.legacy.zs.stencil_level[i].nblk_y, 893 rtex->surface.u.legacy.zs.stencil_level[i].mode, 894 rtex->surface.u.legacy.zs.stencil_tiling_index[i]); 895 } 896 } 897} 898 899/* Common processing for r600_texture_create and r600_texture_from_handle */ 900static struct r600_texture * 901r600_texture_create_object(struct pipe_screen *screen, 902 const struct pipe_resource *base, 903 struct pb_buffer *buf, 904 struct radeon_surf *surface) 905{ 906 struct r600_texture *rtex; 907 struct r600_resource *resource; 908 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 909 910 rtex = CALLOC_STRUCT(r600_texture); 911 if (!rtex) 912 return NULL; 913 914 resource = &rtex->resource; 915 resource->b.b = *base; 916 pipe_reference_init(&resource->b.b.reference, 1); 917 resource->b.b.screen = screen; 918 919 /* don't include stencil-only formats which we don't support for rendering */ 920 rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format)); 921 922 rtex->surface = *surface; 923 rtex->size = rtex->surface.surf_size; 924 rtex->db_render_format = base->format; 925 926 /* Tiled depth textures utilize the non-displayable tile order. 927 * This must be done after r600_setup_surface. 928 * Applies to R600-Cayman. */ 929 rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D; 930 /* Applies to GCN. */ 931 rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode; 932 933 if (rtex->is_depth) { 934 if (base->flags & (R600_RESOURCE_FLAG_TRANSFER | 935 R600_RESOURCE_FLAG_FLUSHED_DEPTH) || 936 rscreen->chip_class >= EVERGREEN) { 937 rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted; 938 rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted; 939 } else { 940 if (rtex->resource.b.b.nr_samples <= 1 && 941 (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM || 942 rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT)) 943 rtex->can_sample_z = true; 944 } 945 946 if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER | 947 R600_RESOURCE_FLAG_FLUSHED_DEPTH))) { 948 rtex->db_compatible = true; 949 950 if (!(rscreen->debug_flags & DBG_NO_HYPERZ)) 951 r600_texture_allocate_htile(rscreen, rtex); 952 } 953 } else { 954 if (base->nr_samples > 1) { 955 if (!buf) { 956 r600_texture_allocate_fmask(rscreen, rtex); 957 r600_texture_allocate_cmask(rscreen, rtex); 958 rtex->cmask_buffer = &rtex->resource; 959 } 960 if (!rtex->fmask.size || !rtex->cmask.size) { 961 FREE(rtex); 962 return NULL; 963 } 964 } 965 } 966 967 /* Now create the backing buffer. */ 968 if (!buf) { 969 r600_init_resource_fields(rscreen, resource, rtex->size, 970 1 << rtex->surface.surf_alignment_log2); 971 972 if (!r600_alloc_resource(rscreen, resource)) { 973 FREE(rtex); 974 return NULL; 975 } 976 } else { 977 resource->buf = buf; 978 resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf); 979 resource->bo_size = buf->size; 980 resource->bo_alignment = 1 << buf->alignment_log2; 981 resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf); 982 if (resource->domains & RADEON_DOMAIN_VRAM) 983 resource->vram_usage = buf->size; 984 else if (resource->domains & RADEON_DOMAIN_GTT) 985 resource->gart_usage = buf->size; 986 } 987 988 if (rtex->cmask.size) { 989 /* Initialize the cmask to 0xCC (= compressed state). */ 990 r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b, 991 rtex->cmask.offset, rtex->cmask.size, 992 0xCCCCCCCC); 993 } 994 if (rtex->htile_offset) { 995 uint32_t clear_value = 0; 996 997 r600_screen_clear_buffer(rscreen, &rtex->resource.b.b, 998 rtex->htile_offset, 999 rtex->surface.meta_size, 1000 clear_value); 1001 } 1002 1003 /* Initialize the CMASK base register value. */ 1004 rtex->cmask.base_address_reg = 1005 (rtex->resource.gpu_address + rtex->cmask.offset) >> 8; 1006 1007 if (rscreen->debug_flags & DBG_VM) { 1008 fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n", 1009 rtex->resource.gpu_address, 1010 rtex->resource.gpu_address + rtex->resource.buf->size, 1011 base->width0, base->height0, util_num_layers(base, 0), base->last_level+1, 1012 base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format)); 1013 } 1014 1015 if (rscreen->debug_flags & DBG_TEX) { 1016 puts("Texture:"); 1017 struct u_log_context log; 1018 u_log_context_init(&log); 1019 r600_print_texture_info(rscreen, rtex, &log); 1020 u_log_new_page_print(&log, stdout); 1021 fflush(stdout); 1022 u_log_context_destroy(&log); 1023 } 1024 1025 return rtex; 1026} 1027 1028static enum radeon_surf_mode 1029r600_choose_tiling(struct r600_common_screen *rscreen, 1030 const struct pipe_resource *templ) 1031{ 1032 const struct util_format_description *desc = util_format_description(templ->format); 1033 bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING; 1034 bool is_depth_stencil = util_format_is_depth_or_stencil(templ->format) && 1035 !(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH); 1036 1037 /* MSAA resources must be 2D tiled. */ 1038 if (templ->nr_samples > 1) 1039 return RADEON_SURF_MODE_2D; 1040 1041 /* Transfer resources should be linear. */ 1042 if (templ->flags & R600_RESOURCE_FLAG_TRANSFER) 1043 return RADEON_SURF_MODE_LINEAR_ALIGNED; 1044 1045 /* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */ 1046 if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN && 1047 (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) && 1048 (templ->target == PIPE_TEXTURE_2D || 1049 templ->target == PIPE_TEXTURE_3D)) 1050 force_tiling = true; 1051 1052 /* Handle common candidates for the linear mode. 1053 * Compressed textures and DB surfaces must always be tiled. 1054 */ 1055 if (!force_tiling && 1056 !is_depth_stencil && 1057 !util_format_is_compressed(templ->format)) { 1058 if (rscreen->debug_flags & DBG_NO_TILING) 1059 return RADEON_SURF_MODE_LINEAR_ALIGNED; 1060 1061 /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */ 1062 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) 1063 return RADEON_SURF_MODE_LINEAR_ALIGNED; 1064 1065 if (templ->bind & PIPE_BIND_LINEAR) 1066 return RADEON_SURF_MODE_LINEAR_ALIGNED; 1067 1068 /* 1D textures should be linear - fixes image operations on 1d */ 1069 if (templ->target == PIPE_TEXTURE_1D || 1070 templ->target == PIPE_TEXTURE_1D_ARRAY) 1071 return RADEON_SURF_MODE_LINEAR_ALIGNED; 1072 1073 /* Textures likely to be mapped often. */ 1074 if (templ->usage == PIPE_USAGE_STAGING || 1075 templ->usage == PIPE_USAGE_STREAM) 1076 return RADEON_SURF_MODE_LINEAR_ALIGNED; 1077 } 1078 1079 /* Make small textures 1D tiled. */ 1080 if (templ->width0 <= 16 || templ->height0 <= 16 || 1081 (rscreen->debug_flags & DBG_NO_2D_TILING)) 1082 return RADEON_SURF_MODE_1D; 1083 1084 /* The allocator will switch to 1D if needed. */ 1085 return RADEON_SURF_MODE_2D; 1086} 1087 1088struct pipe_resource *r600_texture_create(struct pipe_screen *screen, 1089 const struct pipe_resource *templ) 1090{ 1091 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 1092 struct radeon_surf surface = {0}; 1093 bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH; 1094 int r; 1095 1096 r = r600_init_surface(rscreen, &surface, templ, 1097 r600_choose_tiling(rscreen, templ), 0, 0, 1098 false, false, is_flushed_depth); 1099 if (r) { 1100 return NULL; 1101 } 1102 1103 return (struct pipe_resource *) 1104 r600_texture_create_object(screen, templ, NULL, &surface); 1105} 1106 1107static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, 1108 const struct pipe_resource *templ, 1109 struct winsys_handle *whandle, 1110 unsigned usage) 1111{ 1112 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 1113 struct pb_buffer *buf = NULL; 1114 enum radeon_surf_mode array_mode; 1115 struct radeon_surf surface = {}; 1116 int r; 1117 struct radeon_bo_metadata metadata = {}; 1118 struct r600_texture *rtex; 1119 bool is_scanout; 1120 1121 /* Support only 2D textures without mipmaps */ 1122 if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || 1123 templ->depth0 != 1 || templ->last_level != 0) 1124 return NULL; 1125 1126 buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, 1127 rscreen->info.max_alignment); 1128 if (!buf) 1129 return NULL; 1130 1131 rscreen->ws->buffer_get_metadata(rscreen->ws, buf, &metadata, NULL); 1132 r600_surface_import_metadata(rscreen, &surface, &metadata, 1133 &array_mode, &is_scanout); 1134 1135 r = r600_init_surface(rscreen, &surface, templ, array_mode, 1136 whandle->stride, whandle->offset, 1137 true, is_scanout, false); 1138 if (r) { 1139 return NULL; 1140 } 1141 1142 rtex = r600_texture_create_object(screen, templ, buf, &surface); 1143 if (!rtex) 1144 return NULL; 1145 1146 rtex->resource.b.is_shared = true; 1147 rtex->resource.external_usage = usage; 1148 1149 assert(rtex->surface.tile_swizzle == 0); 1150 return &rtex->resource.b.b; 1151} 1152 1153bool r600_init_flushed_depth_texture(struct pipe_context *ctx, 1154 struct pipe_resource *texture, 1155 struct r600_texture **staging) 1156{ 1157 struct r600_texture *rtex = (struct r600_texture*)texture; 1158 struct pipe_resource resource; 1159 struct r600_texture **flushed_depth_texture = staging ? 1160 staging : &rtex->flushed_depth_texture; 1161 enum pipe_format pipe_format = texture->format; 1162 1163 if (!staging) { 1164 if (rtex->flushed_depth_texture) 1165 return true; /* it's ready */ 1166 1167 if (!rtex->can_sample_z && rtex->can_sample_s) { 1168 switch (pipe_format) { 1169 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1170 /* Save memory by not allocating the S plane. */ 1171 pipe_format = PIPE_FORMAT_Z32_FLOAT; 1172 break; 1173 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1174 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1175 /* Save memory bandwidth by not copying the 1176 * stencil part during flush. 1177 * 1178 * This potentially increases memory bandwidth 1179 * if an application uses both Z and S texturing 1180 * simultaneously (a flushed Z24S8 texture 1181 * would be stored compactly), but how often 1182 * does that really happen? 1183 */ 1184 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 1185 break; 1186 default:; 1187 } 1188 } else if (!rtex->can_sample_s && rtex->can_sample_z) { 1189 assert(util_format_has_stencil(util_format_description(pipe_format))); 1190 1191 /* DB->CB copies to an 8bpp surface don't work. */ 1192 pipe_format = PIPE_FORMAT_X24S8_UINT; 1193 } 1194 } 1195 1196 memset(&resource, 0, sizeof(resource)); 1197 resource.target = texture->target; 1198 resource.format = pipe_format; 1199 resource.width0 = texture->width0; 1200 resource.height0 = texture->height0; 1201 resource.depth0 = texture->depth0; 1202 resource.array_size = texture->array_size; 1203 resource.last_level = texture->last_level; 1204 resource.nr_samples = texture->nr_samples; 1205 resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT; 1206 resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL; 1207 resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH; 1208 1209 if (staging) 1210 resource.flags |= R600_RESOURCE_FLAG_TRANSFER; 1211 1212 *flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource); 1213 if (*flushed_depth_texture == NULL) { 1214 R600_ERR("failed to create temporary texture to hold flushed depth\n"); 1215 return false; 1216 } 1217 1218 (*flushed_depth_texture)->non_disp_tiling = false; 1219 return true; 1220} 1221 1222/** 1223 * Initialize the pipe_resource descriptor to be of the same size as the box, 1224 * which is supposed to hold a subregion of the texture "orig" at the given 1225 * mipmap level. 1226 */ 1227static void r600_init_temp_resource_from_box(struct pipe_resource *res, 1228 struct pipe_resource *orig, 1229 const struct pipe_box *box, 1230 unsigned level, unsigned flags) 1231{ 1232 memset(res, 0, sizeof(*res)); 1233 res->format = orig->format; 1234 res->width0 = box->width; 1235 res->height0 = box->height; 1236 res->depth0 = 1; 1237 res->array_size = 1; 1238 res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT; 1239 res->flags = flags; 1240 1241 /* We must set the correct texture target and dimensions for a 3D box. */ 1242 if (box->depth > 1 && util_max_layer(orig, level) > 0) { 1243 res->target = PIPE_TEXTURE_2D_ARRAY; 1244 res->array_size = box->depth; 1245 } else { 1246 res->target = PIPE_TEXTURE_2D; 1247 } 1248} 1249 1250static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen, 1251 struct r600_texture *rtex, 1252 unsigned transfer_usage, 1253 const struct pipe_box *box) 1254{ 1255 /* r600g doesn't react to dirty_tex_descriptor_counter */ 1256 return rscreen->chip_class >= GFX6 && 1257 !rtex->resource.b.is_shared && 1258 !(transfer_usage & PIPE_MAP_READ) && 1259 rtex->resource.b.b.last_level == 0 && 1260 util_texrange_covers_whole_level(&rtex->resource.b.b, 0, 1261 box->x, box->y, box->z, 1262 box->width, box->height, 1263 box->depth); 1264} 1265 1266static void r600_texture_invalidate_storage(struct r600_common_context *rctx, 1267 struct r600_texture *rtex) 1268{ 1269 struct r600_common_screen *rscreen = rctx->screen; 1270 1271 /* There is no point in discarding depth and tiled buffers. */ 1272 assert(!rtex->is_depth); 1273 assert(rtex->surface.is_linear); 1274 1275 /* Reallocate the buffer in the same pipe_resource. */ 1276 r600_alloc_resource(rscreen, &rtex->resource); 1277 1278 /* Initialize the CMASK base address (needed even without CMASK). */ 1279 rtex->cmask.base_address_reg = 1280 (rtex->resource.gpu_address + rtex->cmask.offset) >> 8; 1281 1282 p_atomic_inc(&rscreen->dirty_tex_counter); 1283 1284 rctx->num_alloc_tex_transfer_bytes += rtex->size; 1285} 1286 1287void *r600_texture_transfer_map(struct pipe_context *ctx, 1288 struct pipe_resource *texture, 1289 unsigned level, 1290 unsigned usage, 1291 const struct pipe_box *box, 1292 struct pipe_transfer **ptransfer) 1293{ 1294 struct r600_common_context *rctx = (struct r600_common_context*)ctx; 1295 struct r600_texture *rtex = (struct r600_texture*)texture; 1296 struct r600_transfer *trans; 1297 struct r600_resource *buf; 1298 unsigned offset = 0; 1299 char *map; 1300 bool use_staging_texture = false; 1301 1302 assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER)); 1303 assert(box->width && box->height && box->depth); 1304 1305 /* Depth textures use staging unconditionally. */ 1306 if (!rtex->is_depth) { 1307 /* Degrade the tile mode if we get too many transfers on APUs. 1308 * On dGPUs, the staging texture is always faster. 1309 * Only count uploads that are at least 4x4 pixels large. 1310 */ 1311 if (!rctx->screen->info.has_dedicated_vram && 1312 level == 0 && 1313 box->width >= 4 && box->height >= 4 && 1314 p_atomic_inc_return(&rtex->num_level0_transfers) == 10) { 1315 bool can_invalidate = 1316 r600_can_invalidate_texture(rctx->screen, rtex, 1317 usage, box); 1318 1319 r600_reallocate_texture_inplace(rctx, rtex, 1320 PIPE_BIND_LINEAR, 1321 can_invalidate); 1322 } 1323 1324 /* Tiled textures need to be converted into a linear texture for CPU 1325 * access. The staging texture is always linear and is placed in GART. 1326 * 1327 * Reading from VRAM or GTT WC is slow, always use the staging 1328 * texture in this case. 1329 * 1330 * Use the staging texture for uploads if the underlying BO 1331 * is busy. 1332 */ 1333 if (!rtex->surface.is_linear) 1334 use_staging_texture = true; 1335 else if (usage & PIPE_MAP_READ) 1336 use_staging_texture = 1337 rtex->resource.domains & RADEON_DOMAIN_VRAM || 1338 rtex->resource.flags & RADEON_FLAG_GTT_WC; 1339 /* Write & linear only: */ 1340 else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf, 1341 RADEON_USAGE_READWRITE) || 1342 !rctx->ws->buffer_wait(rctx->ws, rtex->resource.buf, 0, 1343 RADEON_USAGE_READWRITE)) { 1344 /* It's busy. */ 1345 if (r600_can_invalidate_texture(rctx->screen, rtex, 1346 usage, box)) 1347 r600_texture_invalidate_storage(rctx, rtex); 1348 else 1349 use_staging_texture = true; 1350 } 1351 } 1352 1353 trans = CALLOC_STRUCT(r600_transfer); 1354 if (!trans) 1355 return NULL; 1356 pipe_resource_reference(&trans->b.b.resource, texture); 1357 trans->b.b.level = level; 1358 trans->b.b.usage = usage; 1359 trans->b.b.box = *box; 1360 1361 if (rtex->is_depth) { 1362 struct r600_texture *staging_depth; 1363 1364 if (rtex->resource.b.b.nr_samples > 1) { 1365 /* MSAA depth buffers need to be converted to single sample buffers. 1366 * 1367 * Mapping MSAA depth buffers can occur if ReadPixels is called 1368 * with a multisample GLX visual. 1369 * 1370 * First downsample the depth buffer to a temporary texture, 1371 * then decompress the temporary one to staging. 1372 * 1373 * Only the region being mapped is transfered. 1374 */ 1375 struct pipe_resource resource; 1376 1377 r600_init_temp_resource_from_box(&resource, texture, box, level, 0); 1378 1379 if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) { 1380 R600_ERR("failed to create temporary texture to hold untiled copy\n"); 1381 FREE(trans); 1382 return NULL; 1383 } 1384 1385 if (usage & PIPE_MAP_READ) { 1386 struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource); 1387 if (!temp) { 1388 R600_ERR("failed to create a temporary depth texture\n"); 1389 FREE(trans); 1390 return NULL; 1391 } 1392 1393 r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box); 1394 rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth, 1395 0, 0, 0, box->depth, 0, 0); 1396 pipe_resource_reference(&temp, NULL); 1397 } 1398 1399 /* Just get the strides. */ 1400 r600_texture_get_offset(rctx->screen, staging_depth, level, NULL, 1401 &trans->b.b.stride, 1402 &trans->b.b.layer_stride); 1403 } else { 1404 /* XXX: only readback the rectangle which is being mapped? */ 1405 /* XXX: when discard is true, no need to read back from depth texture */ 1406 if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) { 1407 R600_ERR("failed to create temporary texture to hold untiled copy\n"); 1408 FREE(trans); 1409 return NULL; 1410 } 1411 1412 rctx->blit_decompress_depth(ctx, rtex, staging_depth, 1413 level, level, 1414 box->z, box->z + box->depth - 1, 1415 0, 0); 1416 1417 offset = r600_texture_get_offset(rctx->screen, staging_depth, 1418 level, box, 1419 &trans->b.b.stride, 1420 &trans->b.b.layer_stride); 1421 } 1422 1423 trans->staging = (struct r600_resource*)staging_depth; 1424 buf = trans->staging; 1425 } else if (use_staging_texture) { 1426 struct pipe_resource resource; 1427 struct r600_texture *staging; 1428 1429 r600_init_temp_resource_from_box(&resource, texture, box, level, 1430 R600_RESOURCE_FLAG_TRANSFER); 1431 resource.usage = (usage & PIPE_MAP_READ) ? 1432 PIPE_USAGE_STAGING : PIPE_USAGE_STREAM; 1433 1434 /* Create the temporary texture. */ 1435 staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource); 1436 if (!staging) { 1437 R600_ERR("failed to create temporary texture to hold untiled copy\n"); 1438 FREE(trans); 1439 return NULL; 1440 } 1441 trans->staging = &staging->resource; 1442 1443 /* Just get the strides. */ 1444 r600_texture_get_offset(rctx->screen, staging, 0, NULL, 1445 &trans->b.b.stride, 1446 &trans->b.b.layer_stride); 1447 1448 if (usage & PIPE_MAP_READ) 1449 r600_copy_to_staging_texture(ctx, trans); 1450 else 1451 usage |= PIPE_MAP_UNSYNCHRONIZED; 1452 1453 buf = trans->staging; 1454 } else { 1455 /* the resource is mapped directly */ 1456 offset = r600_texture_get_offset(rctx->screen, rtex, level, box, 1457 &trans->b.b.stride, 1458 &trans->b.b.layer_stride); 1459 buf = &rtex->resource; 1460 } 1461 1462 if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) { 1463 r600_resource_reference(&trans->staging, NULL); 1464 FREE(trans); 1465 return NULL; 1466 } 1467 1468 *ptransfer = &trans->b.b; 1469 return map + offset; 1470} 1471 1472void r600_texture_transfer_unmap(struct pipe_context *ctx, 1473 struct pipe_transfer* transfer) 1474{ 1475 struct r600_common_context *rctx = (struct r600_common_context*)ctx; 1476 struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; 1477 struct pipe_resource *texture = transfer->resource; 1478 struct r600_texture *rtex = (struct r600_texture*)texture; 1479 1480 if ((transfer->usage & PIPE_MAP_WRITE) && rtransfer->staging) { 1481 if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) { 1482 ctx->resource_copy_region(ctx, texture, transfer->level, 1483 transfer->box.x, transfer->box.y, transfer->box.z, 1484 &rtransfer->staging->b.b, transfer->level, 1485 &transfer->box); 1486 } else { 1487 r600_copy_from_staging_texture(ctx, rtransfer); 1488 } 1489 } 1490 1491 if (rtransfer->staging) { 1492 rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size; 1493 r600_resource_reference(&rtransfer->staging, NULL); 1494 } 1495 1496 /* Heuristic for {upload, draw, upload, draw, ..}: 1497 * 1498 * Flush the gfx IB if we've allocated too much texture storage. 1499 * 1500 * The idea is that we don't want to build IBs that use too much 1501 * memory and put pressure on the kernel memory manager and we also 1502 * want to make temporary and invalidated buffers go idle ASAP to 1503 * decrease the total memory usage or make them reusable. The memory 1504 * usage will be slightly higher than given here because of the buffer 1505 * cache in the winsys. 1506 * 1507 * The result is that the kernel memory manager is never a bottleneck. 1508 */ 1509 if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) { 1510 rctx->gfx.flush(rctx, PIPE_FLUSH_ASYNC, NULL); 1511 rctx->num_alloc_tex_transfer_bytes = 0; 1512 } 1513 1514 pipe_resource_reference(&transfer->resource, NULL); 1515 FREE(transfer); 1516} 1517 1518struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, 1519 struct pipe_resource *texture, 1520 const struct pipe_surface *templ, 1521 unsigned width0, unsigned height0, 1522 unsigned width, unsigned height) 1523{ 1524 struct r600_surface *surface = CALLOC_STRUCT(r600_surface); 1525 1526 if (!surface) 1527 return NULL; 1528 1529 assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level)); 1530 assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level)); 1531 1532 pipe_reference_init(&surface->base.reference, 1); 1533 pipe_resource_reference(&surface->base.texture, texture); 1534 surface->base.context = pipe; 1535 surface->base.format = templ->format; 1536 surface->base.width = width; 1537 surface->base.height = height; 1538 surface->base.u = templ->u; 1539 1540 surface->width0 = width0; 1541 surface->height0 = height0; 1542 1543 return &surface->base; 1544} 1545 1546static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, 1547 struct pipe_resource *tex, 1548 const struct pipe_surface *templ) 1549{ 1550 unsigned level = templ->u.tex.level; 1551 unsigned width = u_minify(tex->width0, level); 1552 unsigned height = u_minify(tex->height0, level); 1553 unsigned width0 = tex->width0; 1554 unsigned height0 = tex->height0; 1555 1556 if (tex->target != PIPE_BUFFER && templ->format != tex->format) { 1557 const struct util_format_description *tex_desc 1558 = util_format_description(tex->format); 1559 const struct util_format_description *templ_desc 1560 = util_format_description(templ->format); 1561 1562 assert(tex_desc->block.bits == templ_desc->block.bits); 1563 1564 /* Adjust size of surface if and only if the block width or 1565 * height is changed. */ 1566 if (tex_desc->block.width != templ_desc->block.width || 1567 tex_desc->block.height != templ_desc->block.height) { 1568 unsigned nblks_x = util_format_get_nblocksx(tex->format, width); 1569 unsigned nblks_y = util_format_get_nblocksy(tex->format, height); 1570 1571 width = nblks_x * templ_desc->block.width; 1572 height = nblks_y * templ_desc->block.height; 1573 1574 width0 = util_format_get_nblocksx(tex->format, width0); 1575 height0 = util_format_get_nblocksy(tex->format, height0); 1576 } 1577 } 1578 1579 return r600_create_surface_custom(pipe, tex, templ, 1580 width0, height0, 1581 width, height); 1582} 1583 1584static void r600_surface_destroy(struct pipe_context *pipe, 1585 struct pipe_surface *surface) 1586{ 1587 struct r600_surface *surf = (struct r600_surface*)surface; 1588 r600_resource_reference(&surf->cb_buffer_fmask, NULL); 1589 r600_resource_reference(&surf->cb_buffer_cmask, NULL); 1590 pipe_resource_reference(&surface->texture, NULL); 1591 FREE(surface); 1592} 1593 1594static void r600_clear_texture(struct pipe_context *pipe, 1595 struct pipe_resource *tex, 1596 unsigned level, 1597 const struct pipe_box *box, 1598 const void *data) 1599{ 1600 struct pipe_screen *screen = pipe->screen; 1601 struct r600_texture *rtex = (struct r600_texture*)tex; 1602 struct pipe_surface tmpl = {{0}}; 1603 struct pipe_surface *sf; 1604 1605 tmpl.format = tex->format; 1606 tmpl.u.tex.first_layer = box->z; 1607 tmpl.u.tex.last_layer = box->z + box->depth - 1; 1608 tmpl.u.tex.level = level; 1609 sf = pipe->create_surface(pipe, tex, &tmpl); 1610 if (!sf) 1611 return; 1612 1613 if (rtex->is_depth) { 1614 unsigned clear; 1615 float depth; 1616 uint8_t stencil = 0; 1617 1618 /* Depth is always present. */ 1619 clear = PIPE_CLEAR_DEPTH; 1620 util_format_unpack_z_float(tex->format, &depth, data, 1); 1621 1622 if (rtex->surface.has_stencil) { 1623 clear |= PIPE_CLEAR_STENCIL; 1624 util_format_unpack_s_8uint(tex->format, &stencil, data, 1); 1625 } 1626 1627 pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil, 1628 box->x, box->y, 1629 box->width, box->height, false); 1630 } else { 1631 union pipe_color_union color; 1632 1633 util_format_unpack_rgba(tex->format, color.ui, data, 1); 1634 1635 if (screen->is_format_supported(screen, tex->format, 1636 tex->target, 0, 0, 1637 PIPE_BIND_RENDER_TARGET)) { 1638 pipe->clear_render_target(pipe, sf, &color, 1639 box->x, box->y, 1640 box->width, box->height, false); 1641 } else { 1642 /* Software fallback - just for R9G9B9E5_FLOAT */ 1643 util_clear_render_target(pipe, sf, &color, 1644 box->x, box->y, 1645 box->width, box->height); 1646 } 1647 } 1648 pipe_surface_reference(&sf, NULL); 1649} 1650 1651unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap) 1652{ 1653 const struct util_format_description *desc = util_format_description(format); 1654 1655#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz) 1656 1657 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1658 return V_0280A0_SWAP_STD; 1659 1660 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1661 return ~0U; 1662 1663 switch (desc->nr_channels) { 1664 case 1: 1665 if (HAS_SWIZZLE(0,X)) 1666 return V_0280A0_SWAP_STD; /* X___ */ 1667 else if (HAS_SWIZZLE(3,X)) 1668 return V_0280A0_SWAP_ALT_REV; /* ___X */ 1669 break; 1670 case 2: 1671 if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) || 1672 (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) || 1673 (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y))) 1674 return V_0280A0_SWAP_STD; /* XY__ */ 1675 else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) || 1676 (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) || 1677 (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X))) 1678 /* YX__ */ 1679 return (do_endian_swap ? V_0280A0_SWAP_STD : V_0280A0_SWAP_STD_REV); 1680 else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y)) 1681 return V_0280A0_SWAP_ALT; /* X__Y */ 1682 else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X)) 1683 return V_0280A0_SWAP_ALT_REV; /* Y__X */ 1684 break; 1685 case 3: 1686 if (HAS_SWIZZLE(0,X)) 1687 return (do_endian_swap ? V_0280A0_SWAP_STD_REV : V_0280A0_SWAP_STD); 1688 else if (HAS_SWIZZLE(0,Z)) 1689 return V_0280A0_SWAP_STD_REV; /* ZYX */ 1690 break; 1691 case 4: 1692 /* check the middle channels, the 1st and 4th channel can be NONE */ 1693 if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) { 1694 return V_0280A0_SWAP_STD; /* XYZW */ 1695 } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) { 1696 return V_0280A0_SWAP_STD_REV; /* WZYX */ 1697 } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) { 1698 return V_0280A0_SWAP_ALT; /* ZYXW */ 1699 } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) { 1700 /* YZWX */ 1701 if (desc->is_array) 1702 return V_0280A0_SWAP_ALT_REV; 1703 else 1704 return (do_endian_swap ? V_0280A0_SWAP_ALT : V_0280A0_SWAP_ALT_REV); 1705 } 1706 break; 1707 } 1708 return ~0U; 1709} 1710 1711/* FAST COLOR CLEAR */ 1712 1713static void evergreen_set_clear_color(struct r600_texture *rtex, 1714 enum pipe_format surface_format, 1715 const union pipe_color_union *color) 1716{ 1717 union util_color uc; 1718 1719 memset(&uc, 0, sizeof(uc)); 1720 1721 if (rtex->surface.bpe == 16) { 1722 /* DCC fast clear only: 1723 * CLEAR_WORD0 = R = G = B 1724 * CLEAR_WORD1 = A 1725 */ 1726 assert(color->ui[0] == color->ui[1] && 1727 color->ui[0] == color->ui[2]); 1728 uc.ui[0] = color->ui[0]; 1729 uc.ui[1] = color->ui[3]; 1730 } else { 1731 util_pack_color_union(surface_format, &uc, color); 1732 } 1733 1734 memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); 1735} 1736 1737void evergreen_do_fast_color_clear(struct r600_common_context *rctx, 1738 struct pipe_framebuffer_state *fb, 1739 struct r600_atom *fb_state, 1740 unsigned *buffers, ubyte *dirty_cbufs, 1741 const union pipe_color_union *color) 1742{ 1743 int i; 1744 1745 /* This function is broken in BE, so just disable this path for now */ 1746#if UTIL_ARCH_BIG_ENDIAN 1747 return; 1748#endif 1749 1750 if (rctx->render_cond) 1751 return; 1752 1753 for (i = 0; i < fb->nr_cbufs; i++) { 1754 struct r600_texture *tex; 1755 unsigned clear_bit = PIPE_CLEAR_COLOR0 << i; 1756 1757 if (!fb->cbufs[i]) 1758 continue; 1759 1760 /* if this colorbuffer is not being cleared */ 1761 if (!(*buffers & clear_bit)) 1762 continue; 1763 1764 tex = (struct r600_texture *)fb->cbufs[i]->texture; 1765 1766 /* the clear is allowed if all layers are bound */ 1767 if (fb->cbufs[i]->u.tex.first_layer != 0 || 1768 fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) { 1769 continue; 1770 } 1771 1772 /* cannot clear mipmapped textures */ 1773 if (fb->cbufs[i]->texture->last_level != 0) { 1774 continue; 1775 } 1776 1777 /* only supported on tiled surfaces */ 1778 if (tex->surface.is_linear) { 1779 continue; 1780 } 1781 1782 /* shared textures can't use fast clear without an explicit flush, 1783 * because there is no way to communicate the clear color among 1784 * all clients 1785 */ 1786 if (tex->resource.b.is_shared && 1787 !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) 1788 continue; 1789 1790 /* Use a slow clear for small surfaces where the cost of 1791 * the eliminate pass can be higher than the benefit of fast 1792 * clear. AMDGPU-pro does this, but the numbers may differ. 1793 * 1794 * This helps on both dGPUs and APUs, even small ones. 1795 */ 1796 if (tex->resource.b.b.nr_samples <= 1 && 1797 tex->resource.b.b.width0 * tex->resource.b.b.height0 <= 300 * 300) 1798 continue; 1799 1800 { 1801 /* 128-bit formats are unusupported */ 1802 if (tex->surface.bpe > 8) { 1803 continue; 1804 } 1805 1806 /* ensure CMASK is enabled */ 1807 r600_texture_alloc_cmask_separate(rctx->screen, tex); 1808 if (tex->cmask.size == 0) { 1809 continue; 1810 } 1811 1812 /* Do the fast clear. */ 1813 rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, 1814 tex->cmask.offset, tex->cmask.size, 0, 1815 R600_COHERENCY_CB_META); 1816 1817 bool need_compressed_update = !tex->dirty_level_mask; 1818 1819 tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; 1820 1821 if (need_compressed_update) 1822 p_atomic_inc(&rctx->screen->compressed_colortex_counter); 1823 } 1824 1825 evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); 1826 1827 if (dirty_cbufs) 1828 *dirty_cbufs |= 1 << i; 1829 rctx->set_atom_dirty(rctx, fb_state, true); 1830 *buffers &= ~clear_bit; 1831 } 1832} 1833 1834static struct pipe_memory_object * 1835r600_memobj_from_handle(struct pipe_screen *screen, 1836 struct winsys_handle *whandle, 1837 bool dedicated) 1838{ 1839 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 1840 struct r600_memory_object *memobj = CALLOC_STRUCT(r600_memory_object); 1841 struct pb_buffer *buf = NULL; 1842 1843 if (!memobj) 1844 return NULL; 1845 1846 buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, 1847 rscreen->info.max_alignment); 1848 if (!buf) { 1849 free(memobj); 1850 return NULL; 1851 } 1852 1853 memobj->b.dedicated = dedicated; 1854 memobj->buf = buf; 1855 memobj->stride = whandle->stride; 1856 memobj->offset = whandle->offset; 1857 1858 return (struct pipe_memory_object *)memobj; 1859 1860} 1861 1862static void 1863r600_memobj_destroy(struct pipe_screen *screen, 1864 struct pipe_memory_object *_memobj) 1865{ 1866 struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj; 1867 1868 pb_reference(&memobj->buf, NULL); 1869 free(memobj); 1870} 1871 1872static struct pipe_resource * 1873r600_texture_from_memobj(struct pipe_screen *screen, 1874 const struct pipe_resource *templ, 1875 struct pipe_memory_object *_memobj, 1876 uint64_t offset) 1877{ 1878 int r; 1879 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 1880 struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj; 1881 struct r600_texture *rtex; 1882 struct radeon_surf surface = {}; 1883 struct radeon_bo_metadata metadata = {}; 1884 enum radeon_surf_mode array_mode; 1885 bool is_scanout; 1886 struct pb_buffer *buf = NULL; 1887 1888 if (memobj->b.dedicated) { 1889 rscreen->ws->buffer_get_metadata(rscreen->ws, memobj->buf, &metadata, NULL); 1890 r600_surface_import_metadata(rscreen, &surface, &metadata, 1891 &array_mode, &is_scanout); 1892 } else { 1893 /** 1894 * The bo metadata is unset for un-dedicated images. So we fall 1895 * back to linear. See answer to question 5 of the 1896 * VK_KHX_external_memory spec for some details. 1897 * 1898 * It is possible that this case isn't going to work if the 1899 * surface pitch isn't correctly aligned by default. 1900 * 1901 * In order to support it correctly we require multi-image 1902 * metadata to be syncrhonized between radv and radeonsi. The 1903 * semantics of associating multiple image metadata to a memory 1904 * object on the vulkan export side are not concretely defined 1905 * either. 1906 * 1907 * All the use cases we are aware of at the moment for memory 1908 * objects use dedicated allocations. So lets keep the initial 1909 * implementation simple. 1910 * 1911 * A possible alternative is to attempt to reconstruct the 1912 * tiling information when the TexParameter TEXTURE_TILING_EXT 1913 * is set. 1914 */ 1915 array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; 1916 is_scanout = false; 1917 1918 } 1919 1920 r = r600_init_surface(rscreen, &surface, templ, 1921 array_mode, memobj->stride, 1922 offset, true, is_scanout, 1923 false); 1924 if (r) 1925 return NULL; 1926 1927 rtex = r600_texture_create_object(screen, templ, memobj->buf, &surface); 1928 if (!rtex) 1929 return NULL; 1930 1931 /* r600_texture_create_object doesn't increment refcount of 1932 * memobj->buf, so increment it here. 1933 */ 1934 pb_reference(&buf, memobj->buf); 1935 1936 rtex->resource.b.is_shared = true; 1937 rtex->resource.external_usage = PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE; 1938 1939 return &rtex->resource.b.b; 1940} 1941 1942void r600_init_screen_texture_functions(struct r600_common_screen *rscreen) 1943{ 1944 rscreen->b.resource_from_handle = r600_texture_from_handle; 1945 rscreen->b.resource_get_handle = r600_texture_get_handle; 1946 rscreen->b.resource_get_info = r600_texture_get_info; 1947 rscreen->b.resource_from_memobj = r600_texture_from_memobj; 1948 rscreen->b.memobj_create_from_handle = r600_memobj_from_handle; 1949 rscreen->b.memobj_destroy = r600_memobj_destroy; 1950} 1951 1952void r600_init_context_texture_functions(struct r600_common_context *rctx) 1953{ 1954 rctx->b.create_surface = r600_create_surface; 1955 rctx->b.surface_destroy = r600_surface_destroy; 1956 rctx->b.clear_texture = r600_clear_texture; 1957} 1958