1/* 2 * Copyright 2006 VMware, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
24 */ 25 26#include <GL/gl.h> 27#include <GL/internal/dri_interface.h> 28#include "drm-uapi/drm_fourcc.h" 29 30#include "intel_batchbuffer.h" 31#include "intel_image.h" 32#include "intel_mipmap_tree.h" 33#include "intel_tex.h" 34#include "intel_blit.h" 35#include "intel_fbo.h" 36 37#include "brw_blorp.h" 38#include "brw_context.h" 39#include "brw_state.h" 40 41#include "main/enums.h" 42#include "main/fbobject.h" 43#include "main/formats.h" 44#include "main/glformats.h" 45#include "main/texcompress_etc.h" 46#include "main/teximage.h" 47#include "main/streaming-load-memcpy.h" 48 49#include "util/format_srgb.h" 50 51#include "x86/common_x86_asm.h" 52 53#define FILE_DEBUG_FLAG DEBUG_MIPTREE 54 55static void *intel_miptree_map_raw(struct brw_context *brw, 56 struct intel_mipmap_tree *mt, 57 GLbitfield mode); 58 59static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt); 60 61static bool 62intel_miptree_supports_mcs(struct brw_context *brw, 63 const struct intel_mipmap_tree *mt) 64{ 65 const struct gen_device_info *devinfo = &brw->screen->devinfo; 66 67 /* MCS compression only applies to multisampled miptrees */ 68 if (mt->surf.samples <= 1) 69 return false; 70 71 /* Prior to Gen7, all MSAA surfaces used IMS layout. */ 72 if (devinfo->gen < 7) 73 return false; 74 75 /* See isl_surf_get_mcs_surf for details. */ 76 if (mt->surf.samples == 16 && mt->surf.logical_level0_px.width > 8192) 77 return false; 78 79 /* In Gen7, IMS layout is only used for depth and stencil buffers. */ 80 switch (_mesa_get_format_base_format(mt->format)) { 81 case GL_DEPTH_COMPONENT: 82 case GL_STENCIL_INDEX: 83 case GL_DEPTH_STENCIL: 84 return false; 85 default: 86 /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"): 87 * 88 * This field must be set to 0 for all SINT MSRTs when all RT channels 89 * are not written 90 * 91 * In practice this means that we have to disable MCS for all signed 92 * integer MSAA buffers. 
The alternative, to disable MCS only when one 93 * of the render target channels is disabled, is impractical because it 94 * would require converting between CMS and UMS MSAA layouts on the fly, 95 * which is expensive. 96 */ 97 if (devinfo->gen == 7 && _mesa_get_format_datatype(mt->format) == GL_INT) { 98 return false; 99 } else { 100 return true; 101 } 102 } 103} 104 105static bool 106intel_tiling_supports_ccs(const struct brw_context *brw, 107 enum isl_tiling tiling) 108{ 109 const struct gen_device_info *devinfo = &brw->screen->devinfo; 110 111 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 112 * Target(s)", beneath the "Fast Color Clear" bullet (p326): 113 * 114 * - Support is limited to tiled render targets. 115 * 116 * Gen9 changes the restriction to Y-tile only. 117 */ 118 if (devinfo->gen >= 9) 119 return tiling == ISL_TILING_Y0; 120 else if (devinfo->gen >= 7) 121 return tiling != ISL_TILING_LINEAR; 122 else 123 return false; 124} 125 126/** 127 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer 128 * can be used. This doesn't (and should not) inspect any of the properties of 129 * the miptree's BO. 130 * 131 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)", 132 * beneath the "Fast Color Clear" bullet (p326): 133 * 134 * - Support is for non-mip-mapped and non-array surface types only. 135 * 136 * And then later, on p327: 137 * 138 * - MCS buffer for non-MSRT is supported only for RT formats 32bpp, 139 * 64bpp, and 128bpp. 140 * 141 * From the Skylake documentation, it is made clear that X-tiling is no longer 142 * supported: 143 * 144 * - MCS and Lossless compression is supported for TiledY/TileYs/TileYf 145 * non-MSRTs only. 
146 */ 147static bool 148intel_miptree_supports_ccs(struct brw_context *brw, 149 const struct intel_mipmap_tree *mt) 150{ 151 const struct gen_device_info *devinfo = &brw->screen->devinfo; 152 153 /* MCS support does not exist prior to Gen7 */ 154 if (devinfo->gen < 7) 155 return false; 156 157 /* This function applies only to non-multisampled render targets. */ 158 if (mt->surf.samples > 1) 159 return false; 160 161 /* MCS is only supported for color buffers */ 162 if (!_mesa_is_format_color_format(mt->format)) 163 return false; 164 165 if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16) 166 return false; 167 168 const bool mip_mapped = mt->first_level != 0 || mt->last_level != 0; 169 const bool arrayed = mt->surf.logical_level0_px.array_len > 1 || 170 mt->surf.logical_level0_px.depth > 1; 171 172 if (arrayed) { 173 /* Multisample surfaces with the CMS layout are not layered surfaces, 174 * yet still have physical_depth0 > 1. Assert that we don't 175 * accidentally reject a multisampled surface here. We should have 176 * rejected it earlier by explicitly checking the sample count. 177 */ 178 assert(mt->surf.samples == 1); 179 } 180 181 /* Handle the hardware restrictions... 182 * 183 * All GENs have the following restriction: "MCS buffer for non-MSRT is 184 * supported only for RT formats 32bpp, 64bpp, and 128bpp." 185 * 186 * From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652: (Color Clear of 187 * Non-MultiSampler Render Target Restrictions) Support is for 188 * non-mip-mapped and non-array surface types only. 189 * 190 * From the BDW PRM Volume 7: 3D-Media-GPGPU, page 649: (Color Clear of 191 * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed 192 * surfaces are supported with MCS buffer layout with these alignments in 193 * the RT space: Horizontal Alignment = 256 and Vertical Alignment = 128. 194 * 195 * From the SKL PRM Volume 7: 3D-Media-GPGPU, page 632: (Color Clear of 196 * Non-MultiSampler Render Target Restriction). 
Mip-mapped and arrayed 197 * surfaces are supported with MCS buffer layout with these alignments in 198 * the RT space: Horizontal Alignment = 128 and Vertical Alignment = 64. 199 */ 200 if (devinfo->gen < 8 && (mip_mapped || arrayed)) 201 return false; 202 203 /* The PRM doesn't say this explicitly, but fast-clears don't appear to 204 * work for 3D textures until gen9 where the layout of 3D textures changes 205 * to match 2D array textures. 206 */ 207 if (devinfo->gen <= 8 && mt->surf.dim != ISL_SURF_DIM_2D) 208 return false; 209 210 /* There's no point in using an MCS buffer if the surface isn't in a 211 * renderable format. 212 */ 213 if (!brw->mesa_format_supports_render[mt->format]) 214 return false; 215 216 return true; 217} 218 219static bool 220intel_tiling_supports_hiz(const struct brw_context *brw, 221 enum isl_tiling tiling) 222{ 223 const struct gen_device_info *devinfo = &brw->screen->devinfo; 224 225 if (devinfo->gen < 6) 226 return false; 227 228 return tiling == ISL_TILING_Y0; 229} 230 231static bool 232intel_miptree_supports_hiz(const struct brw_context *brw, 233 const struct intel_mipmap_tree *mt) 234{ 235 if (!brw->has_hiz) 236 return false; 237 238 switch (mt->format) { 239 case MESA_FORMAT_Z_FLOAT32: 240 case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: 241 case MESA_FORMAT_Z24_UNORM_X8_UINT: 242 case MESA_FORMAT_Z24_UNORM_S8_UINT: 243 case MESA_FORMAT_Z_UNORM16: 244 return true; 245 default: 246 return false; 247 } 248} 249 250/** 251 * Return true if the format that will be used to access the miptree is 252 * CCS_E-compatible with the miptree's linear/non-sRGB format. 253 * 254 * Why use the linear format? Well, although the miptree may be specified with 255 * an sRGB format, the usage of that color space/format can be toggled. Since 256 * our HW tends to support more linear formats than sRGB ones, we use this 257 * format variant for check for CCS_E compatibility. 
258 */ 259static bool 260format_ccs_e_compat_with_miptree(const struct gen_device_info *devinfo, 261 const struct intel_mipmap_tree *mt, 262 enum isl_format access_format) 263{ 264 assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E); 265 266 mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format); 267 enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format); 268 return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format); 269} 270 271static bool 272intel_miptree_supports_ccs_e(struct brw_context *brw, 273 const struct intel_mipmap_tree *mt) 274{ 275 const struct gen_device_info *devinfo = &brw->screen->devinfo; 276 277 if (devinfo->gen < 9) 278 return false; 279 280 /* For now compression is only enabled for integer formats even though 281 * there exist supported floating point formats also. This is a heuristic 282 * decision based on current public benchmarks. In none of the cases these 283 * formats provided any improvement but a few cases were seen to regress. 284 * Hence these are left to to be enabled in the future when they are known 285 * to improve things. 286 */ 287 if (_mesa_get_format_datatype(mt->format) == GL_FLOAT) 288 return false; 289 290 if (!intel_miptree_supports_ccs(brw, mt)) 291 return false; 292 293 /* Many window system buffers are sRGB even if they are never rendered as 294 * sRGB. For those, we want CCS_E for when sRGBEncode is false. When the 295 * surface is used as sRGB, we fall back to CCS_D. 296 */ 297 mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format); 298 enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format); 299 return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format); 300} 301 302/** 303 * Determine depth format corresponding to a depth+stencil format, 304 * for separate stencil. 
305 */ 306mesa_format 307intel_depth_format_for_depthstencil_format(mesa_format format) { 308 switch (format) { 309 case MESA_FORMAT_Z24_UNORM_S8_UINT: 310 return MESA_FORMAT_Z24_UNORM_X8_UINT; 311 case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: 312 return MESA_FORMAT_Z_FLOAT32; 313 default: 314 return format; 315 } 316} 317 318static bool 319create_mapping_table(GLenum target, unsigned first_level, unsigned last_level, 320 unsigned depth0, struct intel_mipmap_level *table) 321{ 322 for (unsigned level = first_level; level <= last_level; level++) { 323 const unsigned d = 324 target == GL_TEXTURE_3D ? minify(depth0, level) : depth0; 325 326 table[level].slice = calloc(d, sizeof(*table[0].slice)); 327 if (!table[level].slice) 328 goto unwind; 329 } 330 331 return true; 332 333unwind: 334 for (unsigned level = first_level; level <= last_level; level++) 335 free(table[level].slice); 336 337 return false; 338} 339 340static bool 341needs_separate_stencil(const struct brw_context *brw, 342 struct intel_mipmap_tree *mt, 343 mesa_format format) 344{ 345 const struct gen_device_info *devinfo = &brw->screen->devinfo; 346 347 if (_mesa_get_format_base_format(format) != GL_DEPTH_STENCIL) 348 return false; 349 350 if (devinfo->must_use_separate_stencil) 351 return true; 352 353 return brw->has_separate_stencil && 354 intel_miptree_supports_hiz(brw, mt); 355} 356 357/** 358 * Choose the aux usage for this miptree. This function must be called fairly 359 * late in the miptree create process after we have a tiling. 
360 */ 361static void 362intel_miptree_choose_aux_usage(struct brw_context *brw, 363 struct intel_mipmap_tree *mt) 364{ 365 assert(mt->aux_usage == ISL_AUX_USAGE_NONE); 366 367 if (intel_miptree_supports_mcs(brw, mt)) { 368 assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 369 mt->aux_usage = ISL_AUX_USAGE_MCS; 370 } else if (intel_tiling_supports_ccs(brw, mt->surf.tiling) && 371 intel_miptree_supports_ccs(brw, mt)) { 372 if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC) && 373 intel_miptree_supports_ccs_e(brw, mt)) { 374 mt->aux_usage = ISL_AUX_USAGE_CCS_E; 375 } else { 376 mt->aux_usage = ISL_AUX_USAGE_CCS_D; 377 } 378 } else if (intel_tiling_supports_hiz(brw, mt->surf.tiling) && 379 intel_miptree_supports_hiz(brw, mt)) { 380 mt->aux_usage = ISL_AUX_USAGE_HIZ; 381 } 382 383 /* We can do fast-clear on all auxiliary surface types that are 384 * allocated through the normal texture creation paths. 385 */ 386 if (mt->aux_usage != ISL_AUX_USAGE_NONE) 387 mt->supports_fast_clear = true; 388} 389 390 391/** 392 * Choose an appropriate uncompressed format for a requested 393 * compressed format, if unsupported. 394 */ 395mesa_format 396intel_lower_compressed_format(struct brw_context *brw, mesa_format format) 397{ 398 const struct gen_device_info *devinfo = &brw->screen->devinfo; 399 400 /* No need to lower ETC formats on these platforms, 401 * they are supported natively. 
402 */ 403 if (devinfo->gen >= 8 || devinfo->is_baytrail) 404 return format; 405 406 switch (format) { 407 case MESA_FORMAT_ETC1_RGB8: 408 return MESA_FORMAT_R8G8B8X8_UNORM; 409 case MESA_FORMAT_ETC2_RGB8: 410 return MESA_FORMAT_R8G8B8X8_UNORM; 411 case MESA_FORMAT_ETC2_SRGB8: 412 case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC: 413 case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1: 414 return MESA_FORMAT_B8G8R8A8_SRGB; 415 case MESA_FORMAT_ETC2_RGBA8_EAC: 416 case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1: 417 return MESA_FORMAT_R8G8B8A8_UNORM; 418 case MESA_FORMAT_ETC2_R11_EAC: 419 return MESA_FORMAT_R_UNORM16; 420 case MESA_FORMAT_ETC2_SIGNED_R11_EAC: 421 return MESA_FORMAT_R_SNORM16; 422 case MESA_FORMAT_ETC2_RG11_EAC: 423 return MESA_FORMAT_R16G16_UNORM; 424 case MESA_FORMAT_ETC2_SIGNED_RG11_EAC: 425 return MESA_FORMAT_R16G16_SNORM; 426 default: 427 /* Non ETC1 / ETC2 format */ 428 return format; 429 } 430} 431 432unsigned 433brw_get_num_logical_layers(const struct intel_mipmap_tree *mt, unsigned level) 434{ 435 if (mt->surf.dim == ISL_SURF_DIM_3D) 436 return minify(mt->surf.logical_level0_px.depth, level); 437 else 438 return mt->surf.logical_level0_px.array_len; 439} 440 441UNUSED static unsigned 442get_num_phys_layers(const struct isl_surf *surf, unsigned level) 443{ 444 /* In case of physical dimensions one needs to consider also the layout. 445 * See isl_calc_phys_level0_extent_sa(). 446 */ 447 if (surf->dim != ISL_SURF_DIM_3D) 448 return surf->phys_level0_sa.array_len; 449 450 if (surf->dim_layout == ISL_DIM_LAYOUT_GEN4_2D) 451 return minify(surf->phys_level0_sa.array_len, level); 452 453 return minify(surf->phys_level0_sa.depth, level); 454} 455 456/** \brief Assert that the level and layer are valid for the miptree. 
*/ 457void 458intel_miptree_check_level_layer(const struct intel_mipmap_tree *mt, 459 uint32_t level, 460 uint32_t layer) 461{ 462 (void) mt; 463 (void) level; 464 (void) layer; 465 466 assert(level >= mt->first_level); 467 assert(level <= mt->last_level); 468 assert(layer < get_num_phys_layers(&mt->surf, level)); 469} 470 471static enum isl_aux_state ** 472create_aux_state_map(struct intel_mipmap_tree *mt, 473 enum isl_aux_state initial) 474{ 475 const uint32_t levels = mt->last_level + 1; 476 477 uint32_t total_slices = 0; 478 for (uint32_t level = 0; level < levels; level++) 479 total_slices += brw_get_num_logical_layers(mt, level); 480 481 const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *); 482 483 /* We're going to allocate a single chunk of data for both the per-level 484 * reference array and the arrays of aux_state. This makes cleanup 485 * significantly easier. 486 */ 487 const size_t total_size = per_level_array_size + 488 total_slices * sizeof(enum isl_aux_state); 489 void *data = malloc(total_size); 490 if (data == NULL) 491 return NULL; 492 493 enum isl_aux_state **per_level_arr = data; 494 enum isl_aux_state *s = data + per_level_array_size; 495 for (uint32_t level = 0; level < levels; level++) { 496 per_level_arr[level] = s; 497 const unsigned level_layers = brw_get_num_logical_layers(mt, level); 498 for (uint32_t a = 0; a < level_layers; a++) 499 *(s++) = initial; 500 } 501 assert((void *)s == data + total_size); 502 503 return per_level_arr; 504} 505 506static void 507free_aux_state_map(enum isl_aux_state **state) 508{ 509 free(state); 510} 511 512static bool 513need_to_retile_as_linear(struct brw_context *brw, unsigned blt_pitch, 514 enum isl_tiling tiling, unsigned samples) 515{ 516 if (samples > 1) 517 return false; 518 519 if (tiling == ISL_TILING_LINEAR) 520 return false; 521 522 if (blt_pitch >= 32768) { 523 perf_debug("blt pitch %u too large to blit, falling back to untiled", 524 blt_pitch); 525 return true; 526 } 527 
528 return false; 529} 530 531static bool 532need_to_retile_as_x(const struct brw_context *brw, uint64_t size, 533 enum isl_tiling tiling) 534{ 535 const struct gen_device_info *devinfo = &brw->screen->devinfo; 536 537 /* If the BO is too large to fit in the aperture, we need to use the 538 * BLT engine to support it. Prior to Sandybridge, the BLT paths can't 539 * handle Y-tiling, so we need to fall back to X. 540 */ 541 if (devinfo->gen < 6 && size >= brw->max_gtt_map_object_size && 542 tiling == ISL_TILING_Y0) 543 return true; 544 545 return false; 546} 547 548static struct intel_mipmap_tree * 549make_surface(struct brw_context *brw, GLenum target, mesa_format format, 550 unsigned first_level, unsigned last_level, 551 unsigned width0, unsigned height0, unsigned depth0, 552 unsigned num_samples, isl_tiling_flags_t tiling_flags, 553 isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags, 554 unsigned row_pitch_B, struct brw_bo *bo) 555{ 556 struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); 557 if (!mt) 558 return NULL; 559 560 if (!create_mapping_table(target, first_level, last_level, depth0, 561 mt->level)) { 562 free(mt); 563 return NULL; 564 } 565 566 mt->refcount = 1; 567 568 if (target == GL_TEXTURE_CUBE_MAP || 569 target == GL_TEXTURE_CUBE_MAP_ARRAY) 570 isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT; 571 572 DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n", 573 __func__, 574 _mesa_enum_to_string(target), 575 _mesa_get_format_name(format), 576 num_samples, width0, height0, depth0, 577 first_level, last_level, mt); 578 579 struct isl_surf_init_info init_info = { 580 .dim = get_isl_surf_dim(target), 581 .format = translate_tex_format(brw, format, false), 582 .width = width0, 583 .height = height0, 584 .depth = target == GL_TEXTURE_3D ? depth0 : 1, 585 .levels = last_level - first_level + 1, 586 .array_len = target == GL_TEXTURE_3D ? 
1 : depth0, 587 .samples = num_samples, 588 .row_pitch_B = row_pitch_B, 589 .usage = isl_usage_flags, 590 .tiling_flags = tiling_flags, 591 }; 592 593 if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info)) 594 goto fail; 595 596 /* Depth surfaces are always Y-tiled and stencil is always W-tiled, although 597 * on gen7 platforms we also need to create Y-tiled copies of stencil for 598 * texturing since the hardware can't sample from W-tiled surfaces. For 599 * everything else, check for corner cases needing special treatment. 600 */ 601 bool is_depth_stencil = 602 mt->surf.usage & (ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_DEPTH_BIT); 603 if (!is_depth_stencil) { 604 if (need_to_retile_as_linear(brw, intel_miptree_blt_pitch(mt), 605 mt->surf.tiling, mt->surf.samples)) { 606 init_info.tiling_flags = 1u << ISL_TILING_LINEAR; 607 if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info)) 608 goto fail; 609 } else if (need_to_retile_as_x(brw, mt->surf.size_B, mt->surf.tiling)) { 610 init_info.tiling_flags = 1u << ISL_TILING_X; 611 if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info)) 612 goto fail; 613 } 614 } 615 616 /* In case of linear the buffer gets padded by fixed 64 bytes and therefore 617 * the size may not be multiple of row_pitch. 618 * See isl_apply_surface_padding(). 
619 */ 620 if (mt->surf.tiling != ISL_TILING_LINEAR) 621 assert(mt->surf.size_B % mt->surf.row_pitch_B == 0); 622 623 if (!bo) { 624 mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree", 625 mt->surf.size_B, 626 BRW_MEMZONE_OTHER, 627 isl_tiling_to_i915_tiling( 628 mt->surf.tiling), 629 mt->surf.row_pitch_B, alloc_flags); 630 if (!mt->bo) 631 goto fail; 632 } else { 633 mt->bo = bo; 634 } 635 636 mt->first_level = first_level; 637 mt->last_level = last_level; 638 mt->target = target; 639 mt->format = format; 640 mt->aux_state = NULL; 641 mt->cpp = isl_format_get_layout(mt->surf.format)->bpb / 8; 642 mt->compressed = _mesa_is_format_compressed(format); 643 mt->drm_modifier = DRM_FORMAT_MOD_INVALID; 644 645 return mt; 646 647fail: 648 intel_miptree_release(&mt); 649 return NULL; 650} 651 652/* Return the usual surface usage flags for the given format. */ 653static isl_surf_usage_flags_t 654mt_surf_usage(mesa_format format) 655{ 656 switch(_mesa_get_format_base_format(format)) { 657 case GL_DEPTH_COMPONENT: 658 return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT; 659 case GL_DEPTH_STENCIL: 660 return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT | 661 ISL_SURF_USAGE_TEXTURE_BIT; 662 case GL_STENCIL_INDEX: 663 return ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_TEXTURE_BIT; 664 default: 665 return ISL_SURF_USAGE_RENDER_TARGET_BIT | ISL_SURF_USAGE_TEXTURE_BIT; 666 } 667} 668 669static struct intel_mipmap_tree * 670miptree_create(struct brw_context *brw, 671 GLenum target, 672 mesa_format format, 673 GLuint first_level, 674 GLuint last_level, 675 GLuint width0, 676 GLuint height0, 677 GLuint depth0, 678 GLuint num_samples, 679 enum intel_miptree_create_flags flags) 680{ 681 const struct gen_device_info *devinfo = &brw->screen->devinfo; 682 const uint32_t alloc_flags = 683 (flags & MIPTREE_CREATE_BUSY || num_samples > 1) ? 
BO_ALLOC_BUSY : 0; 684 isl_tiling_flags_t tiling_flags = ISL_TILING_ANY_MASK; 685 686 /* TODO: This used to be because there wasn't BLORP to handle Y-tiling. */ 687 if (devinfo->gen < 6 && _mesa_is_format_color_format(format)) 688 tiling_flags &= ~ISL_TILING_Y0_BIT; 689 690 mesa_format mt_fmt = format; 691 if (!_mesa_is_format_color_format(format) && devinfo->gen >= 6) { 692 /* Fix up the Z miptree format for how we're splitting out separate 693 * stencil. Gen7 expects there to be no stencil bits in its depth buffer. 694 */ 695 mt_fmt = intel_depth_format_for_depthstencil_format(format); 696 } 697 698 struct intel_mipmap_tree *mt = 699 make_surface(brw, target, mt_fmt, first_level, last_level, 700 width0, height0, depth0, num_samples, 701 tiling_flags, mt_surf_usage(mt_fmt), 702 alloc_flags, 0, NULL); 703 704 if (mt == NULL) 705 return NULL; 706 707 if (intel_miptree_needs_fake_etc(brw, mt)) { 708 mesa_format decomp_format = intel_lower_compressed_format(brw, format); 709 mt->shadow_mt = make_surface(brw, target, decomp_format, first_level, 710 last_level, width0, height0, depth0, 711 num_samples, tiling_flags, 712 mt_surf_usage(decomp_format), 713 alloc_flags, 0, NULL); 714 715 if (mt->shadow_mt == NULL) { 716 intel_miptree_release(&mt); 717 return NULL; 718 } 719 } 720 721 if (needs_separate_stencil(brw, mt, format)) { 722 mt->stencil_mt = 723 make_surface(brw, target, MESA_FORMAT_S_UINT8, first_level, last_level, 724 width0, height0, depth0, num_samples, 725 ISL_TILING_W_BIT, mt_surf_usage(MESA_FORMAT_S_UINT8), 726 alloc_flags, 0, NULL); 727 if (mt->stencil_mt == NULL) { 728 intel_miptree_release(&mt); 729 return NULL; 730 } 731 } 732 733 if (!(flags & MIPTREE_CREATE_NO_AUX)) 734 intel_miptree_choose_aux_usage(brw, mt); 735 736 return mt; 737} 738 739struct intel_mipmap_tree * 740intel_miptree_create(struct brw_context *brw, 741 GLenum target, 742 mesa_format format, 743 GLuint first_level, 744 GLuint last_level, 745 GLuint width0, 746 GLuint height0, 747 GLuint 
depth0, 748 GLuint num_samples, 749 enum intel_miptree_create_flags flags) 750{ 751 assert(num_samples > 0); 752 753 struct intel_mipmap_tree *mt = miptree_create( 754 brw, target, format, 755 first_level, last_level, 756 width0, height0, depth0, num_samples, 757 flags); 758 if (!mt) 759 return NULL; 760 761 mt->offset = 0; 762 763 /* Create the auxiliary surface up-front. CCS_D, on the other hand, can only 764 * compress clear color so we wait until an actual fast-clear to allocate 765 * it. 766 */ 767 if (mt->aux_usage != ISL_AUX_USAGE_CCS_D && 768 !intel_miptree_alloc_aux(brw, mt)) { 769 intel_miptree_release(&mt); 770 return NULL; 771 } 772 773 return mt; 774} 775 776struct intel_mipmap_tree * 777intel_miptree_create_for_bo(struct brw_context *brw, 778 struct brw_bo *bo, 779 mesa_format format, 780 uint32_t offset, 781 uint32_t width, 782 uint32_t height, 783 uint32_t depth, 784 int pitch, 785 enum isl_tiling tiling, 786 enum intel_miptree_create_flags flags) 787{ 788 const struct gen_device_info *devinfo = &brw->screen->devinfo; 789 struct intel_mipmap_tree *mt; 790 const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; 791 const GLenum base_format = _mesa_get_format_base_format(format); 792 793 if ((base_format == GL_DEPTH_COMPONENT || 794 base_format == GL_DEPTH_STENCIL)) { 795 const mesa_format mt_fmt = (devinfo->gen < 6) ? 
format : 796 intel_depth_format_for_depthstencil_format(format); 797 mt = make_surface(brw, target, mt_fmt, 798 0, 0, width, height, depth, 1, ISL_TILING_Y0_BIT, 799 mt_surf_usage(mt_fmt), 800 0, pitch, bo); 801 if (!mt) 802 return NULL; 803 804 brw_bo_reference(bo); 805 806 if (!(flags & MIPTREE_CREATE_NO_AUX)) 807 intel_miptree_choose_aux_usage(brw, mt); 808 809 return mt; 810 } else if (format == MESA_FORMAT_S_UINT8) { 811 mt = make_surface(brw, target, MESA_FORMAT_S_UINT8, 812 0, 0, width, height, depth, 1, 813 ISL_TILING_W_BIT, 814 mt_surf_usage(MESA_FORMAT_S_UINT8), 815 0, pitch, bo); 816 if (!mt) 817 return NULL; 818 819 assert(bo->size >= mt->surf.size_B); 820 821 brw_bo_reference(bo); 822 return mt; 823 } 824 825 /* Nothing will be able to use this miptree with the BO if the offset isn't 826 * aligned. 827 */ 828 if (tiling != ISL_TILING_LINEAR) 829 assert(offset % 4096 == 0); 830 831 /* miptrees can't handle negative pitch. If you need flipping of images, 832 * that's outside of the scope of the mt. 833 */ 834 assert(pitch >= 0); 835 836 mt = make_surface(brw, target, format, 837 0, 0, width, height, depth, 1, 838 1lu << tiling, 839 mt_surf_usage(format), 840 0, pitch, bo); 841 if (!mt) 842 return NULL; 843 844 brw_bo_reference(bo); 845 mt->bo = bo; 846 mt->offset = offset; 847 848 if (!(flags & MIPTREE_CREATE_NO_AUX)) { 849 intel_miptree_choose_aux_usage(brw, mt); 850 851 /* Create the auxiliary surface up-front. CCS_D, on the other hand, can 852 * only compress clear color so we wait until an actual fast-clear to 853 * allocate it. 
854 */ 855 if (mt->aux_usage != ISL_AUX_USAGE_CCS_D && 856 !intel_miptree_alloc_aux(brw, mt)) { 857 intel_miptree_release(&mt); 858 return NULL; 859 } 860 } 861 862 return mt; 863} 864 865static struct intel_mipmap_tree * 866miptree_create_for_planar_image(struct brw_context *brw, 867 __DRIimage *image, GLenum target, 868 enum isl_tiling tiling) 869{ 870 const struct intel_image_format *f = image->planar_format; 871 struct intel_mipmap_tree *planar_mt = NULL; 872 873 for (int i = 0; i < f->nplanes; i++) { 874 const int index = f->planes[i].buffer_index; 875 const uint32_t dri_format = f->planes[i].dri_format; 876 const mesa_format format = driImageFormatToGLFormat(dri_format); 877 const uint32_t width = image->width >> f->planes[i].width_shift; 878 const uint32_t height = image->height >> f->planes[i].height_shift; 879 880 /* Disable creation of the texture's aux buffers because the driver 881 * exposes no EGL API to manage them. That is, there is no API for 882 * resolving the aux buffer's content to the main buffer nor for 883 * invalidating the aux buffer's content. 
       */
      struct intel_mipmap_tree *mt =
         intel_miptree_create_for_bo(brw, image->bo, format,
                                     image->offsets[index],
                                     width, height, 1,
                                     image->strides[index],
                                     tiling,
                                     MIPTREE_CREATE_NO_AUX);
      if (mt == NULL) {
         intel_miptree_release(&planar_mt);
         return NULL;
      }

      mt->target = target;

      /* Plane 0 is the root miptree; later planes hang off of it. */
      if (i == 0)
         planar_mt = mt;
      else
         planar_mt->plane[i - 1] = mt;
   }

   planar_mt->drm_modifier = image->modifier;

   return planar_mt;
}

/* Import the CCS auxiliary surface described by the DRI image's
 * aux_offset/aux_pitch into an already-created, single-level, single-sample
 * miptree.  Returns false if ISL rejects the CCS layout or on allocation
 * failure; on failure the miptree is left without an aux buffer.
 */
static bool
create_ccs_buf_for_image(struct brw_context *brw,
                         __DRIimage *image,
                         struct intel_mipmap_tree *mt,
                         enum isl_aux_state initial_state)
{
   struct isl_surf temp_ccs_surf;

   /* CCS is only supported for very simple miptrees */
   assert(image->aux_offset != 0 && image->aux_pitch != 0);
   assert(image->tile_x == 0 && image->tile_y == 0);
   assert(mt->surf.samples == 1);
   assert(mt->surf.levels == 1);
   assert(mt->surf.logical_level0_px.depth == 1);
   assert(mt->surf.logical_level0_px.array_len == 1);
   assert(mt->first_level == 0);
   assert(mt->last_level == 0);

   /* We shouldn't already have a CCS */
   assert(!mt->aux_buf);

   if (!isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, &temp_ccs_surf,
                              image->aux_pitch))
      return false;

   /* The CCS data must lie entirely within the imported BO. */
   assert(image->aux_offset < image->bo->size);
   assert(temp_ccs_surf.size_B <= image->bo->size - image->aux_offset);

   mt->aux_buf = calloc(sizeof(*mt->aux_buf), 1);
   if (mt->aux_buf == NULL)
      return false;

   mt->aux_state = create_aux_state_map(mt, initial_state);
   if (!mt->aux_state) {
      free(mt->aux_buf);
      mt->aux_buf = NULL;
      return false;
   }

   /* On gen10+ we start using an extra space in the aux buffer to store the
    * indirect clear color. However, if we imported an image from the window
    * system with CCS, we don't have the extra space at the end of the aux
    * buffer. So create a new bo here that will store that clear color.
    */
   if (brw->isl_dev.ss.clear_color_state_size > 0) {
      mt->aux_buf->clear_color_bo =
         brw_bo_alloc_tiled(brw->bufmgr, "clear_color_bo",
                            brw->isl_dev.ss.clear_color_state_size,
                            BRW_MEMZONE_OTHER, I915_TILING_NONE, 0,
                            BO_ALLOC_ZEROED);
      if (!mt->aux_buf->clear_color_bo) {
         free(mt->aux_buf);
         mt->aux_buf = NULL;
         return false;
      }
   }

   /* The aux data lives in the image's BO; take a reference on it. */
   mt->aux_buf->bo = image->bo;
   brw_bo_reference(image->bo);

   mt->aux_buf->offset = image->aux_offset;
   mt->aux_buf->surf = temp_ccs_surf;

   return true;
}

/* Wrap a DRI image's BO in a miptree.  Multi-planar images are delegated to
 * miptree_create_for_planar_image().  Returns NULL on unsupported formats,
 * alignment problems on old hardware, or CCS import failure.
 */
struct intel_mipmap_tree *
intel_miptree_create_for_dri_image(struct brw_context *brw,
                                   __DRIimage *image, GLenum target,
                                   mesa_format format,
                                   bool allow_internal_aux)
{
   uint32_t bo_tiling, bo_swizzle;
   brw_bo_get_tiling(image->bo, &bo_tiling, &bo_swizzle);

   const struct isl_drm_modifier_info *mod_info =
      isl_drm_modifier_get_info(image->modifier);

   /* A known modifier dictates the tiling; otherwise trust the kernel. */
   const enum isl_tiling tiling =
      mod_info ? mod_info->tiling : isl_tiling_from_i915_tiling(bo_tiling);

   if (image->planar_format && image->planar_format->nplanes > 1)
      return miptree_create_for_planar_image(brw, image, target, tiling);

   if (image->planar_format)
      assert(image->planar_format->planes[0].dri_format == image->dri_format);

   if (!brw->ctx.TextureFormatSupported[format]) {
      /* The texture storage paths in core Mesa detect if the driver does not
       * support the user-requested format, and then searches for a
       * fallback format. The DRIimage code bypasses core Mesa, though. So we
       * do the fallbacks here for important formats.
       *
       * We must support DRM_FOURCC_XBGR8888 textures because the Android
       * framework produces HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces, which
       * the Chrome OS compositor consumes as dma_buf EGLImages.
       */
      format = _mesa_format_fallback_rgbx_to_rgba(format);
   }

   if (!brw->ctx.TextureFormatSupported[format])
      return NULL;

   enum intel_miptree_create_flags mt_create_flags = 0;

   /* If this image comes in from a window system, we have different
    * requirements than if it comes in via an EGL import operation. Window
    * system images can use any form of auxiliary compression we wish because
    * they get "flushed" before being handed off to the window system and we
    * have the opportunity to do resolves. Non window-system images, on the
    * other hand, have no resolve point so we can't have aux without a
    * modifier.
    */
   if (!allow_internal_aux)
      mt_create_flags |= MIPTREE_CREATE_NO_AUX;

   /* If we have a modifier which specifies aux, don't create one yet */
   if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE)
      mt_create_flags |= MIPTREE_CREATE_NO_AUX;

   /* Disable creation of the texture's aux buffers because the driver exposes
    * no EGL API to manage them. That is, there is no API for resolving the aux
    * buffer's content to the main buffer nor for invalidating the aux buffer's
    * content.
    */
   struct intel_mipmap_tree *mt =
      intel_miptree_create_for_bo(brw, image->bo, format,
                                  image->offset, image->width, image->height, 1,
                                  image->pitch, tiling, mt_create_flags);
   if (mt == NULL)
      return NULL;

   mt->target = target;
   mt->level[0].level_x = image->tile_x;
   mt->level[0].level_y = image->tile_y;
   mt->drm_modifier = image->modifier;

   /* From "OES_EGL_image" error reporting. We report GL_INVALID_OPERATION
    * for EGL images from non-tile aligned surfaces in gen4 hw and earlier
    * which have trouble resolving back to destination image due to alignment
    * issues.
    */
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   if (!devinfo->has_surface_tile_offset) {
      uint32_t draw_x, draw_y;
      intel_miptree_get_tile_offsets(mt, 0, 0, &draw_x, &draw_y);

      if (draw_x != 0 || draw_y != 0) {
         _mesa_error(&brw->ctx, GL_INVALID_OPERATION, __func__);
         intel_miptree_release(&mt);
         return NULL;
      }
   }

   if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE) {
      assert(mod_info->aux_usage == ISL_AUX_USAGE_CCS_E);

      mt->aux_usage = mod_info->aux_usage;
      /* If we are a window system buffer, then we can support fast-clears
       * even if the modifier doesn't support them by doing a partial resolve
       * as part of the flush operation.
       */
      mt->supports_fast_clear =
         allow_internal_aux || mod_info->supports_clear_color;

      /* We don't know the actual state of the surface when we get it but we
       * can make a pretty good guess based on the modifier. What we do know
       * for sure is that it isn't in the AUX_INVALID state, so we just assume
       * a worst case of compression.
       */
      enum isl_aux_state initial_state =
         isl_drm_modifier_get_default_aux_state(image->modifier);

      if (!create_ccs_buf_for_image(brw, image, mt, initial_state)) {
         intel_miptree_release(&mt);
         return NULL;
      }
   }

   /* Don't assume coherency for imported EGLimages. We don't know what
    * external clients are going to do with it. They may scan it out.
    */
   image->bo->cache_coherent = false;

   return mt;
}

/**
 * For a singlesample renderbuffer, this simply wraps the given BO with a
 * miptree.
 *
 * For a multisample renderbuffer, this wraps the window system's
 * (singlesample) BO with a singlesample miptree attached to the
 * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
 * that will contain the actual rendering (which is lazily resolved to
 * irb->singlesample_mt).
 */
bool
intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
                                         struct intel_renderbuffer *irb,
                                         struct intel_mipmap_tree *singlesample_mt,
                                         uint32_t width, uint32_t height,
                                         uint32_t pitch)
{
   struct intel_mipmap_tree *multisample_mt = NULL;
   struct gl_renderbuffer *rb = &irb->Base.Base;
   mesa_format format = rb->Format;
   const unsigned num_samples = MAX2(rb->NumSamples, 1);

   /* Only the front and back buffers, which are color buffers, are allocated
    * through the image loader.
    */
   assert(_mesa_get_format_base_format(format) == GL_RGB ||
          _mesa_get_format_base_format(format) == GL_RGBA);

   assert(singlesample_mt);

   if (num_samples == 1) {
      intel_miptree_release(&irb->mt);
      irb->mt = singlesample_mt;

      assert(!irb->singlesample_mt);
   } else {
      intel_miptree_release(&irb->singlesample_mt);
      irb->singlesample_mt = singlesample_mt;

      /* (Re)create the multisample miptree only if missing or resized. */
      if (!irb->mt ||
          irb->mt->surf.logical_level0_px.width != width ||
          irb->mt->surf.logical_level0_px.height != height) {
         multisample_mt = intel_miptree_create_for_renderbuffer(intel,
                                                                format,
                                                                width,
                                                                height,
                                                                num_samples);
         if (!multisample_mt)
            goto fail;

         irb->need_downsample = false;
         intel_miptree_release(&irb->mt);
         irb->mt = multisample_mt;
      }
   }
   return true;

fail:
   intel_miptree_release(&irb->mt);
   return false;
}

struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct brw_context *brw,
                                      mesa_format format,
                                      uint32_t width,
                                      uint32_t height,
                                      uint32_t num_samples)
{
   struct intel_mipmap_tree *mt;
   uint32_t depth = 1;
   GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;

   mt = intel_miptree_create(brw, target, format, 0, 0,
                             width, height, depth, num_samples,
                             MIPTREE_CREATE_BUSY);
   if (!mt)
      goto fail;

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}

/* Make *dst point at src, adjusting refcounts: releases the old *dst and
 * takes a new reference on src (if non-NULL).
 */
void
intel_miptree_reference(struct intel_mipmap_tree **dst,
                        struct intel_mipmap_tree *src)
{
   if (*dst == src)
      return;

   intel_miptree_release(dst);

   if (src) {
      src->refcount++;
      DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
   }

   *dst = src;
}

/* Free an aux buffer and drop its BO references.  NULL-safe. */
static void
intel_miptree_aux_buffer_free(struct intel_miptree_aux_buffer *aux_buf)
{
   if (aux_buf == NULL)
      return;

   brw_bo_unreference(aux_buf->bo);
   brw_bo_unreference(aux_buf->clear_color_bo);

   free(aux_buf);
}

/* Drop a reference on *mt, freeing it (and its stencil/shadow/aux/plane
 * children) when the refcount hits zero.  Always NULLs *mt.
 */
void
intel_miptree_release(struct intel_mipmap_tree **mt)
{
   if (!*mt)
      return;

   DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
   if (--(*mt)->refcount <= 0) {
      GLuint i;

      DBG("%s deleting %p\n", __func__, *mt);

      brw_bo_unreference((*mt)->bo);
      intel_miptree_release(&(*mt)->stencil_mt);
      intel_miptree_release(&(*mt)->shadow_mt);
      intel_miptree_aux_buffer_free((*mt)->aux_buf);
      free_aux_state_map((*mt)->aux_state);

      intel_miptree_release(&(*mt)->plane[0]);
      intel_miptree_release(&(*mt)->plane[1]);

      for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
         free((*mt)->level[i].slice);
      }

      free(*mt);
   }
   *mt = NULL;
}


void
intel_get_image_dims(struct gl_texture_image *image,
                     int *width, int *height, int *depth)
{
   switch (image->TexObject->Target) {
   case GL_TEXTURE_1D_ARRAY:
      /* For a
         1D Array texture the OpenGL API will treat the image height as
       * the number of array slices. For Intel hardware, we treat the 1D array
       * as a 2D Array with a height of 1. So, here we want to swap image
       * height and depth.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = 1;
      *depth = image->Height;
      break;
   case GL_TEXTURE_CUBE_MAP:
      /* For Cube maps, the mesa/main api layer gives us a depth of 1 even
       * though we really have 6 slices.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = image->Height;
      *depth = 6;
      break;
   default:
      *width = image->Width;
      *height = image->Height;
      *depth = image->Depth;
      break;
   }
}

/**
 * Can the image be pulled into a unified mipmap tree? This mirrors
 * the completeness test in a lot of ways.
 *
 * Not sure whether I want to pass gl_texture_image here.
 */
bool
intel_miptree_match_image(struct intel_mipmap_tree *mt,
                          struct gl_texture_image *image)
{
   struct intel_texture_image *intelImage = intel_texture_image(image);
   GLuint level = intelImage->base.Base.Level;
   int width, height, depth;

   /* glTexImage* choose the texture object based on the target passed in, and
    * objects can't change targets over their lifetimes, so this should be
    * true.
    */
   assert(image->TexObject->Target == mt->target);

   /* A packed depth+stencil miptree is represented by a depth tree plus a
    * separate stencil tree; compare against the combined format in that case.
    */
   mesa_format mt_format = mt->format;
   if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
   if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;

   if (_mesa_get_srgb_format_linear(image->TexFormat) !=
       _mesa_get_srgb_format_linear(mt_format))
      return false;

   intel_get_image_dims(image, &width, &height, &depth);

   if (mt->target == GL_TEXTURE_CUBE_MAP)
      depth = 6;

   if (level >= mt->surf.levels)
      return false;

   const unsigned level_depth =
      mt->surf.dim == ISL_SURF_DIM_3D ?
         minify(mt->surf.logical_level0_px.depth, level) :
         mt->surf.logical_level0_px.array_len;

   return width == minify(mt->surf.logical_level0_px.width, level) &&
          height == minify(mt->surf.logical_level0_px.height, level) &&
          depth == level_depth &&
          MAX2(image->NumSamples, 1) == mt->surf.samples;
}

void
intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               GLuint *x, GLuint *y)
{
   if (level == 0 && slice == 0) {
      *x = mt->level[0].level_x;
      *y = mt->level[0].level_y;
      return;
   }

   uint32_t x_offset_sa, y_offset_sa;

   /* Miptree itself can have an offset only if it represents a single
    * slice in an imported buffer object.
    * See intel_miptree_create_for_dri_image().
    */
   assert(mt->level[0].level_x == 0);
   assert(mt->level[0].level_y == 0);

   /* Given level is relative to level zero while the miptree may
    * represent just a subset of all levels starting from 'first_level'.
    */
   assert(level >= mt->first_level);
   level -= mt->first_level;

   /* 3D surfaces address slices through the z coordinate; array surfaces
    * address them through the array index.
    */
   const unsigned z = mt->surf.dim == ISL_SURF_DIM_3D ? slice : 0;
   slice = mt->surf.dim == ISL_SURF_DIM_3D ? 0 : slice;
   isl_surf_get_image_offset_el(&mt->surf, level, slice, z,
                                &x_offset_sa, &y_offset_sa);

   *x = x_offset_sa;
   *y = y_offset_sa;
}


/**
 * This function computes the tile_w (in bytes) and tile_h (in rows) of
 * different tiling patterns. If the BO is untiled, tile_w is set to cpp
 * and tile_h is set to 1.
 */
void
intel_get_tile_dims(enum isl_tiling tiling, uint32_t cpp,
                    uint32_t *tile_w, uint32_t *tile_h)
{
   switch (tiling) {
   case ISL_TILING_X:
      *tile_w = 512;
      *tile_h = 8;
      break;
   case ISL_TILING_Y0:
      *tile_w = 128;
      *tile_h = 32;
      break;
   case ISL_TILING_LINEAR:
      *tile_w = cpp;
      *tile_h = 1;
      break;
   default:
      unreachable("not reached");
   }
}


/**
 * This function computes masks that may be used to select the bits of the X
 * and Y coordinates that indicate the offset within a tile. If the BO is
 * untiled, the masks are set to 0.
 */
void
intel_get_tile_masks(enum isl_tiling tiling, uint32_t cpp,
                     uint32_t *mask_x, uint32_t *mask_y)
{
   uint32_t tile_w_bytes, tile_h;

   intel_get_tile_dims(tiling, cpp, &tile_w_bytes, &tile_h);

   *mask_x = tile_w_bytes / cpp - 1;
   *mask_y = tile_h - 1;
}

/**
 * Compute the offset (in bytes) from the start of the BO to the given x
 * and y coordinate. For tiled BOs, caller must ensure that x and y are
 * multiples of the tile size.
 */
uint32_t
intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
                                 uint32_t x, uint32_t y)
{
   int cpp = mt->cpp;
   uint32_t pitch = mt->surf.row_pitch_B;

   switch (mt->surf.tiling) {
   default:
      unreachable("not reached");
   case ISL_TILING_LINEAR:
      return y * pitch + x * cpp;
   case ISL_TILING_X:
      /* X tiles are 512 bytes x 8 rows (4096 bytes each). */
      assert((x % (512 / cpp)) == 0);
      assert((y % 8) == 0);
      return y * pitch + x / (512 / cpp) * 4096;
   case ISL_TILING_Y0:
      /* Y tiles are 128 bytes x 32 rows (4096 bytes each). */
      assert((x % (128 / cpp)) == 0);
      assert((y % 32) == 0);
      return y * pitch + x / (128 / cpp) * 4096;
   }
}

/**
 * Rendering with tiled buffers requires that the base address of the buffer
 * be aligned to a page boundary. For renderbuffers, and sometimes with
 * textures, we may want the surface to point at a texture image level that
 * isn't at a page boundary.
 *
 * This function returns an appropriately-aligned base offset
 * according to the tiling restrictions, plus any required x/y offset
 * from there.
 */
uint32_t
intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               uint32_t *tile_x,
                               uint32_t *tile_y)
{
   uint32_t x, y;
   uint32_t mask_x, mask_y;

   intel_get_tile_masks(mt->surf.tiling, mt->cpp, &mask_x, &mask_y);
   intel_miptree_get_image_offset(mt, level, slice, &x, &y);

   /* The intra-tile remainder becomes the x/y offset... */
   *tile_x = x & mask_x;
   *tile_y = y & mask_y;

   /* ...and the tile-aligned part becomes the base offset. */
   return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y);
}

/* CPU (mapped) copy of one slice from src_mt to dst_mt, used when the
 * blitter cannot handle the formats involved.
 */
static void
intel_miptree_copy_slice_sw(struct brw_context *brw,
                            struct intel_mipmap_tree *src_mt,
                            unsigned src_level, unsigned src_layer,
                            struct intel_mipmap_tree *dst_mt,
                            unsigned dst_level, unsigned dst_layer,
                            unsigned width, unsigned height)
{
   void *src, *dst;
   ptrdiff_t src_stride, dst_stride;
   const unsigned cpp = (isl_format_get_layout(dst_mt->surf.format)->bpb / 8);

   intel_miptree_map(brw, src_mt,
                     src_level, src_layer,
                     0, 0,
                     width, height,
                     GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
                     &src, &src_stride);

   intel_miptree_map(brw, dst_mt,
                     dst_level, dst_layer,
                     0, 0,
                     width, height,
                     GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
                     BRW_MAP_DIRECT_BIT,
                     &dst, &dst_stride);

   DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src, src_stride,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst, dst_stride,
       width, height);

   /* If both strides equal the packed row size, copy everything at once;
    * otherwise copy row by row.
    */
   int row_size = cpp * width;
   if (src_stride == row_size &&
       dst_stride == row_size) {
      memcpy(dst, src, row_size * height);
   } else {
      for (int i = 0; i < height; i++) {
         memcpy(dst, src, row_size);
         dst += dst_stride;
         src += src_stride;
      }
   }

   intel_miptree_unmap(brw, dst_mt, dst_level, dst_layer);
   intel_miptree_unmap(brw, src_mt, src_level, src_layer);

   /* Don't forget to copy the stencil data over, too. We could have skipped
    * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
    * shuffling the two data sources in/out of temporary storage instead of
    * the direct mapping we get this way.
    */
   if (dst_mt->stencil_mt) {
      assert(src_mt->stencil_mt);
      intel_miptree_copy_slice_sw(brw,
                                  src_mt->stencil_mt, src_level, src_layer,
                                  dst_mt->stencil_mt, dst_level, dst_layer,
                                  width, height);
   }
}

/* Copy one miptree slice (and its separate stencil, if any) from src to dst,
 * choosing blorp, the blitter, or a software fallback by hardware generation.
 */
void
intel_miptree_copy_slice(struct brw_context *brw,
                         struct intel_mipmap_tree *src_mt,
                         unsigned src_level, unsigned src_layer,
                         struct intel_mipmap_tree *dst_mt,
                         unsigned dst_level, unsigned dst_layer)

{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   mesa_format format = src_mt->format;
   unsigned width = minify(src_mt->surf.phys_level0_sa.width,
                           src_level - src_mt->first_level);
   unsigned height = minify(src_mt->surf.phys_level0_sa.height,
                            src_level - src_mt->first_level);

   assert(src_layer < get_num_phys_layers(&src_mt->surf,
                                          src_level - src_mt->first_level));

   assert(_mesa_get_srgb_format_linear(src_mt->format) ==
          _mesa_get_srgb_format_linear(dst_mt->format));

   DBG("validate blit mt %s %p %d,%d -> mt %s %p %d,%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src_level, src_layer,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst_level, dst_layer,
       width, height);

   if (devinfo->gen >= 6) {
      /* On gen6 and above, we just use blorp. It's faster than the blitter
       * and can handle everything without software fallbacks.
       */
      brw_blorp_copy_miptrees(brw,
                              src_mt, src_level, src_layer,
                              dst_mt, dst_level, dst_layer,
                              0, 0, 0, 0, width, height);

      if (src_mt->stencil_mt) {
         assert(dst_mt->stencil_mt);
         brw_blorp_copy_miptrees(brw,
                                 src_mt->stencil_mt, src_level, src_layer,
                                 dst_mt->stencil_mt, dst_level, dst_layer,
                                 0, 0, 0, 0, width, height);
      }
      return;
   }

   if (dst_mt->compressed) {
      /* Convert pixel dimensions to compressed-block dimensions. */
      unsigned int i, j;
      _mesa_get_format_block_size(dst_mt->format, &i, &j);
      height = ALIGN_NPOT(height, j) / j;
      width = ALIGN_NPOT(width, i) / i;
   }

   /* Gen4-5 doesn't support separate stencil */
   assert(!src_mt->stencil_mt);

   uint32_t dst_x, dst_y, src_x, src_y;
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_layer,
                                  &dst_x, &dst_y);
   intel_miptree_get_image_offset(src_mt, src_level, src_layer,
                                  &src_x, &src_y);

   DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src_x, src_y, src_mt->surf.row_pitch_B,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst_x, dst_y, dst_mt->surf.row_pitch_B,
       width, height);

   /* Fall back to a mapped CPU copy if the blitter can't do it. */
   if (!intel_miptree_blit(brw,
                           src_mt, src_level, src_layer, 0, 0, false,
                           dst_mt, dst_level, dst_layer, 0, 0, false,
                           width, height, COLOR_LOGICOP_COPY)) {
      perf_debug("miptree validate blit for %s failed\n",
                 _mesa_get_format_name(format));

      intel_miptree_copy_slice_sw(brw,
                                  src_mt, src_level, src_layer,
                                  dst_mt, dst_level, dst_layer,
                                  width, height);
   }
}

/**
 * Copies the image's current data to the given miptree, and associates that
 * miptree with the image.
1611 */ 1612void 1613intel_miptree_copy_teximage(struct brw_context *brw, 1614 struct intel_texture_image *intelImage, 1615 struct intel_mipmap_tree *dst_mt) 1616{ 1617 struct intel_mipmap_tree *src_mt = intelImage->mt; 1618 struct intel_texture_object *intel_obj = 1619 intel_texture_object(intelImage->base.Base.TexObject); 1620 int level = intelImage->base.Base.Level; 1621 const unsigned face = intelImage->base.Base.Face; 1622 unsigned start_layer, end_layer; 1623 1624 if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) { 1625 assert(face == 0); 1626 assert(intelImage->base.Base.Height); 1627 start_layer = 0; 1628 end_layer = intelImage->base.Base.Height - 1; 1629 } else if (face > 0) { 1630 start_layer = face; 1631 end_layer = face; 1632 } else { 1633 assert(intelImage->base.Base.Depth); 1634 start_layer = 0; 1635 end_layer = intelImage->base.Base.Depth - 1; 1636 } 1637 1638 for (unsigned i = start_layer; i <= end_layer; i++) { 1639 intel_miptree_copy_slice(brw, 1640 src_mt, level, i, 1641 dst_mt, level, i); 1642 } 1643 1644 intel_miptree_reference(&intelImage->mt, dst_mt); 1645 intel_obj->needs_validate = true; 1646} 1647 1648static struct intel_miptree_aux_buffer * 1649intel_alloc_aux_buffer(struct brw_context *brw, 1650 const struct isl_surf *aux_surf, 1651 bool wants_memset, 1652 uint8_t memset_value) 1653{ 1654 struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1); 1655 if (!buf) 1656 return false; 1657 1658 uint64_t size = aux_surf->size_B; 1659 1660 const bool has_indirect_clear = brw->isl_dev.ss.clear_color_state_size > 0; 1661 if (has_indirect_clear) { 1662 /* On CNL+, instead of setting the clear color in the SURFACE_STATE, we 1663 * will set a pointer to a dword somewhere that contains the color. So, 1664 * allocate the space for the clear color value here on the aux buffer. 
1665 */ 1666 buf->clear_color_offset = size; 1667 size += brw->isl_dev.ss.clear_color_state_size; 1668 } 1669 1670 /* If the buffer needs to be initialised (requiring the buffer to be 1671 * immediately mapped to cpu space for writing), do not use the gpu access 1672 * flag which can cause an unnecessary delay if the backing pages happened 1673 * to be just used by the GPU. 1674 */ 1675 const bool alloc_zeroed = wants_memset && memset_value == 0; 1676 const bool needs_memset = 1677 !alloc_zeroed && (wants_memset || has_indirect_clear); 1678 const uint32_t alloc_flags = 1679 alloc_zeroed ? BO_ALLOC_ZEROED : (needs_memset ? 0 : BO_ALLOC_BUSY); 1680 1681 /* ISL has stricter set of alignment rules then the drm allocator. 1682 * Therefore one can pass the ISL dimensions in terms of bytes instead of 1683 * trying to recalculate based on different format block sizes. 1684 */ 1685 buf->bo = brw_bo_alloc_tiled(brw->bufmgr, "aux-miptree", size, 1686 BRW_MEMZONE_OTHER, I915_TILING_Y, 1687 aux_surf->row_pitch_B, alloc_flags); 1688 if (!buf->bo) { 1689 free(buf); 1690 return NULL; 1691 } 1692 1693 /* Initialize the bo to the desired value */ 1694 if (needs_memset) { 1695 assert(!(alloc_flags & BO_ALLOC_BUSY)); 1696 1697 void *map = brw_bo_map(brw, buf->bo, MAP_WRITE | MAP_RAW); 1698 if (map == NULL) { 1699 intel_miptree_aux_buffer_free(buf); 1700 return NULL; 1701 } 1702 1703 /* Memset the aux_surf portion of the BO. */ 1704 if (wants_memset) 1705 memset(map, memset_value, aux_surf->size_B); 1706 1707 /* Zero the indirect clear color to match ::fast_clear_color. 
*/ 1708 if (has_indirect_clear) { 1709 memset((char *)map + buf->clear_color_offset, 0, 1710 brw->isl_dev.ss.clear_color_state_size); 1711 } 1712 1713 brw_bo_unmap(buf->bo); 1714 } 1715 1716 if (has_indirect_clear) { 1717 buf->clear_color_bo = buf->bo; 1718 brw_bo_reference(buf->clear_color_bo); 1719 } 1720 1721 buf->surf = *aux_surf; 1722 1723 return buf; 1724} 1725 1726 1727/** 1728 * Helper for intel_miptree_alloc_aux() that sets 1729 * \c mt->level[level].has_hiz. Return true if and only if 1730 * \c has_hiz was set. 1731 */ 1732static bool 1733intel_miptree_level_enable_hiz(struct brw_context *brw, 1734 struct intel_mipmap_tree *mt, 1735 uint32_t level) 1736{ 1737 const struct gen_device_info *devinfo = &brw->screen->devinfo; 1738 1739 assert(mt->aux_buf); 1740 assert(mt->surf.size_B > 0); 1741 1742 if (devinfo->gen >= 8 || devinfo->is_haswell) { 1743 uint32_t width = minify(mt->surf.phys_level0_sa.width, level); 1744 uint32_t height = minify(mt->surf.phys_level0_sa.height, level); 1745 1746 /* Disable HiZ for LOD > 0 unless the width is 8 aligned 1747 * and the height is 4 aligned. This allows our HiZ support 1748 * to fulfill Haswell restrictions for HiZ ops. For LOD == 0, 1749 * we can grow the width & height to allow the HiZ op to 1750 * force the proper size alignments. 1751 */ 1752 if (level > 0 && ((width & 7) || (height & 3))) { 1753 DBG("mt %p level %d: HiZ DISABLED\n", mt, level); 1754 return false; 1755 } 1756 } 1757 1758 DBG("mt %p level %d: HiZ enabled\n", mt, level); 1759 mt->level[level].has_hiz = true; 1760 return true; 1761} 1762 1763 1764/** 1765 * Allocate the initial aux surface for a miptree based on mt->aux_usage 1766 * 1767 * Since MCS, HiZ, and CCS_E can compress more than just clear color, we 1768 * create the auxiliary surfaces up-front. CCS_D, on the other hand, can only 1769 * compress clear color so we wait until an actual fast-clear to allocate it. 
 */
bool
intel_miptree_alloc_aux(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->aux_buf == NULL);

   /* Get the aux buf allocation parameters for this miptree. */
   enum isl_aux_state initial_state;
   uint8_t memset_value;
   struct isl_surf aux_surf;
   MAYBE_UNUSED bool aux_surf_ok = false;

   switch (mt->aux_usage) {
   case ISL_AUX_USAGE_NONE:
      aux_surf.size_B = 0;
      aux_surf_ok = true;
      break;
   case ISL_AUX_USAGE_HIZ:
      initial_state = ISL_AUX_STATE_AUX_INVALID;
      memset_value = 0;
      aux_surf_ok = isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &aux_surf);
      break;
   case ISL_AUX_USAGE_MCS:
      /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
       *
       *     When MCS buffer is enabled and bound to MSRT, it is required
       *     that it is cleared prior to any rendering.
       *
       * Since we don't use the MCS buffer for any purpose other than
       * rendering, it makes sense to just clear it immediately upon
       * allocation.
       *
       * Note: the clear value for MCS buffers is all 1's, so we memset to
       * 0xff.
       */
      initial_state = ISL_AUX_STATE_CLEAR;
      memset_value = 0xFF;
      aux_surf_ok = isl_surf_get_mcs_surf(&brw->isl_dev, &mt->surf, &aux_surf);
      break;
   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      /* When CCS_E is used, we need to ensure that the CCS starts off in a
       * valid state. From the Sky Lake PRM, "MCS Buffer for Render
       * Target(s)":
       *
       *     "If Software wants to enable Color Compression without Fast
       *     clear, Software needs to initialize MCS with zeros."
       *
       * A CCS value of 0 indicates that the corresponding block is in the
       * pass-through state which is what we want.
       *
       * For CCS_D, do the same thing. On gen9+, this avoids having any
       * undefined bits in the aux buffer.
       */
      initial_state = ISL_AUX_STATE_PASS_THROUGH;
      memset_value = 0;
      aux_surf_ok =
         isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, &aux_surf, 0);
      break;
   }

   /* We should have a valid aux_surf. */
   assert(aux_surf_ok);

   /* No work is needed for a zero-sized auxiliary buffer. */
   if (aux_surf.size_B == 0)
      return true;

   /* Create the aux_state for the auxiliary buffer. */
   mt->aux_state = create_aux_state_map(mt, initial_state);
   if (mt->aux_state == NULL)
      return false;

   /* Allocate the auxiliary buffer. */
   const bool needs_memset = initial_state != ISL_AUX_STATE_AUX_INVALID;
   mt->aux_buf = intel_alloc_aux_buffer(brw, &aux_surf, needs_memset,
                                        memset_value);
   if (mt->aux_buf == NULL) {
      free_aux_state_map(mt->aux_state);
      mt->aux_state = NULL;
      return false;
   }

   /* Perform aux_usage-specific initialization. */
   if (mt->aux_usage == ISL_AUX_USAGE_HIZ) {
      for (unsigned level = mt->first_level; level <= mt->last_level; ++level)
         intel_miptree_level_enable_hiz(brw, mt, level);
   }

   return true;
}


/**
 * Can the miptree sample using the hiz buffer?
 */
bool
intel_miptree_sample_with_hiz(struct brw_context *brw,
                              struct intel_mipmap_tree *mt)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (!devinfo->has_sample_with_hiz) {
      return false;
   }

   if (!mt->aux_buf) {
      return false;
   }

   /* It seems the hardware won't fallback to the depth buffer if some of the
    * mipmap levels aren't available in the HiZ buffer. So we need all levels
    * of the texture to be HiZ enabled.
    */
   for (unsigned level = 0; level < mt->surf.levels; ++level) {
      if (!intel_miptree_level_has_hiz(mt, level))
         return false;
   }

   /* If compressed multisampling is enabled, then we use it for the auxiliary
    * buffer instead.
    *
    * From the BDW PRM (Volume 2d: Command Reference: Structures
    *                   RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
    *
    *  "If this field is set to AUX_HIZ, Number of Multisamples must be
    *   MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D.
    *
    * There is no such blurb for 1D textures, but there is sufficient evidence
    * that this is broken on SKL+.
    */
   return (mt->surf.samples == 1 &&
           mt->target != GL_TEXTURE_3D &&
           mt->target != GL_TEXTURE_1D /* gen9+ restriction */);
}

/**
 * Does the miptree slice have hiz enabled?
 */
bool
intel_miptree_level_has_hiz(const struct intel_mipmap_tree *mt, uint32_t level)
{
   intel_miptree_check_level_layer(mt, level, 0);
   return mt->level[level].has_hiz;
}

/* Resolve INTEL_REMAINING_LAYERS (here: levels) into a concrete level count,
 * asserting the range lies inside the miptree.
 */
static inline uint32_t
miptree_level_range_length(const struct intel_mipmap_tree *mt,
                           uint32_t start_level, uint32_t num_levels)
{
   assert(start_level >= mt->first_level);
   assert(start_level <= mt->last_level);

   if (num_levels == INTEL_REMAINING_LAYERS)
      num_levels = mt->last_level - start_level + 1;
   /* Check for overflow */
   assert(start_level + num_levels >= start_level);
   assert(start_level + num_levels <= mt->last_level + 1);

   return num_levels;
}

/* Resolve INTEL_REMAINING_LAYERS into a concrete layer count for a level,
 * asserting the range lies inside that level.
 */
static inline uint32_t
miptree_layer_range_length(const struct intel_mipmap_tree *mt, uint32_t level,
                           uint32_t start_layer, uint32_t num_layers)
{
   assert(level <= mt->last_level);

   const uint32_t total_num_layers = brw_get_num_logical_layers(mt, level);
   assert(start_layer < total_num_layers);
   if (num_layers == INTEL_REMAINING_LAYERS)
      num_layers = total_num_layers - start_layer;
   /* Check for overflow */
   assert(start_layer + num_layers >= start_layer);
   assert(start_layer + num_layers <= total_num_layers);

   return num_layers;
}

bool
intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt,
                                   unsigned start_level, unsigned num_levels,
                                   unsigned start_layer, unsigned num_layers)
{
   assert(_mesa_is_format_color_format(mt->format));

   /* No aux buffer means nothing can be unresolved. */
   if (!mt->aux_buf)
      return false;

   /* Clamp the level range to fit the miptree */
   num_levels = miptree_level_range_length(mt, start_level, num_levels);

   for (uint32_t l = 0; l < num_levels; l++) {
      const uint32_t level = start_level + l;
      const uint32_t level_layers =
         miptree_layer_range_length(mt, level, start_layer, num_layers);
      for (unsigned a = 0; a < level_layers; a++) {
         enum isl_aux_state aux_state =
            intel_miptree_get_aux_state(mt, level, start_layer + a);
         assert(aux_state != ISL_AUX_STATE_AUX_INVALID);
         if (aux_state != ISL_AUX_STATE_PASS_THROUGH)
            return true;
      }
   }

   return false;
}

/* Assert that a color resolve on (level, layer) is legal for this hardware
 * generation; no-op in release builds.
 */
static void
intel_miptree_check_color_resolve(const struct brw_context *brw,
                                  const struct intel_mipmap_tree *mt,
                                  unsigned level, unsigned layer)
{
   if (!mt->aux_buf)
      return;

   /* Fast color clear is supported for mipmapped surfaces only on Gen8+. */
   assert(brw->screen->devinfo.gen >= 8 ||
          (level == 0 && mt->first_level == 0 && mt->last_level == 0));

   /* Compression of arrayed msaa surfaces is supported. */
   if (mt->surf.samples > 1)
      return;

   /* Fast color clear is supported for non-msaa arrays only on Gen8+.
*/ 1996 assert(brw->screen->devinfo.gen >= 8 || 1997 (layer == 0 && 1998 mt->surf.logical_level0_px.depth == 1 && 1999 mt->surf.logical_level0_px.array_len == 1)); 2000 2001 (void)level; 2002 (void)layer; 2003} 2004 2005static enum isl_aux_op 2006get_ccs_d_resolve_op(enum isl_aux_state aux_state, 2007 enum isl_aux_usage aux_usage, 2008 bool fast_clear_supported) 2009{ 2010 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_CCS_D); 2011 2012 const bool ccs_supported = aux_usage == ISL_AUX_USAGE_CCS_D; 2013 2014 assert(ccs_supported == fast_clear_supported); 2015 2016 switch (aux_state) { 2017 case ISL_AUX_STATE_CLEAR: 2018 case ISL_AUX_STATE_PARTIAL_CLEAR: 2019 if (!ccs_supported) 2020 return ISL_AUX_OP_FULL_RESOLVE; 2021 else 2022 return ISL_AUX_OP_NONE; 2023 2024 case ISL_AUX_STATE_PASS_THROUGH: 2025 return ISL_AUX_OP_NONE; 2026 2027 case ISL_AUX_STATE_RESOLVED: 2028 case ISL_AUX_STATE_AUX_INVALID: 2029 case ISL_AUX_STATE_COMPRESSED_CLEAR: 2030 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: 2031 break; 2032 } 2033 2034 unreachable("Invalid aux state for CCS_D"); 2035} 2036 2037static enum isl_aux_op 2038get_ccs_e_resolve_op(enum isl_aux_state aux_state, 2039 enum isl_aux_usage aux_usage, 2040 bool fast_clear_supported) 2041{ 2042 /* CCS_E surfaces can be accessed as CCS_D if we're careful. 
    */
   assert(aux_usage == ISL_AUX_USAGE_NONE ||
          aux_usage == ISL_AUX_USAGE_CCS_D ||
          aux_usage == ISL_AUX_USAGE_CCS_E);

   if (aux_usage == ISL_AUX_USAGE_CCS_D)
      assert(fast_clear_supported);

   switch (aux_state) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_PARTIAL_CLEAR:
      if (fast_clear_supported)
         return ISL_AUX_OP_NONE;
      else if (aux_usage == ISL_AUX_USAGE_CCS_E)
         return ISL_AUX_OP_PARTIAL_RESOLVE;
      else
         return ISL_AUX_OP_FULL_RESOLVE;

   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      if (aux_usage != ISL_AUX_USAGE_CCS_E)
         return ISL_AUX_OP_FULL_RESOLVE;
      else if (!fast_clear_supported)
         return ISL_AUX_OP_PARTIAL_RESOLVE;
      else
         return ISL_AUX_OP_NONE;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      if (aux_usage != ISL_AUX_USAGE_CCS_E)
         return ISL_AUX_OP_FULL_RESOLVE;
      else
         return ISL_AUX_OP_NONE;

   case ISL_AUX_STATE_PASS_THROUGH:
      return ISL_AUX_OP_NONE;

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_AUX_INVALID:
      break;
   }

   unreachable("Invalid aux state for CCS_E");
}

/* Perform whatever CCS resolve a single level/layer needs before the given
 * access, then update the tracked aux state to match.
 */
static void
intel_miptree_prepare_ccs_access(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 uint32_t level, uint32_t layer,
                                 enum isl_aux_usage aux_usage,
                                 bool fast_clear_supported)
{
   enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);

   enum isl_aux_op resolve_op;
   if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
      resolve_op = get_ccs_e_resolve_op(aux_state, aux_usage,
                                        fast_clear_supported);
   } else {
      assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
      resolve_op = get_ccs_d_resolve_op(aux_state, aux_usage,
                                        fast_clear_supported);
   }

   if (resolve_op != ISL_AUX_OP_NONE) {
      intel_miptree_check_color_resolve(brw, mt, level, layer);
      brw_blorp_resolve_color(brw, mt, level, layer, resolve_op);

      switch (resolve_op) {
      case ISL_AUX_OP_FULL_RESOLVE:
         /* The CCS full resolve operation destroys the CCS and sets it to the
          * pass-through state.  (You can also think of this as being both a
          * resolve and an ambiguate in one operation.)
          */
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_PASS_THROUGH);
         break;

      case ISL_AUX_OP_PARTIAL_RESOLVE:
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
         break;

      default:
         unreachable("Invalid resolve op");
      }
   }
}

/* Record the aux-state transition caused by writing a single level/layer
 * of a CCS surface with the given usage.
 */
static void
intel_miptree_finish_ccs_write(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level, uint32_t layer,
                               enum isl_aux_usage aux_usage)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE ||
          aux_usage == ISL_AUX_USAGE_CCS_D ||
          aux_usage == ISL_AUX_USAGE_CCS_E);

   enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);

   if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
      switch (aux_state) {
      case ISL_AUX_STATE_CLEAR:
      case ISL_AUX_STATE_PARTIAL_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_E ||
                aux_usage == ISL_AUX_USAGE_CCS_D);

         if (aux_usage == ISL_AUX_USAGE_CCS_E) {
            intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                        ISL_AUX_STATE_COMPRESSED_CLEAR);
         } else if (aux_state != ISL_AUX_STATE_PARTIAL_CLEAR) {
            intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                        ISL_AUX_STATE_PARTIAL_CLEAR);
         }
         break;

      case ISL_AUX_STATE_COMPRESSED_CLEAR:
      case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_E);
         break; /* Nothing to do */

      case ISL_AUX_STATE_PASS_THROUGH:
         if (aux_usage == ISL_AUX_USAGE_CCS_E) {
            intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                        ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
         } else {
            /* Nothing to do */
         }
         break;

      case
ISL_AUX_STATE_RESOLVED:
      case ISL_AUX_STATE_AUX_INVALID:
         unreachable("Invalid aux state for CCS_E");
      }
   } else {
      assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
      /* CCS_D is a bit simpler */
      switch (aux_state) {
      case ISL_AUX_STATE_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_D);
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_PARTIAL_CLEAR);
         break;

      case ISL_AUX_STATE_PARTIAL_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_D);
         break; /* Nothing to do */

      case ISL_AUX_STATE_PASS_THROUGH:
         /* Nothing to do */
         break;

      case ISL_AUX_STATE_COMPRESSED_CLEAR:
      case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      case ISL_AUX_STATE_RESOLVED:
      case ISL_AUX_STATE_AUX_INVALID:
         unreachable("Invalid aux state for CCS_D");
      }
   }
}

/* Partially resolve the MCS for a single layer when the upcoming access
 * cannot handle fast-clear values, then update the tracked aux state.
 * MCS surfaces are single-level, so only a layer is passed.
 */
static void
intel_miptree_prepare_mcs_access(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 uint32_t layer,
                                 enum isl_aux_usage aux_usage,
                                 bool fast_clear_supported)
{
   assert(aux_usage == ISL_AUX_USAGE_MCS);

   switch (intel_miptree_get_aux_state(mt, 0, layer)) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      if (!fast_clear_supported) {
         brw_blorp_mcs_partial_resolve(brw, mt, layer, 1);
         intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      }
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_AUX_INVALID:
   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid aux state for MCS");
   }
}

/* Record the aux-state transition caused by writing a single layer of an
 * MCS surface.
 */
static void
intel_miptree_finish_mcs_write(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t layer,
                               enum isl_aux_usage aux_usage)
{
   assert(aux_usage == ISL_AUX_USAGE_MCS);

   switch (intel_miptree_get_aux_state(mt, 0, layer)) {
   case ISL_AUX_STATE_CLEAR:
      intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
                                  ISL_AUX_STATE_COMPRESSED_CLEAR);
      break;

   case ISL_AUX_STATE_COMPRESSED_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_AUX_INVALID:
   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid aux state for MCS");
   }
}

/* Execute any HiZ resolve or ambiguate needed before a single level/layer
 * of a depth surface may be accessed, then update the tracked aux state.
 */
static void
intel_miptree_prepare_hiz_access(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 uint32_t level, uint32_t layer,
                                 enum isl_aux_usage aux_usage,
                                 bool fast_clear_supported)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);

   enum isl_aux_op hiz_op = ISL_AUX_OP_NONE;
   switch (intel_miptree_get_aux_state(mt, level, layer)) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      if (aux_usage != ISL_AUX_USAGE_HIZ || !fast_clear_supported)
         hiz_op = ISL_AUX_OP_FULL_RESOLVE;
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      if (aux_usage != ISL_AUX_USAGE_HIZ)
         hiz_op = ISL_AUX_OP_FULL_RESOLVE;
      break;

   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_RESOLVED:
      break;

   case ISL_AUX_STATE_AUX_INVALID:
      if (aux_usage == ISL_AUX_USAGE_HIZ)
         hiz_op = ISL_AUX_OP_AMBIGUATE;
      break;

   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid HiZ state");
   }

   if (hiz_op != ISL_AUX_OP_NONE) {
      intel_hiz_exec(brw, mt, level, layer, 1, hiz_op);

      switch (hiz_op) {
      case ISL_AUX_OP_FULL_RESOLVE:
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_RESOLVED);
         break;

      case ISL_AUX_OP_AMBIGUATE:
         /* The HiZ resolve operation is actually an ambiguate */
         intel_miptree_set_aux_state(brw, mt, level, layer,
1,
                                     ISL_AUX_STATE_PASS_THROUGH);
         break;

      default:
         unreachable("Invalid HiZ op");
      }
   }
}

/* Record the aux-state transition caused by writing a single level/layer
 * of a HiZ-enabled depth surface.
 */
static void
intel_miptree_finish_hiz_write(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level, uint32_t layer,
                               enum isl_aux_usage aux_usage)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);

   switch (intel_miptree_get_aux_state(mt, level, layer)) {
   case ISL_AUX_STATE_CLEAR:
      assert(aux_usage == ISL_AUX_USAGE_HIZ);
      intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                  ISL_AUX_STATE_COMPRESSED_CLEAR);
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      assert(aux_usage == ISL_AUX_USAGE_HIZ);
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
      if (aux_usage == ISL_AUX_USAGE_HIZ) {
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      } else {
         /* A non-HiZ write to a resolved surface leaves HiZ stale. */
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_AUX_INVALID);
      }
      break;

   case ISL_AUX_STATE_PASS_THROUGH:
      if (aux_usage == ISL_AUX_USAGE_HIZ) {
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      }
      break;

   case ISL_AUX_STATE_AUX_INVALID:
      assert(aux_usage != ISL_AUX_USAGE_HIZ);
      break;

   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid HiZ state");
   }
}

/* Prepare a range of levels/layers for an access with the given aux usage
 * by performing whatever resolves the per-slice aux states require.
 */
void
intel_miptree_prepare_access(struct brw_context *brw,
                             struct intel_mipmap_tree *mt,
                             uint32_t start_level, uint32_t num_levels,
                             uint32_t start_layer, uint32_t num_layers,
                             enum isl_aux_usage aux_usage,
                             bool fast_clear_supported)
{
   num_levels = miptree_level_range_length(mt, start_level, num_levels);

   switch (mt->aux_usage) {
   case ISL_AUX_USAGE_NONE:
      /* Nothing to do */
      break;

   case
ISL_AUX_USAGE_MCS: 2376 assert(mt->aux_buf); 2377 assert(start_level == 0 && num_levels == 1); 2378 const uint32_t level_layers = 2379 miptree_layer_range_length(mt, 0, start_layer, num_layers); 2380 for (uint32_t a = 0; a < level_layers; a++) { 2381 intel_miptree_prepare_mcs_access(brw, mt, start_layer + a, 2382 aux_usage, fast_clear_supported); 2383 } 2384 break; 2385 2386 case ISL_AUX_USAGE_CCS_D: 2387 case ISL_AUX_USAGE_CCS_E: 2388 if (!mt->aux_buf) 2389 return; 2390 2391 for (uint32_t l = 0; l < num_levels; l++) { 2392 const uint32_t level = start_level + l; 2393 const uint32_t level_layers = 2394 miptree_layer_range_length(mt, level, start_layer, num_layers); 2395 for (uint32_t a = 0; a < level_layers; a++) { 2396 intel_miptree_prepare_ccs_access(brw, mt, level, 2397 start_layer + a, 2398 aux_usage, fast_clear_supported); 2399 } 2400 } 2401 break; 2402 2403 case ISL_AUX_USAGE_HIZ: 2404 assert(mt->aux_buf); 2405 for (uint32_t l = 0; l < num_levels; l++) { 2406 const uint32_t level = start_level + l; 2407 if (!intel_miptree_level_has_hiz(mt, level)) 2408 continue; 2409 2410 const uint32_t level_layers = 2411 miptree_layer_range_length(mt, level, start_layer, num_layers); 2412 for (uint32_t a = 0; a < level_layers; a++) { 2413 intel_miptree_prepare_hiz_access(brw, mt, level, start_layer + a, 2414 aux_usage, fast_clear_supported); 2415 } 2416 } 2417 break; 2418 2419 default: 2420 unreachable("Invalid aux usage"); 2421 } 2422} 2423 2424void 2425intel_miptree_finish_write(struct brw_context *brw, 2426 struct intel_mipmap_tree *mt, uint32_t level, 2427 uint32_t start_layer, uint32_t num_layers, 2428 enum isl_aux_usage aux_usage) 2429{ 2430 const struct gen_device_info *devinfo = &brw->screen->devinfo; 2431 num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers); 2432 2433 switch (mt->aux_usage) { 2434 case ISL_AUX_USAGE_NONE: 2435 if (mt->format == MESA_FORMAT_S_UINT8 && devinfo->gen <= 7) { 2436 mt->shadow_needs_update = true; 2437 } else if 
(intel_miptree_has_etc_shadow(brw, mt)) { 2438 mt->shadow_needs_update = true; 2439 } 2440 break; 2441 2442 case ISL_AUX_USAGE_MCS: 2443 assert(mt->aux_buf); 2444 for (uint32_t a = 0; a < num_layers; a++) { 2445 intel_miptree_finish_mcs_write(brw, mt, start_layer + a, 2446 aux_usage); 2447 } 2448 break; 2449 2450 case ISL_AUX_USAGE_CCS_D: 2451 case ISL_AUX_USAGE_CCS_E: 2452 if (!mt->aux_buf) 2453 return; 2454 2455 for (uint32_t a = 0; a < num_layers; a++) { 2456 intel_miptree_finish_ccs_write(brw, mt, level, start_layer + a, 2457 aux_usage); 2458 } 2459 break; 2460 2461 case ISL_AUX_USAGE_HIZ: 2462 if (!intel_miptree_level_has_hiz(mt, level)) 2463 return; 2464 2465 for (uint32_t a = 0; a < num_layers; a++) { 2466 intel_miptree_finish_hiz_write(brw, mt, level, start_layer + a, 2467 aux_usage); 2468 } 2469 break; 2470 2471 default: 2472 unreachable("Invavlid aux usage"); 2473 } 2474} 2475 2476enum isl_aux_state 2477intel_miptree_get_aux_state(const struct intel_mipmap_tree *mt, 2478 uint32_t level, uint32_t layer) 2479{ 2480 intel_miptree_check_level_layer(mt, level, layer); 2481 2482 if (_mesa_is_format_color_format(mt->format)) { 2483 assert(mt->aux_buf != NULL); 2484 assert(mt->surf.samples == 1 || 2485 mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 2486 } else if (mt->format == MESA_FORMAT_S_UINT8) { 2487 unreachable("Cannot get aux state for stencil"); 2488 } else { 2489 assert(intel_miptree_level_has_hiz(mt, level)); 2490 } 2491 2492 return mt->aux_state[level][layer]; 2493} 2494 2495void 2496intel_miptree_set_aux_state(struct brw_context *brw, 2497 struct intel_mipmap_tree *mt, uint32_t level, 2498 uint32_t start_layer, uint32_t num_layers, 2499 enum isl_aux_state aux_state) 2500{ 2501 num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers); 2502 2503 if (_mesa_is_format_color_format(mt->format)) { 2504 assert(mt->aux_buf != NULL); 2505 assert(mt->surf.samples == 1 || 2506 mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 2507 } else if 
(mt->format == MESA_FORMAT_S_UINT8) { 2508 unreachable("Cannot get aux state for stencil"); 2509 } else { 2510 assert(intel_miptree_level_has_hiz(mt, level)); 2511 } 2512 2513 for (unsigned a = 0; a < num_layers; a++) { 2514 if (mt->aux_state[level][start_layer + a] != aux_state) { 2515 mt->aux_state[level][start_layer + a] = aux_state; 2516 brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE; 2517 } 2518 } 2519} 2520 2521/* On Gen9 color buffers may be compressed by the hardware (lossless 2522 * compression). There are, however, format restrictions and care needs to be 2523 * taken that the sampler engine is capable for re-interpreting a buffer with 2524 * format different the buffer was originally written with. 2525 * 2526 * For example, SRGB formats are not compressible and the sampler engine isn't 2527 * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying 2528 * color buffer needs to be resolved so that the sampling surface can be 2529 * sampled as non-compressed (i.e., without the auxiliary MCS buffer being 2530 * set). 2531 */ 2532static bool 2533can_texture_with_ccs(struct brw_context *brw, 2534 struct intel_mipmap_tree *mt, 2535 enum isl_format view_format) 2536{ 2537 if (mt->aux_usage != ISL_AUX_USAGE_CCS_E) 2538 return false; 2539 2540 if (!format_ccs_e_compat_with_miptree(&brw->screen->devinfo, 2541 mt, view_format)) { 2542 perf_debug("Incompatible sampling format (%s) for rbc (%s)\n", 2543 isl_format_get_layout(view_format)->name, 2544 _mesa_get_format_name(mt->format)); 2545 return false; 2546 } 2547 2548 return true; 2549} 2550 2551enum isl_aux_usage 2552intel_miptree_texture_aux_usage(struct brw_context *brw, 2553 struct intel_mipmap_tree *mt, 2554 enum isl_format view_format, 2555 enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits) 2556{ 2557 assert(brw->screen->devinfo.gen == 9 || astc5x5_wa_bits == 0); 2558 2559 /* On gen9, ASTC 5x5 textures cannot live in the sampler cache along side 2560 * CCS or HiZ compressed textures. 
See gen9_apply_astc5x5_wa_flush() for
    * details.
    */
   if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
       mt->aux_usage != ISL_AUX_USAGE_MCS)
      return ISL_AUX_USAGE_NONE;

   switch (mt->aux_usage) {
   case ISL_AUX_USAGE_HIZ:
      if (intel_miptree_sample_with_hiz(brw, mt))
         return ISL_AUX_USAGE_HIZ;
      break;

   case ISL_AUX_USAGE_MCS:
      return ISL_AUX_USAGE_MCS;

   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      if (!mt->aux_buf) {
         assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
         return ISL_AUX_USAGE_NONE;
      }

      /* If we don't have any unresolved color, report an aux usage of
       * ISL_AUX_USAGE_NONE.  This way, texturing won't even look at the
       * aux surface and we can save some bandwidth.
       */
      if (!intel_miptree_has_color_unresolved(mt, 0, INTEL_REMAINING_LEVELS,
                                              0, INTEL_REMAINING_LAYERS))
         return ISL_AUX_USAGE_NONE;

      if (can_texture_with_ccs(brw, mt, view_format))
         return ISL_AUX_USAGE_CCS_E;
      break;

   default:
      break;
   }

   return ISL_AUX_USAGE_NONE;
}

/* Return true if switching between the two formats does not require the
 * stored fast-clear color to be rewritten.
 */
static bool
isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
{
   /* On gen8 and earlier, the hardware was only capable of handling 0/1 clear
    * values so sRGB curve application was a no-op for all fast-clearable
    * formats.
    *
    * On gen9+, the hardware supports arbitrary clear values.  For sRGB clear
    * values, the hardware interprets the floats, not as what would be
    * returned from the sampler (or written by the shader), but as being
    * between format conversion and sRGB curve application.  This means that
    * we can switch between sRGB and UNORM without having to whack the clear
    * color.
    */
   return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b);
}

/* Resolve a range of levels/layers as needed so it can be sampled through a
 * view with the given format.
 */
void
intel_miptree_prepare_texture(struct brw_context *brw,
                              struct intel_mipmap_tree *mt,
                              enum isl_format view_format,
                              uint32_t start_level, uint32_t num_levels,
                              uint32_t start_layer, uint32_t num_layers,
                              enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
{
   enum isl_aux_usage aux_usage =
      intel_miptree_texture_aux_usage(brw, mt, view_format, astc5x5_wa_bits);

   bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;

   /* Clear color is specified as ints or floats and the conversion is done by
    * the sampler.  If we have a texture view, we would have to perform the
    * clear color conversion manually.  Just disable clear color.
    */
   if (!isl_formats_are_fast_clear_compatible(mt->surf.format, view_format))
      clear_supported = false;

   intel_miptree_prepare_access(brw, mt, start_level, num_levels,
                                start_layer, num_layers,
                                aux_usage, clear_supported);
}

/* Fully resolve the miptree for shader-image (data port) access. */
void
intel_miptree_prepare_image(struct brw_context *brw,
                            struct intel_mipmap_tree *mt)
{
   /* The data port doesn't understand any compression */
   intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
                                0, INTEL_REMAINING_LAYERS,
                                ISL_AUX_USAGE_NONE, false);
}

/* Choose the aux usage to use when rendering to the miptree with the given
 * render format and blend state.
 */
enum isl_aux_usage
intel_miptree_render_aux_usage(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               enum isl_format render_format,
                               bool blend_enabled,
                               bool draw_aux_disabled)
{
   struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (draw_aux_disabled)
      return ISL_AUX_USAGE_NONE;

   switch (mt->aux_usage) {
   case ISL_AUX_USAGE_MCS:
      assert(mt->aux_buf);
      return ISL_AUX_USAGE_MCS;

   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      if (!mt->aux_buf) {
         assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
         return ISL_AUX_USAGE_NONE;
      }

      /* gen9+ hardware technically supports non-0/1 clear colors with sRGB
       * formats.  However, there are issues with blending where it doesn't
       * properly apply the sRGB curve to the clear color when blending.
       */
      if (devinfo->gen >= 9 && blend_enabled &&
          isl_format_is_srgb(render_format) &&
          !isl_color_value_is_zero_one(mt->fast_clear_color, render_format))
         return ISL_AUX_USAGE_NONE;

      if (mt->aux_usage == ISL_AUX_USAGE_CCS_E &&
          format_ccs_e_compat_with_miptree(&brw->screen->devinfo,
                                           mt, render_format))
         return ISL_AUX_USAGE_CCS_E;

      /* Otherwise, we have to fall back to CCS_D */
      return ISL_AUX_USAGE_CCS_D;

   default:
      return ISL_AUX_USAGE_NONE;
   }
}

/* Resolve a single level's layer range as needed before rendering to it. */
void
intel_miptree_prepare_render(struct brw_context *brw,
                             struct intel_mipmap_tree *mt, uint32_t level,
                             uint32_t start_layer, uint32_t layer_count,
                             enum isl_aux_usage aux_usage)
{
   intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
                                aux_usage, aux_usage != ISL_AUX_USAGE_NONE);
}

/* Record aux-state transitions after rendering to a color miptree. */
void
intel_miptree_finish_render(struct brw_context *brw,
                            struct intel_mipmap_tree *mt, uint32_t level,
                            uint32_t start_layer, uint32_t layer_count,
                            enum isl_aux_usage aux_usage)
{
   assert(_mesa_is_format_color_format(mt->format));

   intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
                              aux_usage);
}

/* Resolve a single level's layer range as needed before depth testing. */
void
intel_miptree_prepare_depth(struct brw_context *brw,
                            struct intel_mipmap_tree *mt, uint32_t level,
                            uint32_t start_layer, uint32_t layer_count)
{
   intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
                                mt->aux_usage, mt->aux_buf != NULL);
}

/* Record aux-state transitions after depth rendering, but only when depth
 * writes were actually enabled.
 */
void
intel_miptree_finish_depth(struct brw_context *brw,
                           struct intel_mipmap_tree *mt, uint32_t level,
                           uint32_t start_layer, uint32_t layer_count,
                           bool depth_written)
{
   if (depth_written) {
      intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
                                 mt->aux_usage);
   }
}

/* Resolve the miptree as far as its DRM modifier requires so that it can be
 * handed to an external (non-GL) consumer.
 */
void
intel_miptree_prepare_external(struct brw_context *brw,
                               struct intel_mipmap_tree *mt)
{
   enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
   bool supports_fast_clear = false;

   const struct isl_drm_modifier_info *mod_info =
      isl_drm_modifier_get_info(mt->drm_modifier);

   if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE) {
      /* CCS_E is the only supported aux for external images and it's only
       * supported on very simple images.
       */
      assert(mod_info->aux_usage == ISL_AUX_USAGE_CCS_E);
      assert(_mesa_is_format_color_format(mt->format));
      assert(mt->first_level == 0 && mt->last_level == 0);
      assert(mt->surf.logical_level0_px.depth == 1);
      assert(mt->surf.logical_level0_px.array_len == 1);
      assert(mt->surf.samples == 1);
      assert(mt->aux_buf != NULL);

      aux_usage = mod_info->aux_usage;
      supports_fast_clear = mod_info->supports_clear_color;
   }

   intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
                                0, INTEL_REMAINING_LAYERS,
                                aux_usage, supports_fast_clear);
}

/* Reset aux-state tracking after an external consumer may have modified
 * the miptree behind our back.
 */
void
intel_miptree_finish_external(struct brw_context *brw,
                              struct intel_mipmap_tree *mt)
{
   if (!mt->aux_buf)
      return;

   /* We don't know the actual aux state of the aux surface.  The previous
    * owner could have given it to us in a number of different states.
    * Because we don't know the aux state, we reset the aux state to the
    * least common denominator of possible valid states.
    */
   enum isl_aux_state default_aux_state =
      isl_drm_modifier_get_default_aux_state(mt->drm_modifier);
   assert(mt->last_level == mt->first_level);
   intel_miptree_set_aux_state(brw, mt, 0, 0, INTEL_REMAINING_LAYERS,
                               default_aux_state);
}

/**
 * Make it possible to share the BO backing the given miptree with another
 * process or another miptree.
 *
 * Fast color clears are unsafe with shared buffers, so we need to resolve
 * and then discard the MCS buffer, if present.  We also set the no_ccs flag
 * to ensure that no MCS buffer gets allocated in the future.
 *
 * HiZ is similarly unsafe with shared buffers.
 */
void
intel_miptree_make_shareable(struct brw_context *brw,
                             struct intel_mipmap_tree *mt)
{
   /* MCS buffers are also used for multisample buffers, but we can't resolve
    * away a multisample MCS buffer because it's an integral part of how the
    * pixel data is stored.  Fortunately this code path should never be
    * reached for multisample buffers.
    */
   assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_NONE ||
          mt->surf.samples == 1);

   intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
                                0, INTEL_REMAINING_LAYERS,
                                ISL_AUX_USAGE_NONE, false);

   if (mt->aux_buf) {
      intel_miptree_aux_buffer_free(mt->aux_buf);
      mt->aux_buf = NULL;

      /* Make future calls of intel_miptree_level_has_hiz() return false. */
      for (uint32_t l = mt->first_level; l <= mt->last_level; ++l) {
         mt->level[l].has_hiz = false;
      }

      free(mt->aux_state);
      mt->aux_state = NULL;
      brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
   }

   mt->aux_usage = ISL_AUX_USAGE_NONE;
   mt->supports_fast_clear = false;
}


/**
 * \brief Get pointer offset into stencil buffer.
 *
 * The stencil buffer is W tiled.
Since the GTT is incapable of W fencing, we
 * must decode the tile's layout in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling
 *     Algorithm
 *
 * Even though the returned offset is always positive, the return type is
 * signed due to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of _mesa_get_format_bytes() (#37351)
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   uint32_t tile_size = 4096;
   uint32_t tile_width = 64;
   uint32_t tile_height = 64;
   uint32_t row_size = 64 * stride / 2; /* Two rows are interleaved. */

   uint32_t tile_x = x / tile_width;
   uint32_t tile_y = y / tile_height;

   /* The byte's address relative to the tile's base address. */
   uint32_t byte_x = x % tile_width;
   uint32_t byte_y = y % tile_height;

   /* W-major tiling: bits of x and y interleave at successively finer
    * granularity (8x8, 4x4, 2x2, 1x1 sub-blocks).
    */
   uintptr_t u = tile_y * row_size
               + tile_x * tile_size
               + 512 * (byte_x / 8)
               + 64 * (byte_y / 8)
               + 32 * ((byte_y / 4) % 2)
               + 16 * ((byte_x / 4) % 2)
               + 8 * ((byte_y / 2) % 2)
               + 4 * ((byte_x / 2) % 2)
               + 2 * (byte_y % 2)
               + 1 * (byte_x % 2);

   if (swizzled) {
      /* adjust for bit6 swizzling */
      if (((byte_x / 8) % 2) == 1) {
         if (((byte_y / 8) % 2) == 0) {
            u += 64;
         } else {
            u -= 64;
         }
      }
   }

   return u;
}

/* Blit the level-0 contents of src into dst (including their stencil
 * miptrees, if any), scaling as needed for MSAA up/downsampling.
 */
void
intel_miptree_updownsample(struct brw_context *brw,
                           struct intel_mipmap_tree *src,
                           struct intel_mipmap_tree *dst)
{
   unsigned src_w = src->surf.logical_level0_px.width;
   unsigned src_h = src->surf.logical_level0_px.height;
   unsigned dst_w = dst->surf.logical_level0_px.width;
   unsigned dst_h = dst->surf.logical_level0_px.height;

   brw_blorp_blit_miptrees(brw,
                           src, 0 /* level */, 0 /* layer */,
                           src->format, SWIZZLE_XYZW,
                           dst, 0 /* level */, 0 /* layer */, dst->format,
                           0, 0, src_w, src_h,
                           0, 0, dst_w, dst_h,
                           GL_NEAREST, false, false /*mirror x, y*/,
                           false, false);

   if (src->stencil_mt) {
      src_w = src->stencil_mt->surf.logical_level0_px.width;
      src_h = src->stencil_mt->surf.logical_level0_px.height;
      dst_w = dst->stencil_mt->surf.logical_level0_px.width;
      dst_h = dst->stencil_mt->surf.logical_level0_px.height;

      brw_blorp_blit_miptrees(brw,
                              src->stencil_mt, 0 /* level */, 0 /* layer */,
                              src->stencil_mt->format, SWIZZLE_XYZW,
                              dst->stencil_mt, 0 /* level */, 0 /* layer */,
                              dst->stencil_mt->format,
                              0, 0, src_w, src_h,
                              0, 0, dst_w, dst_h,
                              GL_NEAREST, false, false /*mirror x, y*/,
                              false, false /* decode/encode srgb */);
   }
}

/* Keep the R8_UINT shadow copy of a Gen7 stencil miptree up to date.
 * Gen7 cannot sample W-tiled stencil directly, so a Y-tiled R8 copy is
 * maintained for texturing; Gen8+ does not need this and returns early.
 */
void
intel_update_r8stencil(struct brw_context *brw,
                       struct intel_mipmap_tree *mt)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   assert(devinfo->gen >= 7);
   struct intel_mipmap_tree *src =
      mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt;
   if (!src || devinfo->gen >= 8)
      return;

   assert(src->surf.size_B > 0);

   if (!mt->shadow_mt) {
      assert(devinfo->gen > 6); /* Handle MIPTREE_LAYOUT_GEN6_HIZ_STENCIL */
      mt->shadow_mt = make_surface(
                            brw,
                            src->target,
                            MESA_FORMAT_R_UINT8,
                            src->first_level, src->last_level,
                            src->surf.logical_level0_px.width,
                            src->surf.logical_level0_px.height,
                            src->surf.dim == ISL_SURF_DIM_3D ?
                               src->surf.logical_level0_px.depth :
                               src->surf.logical_level0_px.array_len,
                            src->surf.samples,
                            ISL_TILING_Y0_BIT,
                            ISL_SURF_USAGE_TEXTURE_BIT,
                            BO_ALLOC_BUSY, 0, NULL);
      assert(mt->shadow_mt);
   }

   if (src->shadow_needs_update == false)
      return;

   struct intel_mipmap_tree *dst = mt->shadow_mt;

   for (int level = src->first_level; level <= src->last_level; level++) {
      const unsigned depth = src->surf.dim == ISL_SURF_DIM_3D ?
                             minify(src->surf.phys_level0_sa.depth, level) :
                             src->surf.phys_level0_sa.array_len;

      for (unsigned layer = 0; layer < depth; layer++) {
         brw_blorp_copy_miptrees(brw,
                                 src, level, layer,
                                 dst, level, layer,
                                 0, 0, 0, 0,
                                 minify(src->surf.logical_level0_px.width,
                                        level),
                                 minify(src->surf.logical_level0_px.height,
                                        level));
      }
   }

   brw_cache_flush_for_read(brw, dst->bo);
   src->shadow_needs_update = false;
}

/* CPU-map the miptree's BO, flushing the batch first if it still references
 * the BO (otherwise the map could observe stale data).
 */
static void *
intel_miptree_map_raw(struct brw_context *brw,
                      struct intel_mipmap_tree *mt,
                      GLbitfield mode)
{
   struct brw_bo *bo = mt->bo;

   if (brw_batch_references(&brw->batch, bo))
      intel_batchbuffer_flush(brw);

   return brw_bo_map(brw, bo, mode);
}

/* Undo intel_miptree_map_raw(). */
static void
intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
{
   brw_bo_unmap(mt->bo);
}

/* Unmap counterpart of intel_miptree_map_map(). */
static void
intel_miptree_unmap_map(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        struct intel_miptree_map *map,
                        unsigned int level, unsigned int slice)
{
   intel_miptree_unmap_raw(mt);
}

/* Map a level/slice via direct CPU access to the underlying BO. */
static void
intel_miptree_map_map(struct brw_context *brw,
                      struct intel_mipmap_tree *mt,
                      struct intel_miptree_map *map,
                      unsigned int level, unsigned int slice)
{
   unsigned int bw, bh;
   void *base;
   unsigned int image_x, image_y;
   intptr_t x = map->x;
   intptr_t y = map->y;

   /* For compressed
formats, the stride is the number of bytes per
    * row of blocks.  intel_miptree_get_image_offset() already does
    * the divide.
    */
   _mesa_get_format_block_size(mt->format, &bw, &bh);
   assert(y % bh == 0);
   assert(x % bw == 0);
   y /= bh;
   x /= bw;

   intel_miptree_access_raw(brw, mt, level, slice,
                            map->mode & GL_MAP_WRITE_BIT);

   base = intel_miptree_map_raw(brw, mt, map->mode);

   if (base == NULL)
      map->ptr = NULL;
   else {
      base += mt->offset;

      /* Note that in the case of cube maps, the caller must have passed the
       * slice number referencing the face.
       */
      intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
      x += image_x;
      y += image_y;

      map->stride = mt->surf.row_pitch_B;
      map->ptr = base + y * map->stride + x * mt->cpp;
   }

   DBG("%s: %d,%d %dx%d from mt %p (%s) "
       "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
       map->x, map->y, map->w, map->h,
       mt, _mesa_get_format_name(mt->format),
       x, y, map->ptr, map->stride);

   map->unmap = intel_miptree_unmap_map;
}

/* Unmap counterpart of the blit-based mapping: if the map was writable,
 * copy the linear temporary miptree back into the real surface, then
 * release the temporary.
 */
static void
intel_miptree_unmap_blit(struct brw_context *brw,
                         struct intel_mipmap_tree *mt,
                         struct intel_miptree_map *map,
                         unsigned int level,
                         unsigned int slice)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   /* NOTE(review): ctx appears unused here but is presumably referenced by
    * the WARN_ONCE macro below — confirm before removing.
    */
   struct gl_context *ctx = &brw->ctx;

   intel_miptree_unmap_raw(map->linear_mt);

   if (map->mode & GL_MAP_WRITE_BIT) {
      if (devinfo->gen >= 6) {
         brw_blorp_copy_miptrees(brw, map->linear_mt, 0, 0,
                                 mt, level, slice,
                                 0, 0, map->x, map->y, map->w, map->h);
      } else {
         bool ok = intel_miptree_copy(brw,
                                      map->linear_mt, 0, 0, 0, 0,
                                      mt, level, slice, map->x, map->y,
                                      map->w, map->h);
         WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
      }
   }

   intel_miptree_release(&map->linear_mt);
}

/*
 * Compute extent parameters for use with tiled_memcpy functions.
 * xs are in units of bytes and ys are in units of strides.
 */
static inline void
tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
             unsigned int level, unsigned int slice, unsigned int *x1_B,
             unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
{
   unsigned int block_width, block_height;
   unsigned int x0_el, y0_el;

   _mesa_get_format_block_size(mt->format, &block_width, &block_height);

   /* For compressed formats the mapped rectangle must be block-aligned. */
   assert(map->x % block_width == 0);
   assert(map->y % block_height == 0);

   /* Offset the extents by the image's position within the miptree. */
   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
   *y1_el = map->y / block_height + y0_el;
   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
}

/* Unmap callback for intel_miptree_map_tiled_memcpy(): if the mapping was
 * writable, retile the linear staging buffer back into the surface with the
 * CPU tiling routines, then free the staging buffer.
 */
static void
intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 struct intel_miptree_map *map,
                                 unsigned int level,
                                 unsigned int slice)
{
   if (map->mode & GL_MAP_WRITE_BIT) {
      unsigned int x1, x2, y1, y2;
      tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);

      char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
      dst += mt->offset;

      isl_memcpy_linear_to_tiled(
         x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch_B, map->stride,
         brw->has_swizzling, mt->surf.tiling, ISL_MEMCPY);

      intel_miptree_unmap_raw(mt);
   }
   _mesa_align_free(map->buffer);
   map->buffer = map->ptr = NULL;
}

/**
 * Determine which copy function to use for the given format combination
 *
 * The only two possible copy functions which are ever returned are a
 * direct memcpy and a RGBA <-> BGRA copy function.
Since RGBA -> BGRA and 3151 * BGRA -> RGBA are exactly the same operation (and memcpy is obviously 3152 * symmetric), it doesn't matter whether the copy is from the tiled image 3153 * to the untiled or vice versa. The copy function required is the same in 3154 * either case so this function can be used. 3155 * 3156 * \param[in] tiledFormat The format of the tiled image 3157 * \param[in] format The GL format of the client data 3158 * \param[in] type The GL type of the client data 3159 * \param[out] mem_copy Will be set to one of either the standard 3160 * library's memcpy or a different copy function 3161 * that performs an RGBA to BGRA conversion 3162 * \param[out] cpp Number of bytes per channel 3163 * 3164 * \return true if the format and type combination are valid 3165 */ 3166MAYBE_UNUSED isl_memcpy_type 3167intel_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, 3168 uint32_t *cpp) 3169{ 3170 if (type == GL_UNSIGNED_INT_8_8_8_8_REV && 3171 !(format == GL_RGBA || format == GL_BGRA)) 3172 return ISL_MEMCPY_INVALID; /* Invalid type/format combination */ 3173 3174 if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) || 3175 (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) { 3176 *cpp = 1; 3177 return ISL_MEMCPY; 3178 } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || 3179 (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || 3180 (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || 3181 (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { 3182 *cpp = 4; 3183 if (format == GL_BGRA) { 3184 return ISL_MEMCPY; 3185 } else if (format == GL_RGBA) { 3186 return ISL_MEMCPY_BGRA8; 3187 } 3188 } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || 3189 (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || 3190 (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || 3191 (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { 3192 *cpp = 4; 3193 if (format == GL_BGRA) { 3194 /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can 3195 * use the same 
function. 3196 */ 3197 return ISL_MEMCPY_BGRA8; 3198 } else if (format == GL_RGBA) { 3199 return ISL_MEMCPY; 3200 } 3201 } 3202 3203 return ISL_MEMCPY_INVALID; 3204} 3205 3206static void 3207intel_miptree_map_tiled_memcpy(struct brw_context *brw, 3208 struct intel_mipmap_tree *mt, 3209 struct intel_miptree_map *map, 3210 unsigned int level, unsigned int slice) 3211{ 3212 intel_miptree_access_raw(brw, mt, level, slice, 3213 map->mode & GL_MAP_WRITE_BIT); 3214 3215 unsigned int x1, x2, y1, y2; 3216 tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); 3217 map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16); 3218 3219 /* The tiling and detiling functions require that the linear buffer 3220 * has proper 16-byte alignment (that is, its `x0` is 16-byte 3221 * aligned). Here we over-allocate the linear buffer by enough 3222 * bytes to get the proper alignment. 3223 */ 3224 map->buffer = _mesa_align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 16); 3225 map->ptr = (char *)map->buffer + (x1 & 0xf); 3226 assert(map->buffer); 3227 3228 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { 3229 char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); 3230 src += mt->offset; 3231 3232 const isl_memcpy_type copy_type = 3233#if defined(USE_SSE41) 3234 cpu_has_sse4_1 ? 
            ISL_MEMCPY_STREAMING_LOAD :
#endif
            ISL_MEMCPY;

      isl_memcpy_tiled_to_linear(
         x1, x2, y1, y2, map->ptr, src, map->stride,
         mt->surf.row_pitch_B, brw->has_swizzling, mt->surf.tiling,
         copy_type);

      intel_miptree_unmap_raw(mt);
   }

   map->unmap = intel_miptree_unmap_tiled_memcpy;
}

/* Map a (level, slice) rectangle by blitting it into a small linear
 * temporary miptree and mapping that instead, avoiding slow CPU access to
 * tiled memory.  On failure map->ptr is NULL and the caller releases the
 * map.
 */
static void
intel_miptree_map_blit(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       struct intel_miptree_map *map,
                       unsigned int level, unsigned int slice)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   map->linear_mt = make_surface(brw, GL_TEXTURE_2D, mt->format,
                                 0, 0, map->w, map->h, 1, 1,
                                 ISL_TILING_LINEAR_BIT,
                                 ISL_SURF_USAGE_RENDER_TARGET_BIT |
                                 ISL_SURF_USAGE_TEXTURE_BIT,
                                 0, 0, NULL);

   if (!map->linear_mt) {
      fprintf(stderr, "Failed to allocate blit temporary\n");
      goto fail;
   }
   map->stride = map->linear_mt->surf.row_pitch_B;

   /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
    * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
    * invalidate is set, since we'll be writing the whole rectangle from our
    * temporary buffer back out.
    */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      if (devinfo->gen >= 6) {
         brw_blorp_copy_miptrees(brw, mt, level, slice,
                                 map->linear_mt, 0, 0,
                                 map->x, map->y, 0, 0, map->w, map->h);
      } else {
         if (!intel_miptree_copy(brw,
                                 mt, level, slice, map->x, map->y,
                                 map->linear_mt, 0, 0, 0, 0,
                                 map->w, map->h)) {
            fprintf(stderr, "Failed to blit\n");
            goto fail;
         }
      }
   }

   map->ptr = intel_miptree_map_raw(brw, map->linear_mt, map->mode);

   DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
       map->x, map->y, map->w, map->h,
       mt, _mesa_get_format_name(mt->format),
       level, slice, map->ptr, map->stride);

   map->unmap = intel_miptree_unmap_blit;
   return;

fail:
   intel_miptree_release(&map->linear_mt);
   map->ptr = NULL;
   map->stride = 0;
}

/**
 * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
 */
#if defined(USE_SSE41)
/* Unmap callback for intel_miptree_map_movntdqa(): read-only mapping, so
 * just free the temporary copy.
 */
static void
intel_miptree_unmap_movntdqa(struct brw_context *brw,
                             struct intel_mipmap_tree *mt,
                             struct intel_miptree_map *map,
                             unsigned int level,
                             unsigned int slice)
{
   _mesa_align_free(map->buffer);
   map->buffer = NULL;
   map->ptr = NULL;
}

/* Read-only map path that copies the image into a malloc'ed temporary using
 * streaming (non-temporal) loads; only valid for GL_MAP_READ_BIT mappings.
 */
static void
intel_miptree_map_movntdqa(struct brw_context *brw,
                           struct intel_mipmap_tree *mt,
                           struct intel_miptree_map *map,
                           unsigned int level, unsigned int slice)
{
   assert(map->mode & GL_MAP_READ_BIT);
   assert(!(map->mode & GL_MAP_WRITE_BIT));

   intel_miptree_access_raw(brw, mt, level, slice, false);

   DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
       map->x, map->y, map->w, map->h,
       mt, _mesa_get_format_name(mt->format),
       level, slice, map->ptr, map->stride);

   /* Map the original image */
   uint32_t image_x;
   uint32_t image_y;
   intel_miptree_get_image_offset(mt, level, slice,
&image_x, &image_y); 3342 image_x += map->x; 3343 image_y += map->y; 3344 3345 void *src = intel_miptree_map_raw(brw, mt, map->mode); 3346 if (!src) 3347 return; 3348 3349 src += mt->offset; 3350 3351 src += image_y * mt->surf.row_pitch_B; 3352 src += image_x * mt->cpp; 3353 3354 /* Due to the pixel offsets for the particular image being mapped, our 3355 * src pointer may not be 16-byte aligned. However, if the pitch is 3356 * divisible by 16, then the amount by which it's misaligned will remain 3357 * consistent from row to row. 3358 */ 3359 assert((mt->surf.row_pitch_B % 16) == 0); 3360 const int misalignment = ((uintptr_t) src) & 15; 3361 3362 /* Create an untiled temporary buffer for the mapping. */ 3363 const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w); 3364 3365 map->stride = ALIGN(misalignment + width_bytes, 16); 3366 3367 map->buffer = _mesa_align_malloc(map->stride * map->h, 16); 3368 /* Offset the destination so it has the same misalignment as src. */ 3369 map->ptr = map->buffer + misalignment; 3370 3371 assert((((uintptr_t) map->ptr) & 15) == misalignment); 3372 3373 for (uint32_t y = 0; y < map->h; y++) { 3374 void *dst_ptr = map->ptr + y * map->stride; 3375 void *src_ptr = src + y * mt->surf.row_pitch_B; 3376 3377 _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes); 3378 } 3379 3380 intel_miptree_unmap_raw(mt); 3381 3382 map->unmap = intel_miptree_unmap_movntdqa; 3383} 3384#endif 3385 3386static void 3387intel_miptree_unmap_s8(struct brw_context *brw, 3388 struct intel_mipmap_tree *mt, 3389 struct intel_miptree_map *map, 3390 unsigned int level, 3391 unsigned int slice) 3392{ 3393 if (map->mode & GL_MAP_WRITE_BIT) { 3394 unsigned int image_x, image_y; 3395 uint8_t *untiled_s8_map = map->ptr; 3396 uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT); 3397 3398 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); 3399 3400 for (uint32_t y = 0; y < map->h; y++) { 3401 for (uint32_t x = 0; 
x < map->w; x++) { 3402 ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch_B, 3403 image_x + x + map->x, 3404 image_y + y + map->y, 3405 brw->has_swizzling); 3406 tiled_s8_map[offset] = untiled_s8_map[y * map->w + x]; 3407 } 3408 } 3409 3410 intel_miptree_unmap_raw(mt); 3411 } 3412 3413 free(map->buffer); 3414} 3415 3416static void 3417intel_miptree_map_s8(struct brw_context *brw, 3418 struct intel_mipmap_tree *mt, 3419 struct intel_miptree_map *map, 3420 unsigned int level, unsigned int slice) 3421{ 3422 map->stride = map->w; 3423 map->buffer = map->ptr = malloc(map->stride * map->h); 3424 if (!map->buffer) 3425 return; 3426 3427 intel_miptree_access_raw(brw, mt, level, slice, 3428 map->mode & GL_MAP_WRITE_BIT); 3429 3430 /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no 3431 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless 3432 * invalidate is set, since we'll be writing the whole rectangle from our 3433 * temporary buffer back out. 
    */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      uint8_t *untiled_s8_map = map->ptr;
      uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_READ_BIT);
      unsigned int image_x, image_y;

      intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);

      /* Gather: W-tiled stencil bytes -> linear temporary. */
      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch_B,
                                               x + image_x + map->x,
                                               y + image_y + map->y,
                                               brw->has_swizzling);
            untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
         }
      }

      intel_miptree_unmap_raw(mt);

      DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
          map->x, map->y, map->w, map->h,
          mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
   } else {
      DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
          map->x, map->y, map->w, map->h,
          mt, map->ptr, map->stride);
   }

   map->unmap = intel_miptree_unmap_s8;
}

/**
 * Mapping functions for packed depth/stencil miptrees backed by real separate
 * miptrees for depth and stencil.
 *
 * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
 * separate from the depth buffer.  Yet at the GL API level, we have to expose
 * packed depth/stencil textures and FBO attachments, and Mesa core expects to
 * be able to map that memory for texture storage and glReadPixels-type
 * operations.  We give Mesa core that access by mallocing a temporary and
 * copying the data between the actual backing store and the temporary.
 */
static void
intel_miptree_unmap_depthstencil(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 struct intel_miptree_map *map,
                                 unsigned int level,
                                 unsigned int slice)
{
   struct intel_mipmap_tree *z_mt = mt;
   struct intel_mipmap_tree *s_mt = mt->stencil_mt;
   /* Z32F_S8 is exposed as a float depth miptree plus the separate stencil;
    * everything else here is packed 24-bit depth + 8-bit stencil in 32 bits.
    */
   bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;

   if (map->mode & GL_MAP_WRITE_BIT) {
      uint32_t *packed_map = map->ptr;
      uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT);
      uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT);
      unsigned int s_image_x, s_image_y;
      unsigned int z_image_x, z_image_y;

      intel_miptree_get_image_offset(s_mt, level, slice,
                                     &s_image_x, &s_image_y);
      intel_miptree_get_image_offset(z_mt, level, slice,
                                     &z_image_x, &z_image_y);

      /* Scatter the packed temporary back into the two real buffers. */
      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch_B,
                                                 x + s_image_x + map->x,
                                                 y + s_image_y + map->y,
                                                 brw->has_swizzling);
            ptrdiff_t z_offset = ((y + z_image_y + map->y) *
                                  (z_mt->surf.row_pitch_B / 4) +
                                  (x + z_image_x + map->x));

            if (map_z32f_x24s8) {
               z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
               s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
            } else {
               uint32_t packed = packed_map[y * map->w + x];
               s_map[s_offset] = packed >> 24;
               z_map[z_offset] = packed;
            }
         }
      }

      intel_miptree_unmap_raw(s_mt);
      intel_miptree_unmap_raw(z_mt);

      DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
          __func__,
          map->x, map->y, map->w, map->h,
          z_mt, _mesa_get_format_name(z_mt->format),
          map->x + z_image_x, map->y + z_image_y,
          s_mt, map->x + s_image_x, map->y + s_image_y,
          map->ptr, map->stride);
   }

   free(map->buffer);
}

static void
intel_miptree_map_depthstencil(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               struct intel_miptree_map *map,
                               unsigned int level, unsigned int slice)
{
   /* Map a packed depth/stencil rectangle by gathering the separate depth
    * and stencil miptrees into one malloc'ed temporary (see the comment
    * above intel_miptree_unmap_depthstencil()).
    */
   struct intel_mipmap_tree *z_mt = mt;
   struct intel_mipmap_tree *s_mt = mt->stencil_mt;
   bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
   /* 8 bytes/pixel for FLOAT32_X24S8 (two uint32 per pixel), else 4. */
   int packed_bpp = map_z32f_x24s8 ? 8 : 4;

   map->stride = map->w * packed_bpp;
   map->buffer = map->ptr = malloc(map->stride * map->h);
   if (!map->buffer)
      return;

   intel_miptree_access_raw(brw, z_mt, level, slice,
                            map->mode & GL_MAP_WRITE_BIT);
   intel_miptree_access_raw(brw, s_mt, level, slice,
                            map->mode & GL_MAP_WRITE_BIT);

   /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
    * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
    * invalidate is set, since we'll be writing the whole rectangle from our
    * temporary buffer back out.
    */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      uint32_t *packed_map = map->ptr;
      uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT);
      uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT);
      unsigned int s_image_x, s_image_y;
      unsigned int z_image_x, z_image_y;

      intel_miptree_get_image_offset(s_mt, level, slice,
                                     &s_image_x, &s_image_y);
      intel_miptree_get_image_offset(z_mt, level, slice,
                                     &z_image_x, &z_image_y);

      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            int map_x = map->x + x, map_y = map->y + y;
            ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch_B,
                                                 map_x + s_image_x,
                                                 map_y + s_image_y,
                                                 brw->has_swizzling);
            ptrdiff_t z_offset = ((map_y + z_image_y) *
                                  (z_mt->surf.row_pitch_B / 4) +
                                  (map_x + z_image_x));
            uint8_t s = s_map[s_offset];
            uint32_t z = z_map[z_offset];

            if (map_z32f_x24s8) {
               packed_map[(y * map->w + x) * 2 + 0] = z;
               packed_map[(y * map->w + x) * 2 + 1] = s;
            } else {
               packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
            }
         }
      }

      intel_miptree_unmap_raw(s_mt);
      intel_miptree_unmap_raw(z_mt);

      DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
          __func__,
          map->x, map->y, map->w, map->h,
          z_mt, map->x + z_image_x, map->y + z_image_y,
          s_mt, map->x + s_image_x, map->y + s_image_y,
          map->ptr, map->stride);
   } else {
      DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
          map->x, map->y, map->w, map->h,
          mt, map->ptr, map->stride);
   }

   map->unmap = intel_miptree_unmap_depthstencil;
}

/**
 * Create and attach a map to the miptree at (level, slice). Return the
 * attached map.
 */
static struct intel_miptree_map*
intel_miptree_attach_map(struct intel_mipmap_tree *mt,
                         unsigned int level,
                         unsigned int slice,
                         unsigned int x,
                         unsigned int y,
                         unsigned int w,
                         unsigned int h,
                         GLbitfield mode)
{
   struct intel_miptree_map *map = calloc(1, sizeof(*map));

   if (!map)
      return NULL;

   /* Only one outstanding mapping per (level, slice) is supported. */
   assert(mt->level[level].slice[slice].map == NULL);
   mt->level[level].slice[slice].map = map;

   map->mode = mode;
   map->x = x;
   map->y = y;
   map->w = w;
   map->h = h;

   return map;
}

/**
 * Release the map at (level, slice).
 */
static void
intel_miptree_release_map(struct intel_mipmap_tree *mt,
                          unsigned int level,
                          unsigned int slice)
{
   struct intel_miptree_map **map;

   map = &mt->level[level].slice[slice].map;
   free(*map);
   *map = NULL;
}

static bool
can_blit_slice(struct intel_mipmap_tree *mt,
               const struct intel_miptree_map *map)
{
   /* See intel_miptree_blit() for details on the 32k pitch limit.
    */
   const unsigned src_blt_pitch = intel_miptree_blt_pitch(mt);
   const unsigned dst_blt_pitch = ALIGN(map->w * mt->cpp, 64);
   return src_blt_pitch < 32768 && dst_blt_pitch < 32768;
}

/* Decide whether the blit-based map path should be used for this mapping. */
static bool
use_intel_mipree_map_blit(struct brw_context *brw,
                          struct intel_mipmap_tree *mt,
                          const struct intel_miptree_map *map)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (devinfo->has_llc &&
      /* It's probably not worth swapping to the blit ring because of
       * all the overhead involved.
       */
       !(map->mode & GL_MAP_WRITE_BIT) &&
       !mt->compressed &&
       (mt->surf.tiling == ISL_TILING_X ||
        /* Prior to Sandybridge, the blitter can't handle Y tiling */
        (devinfo->gen >= 6 && mt->surf.tiling == ISL_TILING_Y0) ||
        /* Fast copy blit on skl+ supports all tiling formats. */
        devinfo->gen >= 9) &&
       can_blit_slice(mt, map))
      return true;

   /* BOs too large to map directly must go through the blitter. */
   if (mt->surf.tiling != ISL_TILING_LINEAR &&
       mt->bo->size >= brw->max_gtt_map_object_size) {
      assert(can_blit_slice(mt, map));
      return true;
   }

   return false;
}

/**
 * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may
 * exceed 32 bits but to diminish the likelihood subtle bugs in pointer
 * arithmetic overflow.
 *
 * If you call this function and use \a out_stride, then you're doing pointer
 * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all
 * bugs. The caller must still take care to avoid 32-bit overflow errors in
 * all arithmetic expressions that contain buffer offsets and pixel sizes,
 * which usually have type uint32_t or GLuint.
 */
void
intel_miptree_map(struct brw_context *brw,
                  struct intel_mipmap_tree *mt,
                  unsigned int level,
                  unsigned int slice,
                  unsigned int x,
                  unsigned int y,
                  unsigned int w,
                  unsigned int h,
                  GLbitfield mode,
                  void **out_ptr,
                  ptrdiff_t *out_stride)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct intel_miptree_map *map;

   /* Multisampled miptrees are never CPU-mapped directly. */
   assert(mt->surf.samples == 1);

   map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
   if (!map){
      *out_ptr = NULL;
      *out_stride = 0;
      return;
   }

   /* Dispatch to the appropriate map path; the order of these checks is
    * significant (e.g. separate-stencil must be handled before the generic
    * tiled paths).
    */
   if (mt->format == MESA_FORMAT_S_UINT8) {
      intel_miptree_map_s8(brw, mt, map, level, slice);
   } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_map_depthstencil(brw, mt, map, level, slice);
   } else if (use_intel_mipree_map_blit(brw, mt, map)) {
      intel_miptree_map_blit(brw, mt, map, level, slice);
   } else if (mt->surf.tiling != ISL_TILING_LINEAR && devinfo->gen > 4) {
      intel_miptree_map_tiled_memcpy(brw, mt, map, level, slice);
#if defined(USE_SSE41)
   } else if (!(mode & GL_MAP_WRITE_BIT) &&
              !mt->compressed && cpu_has_sse4_1 &&
              (mt->surf.row_pitch_B % 16 == 0)) {
      intel_miptree_map_movntdqa(brw, mt, map, level, slice);
#endif
   } else {
      if (mt->surf.tiling != ISL_TILING_LINEAR)
         perf_debug("intel_miptree_map: mapping via gtt");
      intel_miptree_map_map(brw, mt, map, level, slice);
   }

   *out_ptr = map->ptr;
   *out_stride = map->stride;

   /* A NULL ptr means the chosen path failed; drop the map record. */
   if (map->ptr == NULL)
      intel_miptree_release_map(mt, level, slice);
}

/* Undo intel_miptree_map(): run the path-specific unmap callback (which
 * writes back any staging copy) and release the map record.
 */
void
intel_miptree_unmap(struct brw_context *brw,
                    struct intel_mipmap_tree *mt,
                    unsigned int level,
                    unsigned int slice)
{
   struct intel_miptree_map *map = mt->level[level].slice[slice].map;

   assert(mt->surf.samples == 1);

   if (!map)
      return;

   DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
       mt, _mesa_get_format_name(mt->format), level, slice);

   if (map->unmap)
      map->unmap(brw, mt, map, level, slice);

   intel_miptree_release_map(mt, level, slice);
}

/* Translate a GL texture target to the corresponding ISL surface dimension. */
enum isl_surf_dim
get_isl_surf_dim(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY:
      return ISL_SURF_DIM_1D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY:
   case GL_TEXTURE_RECTANGLE:
   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
   case GL_TEXTURE_EXTERNAL_OES:
      return ISL_SURF_DIM_2D;

   case GL_TEXTURE_3D:
      return ISL_SURF_DIM_3D;
   }

   unreachable("Invalid texture target");
}

/* Translate a GL texture target (plus tiling and hardware generation) to the
 * ISL miptree layout used for it.
 */
enum isl_dim_layout
get_isl_dim_layout(const struct gen_device_info *devinfo,
                   enum isl_tiling tiling, GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY:
      return (devinfo->gen >= 9 && tiling == ISL_TILING_LINEAR ?
              ISL_DIM_LAYOUT_GEN9_1D : ISL_DIM_LAYOUT_GEN4_2D);

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY:
   case GL_TEXTURE_RECTANGLE:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
   case GL_TEXTURE_EXTERNAL_OES:
      return ISL_DIM_LAYOUT_GEN4_2D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return (devinfo->gen == 4 ? ISL_DIM_LAYOUT_GEN4_3D :
              ISL_DIM_LAYOUT_GEN4_2D);

   case GL_TEXTURE_3D:
      return (devinfo->gen >= 9 ?
              ISL_DIM_LAYOUT_GEN4_2D : ISL_DIM_LAYOUT_GEN4_3D);
   }

   unreachable("Invalid texture target");
}

/* Update the miptree's fast-clear color.  Returns true if the color actually
 * changed (callers use this to know whether surface state must be re-emitted).
 * NOTE(review): dereferences mt->aux_buf unconditionally — callers must
 * guarantee an aux buffer exists; confirm at call sites.
 */
bool
intel_miptree_set_clear_color(struct brw_context *brw,
                              struct intel_mipmap_tree *mt,
                              union isl_color_value clear_color)
{
   if (memcmp(&mt->fast_clear_color, &clear_color, sizeof(clear_color)) != 0) {
      mt->fast_clear_color = clear_color;
      if (mt->aux_buf->clear_color_bo) {
         /* We can't update the clear color while the hardware is still using
          * the previous one for a resolve or sampling from it. Make sure that
          * there are no pending commands at this point.
          */
         brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
         for (int i = 0; i < 4; i++) {
            brw_store_data_imm32(brw, mt->aux_buf->clear_color_bo,
                                 mt->aux_buf->clear_color_offset + i * 4,
                                 mt->fast_clear_color.u32[i]);
         }
         brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
      }
      brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
      return true;
   }
   return false;
}

/* Return the clear color to program for the given view, along with the BO and
 * offset holding it (or NULL/0 when the inline value must be used instead).
 */
union isl_color_value
intel_miptree_get_clear_color(const struct gen_device_info *devinfo,
                              const struct intel_mipmap_tree *mt,
                              enum isl_format view_format, bool sampling,
                              struct brw_bo **clear_color_bo,
                              uint64_t *clear_color_offset)
{
   assert(mt->aux_buf);

   if (devinfo->gen == 10 && isl_format_is_srgb(view_format) && sampling) {
      /* The gen10 sampler doesn't gamma-correct the clear color. In this case,
       * we switch to using the inline clear color and do the sRGB color
       * conversion process defined in the OpenGL spec. The red, green, and
       * blue channels take part in gamma correction, while the alpha channel
       * is unchanged.
       */
      union isl_color_value srgb_decoded_value = mt->fast_clear_color;
      for (unsigned i = 0; i < 3; i++) {
         srgb_decoded_value.f32[i] =
            util_format_srgb_to_linear_float(mt->fast_clear_color.f32[i]);
      }
      *clear_color_bo = 0;
      *clear_color_offset = 0;
      return srgb_decoded_value;
   } else {
      *clear_color_bo = mt->aux_buf->clear_color_bo;
      *clear_color_offset = mt->aux_buf->clear_color_offset;
      return mt->fast_clear_color;
   }
}

/* Decompress one (level, slice) of an ETC miptree into its uncompressed
 * shadow miptree by mapping both and unpacking on the CPU.
 */
static void
intel_miptree_update_etc_shadow(struct brw_context *brw,
                                struct intel_mipmap_tree *mt,
                                unsigned int level,
                                unsigned int slice,
                                int level_w,
                                int level_h)
{
   ptrdiff_t etc_stride, shadow_stride;
   void *mptr, *sptr;
   struct intel_mipmap_tree *smt = mt->shadow_mt;

   assert(intel_miptree_has_etc_shadow(brw, mt));

   intel_miptree_map(brw, mt, level, slice, 0, 0, level_w, level_h,
                     GL_MAP_READ_BIT, &mptr, &etc_stride);
   intel_miptree_map(brw, smt, level, slice, 0, 0, level_w, level_h,
                     GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
                     &sptr, &shadow_stride);

   if (mt->format == MESA_FORMAT_ETC1_RGB8) {
      _mesa_etc1_unpack_rgba8888(sptr, shadow_stride, mptr, etc_stride,
                                 level_w, level_h);
   } else {
      /* destination and source images must have the same swizzle */
      bool is_bgra = (smt->format == MESA_FORMAT_B8G8R8A8_SRGB);
      _mesa_unpack_etc2_format(sptr, shadow_stride, mptr, etc_stride,
                               level_w, level_h, mt->format, is_bgra);
   }

   intel_miptree_unmap(brw, mt, level, slice);
   intel_miptree_unmap(brw, smt, level, slice);
}

/* Refresh every level/slice of the ETC shadow miptree from the compressed
 * original, then clear the shadow_needs_update flag.
 */
void
intel_miptree_update_etc_shadow_levels(struct brw_context *brw,
                                       struct intel_mipmap_tree *mt)
{
   struct intel_mipmap_tree *smt;
   int num_slices;

   assert(mt);
   assert(mt->surf.size_B > 0);
   assert(intel_miptree_has_etc_shadow(brw, mt));

   smt = mt->shadow_mt;
   num_slices = smt->surf.logical_level0_px.array_len;

   for (int level = smt->first_level; level <= smt->last_level; level++) {
      /* Dimensions are minified relative to the shadow's first level. */
      int level_w = minify(smt->surf.logical_level0_px.width,
                           level - smt->first_level);
      int level_h = minify(smt->surf.logical_level0_px.height,
                           level - smt->first_level);

      for (unsigned int slice = 0; slice < num_slices; slice++) {
         intel_miptree_update_etc_shadow(brw, mt, level, slice, level_w,
                                         level_h);
      }
   }

   mt->shadow_needs_update = false;
}