isl.c revision 7ec681f3
1/* 2 * Copyright 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <assert.h> 25#include <stdarg.h> 26#include <stdio.h> 27 28#include "genxml/genX_bits.h" 29 30#include "isl.h" 31#include "isl_gfx4.h" 32#include "isl_gfx6.h" 33#include "isl_gfx7.h" 34#include "isl_gfx8.h" 35#include "isl_gfx9.h" 36#include "isl_gfx12.h" 37#include "isl_priv.h" 38 39void 40isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, 41 uint32_t yt1, uint32_t yt2, 42 char *dst, const char *src, 43 uint32_t dst_pitch, int32_t src_pitch, 44 bool has_swizzling, 45 enum isl_tiling tiling, 46 isl_memcpy_type copy_type) 47{ 48#ifdef USE_SSE41 49 if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { 50 _isl_memcpy_linear_to_tiled_sse41( 51 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 52 tiling, copy_type); 53 return; 54 } 55#endif 56 57 _isl_memcpy_linear_to_tiled( 58 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 59 tiling, copy_type); 60} 61 62void 63isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, 64 uint32_t yt1, uint32_t yt2, 65 char *dst, const char *src, 66 int32_t dst_pitch, uint32_t src_pitch, 67 bool has_swizzling, 68 enum isl_tiling tiling, 69 isl_memcpy_type copy_type) 70{ 71#ifdef USE_SSE41 72 if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { 73 _isl_memcpy_tiled_to_linear_sse41( 74 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 75 tiling, copy_type); 76 return; 77 } 78#endif 79 80 _isl_memcpy_tiled_to_linear( 81 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 82 tiling, copy_type); 83} 84 85void PRINTFLIKE(3, 4) UNUSED 86__isl_finishme(const char *file, int line, const char *fmt, ...) 87{ 88 va_list ap; 89 char buf[512]; 90 91 va_start(ap, fmt); 92 vsnprintf(buf, sizeof(buf), fmt, ap); 93 va_end(ap); 94 95 fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf); 96} 97 98static void 99isl_device_setup_mocs(struct isl_device *dev) 100{ 101 if (dev->info->ver >= 12) { 102 if (dev->info->is_dg2) { 103 /* L3CC=WB; BSpec: 45101 */ 104 dev->mocs.internal = 3 << 1; 105 dev->mocs.external = 3 << 1; 106 } else if (dev->info->is_dg1) { 107 /* L3CC=WB */ 108 dev->mocs.internal = 5 << 1; 109 /* Displayables on DG1 are free to cache in L3 since L3 is transient 110 * and flushed at bottom of each submission. 111 */ 112 dev->mocs.external = 5 << 1; 113 } else { 114 /* TC=1/LLC Only, LeCC=1/UC, LRUM=0, L3CC=3/WB */ 115 dev->mocs.external = 61 << 1; 116 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ 117 dev->mocs.internal = 2 << 1; 118 119 /* L1 - HDC:L1 + L3 + LLC */ 120 dev->mocs.l1_hdc_l3_llc = 48 << 1; 121 } 122 } else if (dev->info->ver >= 9) { 123 /* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */ 124 dev->mocs.external = 1 << 1; 125 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ 126 dev->mocs.internal = 2 << 1; 127 } else if (dev->info->ver >= 8) { 128 /* MEMORY_OBJECT_CONTROL_STATE: 129 * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle, 130 * .TargetCache = L3DefertoPATforLLCeLLCselection, 131 * .AgeforQUADLRU = 0 132 */ 133 dev->mocs.external = 0x18; 134 /* MEMORY_OBJECT_CONTROL_STATE: 135 * .MemoryTypeLLCeLLCCacheabilityControl = WB, 136 * .TargetCache = L3DefertoPATforLLCeLLCselection, 137 * .AgeforQUADLRU = 0 138 */ 139 dev->mocs.internal = 0x78; 140 } else if (dev->info->ver >= 7) { 141 if (dev->info->is_haswell) { 142 /* MEMORY_OBJECT_CONTROL_STATE: 143 * .LLCeLLCCacheabilityControlLLCCC = 0, 144 * .L3CacheabilityControlL3CC = 1, 145 */ 146 dev->mocs.internal = 1; 147 dev->mocs.external = 1; 148 } else { 149 /* MEMORY_OBJECT_CONTROL_STATE: 150 * .GraphicsDataTypeGFDT = 0, 151 * .LLCCacheabilityControlLLCCC = 0, 152 * .L3CacheabilityControlL3CC = 1, 153 */ 154 dev->mocs.internal = 1; 155 dev->mocs.external = 1; 156 } 157 } else { 158 dev->mocs.internal = 0; 159 dev->mocs.external = 0; 160 } 161} 162 163/** 164 * Return an appropriate MOCS entry for the given usage flags. 165 */ 166uint32_t 167isl_mocs(const struct isl_device *dev, isl_surf_usage_flags_t usage, 168 bool external) 169{ 170 if (external) 171 return dev->mocs.external; 172 173 if (dev->info->ver >= 12 && !dev->info->is_dg1) { 174 if (usage & ISL_SURF_USAGE_STAGING_BIT) 175 return dev->mocs.internal; 176 177 /* Using L1:HDC for storage buffers breaks Vulkan memory model 178 * tests that use shader atomics. This isn't likely to work out, 179 * and we can't know a priori whether they'll be used. So just 180 * continue with ordinary internal MOCS for now. 181 */ 182 if (usage & ISL_SURF_USAGE_STORAGE_BIT) 183 return dev->mocs.internal; 184 185 if (usage & (ISL_SURF_USAGE_CONSTANT_BUFFER_BIT | 186 ISL_SURF_USAGE_RENDER_TARGET_BIT | 187 ISL_SURF_USAGE_TEXTURE_BIT)) 188 return dev->mocs.l1_hdc_l3_llc; 189 } 190 191 return dev->mocs.internal; 192} 193 194void 195isl_device_init(struct isl_device *dev, 196 const struct intel_device_info *info, 197 bool has_bit6_swizzling) 198{ 199 /* Gfx8+ don't have bit6 swizzling, ensure callsite is not confused. */ 200 assert(!(has_bit6_swizzling && info->ver >= 8)); 201 202 dev->info = info; 203 dev->use_separate_stencil = ISL_GFX_VER(dev) >= 6; 204 dev->has_bit6_swizzling = has_bit6_swizzling; 205 206 /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some 207 * device properties at buildtime. Verify that the macros with the device 208 * properties chosen during runtime. 209 */ 210 ISL_GFX_VER_SANITIZE(dev); 211 ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev); 212 213 /* Did we break hiz or stencil? */ 214 if (ISL_DEV_USE_SEPARATE_STENCIL(dev)) 215 assert(info->has_hiz_and_separate_stencil); 216 if (info->must_use_separate_stencil) 217 assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); 218 219 dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4; 220 dev->ss.align = isl_align(dev->ss.size, 32); 221 222 dev->ss.clear_color_state_size = 223 isl_align(CLEAR_COLOR_length(info) * 4, 64); 224 dev->ss.clear_color_state_offset = 225 RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4; 226 227 dev->ss.clear_value_size = 228 isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) + 229 RENDER_SURFACE_STATE_GreenClearColor_bits(info) + 230 RENDER_SURFACE_STATE_BlueClearColor_bits(info) + 231 RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8; 232 233 dev->ss.clear_value_offset = 234 RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4; 235 236 assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0); 237 dev->ss.addr_offset = 238 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8; 239 240 /* The "Auxiliary Surface Base Address" field starts a bit higher up 241 * because the bottom 12 bits are used for other things. Round down to 242 * the nearest dword before. 243 */ 244 dev->ss.aux_addr_offset = 245 (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8; 246 247 dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4; 248 assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 249 dev->ds.depth_offset = 250 _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8; 251 252 if (dev->use_separate_stencil) { 253 dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 + 254 _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 + 255 _3DSTATE_CLEAR_PARAMS_length(info) * 4; 256 257 assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 258 dev->ds.stencil_offset = 259 _3DSTATE_DEPTH_BUFFER_length(info) * 4 + 260 _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8; 261 262 assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 263 dev->ds.hiz_offset = 264 _3DSTATE_DEPTH_BUFFER_length(info) * 4 + 265 _3DSTATE_STENCIL_BUFFER_length(info) * 4 + 266 _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8; 267 } else { 268 dev->ds.stencil_offset = 0; 269 dev->ds.hiz_offset = 0; 270 } 271 272 if (ISL_GFX_VER(dev) >= 7) { 273 /* From the IVB PRM, SURFACE_STATE::Height, 274 * 275 * For typed buffer and structured buffer surfaces, the number 276 * of entries in the buffer ranges from 1 to 2^27. For raw buffer 277 * surfaces, the number of entries in the buffer is the number of bytes 278 * which can range from 1 to 2^30. 279 * 280 * This limit is only concerned with raw buffers. 281 */ 282 dev->max_buffer_size = 1ull << 30; 283 } else { 284 dev->max_buffer_size = 1ull << 27; 285 } 286 287 isl_device_setup_mocs(dev); 288} 289 290/** 291 * @brief Query the set of multisamples supported by the device. 292 * 293 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always 294 * supported. 295 */ 296isl_sample_count_mask_t ATTRIBUTE_CONST 297isl_device_get_sample_counts(struct isl_device *dev) 298{ 299 if (ISL_GFX_VER(dev) >= 9) { 300 return ISL_SAMPLE_COUNT_1_BIT | 301 ISL_SAMPLE_COUNT_2_BIT | 302 ISL_SAMPLE_COUNT_4_BIT | 303 ISL_SAMPLE_COUNT_8_BIT | 304 ISL_SAMPLE_COUNT_16_BIT; 305 } else if (ISL_GFX_VER(dev) >= 8) { 306 return ISL_SAMPLE_COUNT_1_BIT | 307 ISL_SAMPLE_COUNT_2_BIT | 308 ISL_SAMPLE_COUNT_4_BIT | 309 ISL_SAMPLE_COUNT_8_BIT; 310 } else if (ISL_GFX_VER(dev) >= 7) { 311 return ISL_SAMPLE_COUNT_1_BIT | 312 ISL_SAMPLE_COUNT_4_BIT | 313 ISL_SAMPLE_COUNT_8_BIT; 314 } else if (ISL_GFX_VER(dev) >= 6) { 315 return ISL_SAMPLE_COUNT_1_BIT | 316 ISL_SAMPLE_COUNT_4_BIT; 317 } else { 318 return ISL_SAMPLE_COUNT_1_BIT; 319 } 320} 321 322/** 323 * Returns an isl_tile_info representation of the given isl_tiling when 324 * combined when used in the given configuration. 325 * 326 * @param[in] tiling The tiling format to introspect 327 * @param[in] dim The dimensionality of the surface being tiled 328 * @param[in] msaa_layout The layout of samples in the surface being tiled 329 * @param[in] format_bpb The number of bits per surface element (block) for 330 * the surface being tiled 331 * @param[in] samples The samples in the surface being tiled 332 * @param[out] tile_info Return parameter for the tiling information 333 */ 334void 335isl_tiling_get_info(enum isl_tiling tiling, 336 enum isl_surf_dim dim, 337 enum isl_msaa_layout msaa_layout, 338 uint32_t format_bpb, 339 uint32_t samples, 340 struct isl_tile_info *tile_info) 341{ 342 const uint32_t bs = format_bpb / 8; 343 struct isl_extent4d logical_el; 344 struct isl_extent2d phys_B; 345 346 if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) { 347 /* It is possible to have non-power-of-two formats in a tiled buffer. 348 * The easiest way to handle this is to treat the tile as if it is three 349 * times as wide. This way no pixel will ever cross a tile boundary. 350 * This really only works on a subset of tiling formats. 351 */ 352 assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0 || 353 tiling == ISL_TILING_4); 354 assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3)); 355 isl_tiling_get_info(tiling, dim, msaa_layout, format_bpb / 3, samples, 356 tile_info); 357 return; 358 } 359 360 switch (tiling) { 361 case ISL_TILING_LINEAR: 362 assert(bs > 0); 363 logical_el = isl_extent4d(1, 1, 1, 1); 364 phys_B = isl_extent2d(bs, 1); 365 break; 366 367 case ISL_TILING_X: 368 assert(bs > 0); 369 logical_el = isl_extent4d(512 / bs, 8, 1, 1); 370 phys_B = isl_extent2d(512, 8); 371 break; 372 373 case ISL_TILING_Y0: 374 case ISL_TILING_4: 375 assert(bs > 0); 376 logical_el = isl_extent4d(128 / bs, 32, 1, 1); 377 phys_B = isl_extent2d(128, 32); 378 break; 379 380 case ISL_TILING_W: 381 assert(bs == 1); 382 logical_el = isl_extent4d(64, 64, 1, 1); 383 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch: 384 * 385 * "If the surface is a stencil buffer (and thus has Tile Mode set 386 * to TILEMODE_WMAJOR), the pitch must be set to 2x the value 387 * computed based on width, as the stencil buffer is stored with two 388 * rows interleaved." 389 * 390 * This, together with the fact that stencil buffers are referred to as 391 * being Y-tiled in the PRMs for older hardware implies that the 392 * physical size of a W-tile is actually the same as for a Y-tile. 393 */ 394 phys_B = isl_extent2d(128, 32); 395 break; 396 397 case ISL_TILING_Yf: 398 case ISL_TILING_Ys: { 399 bool is_Ys = tiling == ISL_TILING_Ys; 400 401 assert(bs > 0); 402 unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys)); 403 unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys)); 404 405 logical_el = isl_extent4d(width / bs, height, 1, 1); 406 phys_B = isl_extent2d(width, height); 407 break; 408 } 409 case ISL_TILING_64: 410 /* The tables below are taken from the "2D Surfaces" page in the Bspec 411 * which are formulated in terms of the Cv and Cu constants. This is 412 * different from the tables in the "Tile64 Format" page which should be 413 * equivalent but are usually in terms of pixels. Also note that Cv and 414 * Cu are HxW order to match the Bspec table, not WxH order like you 415 * might expect. 416 * 417 * From the Bspec's "Tile64 Format" page: 418 * 419 * MSAA Depth/Stencil surface use IMS (Interleaved Multi Samples) 420 * which means: 421 * 422 * - Use the 1X MSAA (non-MSRT) version of the Tile64 equations and 423 * let the client unit do the swizzling internally 424 * 425 * Surfaces using the IMS layout will use the mapping for 1x MSAA. 426 */ 427#define tile_extent(bs, cv, cu, a) \ 428 isl_extent4d((1 << cu) / bs, 1 << cv, 1, a) 429 430 /* Only 2D surfaces are handled. */ 431 assert(dim == ISL_SURF_DIM_2D); 432 433 if (samples == 1 || msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { 434 switch (format_bpb) { 435 case 128: logical_el = tile_extent(bs, 6, 10, 1); break; 436 case 64: logical_el = tile_extent(bs, 6, 10, 1); break; 437 case 32: logical_el = tile_extent(bs, 7, 9, 1); break; 438 case 16: logical_el = tile_extent(bs, 7, 9, 1); break; 439 case 8: logical_el = tile_extent(bs, 8, 8, 1); break; 440 default: unreachable("Unsupported format size."); 441 } 442 } else if (samples == 2) { 443 switch (format_bpb) { 444 case 128: logical_el = tile_extent(bs, 6, 9, 2); break; 445 case 64: logical_el = tile_extent(bs, 6, 9, 2); break; 446 case 32: logical_el = tile_extent(bs, 7, 8, 2); break; 447 case 16: logical_el = tile_extent(bs, 7, 8, 2); break; 448 case 8: logical_el = tile_extent(bs, 8, 7, 2); break; 449 default: unreachable("Unsupported format size."); 450 } 451 } else { 452 switch (format_bpb) { 453 case 128: logical_el = tile_extent(bs, 5, 9, 4); break; 454 case 64: logical_el = tile_extent(bs, 5, 9, 4); break; 455 case 32: logical_el = tile_extent(bs, 6, 8, 4); break; 456 case 16: logical_el = tile_extent(bs, 6, 8, 4); break; 457 case 8: logical_el = tile_extent(bs, 7, 7, 4); break; 458 default: unreachable("Unsupported format size."); 459 } 460 } 461 462#undef tile_extent 463 464 phys_B.w = logical_el.w * bs; 465 phys_B.h = 64 * 1024 / phys_B.w; 466 break; 467 468 case ISL_TILING_HIZ: 469 /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4 470 * 128bpb format. The tiling has the same physical dimensions as 471 * Y-tiling but actually has two HiZ columns per Y-tiled column. 472 */ 473 assert(bs == 16); 474 logical_el = isl_extent4d(16, 16, 1, 1); 475 phys_B = isl_extent2d(128, 32); 476 break; 477 478 case ISL_TILING_CCS: 479 /* CCS surfaces are required to have one of the GENX_CCS_* formats which 480 * have a block size of 1 or 2 bits per block and each CCS element 481 * corresponds to one cache-line pair in the main surface. From the Sky 482 * Lake PRM Vol. 12 in the section on planes: 483 * 484 * "The Color Control Surface (CCS) contains the compression status 485 * of the cache-line pairs. The compression state of the cache-line 486 * pair is specified by 2 bits in the CCS. Each CCS cache-line 487 * represents an area on the main surface of 16x16 sets of 128 byte 488 * Y-tiled cache-line-pairs. CCS is always Y tiled." 489 * 490 * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines. 491 * Since each cache line corresponds to a 16x16 set of cache-line pairs, 492 * that yields total tile area of 128x128 cache-line pairs or CCS 493 * elements. On older hardware, each CCS element is 1 bit and the tile 494 * is 128x256 elements. 495 */ 496 assert(format_bpb == 1 || format_bpb == 2); 497 logical_el = isl_extent4d(128, 256 / format_bpb, 1, 1); 498 phys_B = isl_extent2d(128, 32); 499 break; 500 501 case ISL_TILING_GFX12_CCS: 502 /* From the Bspec, Gen Graphics > Gfx12 > Memory Data Formats > Memory 503 * Compression > Memory Compression - Gfx12: 504 * 505 * 4 bits of auxiliary plane data are required for 2 cachelines of 506 * main surface data. This results in a single cacheline of auxiliary 507 * plane data mapping to 4 4K pages of main surface data for the 4K 508 * pages (tile Y ) and 1 64K Tile Ys page. 509 * 510 * The Y-tiled pairing bit of 9 shown in the table below that Bspec 511 * section expresses that the 2 cachelines of main surface data are 512 * horizontally adjacent. 513 * 514 * TODO: Handle Ys, Yf and their pairing bits. 515 * 516 * Therefore, each CCS cacheline represents a 512Bx32 row area and each 517 * element represents a 32Bx4 row area. 518 */ 519 assert(format_bpb == 4); 520 logical_el = isl_extent4d(16, 8, 1, 1); 521 phys_B = isl_extent2d(64, 1); 522 break; 523 524 default: 525 unreachable("not reached"); 526 } /* end switch */ 527 528 *tile_info = (struct isl_tile_info) { 529 .tiling = tiling, 530 .format_bpb = format_bpb, 531 .logical_extent_el = logical_el, 532 .phys_extent_B = phys_B, 533 }; 534} 535 536bool 537isl_color_value_is_zero(union isl_color_value value, 538 enum isl_format format) 539{ 540 const struct isl_format_layout *fmtl = isl_format_get_layout(format); 541 542#define RETURN_FALSE_IF_NOT_0(c, i) \ 543 if (fmtl->channels.c.bits && value.u32[i] != 0) \ 544 return false 545 546 RETURN_FALSE_IF_NOT_0(r, 0); 547 RETURN_FALSE_IF_NOT_0(g, 1); 548 RETURN_FALSE_IF_NOT_0(b, 2); 549 RETURN_FALSE_IF_NOT_0(a, 3); 550 551#undef RETURN_FALSE_IF_NOT_0 552 553 return true; 554} 555 556bool 557isl_color_value_is_zero_one(union isl_color_value value, 558 enum isl_format format) 559{ 560 const struct isl_format_layout *fmtl = isl_format_get_layout(format); 561 562#define RETURN_FALSE_IF_NOT_0_1(c, i, field) \ 563 if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \ 564 return false 565 566 if (isl_format_has_int_channel(format)) { 567 RETURN_FALSE_IF_NOT_0_1(r, 0, u32); 568 RETURN_FALSE_IF_NOT_0_1(g, 1, u32); 569 RETURN_FALSE_IF_NOT_0_1(b, 2, u32); 570 RETURN_FALSE_IF_NOT_0_1(a, 3, u32); 571 } else { 572 RETURN_FALSE_IF_NOT_0_1(r, 0, f32); 573 RETURN_FALSE_IF_NOT_0_1(g, 1, f32); 574 RETURN_FALSE_IF_NOT_0_1(b, 2, f32); 575 RETURN_FALSE_IF_NOT_0_1(a, 3, f32); 576 } 577 578#undef RETURN_FALSE_IF_NOT_0_1 579 580 return true; 581} 582 583/** 584 * @param[out] tiling is set only on success 585 */ 586static bool 587isl_surf_choose_tiling(const struct isl_device *dev, 588 const struct isl_surf_init_info *restrict info, 589 enum isl_tiling *tiling) 590{ 591 isl_tiling_flags_t tiling_flags = info->tiling_flags; 592 593 /* HiZ surfaces always use the HiZ tiling */ 594 if (info->usage & ISL_SURF_USAGE_HIZ_BIT) { 595 assert(info->format == ISL_FORMAT_HIZ); 596 assert(tiling_flags == ISL_TILING_HIZ_BIT); 597 *tiling = isl_tiling_flag_to_enum(tiling_flags); 598 return true; 599 } 600 601 /* CCS surfaces always use the CCS tiling */ 602 if (info->usage & ISL_SURF_USAGE_CCS_BIT) { 603 assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS); 604 UNUSED bool ivb_ccs = ISL_GFX_VER(dev) < 12 && 605 tiling_flags == ISL_TILING_CCS_BIT; 606 UNUSED bool tgl_ccs = ISL_GFX_VER(dev) >= 12 && 607 tiling_flags == ISL_TILING_GFX12_CCS_BIT; 608 assert(ivb_ccs != tgl_ccs); 609 *tiling = isl_tiling_flag_to_enum(tiling_flags); 610 return true; 611 } 612 613 if (ISL_GFX_VERX10(dev) >= 125) { 614 isl_gfx125_filter_tiling(dev, info, &tiling_flags); 615 } else if (ISL_GFX_VER(dev) >= 6) { 616 isl_gfx6_filter_tiling(dev, info, &tiling_flags); 617 } else { 618 isl_gfx4_filter_tiling(dev, info, &tiling_flags); 619 } 620 621 #define CHOOSE(__tiling) \ 622 do { \ 623 if (tiling_flags & (1u << (__tiling))) { \ 624 *tiling = (__tiling); \ 625 return true; \ 626 } \ 627 } while (0) 628 629 /* Of the tiling modes remaining, choose the one that offers the best 630 * performance. 631 */ 632 633 if (info->dim == ISL_SURF_DIM_1D) { 634 /* Prefer linear for 1D surfaces because they do not benefit from 635 * tiling. To the contrary, tiling leads to wasted memory and poor 636 * memory locality due to the swizzling and alignment restrictions 637 * required in tiled surfaces. 638 */ 639 CHOOSE(ISL_TILING_LINEAR); 640 } 641 642 CHOOSE(ISL_TILING_4); 643 CHOOSE(ISL_TILING_64); 644 CHOOSE(ISL_TILING_Ys); 645 CHOOSE(ISL_TILING_Yf); 646 CHOOSE(ISL_TILING_Y0); 647 CHOOSE(ISL_TILING_X); 648 CHOOSE(ISL_TILING_W); 649 CHOOSE(ISL_TILING_LINEAR); 650 651 #undef CHOOSE 652 653 /* No tiling mode accomodates the inputs. */ 654 return false; 655} 656 657static bool 658isl_choose_msaa_layout(const struct isl_device *dev, 659 const struct isl_surf_init_info *info, 660 enum isl_tiling tiling, 661 enum isl_msaa_layout *msaa_layout) 662{ 663 if (ISL_GFX_VER(dev) >= 8) { 664 return isl_gfx8_choose_msaa_layout(dev, info, tiling, msaa_layout); 665 } else if (ISL_GFX_VER(dev) >= 7) { 666 return isl_gfx7_choose_msaa_layout(dev, info, tiling, msaa_layout); 667 } else if (ISL_GFX_VER(dev) >= 6) { 668 return isl_gfx6_choose_msaa_layout(dev, info, tiling, msaa_layout); 669 } else { 670 return isl_gfx4_choose_msaa_layout(dev, info, tiling, msaa_layout); 671 } 672} 673 674struct isl_extent2d 675isl_get_interleaved_msaa_px_size_sa(uint32_t samples) 676{ 677 assert(isl_is_pow2(samples)); 678 679 /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level 680 * Sizes (p133): 681 * 682 * If the surface is multisampled and it is a depth or stencil surface 683 * or Multisampled Surface StorageFormat in SURFACE_STATE is 684 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before 685 * proceeding: [...] 686 */ 687 return (struct isl_extent2d) { 688 .width = 1 << ((ffs(samples) - 0) / 2), 689 .height = 1 << ((ffs(samples) - 1) / 2), 690 }; 691} 692 693static void 694isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, 695 uint32_t *width, uint32_t *height) 696{ 697 const struct isl_extent2d px_size_sa = 698 isl_get_interleaved_msaa_px_size_sa(samples); 699 700 if (width) 701 *width = isl_align(*width, 2) * px_size_sa.width; 702 if (height) 703 *height = isl_align(*height, 2) * px_size_sa.height; 704} 705 706static enum isl_array_pitch_span 707isl_choose_array_pitch_span(const struct isl_device *dev, 708 const struct isl_surf_init_info *restrict info, 709 enum isl_dim_layout dim_layout, 710 const struct isl_extent4d *phys_level0_sa) 711{ 712 switch (dim_layout) { 713 case ISL_DIM_LAYOUT_GFX9_1D: 714 case ISL_DIM_LAYOUT_GFX4_2D: 715 if (ISL_GFX_VER(dev) >= 8) { 716 /* QPitch becomes programmable in Broadwell. So choose the 717 * most compact QPitch possible in order to conserve memory. 718 * 719 * From the Broadwell PRM >> Volume 2d: Command Reference: Structures 720 * >> RENDER_SURFACE_STATE Surface QPitch (p325): 721 * 722 * - Software must ensure that this field is set to a value 723 * sufficiently large such that the array slices in the surface 724 * do not overlap. Refer to the Memory Data Formats section for 725 * information on how surfaces are stored in memory. 726 * 727 * - This field specifies the distance in rows between array 728 * slices. It is used only in the following cases: 729 * 730 * - Surface Array is enabled OR 731 * - Number of Mulitsamples is not NUMSAMPLES_1 and 732 * Multisampled Surface Storage Format set to MSFMT_MSS OR 733 * - Surface Type is SURFTYPE_CUBE 734 */ 735 return ISL_ARRAY_PITCH_SPAN_COMPACT; 736 } else if (ISL_GFX_VER(dev) >= 7) { 737 /* Note that Ivybridge introduces 738 * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the 739 * driver more control over the QPitch. 740 */ 741 742 if (phys_level0_sa->array_len == 1) { 743 /* The hardware will never use the QPitch. So choose the most 744 * compact QPitch possible in order to conserve memory. 745 */ 746 return ISL_ARRAY_PITCH_SPAN_COMPACT; 747 } 748 749 if (isl_surf_usage_is_depth_or_stencil(info->usage) || 750 (info->usage & ISL_SURF_USAGE_HIZ_BIT)) { 751 /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >> 752 * Section 6.18.4.7: Surface Arrays (p112): 753 * 754 * If Surface Array Spacing is set to ARYSPC_FULL (note that 755 * the depth buffer and stencil buffer have an implied value of 756 * ARYSPC_FULL): 757 */ 758 return ISL_ARRAY_PITCH_SPAN_FULL; 759 } 760 761 if (info->levels == 1) { 762 /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing 763 * to ARYSPC_LOD0. 764 */ 765 return ISL_ARRAY_PITCH_SPAN_COMPACT; 766 } 767 768 return ISL_ARRAY_PITCH_SPAN_FULL; 769 } else if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) && 770 ISL_DEV_USE_SEPARATE_STENCIL(dev) && 771 isl_surf_usage_is_stencil(info->usage)) { 772 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 773 * Graphics Core >> Section 7.18.3.7: Surface Arrays: 774 * 775 * The separate stencil buffer does not support mip mapping, thus 776 * the storage for LODs other than LOD 0 is not needed. 777 */ 778 assert(info->levels == 1); 779 return ISL_ARRAY_PITCH_SPAN_COMPACT; 780 } else { 781 if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) && 782 ISL_DEV_USE_SEPARATE_STENCIL(dev) && 783 isl_surf_usage_is_stencil(info->usage)) { 784 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 785 * Graphics Core >> Section 7.18.3.7: Surface Arrays: 786 * 787 * The separate stencil buffer does not support mip mapping, 788 * thus the storage for LODs other than LOD 0 is not needed. 789 */ 790 assert(info->levels == 1); 791 assert(phys_level0_sa->array_len == 1); 792 return ISL_ARRAY_PITCH_SPAN_COMPACT; 793 } 794 795 if (phys_level0_sa->array_len == 1) { 796 /* The hardware will never use the QPitch. So choose the most 797 * compact QPitch possible in order to conserve memory. 798 */ 799 return ISL_ARRAY_PITCH_SPAN_COMPACT; 800 } 801 802 return ISL_ARRAY_PITCH_SPAN_FULL; 803 } 804 805 case ISL_DIM_LAYOUT_GFX4_3D: 806 /* The hardware will never use the QPitch. So choose the most 807 * compact QPitch possible in order to conserve memory. 808 */ 809 return ISL_ARRAY_PITCH_SPAN_COMPACT; 810 811 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ: 812 /* Each array image in the gfx6 stencil of HiZ surface is compact in the 813 * sense that every LOD is a compact array of the same size as LOD0. 814 */ 815 return ISL_ARRAY_PITCH_SPAN_COMPACT; 816 } 817 818 unreachable("bad isl_dim_layout"); 819 return ISL_ARRAY_PITCH_SPAN_FULL; 820} 821 822static void 823isl_choose_image_alignment_el(const struct isl_device *dev, 824 const struct isl_surf_init_info *restrict info, 825 enum isl_tiling tiling, 826 enum isl_dim_layout dim_layout, 827 enum isl_msaa_layout msaa_layout, 828 struct isl_extent3d *image_align_el) 829{ 830 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 831 if (fmtl->txc == ISL_TXC_MCS) { 832 assert(tiling == ISL_TILING_Y0); 833 834 /* 835 * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)": 836 * 837 * Height, width, and layout of MCS buffer in this case must match with 838 * Render Target height, width, and layout. MCS buffer is tiledY. 839 * 840 * To avoid wasting memory, choose the smallest alignment possible: 841 * HALIGN_4 and VALIGN_4. 842 */ 843 *image_align_el = isl_extent3d(4, 4, 1); 844 return; 845 } else if (info->format == ISL_FORMAT_HIZ) { 846 assert(ISL_GFX_VER(dev) >= 6); 847 if (ISL_GFX_VER(dev) == 6) { 848 /* HiZ surfaces on Sandy Bridge are packed tightly. */ 849 *image_align_el = isl_extent3d(1, 1, 1); 850 } else if (ISL_GFX_VER(dev) < 12) { 851 /* On gfx7+, HiZ surfaces are always aligned to 16x8 pixels in the 852 * primary surface which works out to 2x2 HiZ elments. 853 */ 854 *image_align_el = isl_extent3d(2, 2, 1); 855 } else { 856 /* On gfx12+, HiZ surfaces are always aligned to 16x16 pixels in the 857 * primary surface which works out to 2x4 HiZ elments. 858 * TODO: Verify 859 */ 860 *image_align_el = isl_extent3d(2, 4, 1); 861 } 862 return; 863 } 864 865 if (ISL_GFX_VERX10(dev) >= 125) { 866 isl_gfx125_choose_image_alignment_el(dev, info, tiling, dim_layout, 867 msaa_layout, image_align_el); 868 } else if (ISL_GFX_VER(dev) >= 12) { 869 isl_gfx12_choose_image_alignment_el(dev, info, tiling, dim_layout, 870 msaa_layout, image_align_el); 871 } else if (ISL_GFX_VER(dev) >= 9) { 872 isl_gfx9_choose_image_alignment_el(dev, info, tiling, dim_layout, 873 msaa_layout, image_align_el); 874 } else if (ISL_GFX_VER(dev) >= 8) { 875 isl_gfx8_choose_image_alignment_el(dev, info, tiling, dim_layout, 876 msaa_layout, image_align_el); 877 } else if (ISL_GFX_VER(dev) >= 7) { 878 isl_gfx7_choose_image_alignment_el(dev, info, tiling, dim_layout, 879 msaa_layout, image_align_el); 880 } else if (ISL_GFX_VER(dev) >= 6) { 881 isl_gfx6_choose_image_alignment_el(dev, info, tiling, dim_layout, 882 msaa_layout, image_align_el); 883 } else { 884 isl_gfx4_choose_image_alignment_el(dev, info, tiling, dim_layout, 885 msaa_layout, image_align_el); 886 } 887} 888 889static enum isl_dim_layout 890isl_surf_choose_dim_layout(const struct isl_device *dev, 891 enum isl_surf_dim logical_dim, 892 enum isl_tiling tiling, 893 isl_surf_usage_flags_t usage) 894{ 895 /* Sandy bridge needs a special layout for HiZ and stencil. */ 896 if (ISL_GFX_VER(dev) == 6 && 897 (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ)) 898 return ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ; 899 900 if (ISL_GFX_VER(dev) >= 9) { 901 switch (logical_dim) { 902 case ISL_SURF_DIM_1D: 903 /* From the Sky Lake PRM Vol. 5, "1D Surfaces": 904 * 905 * One-dimensional surfaces use a tiling mode of linear. 906 * Technically, they are not tiled resources, but the Tiled 907 * Resource Mode field in RENDER_SURFACE_STATE is still used to 908 * indicate the alignment requirements for this linear surface 909 * (See 1D Alignment requirements for how 4K and 64KB Tiled 910 * Resource Modes impact alignment). Alternatively, a 1D surface 911 * can be defined as a 2D tiled surface (e.g. TileY or TileX) with 912 * a height of 0. 913 * 914 * In other words, ISL_DIM_LAYOUT_GFX9_1D is only used for linear 915 * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GFX4_2D is used. 916 */ 917 if (tiling == ISL_TILING_LINEAR) 918 return ISL_DIM_LAYOUT_GFX9_1D; 919 else 920 return ISL_DIM_LAYOUT_GFX4_2D; 921 case ISL_SURF_DIM_2D: 922 case ISL_SURF_DIM_3D: 923 return ISL_DIM_LAYOUT_GFX4_2D; 924 } 925 } else { 926 switch (logical_dim) { 927 case ISL_SURF_DIM_1D: 928 case ISL_SURF_DIM_2D: 929 /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout": 930 * 931 * The cube face textures are stored in the same way as 3D surfaces 932 * are stored (see section 6.17.5 for details). For cube surfaces, 933 * however, the depth is equal to the number of faces (always 6) and 934 * is not reduced for each MIP. 935 */ 936 if (ISL_GFX_VER(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT)) 937 return ISL_DIM_LAYOUT_GFX4_3D; 938 939 return ISL_DIM_LAYOUT_GFX4_2D; 940 case ISL_SURF_DIM_3D: 941 return ISL_DIM_LAYOUT_GFX4_3D; 942 } 943 } 944 945 unreachable("bad isl_surf_dim"); 946 return ISL_DIM_LAYOUT_GFX4_2D; 947} 948 949/** 950 * Calculate the physical extent of the surface's first level, in units of 951 * surface samples. 952 */ 953static void 954isl_calc_phys_level0_extent_sa(const struct isl_device *dev, 955 const struct isl_surf_init_info *restrict info, 956 enum isl_dim_layout dim_layout, 957 enum isl_tiling tiling, 958 enum isl_msaa_layout msaa_layout, 959 struct isl_extent4d *phys_level0_sa) 960{ 961 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 962 963 if (isl_format_is_planar(info->format)) 964 unreachable("Planar formats unsupported"); 965 966 switch (info->dim) { 967 case ISL_SURF_DIM_1D: 968 assert(info->height == 1); 969 assert(info->depth == 1); 970 assert(info->samples == 1); 971 972 switch (dim_layout) { 973 case ISL_DIM_LAYOUT_GFX4_3D: 974 unreachable("bad isl_dim_layout"); 975 976 case ISL_DIM_LAYOUT_GFX9_1D: 977 case ISL_DIM_LAYOUT_GFX4_2D: 978 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ: 979 *phys_level0_sa = (struct isl_extent4d) { 980 .w = info->width, 981 .h = 1, 982 .d = 1, 983 .a = info->array_len, 984 }; 985 break; 986 } 987 break; 988 989 case ISL_SURF_DIM_2D: 990 if (ISL_GFX_VER(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT)) 991 assert(dim_layout == ISL_DIM_LAYOUT_GFX4_3D); 992 else 993 assert(dim_layout == ISL_DIM_LAYOUT_GFX4_2D || 994 dim_layout == ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ); 995 996 if (tiling == ISL_TILING_Ys && info->samples > 1) 997 isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); 998 999 switch (msaa_layout) { 1000 case ISL_MSAA_LAYOUT_NONE: 1001 assert(info->depth == 1); 1002 assert(info->samples == 1); 1003 1004 *phys_level0_sa = (struct isl_extent4d) { 1005 .w = info->width, 1006 .h = info->height, 1007 .d = 1, 1008 .a = info->array_len, 1009 }; 1010 break; 1011 1012 case ISL_MSAA_LAYOUT_ARRAY: 1013 assert(info->depth == 1); 1014 assert(info->levels == 1); 1015 assert(isl_format_supports_multisampling(dev->info, info->format)); 1016 assert(fmtl->bw == 1 && fmtl->bh == 1); 1017 1018 *phys_level0_sa = (struct isl_extent4d) { 1019 .w = info->width, 1020 .h = info->height, 1021 .d = 1, 1022 .a = info->array_len * info->samples, 1023 }; 1024 break; 1025 1026 case ISL_MSAA_LAYOUT_INTERLEAVED: 1027 assert(info->depth == 1); 1028 assert(info->levels == 1); 1029 assert(isl_format_supports_multisampling(dev->info, info->format)); 1030 1031 *phys_level0_sa = (struct isl_extent4d) { 1032 .w = info->width, 1033 .h = info->height, 1034 .d = 1, 1035 .a = info->array_len, 1036 }; 1037 1038 isl_msaa_interleaved_scale_px_to_sa(info->samples, 1039 &phys_level0_sa->w, 1040 &phys_level0_sa->h); 1041 break; 1042 } 1043 break; 1044 1045 case ISL_SURF_DIM_3D: 1046 assert(info->array_len == 1); 1047 assert(info->samples == 1); 1048 1049 if (fmtl->bd > 1) { 1050 isl_finishme("%s:%s: compression block with depth > 1", 1051 __FILE__, __func__); 1052 } 1053 1054 switch (dim_layout) { 1055 case ISL_DIM_LAYOUT_GFX9_1D: 1056 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ: 1057 unreachable("bad isl_dim_layout"); 1058 1059 case ISL_DIM_LAYOUT_GFX4_2D: 1060 assert(ISL_GFX_VER(dev) >= 9); 1061 1062 *phys_level0_sa = (struct isl_extent4d) { 1063 .w = info->width, 1064 .h = info->height, 1065 .d = 1, 1066 .a = info->depth, 1067 }; 1068 break; 1069 1070 case ISL_DIM_LAYOUT_GFX4_3D: 1071 assert(ISL_GFX_VER(dev) < 9); 1072 *phys_level0_sa = (struct isl_extent4d) { 1073 .w = info->width, 1074 .h = info->height, 1075 .d = info->depth, 1076 .a = 1, 1077 }; 1078 break; 1079 } 1080 break; 1081 } 1082} 1083 1084/** 1085 * Calculate the pitch between physical array slices, in units of rows of 1086 * surface elements. 1087 */ 1088static uint32_t 1089isl_calc_array_pitch_el_rows_gfx4_2d( 1090 const struct isl_device *dev, 1091 const struct isl_surf_init_info *restrict info, 1092 const struct isl_tile_info *tile_info, 1093 const struct isl_extent3d *image_align_sa, 1094 const struct isl_extent4d *phys_level0_sa, 1095 enum isl_array_pitch_span array_pitch_span, 1096 const struct isl_extent2d *phys_slice0_sa) 1097{ 1098 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1099 uint32_t pitch_sa_rows = 0; 1100 1101 switch (array_pitch_span) { 1102 case ISL_ARRAY_PITCH_SPAN_COMPACT: 1103 pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); 1104 break; 1105 case ISL_ARRAY_PITCH_SPAN_FULL: { 1106 /* The QPitch equation is found in the Broadwell PRM >> Volume 5: 1107 * Memory Views >> Common Surface Formats >> Surface Layout >> 2D 1108 * Surfaces >> Surface Arrays. 1109 */ 1110 uint32_t H0_sa = phys_level0_sa->h; 1111 uint32_t H1_sa = isl_minify(H0_sa, 1); 1112 1113 uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); 1114 uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); 1115 1116 uint32_t m; 1117 if (ISL_GFX_VER(dev) >= 7) { 1118 /* The QPitch equation changed slightly in Ivybridge. */ 1119 m = 12; 1120 } else { 1121 m = 11; 1122 } 1123 1124 pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); 1125 1126 if (ISL_GFX_VER(dev) == 6 && info->samples > 1 && 1127 (info->height % 4 == 1)) { 1128 /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 1129 * Graphics Core >> Section 7.18.3.7: Surface Arrays: 1130 * 1131 * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than 1132 * the value calculated in the equation above , for every 1133 * other odd Surface Height starting from 1 i.e. 1,5,9,13. 1134 * 1135 * XXX(chadv): Is the errata natural corollary of the physical 1136 * layout of interleaved samples? 1137 */ 1138 pitch_sa_rows += 4; 1139 } 1140 1141 pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); 1142 } /* end case */ 1143 break; 1144 } 1145 1146 assert(pitch_sa_rows % fmtl->bh == 0); 1147 uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh; 1148 1149 if (ISL_GFX_VER(dev) >= 9 && ISL_GFX_VER(dev) <= 11 && 1150 fmtl->txc == ISL_TXC_CCS) { 1151 /* 1152 * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632): 1153 * 1154 * "Mip-mapped and arrayed surfaces are supported with MCS buffer 1155 * layout with these alignments in the RT space: Horizontal 1156 * Alignment = 128 and Vertical Alignment = 64." 1157 * 1158 * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435): 1159 * 1160 * "For non-multisampled render target's CCS auxiliary surface, 1161 * QPitch must be computed with Horizontal Alignment = 128 and 1162 * Surface Vertical Alignment = 256. These alignments are only for 1163 * CCS buffer and not for associated render target." 1164 * 1165 * The first restriction is already handled by isl_choose_image_alignment_el 1166 * but the second restriction, which is an extension of the first, only 1167 * applies to qpitch and must be applied here. 1168 * 1169 * The second restriction disappears on Gfx12. 1170 */ 1171 assert(fmtl->bh == 4); 1172 pitch_el_rows = isl_align(pitch_el_rows, 256 / 4); 1173 } 1174 1175 if (ISL_GFX_VER(dev) >= 9 && 1176 info->dim == ISL_SURF_DIM_3D && 1177 tile_info->tiling != ISL_TILING_LINEAR) { 1178 /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: 1179 * 1180 * Tile Mode != Linear: This field must be set to an integer multiple 1181 * of the tile height 1182 */ 1183 pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height); 1184 } 1185 1186 return pitch_el_rows; 1187} 1188 1189/** 1190 * A variant of isl_calc_phys_slice0_extent_sa() specific to 1191 * ISL_DIM_LAYOUT_GFX4_2D. 1192 */ 1193static void 1194isl_calc_phys_slice0_extent_sa_gfx4_2d( 1195 const struct isl_device *dev, 1196 const struct isl_surf_init_info *restrict info, 1197 enum isl_msaa_layout msaa_layout, 1198 const struct isl_extent3d *image_align_sa, 1199 const struct isl_extent4d *phys_level0_sa, 1200 struct isl_extent2d *phys_slice0_sa) 1201{ 1202 assert(phys_level0_sa->depth == 1); 1203 1204 if (info->levels == 1) { 1205 /* Do not pad the surface to the image alignment. 1206 * 1207 * For tiled surfaces, using a reduced alignment here avoids wasting CPU 1208 * cycles on the below mipmap layout caluclations. Reducing the 1209 * alignment here is safe because we later align the row pitch and array 1210 * pitch to the tile boundary. It is safe even for 1211 * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled 1212 * to accomodate the interleaved samples. 1213 * 1214 * For linear surfaces, reducing the alignment here permits us to later 1215 * choose an arbitrary, non-aligned row pitch. If the surface backs 1216 * a VkBuffer, then an arbitrary pitch may be needed to accomodate 1217 * VkBufferImageCopy::bufferRowLength. 1218 */ 1219 *phys_slice0_sa = (struct isl_extent2d) { 1220 .w = phys_level0_sa->w, 1221 .h = phys_level0_sa->h, 1222 }; 1223 return; 1224 } 1225 1226 uint32_t slice_top_w = 0; 1227 uint32_t slice_bottom_w = 0; 1228 uint32_t slice_left_h = 0; 1229 uint32_t slice_right_h = 0; 1230 1231 uint32_t W0 = phys_level0_sa->w; 1232 uint32_t H0 = phys_level0_sa->h; 1233 1234 for (uint32_t l = 0; l < info->levels; ++l) { 1235 uint32_t W = isl_minify(W0, l); 1236 uint32_t H = isl_minify(H0, l); 1237 1238 uint32_t w = isl_align_npot(W, image_align_sa->w); 1239 uint32_t h = isl_align_npot(H, image_align_sa->h); 1240 1241 if (l == 0) { 1242 slice_top_w = w; 1243 slice_left_h = h; 1244 slice_right_h = h; 1245 } else if (l == 1) { 1246 slice_bottom_w = w; 1247 slice_left_h += h; 1248 } else if (l == 2) { 1249 slice_bottom_w += w; 1250 slice_right_h += h; 1251 } else { 1252 slice_right_h += h; 1253 } 1254 } 1255 1256 *phys_slice0_sa = (struct isl_extent2d) { 1257 .w = MAX(slice_top_w, slice_bottom_w), 1258 .h = MAX(slice_left_h, slice_right_h), 1259 }; 1260} 1261 1262static void 1263isl_calc_phys_total_extent_el_gfx4_2d( 1264 const struct isl_device *dev, 1265 const struct isl_surf_init_info *restrict info, 1266 const struct isl_tile_info *tile_info, 1267 enum isl_msaa_layout msaa_layout, 1268 const struct isl_extent3d *image_align_sa, 1269 const struct isl_extent4d *phys_level0_sa, 1270 enum isl_array_pitch_span array_pitch_span, 1271 uint32_t *array_pitch_el_rows, 1272 struct isl_extent4d *phys_total_el) 1273{ 1274 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1275 1276 struct isl_extent2d phys_slice0_sa; 1277 isl_calc_phys_slice0_extent_sa_gfx4_2d(dev, info, msaa_layout, 1278 image_align_sa, phys_level0_sa, 1279 &phys_slice0_sa); 1280 *array_pitch_el_rows = 1281 isl_calc_array_pitch_el_rows_gfx4_2d(dev, info, tile_info, 1282 image_align_sa, phys_level0_sa, 1283 array_pitch_span, 1284 &phys_slice0_sa); 1285 1286 if (tile_info->tiling == ISL_TILING_64) { 1287 *phys_total_el = (struct isl_extent4d) { 1288 .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw), 1289 .h = isl_align_div_npot(phys_slice0_sa.h, fmtl->bh), 1290 .d = isl_align_div_npot(phys_level0_sa->d, fmtl->bd), 1291 .a = phys_level0_sa->array_len, 1292 }; 1293 } else { 1294 *phys_total_el = (struct isl_extent4d) { 1295 .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw), 1296 .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) + 1297 isl_align_div_npot(phys_slice0_sa.h, fmtl->bh), 1298 .d = 1, 1299 .a = 1, 1300 }; 1301 } 1302} 1303 1304/** 1305 * A variant of isl_calc_phys_slice0_extent_sa() specific to 1306 * ISL_DIM_LAYOUT_GFX4_3D. 1307 */ 1308static void 1309isl_calc_phys_total_extent_el_gfx4_3d( 1310 const struct isl_device *dev, 1311 const struct isl_surf_init_info *restrict info, 1312 const struct isl_extent3d *image_align_sa, 1313 const struct isl_extent4d *phys_level0_sa, 1314 uint32_t *array_pitch_el_rows, 1315 struct isl_extent4d *phys_total_el) 1316{ 1317 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1318 1319 assert(info->samples == 1); 1320 1321 if (info->dim != ISL_SURF_DIM_3D) { 1322 /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout": 1323 * 1324 * The cube face textures are stored in the same way as 3D surfaces 1325 * are stored (see section 6.17.5 for details). For cube surfaces, 1326 * however, the depth is equal to the number of faces (always 6) and 1327 * is not reduced for each MIP. 1328 */ 1329 assert(ISL_GFX_VER(dev) == 4); 1330 assert(info->usage & ISL_SURF_USAGE_CUBE_BIT); 1331 assert(phys_level0_sa->array_len == 6); 1332 } else { 1333 assert(phys_level0_sa->array_len == 1); 1334 } 1335 1336 uint32_t total_w = 0; 1337 uint32_t total_h = 0; 1338 1339 uint32_t W0 = phys_level0_sa->w; 1340 uint32_t H0 = phys_level0_sa->h; 1341 uint32_t D0 = phys_level0_sa->d; 1342 uint32_t A0 = phys_level0_sa->a; 1343 1344 for (uint32_t l = 0; l < info->levels; ++l) { 1345 uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); 1346 uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); 1347 uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0; 1348 1349 uint32_t max_layers_horiz = MIN(level_d, 1u << l); 1350 uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); 1351 1352 total_w = MAX(total_w, level_w * max_layers_horiz); 1353 total_h += level_h * max_layers_vert; 1354 } 1355 1356 /* GFX4_3D layouts don't really have an array pitch since each LOD has a 1357 * different number of horizontal and vertical layers. We have to set it 1358 * to something, so at least make it true for LOD0. 1359 */ 1360 *array_pitch_el_rows = 1361 isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw; 1362 *phys_total_el = (struct isl_extent4d) { 1363 .w = isl_assert_div(total_w, fmtl->bw), 1364 .h = isl_assert_div(total_h, fmtl->bh), 1365 .d = 1, 1366 .a = 1, 1367 }; 1368} 1369 1370/** 1371 * A variant of isl_calc_phys_slice0_extent_sa() specific to 1372 * ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ. 1373 */ 1374static void 1375isl_calc_phys_total_extent_el_gfx6_stencil_hiz( 1376 const struct isl_device *dev, 1377 const struct isl_surf_init_info *restrict info, 1378 const struct isl_tile_info *tile_info, 1379 const struct isl_extent3d *image_align_sa, 1380 const struct isl_extent4d *phys_level0_sa, 1381 uint32_t *array_pitch_el_rows, 1382 struct isl_extent4d *phys_total_el) 1383{ 1384 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1385 1386 const struct isl_extent2d tile_extent_sa = { 1387 .w = tile_info->logical_extent_el.w * fmtl->bw, 1388 .h = tile_info->logical_extent_el.h * fmtl->bh, 1389 }; 1390 /* Tile size is a multiple of image alignment */ 1391 assert(tile_extent_sa.w % image_align_sa->w == 0); 1392 assert(tile_extent_sa.h % image_align_sa->h == 0); 1393 1394 const uint32_t W0 = phys_level0_sa->w; 1395 const uint32_t H0 = phys_level0_sa->h; 1396 1397 /* Each image has the same height as LOD0 because the hardware thinks 1398 * everything is LOD0 1399 */ 1400 const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a; 1401 1402 uint32_t total_top_w = 0; 1403 uint32_t total_bottom_w = 0; 1404 uint32_t total_h = 0; 1405 1406 for (uint32_t l = 0; l < info->levels; ++l) { 1407 const uint32_t W = isl_minify(W0, l); 1408 1409 const uint32_t w = isl_align(W, tile_extent_sa.w); 1410 const uint32_t h = isl_align(H, tile_extent_sa.h); 1411 1412 if (l == 0) { 1413 total_top_w = w; 1414 total_h = h; 1415 } else if (l == 1) { 1416 total_bottom_w = w; 1417 total_h += h; 1418 } else { 1419 total_bottom_w += w; 1420 } 1421 } 1422 1423 *array_pitch_el_rows = 1424 isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh); 1425 *phys_total_el = (struct isl_extent4d) { 1426 .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw), 1427 .h = isl_assert_div(total_h, fmtl->bh), 1428 .d = 1, 1429 .a = 1, 1430 }; 1431} 1432 1433/** 1434 * A variant of isl_calc_phys_slice0_extent_sa() specific to 1435 * ISL_DIM_LAYOUT_GFX9_1D. 1436 */ 1437static void 1438isl_calc_phys_total_extent_el_gfx9_1d( 1439 const struct isl_device *dev, 1440 const struct isl_surf_init_info *restrict info, 1441 const struct isl_extent3d *image_align_sa, 1442 const struct isl_extent4d *phys_level0_sa, 1443 uint32_t *array_pitch_el_rows, 1444 struct isl_extent4d *phys_total_el) 1445{ 1446 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1447 1448 assert(phys_level0_sa->height == 1); 1449 assert(phys_level0_sa->depth == 1); 1450 assert(info->samples == 1); 1451 assert(image_align_sa->w >= fmtl->bw); 1452 1453 uint32_t slice_w = 0; 1454 const uint32_t W0 = phys_level0_sa->w; 1455 1456 for (uint32_t l = 0; l < info->levels; ++l) { 1457 uint32_t W = isl_minify(W0, l); 1458 uint32_t w = isl_align_npot(W, image_align_sa->w); 1459 1460 slice_w += w; 1461 } 1462 1463 *array_pitch_el_rows = 1; 1464 *phys_total_el = (struct isl_extent4d) { 1465 .w = isl_assert_div(slice_w, fmtl->bw), 1466 .h = phys_level0_sa->array_len, 1467 .d = 1, 1468 .a = 1, 1469 }; 1470} 1471 1472/** 1473 * Calculate the two-dimensional total physical extent of the surface, in 1474 * units of surface elements. 1475 */ 1476static void 1477isl_calc_phys_total_extent_el(const struct isl_device *dev, 1478 const struct isl_surf_init_info *restrict info, 1479 const struct isl_tile_info *tile_info, 1480 enum isl_dim_layout dim_layout, 1481 enum isl_msaa_layout msaa_layout, 1482 const struct isl_extent3d *image_align_sa, 1483 const struct isl_extent4d *phys_level0_sa, 1484 enum isl_array_pitch_span array_pitch_span, 1485 uint32_t *array_pitch_el_rows, 1486 struct isl_extent4d *phys_total_el) 1487{ 1488 switch (dim_layout) { 1489 case ISL_DIM_LAYOUT_GFX9_1D: 1490 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 1491 isl_calc_phys_total_extent_el_gfx9_1d(dev, info, 1492 image_align_sa, phys_level0_sa, 1493 array_pitch_el_rows, 1494 phys_total_el); 1495 return; 1496 case ISL_DIM_LAYOUT_GFX4_2D: 1497 isl_calc_phys_total_extent_el_gfx4_2d(dev, info, tile_info, msaa_layout, 1498 image_align_sa, phys_level0_sa, 1499 array_pitch_span, 1500 array_pitch_el_rows, 1501 phys_total_el); 1502 return; 1503 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ: 1504 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 1505 isl_calc_phys_total_extent_el_gfx6_stencil_hiz(dev, info, tile_info, 1506 image_align_sa, 1507 phys_level0_sa, 1508 array_pitch_el_rows, 1509 phys_total_el); 1510 return; 1511 case ISL_DIM_LAYOUT_GFX4_3D: 1512 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 1513 isl_calc_phys_total_extent_el_gfx4_3d(dev, info, 1514 image_align_sa, phys_level0_sa, 1515 array_pitch_el_rows, 1516 phys_total_el); 1517 return; 1518 } 1519 1520 unreachable("invalid value for dim_layout"); 1521} 1522 1523static uint32_t 1524isl_calc_row_pitch_alignment(const struct isl_device *dev, 1525 const struct isl_surf_init_info *surf_info, 1526 const struct isl_tile_info *tile_info) 1527{ 1528 if (tile_info->tiling != ISL_TILING_LINEAR) { 1529 /* According to BSpec: 44930, Gfx12's CCS-compressed surface pitches must 1530 * be 512B-aligned. CCS is only support on Y tilings. 1531 * 1532 * Only consider 512B alignment when : 1533 * - AUX is not explicitly disabled 1534 * - the caller has specified no pitch 1535 * 1536 * isl_surf_get_ccs_surf() will check that the main surface alignment 1537 * matches CCS expectations. 1538 */ 1539 if (ISL_GFX_VER(dev) >= 12 && 1540 isl_format_supports_ccs_e(dev->info, surf_info->format) && 1541 tile_info->tiling != ISL_TILING_X && 1542 !(surf_info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) && 1543 surf_info->row_pitch_B == 0) { 1544 return isl_align(tile_info->phys_extent_B.width, 512); 1545 } 1546 1547 return tile_info->phys_extent_B.width; 1548 } 1549 1550 /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> 1551 * RENDER_SURFACE_STATE Surface Pitch (p349): 1552 * 1553 * - For linear render target surfaces and surfaces accessed with the 1554 * typed data port messages, the pitch must be a multiple of the 1555 * element size for non-YUV surface formats. Pitch must be 1556 * a multiple of 2 * element size for YUV surface formats. 1557 * 1558 * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we 1559 * ignore because isl doesn't do buffers.] 1560 * 1561 * - For other linear surfaces, the pitch can be any multiple of 1562 * bytes. 1563 */ 1564 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format); 1565 const uint32_t bs = fmtl->bpb / 8; 1566 uint32_t alignment; 1567 1568 if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { 1569 if (isl_format_is_yuv(surf_info->format)) { 1570 alignment = 2 * bs; 1571 } else { 1572 alignment = bs; 1573 } 1574 } else { 1575 alignment = 1; 1576 } 1577 1578 /* From the Broadwell PRM >> Volume 2c: Command Reference: Registers >> 1579 * PRI_STRIDE Stride (p1254): 1580 * 1581 * "When using linear memory, this must be at least 64 byte aligned." 1582 * 1583 * However, when displaying on NVIDIA and recent AMD GPUs via PRIME, 1584 * we need a larger pitch of 256 bytes. 1585 * 1586 * If the ISL caller didn't specify a row_pitch_B, then we should assume 1587 * the NVIDIA/AMD requirements. Otherwise, if we have a specified 1588 * row_pitch_B, this is probably because the caller is trying to import a 1589 * buffer. In that case we limit the minimum row pitch to the Intel HW 1590 * requirement. 1591 */ 1592 if (surf_info->usage & ISL_SURF_USAGE_DISPLAY_BIT) { 1593 if (surf_info->row_pitch_B == 0) 1594 alignment = isl_align(alignment, 256); 1595 else 1596 alignment = isl_align(alignment, 64); 1597 } 1598 1599 return alignment; 1600} 1601 1602static uint32_t 1603isl_calc_linear_min_row_pitch(const struct isl_device *dev, 1604 const struct isl_surf_init_info *info, 1605 const struct isl_extent4d *phys_total_el, 1606 uint32_t alignment_B) 1607{ 1608 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1609 const uint32_t bs = fmtl->bpb / 8; 1610 1611 return isl_align_npot(bs * phys_total_el->w, alignment_B); 1612} 1613 1614static uint32_t 1615isl_calc_tiled_min_row_pitch(const struct isl_device *dev, 1616 const struct isl_surf_init_info *surf_info, 1617 const struct isl_tile_info *tile_info, 1618 const struct isl_extent4d *phys_total_el, 1619 uint32_t alignment_B) 1620{ 1621 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format); 1622 1623 assert(fmtl->bpb % tile_info->format_bpb == 0); 1624 1625 const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb; 1626 const uint32_t total_w_tl = 1627 isl_align_div(phys_total_el->w * tile_el_scale, 1628 tile_info->logical_extent_el.width); 1629 1630 /* In some cases the alignment of the pitch might be > to the tile size 1631 * (for example Gfx12 CCS requires 512B alignment while the tile's width 1632 * can be 128B), so align the row pitch to the alignment. 1633 */ 1634 assert(alignment_B >= tile_info->phys_extent_B.width); 1635 return isl_align(total_w_tl * tile_info->phys_extent_B.width, alignment_B); 1636} 1637 1638static uint32_t 1639isl_calc_min_row_pitch(const struct isl_device *dev, 1640 const struct isl_surf_init_info *surf_info, 1641 const struct isl_tile_info *tile_info, 1642 const struct isl_extent4d *phys_total_el, 1643 uint32_t alignment_B) 1644{ 1645 if (tile_info->tiling == ISL_TILING_LINEAR) { 1646 return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el, 1647 alignment_B); 1648 } else { 1649 return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info, 1650 phys_total_el, alignment_B); 1651 } 1652} 1653 1654/** 1655 * Is `pitch` in the valid range for a hardware bitfield, if the bitfield's 1656 * size is `bits` bits? 1657 * 1658 * Hardware pitch fields are offset by 1. For example, if the size of 1659 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid 1660 * pitches is [1, 2^b] inclusive. If the surface pitch is N, then 1661 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1. 1662 */ 1663static bool 1664pitch_in_range(uint32_t n, uint32_t bits) 1665{ 1666 assert(n != 0); 1667 return likely(bits != 0 && 1 <= n && n <= (1 << bits)); 1668} 1669 1670static bool 1671isl_calc_row_pitch(const struct isl_device *dev, 1672 const struct isl_surf_init_info *surf_info, 1673 const struct isl_tile_info *tile_info, 1674 enum isl_dim_layout dim_layout, 1675 const struct isl_extent4d *phys_total_el, 1676 uint32_t *out_row_pitch_B) 1677{ 1678 uint32_t alignment_B = 1679 isl_calc_row_pitch_alignment(dev, surf_info, tile_info); 1680 1681 const uint32_t min_row_pitch_B = 1682 isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el, 1683 alignment_B); 1684 1685 if (surf_info->row_pitch_B != 0) { 1686 if (surf_info->row_pitch_B < min_row_pitch_B) 1687 return false; 1688 1689 if (surf_info->row_pitch_B % alignment_B != 0) 1690 return false; 1691 } 1692 1693 const uint32_t row_pitch_B = 1694 surf_info->row_pitch_B != 0 ? surf_info->row_pitch_B : min_row_pitch_B; 1695 1696 const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width; 1697 1698 if (row_pitch_B == 0) 1699 return false; 1700 1701 if (dim_layout == ISL_DIM_LAYOUT_GFX9_1D) { 1702 /* SurfacePitch is ignored for this layout. */ 1703 goto done; 1704 } 1705 1706 if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | 1707 ISL_SURF_USAGE_TEXTURE_BIT | 1708 ISL_SURF_USAGE_STORAGE_BIT)) && 1709 !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info))) 1710 return false; 1711 1712 if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT | 1713 ISL_SURF_USAGE_MCS_BIT)) && 1714 !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info))) 1715 return false; 1716 1717 if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) && 1718 !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) 1719 return false; 1720 1721 if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) && 1722 !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) 1723 return false; 1724 1725 const uint32_t stencil_pitch_bits = dev->use_separate_stencil ? 1726 _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) : 1727 _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info); 1728 1729 if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) && 1730 !pitch_in_range(row_pitch_B, stencil_pitch_bits)) 1731 return false; 1732 1733 done: 1734 *out_row_pitch_B = row_pitch_B; 1735 return true; 1736} 1737 1738bool 1739isl_surf_init_s(const struct isl_device *dev, 1740 struct isl_surf *surf, 1741 const struct isl_surf_init_info *restrict info) 1742{ 1743 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1744 1745 const struct isl_extent4d logical_level0_px = { 1746 .w = info->width, 1747 .h = info->height, 1748 .d = info->depth, 1749 .a = info->array_len, 1750 }; 1751 1752 enum isl_tiling tiling; 1753 if (!isl_surf_choose_tiling(dev, info, &tiling)) 1754 return false; 1755 1756 const enum isl_dim_layout dim_layout = 1757 isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage); 1758 1759 enum isl_msaa_layout msaa_layout; 1760 if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) 1761 return false; 1762 1763 struct isl_tile_info tile_info; 1764 isl_tiling_get_info(tiling, info->dim, msaa_layout, fmtl->bpb, 1765 info->samples, &tile_info); 1766 1767 struct isl_extent3d image_align_el; 1768 isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout, 1769 &image_align_el); 1770 1771 struct isl_extent3d image_align_sa = 1772 isl_extent3d_el_to_sa(info->format, image_align_el); 1773 1774 struct isl_extent4d phys_level0_sa; 1775 isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, 1776 &phys_level0_sa); 1777 1778 enum isl_array_pitch_span array_pitch_span = 1779 isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); 1780 1781 uint32_t array_pitch_el_rows; 1782 struct isl_extent4d phys_total_el; 1783 isl_calc_phys_total_extent_el(dev, info, &tile_info, 1784 dim_layout, msaa_layout, 1785 &image_align_sa, &phys_level0_sa, 1786 array_pitch_span, &array_pitch_el_rows, 1787 &phys_total_el); 1788 1789 uint32_t row_pitch_B; 1790 if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout, 1791 &phys_total_el, &row_pitch_B)) 1792 return false; 1793 1794 uint32_t base_alignment_B; 1795 uint64_t size_B; 1796 if (tiling == ISL_TILING_LINEAR) { 1797 /* LINEAR tiling has no concept of intra-tile arrays */ 1798 assert(phys_total_el.d == 1 && phys_total_el.a == 1); 1799 1800 size_B = (uint64_t) row_pitch_B * phys_total_el.h; 1801 1802 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress: 1803 * 1804 * "The Base Address for linear render target surfaces and surfaces 1805 * accessed with the typed surface read/write data port messages must 1806 * be element-size aligned, for non-YUV surface formats, or a 1807 * multiple of 2 element-sizes for YUV surface formats. Other linear 1808 * surfaces have no alignment requirements (byte alignment is 1809 * sufficient.)" 1810 */ 1811 base_alignment_B = MAX(1, info->min_alignment_B); 1812 if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { 1813 if (isl_format_is_yuv(info->format)) { 1814 base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 4); 1815 } else { 1816 base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 8); 1817 } 1818 } 1819 base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B); 1820 1821 /* From the Skylake PRM Vol 2c, PLANE_STRIDE::Stride: 1822 * 1823 * "For Linear memory, this field specifies the stride in chunks of 1824 * 64 bytes (1 cache line)." 1825 */ 1826 if (isl_surf_usage_is_display(info->usage)) 1827 base_alignment_B = MAX(base_alignment_B, 64); 1828 } else { 1829 /* Pitches must make sense with the tiling */ 1830 assert(row_pitch_B % tile_info.phys_extent_B.width == 0); 1831 1832 uint32_t array_slices, array_pitch_tl_rows; 1833 if (phys_total_el.d > 1) { 1834 assert(phys_total_el.a == 1); 1835 array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows, 1836 tile_info.logical_extent_el.h); 1837 array_slices = isl_align_div(phys_total_el.d, 1838 tile_info.logical_extent_el.d); 1839 } else if (phys_total_el.a > 1) { 1840 assert(phys_total_el.d == 1); 1841 array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows, 1842 tile_info.logical_extent_el.h); 1843 array_slices = isl_align_div(phys_total_el.a, 1844 tile_info.logical_extent_el.a); 1845 } else { 1846 assert(phys_total_el.d == 1 && phys_total_el.a == 1); 1847 array_pitch_tl_rows = 0; 1848 array_slices = 1; 1849 } 1850 1851 const uint32_t total_h_tl = 1852 (array_slices - 1) * array_pitch_tl_rows + 1853 isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height); 1854 1855 size_B = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch_B; 1856 1857 const uint32_t tile_size_B = tile_info.phys_extent_B.width * 1858 tile_info.phys_extent_B.height; 1859 assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B)); 1860 base_alignment_B = MAX(info->min_alignment_B, tile_size_B); 1861 1862 /* The diagram in the Bspec section Memory Compression - Gfx12, shows 1863 * that the CCS is indexed in 256B chunks. However, the 1864 * PLANE_AUX_DIST::Auxiliary Surface Distance field is in units of 4K 1865 * pages. We currently don't assign the usage field like we do for main 1866 * surfaces, so just use 4K for now. 1867 */ 1868 if (tiling == ISL_TILING_GFX12_CCS) 1869 base_alignment_B = MAX(base_alignment_B, 4096); 1870 1871 /* Gfx12+ requires that images be 64K-aligned if they're going to used 1872 * with CCS. This is because the Aux translation table maps main 1873 * surface addresses to aux addresses at a 64K (in the main surface) 1874 * granularity. Because we don't know for sure in ISL if a surface will 1875 * use CCS, we have to guess based on the DISABLE_AUX usage bit. The 1876 * one thing we do know is that we haven't enable CCS on linear images 1877 * yet so we can avoid the extra alignment there. 1878 */ 1879 if (ISL_GFX_VER(dev) >= 12 && 1880 !(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) { 1881 base_alignment_B = MAX(base_alignment_B, 64 * 1024); 1882 } 1883 } 1884 1885 if (ISL_GFX_VER(dev) < 9) { 1886 /* From the Broadwell PRM Vol 5, Surface Layout: 1887 * 1888 * "In addition to restrictions on maximum height, width, and depth, 1889 * surfaces are also restricted to a maximum size in bytes. This 1890 * maximum is 2 GB for all products and all surface types." 1891 * 1892 * This comment is applicable to all Pre-gfx9 platforms. 1893 */ 1894 if (size_B > (uint64_t) 1 << 31) 1895 return false; 1896 } else if (ISL_GFX_VER(dev) < 11) { 1897 /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes: 1898 * "In addition to restrictions on maximum height, width, and depth, 1899 * surfaces are also restricted to a maximum size of 2^38 bytes. 1900 * All pixels within the surface must be contained within 2^38 bytes 1901 * of the base address." 1902 */ 1903 if (size_B > (uint64_t) 1 << 38) 1904 return false; 1905 } else { 1906 /* gfx11+ platforms raised this limit to 2^44 bytes. */ 1907 if (size_B > (uint64_t) 1 << 44) 1908 return false; 1909 } 1910 1911 *surf = (struct isl_surf) { 1912 .dim = info->dim, 1913 .dim_layout = dim_layout, 1914 .msaa_layout = msaa_layout, 1915 .tiling = tiling, 1916 .format = info->format, 1917 1918 .levels = info->levels, 1919 .samples = info->samples, 1920 1921 .image_alignment_el = image_align_el, 1922 .logical_level0_px = logical_level0_px, 1923 .phys_level0_sa = phys_level0_sa, 1924 1925 .size_B = size_B, 1926 .alignment_B = base_alignment_B, 1927 .row_pitch_B = row_pitch_B, 1928 .array_pitch_el_rows = array_pitch_el_rows, 1929 .array_pitch_span = array_pitch_span, 1930 1931 .usage = info->usage, 1932 }; 1933 1934 return true; 1935} 1936 1937void 1938isl_surf_get_tile_info(const struct isl_surf *surf, 1939 struct isl_tile_info *tile_info) 1940{ 1941 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 1942 isl_tiling_get_info(surf->tiling, surf->dim, surf->msaa_layout, fmtl->bpb, 1943 surf->samples, tile_info); 1944} 1945 1946bool 1947isl_surf_get_hiz_surf(const struct isl_device *dev, 1948 const struct isl_surf *surf, 1949 struct isl_surf *hiz_surf) 1950{ 1951 assert(ISL_GFX_VER(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev)); 1952 1953 if (!isl_surf_usage_is_depth(surf->usage)) 1954 return false; 1955 1956 /* HiZ only works with Y-tiled depth buffers */ 1957 if (!isl_tiling_is_any_y(surf->tiling)) 1958 return false; 1959 1960 /* On SNB+, compressed depth buffers cannot be interleaved with stencil. */ 1961 switch (surf->format) { 1962 case ISL_FORMAT_R24_UNORM_X8_TYPELESS: 1963 if (isl_surf_usage_is_depth_and_stencil(surf->usage)) { 1964 assert(ISL_GFX_VER(dev) == 5); 1965 unreachable("This should work, but is untested"); 1966 } 1967 FALLTHROUGH; 1968 case ISL_FORMAT_R16_UNORM: 1969 case ISL_FORMAT_R32_FLOAT: 1970 break; 1971 case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS: 1972 if (ISL_GFX_VER(dev) == 5) { 1973 assert(isl_surf_usage_is_depth_and_stencil(surf->usage)); 1974 unreachable("This should work, but is untested"); 1975 } 1976 FALLTHROUGH; 1977 default: 1978 return false; 1979 } 1980 1981 /* Multisampled depth is always interleaved */ 1982 assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE || 1983 surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED); 1984 1985 /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer": 1986 * 1987 * "The Surface Type, Height, Width, Depth, Minimum Array Element, Render 1988 * Target View Extent, and Depth Coordinate Offset X/Y of the 1989 * hierarchical depth buffer are inherited from the depth buffer. The 1990 * height and width of the hierarchical depth buffer that must be 1991 * allocated are computed by the following formulas, where HZ is the 1992 * hierarchical depth buffer and Z is the depth buffer. The Z_Height, 1993 * Z_Width, and Z_Depth values given in these formulas are those present 1994 * in 3DSTATE_DEPTH_BUFFER incremented by one. 1995 * 1996 * "The value of Z_Height and Z_Width must each be multiplied by 2 before 1997 * being applied to the table below if Number of Multisamples is set to 1998 * NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and 1999 * Z_Width must be multiplied by 4 before being applied to the table 2000 * below if Number of Multisamples is set to NUMSAMPLES_8." 2001 * 2002 * In the Sky Lake PRM, the second paragraph is replaced with this: 2003 * 2004 * "The Z_Height and Z_Width values must equal those present in 2005 * 3DSTATE_DEPTH_BUFFER incremented by one." 2006 * 2007 * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ 2008 * block corresponds to a region of 8x4 samples in the primary depth 2009 * surface. On Sky Lake, on the other hand, each HiZ block corresponds to 2010 * a region of 8x4 pixels in the primary depth surface regardless of the 2011 * number of samples. The dimensions of a HiZ block in both pixels and 2012 * samples are given in the table below: 2013 * 2014 * | SNB - BDW | SKL+ 2015 * ------+-----------+------------- 2016 * 1x | 8 x 4 sa | 8 x 4 sa 2017 * MSAA | 8 x 4 px | 8 x 4 px 2018 * ------+-----------+------------- 2019 * 2x | 8 x 4 sa | 16 x 4 sa 2020 * MSAA | 4 x 4 px | 8 x 4 px 2021 * ------+-----------+------------- 2022 * 4x | 8 x 4 sa | 16 x 8 sa 2023 * MSAA | 4 x 2 px | 8 x 4 px 2024 * ------+-----------+------------- 2025 * 8x | 8 x 4 sa | 32 x 8 sa 2026 * MSAA | 2 x 2 px | 8 x 4 px 2027 * ------+-----------+------------- 2028 * 16x | N/A | 32 x 16 sa 2029 * MSAA | N/A | 8 x 4 px 2030 * ------+-----------+------------- 2031 * 2032 * There are a number of different ways that this discrepency could be 2033 * handled. The way we have chosen is to simply make MSAA HiZ have the 2034 * same number of samples as the parent surface pre-Sky Lake and always be 2035 * single-sampled on Sky Lake and above. Since the block sizes of 2036 * compressed formats are given in samples, this neatly handles everything 2037 * without the need for additional HiZ formats with different block sizes 2038 * on SKL+. 2039 */ 2040 const unsigned samples = ISL_GFX_VER(dev) >= 9 ? 1 : surf->samples; 2041 2042 return isl_surf_init(dev, hiz_surf, 2043 .dim = surf->dim, 2044 .format = ISL_FORMAT_HIZ, 2045 .width = surf->logical_level0_px.width, 2046 .height = surf->logical_level0_px.height, 2047 .depth = surf->logical_level0_px.depth, 2048 .levels = surf->levels, 2049 .array_len = surf->logical_level0_px.array_len, 2050 .samples = samples, 2051 .usage = ISL_SURF_USAGE_HIZ_BIT, 2052 .tiling_flags = ISL_TILING_HIZ_BIT); 2053} 2054 2055bool 2056isl_surf_get_mcs_surf(const struct isl_device *dev, 2057 const struct isl_surf *surf, 2058 struct isl_surf *mcs_surf) 2059{ 2060 /* It must be multisampled with an array layout */ 2061 if (surf->msaa_layout != ISL_MSAA_LAYOUT_ARRAY) 2062 return false; 2063 2064 if (mcs_surf->size_B > 0) 2065 return false; 2066 2067 /* The following are true of all multisampled surfaces */ 2068 assert(surf->samples > 1); 2069 assert(surf->dim == ISL_SURF_DIM_2D); 2070 assert(surf->levels == 1); 2071 assert(surf->logical_level0_px.depth == 1); 2072 2073 /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"): 2074 * 2075 * This field must be set to 0 for all SINT MSRTs when all RT channels 2076 * are not written 2077 * 2078 * In practice this means that we have to disable MCS for all signed 2079 * integer MSAA buffers. The alternative, to disable MCS only when one 2080 * of the render target channels is disabled, is impractical because it 2081 * would require converting between CMS and UMS MSAA layouts on the fly, 2082 * which is expensive. 2083 */ 2084 if (ISL_GFX_VER(dev) == 7 && isl_format_has_sint_channel(surf->format)) 2085 return false; 2086 2087 /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9 2088 * bits which means the maximum pitch of a compression surface is 512 2089 * tiles or 64KB (since MCS is always Y-tiled). Since a 16x MCS buffer is 2090 * 64bpp, this gives us a maximum width of 8192 pixels. We can create 2091 * larger multisampled surfaces, we just can't compress them. For 2x, 4x, 2092 * and 8x, we have enough room for the full 16k supported by the hardware. 2093 */ 2094 if (surf->samples == 16 && surf->logical_level0_px.width > 8192) 2095 return false; 2096 2097 enum isl_format mcs_format; 2098 switch (surf->samples) { 2099 case 2: mcs_format = ISL_FORMAT_MCS_2X; break; 2100 case 4: mcs_format = ISL_FORMAT_MCS_4X; break; 2101 case 8: mcs_format = ISL_FORMAT_MCS_8X; break; 2102 case 16: mcs_format = ISL_FORMAT_MCS_16X; break; 2103 default: 2104 unreachable("Invalid sample count"); 2105 } 2106 2107 return isl_surf_init(dev, mcs_surf, 2108 .dim = ISL_SURF_DIM_2D, 2109 .format = mcs_format, 2110 .width = surf->logical_level0_px.width, 2111 .height = surf->logical_level0_px.height, 2112 .depth = 1, 2113 .levels = 1, 2114 .array_len = surf->logical_level0_px.array_len, 2115 .samples = 1, /* MCS surfaces are really single-sampled */ 2116 .usage = ISL_SURF_USAGE_MCS_BIT, 2117 .tiling_flags = ISL_TILING_Y0_BIT); 2118} 2119 2120bool 2121isl_surf_supports_ccs(const struct isl_device *dev, 2122 const struct isl_surf *surf, 2123 const struct isl_surf *hiz_or_mcs_surf) 2124{ 2125 /* CCS support does not exist prior to Gfx7 */ 2126 if (ISL_GFX_VER(dev) <= 6) 2127 return false; 2128 2129 /* Wa_22011186057: Disable compression on ADL-P A0 */ 2130 if (dev->info->is_alderlake && dev->info->gt == 2 && 2131 dev->info->revision == 0) 2132 return false; 2133 2134 if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) 2135 return false; 2136 2137 if (isl_format_is_compressed(surf->format)) 2138 return false; 2139 2140 if (!isl_is_pow2(isl_format_get_layout(surf->format)->bpb)) 2141 return false; 2142 2143 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 2144 * Target(s)", beneath the "Fast Color Clear" bullet (p326): 2145 * 2146 * - Support is limited to tiled render targets. 2147 * 2148 * From the Skylake documentation, it is made clear that X-tiling is no 2149 * longer supported: 2150 * 2151 * - MCS and Lossless compression is supported for 2152 * TiledY/TileYs/TileYf non-MSRTs only. 2153 * 2154 * From the BSpec (44930) for Gfx12: 2155 * 2156 * Linear CCS is only allowed for Untyped Buffers but only via HDC 2157 * Data-Port messages. 2158 * 2159 * We never use untyped messages on surfaces created by ISL on Gfx9+ so 2160 * this means linear is out on Gfx12+ as well. 2161 */ 2162 if (surf->tiling == ISL_TILING_LINEAR) 2163 return false; 2164 2165 if (ISL_GFX_VER(dev) >= 12) { 2166 if (isl_surf_usage_is_stencil(surf->usage)) { 2167 /* HiZ and MCS aren't allowed with stencil */ 2168 assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0); 2169 2170 /* Multi-sampled stencil cannot have CCS */ 2171 if (surf->samples > 1) 2172 return false; 2173 } else if (isl_surf_usage_is_depth(surf->usage)) { 2174 const struct isl_surf *hiz_surf = hiz_or_mcs_surf; 2175 2176 /* With depth surfaces, HIZ is required for CCS. */ 2177 if (hiz_surf == NULL || hiz_surf->size_B == 0) 2178 return false; 2179 2180 assert(hiz_surf->usage & ISL_SURF_USAGE_HIZ_BIT); 2181 assert(hiz_surf->tiling == ISL_TILING_HIZ); 2182 assert(hiz_surf->format == ISL_FORMAT_HIZ); 2183 } else if (surf->samples > 1) { 2184 const struct isl_surf *mcs_surf = hiz_or_mcs_surf; 2185 2186 /* With multisampled color, CCS requires MCS */ 2187 if (mcs_surf == NULL || mcs_surf->size_B == 0) 2188 return false; 2189 2190 assert(mcs_surf->usage & ISL_SURF_USAGE_MCS_BIT); 2191 assert(isl_tiling_is_any_y(mcs_surf->tiling)); 2192 assert(isl_format_is_mcs(mcs_surf->format)); 2193 } else { 2194 /* Single-sampled color can't have MCS or HiZ */ 2195 assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0); 2196 } 2197 2198 /* On Gfx12, all CCS-compressed surface pitches must be multiples of 2199 * 512B. 2200 */ 2201 if (surf->row_pitch_B % 512 != 0) 2202 return false; 2203 2204 /* According to Wa_1406738321, 3D textures need a blit to a new 2205 * surface in order to perform a resolve. For now, just disable CCS. 2206 */ 2207 if (surf->dim == ISL_SURF_DIM_3D) { 2208 isl_finishme("%s:%s: CCS for 3D textures is disabled, but a workaround" 2209 " is available.", __FILE__, __func__); 2210 return false; 2211 } 2212 2213 /* Wa_1207137018 2214 * 2215 * TODO: implement following workaround currently covered by the 2216 * restriction above. If following conditions are met: 2217 * 2218 * - RENDER_SURFACE_STATE.Surface Type == 3D 2219 * - RENDER_SURFACE_STATE.Auxiliary Surface Mode != AUX_NONE 2220 * - RENDER_SURFACE_STATE.Tiled ResourceMode is TYF or TYS 2221 * 2222 * Set the value of RENDER_SURFACE_STATE.Mip Tail Start LOD to a mip 2223 * that larger than those present in the surface (i.e. 15) 2224 */ 2225 2226 /* TODO: Handle the other tiling formats */ 2227 if (surf->tiling != ISL_TILING_Y0) 2228 return false; 2229 } else { 2230 /* ISL_GFX_VER(dev) < 12 */ 2231 if (surf->samples > 1) 2232 return false; 2233 2234 /* CCS is only for color images on Gfx7-11 */ 2235 if (isl_surf_usage_is_depth_or_stencil(surf->usage)) 2236 return false; 2237 2238 /* We're single-sampled color so having HiZ or MCS makes no sense */ 2239 assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0); 2240 2241 /* The PRM doesn't say this explicitly, but fast-clears don't appear to 2242 * work for 3D textures until gfx9 where the layout of 3D textures 2243 * changes to match 2D array textures. 2244 */ 2245 if (ISL_GFX_VER(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D) 2246 return false; 2247 2248 /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of 2249 * Non-MultiSampler Render Target Restrictions): 2250 * 2251 * "Support is for non-mip-mapped and non-array surface types only." 2252 * 2253 * This restriction is lifted on gfx8+. Technically, it may be possible 2254 * to create a CCS for an arrayed or mipmapped image and only enable 2255 * CCS_D when rendering to the base slice. However, there is no 2256 * documentation tell us what the hardware would do in that case or what 2257 * it does if you walk off the bases slice. (Does it ignore CCS or does 2258 * it start scribbling over random memory?) We play it safe and just 2259 * follow the docs and don't allow CCS_D for arrayed or mip-mapped 2260 * surfaces. 2261 */ 2262 if (ISL_GFX_VER(dev) <= 7 && 2263 (surf->levels > 1 || surf->logical_level0_px.array_len > 1)) 2264 return false; 2265 2266 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 2267 * Target(s)", beneath the "Fast Color Clear" bullet (p326): 2268 * 2269 * - MCS buffer for non-MSRT is supported only for RT formats 32bpp, 2270 * 64bpp, and 128bpp. 2271 */ 2272 if (isl_format_get_layout(surf->format)->bpb < 32) 2273 return false; 2274 2275 /* From the Skylake documentation, it is made clear that X-tiling is no 2276 * longer supported: 2277 * 2278 * - MCS and Lossless compression is supported for 2279 * TiledY/TileYs/TileYf non-MSRTs only. 2280 */ 2281 if (ISL_GFX_VER(dev) >= 9 && !isl_tiling_is_any_y(surf->tiling)) 2282 return false; 2283 } 2284 2285 return true; 2286} 2287 2288bool 2289isl_surf_get_ccs_surf(const struct isl_device *dev, 2290 const struct isl_surf *surf, 2291 const struct isl_surf *hiz_or_mcs_surf, 2292 struct isl_surf *ccs_surf, 2293 uint32_t row_pitch_B) 2294{ 2295 if (!isl_surf_supports_ccs(dev, surf, hiz_or_mcs_surf)) 2296 return false; 2297 2298 if (ISL_GFX_VER(dev) >= 12) { 2299 enum isl_format ccs_format; 2300 switch (isl_format_get_layout(surf->format)->bpb) { 2301 case 8: ccs_format = ISL_FORMAT_GFX12_CCS_8BPP_Y0; break; 2302 case 16: ccs_format = ISL_FORMAT_GFX12_CCS_16BPP_Y0; break; 2303 case 32: ccs_format = ISL_FORMAT_GFX12_CCS_32BPP_Y0; break; 2304 case 64: ccs_format = ISL_FORMAT_GFX12_CCS_64BPP_Y0; break; 2305 case 128: ccs_format = ISL_FORMAT_GFX12_CCS_128BPP_Y0; break; 2306 default: 2307 return false; 2308 } 2309 2310 /* On Gfx12, the CCS is a scaled-down version of the main surface. We 2311 * model this as the CCS compressing a 2D-view of the entire surface. 2312 */ 2313 const bool ok = 2314 isl_surf_init(dev, ccs_surf, 2315 .dim = ISL_SURF_DIM_2D, 2316 .format = ccs_format, 2317 .width = isl_surf_get_row_pitch_el(surf), 2318 .height = surf->size_B / surf->row_pitch_B, 2319 .depth = 1, 2320 .levels = 1, 2321 .array_len = 1, 2322 .samples = 1, 2323 .row_pitch_B = row_pitch_B, 2324 .usage = ISL_SURF_USAGE_CCS_BIT, 2325 .tiling_flags = ISL_TILING_GFX12_CCS_BIT); 2326 assert(!ok || ccs_surf->size_B == surf->size_B / 256); 2327 return ok; 2328 } else { 2329 enum isl_format ccs_format; 2330 if (ISL_GFX_VER(dev) >= 9) { 2331 switch (isl_format_get_layout(surf->format)->bpb) { 2332 case 32: ccs_format = ISL_FORMAT_GFX9_CCS_32BPP; break; 2333 case 64: ccs_format = ISL_FORMAT_GFX9_CCS_64BPP; break; 2334 case 128: ccs_format = ISL_FORMAT_GFX9_CCS_128BPP; break; 2335 default: unreachable("Unsupported CCS format"); 2336 return false; 2337 } 2338 } else if (surf->tiling == ISL_TILING_Y0) { 2339 switch (isl_format_get_layout(surf->format)->bpb) { 2340 case 32: ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_Y; break; 2341 case 64: ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_Y; break; 2342 case 128: ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_Y; break; 2343 default: unreachable("Unsupported CCS format"); 2344 } 2345 } else if (surf->tiling == ISL_TILING_X) { 2346 switch (isl_format_get_layout(surf->format)->bpb) { 2347 case 32: ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_X; break; 2348 case 64: ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_X; break; 2349 case 128: ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_X; break; 2350 default: unreachable("Unsupported CCS format"); 2351 } 2352 } else { 2353 unreachable("Invalid tiling format"); 2354 } 2355 2356 return isl_surf_init(dev, ccs_surf, 2357 .dim = surf->dim, 2358 .format = ccs_format, 2359 .width = surf->logical_level0_px.width, 2360 .height = surf->logical_level0_px.height, 2361 .depth = surf->logical_level0_px.depth, 2362 .levels = surf->levels, 2363 .array_len = surf->logical_level0_px.array_len, 2364 .samples = 1, 2365 .row_pitch_B = row_pitch_B, 2366 .usage = ISL_SURF_USAGE_CCS_BIT, 2367 .tiling_flags = ISL_TILING_CCS_BIT); 2368 } 2369} 2370 2371#define isl_genX_call(dev, func, ...) \ 2372 switch (ISL_GFX_VERX10(dev)) { \ 2373 case 40: \ 2374 isl_gfx4_##func(__VA_ARGS__); \ 2375 break; \ 2376 case 45: \ 2377 /* G45 surface state is the same as gfx5 */ \ 2378 case 50: \ 2379 isl_gfx5_##func(__VA_ARGS__); \ 2380 break; \ 2381 case 60: \ 2382 isl_gfx6_##func(__VA_ARGS__); \ 2383 break; \ 2384 case 70: \ 2385 isl_gfx7_##func(__VA_ARGS__); \ 2386 break; \ 2387 case 75: \ 2388 isl_gfx75_##func(__VA_ARGS__); \ 2389 break; \ 2390 case 80: \ 2391 isl_gfx8_##func(__VA_ARGS__); \ 2392 break; \ 2393 case 90: \ 2394 isl_gfx9_##func(__VA_ARGS__); \ 2395 break; \ 2396 case 110: \ 2397 isl_gfx11_##func(__VA_ARGS__); \ 2398 break; \ 2399 case 120: \ 2400 isl_gfx12_##func(__VA_ARGS__); \ 2401 break; \ 2402 case 125: \ 2403 isl_gfx125_##func(__VA_ARGS__); \ 2404 break; \ 2405 default: \ 2406 assert(!"Unknown hardware generation"); \ 2407 } 2408 2409void 2410isl_surf_fill_state_s(const struct isl_device *dev, void *state, 2411 const struct isl_surf_fill_state_info *restrict info) 2412{ 2413#ifndef NDEBUG 2414 isl_surf_usage_flags_t _base_usage = 2415 info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | 2416 ISL_SURF_USAGE_TEXTURE_BIT | 2417 ISL_SURF_USAGE_STORAGE_BIT); 2418 /* They may only specify one of the above bits at a time */ 2419 assert(__builtin_popcount(_base_usage) == 1); 2420 /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */ 2421 assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage); 2422#endif 2423 2424 if (info->surf->dim == ISL_SURF_DIM_3D) { 2425 assert(info->view->base_array_layer + info->view->array_len <= 2426 info->surf->logical_level0_px.depth); 2427 } else { 2428 assert(info->view->base_array_layer + info->view->array_len <= 2429 info->surf->logical_level0_px.array_len); 2430 } 2431 2432 isl_genX_call(dev, surf_fill_state_s, dev, state, info); 2433} 2434 2435void 2436isl_buffer_fill_state_s(const struct isl_device *dev, void *state, 2437 const struct isl_buffer_fill_state_info *restrict info) 2438{ 2439 isl_genX_call(dev, buffer_fill_state_s, dev, state, info); 2440} 2441 2442void 2443isl_null_fill_state_s(const struct isl_device *dev, void *state, 2444 const struct isl_null_fill_state_info *restrict info) 2445{ 2446 isl_genX_call(dev, null_fill_state, state, info); 2447} 2448 2449void 2450isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, 2451 const struct isl_depth_stencil_hiz_emit_info *restrict info) 2452{ 2453 if (info->depth_surf && info->stencil_surf) { 2454 if (!dev->info->has_hiz_and_separate_stencil) { 2455 assert(info->depth_surf == info->stencil_surf); 2456 assert(info->depth_address == info->stencil_address); 2457 } 2458 assert(info->depth_surf->dim == info->stencil_surf->dim); 2459 } 2460 2461 if (info->depth_surf) { 2462 assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT)); 2463 if (info->depth_surf->dim == ISL_SURF_DIM_3D) { 2464 assert(info->view->base_array_layer + info->view->array_len <= 2465 info->depth_surf->logical_level0_px.depth); 2466 } else { 2467 assert(info->view->base_array_layer + info->view->array_len <= 2468 info->depth_surf->logical_level0_px.array_len); 2469 } 2470 } 2471 2472 if (info->stencil_surf) { 2473 assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT)); 2474 if (info->stencil_surf->dim == ISL_SURF_DIM_3D) { 2475 assert(info->view->base_array_layer + info->view->array_len <= 2476 info->stencil_surf->logical_level0_px.depth); 2477 } else { 2478 assert(info->view->base_array_layer + info->view->array_len <= 2479 info->stencil_surf->logical_level0_px.array_len); 2480 } 2481 } 2482 2483 isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info); 2484} 2485 2486/** 2487 * A variant of isl_surf_get_image_offset_sa() specific to 2488 * ISL_DIM_LAYOUT_GFX4_2D. 2489 */ 2490static void 2491get_image_offset_sa_gfx4_2d(const struct isl_surf *surf, 2492 uint32_t level, uint32_t logical_array_layer, 2493 uint32_t *x_offset_sa, 2494 uint32_t *y_offset_sa) 2495{ 2496 assert(level < surf->levels); 2497 if (surf->dim == ISL_SURF_DIM_3D) 2498 assert(logical_array_layer < surf->logical_level0_px.depth); 2499 else 2500 assert(logical_array_layer < surf->logical_level0_px.array_len); 2501 2502 const struct isl_extent3d image_align_sa = 2503 isl_surf_get_image_alignment_sa(surf); 2504 2505 const uint32_t W0 = surf->phys_level0_sa.width; 2506 const uint32_t H0 = surf->phys_level0_sa.height; 2507 2508 const uint32_t phys_layer = logical_array_layer * 2509 (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1); 2510 2511 uint32_t x = 0; 2512 uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf); 2513 2514 for (uint32_t l = 0; l < level; ++l) { 2515 if (l == 1) { 2516 uint32_t W = isl_minify(W0, l); 2517 x += isl_align_npot(W, image_align_sa.w); 2518 } else { 2519 uint32_t H = isl_minify(H0, l); 2520 y += isl_align_npot(H, image_align_sa.h); 2521 } 2522 } 2523 2524 *x_offset_sa = x; 2525 *y_offset_sa = y; 2526} 2527 2528/** 2529 * A variant of isl_surf_get_image_offset_sa() specific to 2530 * ISL_DIM_LAYOUT_GFX4_3D. 2531 */ 2532static void 2533get_image_offset_sa_gfx4_3d(const struct isl_surf *surf, 2534 uint32_t level, uint32_t logical_z_offset_px, 2535 uint32_t *x_offset_sa, 2536 uint32_t *y_offset_sa) 2537{ 2538 assert(level < surf->levels); 2539 if (surf->dim == ISL_SURF_DIM_3D) { 2540 assert(surf->phys_level0_sa.array_len == 1); 2541 assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level)); 2542 } else { 2543 assert(surf->dim == ISL_SURF_DIM_2D); 2544 assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT); 2545 assert(surf->phys_level0_sa.array_len == 6); 2546 assert(logical_z_offset_px < surf->phys_level0_sa.array_len); 2547 } 2548 2549 const struct isl_extent3d image_align_sa = 2550 isl_surf_get_image_alignment_sa(surf); 2551 2552 const uint32_t W0 = surf->phys_level0_sa.width; 2553 const uint32_t H0 = surf->phys_level0_sa.height; 2554 const uint32_t D0 = surf->phys_level0_sa.depth; 2555 const uint32_t AL = surf->phys_level0_sa.array_len; 2556 2557 uint32_t x = 0; 2558 uint32_t y = 0; 2559 2560 for (uint32_t l = 0; l < level; ++l) { 2561 const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h); 2562 const uint32_t level_d = 2563 isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL, 2564 image_align_sa.d); 2565 const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); 2566 2567 y += level_h * max_layers_vert; 2568 } 2569 2570 const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w); 2571 const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h); 2572 const uint32_t level_d = 2573 isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL, 2574 image_align_sa.d); 2575 2576 const uint32_t max_layers_horiz = MIN(level_d, 1u << level); 2577 2578 x += level_w * (logical_z_offset_px % max_layers_horiz); 2579 y += level_h * (logical_z_offset_px / max_layers_horiz); 2580 2581 *x_offset_sa = x; 2582 *y_offset_sa = y; 2583} 2584 2585static void 2586get_image_offset_sa_gfx6_stencil_hiz(const struct isl_surf *surf, 2587 uint32_t level, 2588 uint32_t logical_array_layer, 2589 uint32_t *x_offset_sa, 2590 uint32_t *y_offset_sa) 2591{ 2592 assert(level < surf->levels); 2593 assert(surf->logical_level0_px.depth == 1); 2594 assert(logical_array_layer < surf->logical_level0_px.array_len); 2595 2596 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 2597 2598 const struct isl_extent3d image_align_sa = 2599 isl_surf_get_image_alignment_sa(surf); 2600 2601 struct isl_tile_info tile_info; 2602 isl_surf_get_tile_info(surf, &tile_info); 2603 const struct isl_extent2d tile_extent_sa = { 2604 .w = tile_info.logical_extent_el.w * fmtl->bw, 2605 .h = tile_info.logical_extent_el.h * fmtl->bh, 2606 }; 2607 /* Tile size is a multiple of image alignment */ 2608 assert(tile_extent_sa.w % image_align_sa.w == 0); 2609 assert(tile_extent_sa.h % image_align_sa.h == 0); 2610 2611 const uint32_t W0 = surf->phys_level0_sa.w; 2612 const uint32_t H0 = surf->phys_level0_sa.h; 2613 2614 /* Each image has the same height as LOD0 because the hardware thinks 2615 * everything is LOD0 2616 */ 2617 const uint32_t H = isl_align(H0, image_align_sa.h); 2618 2619 /* Quick sanity check for consistency */ 2620 if (surf->phys_level0_sa.array_len > 1) 2621 assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh)); 2622 2623 uint32_t x = 0, y = 0; 2624 for (uint32_t l = 0; l < level; ++l) { 2625 const uint32_t W = isl_minify(W0, l); 2626 2627 const uint32_t w = isl_align(W, tile_extent_sa.w); 2628 const uint32_t h = isl_align(H * surf->phys_level0_sa.a, 2629 tile_extent_sa.h); 2630 2631 if (l == 0) { 2632 y += h; 2633 } else { 2634 x += w; 2635 } 2636 } 2637 2638 y += H * logical_array_layer; 2639 2640 *x_offset_sa = x; 2641 *y_offset_sa = y; 2642} 2643 2644/** 2645 * A variant of isl_surf_get_image_offset_sa() specific to 2646 * ISL_DIM_LAYOUT_GFX9_1D. 2647 */ 2648static void 2649get_image_offset_sa_gfx9_1d(const struct isl_surf *surf, 2650 uint32_t level, uint32_t layer, 2651 uint32_t *x_offset_sa, 2652 uint32_t *y_offset_sa) 2653{ 2654 assert(level < surf->levels); 2655 assert(layer < surf->phys_level0_sa.array_len); 2656 assert(surf->phys_level0_sa.height == 1); 2657 assert(surf->phys_level0_sa.depth == 1); 2658 assert(surf->samples == 1); 2659 2660 const uint32_t W0 = surf->phys_level0_sa.width; 2661 const struct isl_extent3d image_align_sa = 2662 isl_surf_get_image_alignment_sa(surf); 2663 2664 uint32_t x = 0; 2665 2666 for (uint32_t l = 0; l < level; ++l) { 2667 uint32_t W = isl_minify(W0, l); 2668 uint32_t w = isl_align_npot(W, image_align_sa.w); 2669 2670 x += w; 2671 } 2672 2673 *x_offset_sa = x; 2674 *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf); 2675} 2676 2677/** 2678 * Calculate the offset, in units of surface samples, to a subimage in the 2679 * surface. 2680 * 2681 * @invariant level < surface levels 2682 * @invariant logical_array_layer < logical array length of surface 2683 * @invariant logical_z_offset_px < logical depth of surface at level 2684 */ 2685void 2686isl_surf_get_image_offset_sa(const struct isl_surf *surf, 2687 uint32_t level, 2688 uint32_t logical_array_layer, 2689 uint32_t logical_z_offset_px, 2690 uint32_t *x_offset_sa, 2691 uint32_t *y_offset_sa, 2692 uint32_t *z_offset_sa, 2693 uint32_t *array_offset) 2694{ 2695 assert(level < surf->levels); 2696 assert(logical_array_layer < surf->logical_level0_px.array_len); 2697 assert(logical_z_offset_px 2698 < isl_minify(surf->logical_level0_px.depth, level)); 2699 2700 switch (surf->dim_layout) { 2701 case ISL_DIM_LAYOUT_GFX9_1D: 2702 get_image_offset_sa_gfx9_1d(surf, level, logical_array_layer, 2703 x_offset_sa, y_offset_sa); 2704 *z_offset_sa = 0; 2705 *array_offset = 0; 2706 break; 2707 case ISL_DIM_LAYOUT_GFX4_2D: 2708 get_image_offset_sa_gfx4_2d(surf, level, logical_array_layer 2709 + logical_z_offset_px, 2710 x_offset_sa, y_offset_sa); 2711 *z_offset_sa = 0; 2712 *array_offset = 0; 2713 break; 2714 case ISL_DIM_LAYOUT_GFX4_3D: 2715 get_image_offset_sa_gfx4_3d(surf, level, logical_array_layer + 2716 logical_z_offset_px, 2717 x_offset_sa, y_offset_sa); 2718 *z_offset_sa = 0; 2719 *array_offset = 0; 2720 break; 2721 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ: 2722 get_image_offset_sa_gfx6_stencil_hiz(surf, level, logical_array_layer + 2723 logical_z_offset_px, 2724 x_offset_sa, y_offset_sa); 2725 *z_offset_sa = 0; 2726 *array_offset = 0; 2727 break; 2728 2729 default: 2730 unreachable("not reached"); 2731 } 2732} 2733 2734void 2735isl_surf_get_image_offset_el(const struct isl_surf *surf, 2736 uint32_t level, 2737 uint32_t logical_array_layer, 2738 uint32_t logical_z_offset_px, 2739 uint32_t *x_offset_el, 2740 uint32_t *y_offset_el, 2741 uint32_t *z_offset_el, 2742 uint32_t *array_offset) 2743{ 2744 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 2745 2746 assert(level < surf->levels); 2747 assert(logical_array_layer < surf->logical_level0_px.array_len); 2748 assert(logical_z_offset_px 2749 < isl_minify(surf->logical_level0_px.depth, level)); 2750 2751 uint32_t x_offset_sa, y_offset_sa, z_offset_sa; 2752 isl_surf_get_image_offset_sa(surf, level, 2753 logical_array_layer, 2754 logical_z_offset_px, 2755 &x_offset_sa, 2756 &y_offset_sa, 2757 &z_offset_sa, 2758 array_offset); 2759 2760 *x_offset_el = x_offset_sa / fmtl->bw; 2761 *y_offset_el = y_offset_sa / fmtl->bh; 2762 *z_offset_el = z_offset_sa / fmtl->bd; 2763} 2764 2765void 2766isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf, 2767 uint32_t level, 2768 uint32_t logical_array_layer, 2769 uint32_t logical_z_offset_px, 2770 uint64_t *offset_B, 2771 uint32_t *x_offset_sa, 2772 uint32_t *y_offset_sa) 2773{ 2774 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 2775 2776 uint32_t x_offset_el, y_offset_el; 2777 isl_surf_get_image_offset_B_tile_el(surf, level, 2778 logical_array_layer, 2779 logical_z_offset_px, 2780 offset_B, 2781 &x_offset_el, 2782 &y_offset_el); 2783 2784 if (x_offset_sa) { 2785 *x_offset_sa = x_offset_el * fmtl->bw; 2786 } else { 2787 assert(x_offset_el == 0); 2788 } 2789 2790 if (y_offset_sa) { 2791 *y_offset_sa = y_offset_el * fmtl->bh; 2792 } else { 2793 assert(y_offset_el == 0); 2794 } 2795} 2796 2797void 2798isl_surf_get_image_offset_B_tile_el(const struct isl_surf *surf, 2799 uint32_t level, 2800 uint32_t logical_array_layer, 2801 uint32_t logical_z_offset_px, 2802 uint64_t *offset_B, 2803 uint32_t *x_offset_el, 2804 uint32_t *y_offset_el) 2805{ 2806 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 2807 2808 uint32_t total_x_offset_el, total_y_offset_el; 2809 uint32_t total_z_offset_el, total_array_offset; 2810 isl_surf_get_image_offset_el(surf, level, logical_array_layer, 2811 logical_z_offset_px, 2812 &total_x_offset_el, 2813 &total_y_offset_el, 2814 &total_z_offset_el, 2815 &total_array_offset); 2816 2817 uint32_t z_offset_el, array_offset; 2818 isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim, 2819 surf->msaa_layout, fmtl->bpb, 2820 surf->samples, 2821 surf->row_pitch_B, 2822 surf->array_pitch_el_rows, 2823 total_x_offset_el, 2824 total_y_offset_el, 2825 total_z_offset_el, 2826 total_array_offset, 2827 offset_B, 2828 x_offset_el, 2829 y_offset_el, 2830 &z_offset_el, 2831 &array_offset); 2832 assert(z_offset_el == 0); 2833 assert(array_offset == 0); 2834} 2835 2836void 2837isl_surf_get_image_range_B_tile(const struct isl_surf *surf, 2838 uint32_t level, 2839 uint32_t logical_array_layer, 2840 uint32_t logical_z_offset_px, 2841 uint64_t *start_tile_B, 2842 uint64_t *end_tile_B) 2843{ 2844 uint32_t start_x_offset_el, start_y_offset_el; 2845 uint32_t start_z_offset_el, start_array_slice; 2846 isl_surf_get_image_offset_el(surf, level, logical_array_layer, 2847 logical_z_offset_px, 2848 &start_x_offset_el, 2849 &start_y_offset_el, 2850 &start_z_offset_el, 2851 &start_array_slice); 2852 2853 /* Compute the size of the subimage in surface elements */ 2854 const uint32_t subimage_w_sa = isl_minify(surf->phys_level0_sa.w, level); 2855 const uint32_t subimage_h_sa = isl_minify(surf->phys_level0_sa.h, level); 2856 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 2857 const uint32_t subimage_w_el = isl_align_div_npot(subimage_w_sa, fmtl->bw); 2858 const uint32_t subimage_h_el = isl_align_div_npot(subimage_h_sa, fmtl->bh); 2859 2860 /* Find the last pixel */ 2861 uint32_t end_x_offset_el = start_x_offset_el + subimage_w_el - 1; 2862 uint32_t end_y_offset_el = start_y_offset_el + subimage_h_el - 1; 2863 2864 /* We only consider one Z or array slice */ 2865 const uint32_t end_z_offset_el = start_z_offset_el; 2866 const uint32_t end_array_slice = start_array_slice; 2867 2868 UNUSED uint32_t x_offset_el, y_offset_el, z_offset_el, array_slice; 2869 isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim, 2870 surf->msaa_layout, fmtl->bpb, 2871 surf->samples, 2872 surf->row_pitch_B, 2873 surf->array_pitch_el_rows, 2874 start_x_offset_el, 2875 start_y_offset_el, 2876 start_z_offset_el, 2877 start_array_slice, 2878 start_tile_B, 2879 &x_offset_el, 2880 &y_offset_el, 2881 &z_offset_el, 2882 &array_slice); 2883 2884 isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim, 2885 surf->msaa_layout, fmtl->bpb, 2886 surf->samples, 2887 surf->row_pitch_B, 2888 surf->array_pitch_el_rows, 2889 end_x_offset_el, 2890 end_y_offset_el, 2891 end_z_offset_el, 2892 end_array_slice, 2893 end_tile_B, 2894 &x_offset_el, 2895 &y_offset_el, 2896 &z_offset_el, 2897 &array_slice); 2898 2899 /* We want the range we return to be exclusive but the tile containing the 2900 * last pixel (what we just calculated) is inclusive. Add one. 2901 */ 2902 (*end_tile_B)++; 2903 2904 assert(*end_tile_B <= surf->size_B); 2905} 2906 2907void 2908isl_surf_get_image_surf(const struct isl_device *dev, 2909 const struct isl_surf *surf, 2910 uint32_t level, 2911 uint32_t logical_array_layer, 2912 uint32_t logical_z_offset_px, 2913 struct isl_surf *image_surf, 2914 uint64_t *offset_B, 2915 uint32_t *x_offset_sa, 2916 uint32_t *y_offset_sa) 2917{ 2918 isl_surf_get_image_offset_B_tile_sa(surf, 2919 level, 2920 logical_array_layer, 2921 logical_z_offset_px, 2922 offset_B, 2923 x_offset_sa, 2924 y_offset_sa); 2925 2926 /* Even for cube maps there will be only single face, therefore drop the 2927 * corresponding flag if present. 2928 */ 2929 const isl_surf_usage_flags_t usage = 2930 surf->usage & (~ISL_SURF_USAGE_CUBE_BIT); 2931 2932 bool ok UNUSED; 2933 ok = isl_surf_init(dev, image_surf, 2934 .dim = ISL_SURF_DIM_2D, 2935 .format = surf->format, 2936 .width = isl_minify(surf->logical_level0_px.w, level), 2937 .height = isl_minify(surf->logical_level0_px.h, level), 2938 .depth = 1, 2939 .levels = 1, 2940 .array_len = 1, 2941 .samples = surf->samples, 2942 .row_pitch_B = surf->row_pitch_B, 2943 .usage = usage, 2944 .tiling_flags = (1 << surf->tiling)); 2945 assert(ok); 2946} 2947 2948bool 2949isl_surf_get_uncompressed_surf(const struct isl_device *dev, 2950 const struct isl_surf *surf, 2951 const struct isl_view *view, 2952 struct isl_surf *ucompr_surf, 2953 struct isl_view *ucompr_view, 2954 uint64_t *offset_B, 2955 uint32_t *x_offset_el, 2956 uint32_t *y_offset_el) 2957{ 2958 const struct isl_format_layout *fmtl = 2959 isl_format_get_layout(surf->format); 2960 const enum isl_format view_format = view->format; 2961 2962 assert(fmtl->bw > 1 || fmtl->bh > 1 || fmtl->bd > 1); 2963 assert(isl_format_is_compressed(surf->format)); 2964 assert(!isl_format_is_compressed(view->format)); 2965 assert(isl_format_get_layout(view->format)->bpb == fmtl->bpb); 2966 assert(view->levels == 1); 2967 2968 const uint32_t view_width_px = 2969 isl_minify(surf->logical_level0_px.width, view->base_level); 2970 const uint32_t view_height_px = 2971 isl_minify(surf->logical_level0_px.height, view->base_level); 2972 2973 assert(surf->samples == 1); 2974 const uint32_t view_width_el = isl_align_div_npot(view_width_px, fmtl->bw); 2975 const uint32_t view_height_el = isl_align_div_npot(view_height_px, fmtl->bh); 2976 2977 /* If we ever enable 3D block formats, we'll need to re-think this */ 2978 assert(fmtl->bd == 1); 2979 2980 if (view->array_len > 1) { 2981 /* The Skylake PRM Vol. 2d, "RENDER_SURFACE_STATE::X Offset" says: 2982 * 2983 * "If Surface Array is enabled, this field must be zero." 2984 * 2985 * The PRMs for other hardware have similar text. This is also tricky 2986 * to handle with things like BLORP's SW offsetting because the 2987 * increased surface size required for the offset may result in an image 2988 * height greater than qpitch. 2989 */ 2990 if (view->base_level > 0) 2991 return false; 2992 2993 /* On Haswell and earlier, RENDER_SURFACE_STATE doesn't have a QPitch 2994 * field; it only has "array pitch span" which means the QPitch is 2995 * automatically calculated. Since we're smashing the surface format 2996 * (block formats are subtly different) and the number of miplevels, 2997 * that calculation will get thrown off. This means we can't do arrays 2998 * even at LOD0 2999 * 3000 * On Broadwell, we do have a QPitch field which we can control. 3001 * However, HALIGN and VALIGN are specified in pixels and are 3002 * hard-coded to align to exactly the block size of the compressed 3003 * texture. This means that, when reinterpreted as a non-compressed 3004 * the QPitch may be anything but the HW requires it to be properly 3005 * aligned. 3006 */ 3007 if (ISL_GFX_VER(dev) < 9) 3008 return false; 3009 3010 *ucompr_surf = *surf; 3011 ucompr_surf->levels = 1; 3012 ucompr_surf->format = view_format; 3013 3014 /* We're making an uncompressed view here. The image dimensions 3015 * need to be scaled down by the block size. 3016 */ 3017 assert(ucompr_surf->logical_level0_px.width == view_width_px); 3018 assert(ucompr_surf->logical_level0_px.height == view_height_px); 3019 ucompr_surf->logical_level0_px.width = view_width_el; 3020 ucompr_surf->logical_level0_px.height = view_height_el; 3021 ucompr_surf->phys_level0_sa = isl_surf_get_phys_level0_el(surf); 3022 3023 /* The surface mostly stays as-is; there is no offset */ 3024 *offset_B = 0; 3025 *x_offset_el = 0; 3026 *y_offset_el = 0; 3027 3028 /* The view remains the same */ 3029 *ucompr_view = *view; 3030 } else { 3031 /* If only one array slice is requested, directly offset to that slice. 3032 * We could, in theory, still use arrays in some cases but BLORP isn't 3033 * prepared for this and everyone who calls this function should be 3034 * prepared to handle an X/Y offset. 3035 */ 3036 isl_surf_get_image_offset_B_tile_el(surf, 3037 view->base_level, 3038 surf->dim == ISL_SURF_DIM_3D ? 3039 0 : view->base_array_layer, 3040 surf->dim == ISL_SURF_DIM_3D ? 3041 view->base_array_layer : 0, 3042 offset_B, 3043 x_offset_el, 3044 y_offset_el); 3045 3046 /* Even for cube maps there will be only single face, therefore drop the 3047 * corresponding flag if present. 3048 */ 3049 const isl_surf_usage_flags_t usage = 3050 surf->usage & (~ISL_SURF_USAGE_CUBE_BIT); 3051 3052 bool ok UNUSED; 3053 ok = isl_surf_init(dev, ucompr_surf, 3054 .dim = ISL_SURF_DIM_2D, 3055 .format = view_format, 3056 .width = view_width_el, 3057 .height = view_height_el, 3058 .depth = 1, 3059 .levels = 1, 3060 .array_len = 1, 3061 .samples = 1, 3062 .row_pitch_B = surf->row_pitch_B, 3063 .usage = usage, 3064 .tiling_flags = (1 << surf->tiling)); 3065 assert(ok); 3066 3067 /* The newly created image represents the one subimage we're 3068 * referencing with this view so it only has one array slice and 3069 * miplevel. 3070 */ 3071 *ucompr_view = *view; 3072 ucompr_view->base_array_layer = 0; 3073 ucompr_view->base_level = 0; 3074 } 3075 3076 return true; 3077} 3078 3079void 3080isl_tiling_get_intratile_offset_el(enum isl_tiling tiling, 3081 enum isl_surf_dim dim, 3082 enum isl_msaa_layout msaa_layout, 3083 uint32_t bpb, 3084 uint32_t samples, 3085 uint32_t row_pitch_B, 3086 uint32_t array_pitch_el_rows, 3087 uint32_t total_x_offset_el, 3088 uint32_t total_y_offset_el, 3089 uint32_t total_z_offset_el, 3090 uint32_t total_array_offset, 3091 uint64_t *tile_offset_B, 3092 uint32_t *x_offset_el, 3093 uint32_t *y_offset_el, 3094 uint32_t *z_offset_el, 3095 uint32_t *array_offset) 3096{ 3097 if (tiling == ISL_TILING_LINEAR) { 3098 assert(bpb % 8 == 0); 3099 assert(samples == 1); 3100 assert(total_z_offset_el == 0 && total_array_offset == 0); 3101 *tile_offset_B = (uint64_t)total_y_offset_el * row_pitch_B + 3102 (uint64_t)total_x_offset_el * (bpb / 8); 3103 *x_offset_el = 0; 3104 *y_offset_el = 0; 3105 *z_offset_el = 0; 3106 *array_offset = 0; 3107 return; 3108 } 3109 3110 struct isl_tile_info tile_info; 3111 isl_tiling_get_info(tiling, dim, msaa_layout, bpb, samples, &tile_info); 3112 3113 /* Pitches must make sense with the tiling */ 3114 assert(row_pitch_B % tile_info.phys_extent_B.width == 0); 3115 if (tile_info.logical_extent_el.d > 1 || tile_info.logical_extent_el.a > 1) 3116 assert(array_pitch_el_rows % tile_info.logical_extent_el.h == 0); 3117 3118 /* For non-power-of-two formats, we need the address to be both tile and 3119 * element-aligned. The easiest way to achieve this is to work with a tile 3120 * that is three times as wide as the regular tile. 3121 * 3122 * The tile info returned by get_tile_info has a logical size that is an 3123 * integer number of tile_info.format_bpb size elements. To scale the 3124 * tile, we scale up the physical width and then treat the logical tile 3125 * size as if it has bpb size elements. 3126 */ 3127 const uint32_t tile_el_scale = bpb / tile_info.format_bpb; 3128 tile_info.phys_extent_B.width *= tile_el_scale; 3129 3130 /* Compute the offset into the tile */ 3131 *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w; 3132 *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h; 3133 *z_offset_el = total_z_offset_el % tile_info.logical_extent_el.d; 3134 *array_offset = total_array_offset % tile_info.logical_extent_el.a; 3135 3136 /* Compute the offset of the tile in units of whole tiles */ 3137 uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w; 3138 uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h; 3139 uint32_t z_offset_tl = total_z_offset_el / tile_info.logical_extent_el.d; 3140 uint32_t a_offset_tl = total_array_offset / tile_info.logical_extent_el.a; 3141 3142 /* Compute an array pitch in number of tiles */ 3143 uint32_t array_pitch_tl_rows = 3144 array_pitch_el_rows / tile_info.logical_extent_el.h; 3145 3146 /* Add the Z and array offset to the Y offset to get a 2D offset */ 3147 y_offset_tl += (z_offset_tl + a_offset_tl) * array_pitch_tl_rows; 3148 3149 *tile_offset_B = 3150 (uint64_t)y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B + 3151 (uint64_t)x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w; 3152} 3153 3154uint32_t 3155isl_surf_get_depth_format(const struct isl_device *dev, 3156 const struct isl_surf *surf) 3157{ 3158 /* Support for separate stencil buffers began in gfx5. Support for 3159 * interleaved depthstencil buffers ceased in gfx7. The intermediate gens, 3160 * those that supported separate and interleaved stencil, were gfx5 and 3161 * gfx6. 3162 * 3163 * For a list of all available formats, see the Sandybridge PRM >> Volume 3164 * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface 3165 * Format (p321). 3166 */ 3167 3168 bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT; 3169 3170 assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT); 3171 3172 if (has_stencil) 3173 assert(ISL_GFX_VER(dev) < 7); 3174 3175 switch (surf->format) { 3176 default: 3177 unreachable("bad isl depth format"); 3178 case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS: 3179 assert(ISL_GFX_VER(dev) < 7); 3180 return 0; /* D32_FLOAT_S8X24_UINT */ 3181 case ISL_FORMAT_R32_FLOAT: 3182 assert(!has_stencil); 3183 return 1; /* D32_FLOAT */ 3184 case ISL_FORMAT_R24_UNORM_X8_TYPELESS: 3185 if (has_stencil) { 3186 assert(ISL_GFX_VER(dev) < 7); 3187 return 2; /* D24_UNORM_S8_UINT */ 3188 } else { 3189 assert(ISL_GFX_VER(dev) >= 5); 3190 return 3; /* D24_UNORM_X8_UINT */ 3191 } 3192 case ISL_FORMAT_R16_UNORM: 3193 assert(!has_stencil); 3194 return 5; /* D16_UNORM */ 3195 } 3196} 3197 3198bool 3199isl_swizzle_supports_rendering(const struct intel_device_info *devinfo, 3200 struct isl_swizzle swizzle) 3201{ 3202 if (devinfo->is_haswell) { 3203 /* From the Haswell PRM, 3204 * RENDER_SURFACE_STATE::Shader Channel Select Red 3205 * 3206 * "The Shader channel selects also define which shader channels are 3207 * written to which surface channel. If the Shader channel select is 3208 * SCS_ZERO or SCS_ONE then it is not written to the surface. If the 3209 * shader channel select is SCS_RED it is written to the surface red 3210 * channel and so on. If more than one shader channel select is set 3211 * to the same surface channel only the first shader channel in RGBA 3212 * order will be written." 3213 */ 3214 return true; 3215 } else if (devinfo->ver <= 7) { 3216 /* Ivy Bridge and early doesn't have any swizzling */ 3217 return isl_swizzle_is_identity(swizzle); 3218 } else { 3219 /* From the Sky Lake PRM Vol. 2d, 3220 * RENDER_SURFACE_STATE::Shader Channel Select Red 3221 * 3222 * "For Render Target, Red, Green and Blue Shader Channel Selects 3223 * MUST be such that only valid components can be swapped i.e. only 3224 * change the order of components in the pixel. Any other values for 3225 * these Shader Channel Select fields are not valid for Render 3226 * Targets. This also means that there MUST not be multiple shader 3227 * channels mapped to the same RT channel." 3228 * 3229 * From the Sky Lake PRM Vol. 2d, 3230 * RENDER_SURFACE_STATE::Shader Channel Select Alpha 3231 * 3232 * "For Render Target, this field MUST be programmed to 3233 * value = SCS_ALPHA." 3234 */ 3235 return (swizzle.r == ISL_CHANNEL_SELECT_RED || 3236 swizzle.r == ISL_CHANNEL_SELECT_GREEN || 3237 swizzle.r == ISL_CHANNEL_SELECT_BLUE) && 3238 (swizzle.g == ISL_CHANNEL_SELECT_RED || 3239 swizzle.g == ISL_CHANNEL_SELECT_GREEN || 3240 swizzle.g == ISL_CHANNEL_SELECT_BLUE) && 3241 (swizzle.b == ISL_CHANNEL_SELECT_RED || 3242 swizzle.b == ISL_CHANNEL_SELECT_GREEN || 3243 swizzle.b == ISL_CHANNEL_SELECT_BLUE) && 3244 swizzle.r != swizzle.g && 3245 swizzle.r != swizzle.b && 3246 swizzle.g != swizzle.b && 3247 swizzle.a == ISL_CHANNEL_SELECT_ALPHA; 3248 } 3249} 3250 3251static enum isl_channel_select 3252swizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle) 3253{ 3254 switch (chan) { 3255 case ISL_CHANNEL_SELECT_ZERO: 3256 case ISL_CHANNEL_SELECT_ONE: 3257 return chan; 3258 case ISL_CHANNEL_SELECT_RED: 3259 return swizzle.r; 3260 case ISL_CHANNEL_SELECT_GREEN: 3261 return swizzle.g; 3262 case ISL_CHANNEL_SELECT_BLUE: 3263 return swizzle.b; 3264 case ISL_CHANNEL_SELECT_ALPHA: 3265 return swizzle.a; 3266 default: 3267 unreachable("Invalid swizzle component"); 3268 } 3269} 3270 3271/** 3272 * Returns the single swizzle that is equivalent to applying the two given 3273 * swizzles in sequence. 3274 */ 3275struct isl_swizzle 3276isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second) 3277{ 3278 return (struct isl_swizzle) { 3279 .r = swizzle_select(first.r, second), 3280 .g = swizzle_select(first.g, second), 3281 .b = swizzle_select(first.b, second), 3282 .a = swizzle_select(first.a, second), 3283 }; 3284} 3285 3286/** 3287 * Returns a swizzle that is the pseudo-inverse of this swizzle. 3288 */ 3289struct isl_swizzle 3290isl_swizzle_invert(struct isl_swizzle swizzle) 3291{ 3292 /* Default to zero for channels which do not show up in the swizzle */ 3293 enum isl_channel_select chans[4] = { 3294 ISL_CHANNEL_SELECT_ZERO, 3295 ISL_CHANNEL_SELECT_ZERO, 3296 ISL_CHANNEL_SELECT_ZERO, 3297 ISL_CHANNEL_SELECT_ZERO, 3298 }; 3299 3300 /* We go in ABGR order so that, if there are any duplicates, the first one 3301 * is taken if you look at it in RGBA order. This is what Haswell hardware 3302 * does for render target swizzles. 3303 */ 3304 if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4) 3305 chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA; 3306 if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4) 3307 chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE; 3308 if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4) 3309 chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN; 3310 if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4) 3311 chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED; 3312 3313 return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] }; 3314} 3315 3316/** Applies an inverse swizzle to a color value */ 3317union isl_color_value 3318isl_color_value_swizzle_inv(union isl_color_value src, 3319 struct isl_swizzle swizzle) 3320{ 3321 union isl_color_value dst = { .u32 = { 0, } }; 3322 3323 /* We assign colors in ABGR order so that the first one will be taken in 3324 * RGBA precedence order. According to the PRM docs for shader channel 3325 * select, this matches Haswell hardware behavior. 3326 */ 3327 if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4) 3328 dst.u32[swizzle.a - ISL_CHANNEL_SELECT_RED] = src.u32[3]; 3329 if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4) 3330 dst.u32[swizzle.b - ISL_CHANNEL_SELECT_RED] = src.u32[2]; 3331 if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4) 3332 dst.u32[swizzle.g - ISL_CHANNEL_SELECT_RED] = src.u32[1]; 3333 if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4) 3334 dst.u32[swizzle.r - ISL_CHANNEL_SELECT_RED] = src.u32[0]; 3335 3336 return dst; 3337} 3338 3339uint8_t 3340isl_format_get_aux_map_encoding(enum isl_format format) 3341{ 3342 switch(format) { 3343 case ISL_FORMAT_R32G32B32A32_FLOAT: return 0x11; 3344 case ISL_FORMAT_R32G32B32X32_FLOAT: return 0x11; 3345 case ISL_FORMAT_R32G32B32A32_SINT: return 0x12; 3346 case ISL_FORMAT_R32G32B32A32_UINT: return 0x13; 3347 case ISL_FORMAT_R16G16B16A16_UNORM: return 0x14; 3348 case ISL_FORMAT_R16G16B16A16_SNORM: return 0x15; 3349 case ISL_FORMAT_R16G16B16A16_SINT: return 0x16; 3350 case ISL_FORMAT_R16G16B16A16_UINT: return 0x17; 3351 case ISL_FORMAT_R16G16B16A16_FLOAT: return 0x10; 3352 case ISL_FORMAT_R16G16B16X16_FLOAT: return 0x10; 3353 case ISL_FORMAT_R32G32_FLOAT: return 0x11; 3354 case ISL_FORMAT_R32G32_SINT: return 0x12; 3355 case ISL_FORMAT_R32G32_UINT: return 0x13; 3356 case ISL_FORMAT_B8G8R8A8_UNORM: return 0xA; 3357 case ISL_FORMAT_B8G8R8X8_UNORM: return 0xA; 3358 case ISL_FORMAT_B8G8R8A8_UNORM_SRGB: return 0xA; 3359 case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: return 0xA; 3360 case ISL_FORMAT_R10G10B10A2_UNORM: return 0x18; 3361 case ISL_FORMAT_R10G10B10A2_UNORM_SRGB: return 0x18; 3362 case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM: return 0x19; 3363 case ISL_FORMAT_R10G10B10A2_UINT: return 0x1A; 3364 case ISL_FORMAT_R8G8B8A8_UNORM: return 0xA; 3365 case ISL_FORMAT_R8G8B8A8_UNORM_SRGB: return 0xA; 3366 case ISL_FORMAT_R8G8B8A8_SNORM: return 0x1B; 3367 case ISL_FORMAT_R8G8B8A8_SINT: return 0x1C; 3368 case ISL_FORMAT_R8G8B8A8_UINT: return 0x1D; 3369 case ISL_FORMAT_R16G16_UNORM: return 0x14; 3370 case ISL_FORMAT_R16G16_SNORM: return 0x15; 3371 case ISL_FORMAT_R16G16_SINT: return 0x16; 3372 case ISL_FORMAT_R16G16_UINT: return 0x17; 3373 case ISL_FORMAT_R16G16_FLOAT: return 0x10; 3374 case ISL_FORMAT_B10G10R10A2_UNORM: return 0x18; 3375 case ISL_FORMAT_B10G10R10A2_UNORM_SRGB: return 0x18; 3376 case ISL_FORMAT_R11G11B10_FLOAT: return 0x1E; 3377 case ISL_FORMAT_R32_SINT: return 0x12; 3378 case ISL_FORMAT_R32_UINT: return 0x13; 3379 case ISL_FORMAT_R32_FLOAT: return 0x11; 3380 case ISL_FORMAT_R24_UNORM_X8_TYPELESS: return 0x13; 3381 case ISL_FORMAT_B5G6R5_UNORM: return 0xA; 3382 case ISL_FORMAT_B5G6R5_UNORM_SRGB: return 0xA; 3383 case ISL_FORMAT_B5G5R5A1_UNORM: return 0xA; 3384 case ISL_FORMAT_B5G5R5A1_UNORM_SRGB: return 0xA; 3385 case ISL_FORMAT_B4G4R4A4_UNORM: return 0xA; 3386 case ISL_FORMAT_B4G4R4A4_UNORM_SRGB: return 0xA; 3387 case ISL_FORMAT_R8G8_UNORM: return 0xA; 3388 case ISL_FORMAT_R8G8_SNORM: return 0x1B; 3389 case ISL_FORMAT_R8G8_SINT: return 0x1C; 3390 case ISL_FORMAT_R8G8_UINT: return 0x1D; 3391 case ISL_FORMAT_R16_UNORM: return 0x14; 3392 case ISL_FORMAT_R16_SNORM: return 0x15; 3393 case ISL_FORMAT_R16_SINT: return 0x16; 3394 case ISL_FORMAT_R16_UINT: return 0x17; 3395 case ISL_FORMAT_R16_FLOAT: return 0x10; 3396 case ISL_FORMAT_B5G5R5X1_UNORM: return 0xA; 3397 case ISL_FORMAT_B5G5R5X1_UNORM_SRGB: return 0xA; 3398 case ISL_FORMAT_A1B5G5R5_UNORM: return 0xA; 3399 case ISL_FORMAT_A4B4G4R4_UNORM: return 0xA; 3400 case ISL_FORMAT_R8_UNORM: return 0xA; 3401 case ISL_FORMAT_R8_SNORM: return 0x1B; 3402 case ISL_FORMAT_R8_SINT: return 0x1C; 3403 case ISL_FORMAT_R8_UINT: return 0x1D; 3404 case ISL_FORMAT_A8_UNORM: return 0xA; 3405 case ISL_FORMAT_PLANAR_420_8: return 0xF; 3406 case ISL_FORMAT_PLANAR_420_10: return 0x7; 3407 case ISL_FORMAT_PLANAR_420_12: return 0x8; 3408 case ISL_FORMAT_PLANAR_420_16: return 0x8; 3409 case ISL_FORMAT_YCRCB_NORMAL: return 0x3; 3410 case ISL_FORMAT_YCRCB_SWAPY: return 0xB; 3411 default: 3412 unreachable("Unsupported aux-map format!"); 3413 return 0; 3414 } 3415} 3416 3417/* 3418 * Returns compression format encoding for Unified Lossless Compression 3419 */ 3420uint8_t 3421isl_get_render_compression_format(enum isl_format format) 3422{ 3423 /* From the Bspec, Enumeration_RenderCompressionFormat section (53726): */ 3424 switch(format) { 3425 case ISL_FORMAT_R32G32B32A32_FLOAT: 3426 case ISL_FORMAT_R32G32B32X32_FLOAT: 3427 case ISL_FORMAT_R32G32B32A32_SINT: 3428 return 0x0; 3429 case ISL_FORMAT_R32G32B32A32_UINT: 3430 return 0x1; 3431 case ISL_FORMAT_R32G32_FLOAT: 3432 case ISL_FORMAT_R32G32_SINT: 3433 return 0x2; 3434 case ISL_FORMAT_R32G32_UINT: 3435 return 0x3; 3436 case ISL_FORMAT_R16G16B16A16_UNORM: 3437 case ISL_FORMAT_R16G16B16X16_UNORM: 3438 case ISL_FORMAT_R16G16B16A16_UINT: 3439 return 0x4; 3440 case ISL_FORMAT_R16G16B16A16_SNORM: 3441 case ISL_FORMAT_R16G16B16A16_SINT: 3442 case ISL_FORMAT_R16G16B16A16_FLOAT: 3443 case ISL_FORMAT_R16G16B16X16_FLOAT: 3444 return 0x5; 3445 case ISL_FORMAT_R16G16_UNORM: 3446 case ISL_FORMAT_R16G16_UINT: 3447 return 0x6; 3448 case ISL_FORMAT_R16G16_SNORM: 3449 case ISL_FORMAT_R16G16_SINT: 3450 case ISL_FORMAT_R16G16_FLOAT: 3451 return 0x7; 3452 case ISL_FORMAT_B8G8R8A8_UNORM: 3453 case ISL_FORMAT_B8G8R8X8_UNORM: 3454 case ISL_FORMAT_B8G8R8A8_UNORM_SRGB: 3455 case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: 3456 case ISL_FORMAT_R8G8B8A8_UNORM: 3457 case ISL_FORMAT_R8G8B8X8_UNORM: 3458 case ISL_FORMAT_R8G8B8A8_UNORM_SRGB: 3459 case ISL_FORMAT_R8G8B8X8_UNORM_SRGB: 3460 case ISL_FORMAT_R8G8B8A8_UINT: 3461 return 0x8; 3462 case ISL_FORMAT_R8G8B8A8_SNORM: 3463 case ISL_FORMAT_R8G8B8A8_SINT: 3464 return 0x9; 3465 case ISL_FORMAT_B5G6R5_UNORM: 3466 case ISL_FORMAT_B5G6R5_UNORM_SRGB: 3467 case ISL_FORMAT_B5G5R5A1_UNORM: 3468 case ISL_FORMAT_B5G5R5A1_UNORM_SRGB: 3469 case ISL_FORMAT_B4G4R4A4_UNORM: 3470 case ISL_FORMAT_B4G4R4A4_UNORM_SRGB: 3471 case ISL_FORMAT_B5G5R5X1_UNORM: 3472 case ISL_FORMAT_B5G5R5X1_UNORM_SRGB: 3473 case ISL_FORMAT_A1B5G5R5_UNORM: 3474 case ISL_FORMAT_A4B4G4R4_UNORM: 3475 case ISL_FORMAT_R8G8_UNORM: 3476 case ISL_FORMAT_R8G8_UINT: 3477 return 0xA; 3478 case ISL_FORMAT_R8G8_SNORM: 3479 case ISL_FORMAT_R8G8_SINT: 3480 return 0xB; 3481 case ISL_FORMAT_R10G10B10A2_UNORM: 3482 case ISL_FORMAT_R10G10B10A2_UNORM_SRGB: 3483 case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM: 3484 case ISL_FORMAT_R10G10B10A2_UINT: 3485 case ISL_FORMAT_B10G10R10A2_UNORM: 3486 case ISL_FORMAT_B10G10R10X2_UNORM: 3487 case ISL_FORMAT_B10G10R10A2_UNORM_SRGB: 3488 return 0xC; 3489 case ISL_FORMAT_R11G11B10_FLOAT: 3490 return 0xD; 3491 case ISL_FORMAT_R32_SINT: 3492 case ISL_FORMAT_R32_FLOAT: 3493 return 0x10; 3494 case ISL_FORMAT_R32_UINT: 3495 case ISL_FORMAT_R24_UNORM_X8_TYPELESS: 3496 return 0x11; 3497 case ISL_FORMAT_R16_UNORM: 3498 case ISL_FORMAT_R16_UINT: 3499 return 0x14; 3500 case ISL_FORMAT_R16_SNORM: 3501 case ISL_FORMAT_R16_SINT: 3502 case ISL_FORMAT_R16_FLOAT: 3503 return 0x15; 3504 case ISL_FORMAT_R8_UNORM: 3505 case ISL_FORMAT_R8_UINT: 3506 case ISL_FORMAT_A8_UNORM: 3507 return 0x18; 3508 case ISL_FORMAT_R8_SNORM: 3509 case ISL_FORMAT_R8_SINT: 3510 return 0x19; 3511 default: 3512 unreachable("Unsupported render compression format!"); 3513 return 0; 3514 } 3515} 3516