1/* 2 * Copyright 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <assert.h> 25#include <stdarg.h> 26#include <stdio.h> 27 28#include "genxml/genX_bits.h" 29 30#include "isl.h" 31#include "isl_gen4.h" 32#include "isl_gen6.h" 33#include "isl_gen7.h" 34#include "isl_gen8.h" 35#include "isl_gen9.h" 36#include "isl_priv.h" 37 38void 39isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, 40 uint32_t yt1, uint32_t yt2, 41 char *dst, const char *src, 42 uint32_t dst_pitch, int32_t src_pitch, 43 bool has_swizzling, 44 enum isl_tiling tiling, 45 isl_memcpy_type copy_type) 46{ 47#ifdef USE_SSE41 48 if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { 49 _isl_memcpy_linear_to_tiled_sse41( 50 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 51 tiling, copy_type); 52 return; 53 } 54#endif 55 56 _isl_memcpy_linear_to_tiled( 57 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 58 tiling, copy_type); 59} 60 61void 62isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, 63 uint32_t yt1, uint32_t yt2, 64 char *dst, const char *src, 65 int32_t dst_pitch, uint32_t src_pitch, 66 bool has_swizzling, 67 enum isl_tiling tiling, 68 isl_memcpy_type copy_type) 69{ 70#ifdef USE_SSE41 71 if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { 72 _isl_memcpy_tiled_to_linear_sse41( 73 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 74 tiling, copy_type); 75 return; 76 } 77#endif 78 79 _isl_memcpy_tiled_to_linear( 80 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 81 tiling, copy_type); 82} 83 84void PRINTFLIKE(3, 4) UNUSED 85__isl_finishme(const char *file, int line, const char *fmt, ...) 86{ 87 va_list ap; 88 char buf[512]; 89 90 va_start(ap, fmt); 91 vsnprintf(buf, sizeof(buf), fmt, ap); 92 va_end(ap); 93 94 fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf); 95} 96 97void 98isl_device_init(struct isl_device *dev, 99 const struct gen_device_info *info, 100 bool has_bit6_swizzling) 101{ 102 /* Gen8+ don't have bit6 swizzling, ensure callsite is not confused. */ 103 assert(!(has_bit6_swizzling && info->gen >= 8)); 104 105 dev->info = info; 106 dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6; 107 dev->has_bit6_swizzling = has_bit6_swizzling; 108 109 /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some 110 * device properties at buildtime. Verify that the macros with the device 111 * properties chosen during runtime. 112 */ 113 ISL_DEV_GEN_SANITIZE(dev); 114 ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev); 115 116 /* Did we break hiz or stencil? */ 117 if (ISL_DEV_USE_SEPARATE_STENCIL(dev)) 118 assert(info->has_hiz_and_separate_stencil); 119 if (info->must_use_separate_stencil) 120 assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); 121 122 dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4; 123 dev->ss.align = isl_align(dev->ss.size, 32); 124 125 dev->ss.clear_color_state_size = 126 isl_align(CLEAR_COLOR_length(info) * 4, 64); 127 dev->ss.clear_color_state_offset = 128 RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4; 129 130 dev->ss.clear_value_size = 131 isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) + 132 RENDER_SURFACE_STATE_GreenClearColor_bits(info) + 133 RENDER_SURFACE_STATE_BlueClearColor_bits(info) + 134 RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8; 135 136 dev->ss.clear_value_offset = 137 RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4; 138 139 assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0); 140 dev->ss.addr_offset = 141 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8; 142 143 /* The "Auxiliary Surface Base Address" field starts a bit higher up 144 * because the bottom 12 bits are used for other things. Round down to 145 * the nearest dword before. 146 */ 147 dev->ss.aux_addr_offset = 148 (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8; 149 150 dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4; 151 assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 152 dev->ds.depth_offset = 153 _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8; 154 155 if (dev->use_separate_stencil) { 156 dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 + 157 _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 + 158 _3DSTATE_CLEAR_PARAMS_length(info) * 4; 159 160 assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 161 dev->ds.stencil_offset = 162 _3DSTATE_DEPTH_BUFFER_length(info) * 4 + 163 _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8; 164 165 assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 166 dev->ds.hiz_offset = 167 _3DSTATE_DEPTH_BUFFER_length(info) * 4 + 168 _3DSTATE_STENCIL_BUFFER_length(info) * 4 + 169 _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8; 170 } else { 171 dev->ds.stencil_offset = 0; 172 dev->ds.hiz_offset = 0; 173 } 174} 175 176/** 177 * @brief Query the set of multisamples supported by the device. 178 * 179 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always 180 * supported. 181 */ 182isl_sample_count_mask_t ATTRIBUTE_CONST 183isl_device_get_sample_counts(struct isl_device *dev) 184{ 185 if (ISL_DEV_GEN(dev) >= 9) { 186 return ISL_SAMPLE_COUNT_1_BIT | 187 ISL_SAMPLE_COUNT_2_BIT | 188 ISL_SAMPLE_COUNT_4_BIT | 189 ISL_SAMPLE_COUNT_8_BIT | 190 ISL_SAMPLE_COUNT_16_BIT; 191 } else if (ISL_DEV_GEN(dev) >= 8) { 192 return ISL_SAMPLE_COUNT_1_BIT | 193 ISL_SAMPLE_COUNT_2_BIT | 194 ISL_SAMPLE_COUNT_4_BIT | 195 ISL_SAMPLE_COUNT_8_BIT; 196 } else if (ISL_DEV_GEN(dev) >= 7) { 197 return ISL_SAMPLE_COUNT_1_BIT | 198 ISL_SAMPLE_COUNT_4_BIT | 199 ISL_SAMPLE_COUNT_8_BIT; 200 } else if (ISL_DEV_GEN(dev) >= 6) { 201 return ISL_SAMPLE_COUNT_1_BIT | 202 ISL_SAMPLE_COUNT_4_BIT; 203 } else { 204 return ISL_SAMPLE_COUNT_1_BIT; 205 } 206} 207 208/** 209 * @param[out] info is written only on success 210 */ 211static void 212isl_tiling_get_info(enum isl_tiling tiling, 213 uint32_t format_bpb, 214 struct isl_tile_info *tile_info) 215{ 216 const uint32_t bs = format_bpb / 8; 217 struct isl_extent2d logical_el, phys_B; 218 219 if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) { 220 /* It is possible to have non-power-of-two formats in a tiled buffer. 221 * The easiest way to handle this is to treat the tile as if it is three 222 * times as wide. This way no pixel will ever cross a tile boundary. 223 * This really only works on legacy X and Y tiling formats. 224 */ 225 assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0); 226 assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3)); 227 isl_tiling_get_info(tiling, format_bpb / 3, tile_info); 228 return; 229 } 230 231 switch (tiling) { 232 case ISL_TILING_LINEAR: 233 assert(bs > 0); 234 logical_el = isl_extent2d(1, 1); 235 phys_B = isl_extent2d(bs, 1); 236 break; 237 238 case ISL_TILING_X: 239 assert(bs > 0); 240 logical_el = isl_extent2d(512 / bs, 8); 241 phys_B = isl_extent2d(512, 8); 242 break; 243 244 case ISL_TILING_Y0: 245 assert(bs > 0); 246 logical_el = isl_extent2d(128 / bs, 32); 247 phys_B = isl_extent2d(128, 32); 248 break; 249 250 case ISL_TILING_W: 251 assert(bs == 1); 252 logical_el = isl_extent2d(64, 64); 253 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch: 254 * 255 * "If the surface is a stencil buffer (and thus has Tile Mode set 256 * to TILEMODE_WMAJOR), the pitch must be set to 2x the value 257 * computed based on width, as the stencil buffer is stored with two 258 * rows interleaved." 259 * 260 * This, together with the fact that stencil buffers are referred to as 261 * being Y-tiled in the PRMs for older hardware implies that the 262 * physical size of a W-tile is actually the same as for a Y-tile. 263 */ 264 phys_B = isl_extent2d(128, 32); 265 break; 266 267 case ISL_TILING_Yf: 268 case ISL_TILING_Ys: { 269 bool is_Ys = tiling == ISL_TILING_Ys; 270 271 assert(bs > 0); 272 unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys)); 273 unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys)); 274 275 logical_el = isl_extent2d(width / bs, height); 276 phys_B = isl_extent2d(width, height); 277 break; 278 } 279 280 case ISL_TILING_HIZ: 281 /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4 282 * 128bpb format. The tiling has the same physical dimensions as 283 * Y-tiling but actually has two HiZ columns per Y-tiled column. 284 */ 285 assert(bs == 16); 286 logical_el = isl_extent2d(16, 16); 287 phys_B = isl_extent2d(128, 32); 288 break; 289 290 case ISL_TILING_CCS: 291 /* CCS surfaces are required to have one of the GENX_CCS_* formats which 292 * have a block size of 1 or 2 bits per block and each CCS element 293 * corresponds to one cache-line pair in the main surface. From the Sky 294 * Lake PRM Vol. 12 in the section on planes: 295 * 296 * "The Color Control Surface (CCS) contains the compression status 297 * of the cache-line pairs. The compression state of the cache-line 298 * pair is specified by 2 bits in the CCS. Each CCS cache-line 299 * represents an area on the main surface of 16x16 sets of 128 byte 300 * Y-tiled cache-line-pairs. CCS is always Y tiled." 301 * 302 * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines. 303 * Since each cache line corresponds to a 16x16 set of cache-line pairs, 304 * that yields total tile area of 128x128 cache-line pairs or CCS 305 * elements. On older hardware, each CCS element is 1 bit and the tile 306 * is 128x256 elements. 307 */ 308 assert(format_bpb == 1 || format_bpb == 2); 309 logical_el = isl_extent2d(128, 256 / format_bpb); 310 phys_B = isl_extent2d(128, 32); 311 break; 312 313 default: 314 unreachable("not reached"); 315 } /* end switch */ 316 317 *tile_info = (struct isl_tile_info) { 318 .tiling = tiling, 319 .format_bpb = format_bpb, 320 .logical_extent_el = logical_el, 321 .phys_extent_B = phys_B, 322 }; 323} 324 325bool 326isl_color_value_is_zero(union isl_color_value value, 327 enum isl_format format) 328{ 329 const struct isl_format_layout *fmtl = isl_format_get_layout(format); 330 331#define RETURN_FALSE_IF_NOT_0(c, i) \ 332 if (fmtl->channels.c.bits && value.u32[i] != 0) \ 333 return false 334 335 RETURN_FALSE_IF_NOT_0(r, 0); 336 RETURN_FALSE_IF_NOT_0(g, 1); 337 RETURN_FALSE_IF_NOT_0(b, 2); 338 RETURN_FALSE_IF_NOT_0(a, 3); 339 340#undef RETURN_FALSE_IF_NOT_0 341 342 return true; 343} 344 345bool 346isl_color_value_is_zero_one(union isl_color_value value, 347 enum isl_format format) 348{ 349 const struct isl_format_layout *fmtl = isl_format_get_layout(format); 350 351#define RETURN_FALSE_IF_NOT_0_1(c, i, field) \ 352 if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \ 353 return false 354 355 if (isl_format_has_int_channel(format)) { 356 RETURN_FALSE_IF_NOT_0_1(r, 0, u32); 357 RETURN_FALSE_IF_NOT_0_1(g, 1, u32); 358 RETURN_FALSE_IF_NOT_0_1(b, 2, u32); 359 RETURN_FALSE_IF_NOT_0_1(a, 3, u32); 360 } else { 361 RETURN_FALSE_IF_NOT_0_1(r, 0, f32); 362 RETURN_FALSE_IF_NOT_0_1(g, 1, f32); 363 RETURN_FALSE_IF_NOT_0_1(b, 2, f32); 364 RETURN_FALSE_IF_NOT_0_1(a, 3, f32); 365 } 366 367#undef RETURN_FALSE_IF_NOT_0_1 368 369 return true; 370} 371 372/** 373 * @param[out] tiling is set only on success 374 */ 375static bool 376isl_surf_choose_tiling(const struct isl_device *dev, 377 const struct isl_surf_init_info *restrict info, 378 enum isl_tiling *tiling) 379{ 380 isl_tiling_flags_t tiling_flags = info->tiling_flags; 381 382 /* HiZ surfaces always use the HiZ tiling */ 383 if (info->usage & ISL_SURF_USAGE_HIZ_BIT) { 384 assert(info->format == ISL_FORMAT_HIZ); 385 assert(tiling_flags == ISL_TILING_HIZ_BIT); 386 *tiling = ISL_TILING_HIZ; 387 return true; 388 } 389 390 /* CCS surfaces always use the CCS tiling */ 391 if (info->usage & ISL_SURF_USAGE_CCS_BIT) { 392 assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS); 393 assert(tiling_flags == ISL_TILING_CCS_BIT); 394 *tiling = ISL_TILING_CCS; 395 return true; 396 } 397 398 if (ISL_DEV_GEN(dev) >= 6) { 399 isl_gen6_filter_tiling(dev, info, &tiling_flags); 400 } else { 401 isl_gen4_filter_tiling(dev, info, &tiling_flags); 402 } 403 404 #define CHOOSE(__tiling) \ 405 do { \ 406 if (tiling_flags & (1u << (__tiling))) { \ 407 *tiling = (__tiling); \ 408 return true; \ 409 } \ 410 } while (0) 411 412 /* Of the tiling modes remaining, choose the one that offers the best 413 * performance. 414 */ 415 416 if (info->dim == ISL_SURF_DIM_1D) { 417 /* Prefer linear for 1D surfaces because they do not benefit from 418 * tiling. To the contrary, tiling leads to wasted memory and poor 419 * memory locality due to the swizzling and alignment restrictions 420 * required in tiled surfaces. 421 */ 422 CHOOSE(ISL_TILING_LINEAR); 423 } 424 425 CHOOSE(ISL_TILING_Ys); 426 CHOOSE(ISL_TILING_Yf); 427 CHOOSE(ISL_TILING_Y0); 428 CHOOSE(ISL_TILING_X); 429 CHOOSE(ISL_TILING_W); 430 CHOOSE(ISL_TILING_LINEAR); 431 432 #undef CHOOSE 433 434 /* No tiling mode accomodates the inputs. */ 435 return false; 436} 437 438static bool 439isl_choose_msaa_layout(const struct isl_device *dev, 440 const struct isl_surf_init_info *info, 441 enum isl_tiling tiling, 442 enum isl_msaa_layout *msaa_layout) 443{ 444 if (ISL_DEV_GEN(dev) >= 8) { 445 return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout); 446 } else if (ISL_DEV_GEN(dev) >= 7) { 447 return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout); 448 } else if (ISL_DEV_GEN(dev) >= 6) { 449 return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout); 450 } else { 451 return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout); 452 } 453} 454 455struct isl_extent2d 456isl_get_interleaved_msaa_px_size_sa(uint32_t samples) 457{ 458 assert(isl_is_pow2(samples)); 459 460 /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level 461 * Sizes (p133): 462 * 463 * If the surface is multisampled and it is a depth or stencil surface 464 * or Multisampled Surface StorageFormat in SURFACE_STATE is 465 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before 466 * proceeding: [...] 467 */ 468 return (struct isl_extent2d) { 469 .width = 1 << ((ffs(samples) - 0) / 2), 470 .height = 1 << ((ffs(samples) - 1) / 2), 471 }; 472} 473 474static void 475isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, 476 uint32_t *width, uint32_t *height) 477{ 478 const struct isl_extent2d px_size_sa = 479 isl_get_interleaved_msaa_px_size_sa(samples); 480 481 if (width) 482 *width = isl_align(*width, 2) * px_size_sa.width; 483 if (height) 484 *height = isl_align(*height, 2) * px_size_sa.height; 485} 486 487static enum isl_array_pitch_span 488isl_choose_array_pitch_span(const struct isl_device *dev, 489 const struct isl_surf_init_info *restrict info, 490 enum isl_dim_layout dim_layout, 491 const struct isl_extent4d *phys_level0_sa) 492{ 493 switch (dim_layout) { 494 case ISL_DIM_LAYOUT_GEN9_1D: 495 case ISL_DIM_LAYOUT_GEN4_2D: 496 if (ISL_DEV_GEN(dev) >= 8) { 497 /* QPitch becomes programmable in Broadwell. So choose the 498 * most compact QPitch possible in order to conserve memory. 499 * 500 * From the Broadwell PRM >> Volume 2d: Command Reference: Structures 501 * >> RENDER_SURFACE_STATE Surface QPitch (p325): 502 * 503 * - Software must ensure that this field is set to a value 504 * sufficiently large such that the array slices in the surface 505 * do not overlap. Refer to the Memory Data Formats section for 506 * information on how surfaces are stored in memory. 507 * 508 * - This field specifies the distance in rows between array 509 * slices. It is used only in the following cases: 510 * 511 * - Surface Array is enabled OR 512 * - Number of Mulitsamples is not NUMSAMPLES_1 and 513 * Multisampled Surface Storage Format set to MSFMT_MSS OR 514 * - Surface Type is SURFTYPE_CUBE 515 */ 516 return ISL_ARRAY_PITCH_SPAN_COMPACT; 517 } else if (ISL_DEV_GEN(dev) >= 7) { 518 /* Note that Ivybridge introduces 519 * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the 520 * driver more control over the QPitch. 521 */ 522 523 if (phys_level0_sa->array_len == 1) { 524 /* The hardware will never use the QPitch. So choose the most 525 * compact QPitch possible in order to conserve memory. 526 */ 527 return ISL_ARRAY_PITCH_SPAN_COMPACT; 528 } 529 530 if (isl_surf_usage_is_depth_or_stencil(info->usage) || 531 (info->usage & ISL_SURF_USAGE_HIZ_BIT)) { 532 /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >> 533 * Section 6.18.4.7: Surface Arrays (p112): 534 * 535 * If Surface Array Spacing is set to ARYSPC_FULL (note that 536 * the depth buffer and stencil buffer have an implied value of 537 * ARYSPC_FULL): 538 */ 539 return ISL_ARRAY_PITCH_SPAN_FULL; 540 } 541 542 if (info->levels == 1) { 543 /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing 544 * to ARYSPC_LOD0. 545 */ 546 return ISL_ARRAY_PITCH_SPAN_COMPACT; 547 } 548 549 return ISL_ARRAY_PITCH_SPAN_FULL; 550 } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && 551 ISL_DEV_USE_SEPARATE_STENCIL(dev) && 552 isl_surf_usage_is_stencil(info->usage)) { 553 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 554 * Graphics Core >> Section 7.18.3.7: Surface Arrays: 555 * 556 * The separate stencil buffer does not support mip mapping, thus 557 * the storage for LODs other than LOD 0 is not needed. 558 */ 559 assert(info->levels == 1); 560 return ISL_ARRAY_PITCH_SPAN_COMPACT; 561 } else { 562 if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && 563 ISL_DEV_USE_SEPARATE_STENCIL(dev) && 564 isl_surf_usage_is_stencil(info->usage)) { 565 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 566 * Graphics Core >> Section 7.18.3.7: Surface Arrays: 567 * 568 * The separate stencil buffer does not support mip mapping, 569 * thus the storage for LODs other than LOD 0 is not needed. 570 */ 571 assert(info->levels == 1); 572 assert(phys_level0_sa->array_len == 1); 573 return ISL_ARRAY_PITCH_SPAN_COMPACT; 574 } 575 576 if (phys_level0_sa->array_len == 1) { 577 /* The hardware will never use the QPitch. So choose the most 578 * compact QPitch possible in order to conserve memory. 579 */ 580 return ISL_ARRAY_PITCH_SPAN_COMPACT; 581 } 582 583 return ISL_ARRAY_PITCH_SPAN_FULL; 584 } 585 586 case ISL_DIM_LAYOUT_GEN4_3D: 587 /* The hardware will never use the QPitch. So choose the most 588 * compact QPitch possible in order to conserve memory. 589 */ 590 return ISL_ARRAY_PITCH_SPAN_COMPACT; 591 592 case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 593 /* Each array image in the gen6 stencil of HiZ surface is compact in the 594 * sense that every LOD is a compact array of the same size as LOD0. 595 */ 596 return ISL_ARRAY_PITCH_SPAN_COMPACT; 597 } 598 599 unreachable("bad isl_dim_layout"); 600 return ISL_ARRAY_PITCH_SPAN_FULL; 601} 602 603static void 604isl_choose_image_alignment_el(const struct isl_device *dev, 605 const struct isl_surf_init_info *restrict info, 606 enum isl_tiling tiling, 607 enum isl_dim_layout dim_layout, 608 enum isl_msaa_layout msaa_layout, 609 struct isl_extent3d *image_align_el) 610{ 611 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 612 if (fmtl->txc == ISL_TXC_MCS) { 613 assert(tiling == ISL_TILING_Y0); 614 615 /* 616 * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)": 617 * 618 * Height, width, and layout of MCS buffer in this case must match with 619 * Render Target height, width, and layout. MCS buffer is tiledY. 620 * 621 * To avoid wasting memory, choose the smallest alignment possible: 622 * HALIGN_4 and VALIGN_4. 623 */ 624 *image_align_el = isl_extent3d(4, 4, 1); 625 return; 626 } else if (info->format == ISL_FORMAT_HIZ) { 627 assert(ISL_DEV_GEN(dev) >= 6); 628 if (ISL_DEV_GEN(dev) == 6) { 629 /* HiZ surfaces on Sandy Bridge are packed tightly. */ 630 *image_align_el = isl_extent3d(1, 1, 1); 631 } else { 632 /* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the 633 * primary surface which works out to 2x2 HiZ elments. 634 */ 635 *image_align_el = isl_extent3d(2, 2, 1); 636 } 637 return; 638 } 639 640 if (ISL_DEV_GEN(dev) >= 9) { 641 isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout, 642 msaa_layout, image_align_el); 643 } else if (ISL_DEV_GEN(dev) >= 8) { 644 isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout, 645 msaa_layout, image_align_el); 646 } else if (ISL_DEV_GEN(dev) >= 7) { 647 isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout, 648 msaa_layout, image_align_el); 649 } else if (ISL_DEV_GEN(dev) >= 6) { 650 isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout, 651 msaa_layout, image_align_el); 652 } else { 653 isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout, 654 msaa_layout, image_align_el); 655 } 656} 657 658static enum isl_dim_layout 659isl_surf_choose_dim_layout(const struct isl_device *dev, 660 enum isl_surf_dim logical_dim, 661 enum isl_tiling tiling, 662 isl_surf_usage_flags_t usage) 663{ 664 /* Sandy bridge needs a special layout for HiZ and stencil. */ 665 if (ISL_DEV_GEN(dev) == 6 && 666 (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ)) 667 return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ; 668 669 if (ISL_DEV_GEN(dev) >= 9) { 670 switch (logical_dim) { 671 case ISL_SURF_DIM_1D: 672 /* From the Sky Lake PRM Vol. 5, "1D Surfaces": 673 * 674 * One-dimensional surfaces use a tiling mode of linear. 675 * Technically, they are not tiled resources, but the Tiled 676 * Resource Mode field in RENDER_SURFACE_STATE is still used to 677 * indicate the alignment requirements for this linear surface 678 * (See 1D Alignment requirements for how 4K and 64KB Tiled 679 * Resource Modes impact alignment). Alternatively, a 1D surface 680 * can be defined as a 2D tiled surface (e.g. TileY or TileX) with 681 * a height of 0. 682 * 683 * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear 684 * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used. 685 */ 686 if (tiling == ISL_TILING_LINEAR) 687 return ISL_DIM_LAYOUT_GEN9_1D; 688 else 689 return ISL_DIM_LAYOUT_GEN4_2D; 690 case ISL_SURF_DIM_2D: 691 case ISL_SURF_DIM_3D: 692 return ISL_DIM_LAYOUT_GEN4_2D; 693 } 694 } else { 695 switch (logical_dim) { 696 case ISL_SURF_DIM_1D: 697 case ISL_SURF_DIM_2D: 698 /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout": 699 * 700 * The cube face textures are stored in the same way as 3D surfaces 701 * are stored (see section 6.17.5 for details). For cube surfaces, 702 * however, the depth is equal to the number of faces (always 6) and 703 * is not reduced for each MIP. 704 */ 705 if (ISL_DEV_GEN(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT)) 706 return ISL_DIM_LAYOUT_GEN4_3D; 707 708 return ISL_DIM_LAYOUT_GEN4_2D; 709 case ISL_SURF_DIM_3D: 710 return ISL_DIM_LAYOUT_GEN4_3D; 711 } 712 } 713 714 unreachable("bad isl_surf_dim"); 715 return ISL_DIM_LAYOUT_GEN4_2D; 716} 717 718/** 719 * Calculate the physical extent of the surface's first level, in units of 720 * surface samples. 721 */ 722static void 723isl_calc_phys_level0_extent_sa(const struct isl_device *dev, 724 const struct isl_surf_init_info *restrict info, 725 enum isl_dim_layout dim_layout, 726 enum isl_tiling tiling, 727 enum isl_msaa_layout msaa_layout, 728 struct isl_extent4d *phys_level0_sa) 729{ 730 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 731 732 if (isl_format_is_yuv(info->format)) 733 isl_finishme("%s:%s: YUV format", __FILE__, __func__); 734 735 switch (info->dim) { 736 case ISL_SURF_DIM_1D: 737 assert(info->height == 1); 738 assert(info->depth == 1); 739 assert(info->samples == 1); 740 741 switch (dim_layout) { 742 case ISL_DIM_LAYOUT_GEN4_3D: 743 unreachable("bad isl_dim_layout"); 744 745 case ISL_DIM_LAYOUT_GEN9_1D: 746 case ISL_DIM_LAYOUT_GEN4_2D: 747 case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 748 *phys_level0_sa = (struct isl_extent4d) { 749 .w = info->width, 750 .h = 1, 751 .d = 1, 752 .a = info->array_len, 753 }; 754 break; 755 } 756 break; 757 758 case ISL_SURF_DIM_2D: 759 if (ISL_DEV_GEN(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT)) 760 assert(dim_layout == ISL_DIM_LAYOUT_GEN4_3D); 761 else 762 assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D || 763 dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ); 764 765 if (tiling == ISL_TILING_Ys && info->samples > 1) 766 isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); 767 768 switch (msaa_layout) { 769 case ISL_MSAA_LAYOUT_NONE: 770 assert(info->depth == 1); 771 assert(info->samples == 1); 772 773 *phys_level0_sa = (struct isl_extent4d) { 774 .w = info->width, 775 .h = info->height, 776 .d = 1, 777 .a = info->array_len, 778 }; 779 break; 780 781 case ISL_MSAA_LAYOUT_ARRAY: 782 assert(info->depth == 1); 783 assert(info->levels == 1); 784 assert(isl_format_supports_multisampling(dev->info, info->format)); 785 assert(fmtl->bw == 1 && fmtl->bh == 1); 786 787 *phys_level0_sa = (struct isl_extent4d) { 788 .w = info->width, 789 .h = info->height, 790 .d = 1, 791 .a = info->array_len * info->samples, 792 }; 793 break; 794 795 case ISL_MSAA_LAYOUT_INTERLEAVED: 796 assert(info->depth == 1); 797 assert(info->levels == 1); 798 assert(isl_format_supports_multisampling(dev->info, info->format)); 799 800 *phys_level0_sa = (struct isl_extent4d) { 801 .w = info->width, 802 .h = info->height, 803 .d = 1, 804 .a = info->array_len, 805 }; 806 807 isl_msaa_interleaved_scale_px_to_sa(info->samples, 808 &phys_level0_sa->w, 809 &phys_level0_sa->h); 810 break; 811 } 812 break; 813 814 case ISL_SURF_DIM_3D: 815 assert(info->array_len == 1); 816 assert(info->samples == 1); 817 818 if (fmtl->bd > 1) { 819 isl_finishme("%s:%s: compression block with depth > 1", 820 __FILE__, __func__); 821 } 822 823 switch (dim_layout) { 824 case ISL_DIM_LAYOUT_GEN9_1D: 825 case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 826 unreachable("bad isl_dim_layout"); 827 828 case ISL_DIM_LAYOUT_GEN4_2D: 829 assert(ISL_DEV_GEN(dev) >= 9); 830 831 *phys_level0_sa = (struct isl_extent4d) { 832 .w = info->width, 833 .h = info->height, 834 .d = 1, 835 .a = info->depth, 836 }; 837 break; 838 839 case ISL_DIM_LAYOUT_GEN4_3D: 840 assert(ISL_DEV_GEN(dev) < 9); 841 *phys_level0_sa = (struct isl_extent4d) { 842 .w = info->width, 843 .h = info->height, 844 .d = info->depth, 845 .a = 1, 846 }; 847 break; 848 } 849 break; 850 } 851} 852 853/** 854 * Calculate the pitch between physical array slices, in units of rows of 855 * surface elements. 856 */ 857static uint32_t 858isl_calc_array_pitch_el_rows_gen4_2d( 859 const struct isl_device *dev, 860 const struct isl_surf_init_info *restrict info, 861 const struct isl_tile_info *tile_info, 862 const struct isl_extent3d *image_align_sa, 863 const struct isl_extent4d *phys_level0_sa, 864 enum isl_array_pitch_span array_pitch_span, 865 const struct isl_extent2d *phys_slice0_sa) 866{ 867 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 868 uint32_t pitch_sa_rows = 0; 869 870 switch (array_pitch_span) { 871 case ISL_ARRAY_PITCH_SPAN_COMPACT: 872 pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); 873 break; 874 case ISL_ARRAY_PITCH_SPAN_FULL: { 875 /* The QPitch equation is found in the Broadwell PRM >> Volume 5: 876 * Memory Views >> Common Surface Formats >> Surface Layout >> 2D 877 * Surfaces >> Surface Arrays. 878 */ 879 uint32_t H0_sa = phys_level0_sa->h; 880 uint32_t H1_sa = isl_minify(H0_sa, 1); 881 882 uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); 883 uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); 884 885 uint32_t m; 886 if (ISL_DEV_GEN(dev) >= 7) { 887 /* The QPitch equation changed slightly in Ivybridge. */ 888 m = 12; 889 } else { 890 m = 11; 891 } 892 893 pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); 894 895 if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && 896 (info->height % 4 == 1)) { 897 /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 898 * Graphics Core >> Section 7.18.3.7: Surface Arrays: 899 * 900 * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than 901 * the value calculated in the equation above , for every 902 * other odd Surface Height starting from 1 i.e. 1,5,9,13. 903 * 904 * XXX(chadv): Is the errata natural corollary of the physical 905 * layout of interleaved samples? 906 */ 907 pitch_sa_rows += 4; 908 } 909 910 pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); 911 } /* end case */ 912 break; 913 } 914 915 assert(pitch_sa_rows % fmtl->bh == 0); 916 uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh; 917 918 if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) { 919 /* 920 * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632): 921 * 922 * "Mip-mapped and arrayed surfaces are supported with MCS buffer 923 * layout with these alignments in the RT space: Horizontal 924 * Alignment = 128 and Vertical Alignment = 64." 925 * 926 * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435): 927 * 928 * "For non-multisampled render target's CCS auxiliary surface, 929 * QPitch must be computed with Horizontal Alignment = 128 and 930 * Surface Vertical Alignment = 256. These alignments are only for 931 * CCS buffer and not for associated render target." 932 * 933 * The first restriction is already handled by isl_choose_image_alignment_el 934 * but the second restriction, which is an extension of the first, only 935 * applies to qpitch and must be applied here. 936 */ 937 assert(fmtl->bh == 4); 938 pitch_el_rows = isl_align(pitch_el_rows, 256 / 4); 939 } 940 941 if (ISL_DEV_GEN(dev) >= 9 && 942 info->dim == ISL_SURF_DIM_3D && 943 tile_info->tiling != ISL_TILING_LINEAR) { 944 /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: 945 * 946 * Tile Mode != Linear: This field must be set to an integer multiple 947 * of the tile height 948 */ 949 pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height); 950 } 951 952 return pitch_el_rows; 953} 954 955/** 956 * A variant of isl_calc_phys_slice0_extent_sa() specific to 957 * ISL_DIM_LAYOUT_GEN4_2D. 958 */ 959static void 960isl_calc_phys_slice0_extent_sa_gen4_2d( 961 const struct isl_device *dev, 962 const struct isl_surf_init_info *restrict info, 963 enum isl_msaa_layout msaa_layout, 964 const struct isl_extent3d *image_align_sa, 965 const struct isl_extent4d *phys_level0_sa, 966 struct isl_extent2d *phys_slice0_sa) 967{ 968 assert(phys_level0_sa->depth == 1); 969 970 if (info->levels == 1) { 971 /* Do not pad the surface to the image alignment. 972 * 973 * For tiled surfaces, using a reduced alignment here avoids wasting CPU 974 * cycles on the below mipmap layout caluclations. Reducing the 975 * alignment here is safe because we later align the row pitch and array 976 * pitch to the tile boundary. It is safe even for 977 * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled 978 * to accomodate the interleaved samples. 979 * 980 * For linear surfaces, reducing the alignment here permits us to later 981 * choose an arbitrary, non-aligned row pitch. If the surface backs 982 * a VkBuffer, then an arbitrary pitch may be needed to accomodate 983 * VkBufferImageCopy::bufferRowLength. 984 */ 985 *phys_slice0_sa = (struct isl_extent2d) { 986 .w = phys_level0_sa->w, 987 .h = phys_level0_sa->h, 988 }; 989 return; 990 } 991 992 uint32_t slice_top_w = 0; 993 uint32_t slice_bottom_w = 0; 994 uint32_t slice_left_h = 0; 995 uint32_t slice_right_h = 0; 996 997 uint32_t W0 = phys_level0_sa->w; 998 uint32_t H0 = phys_level0_sa->h; 999 1000 for (uint32_t l = 0; l < info->levels; ++l) { 1001 uint32_t W = isl_minify(W0, l); 1002 uint32_t H = isl_minify(H0, l); 1003 1004 uint32_t w = isl_align_npot(W, image_align_sa->w); 1005 uint32_t h = isl_align_npot(H, image_align_sa->h); 1006 1007 if (l == 0) { 1008 slice_top_w = w; 1009 slice_left_h = h; 1010 slice_right_h = h; 1011 } else if (l == 1) { 1012 slice_bottom_w = w; 1013 slice_left_h += h; 1014 } else if (l == 2) { 1015 slice_bottom_w += w; 1016 slice_right_h += h; 1017 } else { 1018 slice_right_h += h; 1019 } 1020 } 1021 1022 *phys_slice0_sa = (struct isl_extent2d) { 1023 .w = MAX(slice_top_w, slice_bottom_w), 1024 .h = MAX(slice_left_h, slice_right_h), 1025 }; 1026} 1027 1028static void 1029isl_calc_phys_total_extent_el_gen4_2d( 1030 const struct isl_device *dev, 1031 const struct isl_surf_init_info *restrict info, 1032 const struct isl_tile_info *tile_info, 1033 enum isl_msaa_layout msaa_layout, 1034 const struct isl_extent3d *image_align_sa, 1035 const struct isl_extent4d *phys_level0_sa, 1036 enum isl_array_pitch_span array_pitch_span, 1037 uint32_t *array_pitch_el_rows, 1038 struct isl_extent2d *total_extent_el) 1039{ 1040 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1041 1042 struct isl_extent2d phys_slice0_sa; 1043 isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, 1044 image_align_sa, phys_level0_sa, 1045 &phys_slice0_sa); 1046 *array_pitch_el_rows = 1047 isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info, 1048 image_align_sa, phys_level0_sa, 1049 array_pitch_span, 1050 &phys_slice0_sa); 1051 *total_extent_el = (struct isl_extent2d) { 1052 .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw), 1053 .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) + 1054 isl_align_div_npot(phys_slice0_sa.h, fmtl->bh), 1055 }; 1056} 1057 1058/** 1059 * A variant of isl_calc_phys_slice0_extent_sa() specific to 1060 * ISL_DIM_LAYOUT_GEN4_3D. 1061 */ 1062static void 1063isl_calc_phys_total_extent_el_gen4_3d( 1064 const struct isl_device *dev, 1065 const struct isl_surf_init_info *restrict info, 1066 const struct isl_extent3d *image_align_sa, 1067 const struct isl_extent4d *phys_level0_sa, 1068 uint32_t *array_pitch_el_rows, 1069 struct isl_extent2d *phys_total_el) 1070{ 1071 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1072 1073 assert(info->samples == 1); 1074 1075 if (info->dim != ISL_SURF_DIM_3D) { 1076 /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout": 1077 * 1078 * The cube face textures are stored in the same way as 3D surfaces 1079 * are stored (see section 6.17.5 for details). For cube surfaces, 1080 * however, the depth is equal to the number of faces (always 6) and 1081 * is not reduced for each MIP. 1082 */ 1083 assert(ISL_DEV_GEN(dev) == 4); 1084 assert(info->usage & ISL_SURF_USAGE_CUBE_BIT); 1085 assert(phys_level0_sa->array_len == 6); 1086 } else { 1087 assert(phys_level0_sa->array_len == 1); 1088 } 1089 1090 uint32_t total_w = 0; 1091 uint32_t total_h = 0; 1092 1093 uint32_t W0 = phys_level0_sa->w; 1094 uint32_t H0 = phys_level0_sa->h; 1095 uint32_t D0 = phys_level0_sa->d; 1096 uint32_t A0 = phys_level0_sa->a; 1097 1098 for (uint32_t l = 0; l < info->levels; ++l) { 1099 uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); 1100 uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); 1101 uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0; 1102 1103 uint32_t max_layers_horiz = MIN(level_d, 1u << l); 1104 uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); 1105 1106 total_w = MAX(total_w, level_w * max_layers_horiz); 1107 total_h += level_h * max_layers_vert; 1108 } 1109 1110 /* GEN4_3D layouts don't really have an array pitch since each LOD has a 1111 * different number of horizontal and vertical layers. We have to set it 1112 * to something, so at least make it true for LOD0. 1113 */ 1114 *array_pitch_el_rows = 1115 isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw; 1116 *phys_total_el = (struct isl_extent2d) { 1117 .w = isl_assert_div(total_w, fmtl->bw), 1118 .h = isl_assert_div(total_h, fmtl->bh), 1119 }; 1120} 1121 1122/** 1123 * A variant of isl_calc_phys_slice0_extent_sa() specific to 1124 * ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ. 1125 */ 1126static void 1127isl_calc_phys_total_extent_el_gen6_stencil_hiz( 1128 const struct isl_device *dev, 1129 const struct isl_surf_init_info *restrict info, 1130 const struct isl_tile_info *tile_info, 1131 const struct isl_extent3d *image_align_sa, 1132 const struct isl_extent4d *phys_level0_sa, 1133 uint32_t *array_pitch_el_rows, 1134 struct isl_extent2d *phys_total_el) 1135{ 1136 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1137 1138 const struct isl_extent2d tile_extent_sa = { 1139 .w = tile_info->logical_extent_el.w * fmtl->bw, 1140 .h = tile_info->logical_extent_el.h * fmtl->bh, 1141 }; 1142 /* Tile size is a multiple of image alignment */ 1143 assert(tile_extent_sa.w % image_align_sa->w == 0); 1144 assert(tile_extent_sa.h % image_align_sa->h == 0); 1145 1146 const uint32_t W0 = phys_level0_sa->w; 1147 const uint32_t H0 = phys_level0_sa->h; 1148 1149 /* Each image has the same height as LOD0 because the hardware thinks 1150 * everything is LOD0 1151 */ 1152 const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a; 1153 1154 uint32_t total_top_w = 0; 1155 uint32_t total_bottom_w = 0; 1156 uint32_t total_h = 0; 1157 1158 for (uint32_t l = 0; l < info->levels; ++l) { 1159 const uint32_t W = isl_minify(W0, l); 1160 1161 const uint32_t w = isl_align(W, tile_extent_sa.w); 1162 const uint32_t h = isl_align(H, tile_extent_sa.h); 1163 1164 if (l == 0) { 1165 total_top_w = w; 1166 total_h = h; 1167 } else if (l == 1) { 1168 total_bottom_w = w; 1169 total_h += h; 1170 } else { 1171 total_bottom_w += w; 1172 } 1173 } 1174 1175 *array_pitch_el_rows = 1176 isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh); 1177 *phys_total_el = (struct isl_extent2d) { 1178 .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw), 1179 .h = isl_assert_div(total_h, fmtl->bh), 1180 }; 1181} 1182 1183/** 1184 * A variant of isl_calc_phys_slice0_extent_sa() specific to 1185 * ISL_DIM_LAYOUT_GEN9_1D. 1186 */ 1187static void 1188isl_calc_phys_total_extent_el_gen9_1d( 1189 const struct isl_device *dev, 1190 const struct isl_surf_init_info *restrict info, 1191 const struct isl_extent3d *image_align_sa, 1192 const struct isl_extent4d *phys_level0_sa, 1193 uint32_t *array_pitch_el_rows, 1194 struct isl_extent2d *phys_total_el) 1195{ 1196 MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1197 1198 assert(phys_level0_sa->height == 1); 1199 assert(phys_level0_sa->depth == 1); 1200 assert(info->samples == 1); 1201 assert(image_align_sa->w >= fmtl->bw); 1202 1203 uint32_t slice_w = 0; 1204 const uint32_t W0 = phys_level0_sa->w; 1205 1206 for (uint32_t l = 0; l < info->levels; ++l) { 1207 uint32_t W = isl_minify(W0, l); 1208 uint32_t w = isl_align_npot(W, image_align_sa->w); 1209 1210 slice_w += w; 1211 } 1212 1213 *array_pitch_el_rows = 1; 1214 *phys_total_el = (struct isl_extent2d) { 1215 .w = isl_assert_div(slice_w, fmtl->bw), 1216 .h = phys_level0_sa->array_len, 1217 }; 1218} 1219 1220/** 1221 * Calculate the two-dimensional total physical extent of the surface, in 1222 * units of surface elements. 1223 */ 1224static void 1225isl_calc_phys_total_extent_el(const struct isl_device *dev, 1226 const struct isl_surf_init_info *restrict info, 1227 const struct isl_tile_info *tile_info, 1228 enum isl_dim_layout dim_layout, 1229 enum isl_msaa_layout msaa_layout, 1230 const struct isl_extent3d *image_align_sa, 1231 const struct isl_extent4d *phys_level0_sa, 1232 enum isl_array_pitch_span array_pitch_span, 1233 uint32_t *array_pitch_el_rows, 1234 struct isl_extent2d *total_extent_el) 1235{ 1236 switch (dim_layout) { 1237 case ISL_DIM_LAYOUT_GEN9_1D: 1238 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 1239 isl_calc_phys_total_extent_el_gen9_1d(dev, info, 1240 image_align_sa, phys_level0_sa, 1241 array_pitch_el_rows, 1242 total_extent_el); 1243 return; 1244 case ISL_DIM_LAYOUT_GEN4_2D: 1245 isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout, 1246 image_align_sa, phys_level0_sa, 1247 array_pitch_span, 1248 array_pitch_el_rows, 1249 total_extent_el); 1250 return; 1251 case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 1252 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 1253 isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info, 1254 image_align_sa, 1255 phys_level0_sa, 1256 array_pitch_el_rows, 1257 total_extent_el); 1258 return; 1259 case ISL_DIM_LAYOUT_GEN4_3D: 1260 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 1261 isl_calc_phys_total_extent_el_gen4_3d(dev, info, 1262 image_align_sa, phys_level0_sa, 1263 array_pitch_el_rows, 1264 total_extent_el); 1265 return; 1266 } 1267} 1268 1269static uint32_t 1270isl_calc_row_pitch_alignment(const struct isl_surf_init_info *surf_info, 1271 const struct isl_tile_info *tile_info) 1272{ 1273 if (tile_info->tiling != ISL_TILING_LINEAR) 1274 return tile_info->phys_extent_B.width; 1275 1276 /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> 1277 * RENDER_SURFACE_STATE Surface Pitch (p349): 1278 * 1279 * - For linear render target surfaces and surfaces accessed with the 1280 * typed data port messages, the pitch must be a multiple of the 1281 * element size for non-YUV surface formats. Pitch must be 1282 * a multiple of 2 * element size for YUV surface formats. 1283 * 1284 * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we 1285 * ignore because isl doesn't do buffers.] 1286 * 1287 * - For other linear surfaces, the pitch can be any multiple of 1288 * bytes. 1289 */ 1290 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format); 1291 const uint32_t bs = fmtl->bpb / 8; 1292 1293 if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { 1294 if (isl_format_is_yuv(surf_info->format)) { 1295 return 2 * bs; 1296 } else { 1297 return bs; 1298 } 1299 } 1300 1301 return 1; 1302} 1303 1304static uint32_t 1305isl_calc_linear_min_row_pitch(const struct isl_device *dev, 1306 const struct isl_surf_init_info *info, 1307 const struct isl_extent2d *phys_total_el, 1308 uint32_t alignment_B) 1309{ 1310 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1311 const uint32_t bs = fmtl->bpb / 8; 1312 1313 return isl_align_npot(bs * phys_total_el->w, alignment_B); 1314} 1315 1316static uint32_t 1317isl_calc_tiled_min_row_pitch(const struct isl_device *dev, 1318 const struct isl_surf_init_info *surf_info, 1319 const struct isl_tile_info *tile_info, 1320 const struct isl_extent2d *phys_total_el, 1321 uint32_t alignment_B) 1322{ 1323 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format); 1324 1325 assert(fmtl->bpb % tile_info->format_bpb == 0); 1326 1327 const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb; 1328 const uint32_t total_w_tl = 1329 isl_align_div(phys_total_el->w * tile_el_scale, 1330 tile_info->logical_extent_el.width); 1331 1332 assert(alignment_B == tile_info->phys_extent_B.width); 1333 return total_w_tl * tile_info->phys_extent_B.width; 1334} 1335 1336static uint32_t 1337isl_calc_min_row_pitch(const struct isl_device *dev, 1338 const struct isl_surf_init_info *surf_info, 1339 const struct isl_tile_info *tile_info, 1340 const struct isl_extent2d *phys_total_el, 1341 uint32_t alignment_B) 1342{ 1343 if (tile_info->tiling == ISL_TILING_LINEAR) { 1344 return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el, 1345 alignment_B); 1346 } else { 1347 return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info, 1348 phys_total_el, alignment_B); 1349 } 1350} 1351 1352/** 1353 * Is `pitch` in the valid range for a hardware bitfield, if the bitfield's 1354 * size is `bits` bits? 1355 * 1356 * Hardware pitch fields are offset by 1. For example, if the size of 1357 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid 1358 * pitches is [1, 2^b] inclusive. If the surface pitch is N, then 1359 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1. 1360 */ 1361static bool 1362pitch_in_range(uint32_t n, uint32_t bits) 1363{ 1364 assert(n != 0); 1365 return likely(bits != 0 && 1 <= n && n <= (1 << bits)); 1366} 1367 1368static bool 1369isl_calc_row_pitch(const struct isl_device *dev, 1370 const struct isl_surf_init_info *surf_info, 1371 const struct isl_tile_info *tile_info, 1372 enum isl_dim_layout dim_layout, 1373 const struct isl_extent2d *phys_total_el, 1374 uint32_t *out_row_pitch_B) 1375{ 1376 uint32_t alignment_B = 1377 isl_calc_row_pitch_alignment(surf_info, tile_info); 1378 1379 const uint32_t min_row_pitch_B = 1380 isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el, 1381 alignment_B); 1382 1383 uint32_t row_pitch_B = min_row_pitch_B; 1384 1385 if (surf_info->row_pitch_B != 0) { 1386 row_pitch_B = surf_info->row_pitch_B; 1387 1388 if (row_pitch_B < min_row_pitch_B) 1389 return false; 1390 1391 if (row_pitch_B % alignment_B != 0) 1392 return false; 1393 } 1394 1395 const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width; 1396 1397 if (row_pitch_B == 0) 1398 return false; 1399 1400 if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { 1401 /* SurfacePitch is ignored for this layout. */ 1402 goto done; 1403 } 1404 1405 if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | 1406 ISL_SURF_USAGE_TEXTURE_BIT | 1407 ISL_SURF_USAGE_STORAGE_BIT)) && 1408 !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info))) 1409 return false; 1410 1411 if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT | 1412 ISL_SURF_USAGE_MCS_BIT)) && 1413 !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info))) 1414 return false; 1415 1416 if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) && 1417 !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) 1418 return false; 1419 1420 if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) && 1421 !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) 1422 return false; 1423 1424 const uint32_t stencil_pitch_bits = dev->use_separate_stencil ? 1425 _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) : 1426 _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info); 1427 1428 if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) && 1429 !pitch_in_range(row_pitch_B, stencil_pitch_bits)) 1430 return false; 1431 1432 done: 1433 *out_row_pitch_B = row_pitch_B; 1434 return true; 1435} 1436 1437bool 1438isl_surf_init_s(const struct isl_device *dev, 1439 struct isl_surf *surf, 1440 const struct isl_surf_init_info *restrict info) 1441{ 1442 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1443 1444 const struct isl_extent4d logical_level0_px = { 1445 .w = info->width, 1446 .h = info->height, 1447 .d = info->depth, 1448 .a = info->array_len, 1449 }; 1450 1451 enum isl_tiling tiling; 1452 if (!isl_surf_choose_tiling(dev, info, &tiling)) 1453 return false; 1454 1455 struct isl_tile_info tile_info; 1456 isl_tiling_get_info(tiling, fmtl->bpb, &tile_info); 1457 1458 const enum isl_dim_layout dim_layout = 1459 isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage); 1460 1461 enum isl_msaa_layout msaa_layout; 1462 if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) 1463 return false; 1464 1465 struct isl_extent3d image_align_el; 1466 isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout, 1467 &image_align_el); 1468 1469 struct isl_extent3d image_align_sa = 1470 isl_extent3d_el_to_sa(info->format, image_align_el); 1471 1472 struct isl_extent4d phys_level0_sa; 1473 isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, 1474 &phys_level0_sa); 1475 1476 enum isl_array_pitch_span array_pitch_span = 1477 isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); 1478 1479 uint32_t array_pitch_el_rows; 1480 struct isl_extent2d phys_total_el; 1481 isl_calc_phys_total_extent_el(dev, info, &tile_info, 1482 dim_layout, msaa_layout, 1483 &image_align_sa, &phys_level0_sa, 1484 array_pitch_span, &array_pitch_el_rows, 1485 &phys_total_el); 1486 1487 uint32_t row_pitch_B; 1488 if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout, 1489 &phys_total_el, &row_pitch_B)) 1490 return false; 1491 1492 uint32_t base_alignment_B; 1493 uint64_t size_B; 1494 if (tiling == ISL_TILING_LINEAR) { 1495 size_B = (uint64_t) row_pitch_B * phys_total_el.h; 1496 1497 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress: 1498 * 1499 * "The Base Address for linear render target surfaces and surfaces 1500 * accessed with the typed surface read/write data port messages must 1501 * be element-size aligned, for non-YUV surface formats, or a 1502 * multiple of 2 element-sizes for YUV surface formats. Other linear 1503 * surfaces have no alignment requirements (byte alignment is 1504 * sufficient.)" 1505 */ 1506 base_alignment_B = MAX(1, info->min_alignment_B); 1507 if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { 1508 if (isl_format_is_yuv(info->format)) { 1509 base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 4); 1510 } else { 1511 base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 8); 1512 } 1513 } 1514 base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B); 1515 1516 /* From the Skylake PRM Vol 2c, PLANE_STRIDE::Stride: 1517 * 1518 * "For Linear memory, this field specifies the stride in chunks of 1519 * 64 bytes (1 cache line)." 1520 */ 1521 if (isl_surf_usage_is_display(info->usage)) 1522 base_alignment_B = MAX(base_alignment_B, 64); 1523 } else { 1524 const uint32_t total_h_tl = 1525 isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height); 1526 1527 size_B = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch_B; 1528 1529 const uint32_t tile_size_B = tile_info.phys_extent_B.width * 1530 tile_info.phys_extent_B.height; 1531 assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B)); 1532 base_alignment_B = MAX(info->min_alignment_B, tile_size_B); 1533 } 1534 1535 if (ISL_DEV_GEN(dev) < 9) { 1536 /* From the Broadwell PRM Vol 5, Surface Layout: 1537 * 1538 * "In addition to restrictions on maximum height, width, and depth, 1539 * surfaces are also restricted to a maximum size in bytes. This 1540 * maximum is 2 GB for all products and all surface types." 1541 * 1542 * This comment is applicable to all Pre-gen9 platforms. 1543 */ 1544 if (size_B > (uint64_t) 1 << 31) 1545 return false; 1546 } else if (ISL_DEV_GEN(dev) < 11) { 1547 /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes: 1548 * "In addition to restrictions on maximum height, width, and depth, 1549 * surfaces are also restricted to a maximum size of 2^38 bytes. 1550 * All pixels within the surface must be contained within 2^38 bytes 1551 * of the base address." 1552 */ 1553 if (size_B > (uint64_t) 1 << 38) 1554 return false; 1555 } else { 1556 /* gen11+ platforms raised this limit to 2^44 bytes. */ 1557 if (size_B > (uint64_t) 1 << 44) 1558 return false; 1559 } 1560 1561 *surf = (struct isl_surf) { 1562 .dim = info->dim, 1563 .dim_layout = dim_layout, 1564 .msaa_layout = msaa_layout, 1565 .tiling = tiling, 1566 .format = info->format, 1567 1568 .levels = info->levels, 1569 .samples = info->samples, 1570 1571 .image_alignment_el = image_align_el, 1572 .logical_level0_px = logical_level0_px, 1573 .phys_level0_sa = phys_level0_sa, 1574 1575 .size_B = size_B, 1576 .alignment_B = base_alignment_B, 1577 .row_pitch_B = row_pitch_B, 1578 .array_pitch_el_rows = array_pitch_el_rows, 1579 .array_pitch_span = array_pitch_span, 1580 1581 .usage = info->usage, 1582 }; 1583 1584 return true; 1585} 1586 1587void 1588isl_surf_get_tile_info(const struct isl_surf *surf, 1589 struct isl_tile_info *tile_info) 1590{ 1591 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 1592 isl_tiling_get_info(surf->tiling, fmtl->bpb, tile_info); 1593} 1594 1595bool 1596isl_surf_get_hiz_surf(const struct isl_device *dev, 1597 const struct isl_surf *surf, 1598 struct isl_surf *hiz_surf) 1599{ 1600 assert(ISL_DEV_GEN(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev)); 1601 1602 /* Multisampled depth is always interleaved */ 1603 assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE || 1604 surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED); 1605 1606 /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer": 1607 * 1608 * "The Surface Type, Height, Width, Depth, Minimum Array Element, Render 1609 * Target View Extent, and Depth Coordinate Offset X/Y of the 1610 * hierarchical depth buffer are inherited from the depth buffer. The 1611 * height and width of the hierarchical depth buffer that must be 1612 * allocated are computed by the following formulas, where HZ is the 1613 * hierarchical depth buffer and Z is the depth buffer. The Z_Height, 1614 * Z_Width, and Z_Depth values given in these formulas are those present 1615 * in 3DSTATE_DEPTH_BUFFER incremented by one. 1616 * 1617 * "The value of Z_Height and Z_Width must each be multiplied by 2 before 1618 * being applied to the table below if Number of Multisamples is set to 1619 * NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and 1620 * Z_Width must be multiplied by 4 before being applied to the table 1621 * below if Number of Multisamples is set to NUMSAMPLES_8." 1622 * 1623 * In the Sky Lake PRM, the second paragraph is replaced with this: 1624 * 1625 * "The Z_Height and Z_Width values must equal those present in 1626 * 3DSTATE_DEPTH_BUFFER incremented by one." 1627 * 1628 * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ 1629 * block corresponds to a region of 8x4 samples in the primary depth 1630 * surface. On Sky Lake, on the other hand, each HiZ block corresponds to 1631 * a region of 8x4 pixels in the primary depth surface regardless of the 1632 * number of samples. The dimensions of a HiZ block in both pixels and 1633 * samples are given in the table below: 1634 * 1635 * | SNB - BDW | SKL+ 1636 * ------+-----------+------------- 1637 * 1x | 8 x 4 sa | 8 x 4 sa 1638 * MSAA | 8 x 4 px | 8 x 4 px 1639 * ------+-----------+------------- 1640 * 2x | 8 x 4 sa | 16 x 4 sa 1641 * MSAA | 4 x 4 px | 8 x 4 px 1642 * ------+-----------+------------- 1643 * 4x | 8 x 4 sa | 16 x 8 sa 1644 * MSAA | 4 x 2 px | 8 x 4 px 1645 * ------+-----------+------------- 1646 * 8x | 8 x 4 sa | 32 x 8 sa 1647 * MSAA | 2 x 2 px | 8 x 4 px 1648 * ------+-----------+------------- 1649 * 16x | N/A | 32 x 16 sa 1650 * MSAA | N/A | 8 x 4 px 1651 * ------+-----------+------------- 1652 * 1653 * There are a number of different ways that this discrepency could be 1654 * handled. The way we have chosen is to simply make MSAA HiZ have the 1655 * same number of samples as the parent surface pre-Sky Lake and always be 1656 * single-sampled on Sky Lake and above. Since the block sizes of 1657 * compressed formats are given in samples, this neatly handles everything 1658 * without the need for additional HiZ formats with different block sizes 1659 * on SKL+. 1660 */ 1661 const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples; 1662 1663 return isl_surf_init(dev, hiz_surf, 1664 .dim = surf->dim, 1665 .format = ISL_FORMAT_HIZ, 1666 .width = surf->logical_level0_px.width, 1667 .height = surf->logical_level0_px.height, 1668 .depth = surf->logical_level0_px.depth, 1669 .levels = surf->levels, 1670 .array_len = surf->logical_level0_px.array_len, 1671 .samples = samples, 1672 .usage = ISL_SURF_USAGE_HIZ_BIT, 1673 .tiling_flags = ISL_TILING_HIZ_BIT); 1674} 1675 1676bool 1677isl_surf_get_mcs_surf(const struct isl_device *dev, 1678 const struct isl_surf *surf, 1679 struct isl_surf *mcs_surf) 1680{ 1681 assert(ISL_DEV_GEN(dev) >= 7); 1682 1683 /* It must be multisampled with an array layout */ 1684 assert(surf->samples > 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 1685 1686 /* The following are true of all multisampled surfaces */ 1687 assert(surf->dim == ISL_SURF_DIM_2D); 1688 assert(surf->levels == 1); 1689 assert(surf->logical_level0_px.depth == 1); 1690 1691 /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9 1692 * bits which means the maximum pitch of a compression surface is 512 1693 * tiles or 64KB (since MCS is always Y-tiled). Since a 16x MCS buffer is 1694 * 64bpp, this gives us a maximum width of 8192 pixels. We can create 1695 * larger multisampled surfaces, we just can't compress them. For 2x, 4x, 1696 * and 8x, we have enough room for the full 16k supported by the hardware. 1697 */ 1698 if (surf->samples == 16 && surf->logical_level0_px.width > 8192) 1699 return false; 1700 1701 enum isl_format mcs_format; 1702 switch (surf->samples) { 1703 case 2: mcs_format = ISL_FORMAT_MCS_2X; break; 1704 case 4: mcs_format = ISL_FORMAT_MCS_4X; break; 1705 case 8: mcs_format = ISL_FORMAT_MCS_8X; break; 1706 case 16: mcs_format = ISL_FORMAT_MCS_16X; break; 1707 default: 1708 unreachable("Invalid sample count"); 1709 } 1710 1711 return isl_surf_init(dev, mcs_surf, 1712 .dim = ISL_SURF_DIM_2D, 1713 .format = mcs_format, 1714 .width = surf->logical_level0_px.width, 1715 .height = surf->logical_level0_px.height, 1716 .depth = 1, 1717 .levels = 1, 1718 .array_len = surf->logical_level0_px.array_len, 1719 .samples = 1, /* MCS surfaces are really single-sampled */ 1720 .usage = ISL_SURF_USAGE_MCS_BIT, 1721 .tiling_flags = ISL_TILING_Y0_BIT); 1722} 1723 1724bool 1725isl_surf_get_ccs_surf(const struct isl_device *dev, 1726 const struct isl_surf *surf, 1727 struct isl_surf *ccs_surf, 1728 uint32_t row_pitch_B) 1729{ 1730 assert(surf->samples == 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_NONE); 1731 assert(ISL_DEV_GEN(dev) >= 7); 1732 1733 if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) 1734 return false; 1735 1736 /* The PRM doesn't say this explicitly, but fast-clears don't appear to 1737 * work for 3D textures until gen9 where the layout of 3D textures changes 1738 * to match 2D array textures. 1739 */ 1740 if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D) 1741 return false; 1742 1743 /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of 1744 * Non-MultiSampler Render Target Restrictions): 1745 * 1746 * "Support is for non-mip-mapped and non-array surface types only." 1747 * 1748 * This restriction is lifted on gen8+. Technically, it may be possible to 1749 * create a CCS for an arrayed or mipmapped image and only enable CCS_D 1750 * when rendering to the base slice. However, there is no documentation 1751 * tell us what the hardware would do in that case or what it does if you 1752 * walk off the bases slice. (Does it ignore CCS or does it start 1753 * scribbling over random memory?) We play it safe and just follow the 1754 * docs and don't allow CCS_D for arrayed or mip-mapped surfaces. 1755 */ 1756 if (ISL_DEV_GEN(dev) <= 7 && 1757 (surf->levels > 1 || surf->logical_level0_px.array_len > 1)) 1758 return false; 1759 1760 if (isl_format_is_compressed(surf->format)) 1761 return false; 1762 1763 /* TODO: More conditions where it can fail. */ 1764 1765 enum isl_format ccs_format; 1766 if (ISL_DEV_GEN(dev) >= 9) { 1767 if (!isl_tiling_is_any_y(surf->tiling)) 1768 return false; 1769 1770 switch (isl_format_get_layout(surf->format)->bpb) { 1771 case 32: ccs_format = ISL_FORMAT_GEN9_CCS_32BPP; break; 1772 case 64: ccs_format = ISL_FORMAT_GEN9_CCS_64BPP; break; 1773 case 128: ccs_format = ISL_FORMAT_GEN9_CCS_128BPP; break; 1774 default: 1775 return false; 1776 } 1777 } else if (surf->tiling == ISL_TILING_Y0) { 1778 switch (isl_format_get_layout(surf->format)->bpb) { 1779 case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_Y; break; 1780 case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_Y; break; 1781 case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_Y; break; 1782 default: 1783 return false; 1784 } 1785 } else if (surf->tiling == ISL_TILING_X) { 1786 switch (isl_format_get_layout(surf->format)->bpb) { 1787 case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_X; break; 1788 case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_X; break; 1789 case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_X; break; 1790 default: 1791 return false; 1792 } 1793 } else { 1794 return false; 1795 } 1796 1797 return isl_surf_init(dev, ccs_surf, 1798 .dim = surf->dim, 1799 .format = ccs_format, 1800 .width = surf->logical_level0_px.width, 1801 .height = surf->logical_level0_px.height, 1802 .depth = surf->logical_level0_px.depth, 1803 .levels = surf->levels, 1804 .array_len = surf->logical_level0_px.array_len, 1805 .samples = 1, 1806 .row_pitch_B = row_pitch_B, 1807 .usage = ISL_SURF_USAGE_CCS_BIT, 1808 .tiling_flags = ISL_TILING_CCS_BIT); 1809} 1810 1811#define isl_genX_call(dev, func, ...) \ 1812 switch (ISL_DEV_GEN(dev)) { \ 1813 case 4: \ 1814 /* G45 surface state is the same as gen5 */ \ 1815 if (ISL_DEV_IS_G4X(dev)) { \ 1816 isl_gen5_##func(__VA_ARGS__); \ 1817 } else { \ 1818 isl_gen4_##func(__VA_ARGS__); \ 1819 } \ 1820 break; \ 1821 case 5: \ 1822 isl_gen5_##func(__VA_ARGS__); \ 1823 break; \ 1824 case 6: \ 1825 isl_gen6_##func(__VA_ARGS__); \ 1826 break; \ 1827 case 7: \ 1828 if (ISL_DEV_IS_HASWELL(dev)) { \ 1829 isl_gen75_##func(__VA_ARGS__); \ 1830 } else { \ 1831 isl_gen7_##func(__VA_ARGS__); \ 1832 } \ 1833 break; \ 1834 case 8: \ 1835 isl_gen8_##func(__VA_ARGS__); \ 1836 break; \ 1837 case 9: \ 1838 isl_gen9_##func(__VA_ARGS__); \ 1839 break; \ 1840 case 10: \ 1841 isl_gen10_##func(__VA_ARGS__); \ 1842 break; \ 1843 case 11: \ 1844 isl_gen11_##func(__VA_ARGS__); \ 1845 break; \ 1846 default: \ 1847 assert(!"Unknown hardware generation"); \ 1848 } 1849 1850void 1851isl_surf_fill_state_s(const struct isl_device *dev, void *state, 1852 const struct isl_surf_fill_state_info *restrict info) 1853{ 1854#ifndef NDEBUG 1855 isl_surf_usage_flags_t _base_usage = 1856 info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | 1857 ISL_SURF_USAGE_TEXTURE_BIT | 1858 ISL_SURF_USAGE_STORAGE_BIT); 1859 /* They may only specify one of the above bits at a time */ 1860 assert(__builtin_popcount(_base_usage) == 1); 1861 /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */ 1862 assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage); 1863#endif 1864 1865 if (info->surf->dim == ISL_SURF_DIM_3D) { 1866 assert(info->view->base_array_layer + info->view->array_len <= 1867 info->surf->logical_level0_px.depth); 1868 } else { 1869 assert(info->view->base_array_layer + info->view->array_len <= 1870 info->surf->logical_level0_px.array_len); 1871 } 1872 1873 isl_genX_call(dev, surf_fill_state_s, dev, state, info); 1874} 1875 1876void 1877isl_buffer_fill_state_s(const struct isl_device *dev, void *state, 1878 const struct isl_buffer_fill_state_info *restrict info) 1879{ 1880 isl_genX_call(dev, buffer_fill_state_s, state, info); 1881} 1882 1883void 1884isl_null_fill_state(const struct isl_device *dev, void *state, 1885 struct isl_extent3d size) 1886{ 1887 isl_genX_call(dev, null_fill_state, state, size); 1888} 1889 1890void 1891isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, 1892 const struct isl_depth_stencil_hiz_emit_info *restrict info) 1893{ 1894 if (info->depth_surf && info->stencil_surf) { 1895 if (!dev->info->has_hiz_and_separate_stencil) { 1896 assert(info->depth_surf == info->stencil_surf); 1897 assert(info->depth_address == info->stencil_address); 1898 } 1899 assert(info->depth_surf->dim == info->stencil_surf->dim); 1900 } 1901 1902 if (info->depth_surf) { 1903 assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT)); 1904 if (info->depth_surf->dim == ISL_SURF_DIM_3D) { 1905 assert(info->view->base_array_layer + info->view->array_len <= 1906 info->depth_surf->logical_level0_px.depth); 1907 } else { 1908 assert(info->view->base_array_layer + info->view->array_len <= 1909 info->depth_surf->logical_level0_px.array_len); 1910 } 1911 } 1912 1913 if (info->stencil_surf) { 1914 assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT)); 1915 if (info->stencil_surf->dim == ISL_SURF_DIM_3D) { 1916 assert(info->view->base_array_layer + info->view->array_len <= 1917 info->stencil_surf->logical_level0_px.depth); 1918 } else { 1919 assert(info->view->base_array_layer + info->view->array_len <= 1920 info->stencil_surf->logical_level0_px.array_len); 1921 } 1922 } 1923 1924 isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info); 1925} 1926 1927/** 1928 * A variant of isl_surf_get_image_offset_sa() specific to 1929 * ISL_DIM_LAYOUT_GEN4_2D. 1930 */ 1931static void 1932get_image_offset_sa_gen4_2d(const struct isl_surf *surf, 1933 uint32_t level, uint32_t logical_array_layer, 1934 uint32_t *x_offset_sa, 1935 uint32_t *y_offset_sa) 1936{ 1937 assert(level < surf->levels); 1938 if (surf->dim == ISL_SURF_DIM_3D) 1939 assert(logical_array_layer < surf->logical_level0_px.depth); 1940 else 1941 assert(logical_array_layer < surf->logical_level0_px.array_len); 1942 1943 const struct isl_extent3d image_align_sa = 1944 isl_surf_get_image_alignment_sa(surf); 1945 1946 const uint32_t W0 = surf->phys_level0_sa.width; 1947 const uint32_t H0 = surf->phys_level0_sa.height; 1948 1949 const uint32_t phys_layer = logical_array_layer * 1950 (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1); 1951 1952 uint32_t x = 0; 1953 uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf); 1954 1955 for (uint32_t l = 0; l < level; ++l) { 1956 if (l == 1) { 1957 uint32_t W = isl_minify(W0, l); 1958 x += isl_align_npot(W, image_align_sa.w); 1959 } else { 1960 uint32_t H = isl_minify(H0, l); 1961 y += isl_align_npot(H, image_align_sa.h); 1962 } 1963 } 1964 1965 *x_offset_sa = x; 1966 *y_offset_sa = y; 1967} 1968 1969/** 1970 * A variant of isl_surf_get_image_offset_sa() specific to 1971 * ISL_DIM_LAYOUT_GEN4_3D. 1972 */ 1973static void 1974get_image_offset_sa_gen4_3d(const struct isl_surf *surf, 1975 uint32_t level, uint32_t logical_z_offset_px, 1976 uint32_t *x_offset_sa, 1977 uint32_t *y_offset_sa) 1978{ 1979 assert(level < surf->levels); 1980 if (surf->dim == ISL_SURF_DIM_3D) { 1981 assert(surf->phys_level0_sa.array_len == 1); 1982 assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level)); 1983 } else { 1984 assert(surf->dim == ISL_SURF_DIM_2D); 1985 assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT); 1986 assert(surf->phys_level0_sa.array_len == 6); 1987 assert(logical_z_offset_px < surf->phys_level0_sa.array_len); 1988 } 1989 1990 const struct isl_extent3d image_align_sa = 1991 isl_surf_get_image_alignment_sa(surf); 1992 1993 const uint32_t W0 = surf->phys_level0_sa.width; 1994 const uint32_t H0 = surf->phys_level0_sa.height; 1995 const uint32_t D0 = surf->phys_level0_sa.depth; 1996 const uint32_t AL = surf->phys_level0_sa.array_len; 1997 1998 uint32_t x = 0; 1999 uint32_t y = 0; 2000 2001 for (uint32_t l = 0; l < level; ++l) { 2002 const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h); 2003 const uint32_t level_d = 2004 isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL, 2005 image_align_sa.d); 2006 const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); 2007 2008 y += level_h * max_layers_vert; 2009 } 2010 2011 const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w); 2012 const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h); 2013 const uint32_t level_d = 2014 isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL, 2015 image_align_sa.d); 2016 2017 const uint32_t max_layers_horiz = MIN(level_d, 1u << level); 2018 2019 x += level_w * (logical_z_offset_px % max_layers_horiz); 2020 y += level_h * (logical_z_offset_px / max_layers_horiz); 2021 2022 *x_offset_sa = x; 2023 *y_offset_sa = y; 2024} 2025 2026static void 2027get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf, 2028 uint32_t level, 2029 uint32_t logical_array_layer, 2030 uint32_t *x_offset_sa, 2031 uint32_t *y_offset_sa) 2032{ 2033 assert(level < surf->levels); 2034 assert(surf->logical_level0_px.depth == 1); 2035 assert(logical_array_layer < surf->logical_level0_px.array_len); 2036 2037 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 2038 2039 const struct isl_extent3d image_align_sa = 2040 isl_surf_get_image_alignment_sa(surf); 2041 2042 struct isl_tile_info tile_info; 2043 isl_tiling_get_info(surf->tiling, fmtl->bpb, &tile_info); 2044 const struct isl_extent2d tile_extent_sa = { 2045 .w = tile_info.logical_extent_el.w * fmtl->bw, 2046 .h = tile_info.logical_extent_el.h * fmtl->bh, 2047 }; 2048 /* Tile size is a multiple of image alignment */ 2049 assert(tile_extent_sa.w % image_align_sa.w == 0); 2050 assert(tile_extent_sa.h % image_align_sa.h == 0); 2051 2052 const uint32_t W0 = surf->phys_level0_sa.w; 2053 const uint32_t H0 = surf->phys_level0_sa.h; 2054 2055 /* Each image has the same height as LOD0 because the hardware thinks 2056 * everything is LOD0 2057 */ 2058 const uint32_t H = isl_align(H0, image_align_sa.h); 2059 2060 /* Quick sanity check for consistency */ 2061 if (surf->phys_level0_sa.array_len > 1) 2062 assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh)); 2063 2064 uint32_t x = 0, y = 0; 2065 for (uint32_t l = 0; l < level; ++l) { 2066 const uint32_t W = isl_minify(W0, l); 2067 2068 const uint32_t w = isl_align(W, tile_extent_sa.w); 2069 const uint32_t h = isl_align(H * surf->phys_level0_sa.a, 2070 tile_extent_sa.h); 2071 2072 if (l == 0) { 2073 y += h; 2074 } else { 2075 x += w; 2076 } 2077 } 2078 2079 y += H * logical_array_layer; 2080 2081 *x_offset_sa = x; 2082 *y_offset_sa = y; 2083} 2084 2085/** 2086 * A variant of isl_surf_get_image_offset_sa() specific to 2087 * ISL_DIM_LAYOUT_GEN9_1D. 2088 */ 2089static void 2090get_image_offset_sa_gen9_1d(const struct isl_surf *surf, 2091 uint32_t level, uint32_t layer, 2092 uint32_t *x_offset_sa, 2093 uint32_t *y_offset_sa) 2094{ 2095 assert(level < surf->levels); 2096 assert(layer < surf->phys_level0_sa.array_len); 2097 assert(surf->phys_level0_sa.height == 1); 2098 assert(surf->phys_level0_sa.depth == 1); 2099 assert(surf->samples == 1); 2100 2101 const uint32_t W0 = surf->phys_level0_sa.width; 2102 const struct isl_extent3d image_align_sa = 2103 isl_surf_get_image_alignment_sa(surf); 2104 2105 uint32_t x = 0; 2106 2107 for (uint32_t l = 0; l < level; ++l) { 2108 uint32_t W = isl_minify(W0, l); 2109 uint32_t w = isl_align_npot(W, image_align_sa.w); 2110 2111 x += w; 2112 } 2113 2114 *x_offset_sa = x; 2115 *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf); 2116} 2117 2118/** 2119 * Calculate the offset, in units of surface samples, to a subimage in the 2120 * surface. 2121 * 2122 * @invariant level < surface levels 2123 * @invariant logical_array_layer < logical array length of surface 2124 * @invariant logical_z_offset_px < logical depth of surface at level 2125 */ 2126void 2127isl_surf_get_image_offset_sa(const struct isl_surf *surf, 2128 uint32_t level, 2129 uint32_t logical_array_layer, 2130 uint32_t logical_z_offset_px, 2131 uint32_t *x_offset_sa, 2132 uint32_t *y_offset_sa) 2133{ 2134 assert(level < surf->levels); 2135 assert(logical_array_layer < surf->logical_level0_px.array_len); 2136 assert(logical_z_offset_px 2137 < isl_minify(surf->logical_level0_px.depth, level)); 2138 2139 switch (surf->dim_layout) { 2140 case ISL_DIM_LAYOUT_GEN9_1D: 2141 get_image_offset_sa_gen9_1d(surf, level, logical_array_layer, 2142 x_offset_sa, y_offset_sa); 2143 break; 2144 case ISL_DIM_LAYOUT_GEN4_2D: 2145 get_image_offset_sa_gen4_2d(surf, level, logical_array_layer 2146 + logical_z_offset_px, 2147 x_offset_sa, y_offset_sa); 2148 break; 2149 case ISL_DIM_LAYOUT_GEN4_3D: 2150 get_image_offset_sa_gen4_3d(surf, level, logical_array_layer + 2151 logical_z_offset_px, 2152 x_offset_sa, y_offset_sa); 2153 break; 2154 case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 2155 get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer + 2156 logical_z_offset_px, 2157 x_offset_sa, y_offset_sa); 2158 break; 2159 2160 default: 2161 unreachable("not reached"); 2162 } 2163} 2164 2165void 2166isl_surf_get_image_offset_el(const struct isl_surf *surf, 2167 uint32_t level, 2168 uint32_t logical_array_layer, 2169 uint32_t logical_z_offset_px, 2170 uint32_t *x_offset_el, 2171 uint32_t *y_offset_el) 2172{ 2173 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 2174 2175 assert(level < surf->levels); 2176 assert(logical_array_layer < surf->logical_level0_px.array_len); 2177 assert(logical_z_offset_px 2178 < isl_minify(surf->logical_level0_px.depth, level)); 2179 2180 uint32_t x_offset_sa, y_offset_sa; 2181 isl_surf_get_image_offset_sa(surf, level, 2182 logical_array_layer, 2183 logical_z_offset_px, 2184 &x_offset_sa, 2185 &y_offset_sa); 2186 2187 *x_offset_el = x_offset_sa / fmtl->bw; 2188 *y_offset_el = y_offset_sa / fmtl->bh; 2189} 2190 2191void 2192isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf, 2193 uint32_t level, 2194 uint32_t logical_array_layer, 2195 uint32_t logical_z_offset_px, 2196 uint32_t *offset_B, 2197 uint32_t *x_offset_sa, 2198 uint32_t *y_offset_sa) 2199{ 2200 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 2201 2202 uint32_t total_x_offset_el, total_y_offset_el; 2203 isl_surf_get_image_offset_el(surf, level, logical_array_layer, 2204 logical_z_offset_px, 2205 &total_x_offset_el, 2206 &total_y_offset_el); 2207 2208 uint32_t x_offset_el, y_offset_el; 2209 isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb, 2210 surf->row_pitch_B, 2211 total_x_offset_el, 2212 total_y_offset_el, 2213 offset_B, 2214 &x_offset_el, 2215 &y_offset_el); 2216 2217 if (x_offset_sa) { 2218 *x_offset_sa = x_offset_el * fmtl->bw; 2219 } else { 2220 assert(x_offset_el == 0); 2221 } 2222 2223 if (y_offset_sa) { 2224 *y_offset_sa = y_offset_el * fmtl->bh; 2225 } else { 2226 assert(y_offset_el == 0); 2227 } 2228} 2229 2230void 2231isl_surf_get_image_surf(const struct isl_device *dev, 2232 const struct isl_surf *surf, 2233 uint32_t level, 2234 uint32_t logical_array_layer, 2235 uint32_t logical_z_offset_px, 2236 struct isl_surf *image_surf, 2237 uint32_t *offset_B, 2238 uint32_t *x_offset_sa, 2239 uint32_t *y_offset_sa) 2240{ 2241 isl_surf_get_image_offset_B_tile_sa(surf, 2242 level, 2243 logical_array_layer, 2244 logical_z_offset_px, 2245 offset_B, 2246 x_offset_sa, 2247 y_offset_sa); 2248 2249 /* Even for cube maps there will be only single face, therefore drop the 2250 * corresponding flag if present. 2251 */ 2252 const isl_surf_usage_flags_t usage = 2253 surf->usage & (~ISL_SURF_USAGE_CUBE_BIT); 2254 2255 bool ok UNUSED; 2256 ok = isl_surf_init(dev, image_surf, 2257 .dim = ISL_SURF_DIM_2D, 2258 .format = surf->format, 2259 .width = isl_minify(surf->logical_level0_px.w, level), 2260 .height = isl_minify(surf->logical_level0_px.h, level), 2261 .depth = 1, 2262 .levels = 1, 2263 .array_len = 1, 2264 .samples = surf->samples, 2265 .row_pitch_B = surf->row_pitch_B, 2266 .usage = usage, 2267 .tiling_flags = (1 << surf->tiling)); 2268 assert(ok); 2269} 2270 2271void 2272isl_tiling_get_intratile_offset_el(enum isl_tiling tiling, 2273 uint32_t bpb, 2274 uint32_t row_pitch_B, 2275 uint32_t total_x_offset_el, 2276 uint32_t total_y_offset_el, 2277 uint32_t *base_address_offset, 2278 uint32_t *x_offset_el, 2279 uint32_t *y_offset_el) 2280{ 2281 if (tiling == ISL_TILING_LINEAR) { 2282 assert(bpb % 8 == 0); 2283 *base_address_offset = total_y_offset_el * row_pitch_B + 2284 total_x_offset_el * (bpb / 8); 2285 *x_offset_el = 0; 2286 *y_offset_el = 0; 2287 return; 2288 } 2289 2290 struct isl_tile_info tile_info; 2291 isl_tiling_get_info(tiling, bpb, &tile_info); 2292 2293 assert(row_pitch_B % tile_info.phys_extent_B.width == 0); 2294 2295 /* For non-power-of-two formats, we need the address to be both tile and 2296 * element-aligned. The easiest way to achieve this is to work with a tile 2297 * that is three times as wide as the regular tile. 2298 * 2299 * The tile info returned by get_tile_info has a logical size that is an 2300 * integer number of tile_info.format_bpb size elements. To scale the 2301 * tile, we scale up the physical width and then treat the logical tile 2302 * size as if it has bpb size elements. 2303 */ 2304 const uint32_t tile_el_scale = bpb / tile_info.format_bpb; 2305 tile_info.phys_extent_B.width *= tile_el_scale; 2306 2307 /* Compute the offset into the tile */ 2308 *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w; 2309 *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h; 2310 2311 /* Compute the offset of the tile in units of whole tiles */ 2312 uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w; 2313 uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h; 2314 2315 *base_address_offset = 2316 y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B + 2317 x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w; 2318} 2319 2320uint32_t 2321isl_surf_get_depth_format(const struct isl_device *dev, 2322 const struct isl_surf *surf) 2323{ 2324 /* Support for separate stencil buffers began in gen5. Support for 2325 * interleaved depthstencil buffers ceased in gen7. The intermediate gens, 2326 * those that supported separate and interleaved stencil, were gen5 and 2327 * gen6. 2328 * 2329 * For a list of all available formats, see the Sandybridge PRM >> Volume 2330 * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface 2331 * Format (p321). 2332 */ 2333 2334 bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT; 2335 2336 assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT); 2337 2338 if (has_stencil) 2339 assert(ISL_DEV_GEN(dev) < 7); 2340 2341 switch (surf->format) { 2342 default: 2343 unreachable("bad isl depth format"); 2344 case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS: 2345 assert(ISL_DEV_GEN(dev) < 7); 2346 return 0; /* D32_FLOAT_S8X24_UINT */ 2347 case ISL_FORMAT_R32_FLOAT: 2348 assert(!has_stencil); 2349 return 1; /* D32_FLOAT */ 2350 case ISL_FORMAT_R24_UNORM_X8_TYPELESS: 2351 if (has_stencil) { 2352 assert(ISL_DEV_GEN(dev) < 7); 2353 return 2; /* D24_UNORM_S8_UINT */ 2354 } else { 2355 assert(ISL_DEV_GEN(dev) >= 5); 2356 return 3; /* D24_UNORM_X8_UINT */ 2357 } 2358 case ISL_FORMAT_R16_UNORM: 2359 assert(!has_stencil); 2360 return 5; /* D16_UNORM */ 2361 } 2362} 2363 2364bool 2365isl_swizzle_supports_rendering(const struct gen_device_info *devinfo, 2366 struct isl_swizzle swizzle) 2367{ 2368 if (devinfo->is_haswell) { 2369 /* From the Haswell PRM, 2370 * RENDER_SURFACE_STATE::Shader Channel Select Red 2371 * 2372 * "The Shader channel selects also define which shader channels are 2373 * written to which surface channel. If the Shader channel select is 2374 * SCS_ZERO or SCS_ONE then it is not written to the surface. If the 2375 * shader channel select is SCS_RED it is written to the surface red 2376 * channel and so on. If more than one shader channel select is set 2377 * to the same surface channel only the first shader channel in RGBA 2378 * order will be written." 2379 */ 2380 return true; 2381 } else if (devinfo->gen <= 7) { 2382 /* Ivy Bridge and early doesn't have any swizzling */ 2383 return isl_swizzle_is_identity(swizzle); 2384 } else { 2385 /* From the Sky Lake PRM Vol. 2d, 2386 * RENDER_SURFACE_STATE::Shader Channel Select Red 2387 * 2388 * "For Render Target, Red, Green and Blue Shader Channel Selects 2389 * MUST be such that only valid components can be swapped i.e. only 2390 * change the order of components in the pixel. Any other values for 2391 * these Shader Channel Select fields are not valid for Render 2392 * Targets. This also means that there MUST not be multiple shader 2393 * channels mapped to the same RT channel." 2394 * 2395 * From the Sky Lake PRM Vol. 2d, 2396 * RENDER_SURFACE_STATE::Shader Channel Select Alpha 2397 * 2398 * "For Render Target, this field MUST be programmed to 2399 * value = SCS_ALPHA." 2400 */ 2401 return (swizzle.r == ISL_CHANNEL_SELECT_RED || 2402 swizzle.r == ISL_CHANNEL_SELECT_GREEN || 2403 swizzle.r == ISL_CHANNEL_SELECT_BLUE) && 2404 (swizzle.g == ISL_CHANNEL_SELECT_RED || 2405 swizzle.g == ISL_CHANNEL_SELECT_GREEN || 2406 swizzle.g == ISL_CHANNEL_SELECT_BLUE) && 2407 (swizzle.b == ISL_CHANNEL_SELECT_RED || 2408 swizzle.b == ISL_CHANNEL_SELECT_GREEN || 2409 swizzle.b == ISL_CHANNEL_SELECT_BLUE) && 2410 swizzle.r != swizzle.g && 2411 swizzle.r != swizzle.b && 2412 swizzle.g != swizzle.b && 2413 swizzle.a == ISL_CHANNEL_SELECT_ALPHA; 2414 } 2415} 2416 2417static enum isl_channel_select 2418swizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle) 2419{ 2420 switch (chan) { 2421 case ISL_CHANNEL_SELECT_ZERO: 2422 case ISL_CHANNEL_SELECT_ONE: 2423 return chan; 2424 case ISL_CHANNEL_SELECT_RED: 2425 return swizzle.r; 2426 case ISL_CHANNEL_SELECT_GREEN: 2427 return swizzle.g; 2428 case ISL_CHANNEL_SELECT_BLUE: 2429 return swizzle.b; 2430 case ISL_CHANNEL_SELECT_ALPHA: 2431 return swizzle.a; 2432 default: 2433 unreachable("Invalid swizzle component"); 2434 } 2435} 2436 2437/** 2438 * Returns the single swizzle that is equivalent to applying the two given 2439 * swizzles in sequence. 2440 */ 2441struct isl_swizzle 2442isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second) 2443{ 2444 return (struct isl_swizzle) { 2445 .r = swizzle_select(first.r, second), 2446 .g = swizzle_select(first.g, second), 2447 .b = swizzle_select(first.b, second), 2448 .a = swizzle_select(first.a, second), 2449 }; 2450} 2451 2452/** 2453 * Returns a swizzle that is the pseudo-inverse of this swizzle. 2454 */ 2455struct isl_swizzle 2456isl_swizzle_invert(struct isl_swizzle swizzle) 2457{ 2458 /* Default to zero for channels which do not show up in the swizzle */ 2459 enum isl_channel_select chans[4] = { 2460 ISL_CHANNEL_SELECT_ZERO, 2461 ISL_CHANNEL_SELECT_ZERO, 2462 ISL_CHANNEL_SELECT_ZERO, 2463 ISL_CHANNEL_SELECT_ZERO, 2464 }; 2465 2466 /* We go in ABGR order so that, if there are any duplicates, the first one 2467 * is taken if you look at it in RGBA order. This is what Haswell hardware 2468 * does for render target swizzles. 2469 */ 2470 if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4) 2471 chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA; 2472 if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4) 2473 chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE; 2474 if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4) 2475 chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN; 2476 if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4) 2477 chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED; 2478 2479 return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] }; 2480} 2481