1b8e80941Smrg/* 2b8e80941Smrg * Copyright 2015 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 
22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include <assert.h> 25b8e80941Smrg#include <stdarg.h> 26b8e80941Smrg#include <stdio.h> 27b8e80941Smrg 28b8e80941Smrg#include "genxml/genX_bits.h" 29b8e80941Smrg 30b8e80941Smrg#include "isl.h" 31b8e80941Smrg#include "isl_gen4.h" 32b8e80941Smrg#include "isl_gen6.h" 33b8e80941Smrg#include "isl_gen7.h" 34b8e80941Smrg#include "isl_gen8.h" 35b8e80941Smrg#include "isl_gen9.h" 36b8e80941Smrg#include "isl_priv.h" 37b8e80941Smrg 38b8e80941Smrgvoid 39b8e80941Smrgisl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, 40b8e80941Smrg uint32_t yt1, uint32_t yt2, 41b8e80941Smrg char *dst, const char *src, 42b8e80941Smrg uint32_t dst_pitch, int32_t src_pitch, 43b8e80941Smrg bool has_swizzling, 44b8e80941Smrg enum isl_tiling tiling, 45b8e80941Smrg isl_memcpy_type copy_type) 46b8e80941Smrg{ 47b8e80941Smrg#ifdef USE_SSE41 48b8e80941Smrg if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { 49b8e80941Smrg _isl_memcpy_linear_to_tiled_sse41( 50b8e80941Smrg xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 51b8e80941Smrg tiling, copy_type); 52b8e80941Smrg return; 53b8e80941Smrg } 54b8e80941Smrg#endif 55b8e80941Smrg 56b8e80941Smrg _isl_memcpy_linear_to_tiled( 57b8e80941Smrg xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 58b8e80941Smrg tiling, copy_type); 59b8e80941Smrg} 60b8e80941Smrg 61b8e80941Smrgvoid 62b8e80941Smrgisl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, 63b8e80941Smrg uint32_t yt1, uint32_t yt2, 64b8e80941Smrg char *dst, const char *src, 65b8e80941Smrg int32_t dst_pitch, uint32_t src_pitch, 66b8e80941Smrg bool has_swizzling, 67b8e80941Smrg enum isl_tiling tiling, 68b8e80941Smrg isl_memcpy_type copy_type) 69b8e80941Smrg{ 70b8e80941Smrg#ifdef USE_SSE41 71b8e80941Smrg if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { 72b8e80941Smrg _isl_memcpy_tiled_to_linear_sse41( 73b8e80941Smrg xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 74b8e80941Smrg tiling, copy_type); 75b8e80941Smrg return; 
76b8e80941Smrg } 77b8e80941Smrg#endif 78b8e80941Smrg 79b8e80941Smrg _isl_memcpy_tiled_to_linear( 80b8e80941Smrg xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, 81b8e80941Smrg tiling, copy_type); 82b8e80941Smrg} 83b8e80941Smrg 84b8e80941Smrgvoid PRINTFLIKE(3, 4) UNUSED 85b8e80941Smrg__isl_finishme(const char *file, int line, const char *fmt, ...) 86b8e80941Smrg{ 87b8e80941Smrg va_list ap; 88b8e80941Smrg char buf[512]; 89b8e80941Smrg 90b8e80941Smrg va_start(ap, fmt); 91b8e80941Smrg vsnprintf(buf, sizeof(buf), fmt, ap); 92b8e80941Smrg va_end(ap); 93b8e80941Smrg 94b8e80941Smrg fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf); 95b8e80941Smrg} 96b8e80941Smrg 97b8e80941Smrgvoid 98b8e80941Smrgisl_device_init(struct isl_device *dev, 99b8e80941Smrg const struct gen_device_info *info, 100b8e80941Smrg bool has_bit6_swizzling) 101b8e80941Smrg{ 102b8e80941Smrg /* Gen8+ don't have bit6 swizzling, ensure callsite is not confused. */ 103b8e80941Smrg assert(!(has_bit6_swizzling && info->gen >= 8)); 104b8e80941Smrg 105b8e80941Smrg dev->info = info; 106b8e80941Smrg dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6; 107b8e80941Smrg dev->has_bit6_swizzling = has_bit6_swizzling; 108b8e80941Smrg 109b8e80941Smrg /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some 110b8e80941Smrg * device properties at buildtime. Verify that the macros with the device 111b8e80941Smrg * properties chosen during runtime. 112b8e80941Smrg */ 113b8e80941Smrg ISL_DEV_GEN_SANITIZE(dev); 114b8e80941Smrg ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev); 115b8e80941Smrg 116b8e80941Smrg /* Did we break hiz or stencil? 
*/ 117b8e80941Smrg if (ISL_DEV_USE_SEPARATE_STENCIL(dev)) 118b8e80941Smrg assert(info->has_hiz_and_separate_stencil); 119b8e80941Smrg if (info->must_use_separate_stencil) 120b8e80941Smrg assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); 121b8e80941Smrg 122b8e80941Smrg dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4; 123b8e80941Smrg dev->ss.align = isl_align(dev->ss.size, 32); 124b8e80941Smrg 125b8e80941Smrg dev->ss.clear_color_state_size = 126b8e80941Smrg isl_align(CLEAR_COLOR_length(info) * 4, 64); 127b8e80941Smrg dev->ss.clear_color_state_offset = 128b8e80941Smrg RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4; 129b8e80941Smrg 130b8e80941Smrg dev->ss.clear_value_size = 131b8e80941Smrg isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) + 132b8e80941Smrg RENDER_SURFACE_STATE_GreenClearColor_bits(info) + 133b8e80941Smrg RENDER_SURFACE_STATE_BlueClearColor_bits(info) + 134b8e80941Smrg RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8; 135b8e80941Smrg 136b8e80941Smrg dev->ss.clear_value_offset = 137b8e80941Smrg RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4; 138b8e80941Smrg 139b8e80941Smrg assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0); 140b8e80941Smrg dev->ss.addr_offset = 141b8e80941Smrg RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8; 142b8e80941Smrg 143b8e80941Smrg /* The "Auxiliary Surface Base Address" field starts a bit higher up 144b8e80941Smrg * because the bottom 12 bits are used for other things. Round down to 145b8e80941Smrg * the nearest dword before. 
146b8e80941Smrg */ 147b8e80941Smrg dev->ss.aux_addr_offset = 148b8e80941Smrg (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8; 149b8e80941Smrg 150b8e80941Smrg dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4; 151b8e80941Smrg assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 152b8e80941Smrg dev->ds.depth_offset = 153b8e80941Smrg _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8; 154b8e80941Smrg 155b8e80941Smrg if (dev->use_separate_stencil) { 156b8e80941Smrg dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 + 157b8e80941Smrg _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 + 158b8e80941Smrg _3DSTATE_CLEAR_PARAMS_length(info) * 4; 159b8e80941Smrg 160b8e80941Smrg assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 161b8e80941Smrg dev->ds.stencil_offset = 162b8e80941Smrg _3DSTATE_DEPTH_BUFFER_length(info) * 4 + 163b8e80941Smrg _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8; 164b8e80941Smrg 165b8e80941Smrg assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 166b8e80941Smrg dev->ds.hiz_offset = 167b8e80941Smrg _3DSTATE_DEPTH_BUFFER_length(info) * 4 + 168b8e80941Smrg _3DSTATE_STENCIL_BUFFER_length(info) * 4 + 169b8e80941Smrg _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8; 170b8e80941Smrg } else { 171b8e80941Smrg dev->ds.stencil_offset = 0; 172b8e80941Smrg dev->ds.hiz_offset = 0; 173b8e80941Smrg } 174b8e80941Smrg} 175b8e80941Smrg 176b8e80941Smrg/** 177b8e80941Smrg * @brief Query the set of multisamples supported by the device. 178b8e80941Smrg * 179b8e80941Smrg * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always 180b8e80941Smrg * supported. 
181b8e80941Smrg */ 182b8e80941Smrgisl_sample_count_mask_t ATTRIBUTE_CONST 183b8e80941Smrgisl_device_get_sample_counts(struct isl_device *dev) 184b8e80941Smrg{ 185b8e80941Smrg if (ISL_DEV_GEN(dev) >= 9) { 186b8e80941Smrg return ISL_SAMPLE_COUNT_1_BIT | 187b8e80941Smrg ISL_SAMPLE_COUNT_2_BIT | 188b8e80941Smrg ISL_SAMPLE_COUNT_4_BIT | 189b8e80941Smrg ISL_SAMPLE_COUNT_8_BIT | 190b8e80941Smrg ISL_SAMPLE_COUNT_16_BIT; 191b8e80941Smrg } else if (ISL_DEV_GEN(dev) >= 8) { 192b8e80941Smrg return ISL_SAMPLE_COUNT_1_BIT | 193b8e80941Smrg ISL_SAMPLE_COUNT_2_BIT | 194b8e80941Smrg ISL_SAMPLE_COUNT_4_BIT | 195b8e80941Smrg ISL_SAMPLE_COUNT_8_BIT; 196b8e80941Smrg } else if (ISL_DEV_GEN(dev) >= 7) { 197b8e80941Smrg return ISL_SAMPLE_COUNT_1_BIT | 198b8e80941Smrg ISL_SAMPLE_COUNT_4_BIT | 199b8e80941Smrg ISL_SAMPLE_COUNT_8_BIT; 200b8e80941Smrg } else if (ISL_DEV_GEN(dev) >= 6) { 201b8e80941Smrg return ISL_SAMPLE_COUNT_1_BIT | 202b8e80941Smrg ISL_SAMPLE_COUNT_4_BIT; 203b8e80941Smrg } else { 204b8e80941Smrg return ISL_SAMPLE_COUNT_1_BIT; 205b8e80941Smrg } 206b8e80941Smrg} 207b8e80941Smrg 208b8e80941Smrg/** 209b8e80941Smrg * @param[out] info is written only on success 210b8e80941Smrg */ 211b8e80941Smrgstatic void 212b8e80941Smrgisl_tiling_get_info(enum isl_tiling tiling, 213b8e80941Smrg uint32_t format_bpb, 214b8e80941Smrg struct isl_tile_info *tile_info) 215b8e80941Smrg{ 216b8e80941Smrg const uint32_t bs = format_bpb / 8; 217b8e80941Smrg struct isl_extent2d logical_el, phys_B; 218b8e80941Smrg 219b8e80941Smrg if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) { 220b8e80941Smrg /* It is possible to have non-power-of-two formats in a tiled buffer. 221b8e80941Smrg * The easiest way to handle this is to treat the tile as if it is three 222b8e80941Smrg * times as wide. This way no pixel will ever cross a tile boundary. 223b8e80941Smrg * This really only works on legacy X and Y tiling formats. 
224b8e80941Smrg */ 225b8e80941Smrg assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0); 226b8e80941Smrg assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3)); 227b8e80941Smrg isl_tiling_get_info(tiling, format_bpb / 3, tile_info); 228b8e80941Smrg return; 229b8e80941Smrg } 230b8e80941Smrg 231b8e80941Smrg switch (tiling) { 232b8e80941Smrg case ISL_TILING_LINEAR: 233b8e80941Smrg assert(bs > 0); 234b8e80941Smrg logical_el = isl_extent2d(1, 1); 235b8e80941Smrg phys_B = isl_extent2d(bs, 1); 236b8e80941Smrg break; 237b8e80941Smrg 238b8e80941Smrg case ISL_TILING_X: 239b8e80941Smrg assert(bs > 0); 240b8e80941Smrg logical_el = isl_extent2d(512 / bs, 8); 241b8e80941Smrg phys_B = isl_extent2d(512, 8); 242b8e80941Smrg break; 243b8e80941Smrg 244b8e80941Smrg case ISL_TILING_Y0: 245b8e80941Smrg assert(bs > 0); 246b8e80941Smrg logical_el = isl_extent2d(128 / bs, 32); 247b8e80941Smrg phys_B = isl_extent2d(128, 32); 248b8e80941Smrg break; 249b8e80941Smrg 250b8e80941Smrg case ISL_TILING_W: 251b8e80941Smrg assert(bs == 1); 252b8e80941Smrg logical_el = isl_extent2d(64, 64); 253b8e80941Smrg /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch: 254b8e80941Smrg * 255b8e80941Smrg * "If the surface is a stencil buffer (and thus has Tile Mode set 256b8e80941Smrg * to TILEMODE_WMAJOR), the pitch must be set to 2x the value 257b8e80941Smrg * computed based on width, as the stencil buffer is stored with two 258b8e80941Smrg * rows interleaved." 259b8e80941Smrg * 260b8e80941Smrg * This, together with the fact that stencil buffers are referred to as 261b8e80941Smrg * being Y-tiled in the PRMs for older hardware implies that the 262b8e80941Smrg * physical size of a W-tile is actually the same as for a Y-tile. 
263b8e80941Smrg */ 264b8e80941Smrg phys_B = isl_extent2d(128, 32); 265b8e80941Smrg break; 266b8e80941Smrg 267b8e80941Smrg case ISL_TILING_Yf: 268b8e80941Smrg case ISL_TILING_Ys: { 269b8e80941Smrg bool is_Ys = tiling == ISL_TILING_Ys; 270b8e80941Smrg 271b8e80941Smrg assert(bs > 0); 272b8e80941Smrg unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys)); 273b8e80941Smrg unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys)); 274b8e80941Smrg 275b8e80941Smrg logical_el = isl_extent2d(width / bs, height); 276b8e80941Smrg phys_B = isl_extent2d(width, height); 277b8e80941Smrg break; 278b8e80941Smrg } 279b8e80941Smrg 280b8e80941Smrg case ISL_TILING_HIZ: 281b8e80941Smrg /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4 282b8e80941Smrg * 128bpb format. The tiling has the same physical dimensions as 283b8e80941Smrg * Y-tiling but actually has two HiZ columns per Y-tiled column. 284b8e80941Smrg */ 285b8e80941Smrg assert(bs == 16); 286b8e80941Smrg logical_el = isl_extent2d(16, 16); 287b8e80941Smrg phys_B = isl_extent2d(128, 32); 288b8e80941Smrg break; 289b8e80941Smrg 290b8e80941Smrg case ISL_TILING_CCS: 291b8e80941Smrg /* CCS surfaces are required to have one of the GENX_CCS_* formats which 292b8e80941Smrg * have a block size of 1 or 2 bits per block and each CCS element 293b8e80941Smrg * corresponds to one cache-line pair in the main surface. From the Sky 294b8e80941Smrg * Lake PRM Vol. 12 in the section on planes: 295b8e80941Smrg * 296b8e80941Smrg * "The Color Control Surface (CCS) contains the compression status 297b8e80941Smrg * of the cache-line pairs. The compression state of the cache-line 298b8e80941Smrg * pair is specified by 2 bits in the CCS. Each CCS cache-line 299b8e80941Smrg * represents an area on the main surface of 16x16 sets of 128 byte 300b8e80941Smrg * Y-tiled cache-line-pairs. CCS is always Y tiled." 301b8e80941Smrg * 302b8e80941Smrg * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines. 
303b8e80941Smrg * Since each cache line corresponds to a 16x16 set of cache-line pairs, 304b8e80941Smrg * that yields total tile area of 128x128 cache-line pairs or CCS 305b8e80941Smrg * elements. On older hardware, each CCS element is 1 bit and the tile 306b8e80941Smrg * is 128x256 elements. 307b8e80941Smrg */ 308b8e80941Smrg assert(format_bpb == 1 || format_bpb == 2); 309b8e80941Smrg logical_el = isl_extent2d(128, 256 / format_bpb); 310b8e80941Smrg phys_B = isl_extent2d(128, 32); 311b8e80941Smrg break; 312b8e80941Smrg 313b8e80941Smrg default: 314b8e80941Smrg unreachable("not reached"); 315b8e80941Smrg } /* end switch */ 316b8e80941Smrg 317b8e80941Smrg *tile_info = (struct isl_tile_info) { 318b8e80941Smrg .tiling = tiling, 319b8e80941Smrg .format_bpb = format_bpb, 320b8e80941Smrg .logical_extent_el = logical_el, 321b8e80941Smrg .phys_extent_B = phys_B, 322b8e80941Smrg }; 323b8e80941Smrg} 324b8e80941Smrg 325b8e80941Smrgbool 326b8e80941Smrgisl_color_value_is_zero(union isl_color_value value, 327b8e80941Smrg enum isl_format format) 328b8e80941Smrg{ 329b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(format); 330b8e80941Smrg 331b8e80941Smrg#define RETURN_FALSE_IF_NOT_0(c, i) \ 332b8e80941Smrg if (fmtl->channels.c.bits && value.u32[i] != 0) \ 333b8e80941Smrg return false 334b8e80941Smrg 335b8e80941Smrg RETURN_FALSE_IF_NOT_0(r, 0); 336b8e80941Smrg RETURN_FALSE_IF_NOT_0(g, 1); 337b8e80941Smrg RETURN_FALSE_IF_NOT_0(b, 2); 338b8e80941Smrg RETURN_FALSE_IF_NOT_0(a, 3); 339b8e80941Smrg 340b8e80941Smrg#undef RETURN_FALSE_IF_NOT_0 341b8e80941Smrg 342b8e80941Smrg return true; 343b8e80941Smrg} 344b8e80941Smrg 345b8e80941Smrgbool 346b8e80941Smrgisl_color_value_is_zero_one(union isl_color_value value, 347b8e80941Smrg enum isl_format format) 348b8e80941Smrg{ 349b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(format); 350b8e80941Smrg 351b8e80941Smrg#define RETURN_FALSE_IF_NOT_0_1(c, i, field) \ 352b8e80941Smrg if 
(fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \ 353b8e80941Smrg return false 354b8e80941Smrg 355b8e80941Smrg if (isl_format_has_int_channel(format)) { 356b8e80941Smrg RETURN_FALSE_IF_NOT_0_1(r, 0, u32); 357b8e80941Smrg RETURN_FALSE_IF_NOT_0_1(g, 1, u32); 358b8e80941Smrg RETURN_FALSE_IF_NOT_0_1(b, 2, u32); 359b8e80941Smrg RETURN_FALSE_IF_NOT_0_1(a, 3, u32); 360b8e80941Smrg } else { 361b8e80941Smrg RETURN_FALSE_IF_NOT_0_1(r, 0, f32); 362b8e80941Smrg RETURN_FALSE_IF_NOT_0_1(g, 1, f32); 363b8e80941Smrg RETURN_FALSE_IF_NOT_0_1(b, 2, f32); 364b8e80941Smrg RETURN_FALSE_IF_NOT_0_1(a, 3, f32); 365b8e80941Smrg } 366b8e80941Smrg 367b8e80941Smrg#undef RETURN_FALSE_IF_NOT_0_1 368b8e80941Smrg 369b8e80941Smrg return true; 370b8e80941Smrg} 371b8e80941Smrg 372b8e80941Smrg/** 373b8e80941Smrg * @param[out] tiling is set only on success 374b8e80941Smrg */ 375b8e80941Smrgstatic bool 376b8e80941Smrgisl_surf_choose_tiling(const struct isl_device *dev, 377b8e80941Smrg const struct isl_surf_init_info *restrict info, 378b8e80941Smrg enum isl_tiling *tiling) 379b8e80941Smrg{ 380b8e80941Smrg isl_tiling_flags_t tiling_flags = info->tiling_flags; 381b8e80941Smrg 382b8e80941Smrg /* HiZ surfaces always use the HiZ tiling */ 383b8e80941Smrg if (info->usage & ISL_SURF_USAGE_HIZ_BIT) { 384b8e80941Smrg assert(info->format == ISL_FORMAT_HIZ); 385b8e80941Smrg assert(tiling_flags == ISL_TILING_HIZ_BIT); 386b8e80941Smrg *tiling = ISL_TILING_HIZ; 387b8e80941Smrg return true; 388b8e80941Smrg } 389b8e80941Smrg 390b8e80941Smrg /* CCS surfaces always use the CCS tiling */ 391b8e80941Smrg if (info->usage & ISL_SURF_USAGE_CCS_BIT) { 392b8e80941Smrg assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS); 393b8e80941Smrg assert(tiling_flags == ISL_TILING_CCS_BIT); 394b8e80941Smrg *tiling = ISL_TILING_CCS; 395b8e80941Smrg return true; 396b8e80941Smrg } 397b8e80941Smrg 398b8e80941Smrg if (ISL_DEV_GEN(dev) >= 6) { 399b8e80941Smrg isl_gen6_filter_tiling(dev, info, &tiling_flags); 
400b8e80941Smrg } else { 401b8e80941Smrg isl_gen4_filter_tiling(dev, info, &tiling_flags); 402b8e80941Smrg } 403b8e80941Smrg 404b8e80941Smrg #define CHOOSE(__tiling) \ 405b8e80941Smrg do { \ 406b8e80941Smrg if (tiling_flags & (1u << (__tiling))) { \ 407b8e80941Smrg *tiling = (__tiling); \ 408b8e80941Smrg return true; \ 409b8e80941Smrg } \ 410b8e80941Smrg } while (0) 411b8e80941Smrg 412b8e80941Smrg /* Of the tiling modes remaining, choose the one that offers the best 413b8e80941Smrg * performance. 414b8e80941Smrg */ 415b8e80941Smrg 416b8e80941Smrg if (info->dim == ISL_SURF_DIM_1D) { 417b8e80941Smrg /* Prefer linear for 1D surfaces because they do not benefit from 418b8e80941Smrg * tiling. To the contrary, tiling leads to wasted memory and poor 419b8e80941Smrg * memory locality due to the swizzling and alignment restrictions 420b8e80941Smrg * required in tiled surfaces. 421b8e80941Smrg */ 422b8e80941Smrg CHOOSE(ISL_TILING_LINEAR); 423b8e80941Smrg } 424b8e80941Smrg 425b8e80941Smrg CHOOSE(ISL_TILING_Ys); 426b8e80941Smrg CHOOSE(ISL_TILING_Yf); 427b8e80941Smrg CHOOSE(ISL_TILING_Y0); 428b8e80941Smrg CHOOSE(ISL_TILING_X); 429b8e80941Smrg CHOOSE(ISL_TILING_W); 430b8e80941Smrg CHOOSE(ISL_TILING_LINEAR); 431b8e80941Smrg 432b8e80941Smrg #undef CHOOSE 433b8e80941Smrg 434b8e80941Smrg /* No tiling mode accomodates the inputs. 
*/ 435b8e80941Smrg return false; 436b8e80941Smrg} 437b8e80941Smrg 438b8e80941Smrgstatic bool 439b8e80941Smrgisl_choose_msaa_layout(const struct isl_device *dev, 440b8e80941Smrg const struct isl_surf_init_info *info, 441b8e80941Smrg enum isl_tiling tiling, 442b8e80941Smrg enum isl_msaa_layout *msaa_layout) 443b8e80941Smrg{ 444b8e80941Smrg if (ISL_DEV_GEN(dev) >= 8) { 445b8e80941Smrg return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout); 446b8e80941Smrg } else if (ISL_DEV_GEN(dev) >= 7) { 447b8e80941Smrg return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout); 448b8e80941Smrg } else if (ISL_DEV_GEN(dev) >= 6) { 449b8e80941Smrg return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout); 450b8e80941Smrg } else { 451b8e80941Smrg return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout); 452b8e80941Smrg } 453b8e80941Smrg} 454b8e80941Smrg 455b8e80941Smrgstruct isl_extent2d 456b8e80941Smrgisl_get_interleaved_msaa_px_size_sa(uint32_t samples) 457b8e80941Smrg{ 458b8e80941Smrg assert(isl_is_pow2(samples)); 459b8e80941Smrg 460b8e80941Smrg /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level 461b8e80941Smrg * Sizes (p133): 462b8e80941Smrg * 463b8e80941Smrg * If the surface is multisampled and it is a depth or stencil surface 464b8e80941Smrg * or Multisampled Surface StorageFormat in SURFACE_STATE is 465b8e80941Smrg * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before 466b8e80941Smrg * proceeding: [...] 
467b8e80941Smrg */ 468b8e80941Smrg return (struct isl_extent2d) { 469b8e80941Smrg .width = 1 << ((ffs(samples) - 0) / 2), 470b8e80941Smrg .height = 1 << ((ffs(samples) - 1) / 2), 471b8e80941Smrg }; 472b8e80941Smrg} 473b8e80941Smrg 474b8e80941Smrgstatic void 475b8e80941Smrgisl_msaa_interleaved_scale_px_to_sa(uint32_t samples, 476b8e80941Smrg uint32_t *width, uint32_t *height) 477b8e80941Smrg{ 478b8e80941Smrg const struct isl_extent2d px_size_sa = 479b8e80941Smrg isl_get_interleaved_msaa_px_size_sa(samples); 480b8e80941Smrg 481b8e80941Smrg if (width) 482b8e80941Smrg *width = isl_align(*width, 2) * px_size_sa.width; 483b8e80941Smrg if (height) 484b8e80941Smrg *height = isl_align(*height, 2) * px_size_sa.height; 485b8e80941Smrg} 486b8e80941Smrg 487b8e80941Smrgstatic enum isl_array_pitch_span 488b8e80941Smrgisl_choose_array_pitch_span(const struct isl_device *dev, 489b8e80941Smrg const struct isl_surf_init_info *restrict info, 490b8e80941Smrg enum isl_dim_layout dim_layout, 491b8e80941Smrg const struct isl_extent4d *phys_level0_sa) 492b8e80941Smrg{ 493b8e80941Smrg switch (dim_layout) { 494b8e80941Smrg case ISL_DIM_LAYOUT_GEN9_1D: 495b8e80941Smrg case ISL_DIM_LAYOUT_GEN4_2D: 496b8e80941Smrg if (ISL_DEV_GEN(dev) >= 8) { 497b8e80941Smrg /* QPitch becomes programmable in Broadwell. So choose the 498b8e80941Smrg * most compact QPitch possible in order to conserve memory. 499b8e80941Smrg * 500b8e80941Smrg * From the Broadwell PRM >> Volume 2d: Command Reference: Structures 501b8e80941Smrg * >> RENDER_SURFACE_STATE Surface QPitch (p325): 502b8e80941Smrg * 503b8e80941Smrg * - Software must ensure that this field is set to a value 504b8e80941Smrg * sufficiently large such that the array slices in the surface 505b8e80941Smrg * do not overlap. Refer to the Memory Data Formats section for 506b8e80941Smrg * information on how surfaces are stored in memory. 507b8e80941Smrg * 508b8e80941Smrg * - This field specifies the distance in rows between array 509b8e80941Smrg * slices. 
It is used only in the following cases: 510b8e80941Smrg * 511b8e80941Smrg * - Surface Array is enabled OR 512b8e80941Smrg * - Number of Mulitsamples is not NUMSAMPLES_1 and 513b8e80941Smrg * Multisampled Surface Storage Format set to MSFMT_MSS OR 514b8e80941Smrg * - Surface Type is SURFTYPE_CUBE 515b8e80941Smrg */ 516b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 517b8e80941Smrg } else if (ISL_DEV_GEN(dev) >= 7) { 518b8e80941Smrg /* Note that Ivybridge introduces 519b8e80941Smrg * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the 520b8e80941Smrg * driver more control over the QPitch. 521b8e80941Smrg */ 522b8e80941Smrg 523b8e80941Smrg if (phys_level0_sa->array_len == 1) { 524b8e80941Smrg /* The hardware will never use the QPitch. So choose the most 525b8e80941Smrg * compact QPitch possible in order to conserve memory. 526b8e80941Smrg */ 527b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 528b8e80941Smrg } 529b8e80941Smrg 530b8e80941Smrg if (isl_surf_usage_is_depth_or_stencil(info->usage) || 531b8e80941Smrg (info->usage & ISL_SURF_USAGE_HIZ_BIT)) { 532b8e80941Smrg /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >> 533b8e80941Smrg * Section 6.18.4.7: Surface Arrays (p112): 534b8e80941Smrg * 535b8e80941Smrg * If Surface Array Spacing is set to ARYSPC_FULL (note that 536b8e80941Smrg * the depth buffer and stencil buffer have an implied value of 537b8e80941Smrg * ARYSPC_FULL): 538b8e80941Smrg */ 539b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_FULL; 540b8e80941Smrg } 541b8e80941Smrg 542b8e80941Smrg if (info->levels == 1) { 543b8e80941Smrg /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing 544b8e80941Smrg * to ARYSPC_LOD0. 
545b8e80941Smrg */ 546b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 547b8e80941Smrg } 548b8e80941Smrg 549b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_FULL; 550b8e80941Smrg } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && 551b8e80941Smrg ISL_DEV_USE_SEPARATE_STENCIL(dev) && 552b8e80941Smrg isl_surf_usage_is_stencil(info->usage)) { 553b8e80941Smrg /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 554b8e80941Smrg * Graphics Core >> Section 7.18.3.7: Surface Arrays: 555b8e80941Smrg * 556b8e80941Smrg * The separate stencil buffer does not support mip mapping, thus 557b8e80941Smrg * the storage for LODs other than LOD 0 is not needed. 558b8e80941Smrg */ 559b8e80941Smrg assert(info->levels == 1); 560b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 561b8e80941Smrg } else { 562b8e80941Smrg if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && 563b8e80941Smrg ISL_DEV_USE_SEPARATE_STENCIL(dev) && 564b8e80941Smrg isl_surf_usage_is_stencil(info->usage)) { 565b8e80941Smrg /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 566b8e80941Smrg * Graphics Core >> Section 7.18.3.7: Surface Arrays: 567b8e80941Smrg * 568b8e80941Smrg * The separate stencil buffer does not support mip mapping, 569b8e80941Smrg * thus the storage for LODs other than LOD 0 is not needed. 570b8e80941Smrg */ 571b8e80941Smrg assert(info->levels == 1); 572b8e80941Smrg assert(phys_level0_sa->array_len == 1); 573b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 574b8e80941Smrg } 575b8e80941Smrg 576b8e80941Smrg if (phys_level0_sa->array_len == 1) { 577b8e80941Smrg /* The hardware will never use the QPitch. So choose the most 578b8e80941Smrg * compact QPitch possible in order to conserve memory. 
579b8e80941Smrg */ 580b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 581b8e80941Smrg } 582b8e80941Smrg 583b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_FULL; 584b8e80941Smrg } 585b8e80941Smrg 586b8e80941Smrg case ISL_DIM_LAYOUT_GEN4_3D: 587b8e80941Smrg /* The hardware will never use the QPitch. So choose the most 588b8e80941Smrg * compact QPitch possible in order to conserve memory. 589b8e80941Smrg */ 590b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 591b8e80941Smrg 592b8e80941Smrg case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 593b8e80941Smrg /* Each array image in the gen6 stencil of HiZ surface is compact in the 594b8e80941Smrg * sense that every LOD is a compact array of the same size as LOD0. 595b8e80941Smrg */ 596b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 597b8e80941Smrg } 598b8e80941Smrg 599b8e80941Smrg unreachable("bad isl_dim_layout"); 600b8e80941Smrg return ISL_ARRAY_PITCH_SPAN_FULL; 601b8e80941Smrg} 602b8e80941Smrg 603b8e80941Smrgstatic void 604b8e80941Smrgisl_choose_image_alignment_el(const struct isl_device *dev, 605b8e80941Smrg const struct isl_surf_init_info *restrict info, 606b8e80941Smrg enum isl_tiling tiling, 607b8e80941Smrg enum isl_dim_layout dim_layout, 608b8e80941Smrg enum isl_msaa_layout msaa_layout, 609b8e80941Smrg struct isl_extent3d *image_align_el) 610b8e80941Smrg{ 611b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 612b8e80941Smrg if (fmtl->txc == ISL_TXC_MCS) { 613b8e80941Smrg assert(tiling == ISL_TILING_Y0); 614b8e80941Smrg 615b8e80941Smrg /* 616b8e80941Smrg * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)": 617b8e80941Smrg * 618b8e80941Smrg * Height, width, and layout of MCS buffer in this case must match with 619b8e80941Smrg * Render Target height, width, and layout. MCS buffer is tiledY. 620b8e80941Smrg * 621b8e80941Smrg * To avoid wasting memory, choose the smallest alignment possible: 622b8e80941Smrg * HALIGN_4 and VALIGN_4. 
623b8e80941Smrg */ 624b8e80941Smrg *image_align_el = isl_extent3d(4, 4, 1); 625b8e80941Smrg return; 626b8e80941Smrg } else if (info->format == ISL_FORMAT_HIZ) { 627b8e80941Smrg assert(ISL_DEV_GEN(dev) >= 6); 628b8e80941Smrg if (ISL_DEV_GEN(dev) == 6) { 629b8e80941Smrg /* HiZ surfaces on Sandy Bridge are packed tightly. */ 630b8e80941Smrg *image_align_el = isl_extent3d(1, 1, 1); 631b8e80941Smrg } else { 632b8e80941Smrg /* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the 633b8e80941Smrg * primary surface which works out to 2x2 HiZ elments. 634b8e80941Smrg */ 635b8e80941Smrg *image_align_el = isl_extent3d(2, 2, 1); 636b8e80941Smrg } 637b8e80941Smrg return; 638b8e80941Smrg } 639b8e80941Smrg 640b8e80941Smrg if (ISL_DEV_GEN(dev) >= 9) { 641b8e80941Smrg isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout, 642b8e80941Smrg msaa_layout, image_align_el); 643b8e80941Smrg } else if (ISL_DEV_GEN(dev) >= 8) { 644b8e80941Smrg isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout, 645b8e80941Smrg msaa_layout, image_align_el); 646b8e80941Smrg } else if (ISL_DEV_GEN(dev) >= 7) { 647b8e80941Smrg isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout, 648b8e80941Smrg msaa_layout, image_align_el); 649b8e80941Smrg } else if (ISL_DEV_GEN(dev) >= 6) { 650b8e80941Smrg isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout, 651b8e80941Smrg msaa_layout, image_align_el); 652b8e80941Smrg } else { 653b8e80941Smrg isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout, 654b8e80941Smrg msaa_layout, image_align_el); 655b8e80941Smrg } 656b8e80941Smrg} 657b8e80941Smrg 658b8e80941Smrgstatic enum isl_dim_layout 659b8e80941Smrgisl_surf_choose_dim_layout(const struct isl_device *dev, 660b8e80941Smrg enum isl_surf_dim logical_dim, 661b8e80941Smrg enum isl_tiling tiling, 662b8e80941Smrg isl_surf_usage_flags_t usage) 663b8e80941Smrg{ 664b8e80941Smrg /* Sandy bridge needs a special layout for HiZ and stencil. 
*/ 665b8e80941Smrg if (ISL_DEV_GEN(dev) == 6 && 666b8e80941Smrg (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ)) 667b8e80941Smrg return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ; 668b8e80941Smrg 669b8e80941Smrg if (ISL_DEV_GEN(dev) >= 9) { 670b8e80941Smrg switch (logical_dim) { 671b8e80941Smrg case ISL_SURF_DIM_1D: 672b8e80941Smrg /* From the Sky Lake PRM Vol. 5, "1D Surfaces": 673b8e80941Smrg * 674b8e80941Smrg * One-dimensional surfaces use a tiling mode of linear. 675b8e80941Smrg * Technically, they are not tiled resources, but the Tiled 676b8e80941Smrg * Resource Mode field in RENDER_SURFACE_STATE is still used to 677b8e80941Smrg * indicate the alignment requirements for this linear surface 678b8e80941Smrg * (See 1D Alignment requirements for how 4K and 64KB Tiled 679b8e80941Smrg * Resource Modes impact alignment). Alternatively, a 1D surface 680b8e80941Smrg * can be defined as a 2D tiled surface (e.g. TileY or TileX) with 681b8e80941Smrg * a height of 0. 682b8e80941Smrg * 683b8e80941Smrg * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear 684b8e80941Smrg * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used. 685b8e80941Smrg */ 686b8e80941Smrg if (tiling == ISL_TILING_LINEAR) 687b8e80941Smrg return ISL_DIM_LAYOUT_GEN9_1D; 688b8e80941Smrg else 689b8e80941Smrg return ISL_DIM_LAYOUT_GEN4_2D; 690b8e80941Smrg case ISL_SURF_DIM_2D: 691b8e80941Smrg case ISL_SURF_DIM_3D: 692b8e80941Smrg return ISL_DIM_LAYOUT_GEN4_2D; 693b8e80941Smrg } 694b8e80941Smrg } else { 695b8e80941Smrg switch (logical_dim) { 696b8e80941Smrg case ISL_SURF_DIM_1D: 697b8e80941Smrg case ISL_SURF_DIM_2D: 698b8e80941Smrg /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout": 699b8e80941Smrg * 700b8e80941Smrg * The cube face textures are stored in the same way as 3D surfaces 701b8e80941Smrg * are stored (see section 6.17.5 for details). 
For cube surfaces, 702b8e80941Smrg * however, the depth is equal to the number of faces (always 6) and 703b8e80941Smrg * is not reduced for each MIP. 704b8e80941Smrg */ 705b8e80941Smrg if (ISL_DEV_GEN(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT)) 706b8e80941Smrg return ISL_DIM_LAYOUT_GEN4_3D; 707b8e80941Smrg 708b8e80941Smrg return ISL_DIM_LAYOUT_GEN4_2D; 709b8e80941Smrg case ISL_SURF_DIM_3D: 710b8e80941Smrg return ISL_DIM_LAYOUT_GEN4_3D; 711b8e80941Smrg } 712b8e80941Smrg } 713b8e80941Smrg 714b8e80941Smrg unreachable("bad isl_surf_dim"); 715b8e80941Smrg return ISL_DIM_LAYOUT_GEN4_2D; 716b8e80941Smrg} 717b8e80941Smrg 718b8e80941Smrg/** 719b8e80941Smrg * Calculate the physical extent of the surface's first level, in units of 720b8e80941Smrg * surface samples. 721b8e80941Smrg */ 722b8e80941Smrgstatic void 723b8e80941Smrgisl_calc_phys_level0_extent_sa(const struct isl_device *dev, 724b8e80941Smrg const struct isl_surf_init_info *restrict info, 725b8e80941Smrg enum isl_dim_layout dim_layout, 726b8e80941Smrg enum isl_tiling tiling, 727b8e80941Smrg enum isl_msaa_layout msaa_layout, 728b8e80941Smrg struct isl_extent4d *phys_level0_sa) 729b8e80941Smrg{ 730b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 731b8e80941Smrg 732b8e80941Smrg if (isl_format_is_yuv(info->format)) 733b8e80941Smrg isl_finishme("%s:%s: YUV format", __FILE__, __func__); 734b8e80941Smrg 735b8e80941Smrg switch (info->dim) { 736b8e80941Smrg case ISL_SURF_DIM_1D: 737b8e80941Smrg assert(info->height == 1); 738b8e80941Smrg assert(info->depth == 1); 739b8e80941Smrg assert(info->samples == 1); 740b8e80941Smrg 741b8e80941Smrg switch (dim_layout) { 742b8e80941Smrg case ISL_DIM_LAYOUT_GEN4_3D: 743b8e80941Smrg unreachable("bad isl_dim_layout"); 744b8e80941Smrg 745b8e80941Smrg case ISL_DIM_LAYOUT_GEN9_1D: 746b8e80941Smrg case ISL_DIM_LAYOUT_GEN4_2D: 747b8e80941Smrg case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 748b8e80941Smrg *phys_level0_sa = (struct isl_extent4d) { 749b8e80941Smrg 
.w = info->width, 750b8e80941Smrg .h = 1, 751b8e80941Smrg .d = 1, 752b8e80941Smrg .a = info->array_len, 753b8e80941Smrg }; 754b8e80941Smrg break; 755b8e80941Smrg } 756b8e80941Smrg break; 757b8e80941Smrg 758b8e80941Smrg case ISL_SURF_DIM_2D: 759b8e80941Smrg if (ISL_DEV_GEN(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT)) 760b8e80941Smrg assert(dim_layout == ISL_DIM_LAYOUT_GEN4_3D); 761b8e80941Smrg else 762b8e80941Smrg assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D || 763b8e80941Smrg dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ); 764b8e80941Smrg 765b8e80941Smrg if (tiling == ISL_TILING_Ys && info->samples > 1) 766b8e80941Smrg isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); 767b8e80941Smrg 768b8e80941Smrg switch (msaa_layout) { 769b8e80941Smrg case ISL_MSAA_LAYOUT_NONE: 770b8e80941Smrg assert(info->depth == 1); 771b8e80941Smrg assert(info->samples == 1); 772b8e80941Smrg 773b8e80941Smrg *phys_level0_sa = (struct isl_extent4d) { 774b8e80941Smrg .w = info->width, 775b8e80941Smrg .h = info->height, 776b8e80941Smrg .d = 1, 777b8e80941Smrg .a = info->array_len, 778b8e80941Smrg }; 779b8e80941Smrg break; 780b8e80941Smrg 781b8e80941Smrg case ISL_MSAA_LAYOUT_ARRAY: 782b8e80941Smrg assert(info->depth == 1); 783b8e80941Smrg assert(info->levels == 1); 784b8e80941Smrg assert(isl_format_supports_multisampling(dev->info, info->format)); 785b8e80941Smrg assert(fmtl->bw == 1 && fmtl->bh == 1); 786b8e80941Smrg 787b8e80941Smrg *phys_level0_sa = (struct isl_extent4d) { 788b8e80941Smrg .w = info->width, 789b8e80941Smrg .h = info->height, 790b8e80941Smrg .d = 1, 791b8e80941Smrg .a = info->array_len * info->samples, 792b8e80941Smrg }; 793b8e80941Smrg break; 794b8e80941Smrg 795b8e80941Smrg case ISL_MSAA_LAYOUT_INTERLEAVED: 796b8e80941Smrg assert(info->depth == 1); 797b8e80941Smrg assert(info->levels == 1); 798b8e80941Smrg assert(isl_format_supports_multisampling(dev->info, info->format)); 799b8e80941Smrg 800b8e80941Smrg *phys_level0_sa = (struct isl_extent4d) { 
801b8e80941Smrg .w = info->width, 802b8e80941Smrg .h = info->height, 803b8e80941Smrg .d = 1, 804b8e80941Smrg .a = info->array_len, 805b8e80941Smrg }; 806b8e80941Smrg 807b8e80941Smrg isl_msaa_interleaved_scale_px_to_sa(info->samples, 808b8e80941Smrg &phys_level0_sa->w, 809b8e80941Smrg &phys_level0_sa->h); 810b8e80941Smrg break; 811b8e80941Smrg } 812b8e80941Smrg break; 813b8e80941Smrg 814b8e80941Smrg case ISL_SURF_DIM_3D: 815b8e80941Smrg assert(info->array_len == 1); 816b8e80941Smrg assert(info->samples == 1); 817b8e80941Smrg 818b8e80941Smrg if (fmtl->bd > 1) { 819b8e80941Smrg isl_finishme("%s:%s: compression block with depth > 1", 820b8e80941Smrg __FILE__, __func__); 821b8e80941Smrg } 822b8e80941Smrg 823b8e80941Smrg switch (dim_layout) { 824b8e80941Smrg case ISL_DIM_LAYOUT_GEN9_1D: 825b8e80941Smrg case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 826b8e80941Smrg unreachable("bad isl_dim_layout"); 827b8e80941Smrg 828b8e80941Smrg case ISL_DIM_LAYOUT_GEN4_2D: 829b8e80941Smrg assert(ISL_DEV_GEN(dev) >= 9); 830b8e80941Smrg 831b8e80941Smrg *phys_level0_sa = (struct isl_extent4d) { 832b8e80941Smrg .w = info->width, 833b8e80941Smrg .h = info->height, 834b8e80941Smrg .d = 1, 835b8e80941Smrg .a = info->depth, 836b8e80941Smrg }; 837b8e80941Smrg break; 838b8e80941Smrg 839b8e80941Smrg case ISL_DIM_LAYOUT_GEN4_3D: 840b8e80941Smrg assert(ISL_DEV_GEN(dev) < 9); 841b8e80941Smrg *phys_level0_sa = (struct isl_extent4d) { 842b8e80941Smrg .w = info->width, 843b8e80941Smrg .h = info->height, 844b8e80941Smrg .d = info->depth, 845b8e80941Smrg .a = 1, 846b8e80941Smrg }; 847b8e80941Smrg break; 848b8e80941Smrg } 849b8e80941Smrg break; 850b8e80941Smrg } 851b8e80941Smrg} 852b8e80941Smrg 853b8e80941Smrg/** 854b8e80941Smrg * Calculate the pitch between physical array slices, in units of rows of 855b8e80941Smrg * surface elements. 
856b8e80941Smrg */ 857b8e80941Smrgstatic uint32_t 858b8e80941Smrgisl_calc_array_pitch_el_rows_gen4_2d( 859b8e80941Smrg const struct isl_device *dev, 860b8e80941Smrg const struct isl_surf_init_info *restrict info, 861b8e80941Smrg const struct isl_tile_info *tile_info, 862b8e80941Smrg const struct isl_extent3d *image_align_sa, 863b8e80941Smrg const struct isl_extent4d *phys_level0_sa, 864b8e80941Smrg enum isl_array_pitch_span array_pitch_span, 865b8e80941Smrg const struct isl_extent2d *phys_slice0_sa) 866b8e80941Smrg{ 867b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 868b8e80941Smrg uint32_t pitch_sa_rows = 0; 869b8e80941Smrg 870b8e80941Smrg switch (array_pitch_span) { 871b8e80941Smrg case ISL_ARRAY_PITCH_SPAN_COMPACT: 872b8e80941Smrg pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); 873b8e80941Smrg break; 874b8e80941Smrg case ISL_ARRAY_PITCH_SPAN_FULL: { 875b8e80941Smrg /* The QPitch equation is found in the Broadwell PRM >> Volume 5: 876b8e80941Smrg * Memory Views >> Common Surface Formats >> Surface Layout >> 2D 877b8e80941Smrg * Surfaces >> Surface Arrays. 878b8e80941Smrg */ 879b8e80941Smrg uint32_t H0_sa = phys_level0_sa->h; 880b8e80941Smrg uint32_t H1_sa = isl_minify(H0_sa, 1); 881b8e80941Smrg 882b8e80941Smrg uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); 883b8e80941Smrg uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); 884b8e80941Smrg 885b8e80941Smrg uint32_t m; 886b8e80941Smrg if (ISL_DEV_GEN(dev) >= 7) { 887b8e80941Smrg /* The QPitch equation changed slightly in Ivybridge. 
*/ 888b8e80941Smrg m = 12; 889b8e80941Smrg } else { 890b8e80941Smrg m = 11; 891b8e80941Smrg } 892b8e80941Smrg 893b8e80941Smrg pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); 894b8e80941Smrg 895b8e80941Smrg if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && 896b8e80941Smrg (info->height % 4 == 1)) { 897b8e80941Smrg /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 898b8e80941Smrg * Graphics Core >> Section 7.18.3.7: Surface Arrays: 899b8e80941Smrg * 900b8e80941Smrg * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than 901b8e80941Smrg * the value calculated in the equation above , for every 902b8e80941Smrg * other odd Surface Height starting from 1 i.e. 1,5,9,13. 903b8e80941Smrg * 904b8e80941Smrg * XXX(chadv): Is the errata natural corollary of the physical 905b8e80941Smrg * layout of interleaved samples? 906b8e80941Smrg */ 907b8e80941Smrg pitch_sa_rows += 4; 908b8e80941Smrg } 909b8e80941Smrg 910b8e80941Smrg pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); 911b8e80941Smrg } /* end case */ 912b8e80941Smrg break; 913b8e80941Smrg } 914b8e80941Smrg 915b8e80941Smrg assert(pitch_sa_rows % fmtl->bh == 0); 916b8e80941Smrg uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh; 917b8e80941Smrg 918b8e80941Smrg if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) { 919b8e80941Smrg /* 920b8e80941Smrg * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632): 921b8e80941Smrg * 922b8e80941Smrg * "Mip-mapped and arrayed surfaces are supported with MCS buffer 923b8e80941Smrg * layout with these alignments in the RT space: Horizontal 924b8e80941Smrg * Alignment = 128 and Vertical Alignment = 64." 925b8e80941Smrg * 926b8e80941Smrg * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435): 927b8e80941Smrg * 928b8e80941Smrg * "For non-multisampled render target's CCS auxiliary surface, 929b8e80941Smrg * QPitch must be computed with Horizontal Alignment = 128 and 930b8e80941Smrg * Surface Vertical Alignment = 256. 
These alignments are only for 931b8e80941Smrg * CCS buffer and not for associated render target." 932b8e80941Smrg * 933b8e80941Smrg * The first restriction is already handled by isl_choose_image_alignment_el 934b8e80941Smrg * but the second restriction, which is an extension of the first, only 935b8e80941Smrg * applies to qpitch and must be applied here. 936b8e80941Smrg */ 937b8e80941Smrg assert(fmtl->bh == 4); 938b8e80941Smrg pitch_el_rows = isl_align(pitch_el_rows, 256 / 4); 939b8e80941Smrg } 940b8e80941Smrg 941b8e80941Smrg if (ISL_DEV_GEN(dev) >= 9 && 942b8e80941Smrg info->dim == ISL_SURF_DIM_3D && 943b8e80941Smrg tile_info->tiling != ISL_TILING_LINEAR) { 944b8e80941Smrg /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: 945b8e80941Smrg * 946b8e80941Smrg * Tile Mode != Linear: This field must be set to an integer multiple 947b8e80941Smrg * of the tile height 948b8e80941Smrg */ 949b8e80941Smrg pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height); 950b8e80941Smrg } 951b8e80941Smrg 952b8e80941Smrg return pitch_el_rows; 953b8e80941Smrg} 954b8e80941Smrg 955b8e80941Smrg/** 956b8e80941Smrg * A variant of isl_calc_phys_slice0_extent_sa() specific to 957b8e80941Smrg * ISL_DIM_LAYOUT_GEN4_2D. 958b8e80941Smrg */ 959b8e80941Smrgstatic void 960b8e80941Smrgisl_calc_phys_slice0_extent_sa_gen4_2d( 961b8e80941Smrg const struct isl_device *dev, 962b8e80941Smrg const struct isl_surf_init_info *restrict info, 963b8e80941Smrg enum isl_msaa_layout msaa_layout, 964b8e80941Smrg const struct isl_extent3d *image_align_sa, 965b8e80941Smrg const struct isl_extent4d *phys_level0_sa, 966b8e80941Smrg struct isl_extent2d *phys_slice0_sa) 967b8e80941Smrg{ 968b8e80941Smrg assert(phys_level0_sa->depth == 1); 969b8e80941Smrg 970b8e80941Smrg if (info->levels == 1) { 971b8e80941Smrg /* Do not pad the surface to the image alignment. 
972b8e80941Smrg * 973b8e80941Smrg * For tiled surfaces, using a reduced alignment here avoids wasting CPU 974b8e80941Smrg * cycles on the below mipmap layout caluclations. Reducing the 975b8e80941Smrg * alignment here is safe because we later align the row pitch and array 976b8e80941Smrg * pitch to the tile boundary. It is safe even for 977b8e80941Smrg * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled 978b8e80941Smrg * to accomodate the interleaved samples. 979b8e80941Smrg * 980b8e80941Smrg * For linear surfaces, reducing the alignment here permits us to later 981b8e80941Smrg * choose an arbitrary, non-aligned row pitch. If the surface backs 982b8e80941Smrg * a VkBuffer, then an arbitrary pitch may be needed to accomodate 983b8e80941Smrg * VkBufferImageCopy::bufferRowLength. 984b8e80941Smrg */ 985b8e80941Smrg *phys_slice0_sa = (struct isl_extent2d) { 986b8e80941Smrg .w = phys_level0_sa->w, 987b8e80941Smrg .h = phys_level0_sa->h, 988b8e80941Smrg }; 989b8e80941Smrg return; 990b8e80941Smrg } 991b8e80941Smrg 992b8e80941Smrg uint32_t slice_top_w = 0; 993b8e80941Smrg uint32_t slice_bottom_w = 0; 994b8e80941Smrg uint32_t slice_left_h = 0; 995b8e80941Smrg uint32_t slice_right_h = 0; 996b8e80941Smrg 997b8e80941Smrg uint32_t W0 = phys_level0_sa->w; 998b8e80941Smrg uint32_t H0 = phys_level0_sa->h; 999b8e80941Smrg 1000b8e80941Smrg for (uint32_t l = 0; l < info->levels; ++l) { 1001b8e80941Smrg uint32_t W = isl_minify(W0, l); 1002b8e80941Smrg uint32_t H = isl_minify(H0, l); 1003b8e80941Smrg 1004b8e80941Smrg uint32_t w = isl_align_npot(W, image_align_sa->w); 1005b8e80941Smrg uint32_t h = isl_align_npot(H, image_align_sa->h); 1006b8e80941Smrg 1007b8e80941Smrg if (l == 0) { 1008b8e80941Smrg slice_top_w = w; 1009b8e80941Smrg slice_left_h = h; 1010b8e80941Smrg slice_right_h = h; 1011b8e80941Smrg } else if (l == 1) { 1012b8e80941Smrg slice_bottom_w = w; 1013b8e80941Smrg slice_left_h += h; 1014b8e80941Smrg } else if (l == 2) { 1015b8e80941Smrg slice_bottom_w += w; 
1016b8e80941Smrg slice_right_h += h; 1017b8e80941Smrg } else { 1018b8e80941Smrg slice_right_h += h; 1019b8e80941Smrg } 1020b8e80941Smrg } 1021b8e80941Smrg 1022b8e80941Smrg *phys_slice0_sa = (struct isl_extent2d) { 1023b8e80941Smrg .w = MAX(slice_top_w, slice_bottom_w), 1024b8e80941Smrg .h = MAX(slice_left_h, slice_right_h), 1025b8e80941Smrg }; 1026b8e80941Smrg} 1027b8e80941Smrg 1028b8e80941Smrgstatic void 1029b8e80941Smrgisl_calc_phys_total_extent_el_gen4_2d( 1030b8e80941Smrg const struct isl_device *dev, 1031b8e80941Smrg const struct isl_surf_init_info *restrict info, 1032b8e80941Smrg const struct isl_tile_info *tile_info, 1033b8e80941Smrg enum isl_msaa_layout msaa_layout, 1034b8e80941Smrg const struct isl_extent3d *image_align_sa, 1035b8e80941Smrg const struct isl_extent4d *phys_level0_sa, 1036b8e80941Smrg enum isl_array_pitch_span array_pitch_span, 1037b8e80941Smrg uint32_t *array_pitch_el_rows, 1038b8e80941Smrg struct isl_extent2d *total_extent_el) 1039b8e80941Smrg{ 1040b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1041b8e80941Smrg 1042b8e80941Smrg struct isl_extent2d phys_slice0_sa; 1043b8e80941Smrg isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, 1044b8e80941Smrg image_align_sa, phys_level0_sa, 1045b8e80941Smrg &phys_slice0_sa); 1046b8e80941Smrg *array_pitch_el_rows = 1047b8e80941Smrg isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info, 1048b8e80941Smrg image_align_sa, phys_level0_sa, 1049b8e80941Smrg array_pitch_span, 1050b8e80941Smrg &phys_slice0_sa); 1051b8e80941Smrg *total_extent_el = (struct isl_extent2d) { 1052b8e80941Smrg .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw), 1053b8e80941Smrg .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) + 1054b8e80941Smrg isl_align_div_npot(phys_slice0_sa.h, fmtl->bh), 1055b8e80941Smrg }; 1056b8e80941Smrg} 1057b8e80941Smrg 1058b8e80941Smrg/** 1059b8e80941Smrg * A variant of isl_calc_phys_slice0_extent_sa() specific to 1060b8e80941Smrg * 
ISL_DIM_LAYOUT_GEN4_3D. 1061b8e80941Smrg */ 1062b8e80941Smrgstatic void 1063b8e80941Smrgisl_calc_phys_total_extent_el_gen4_3d( 1064b8e80941Smrg const struct isl_device *dev, 1065b8e80941Smrg const struct isl_surf_init_info *restrict info, 1066b8e80941Smrg const struct isl_extent3d *image_align_sa, 1067b8e80941Smrg const struct isl_extent4d *phys_level0_sa, 1068b8e80941Smrg uint32_t *array_pitch_el_rows, 1069b8e80941Smrg struct isl_extent2d *phys_total_el) 1070b8e80941Smrg{ 1071b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1072b8e80941Smrg 1073b8e80941Smrg assert(info->samples == 1); 1074b8e80941Smrg 1075b8e80941Smrg if (info->dim != ISL_SURF_DIM_3D) { 1076b8e80941Smrg /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout": 1077b8e80941Smrg * 1078b8e80941Smrg * The cube face textures are stored in the same way as 3D surfaces 1079b8e80941Smrg * are stored (see section 6.17.5 for details). For cube surfaces, 1080b8e80941Smrg * however, the depth is equal to the number of faces (always 6) and 1081b8e80941Smrg * is not reduced for each MIP. 
1082b8e80941Smrg */ 1083b8e80941Smrg assert(ISL_DEV_GEN(dev) == 4); 1084b8e80941Smrg assert(info->usage & ISL_SURF_USAGE_CUBE_BIT); 1085b8e80941Smrg assert(phys_level0_sa->array_len == 6); 1086b8e80941Smrg } else { 1087b8e80941Smrg assert(phys_level0_sa->array_len == 1); 1088b8e80941Smrg } 1089b8e80941Smrg 1090b8e80941Smrg uint32_t total_w = 0; 1091b8e80941Smrg uint32_t total_h = 0; 1092b8e80941Smrg 1093b8e80941Smrg uint32_t W0 = phys_level0_sa->w; 1094b8e80941Smrg uint32_t H0 = phys_level0_sa->h; 1095b8e80941Smrg uint32_t D0 = phys_level0_sa->d; 1096b8e80941Smrg uint32_t A0 = phys_level0_sa->a; 1097b8e80941Smrg 1098b8e80941Smrg for (uint32_t l = 0; l < info->levels; ++l) { 1099b8e80941Smrg uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); 1100b8e80941Smrg uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); 1101b8e80941Smrg uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0; 1102b8e80941Smrg 1103b8e80941Smrg uint32_t max_layers_horiz = MIN(level_d, 1u << l); 1104b8e80941Smrg uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); 1105b8e80941Smrg 1106b8e80941Smrg total_w = MAX(total_w, level_w * max_layers_horiz); 1107b8e80941Smrg total_h += level_h * max_layers_vert; 1108b8e80941Smrg } 1109b8e80941Smrg 1110b8e80941Smrg /* GEN4_3D layouts don't really have an array pitch since each LOD has a 1111b8e80941Smrg * different number of horizontal and vertical layers. We have to set it 1112b8e80941Smrg * to something, so at least make it true for LOD0. 
1113b8e80941Smrg */ 1114b8e80941Smrg *array_pitch_el_rows = 1115b8e80941Smrg isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw; 1116b8e80941Smrg *phys_total_el = (struct isl_extent2d) { 1117b8e80941Smrg .w = isl_assert_div(total_w, fmtl->bw), 1118b8e80941Smrg .h = isl_assert_div(total_h, fmtl->bh), 1119b8e80941Smrg }; 1120b8e80941Smrg} 1121b8e80941Smrg 1122b8e80941Smrg/** 1123b8e80941Smrg * A variant of isl_calc_phys_slice0_extent_sa() specific to 1124b8e80941Smrg * ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ. 1125b8e80941Smrg */ 1126b8e80941Smrgstatic void 1127b8e80941Smrgisl_calc_phys_total_extent_el_gen6_stencil_hiz( 1128b8e80941Smrg const struct isl_device *dev, 1129b8e80941Smrg const struct isl_surf_init_info *restrict info, 1130b8e80941Smrg const struct isl_tile_info *tile_info, 1131b8e80941Smrg const struct isl_extent3d *image_align_sa, 1132b8e80941Smrg const struct isl_extent4d *phys_level0_sa, 1133b8e80941Smrg uint32_t *array_pitch_el_rows, 1134b8e80941Smrg struct isl_extent2d *phys_total_el) 1135b8e80941Smrg{ 1136b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1137b8e80941Smrg 1138b8e80941Smrg const struct isl_extent2d tile_extent_sa = { 1139b8e80941Smrg .w = tile_info->logical_extent_el.w * fmtl->bw, 1140b8e80941Smrg .h = tile_info->logical_extent_el.h * fmtl->bh, 1141b8e80941Smrg }; 1142b8e80941Smrg /* Tile size is a multiple of image alignment */ 1143b8e80941Smrg assert(tile_extent_sa.w % image_align_sa->w == 0); 1144b8e80941Smrg assert(tile_extent_sa.h % image_align_sa->h == 0); 1145b8e80941Smrg 1146b8e80941Smrg const uint32_t W0 = phys_level0_sa->w; 1147b8e80941Smrg const uint32_t H0 = phys_level0_sa->h; 1148b8e80941Smrg 1149b8e80941Smrg /* Each image has the same height as LOD0 because the hardware thinks 1150b8e80941Smrg * everything is LOD0 1151b8e80941Smrg */ 1152b8e80941Smrg const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a; 1153b8e80941Smrg 1154b8e80941Smrg uint32_t total_top_w = 
0; 1155b8e80941Smrg uint32_t total_bottom_w = 0; 1156b8e80941Smrg uint32_t total_h = 0; 1157b8e80941Smrg 1158b8e80941Smrg for (uint32_t l = 0; l < info->levels; ++l) { 1159b8e80941Smrg const uint32_t W = isl_minify(W0, l); 1160b8e80941Smrg 1161b8e80941Smrg const uint32_t w = isl_align(W, tile_extent_sa.w); 1162b8e80941Smrg const uint32_t h = isl_align(H, tile_extent_sa.h); 1163b8e80941Smrg 1164b8e80941Smrg if (l == 0) { 1165b8e80941Smrg total_top_w = w; 1166b8e80941Smrg total_h = h; 1167b8e80941Smrg } else if (l == 1) { 1168b8e80941Smrg total_bottom_w = w; 1169b8e80941Smrg total_h += h; 1170b8e80941Smrg } else { 1171b8e80941Smrg total_bottom_w += w; 1172b8e80941Smrg } 1173b8e80941Smrg } 1174b8e80941Smrg 1175b8e80941Smrg *array_pitch_el_rows = 1176b8e80941Smrg isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh); 1177b8e80941Smrg *phys_total_el = (struct isl_extent2d) { 1178b8e80941Smrg .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw), 1179b8e80941Smrg .h = isl_assert_div(total_h, fmtl->bh), 1180b8e80941Smrg }; 1181b8e80941Smrg} 1182b8e80941Smrg 1183b8e80941Smrg/** 1184b8e80941Smrg * A variant of isl_calc_phys_slice0_extent_sa() specific to 1185b8e80941Smrg * ISL_DIM_LAYOUT_GEN9_1D. 
1186b8e80941Smrg */ 1187b8e80941Smrgstatic void 1188b8e80941Smrgisl_calc_phys_total_extent_el_gen9_1d( 1189b8e80941Smrg const struct isl_device *dev, 1190b8e80941Smrg const struct isl_surf_init_info *restrict info, 1191b8e80941Smrg const struct isl_extent3d *image_align_sa, 1192b8e80941Smrg const struct isl_extent4d *phys_level0_sa, 1193b8e80941Smrg uint32_t *array_pitch_el_rows, 1194b8e80941Smrg struct isl_extent2d *phys_total_el) 1195b8e80941Smrg{ 1196b8e80941Smrg MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1197b8e80941Smrg 1198b8e80941Smrg assert(phys_level0_sa->height == 1); 1199b8e80941Smrg assert(phys_level0_sa->depth == 1); 1200b8e80941Smrg assert(info->samples == 1); 1201b8e80941Smrg assert(image_align_sa->w >= fmtl->bw); 1202b8e80941Smrg 1203b8e80941Smrg uint32_t slice_w = 0; 1204b8e80941Smrg const uint32_t W0 = phys_level0_sa->w; 1205b8e80941Smrg 1206b8e80941Smrg for (uint32_t l = 0; l < info->levels; ++l) { 1207b8e80941Smrg uint32_t W = isl_minify(W0, l); 1208b8e80941Smrg uint32_t w = isl_align_npot(W, image_align_sa->w); 1209b8e80941Smrg 1210b8e80941Smrg slice_w += w; 1211b8e80941Smrg } 1212b8e80941Smrg 1213b8e80941Smrg *array_pitch_el_rows = 1; 1214b8e80941Smrg *phys_total_el = (struct isl_extent2d) { 1215b8e80941Smrg .w = isl_assert_div(slice_w, fmtl->bw), 1216b8e80941Smrg .h = phys_level0_sa->array_len, 1217b8e80941Smrg }; 1218b8e80941Smrg} 1219b8e80941Smrg 1220b8e80941Smrg/** 1221b8e80941Smrg * Calculate the two-dimensional total physical extent of the surface, in 1222b8e80941Smrg * units of surface elements. 
1223b8e80941Smrg */ 1224b8e80941Smrgstatic void 1225b8e80941Smrgisl_calc_phys_total_extent_el(const struct isl_device *dev, 1226b8e80941Smrg const struct isl_surf_init_info *restrict info, 1227b8e80941Smrg const struct isl_tile_info *tile_info, 1228b8e80941Smrg enum isl_dim_layout dim_layout, 1229b8e80941Smrg enum isl_msaa_layout msaa_layout, 1230b8e80941Smrg const struct isl_extent3d *image_align_sa, 1231b8e80941Smrg const struct isl_extent4d *phys_level0_sa, 1232b8e80941Smrg enum isl_array_pitch_span array_pitch_span, 1233b8e80941Smrg uint32_t *array_pitch_el_rows, 1234b8e80941Smrg struct isl_extent2d *total_extent_el) 1235b8e80941Smrg{ 1236b8e80941Smrg switch (dim_layout) { 1237b8e80941Smrg case ISL_DIM_LAYOUT_GEN9_1D: 1238b8e80941Smrg assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 1239b8e80941Smrg isl_calc_phys_total_extent_el_gen9_1d(dev, info, 1240b8e80941Smrg image_align_sa, phys_level0_sa, 1241b8e80941Smrg array_pitch_el_rows, 1242b8e80941Smrg total_extent_el); 1243b8e80941Smrg return; 1244b8e80941Smrg case ISL_DIM_LAYOUT_GEN4_2D: 1245b8e80941Smrg isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout, 1246b8e80941Smrg image_align_sa, phys_level0_sa, 1247b8e80941Smrg array_pitch_span, 1248b8e80941Smrg array_pitch_el_rows, 1249b8e80941Smrg total_extent_el); 1250b8e80941Smrg return; 1251b8e80941Smrg case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 1252b8e80941Smrg assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 1253b8e80941Smrg isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info, 1254b8e80941Smrg image_align_sa, 1255b8e80941Smrg phys_level0_sa, 1256b8e80941Smrg array_pitch_el_rows, 1257b8e80941Smrg total_extent_el); 1258b8e80941Smrg return; 1259b8e80941Smrg case ISL_DIM_LAYOUT_GEN4_3D: 1260b8e80941Smrg assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 1261b8e80941Smrg isl_calc_phys_total_extent_el_gen4_3d(dev, info, 1262b8e80941Smrg image_align_sa, phys_level0_sa, 1263b8e80941Smrg array_pitch_el_rows, 
1264b8e80941Smrg total_extent_el); 1265b8e80941Smrg return; 1266b8e80941Smrg } 1267b8e80941Smrg} 1268b8e80941Smrg 1269b8e80941Smrgstatic uint32_t 1270b8e80941Smrgisl_calc_row_pitch_alignment(const struct isl_surf_init_info *surf_info, 1271b8e80941Smrg const struct isl_tile_info *tile_info) 1272b8e80941Smrg{ 1273b8e80941Smrg if (tile_info->tiling != ISL_TILING_LINEAR) 1274b8e80941Smrg return tile_info->phys_extent_B.width; 1275b8e80941Smrg 1276b8e80941Smrg /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> 1277b8e80941Smrg * RENDER_SURFACE_STATE Surface Pitch (p349): 1278b8e80941Smrg * 1279b8e80941Smrg * - For linear render target surfaces and surfaces accessed with the 1280b8e80941Smrg * typed data port messages, the pitch must be a multiple of the 1281b8e80941Smrg * element size for non-YUV surface formats. Pitch must be 1282b8e80941Smrg * a multiple of 2 * element size for YUV surface formats. 1283b8e80941Smrg * 1284b8e80941Smrg * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we 1285b8e80941Smrg * ignore because isl doesn't do buffers.] 1286b8e80941Smrg * 1287b8e80941Smrg * - For other linear surfaces, the pitch can be any multiple of 1288b8e80941Smrg * bytes. 
1289b8e80941Smrg */ 1290b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format); 1291b8e80941Smrg const uint32_t bs = fmtl->bpb / 8; 1292b8e80941Smrg 1293b8e80941Smrg if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { 1294b8e80941Smrg if (isl_format_is_yuv(surf_info->format)) { 1295b8e80941Smrg return 2 * bs; 1296b8e80941Smrg } else { 1297b8e80941Smrg return bs; 1298b8e80941Smrg } 1299b8e80941Smrg } 1300b8e80941Smrg 1301b8e80941Smrg return 1; 1302b8e80941Smrg} 1303b8e80941Smrg 1304b8e80941Smrgstatic uint32_t 1305b8e80941Smrgisl_calc_linear_min_row_pitch(const struct isl_device *dev, 1306b8e80941Smrg const struct isl_surf_init_info *info, 1307b8e80941Smrg const struct isl_extent2d *phys_total_el, 1308b8e80941Smrg uint32_t alignment_B) 1309b8e80941Smrg{ 1310b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1311b8e80941Smrg const uint32_t bs = fmtl->bpb / 8; 1312b8e80941Smrg 1313b8e80941Smrg return isl_align_npot(bs * phys_total_el->w, alignment_B); 1314b8e80941Smrg} 1315b8e80941Smrg 1316b8e80941Smrgstatic uint32_t 1317b8e80941Smrgisl_calc_tiled_min_row_pitch(const struct isl_device *dev, 1318b8e80941Smrg const struct isl_surf_init_info *surf_info, 1319b8e80941Smrg const struct isl_tile_info *tile_info, 1320b8e80941Smrg const struct isl_extent2d *phys_total_el, 1321b8e80941Smrg uint32_t alignment_B) 1322b8e80941Smrg{ 1323b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format); 1324b8e80941Smrg 1325b8e80941Smrg assert(fmtl->bpb % tile_info->format_bpb == 0); 1326b8e80941Smrg 1327b8e80941Smrg const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb; 1328b8e80941Smrg const uint32_t total_w_tl = 1329b8e80941Smrg isl_align_div(phys_total_el->w * tile_el_scale, 1330b8e80941Smrg tile_info->logical_extent_el.width); 1331b8e80941Smrg 1332b8e80941Smrg assert(alignment_B == tile_info->phys_extent_B.width); 1333b8e80941Smrg return total_w_tl * 
tile_info->phys_extent_B.width; 1334b8e80941Smrg} 1335b8e80941Smrg 1336b8e80941Smrgstatic uint32_t 1337b8e80941Smrgisl_calc_min_row_pitch(const struct isl_device *dev, 1338b8e80941Smrg const struct isl_surf_init_info *surf_info, 1339b8e80941Smrg const struct isl_tile_info *tile_info, 1340b8e80941Smrg const struct isl_extent2d *phys_total_el, 1341b8e80941Smrg uint32_t alignment_B) 1342b8e80941Smrg{ 1343b8e80941Smrg if (tile_info->tiling == ISL_TILING_LINEAR) { 1344b8e80941Smrg return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el, 1345b8e80941Smrg alignment_B); 1346b8e80941Smrg } else { 1347b8e80941Smrg return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info, 1348b8e80941Smrg phys_total_el, alignment_B); 1349b8e80941Smrg } 1350b8e80941Smrg} 1351b8e80941Smrg 1352b8e80941Smrg/** 1353b8e80941Smrg * Is `pitch` in the valid range for a hardware bitfield, if the bitfield's 1354b8e80941Smrg * size is `bits` bits? 1355b8e80941Smrg * 1356b8e80941Smrg * Hardware pitch fields are offset by 1. For example, if the size of 1357b8e80941Smrg * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid 1358b8e80941Smrg * pitches is [1, 2^b] inclusive. If the surface pitch is N, then 1359b8e80941Smrg * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1. 
1360b8e80941Smrg */ 1361b8e80941Smrgstatic bool 1362b8e80941Smrgpitch_in_range(uint32_t n, uint32_t bits) 1363b8e80941Smrg{ 1364b8e80941Smrg assert(n != 0); 1365b8e80941Smrg return likely(bits != 0 && 1 <= n && n <= (1 << bits)); 1366b8e80941Smrg} 1367b8e80941Smrg 1368b8e80941Smrgstatic bool 1369b8e80941Smrgisl_calc_row_pitch(const struct isl_device *dev, 1370b8e80941Smrg const struct isl_surf_init_info *surf_info, 1371b8e80941Smrg const struct isl_tile_info *tile_info, 1372b8e80941Smrg enum isl_dim_layout dim_layout, 1373b8e80941Smrg const struct isl_extent2d *phys_total_el, 1374b8e80941Smrg uint32_t *out_row_pitch_B) 1375b8e80941Smrg{ 1376b8e80941Smrg uint32_t alignment_B = 1377b8e80941Smrg isl_calc_row_pitch_alignment(surf_info, tile_info); 1378b8e80941Smrg 1379b8e80941Smrg const uint32_t min_row_pitch_B = 1380b8e80941Smrg isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el, 1381b8e80941Smrg alignment_B); 1382b8e80941Smrg 1383b8e80941Smrg uint32_t row_pitch_B = min_row_pitch_B; 1384b8e80941Smrg 1385b8e80941Smrg if (surf_info->row_pitch_B != 0) { 1386b8e80941Smrg row_pitch_B = surf_info->row_pitch_B; 1387b8e80941Smrg 1388b8e80941Smrg if (row_pitch_B < min_row_pitch_B) 1389b8e80941Smrg return false; 1390b8e80941Smrg 1391b8e80941Smrg if (row_pitch_B % alignment_B != 0) 1392b8e80941Smrg return false; 1393b8e80941Smrg } 1394b8e80941Smrg 1395b8e80941Smrg const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width; 1396b8e80941Smrg 1397b8e80941Smrg if (row_pitch_B == 0) 1398b8e80941Smrg return false; 1399b8e80941Smrg 1400b8e80941Smrg if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { 1401b8e80941Smrg /* SurfacePitch is ignored for this layout. 
*/ 1402b8e80941Smrg goto done; 1403b8e80941Smrg } 1404b8e80941Smrg 1405b8e80941Smrg if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | 1406b8e80941Smrg ISL_SURF_USAGE_TEXTURE_BIT | 1407b8e80941Smrg ISL_SURF_USAGE_STORAGE_BIT)) && 1408b8e80941Smrg !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info))) 1409b8e80941Smrg return false; 1410b8e80941Smrg 1411b8e80941Smrg if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT | 1412b8e80941Smrg ISL_SURF_USAGE_MCS_BIT)) && 1413b8e80941Smrg !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info))) 1414b8e80941Smrg return false; 1415b8e80941Smrg 1416b8e80941Smrg if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) && 1417b8e80941Smrg !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) 1418b8e80941Smrg return false; 1419b8e80941Smrg 1420b8e80941Smrg if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) && 1421b8e80941Smrg !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) 1422b8e80941Smrg return false; 1423b8e80941Smrg 1424b8e80941Smrg const uint32_t stencil_pitch_bits = dev->use_separate_stencil ? 
1425b8e80941Smrg _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) : 1426b8e80941Smrg _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info); 1427b8e80941Smrg 1428b8e80941Smrg if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) && 1429b8e80941Smrg !pitch_in_range(row_pitch_B, stencil_pitch_bits)) 1430b8e80941Smrg return false; 1431b8e80941Smrg 1432b8e80941Smrg done: 1433b8e80941Smrg *out_row_pitch_B = row_pitch_B; 1434b8e80941Smrg return true; 1435b8e80941Smrg}

/**
 * Fill out *surf from the creation parameters in *info.
 *
 * The function picks a tiling, a dimension layout, an MSAA layout and an
 * image alignment, and from those derives the physical extents, the row
 * pitch, the total size in bytes and the required base-address alignment.
 *
 * Returns false if tiling selection, MSAA layout selection or row pitch
 * calculation fails, or if the resulting size exceeds the per-generation
 * limit checked below.  *surf is only written on the success path, right
 * before returning true.
 */
bool
isl_surf_init_s(const struct isl_device *dev,
                struct isl_surf *surf,
                const struct isl_surf_init_info *restrict info)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);

   enum isl_tiling tiling;
   if (!isl_surf_choose_tiling(dev, info, &tiling))
      return false;

   struct isl_tile_info tile;
   isl_tiling_get_info(tiling, fmtl->bpb, &tile);

   const enum isl_dim_layout dim_layout =
      isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);

   enum isl_msaa_layout msaa_layout;
   if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
      return false;

   /* Image alignment, first in elements and then converted to samples. */
   struct isl_extent3d align_el;
   isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
                                 &align_el);

   struct isl_extent3d align_sa =
      isl_extent3d_el_to_sa(info->format, align_el);

   struct isl_extent4d level0_sa;
   isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
                                  &level0_sa);

   enum isl_array_pitch_span pitch_span =
      isl_choose_array_pitch_span(dev, info, dim_layout, &level0_sa);

   uint32_t array_pitch_el_rows;
   struct isl_extent2d total_el;
   isl_calc_phys_total_extent_el(dev, info, &tile,
                                 dim_layout, msaa_layout,
                                 &align_sa, &level0_sa,
                                 pitch_span, &array_pitch_el_rows,
                                 &total_el);

   uint32_t pitch_B;
   if (!isl_calc_row_pitch(dev, info, &tile, dim_layout,
                           &total_el, &pitch_B))
      return false;

   uint32_t align_B;
   uint64_t bytes;
   if (tiling != ISL_TILING_LINEAR) {
      /* Tiled: the size is a whole number of tile rows and the base address
       * must be aligned to at least one tile.
       */
      const uint32_t rows_tl =
         isl_align_div(total_el.h, tile.logical_extent_el.height);

      bytes = (uint64_t) rows_tl * tile.phys_extent_B.height * pitch_B;

      const uint32_t tile_size_B = tile.phys_extent_B.width *
                                   tile.phys_extent_B.height;
      assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
      align_B = MAX(info->min_alignment_B, tile_size_B);
   } else {
      bytes = (uint64_t) pitch_B * total_el.h;

      /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
       *
       *    "The Base Address for linear render target surfaces and surfaces
       *    accessed with the typed surface read/write data port messages must
       *    be element-size aligned, for non-YUV surface formats, or a
       *    multiple of 2 element-sizes for YUV surface formats. Other linear
       *    surfaces have no alignment requirements (byte alignment is
       *    sufficient.)"
       */
      align_B = MAX(1, info->min_alignment_B);
      if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
         /* bpb / 8 is one element in bytes; bpb / 4 is two elements. */
         const uint32_t rt_align_B = isl_format_is_yuv(info->format) ?
                                     fmtl->bpb / 4 : fmtl->bpb / 8;
         align_B = MAX(align_B, rt_align_B);
      }
      align_B = isl_round_up_to_power_of_two(align_B);

      /* From the Skylake PRM Vol 2c, PLANE_STRIDE::Stride:
       *
       *     "For Linear memory, this field specifies the stride in chunks of
       *     64 bytes (1 cache line)."
       */
      if (isl_surf_usage_is_display(info->usage))
         align_B = MAX(align_B, 64);
   }

   /* Pick the log2 of the largest surface size allowed on this generation. */
   unsigned max_size_log2;
   if (ISL_DEV_GEN(dev) < 9) {
      /* From the Broadwell PRM Vol 5, Surface Layout:
       *
       *    "In addition to restrictions on maximum height, width, and depth,
       *     surfaces are also restricted to a maximum size in bytes. This
       *     maximum is 2 GB for all products and all surface types."
       *
       * This comment is applicable to all Pre-gen9 platforms.
       */
      max_size_log2 = 31;
   } else if (ISL_DEV_GEN(dev) < 11) {
      /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
       *    "In addition to restrictions on maximum height, width, and depth,
       *     surfaces are also restricted to a maximum size of 2^38 bytes.
       *     All pixels within the surface must be contained within 2^38 bytes
       *     of the base address."
       */
      max_size_log2 = 38;
   } else {
      /* gen11+ platforms raised this limit to 2^44 bytes. */
      max_size_log2 = 44;
   }

   if (bytes > ((uint64_t) 1 << max_size_log2))
      return false;

   *surf = (struct isl_surf) {
      .dim = info->dim,
      .dim_layout = dim_layout,
      .msaa_layout = msaa_layout,
      .tiling = tiling,
      .format = info->format,

      .levels = info->levels,
      .samples = info->samples,

      .image_alignment_el = align_el,
      .logical_level0_px = {
         .w = info->width,
         .h = info->height,
         .d = info->depth,
         .a = info->array_len,
      },
      .phys_level0_sa = level0_sa,

      .size_B = bytes,
      .alignment_B = align_B,
      .row_pitch_B = pitch_B,
      .array_pitch_el_rows = array_pitch_el_rows,
      .array_pitch_span = pitch_span,

      .usage = info->usage,
   };

   return true;
}

void
1588b8e80941Smrgisl_surf_get_tile_info(const struct isl_surf *surf, 1589b8e80941Smrg struct isl_tile_info *tile_info) 1590b8e80941Smrg{ 1591b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 1592b8e80941Smrg isl_tiling_get_info(surf->tiling, fmtl->bpb, tile_info); 1593b8e80941Smrg} 1594b8e80941Smrg 1595b8e80941Smrgbool 1596b8e80941Smrgisl_surf_get_hiz_surf(const struct isl_device *dev, 1597b8e80941Smrg const struct isl_surf *surf, 1598b8e80941Smrg struct isl_surf *hiz_surf) 1599b8e80941Smrg{ 1600b8e80941Smrg assert(ISL_DEV_GEN(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev)); 1601b8e80941Smrg 1602b8e80941Smrg /* Multisampled depth is always interleaved */ 1603b8e80941Smrg assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE || 1604b8e80941Smrg surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED); 1605b8e80941Smrg 1606b8e80941Smrg /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer": 1607b8e80941Smrg * 1608b8e80941Smrg * "The Surface Type, Height, Width, Depth, Minimum Array Element, Render 1609b8e80941Smrg * Target View Extent, and Depth Coordinate Offset X/Y of the 1610b8e80941Smrg * hierarchical depth buffer are inherited from the depth buffer. The 1611b8e80941Smrg * height and width of the hierarchical depth buffer that must be 1612b8e80941Smrg * allocated are computed by the following formulas, where HZ is the 1613b8e80941Smrg * hierarchical depth buffer and Z is the depth buffer. The Z_Height, 1614b8e80941Smrg * Z_Width, and Z_Depth values given in these formulas are those present 1615b8e80941Smrg * in 3DSTATE_DEPTH_BUFFER incremented by one. 1616b8e80941Smrg * 1617b8e80941Smrg * "The value of Z_Height and Z_Width must each be multiplied by 2 before 1618b8e80941Smrg * being applied to the table below if Number of Multisamples is set to 1619b8e80941Smrg * NUMSAMPLES_4. 
The value of Z_Height must be multiplied by 2 and 1620b8e80941Smrg * Z_Width must be multiplied by 4 before being applied to the table 1621b8e80941Smrg * below if Number of Multisamples is set to NUMSAMPLES_8." 1622b8e80941Smrg * 1623b8e80941Smrg * In the Sky Lake PRM, the second paragraph is replaced with this: 1624b8e80941Smrg * 1625b8e80941Smrg * "The Z_Height and Z_Width values must equal those present in 1626b8e80941Smrg * 3DSTATE_DEPTH_BUFFER incremented by one." 1627b8e80941Smrg * 1628b8e80941Smrg * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ 1629b8e80941Smrg * block corresponds to a region of 8x4 samples in the primary depth 1630b8e80941Smrg * surface. On Sky Lake, on the other hand, each HiZ block corresponds to 1631b8e80941Smrg * a region of 8x4 pixels in the primary depth surface regardless of the 1632b8e80941Smrg * number of samples. The dimensions of a HiZ block in both pixels and 1633b8e80941Smrg * samples are given in the table below: 1634b8e80941Smrg * 1635b8e80941Smrg * | SNB - BDW | SKL+ 1636b8e80941Smrg * ------+-----------+------------- 1637b8e80941Smrg * 1x | 8 x 4 sa | 8 x 4 sa 1638b8e80941Smrg * MSAA | 8 x 4 px | 8 x 4 px 1639b8e80941Smrg * ------+-----------+------------- 1640b8e80941Smrg * 2x | 8 x 4 sa | 16 x 4 sa 1641b8e80941Smrg * MSAA | 4 x 4 px | 8 x 4 px 1642b8e80941Smrg * ------+-----------+------------- 1643b8e80941Smrg * 4x | 8 x 4 sa | 16 x 8 sa 1644b8e80941Smrg * MSAA | 4 x 2 px | 8 x 4 px 1645b8e80941Smrg * ------+-----------+------------- 1646b8e80941Smrg * 8x | 8 x 4 sa | 32 x 8 sa 1647b8e80941Smrg * MSAA | 2 x 2 px | 8 x 4 px 1648b8e80941Smrg * ------+-----------+------------- 1649b8e80941Smrg * 16x | N/A | 32 x 16 sa 1650b8e80941Smrg * MSAA | N/A | 8 x 4 px 1651b8e80941Smrg * ------+-----------+------------- 1652b8e80941Smrg * 1653b8e80941Smrg * There are a number of different ways that this discrepency could be 1654b8e80941Smrg * handled. 
The way we have chosen is to simply make MSAA HiZ have the 1655b8e80941Smrg * same number of samples as the parent surface pre-Sky Lake and always be 1656b8e80941Smrg * single-sampled on Sky Lake and above. Since the block sizes of 1657b8e80941Smrg * compressed formats are given in samples, this neatly handles everything 1658b8e80941Smrg * without the need for additional HiZ formats with different block sizes 1659b8e80941Smrg * on SKL+. 1660b8e80941Smrg */ 1661b8e80941Smrg const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples; 1662b8e80941Smrg 1663b8e80941Smrg return isl_surf_init(dev, hiz_surf, 1664b8e80941Smrg .dim = surf->dim, 1665b8e80941Smrg .format = ISL_FORMAT_HIZ, 1666b8e80941Smrg .width = surf->logical_level0_px.width, 1667b8e80941Smrg .height = surf->logical_level0_px.height, 1668b8e80941Smrg .depth = surf->logical_level0_px.depth, 1669b8e80941Smrg .levels = surf->levels, 1670b8e80941Smrg .array_len = surf->logical_level0_px.array_len, 1671b8e80941Smrg .samples = samples, 1672b8e80941Smrg .usage = ISL_SURF_USAGE_HIZ_BIT, 1673b8e80941Smrg .tiling_flags = ISL_TILING_HIZ_BIT); 1674b8e80941Smrg} 1675b8e80941Smrg 1676b8e80941Smrgbool 1677b8e80941Smrgisl_surf_get_mcs_surf(const struct isl_device *dev, 1678b8e80941Smrg const struct isl_surf *surf, 1679b8e80941Smrg struct isl_surf *mcs_surf) 1680b8e80941Smrg{ 1681b8e80941Smrg assert(ISL_DEV_GEN(dev) >= 7); 1682b8e80941Smrg 1683b8e80941Smrg /* It must be multisampled with an array layout */ 1684b8e80941Smrg assert(surf->samples > 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 1685b8e80941Smrg 1686b8e80941Smrg /* The following are true of all multisampled surfaces */ 1687b8e80941Smrg assert(surf->dim == ISL_SURF_DIM_2D); 1688b8e80941Smrg assert(surf->levels == 1); 1689b8e80941Smrg assert(surf->logical_level0_px.depth == 1); 1690b8e80941Smrg 1691b8e80941Smrg /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9 1692b8e80941Smrg * bits which means the maximum pitch of a compression 
surface is 512 1693b8e80941Smrg * tiles or 64KB (since MCS is always Y-tiled). Since a 16x MCS buffer is 1694b8e80941Smrg * 64bpp, this gives us a maximum width of 8192 pixels. We can create 1695b8e80941Smrg * larger multisampled surfaces, we just can't compress them. For 2x, 4x, 1696b8e80941Smrg * and 8x, we have enough room for the full 16k supported by the hardware. 1697b8e80941Smrg */ 1698b8e80941Smrg if (surf->samples == 16 && surf->logical_level0_px.width > 8192) 1699b8e80941Smrg return false; 1700b8e80941Smrg 1701b8e80941Smrg enum isl_format mcs_format; 1702b8e80941Smrg switch (surf->samples) { 1703b8e80941Smrg case 2: mcs_format = ISL_FORMAT_MCS_2X; break; 1704b8e80941Smrg case 4: mcs_format = ISL_FORMAT_MCS_4X; break; 1705b8e80941Smrg case 8: mcs_format = ISL_FORMAT_MCS_8X; break; 1706b8e80941Smrg case 16: mcs_format = ISL_FORMAT_MCS_16X; break; 1707b8e80941Smrg default: 1708b8e80941Smrg unreachable("Invalid sample count"); 1709b8e80941Smrg } 1710b8e80941Smrg 1711b8e80941Smrg return isl_surf_init(dev, mcs_surf, 1712b8e80941Smrg .dim = ISL_SURF_DIM_2D, 1713b8e80941Smrg .format = mcs_format, 1714b8e80941Smrg .width = surf->logical_level0_px.width, 1715b8e80941Smrg .height = surf->logical_level0_px.height, 1716b8e80941Smrg .depth = 1, 1717b8e80941Smrg .levels = 1, 1718b8e80941Smrg .array_len = surf->logical_level0_px.array_len, 1719b8e80941Smrg .samples = 1, /* MCS surfaces are really single-sampled */ 1720b8e80941Smrg .usage = ISL_SURF_USAGE_MCS_BIT, 1721b8e80941Smrg .tiling_flags = ISL_TILING_Y0_BIT); 1722b8e80941Smrg} 1723b8e80941Smrg 1724b8e80941Smrgbool 1725b8e80941Smrgisl_surf_get_ccs_surf(const struct isl_device *dev, 1726b8e80941Smrg const struct isl_surf *surf, 1727b8e80941Smrg struct isl_surf *ccs_surf, 1728b8e80941Smrg uint32_t row_pitch_B) 1729b8e80941Smrg{ 1730b8e80941Smrg assert(surf->samples == 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_NONE); 1731b8e80941Smrg assert(ISL_DEV_GEN(dev) >= 7); 1732b8e80941Smrg 1733b8e80941Smrg if (surf->usage & 
ISL_SURF_USAGE_DISABLE_AUX_BIT) 1734b8e80941Smrg return false; 1735b8e80941Smrg 1736b8e80941Smrg /* The PRM doesn't say this explicitly, but fast-clears don't appear to 1737b8e80941Smrg * work for 3D textures until gen9 where the layout of 3D textures changes 1738b8e80941Smrg * to match 2D array textures. 1739b8e80941Smrg */ 1740b8e80941Smrg if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D) 1741b8e80941Smrg return false; 1742b8e80941Smrg 1743b8e80941Smrg /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of 1744b8e80941Smrg * Non-MultiSampler Render Target Restrictions): 1745b8e80941Smrg * 1746b8e80941Smrg * "Support is for non-mip-mapped and non-array surface types only." 1747b8e80941Smrg * 1748b8e80941Smrg * This restriction is lifted on gen8+. Technically, it may be possible to 1749b8e80941Smrg * create a CCS for an arrayed or mipmapped image and only enable CCS_D 1750b8e80941Smrg * when rendering to the base slice. However, there is no documentation 1751b8e80941Smrg * tell us what the hardware would do in that case or what it does if you 1752b8e80941Smrg * walk off the bases slice. (Does it ignore CCS or does it start 1753b8e80941Smrg * scribbling over random memory?) We play it safe and just follow the 1754b8e80941Smrg * docs and don't allow CCS_D for arrayed or mip-mapped surfaces. 1755b8e80941Smrg */ 1756b8e80941Smrg if (ISL_DEV_GEN(dev) <= 7 && 1757b8e80941Smrg (surf->levels > 1 || surf->logical_level0_px.array_len > 1)) 1758b8e80941Smrg return false; 1759b8e80941Smrg 1760b8e80941Smrg if (isl_format_is_compressed(surf->format)) 1761b8e80941Smrg return false; 1762b8e80941Smrg 1763b8e80941Smrg /* TODO: More conditions where it can fail. 
*/ 1764b8e80941Smrg 1765b8e80941Smrg enum isl_format ccs_format; 1766b8e80941Smrg if (ISL_DEV_GEN(dev) >= 9) { 1767b8e80941Smrg if (!isl_tiling_is_any_y(surf->tiling)) 1768b8e80941Smrg return false; 1769b8e80941Smrg 1770b8e80941Smrg switch (isl_format_get_layout(surf->format)->bpb) { 1771b8e80941Smrg case 32: ccs_format = ISL_FORMAT_GEN9_CCS_32BPP; break; 1772b8e80941Smrg case 64: ccs_format = ISL_FORMAT_GEN9_CCS_64BPP; break; 1773b8e80941Smrg case 128: ccs_format = ISL_FORMAT_GEN9_CCS_128BPP; break; 1774b8e80941Smrg default: 1775b8e80941Smrg return false; 1776b8e80941Smrg } 1777b8e80941Smrg } else if (surf->tiling == ISL_TILING_Y0) { 1778b8e80941Smrg switch (isl_format_get_layout(surf->format)->bpb) { 1779b8e80941Smrg case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_Y; break; 1780b8e80941Smrg case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_Y; break; 1781b8e80941Smrg case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_Y; break; 1782b8e80941Smrg default: 1783b8e80941Smrg return false; 1784b8e80941Smrg } 1785b8e80941Smrg } else if (surf->tiling == ISL_TILING_X) { 1786b8e80941Smrg switch (isl_format_get_layout(surf->format)->bpb) { 1787b8e80941Smrg case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_X; break; 1788b8e80941Smrg case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_X; break; 1789b8e80941Smrg case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_X; break; 1790b8e80941Smrg default: 1791b8e80941Smrg return false; 1792b8e80941Smrg } 1793b8e80941Smrg } else { 1794b8e80941Smrg return false; 1795b8e80941Smrg } 1796b8e80941Smrg 1797b8e80941Smrg return isl_surf_init(dev, ccs_surf, 1798b8e80941Smrg .dim = surf->dim, 1799b8e80941Smrg .format = ccs_format, 1800b8e80941Smrg .width = surf->logical_level0_px.width, 1801b8e80941Smrg .height = surf->logical_level0_px.height, 1802b8e80941Smrg .depth = surf->logical_level0_px.depth, 1803b8e80941Smrg .levels = surf->levels, 1804b8e80941Smrg .array_len = surf->logical_level0_px.array_len, 1805b8e80941Smrg .samples = 1, 1806b8e80941Smrg 
.row_pitch_B = row_pitch_B, 1807b8e80941Smrg .usage = ISL_SURF_USAGE_CCS_BIT, 1808b8e80941Smrg .tiling_flags = ISL_TILING_CCS_BIT); 1809b8e80941Smrg} 1810b8e80941Smrg 1811b8e80941Smrg#define isl_genX_call(dev, func, ...) \ 1812b8e80941Smrg switch (ISL_DEV_GEN(dev)) { \ 1813b8e80941Smrg case 4: \ 1814b8e80941Smrg /* G45 surface state is the same as gen5 */ \ 1815b8e80941Smrg if (ISL_DEV_IS_G4X(dev)) { \ 1816b8e80941Smrg isl_gen5_##func(__VA_ARGS__); \ 1817b8e80941Smrg } else { \ 1818b8e80941Smrg isl_gen4_##func(__VA_ARGS__); \ 1819b8e80941Smrg } \ 1820b8e80941Smrg break; \ 1821b8e80941Smrg case 5: \ 1822b8e80941Smrg isl_gen5_##func(__VA_ARGS__); \ 1823b8e80941Smrg break; \ 1824b8e80941Smrg case 6: \ 1825b8e80941Smrg isl_gen6_##func(__VA_ARGS__); \ 1826b8e80941Smrg break; \ 1827b8e80941Smrg case 7: \ 1828b8e80941Smrg if (ISL_DEV_IS_HASWELL(dev)) { \ 1829b8e80941Smrg isl_gen75_##func(__VA_ARGS__); \ 1830b8e80941Smrg } else { \ 1831b8e80941Smrg isl_gen7_##func(__VA_ARGS__); \ 1832b8e80941Smrg } \ 1833b8e80941Smrg break; \ 1834b8e80941Smrg case 8: \ 1835b8e80941Smrg isl_gen8_##func(__VA_ARGS__); \ 1836b8e80941Smrg break; \ 1837b8e80941Smrg case 9: \ 1838b8e80941Smrg isl_gen9_##func(__VA_ARGS__); \ 1839b8e80941Smrg break; \ 1840b8e80941Smrg case 10: \ 1841b8e80941Smrg isl_gen10_##func(__VA_ARGS__); \ 1842b8e80941Smrg break; \ 1843b8e80941Smrg case 11: \ 1844b8e80941Smrg isl_gen11_##func(__VA_ARGS__); \ 1845b8e80941Smrg break; \ 1846b8e80941Smrg default: \ 1847b8e80941Smrg assert(!"Unknown hardware generation"); \ 1848b8e80941Smrg } 1849b8e80941Smrg 1850b8e80941Smrgvoid 1851b8e80941Smrgisl_surf_fill_state_s(const struct isl_device *dev, void *state, 1852b8e80941Smrg const struct isl_surf_fill_state_info *restrict info) 1853b8e80941Smrg{ 1854b8e80941Smrg#ifndef NDEBUG 1855b8e80941Smrg isl_surf_usage_flags_t _base_usage = 1856b8e80941Smrg info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | 1857b8e80941Smrg ISL_SURF_USAGE_TEXTURE_BIT | 1858b8e80941Smrg 
ISL_SURF_USAGE_STORAGE_BIT); 1859b8e80941Smrg /* They may only specify one of the above bits at a time */ 1860b8e80941Smrg assert(__builtin_popcount(_base_usage) == 1); 1861b8e80941Smrg /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */ 1862b8e80941Smrg assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage); 1863b8e80941Smrg#endif 1864b8e80941Smrg 1865b8e80941Smrg if (info->surf->dim == ISL_SURF_DIM_3D) { 1866b8e80941Smrg assert(info->view->base_array_layer + info->view->array_len <= 1867b8e80941Smrg info->surf->logical_level0_px.depth); 1868b8e80941Smrg } else { 1869b8e80941Smrg assert(info->view->base_array_layer + info->view->array_len <= 1870b8e80941Smrg info->surf->logical_level0_px.array_len); 1871b8e80941Smrg } 1872b8e80941Smrg 1873b8e80941Smrg isl_genX_call(dev, surf_fill_state_s, dev, state, info); 1874b8e80941Smrg} 1875b8e80941Smrg 1876b8e80941Smrgvoid 1877b8e80941Smrgisl_buffer_fill_state_s(const struct isl_device *dev, void *state, 1878b8e80941Smrg const struct isl_buffer_fill_state_info *restrict info) 1879b8e80941Smrg{ 1880b8e80941Smrg isl_genX_call(dev, buffer_fill_state_s, state, info); 1881b8e80941Smrg} 1882b8e80941Smrg 1883b8e80941Smrgvoid 1884b8e80941Smrgisl_null_fill_state(const struct isl_device *dev, void *state, 1885b8e80941Smrg struct isl_extent3d size) 1886b8e80941Smrg{ 1887b8e80941Smrg isl_genX_call(dev, null_fill_state, state, size); 1888b8e80941Smrg} 1889b8e80941Smrg 1890b8e80941Smrgvoid 1891b8e80941Smrgisl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, 1892b8e80941Smrg const struct isl_depth_stencil_hiz_emit_info *restrict info) 1893b8e80941Smrg{ 1894b8e80941Smrg if (info->depth_surf && info->stencil_surf) { 1895b8e80941Smrg if (!dev->info->has_hiz_and_separate_stencil) { 1896b8e80941Smrg assert(info->depth_surf == info->stencil_surf); 1897b8e80941Smrg assert(info->depth_address == info->stencil_address); 1898b8e80941Smrg } 1899b8e80941Smrg assert(info->depth_surf->dim == 
info->stencil_surf->dim); 1900b8e80941Smrg } 1901b8e80941Smrg 1902b8e80941Smrg if (info->depth_surf) { 1903b8e80941Smrg assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT)); 1904b8e80941Smrg if (info->depth_surf->dim == ISL_SURF_DIM_3D) { 1905b8e80941Smrg assert(info->view->base_array_layer + info->view->array_len <= 1906b8e80941Smrg info->depth_surf->logical_level0_px.depth); 1907b8e80941Smrg } else { 1908b8e80941Smrg assert(info->view->base_array_layer + info->view->array_len <= 1909b8e80941Smrg info->depth_surf->logical_level0_px.array_len); 1910b8e80941Smrg } 1911b8e80941Smrg } 1912b8e80941Smrg 1913b8e80941Smrg if (info->stencil_surf) { 1914b8e80941Smrg assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT)); 1915b8e80941Smrg if (info->stencil_surf->dim == ISL_SURF_DIM_3D) { 1916b8e80941Smrg assert(info->view->base_array_layer + info->view->array_len <= 1917b8e80941Smrg info->stencil_surf->logical_level0_px.depth); 1918b8e80941Smrg } else { 1919b8e80941Smrg assert(info->view->base_array_layer + info->view->array_len <= 1920b8e80941Smrg info->stencil_surf->logical_level0_px.array_len); 1921b8e80941Smrg } 1922b8e80941Smrg } 1923b8e80941Smrg 1924b8e80941Smrg isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info); 1925b8e80941Smrg} 1926b8e80941Smrg 1927b8e80941Smrg/** 1928b8e80941Smrg * A variant of isl_surf_get_image_offset_sa() specific to 1929b8e80941Smrg * ISL_DIM_LAYOUT_GEN4_2D. 
1930b8e80941Smrg */ 1931b8e80941Smrgstatic void 1932b8e80941Smrgget_image_offset_sa_gen4_2d(const struct isl_surf *surf, 1933b8e80941Smrg uint32_t level, uint32_t logical_array_layer, 1934b8e80941Smrg uint32_t *x_offset_sa, 1935b8e80941Smrg uint32_t *y_offset_sa) 1936b8e80941Smrg{ 1937b8e80941Smrg assert(level < surf->levels); 1938b8e80941Smrg if (surf->dim == ISL_SURF_DIM_3D) 1939b8e80941Smrg assert(logical_array_layer < surf->logical_level0_px.depth); 1940b8e80941Smrg else 1941b8e80941Smrg assert(logical_array_layer < surf->logical_level0_px.array_len); 1942b8e80941Smrg 1943b8e80941Smrg const struct isl_extent3d image_align_sa = 1944b8e80941Smrg isl_surf_get_image_alignment_sa(surf); 1945b8e80941Smrg 1946b8e80941Smrg const uint32_t W0 = surf->phys_level0_sa.width; 1947b8e80941Smrg const uint32_t H0 = surf->phys_level0_sa.height; 1948b8e80941Smrg 1949b8e80941Smrg const uint32_t phys_layer = logical_array_layer * 1950b8e80941Smrg (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1); 1951b8e80941Smrg 1952b8e80941Smrg uint32_t x = 0; 1953b8e80941Smrg uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf); 1954b8e80941Smrg 1955b8e80941Smrg for (uint32_t l = 0; l < level; ++l) { 1956b8e80941Smrg if (l == 1) { 1957b8e80941Smrg uint32_t W = isl_minify(W0, l); 1958b8e80941Smrg x += isl_align_npot(W, image_align_sa.w); 1959b8e80941Smrg } else { 1960b8e80941Smrg uint32_t H = isl_minify(H0, l); 1961b8e80941Smrg y += isl_align_npot(H, image_align_sa.h); 1962b8e80941Smrg } 1963b8e80941Smrg } 1964b8e80941Smrg 1965b8e80941Smrg *x_offset_sa = x; 1966b8e80941Smrg *y_offset_sa = y; 1967b8e80941Smrg} 1968b8e80941Smrg 1969b8e80941Smrg/** 1970b8e80941Smrg * A variant of isl_surf_get_image_offset_sa() specific to 1971b8e80941Smrg * ISL_DIM_LAYOUT_GEN4_3D. 
1972b8e80941Smrg */ 1973b8e80941Smrgstatic void 1974b8e80941Smrgget_image_offset_sa_gen4_3d(const struct isl_surf *surf, 1975b8e80941Smrg uint32_t level, uint32_t logical_z_offset_px, 1976b8e80941Smrg uint32_t *x_offset_sa, 1977b8e80941Smrg uint32_t *y_offset_sa) 1978b8e80941Smrg{ 1979b8e80941Smrg assert(level < surf->levels); 1980b8e80941Smrg if (surf->dim == ISL_SURF_DIM_3D) { 1981b8e80941Smrg assert(surf->phys_level0_sa.array_len == 1); 1982b8e80941Smrg assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level)); 1983b8e80941Smrg } else { 1984b8e80941Smrg assert(surf->dim == ISL_SURF_DIM_2D); 1985b8e80941Smrg assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT); 1986b8e80941Smrg assert(surf->phys_level0_sa.array_len == 6); 1987b8e80941Smrg assert(logical_z_offset_px < surf->phys_level0_sa.array_len); 1988b8e80941Smrg } 1989b8e80941Smrg 1990b8e80941Smrg const struct isl_extent3d image_align_sa = 1991b8e80941Smrg isl_surf_get_image_alignment_sa(surf); 1992b8e80941Smrg 1993b8e80941Smrg const uint32_t W0 = surf->phys_level0_sa.width; 1994b8e80941Smrg const uint32_t H0 = surf->phys_level0_sa.height; 1995b8e80941Smrg const uint32_t D0 = surf->phys_level0_sa.depth; 1996b8e80941Smrg const uint32_t AL = surf->phys_level0_sa.array_len; 1997b8e80941Smrg 1998b8e80941Smrg uint32_t x = 0; 1999b8e80941Smrg uint32_t y = 0; 2000b8e80941Smrg 2001b8e80941Smrg for (uint32_t l = 0; l < level; ++l) { 2002b8e80941Smrg const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h); 2003b8e80941Smrg const uint32_t level_d = 2004b8e80941Smrg isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? 
isl_minify(D0, l) : AL, 2005b8e80941Smrg image_align_sa.d); 2006b8e80941Smrg const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); 2007b8e80941Smrg 2008b8e80941Smrg y += level_h * max_layers_vert; 2009b8e80941Smrg } 2010b8e80941Smrg 2011b8e80941Smrg const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w); 2012b8e80941Smrg const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h); 2013b8e80941Smrg const uint32_t level_d = 2014b8e80941Smrg isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL, 2015b8e80941Smrg image_align_sa.d); 2016b8e80941Smrg 2017b8e80941Smrg const uint32_t max_layers_horiz = MIN(level_d, 1u << level); 2018b8e80941Smrg 2019b8e80941Smrg x += level_w * (logical_z_offset_px % max_layers_horiz); 2020b8e80941Smrg y += level_h * (logical_z_offset_px / max_layers_horiz); 2021b8e80941Smrg 2022b8e80941Smrg *x_offset_sa = x; 2023b8e80941Smrg *y_offset_sa = y; 2024b8e80941Smrg} 2025b8e80941Smrg 2026b8e80941Smrgstatic void 2027b8e80941Smrgget_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf, 2028b8e80941Smrg uint32_t level, 2029b8e80941Smrg uint32_t logical_array_layer, 2030b8e80941Smrg uint32_t *x_offset_sa, 2031b8e80941Smrg uint32_t *y_offset_sa) 2032b8e80941Smrg{ 2033b8e80941Smrg assert(level < surf->levels); 2034b8e80941Smrg assert(surf->logical_level0_px.depth == 1); 2035b8e80941Smrg assert(logical_array_layer < surf->logical_level0_px.array_len); 2036b8e80941Smrg 2037b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 2038b8e80941Smrg 2039b8e80941Smrg const struct isl_extent3d image_align_sa = 2040b8e80941Smrg isl_surf_get_image_alignment_sa(surf); 2041b8e80941Smrg 2042b8e80941Smrg struct isl_tile_info tile_info; 2043b8e80941Smrg isl_tiling_get_info(surf->tiling, fmtl->bpb, &tile_info); 2044b8e80941Smrg const struct isl_extent2d tile_extent_sa = { 2045b8e80941Smrg .w = tile_info.logical_extent_el.w * fmtl->bw, 
2046b8e80941Smrg .h = tile_info.logical_extent_el.h * fmtl->bh, 2047b8e80941Smrg }; 2048b8e80941Smrg /* Tile size is a multiple of image alignment */ 2049b8e80941Smrg assert(tile_extent_sa.w % image_align_sa.w == 0); 2050b8e80941Smrg assert(tile_extent_sa.h % image_align_sa.h == 0); 2051b8e80941Smrg 2052b8e80941Smrg const uint32_t W0 = surf->phys_level0_sa.w; 2053b8e80941Smrg const uint32_t H0 = surf->phys_level0_sa.h; 2054b8e80941Smrg 2055b8e80941Smrg /* Each image has the same height as LOD0 because the hardware thinks 2056b8e80941Smrg * everything is LOD0 2057b8e80941Smrg */ 2058b8e80941Smrg const uint32_t H = isl_align(H0, image_align_sa.h); 2059b8e80941Smrg 2060b8e80941Smrg /* Quick sanity check for consistency */ 2061b8e80941Smrg if (surf->phys_level0_sa.array_len > 1) 2062b8e80941Smrg assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh)); 2063b8e80941Smrg 2064b8e80941Smrg uint32_t x = 0, y = 0; 2065b8e80941Smrg for (uint32_t l = 0; l < level; ++l) { 2066b8e80941Smrg const uint32_t W = isl_minify(W0, l); 2067b8e80941Smrg 2068b8e80941Smrg const uint32_t w = isl_align(W, tile_extent_sa.w); 2069b8e80941Smrg const uint32_t h = isl_align(H * surf->phys_level0_sa.a, 2070b8e80941Smrg tile_extent_sa.h); 2071b8e80941Smrg 2072b8e80941Smrg if (l == 0) { 2073b8e80941Smrg y += h; 2074b8e80941Smrg } else { 2075b8e80941Smrg x += w; 2076b8e80941Smrg } 2077b8e80941Smrg } 2078b8e80941Smrg 2079b8e80941Smrg y += H * logical_array_layer; 2080b8e80941Smrg 2081b8e80941Smrg *x_offset_sa = x; 2082b8e80941Smrg *y_offset_sa = y; 2083b8e80941Smrg} 2084b8e80941Smrg 2085b8e80941Smrg/** 2086b8e80941Smrg * A variant of isl_surf_get_image_offset_sa() specific to 2087b8e80941Smrg * ISL_DIM_LAYOUT_GEN9_1D. 
2088b8e80941Smrg */ 2089b8e80941Smrgstatic void 2090b8e80941Smrgget_image_offset_sa_gen9_1d(const struct isl_surf *surf, 2091b8e80941Smrg uint32_t level, uint32_t layer, 2092b8e80941Smrg uint32_t *x_offset_sa, 2093b8e80941Smrg uint32_t *y_offset_sa) 2094b8e80941Smrg{ 2095b8e80941Smrg assert(level < surf->levels); 2096b8e80941Smrg assert(layer < surf->phys_level0_sa.array_len); 2097b8e80941Smrg assert(surf->phys_level0_sa.height == 1); 2098b8e80941Smrg assert(surf->phys_level0_sa.depth == 1); 2099b8e80941Smrg assert(surf->samples == 1); 2100b8e80941Smrg 2101b8e80941Smrg const uint32_t W0 = surf->phys_level0_sa.width; 2102b8e80941Smrg const struct isl_extent3d image_align_sa = 2103b8e80941Smrg isl_surf_get_image_alignment_sa(surf); 2104b8e80941Smrg 2105b8e80941Smrg uint32_t x = 0; 2106b8e80941Smrg 2107b8e80941Smrg for (uint32_t l = 0; l < level; ++l) { 2108b8e80941Smrg uint32_t W = isl_minify(W0, l); 2109b8e80941Smrg uint32_t w = isl_align_npot(W, image_align_sa.w); 2110b8e80941Smrg 2111b8e80941Smrg x += w; 2112b8e80941Smrg } 2113b8e80941Smrg 2114b8e80941Smrg *x_offset_sa = x; 2115b8e80941Smrg *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf); 2116b8e80941Smrg} 2117b8e80941Smrg 2118b8e80941Smrg/** 2119b8e80941Smrg * Calculate the offset, in units of surface samples, to a subimage in the 2120b8e80941Smrg * surface. 
2121b8e80941Smrg * 2122b8e80941Smrg * @invariant level < surface levels 2123b8e80941Smrg * @invariant logical_array_layer < logical array length of surface 2124b8e80941Smrg * @invariant logical_z_offset_px < logical depth of surface at level 2125b8e80941Smrg */ 2126b8e80941Smrgvoid 2127b8e80941Smrgisl_surf_get_image_offset_sa(const struct isl_surf *surf, 2128b8e80941Smrg uint32_t level, 2129b8e80941Smrg uint32_t logical_array_layer, 2130b8e80941Smrg uint32_t logical_z_offset_px, 2131b8e80941Smrg uint32_t *x_offset_sa, 2132b8e80941Smrg uint32_t *y_offset_sa) 2133b8e80941Smrg{ 2134b8e80941Smrg assert(level < surf->levels); 2135b8e80941Smrg assert(logical_array_layer < surf->logical_level0_px.array_len); 2136b8e80941Smrg assert(logical_z_offset_px 2137b8e80941Smrg < isl_minify(surf->logical_level0_px.depth, level)); 2138b8e80941Smrg 2139b8e80941Smrg switch (surf->dim_layout) { 2140b8e80941Smrg case ISL_DIM_LAYOUT_GEN9_1D: 2141b8e80941Smrg get_image_offset_sa_gen9_1d(surf, level, logical_array_layer, 2142b8e80941Smrg x_offset_sa, y_offset_sa); 2143b8e80941Smrg break; 2144b8e80941Smrg case ISL_DIM_LAYOUT_GEN4_2D: 2145b8e80941Smrg get_image_offset_sa_gen4_2d(surf, level, logical_array_layer 2146b8e80941Smrg + logical_z_offset_px, 2147b8e80941Smrg x_offset_sa, y_offset_sa); 2148b8e80941Smrg break; 2149b8e80941Smrg case ISL_DIM_LAYOUT_GEN4_3D: 2150b8e80941Smrg get_image_offset_sa_gen4_3d(surf, level, logical_array_layer + 2151b8e80941Smrg logical_z_offset_px, 2152b8e80941Smrg x_offset_sa, y_offset_sa); 2153b8e80941Smrg break; 2154b8e80941Smrg case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 2155b8e80941Smrg get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer + 2156b8e80941Smrg logical_z_offset_px, 2157b8e80941Smrg x_offset_sa, y_offset_sa); 2158b8e80941Smrg break; 2159b8e80941Smrg 2160b8e80941Smrg default: 2161b8e80941Smrg unreachable("not reached"); 2162b8e80941Smrg } 2163b8e80941Smrg} 2164b8e80941Smrg 2165b8e80941Smrgvoid 
2166b8e80941Smrgisl_surf_get_image_offset_el(const struct isl_surf *surf, 2167b8e80941Smrg uint32_t level, 2168b8e80941Smrg uint32_t logical_array_layer, 2169b8e80941Smrg uint32_t logical_z_offset_px, 2170b8e80941Smrg uint32_t *x_offset_el, 2171b8e80941Smrg uint32_t *y_offset_el) 2172b8e80941Smrg{ 2173b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 2174b8e80941Smrg 2175b8e80941Smrg assert(level < surf->levels); 2176b8e80941Smrg assert(logical_array_layer < surf->logical_level0_px.array_len); 2177b8e80941Smrg assert(logical_z_offset_px 2178b8e80941Smrg < isl_minify(surf->logical_level0_px.depth, level)); 2179b8e80941Smrg 2180b8e80941Smrg uint32_t x_offset_sa, y_offset_sa; 2181b8e80941Smrg isl_surf_get_image_offset_sa(surf, level, 2182b8e80941Smrg logical_array_layer, 2183b8e80941Smrg logical_z_offset_px, 2184b8e80941Smrg &x_offset_sa, 2185b8e80941Smrg &y_offset_sa); 2186b8e80941Smrg 2187b8e80941Smrg *x_offset_el = x_offset_sa / fmtl->bw; 2188b8e80941Smrg *y_offset_el = y_offset_sa / fmtl->bh; 2189b8e80941Smrg} 2190b8e80941Smrg 2191b8e80941Smrgvoid 2192b8e80941Smrgisl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf, 2193b8e80941Smrg uint32_t level, 2194b8e80941Smrg uint32_t logical_array_layer, 2195b8e80941Smrg uint32_t logical_z_offset_px, 2196b8e80941Smrg uint32_t *offset_B, 2197b8e80941Smrg uint32_t *x_offset_sa, 2198b8e80941Smrg uint32_t *y_offset_sa) 2199b8e80941Smrg{ 2200b8e80941Smrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 2201b8e80941Smrg 2202b8e80941Smrg uint32_t total_x_offset_el, total_y_offset_el; 2203b8e80941Smrg isl_surf_get_image_offset_el(surf, level, logical_array_layer, 2204b8e80941Smrg logical_z_offset_px, 2205b8e80941Smrg &total_x_offset_el, 2206b8e80941Smrg &total_y_offset_el); 2207b8e80941Smrg 2208b8e80941Smrg uint32_t x_offset_el, y_offset_el; 2209b8e80941Smrg isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb, 2210b8e80941Smrg surf->row_pitch_B, 
2211b8e80941Smrg total_x_offset_el, 2212b8e80941Smrg total_y_offset_el, 2213b8e80941Smrg offset_B, 2214b8e80941Smrg &x_offset_el, 2215b8e80941Smrg &y_offset_el); 2216b8e80941Smrg 2217b8e80941Smrg if (x_offset_sa) { 2218b8e80941Smrg *x_offset_sa = x_offset_el * fmtl->bw; 2219b8e80941Smrg } else { 2220b8e80941Smrg assert(x_offset_el == 0); 2221b8e80941Smrg } 2222b8e80941Smrg 2223b8e80941Smrg if (y_offset_sa) { 2224b8e80941Smrg *y_offset_sa = y_offset_el * fmtl->bh; 2225b8e80941Smrg } else { 2226b8e80941Smrg assert(y_offset_el == 0); 2227b8e80941Smrg } 2228b8e80941Smrg} 2229b8e80941Smrg 2230b8e80941Smrgvoid 2231b8e80941Smrgisl_surf_get_image_surf(const struct isl_device *dev, 2232b8e80941Smrg const struct isl_surf *surf, 2233b8e80941Smrg uint32_t level, 2234b8e80941Smrg uint32_t logical_array_layer, 2235b8e80941Smrg uint32_t logical_z_offset_px, 2236b8e80941Smrg struct isl_surf *image_surf, 2237b8e80941Smrg uint32_t *offset_B, 2238b8e80941Smrg uint32_t *x_offset_sa, 2239b8e80941Smrg uint32_t *y_offset_sa) 2240b8e80941Smrg{ 2241b8e80941Smrg isl_surf_get_image_offset_B_tile_sa(surf, 2242b8e80941Smrg level, 2243b8e80941Smrg logical_array_layer, 2244b8e80941Smrg logical_z_offset_px, 2245b8e80941Smrg offset_B, 2246b8e80941Smrg x_offset_sa, 2247b8e80941Smrg y_offset_sa); 2248b8e80941Smrg 2249b8e80941Smrg /* Even for cube maps there will be only single face, therefore drop the 2250b8e80941Smrg * corresponding flag if present. 
2251b8e80941Smrg */ 2252b8e80941Smrg const isl_surf_usage_flags_t usage = 2253b8e80941Smrg surf->usage & (~ISL_SURF_USAGE_CUBE_BIT); 2254b8e80941Smrg 2255b8e80941Smrg bool ok UNUSED; 2256b8e80941Smrg ok = isl_surf_init(dev, image_surf, 2257b8e80941Smrg .dim = ISL_SURF_DIM_2D, 2258b8e80941Smrg .format = surf->format, 2259b8e80941Smrg .width = isl_minify(surf->logical_level0_px.w, level), 2260b8e80941Smrg .height = isl_minify(surf->logical_level0_px.h, level), 2261b8e80941Smrg .depth = 1, 2262b8e80941Smrg .levels = 1, 2263b8e80941Smrg .array_len = 1, 2264b8e80941Smrg .samples = surf->samples, 2265b8e80941Smrg .row_pitch_B = surf->row_pitch_B, 2266b8e80941Smrg .usage = usage, 2267b8e80941Smrg .tiling_flags = (1 << surf->tiling)); 2268b8e80941Smrg assert(ok); 2269b8e80941Smrg} 2270b8e80941Smrg 2271b8e80941Smrgvoid 2272b8e80941Smrgisl_tiling_get_intratile_offset_el(enum isl_tiling tiling, 2273b8e80941Smrg uint32_t bpb, 2274b8e80941Smrg uint32_t row_pitch_B, 2275b8e80941Smrg uint32_t total_x_offset_el, 2276b8e80941Smrg uint32_t total_y_offset_el, 2277b8e80941Smrg uint32_t *base_address_offset, 2278b8e80941Smrg uint32_t *x_offset_el, 2279b8e80941Smrg uint32_t *y_offset_el) 2280b8e80941Smrg{ 2281b8e80941Smrg if (tiling == ISL_TILING_LINEAR) { 2282b8e80941Smrg assert(bpb % 8 == 0); 2283b8e80941Smrg *base_address_offset = total_y_offset_el * row_pitch_B + 2284b8e80941Smrg total_x_offset_el * (bpb / 8); 2285b8e80941Smrg *x_offset_el = 0; 2286b8e80941Smrg *y_offset_el = 0; 2287b8e80941Smrg return; 2288b8e80941Smrg } 2289b8e80941Smrg 2290b8e80941Smrg struct isl_tile_info tile_info; 2291b8e80941Smrg isl_tiling_get_info(tiling, bpb, &tile_info); 2292b8e80941Smrg 2293b8e80941Smrg assert(row_pitch_B % tile_info.phys_extent_B.width == 0); 2294b8e80941Smrg 2295b8e80941Smrg /* For non-power-of-two formats, we need the address to be both tile and 2296b8e80941Smrg * element-aligned. 
The easiest way to achieve this is to work with a tile 2297b8e80941Smrg * that is three times as wide as the regular tile. 2298b8e80941Smrg * 2299b8e80941Smrg * The tile info returned by get_tile_info has a logical size that is an 2300b8e80941Smrg * integer number of tile_info.format_bpb size elements. To scale the 2301b8e80941Smrg * tile, we scale up the physical width and then treat the logical tile 2302b8e80941Smrg * size as if it has bpb size elements. 2303b8e80941Smrg */ 2304b8e80941Smrg const uint32_t tile_el_scale = bpb / tile_info.format_bpb; 2305b8e80941Smrg tile_info.phys_extent_B.width *= tile_el_scale; 2306b8e80941Smrg 2307b8e80941Smrg /* Compute the offset into the tile */ 2308b8e80941Smrg *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w; 2309b8e80941Smrg *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h; 2310b8e80941Smrg 2311b8e80941Smrg /* Compute the offset of the tile in units of whole tiles */ 2312b8e80941Smrg uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w; 2313b8e80941Smrg uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h; 2314b8e80941Smrg 2315b8e80941Smrg *base_address_offset = 2316b8e80941Smrg y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B + 2317b8e80941Smrg x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w; 2318b8e80941Smrg} 2319b8e80941Smrg 2320b8e80941Smrguint32_t 2321b8e80941Smrgisl_surf_get_depth_format(const struct isl_device *dev, 2322b8e80941Smrg const struct isl_surf *surf) 2323b8e80941Smrg{ 2324b8e80941Smrg /* Support for separate stencil buffers began in gen5. Support for 2325b8e80941Smrg * interleaved depthstencil buffers ceased in gen7. The intermediate gens, 2326b8e80941Smrg * those that supported separate and interleaved stencil, were gen5 and 2327b8e80941Smrg * gen6. 
2328b8e80941Smrg * 2329b8e80941Smrg * For a list of all available formats, see the Sandybridge PRM >> Volume 2330b8e80941Smrg * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface 2331b8e80941Smrg * Format (p321). 2332b8e80941Smrg */ 2333b8e80941Smrg 2334b8e80941Smrg bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT; 2335b8e80941Smrg 2336b8e80941Smrg assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT); 2337b8e80941Smrg 2338b8e80941Smrg if (has_stencil) 2339b8e80941Smrg assert(ISL_DEV_GEN(dev) < 7); 2340b8e80941Smrg 2341b8e80941Smrg switch (surf->format) { 2342b8e80941Smrg default: 2343b8e80941Smrg unreachable("bad isl depth format"); 2344b8e80941Smrg case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS: 2345b8e80941Smrg assert(ISL_DEV_GEN(dev) < 7); 2346b8e80941Smrg return 0; /* D32_FLOAT_S8X24_UINT */ 2347b8e80941Smrg case ISL_FORMAT_R32_FLOAT: 2348b8e80941Smrg assert(!has_stencil); 2349b8e80941Smrg return 1; /* D32_FLOAT */ 2350b8e80941Smrg case ISL_FORMAT_R24_UNORM_X8_TYPELESS: 2351b8e80941Smrg if (has_stencil) { 2352b8e80941Smrg assert(ISL_DEV_GEN(dev) < 7); 2353b8e80941Smrg return 2; /* D24_UNORM_S8_UINT */ 2354b8e80941Smrg } else { 2355b8e80941Smrg assert(ISL_DEV_GEN(dev) >= 5); 2356b8e80941Smrg return 3; /* D24_UNORM_X8_UINT */ 2357b8e80941Smrg } 2358b8e80941Smrg case ISL_FORMAT_R16_UNORM: 2359b8e80941Smrg assert(!has_stencil); 2360b8e80941Smrg return 5; /* D16_UNORM */ 2361b8e80941Smrg } 2362b8e80941Smrg} 2363b8e80941Smrg 2364b8e80941Smrgbool 2365b8e80941Smrgisl_swizzle_supports_rendering(const struct gen_device_info *devinfo, 2366b8e80941Smrg struct isl_swizzle swizzle) 2367b8e80941Smrg{ 2368b8e80941Smrg if (devinfo->is_haswell) { 2369b8e80941Smrg /* From the Haswell PRM, 2370b8e80941Smrg * RENDER_SURFACE_STATE::Shader Channel Select Red 2371b8e80941Smrg * 2372b8e80941Smrg * "The Shader channel selects also define which shader channels are 2373b8e80941Smrg * written to which surface channel. 
If the Shader channel select is 2374b8e80941Smrg * SCS_ZERO or SCS_ONE then it is not written to the surface. If the 2375b8e80941Smrg * shader channel select is SCS_RED it is written to the surface red 2376b8e80941Smrg * channel and so on. If more than one shader channel select is set 2377b8e80941Smrg * to the same surface channel only the first shader channel in RGBA 2378b8e80941Smrg * order will be written." 2379b8e80941Smrg */ 2380b8e80941Smrg return true; 2381b8e80941Smrg } else if (devinfo->gen <= 7) { 2382b8e80941Smrg /* Ivy Bridge and early doesn't have any swizzling */ 2383b8e80941Smrg return isl_swizzle_is_identity(swizzle); 2384b8e80941Smrg } else { 2385b8e80941Smrg /* From the Sky Lake PRM Vol. 2d, 2386b8e80941Smrg * RENDER_SURFACE_STATE::Shader Channel Select Red 2387b8e80941Smrg * 2388b8e80941Smrg * "For Render Target, Red, Green and Blue Shader Channel Selects 2389b8e80941Smrg * MUST be such that only valid components can be swapped i.e. only 2390b8e80941Smrg * change the order of components in the pixel. Any other values for 2391b8e80941Smrg * these Shader Channel Select fields are not valid for Render 2392b8e80941Smrg * Targets. This also means that there MUST not be multiple shader 2393b8e80941Smrg * channels mapped to the same RT channel." 2394b8e80941Smrg * 2395b8e80941Smrg * From the Sky Lake PRM Vol. 2d, 2396b8e80941Smrg * RENDER_SURFACE_STATE::Shader Channel Select Alpha 2397b8e80941Smrg * 2398b8e80941Smrg * "For Render Target, this field MUST be programmed to 2399b8e80941Smrg * value = SCS_ALPHA." 
2400b8e80941Smrg */ 2401b8e80941Smrg return (swizzle.r == ISL_CHANNEL_SELECT_RED || 2402b8e80941Smrg swizzle.r == ISL_CHANNEL_SELECT_GREEN || 2403b8e80941Smrg swizzle.r == ISL_CHANNEL_SELECT_BLUE) && 2404b8e80941Smrg (swizzle.g == ISL_CHANNEL_SELECT_RED || 2405b8e80941Smrg swizzle.g == ISL_CHANNEL_SELECT_GREEN || 2406b8e80941Smrg swizzle.g == ISL_CHANNEL_SELECT_BLUE) && 2407b8e80941Smrg (swizzle.b == ISL_CHANNEL_SELECT_RED || 2408b8e80941Smrg swizzle.b == ISL_CHANNEL_SELECT_GREEN || 2409b8e80941Smrg swizzle.b == ISL_CHANNEL_SELECT_BLUE) && 2410b8e80941Smrg swizzle.r != swizzle.g && 2411b8e80941Smrg swizzle.r != swizzle.b && 2412b8e80941Smrg swizzle.g != swizzle.b && 2413b8e80941Smrg swizzle.a == ISL_CHANNEL_SELECT_ALPHA; 2414b8e80941Smrg } 2415b8e80941Smrg} 2416b8e80941Smrg 2417b8e80941Smrgstatic enum isl_channel_select 2418b8e80941Smrgswizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle) 2419b8e80941Smrg{ 2420b8e80941Smrg switch (chan) { 2421b8e80941Smrg case ISL_CHANNEL_SELECT_ZERO: 2422b8e80941Smrg case ISL_CHANNEL_SELECT_ONE: 2423b8e80941Smrg return chan; 2424b8e80941Smrg case ISL_CHANNEL_SELECT_RED: 2425b8e80941Smrg return swizzle.r; 2426b8e80941Smrg case ISL_CHANNEL_SELECT_GREEN: 2427b8e80941Smrg return swizzle.g; 2428b8e80941Smrg case ISL_CHANNEL_SELECT_BLUE: 2429b8e80941Smrg return swizzle.b; 2430b8e80941Smrg case ISL_CHANNEL_SELECT_ALPHA: 2431b8e80941Smrg return swizzle.a; 2432b8e80941Smrg default: 2433b8e80941Smrg unreachable("Invalid swizzle component"); 2434b8e80941Smrg } 2435b8e80941Smrg} 2436b8e80941Smrg 2437b8e80941Smrg/** 2438b8e80941Smrg * Returns the single swizzle that is equivalent to applying the two given 2439b8e80941Smrg * swizzles in sequence. 
2440b8e80941Smrg */ 2441b8e80941Smrgstruct isl_swizzle 2442b8e80941Smrgisl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second) 2443b8e80941Smrg{ 2444b8e80941Smrg return (struct isl_swizzle) { 2445b8e80941Smrg .r = swizzle_select(first.r, second), 2446b8e80941Smrg .g = swizzle_select(first.g, second), 2447b8e80941Smrg .b = swizzle_select(first.b, second), 2448b8e80941Smrg .a = swizzle_select(first.a, second), 2449b8e80941Smrg }; 2450b8e80941Smrg} 2451b8e80941Smrg 2452b8e80941Smrg/** 2453b8e80941Smrg * Returns a swizzle that is the pseudo-inverse of this swizzle. 2454b8e80941Smrg */ 2455b8e80941Smrgstruct isl_swizzle 2456b8e80941Smrgisl_swizzle_invert(struct isl_swizzle swizzle) 2457b8e80941Smrg{ 2458b8e80941Smrg /* Default to zero for channels which do not show up in the swizzle */ 2459b8e80941Smrg enum isl_channel_select chans[4] = { 2460b8e80941Smrg ISL_CHANNEL_SELECT_ZERO, 2461b8e80941Smrg ISL_CHANNEL_SELECT_ZERO, 2462b8e80941Smrg ISL_CHANNEL_SELECT_ZERO, 2463b8e80941Smrg ISL_CHANNEL_SELECT_ZERO, 2464b8e80941Smrg }; 2465b8e80941Smrg 2466b8e80941Smrg /* We go in ABGR order so that, if there are any duplicates, the first one 2467b8e80941Smrg * is taken if you look at it in RGBA order. This is what Haswell hardware 2468b8e80941Smrg * does for render target swizzles. 
2469b8e80941Smrg */ 2470b8e80941Smrg if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4) 2471b8e80941Smrg chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA; 2472b8e80941Smrg if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4) 2473b8e80941Smrg chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE; 2474b8e80941Smrg if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4) 2475b8e80941Smrg chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN; 2476b8e80941Smrg if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4) 2477b8e80941Smrg chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED; 2478b8e80941Smrg 2479b8e80941Smrg return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] }; 2480b8e80941Smrg} 2481