isl.c revision 01e04c3f
101e04c3fSmrg/* 201e04c3fSmrg * Copyright 2015 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include <assert.h> 2501e04c3fSmrg#include <stdarg.h> 2601e04c3fSmrg#include <stdio.h> 2701e04c3fSmrg 2801e04c3fSmrg#include "genxml/genX_bits.h" 2901e04c3fSmrg 3001e04c3fSmrg#include "isl.h" 3101e04c3fSmrg#include "isl_gen4.h" 3201e04c3fSmrg#include "isl_gen6.h" 3301e04c3fSmrg#include "isl_gen7.h" 3401e04c3fSmrg#include "isl_gen8.h" 3501e04c3fSmrg#include "isl_gen9.h" 3601e04c3fSmrg#include "isl_priv.h" 3701e04c3fSmrg 3801e04c3fSmrgvoid PRINTFLIKE(3, 4) UNUSED 3901e04c3fSmrg__isl_finishme(const char *file, int line, const char *fmt, ...) 4001e04c3fSmrg{ 4101e04c3fSmrg va_list ap; 4201e04c3fSmrg char buf[512]; 4301e04c3fSmrg 4401e04c3fSmrg va_start(ap, fmt); 4501e04c3fSmrg vsnprintf(buf, sizeof(buf), fmt, ap); 4601e04c3fSmrg va_end(ap); 4701e04c3fSmrg 4801e04c3fSmrg fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf); 4901e04c3fSmrg} 5001e04c3fSmrg 5101e04c3fSmrgvoid 5201e04c3fSmrgisl_device_init(struct isl_device *dev, 5301e04c3fSmrg const struct gen_device_info *info, 5401e04c3fSmrg bool has_bit6_swizzling) 5501e04c3fSmrg{ 5601e04c3fSmrg dev->info = info; 5701e04c3fSmrg dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6; 5801e04c3fSmrg dev->has_bit6_swizzling = has_bit6_swizzling; 5901e04c3fSmrg 6001e04c3fSmrg /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some 6101e04c3fSmrg * device properties at buildtime. Verify that the macros with the device 6201e04c3fSmrg * properties chosen during runtime. 6301e04c3fSmrg */ 6401e04c3fSmrg ISL_DEV_GEN_SANITIZE(dev); 6501e04c3fSmrg ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev); 6601e04c3fSmrg 6701e04c3fSmrg /* Did we break hiz or stencil? */ 6801e04c3fSmrg if (ISL_DEV_USE_SEPARATE_STENCIL(dev)) 6901e04c3fSmrg assert(info->has_hiz_and_separate_stencil); 7001e04c3fSmrg if (info->must_use_separate_stencil) 7101e04c3fSmrg assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); 7201e04c3fSmrg 7301e04c3fSmrg dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4; 7401e04c3fSmrg dev->ss.align = isl_align(dev->ss.size, 32); 7501e04c3fSmrg 7601e04c3fSmrg dev->ss.clear_color_state_size = CLEAR_COLOR_length(info) * 4; 7701e04c3fSmrg dev->ss.clear_color_state_offset = 7801e04c3fSmrg RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4; 7901e04c3fSmrg 8001e04c3fSmrg dev->ss.clear_value_size = 8101e04c3fSmrg isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) + 8201e04c3fSmrg RENDER_SURFACE_STATE_GreenClearColor_bits(info) + 8301e04c3fSmrg RENDER_SURFACE_STATE_BlueClearColor_bits(info) + 8401e04c3fSmrg RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8; 8501e04c3fSmrg 8601e04c3fSmrg dev->ss.clear_value_offset = 8701e04c3fSmrg RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4; 8801e04c3fSmrg 8901e04c3fSmrg assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0); 9001e04c3fSmrg dev->ss.addr_offset = 9101e04c3fSmrg RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8; 9201e04c3fSmrg 9301e04c3fSmrg /* The "Auxiliary Surface Base Address" field starts a bit higher up 9401e04c3fSmrg * because the bottom 12 bits are used for other things. Round down to 9501e04c3fSmrg * the nearest dword before. 9601e04c3fSmrg */ 9701e04c3fSmrg dev->ss.aux_addr_offset = 9801e04c3fSmrg (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8; 9901e04c3fSmrg 10001e04c3fSmrg dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4; 10101e04c3fSmrg assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 10201e04c3fSmrg dev->ds.depth_offset = 10301e04c3fSmrg _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8; 10401e04c3fSmrg 10501e04c3fSmrg if (dev->use_separate_stencil) { 10601e04c3fSmrg dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 + 10701e04c3fSmrg _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 + 10801e04c3fSmrg _3DSTATE_CLEAR_PARAMS_length(info) * 4; 10901e04c3fSmrg 11001e04c3fSmrg assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 11101e04c3fSmrg dev->ds.stencil_offset = 11201e04c3fSmrg _3DSTATE_DEPTH_BUFFER_length(info) * 4 + 11301e04c3fSmrg _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8; 11401e04c3fSmrg 11501e04c3fSmrg assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0); 11601e04c3fSmrg dev->ds.hiz_offset = 11701e04c3fSmrg _3DSTATE_DEPTH_BUFFER_length(info) * 4 + 11801e04c3fSmrg _3DSTATE_STENCIL_BUFFER_length(info) * 4 + 11901e04c3fSmrg _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8; 12001e04c3fSmrg } else { 12101e04c3fSmrg dev->ds.stencil_offset = 0; 12201e04c3fSmrg dev->ds.hiz_offset = 0; 12301e04c3fSmrg } 12401e04c3fSmrg} 12501e04c3fSmrg 12601e04c3fSmrg/** 12701e04c3fSmrg * @brief Query the set of multisamples supported by the device. 12801e04c3fSmrg * 12901e04c3fSmrg * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always 13001e04c3fSmrg * supported. 13101e04c3fSmrg */ 13201e04c3fSmrgisl_sample_count_mask_t ATTRIBUTE_CONST 13301e04c3fSmrgisl_device_get_sample_counts(struct isl_device *dev) 13401e04c3fSmrg{ 13501e04c3fSmrg if (ISL_DEV_GEN(dev) >= 9) { 13601e04c3fSmrg return ISL_SAMPLE_COUNT_1_BIT | 13701e04c3fSmrg ISL_SAMPLE_COUNT_2_BIT | 13801e04c3fSmrg ISL_SAMPLE_COUNT_4_BIT | 13901e04c3fSmrg ISL_SAMPLE_COUNT_8_BIT | 14001e04c3fSmrg ISL_SAMPLE_COUNT_16_BIT; 14101e04c3fSmrg } else if (ISL_DEV_GEN(dev) >= 8) { 14201e04c3fSmrg return ISL_SAMPLE_COUNT_1_BIT | 14301e04c3fSmrg ISL_SAMPLE_COUNT_2_BIT | 14401e04c3fSmrg ISL_SAMPLE_COUNT_4_BIT | 14501e04c3fSmrg ISL_SAMPLE_COUNT_8_BIT; 14601e04c3fSmrg } else if (ISL_DEV_GEN(dev) >= 7) { 14701e04c3fSmrg return ISL_SAMPLE_COUNT_1_BIT | 14801e04c3fSmrg ISL_SAMPLE_COUNT_4_BIT | 14901e04c3fSmrg ISL_SAMPLE_COUNT_8_BIT; 15001e04c3fSmrg } else if (ISL_DEV_GEN(dev) >= 6) { 15101e04c3fSmrg return ISL_SAMPLE_COUNT_1_BIT | 15201e04c3fSmrg ISL_SAMPLE_COUNT_4_BIT; 15301e04c3fSmrg } else { 15401e04c3fSmrg return ISL_SAMPLE_COUNT_1_BIT; 15501e04c3fSmrg } 15601e04c3fSmrg} 15701e04c3fSmrg 15801e04c3fSmrg/** 15901e04c3fSmrg * @param[out] info is written only on success 16001e04c3fSmrg */ 16101e04c3fSmrgstatic void 16201e04c3fSmrgisl_tiling_get_info(enum isl_tiling tiling, 16301e04c3fSmrg uint32_t format_bpb, 16401e04c3fSmrg struct isl_tile_info *tile_info) 16501e04c3fSmrg{ 16601e04c3fSmrg const uint32_t bs = format_bpb / 8; 16701e04c3fSmrg struct isl_extent2d logical_el, phys_B; 16801e04c3fSmrg 16901e04c3fSmrg if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) { 17001e04c3fSmrg /* It is possible to have non-power-of-two formats in a tiled buffer. 17101e04c3fSmrg * The easiest way to handle this is to treat the tile as if it is three 17201e04c3fSmrg * times as wide. This way no pixel will ever cross a tile boundary. 17301e04c3fSmrg * This really only works on legacy X and Y tiling formats. 17401e04c3fSmrg */ 17501e04c3fSmrg assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0); 17601e04c3fSmrg assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3)); 17701e04c3fSmrg isl_tiling_get_info(tiling, format_bpb / 3, tile_info); 17801e04c3fSmrg return; 17901e04c3fSmrg } 18001e04c3fSmrg 18101e04c3fSmrg switch (tiling) { 18201e04c3fSmrg case ISL_TILING_LINEAR: 18301e04c3fSmrg assert(bs > 0); 18401e04c3fSmrg logical_el = isl_extent2d(1, 1); 18501e04c3fSmrg phys_B = isl_extent2d(bs, 1); 18601e04c3fSmrg break; 18701e04c3fSmrg 18801e04c3fSmrg case ISL_TILING_X: 18901e04c3fSmrg assert(bs > 0); 19001e04c3fSmrg logical_el = isl_extent2d(512 / bs, 8); 19101e04c3fSmrg phys_B = isl_extent2d(512, 8); 19201e04c3fSmrg break; 19301e04c3fSmrg 19401e04c3fSmrg case ISL_TILING_Y0: 19501e04c3fSmrg assert(bs > 0); 19601e04c3fSmrg logical_el = isl_extent2d(128 / bs, 32); 19701e04c3fSmrg phys_B = isl_extent2d(128, 32); 19801e04c3fSmrg break; 19901e04c3fSmrg 20001e04c3fSmrg case ISL_TILING_W: 20101e04c3fSmrg assert(bs == 1); 20201e04c3fSmrg logical_el = isl_extent2d(64, 64); 20301e04c3fSmrg /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch: 20401e04c3fSmrg * 20501e04c3fSmrg * "If the surface is a stencil buffer (and thus has Tile Mode set 20601e04c3fSmrg * to TILEMODE_WMAJOR), the pitch must be set to 2x the value 20701e04c3fSmrg * computed based on width, as the stencil buffer is stored with two 20801e04c3fSmrg * rows interleaved." 20901e04c3fSmrg * 21001e04c3fSmrg * This, together with the fact that stencil buffers are referred to as 21101e04c3fSmrg * being Y-tiled in the PRMs for older hardware implies that the 21201e04c3fSmrg * physical size of a W-tile is actually the same as for a Y-tile. 21301e04c3fSmrg */ 21401e04c3fSmrg phys_B = isl_extent2d(128, 32); 21501e04c3fSmrg break; 21601e04c3fSmrg 21701e04c3fSmrg case ISL_TILING_Yf: 21801e04c3fSmrg case ISL_TILING_Ys: { 21901e04c3fSmrg bool is_Ys = tiling == ISL_TILING_Ys; 22001e04c3fSmrg 22101e04c3fSmrg assert(bs > 0); 22201e04c3fSmrg unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys)); 22301e04c3fSmrg unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys)); 22401e04c3fSmrg 22501e04c3fSmrg logical_el = isl_extent2d(width / bs, height); 22601e04c3fSmrg phys_B = isl_extent2d(width, height); 22701e04c3fSmrg break; 22801e04c3fSmrg } 22901e04c3fSmrg 23001e04c3fSmrg case ISL_TILING_HIZ: 23101e04c3fSmrg /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4 23201e04c3fSmrg * 128bpb format. The tiling has the same physical dimensions as 23301e04c3fSmrg * Y-tiling but actually has two HiZ columns per Y-tiled column. 23401e04c3fSmrg */ 23501e04c3fSmrg assert(bs == 16); 23601e04c3fSmrg logical_el = isl_extent2d(16, 16); 23701e04c3fSmrg phys_B = isl_extent2d(128, 32); 23801e04c3fSmrg break; 23901e04c3fSmrg 24001e04c3fSmrg case ISL_TILING_CCS: 24101e04c3fSmrg /* CCS surfaces are required to have one of the GENX_CCS_* formats which 24201e04c3fSmrg * have a block size of 1 or 2 bits per block and each CCS element 24301e04c3fSmrg * corresponds to one cache-line pair in the main surface. From the Sky 24401e04c3fSmrg * Lake PRM Vol. 12 in the section on planes: 24501e04c3fSmrg * 24601e04c3fSmrg * "The Color Control Surface (CCS) contains the compression status 24701e04c3fSmrg * of the cache-line pairs. The compression state of the cache-line 24801e04c3fSmrg * pair is specified by 2 bits in the CCS. Each CCS cache-line 24901e04c3fSmrg * represents an area on the main surface of 16x16 sets of 128 byte 25001e04c3fSmrg * Y-tiled cache-line-pairs. CCS is always Y tiled." 25101e04c3fSmrg * 25201e04c3fSmrg * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines. 25301e04c3fSmrg * Since each cache line corresponds to a 16x16 set of cache-line pairs, 25401e04c3fSmrg * that yields total tile area of 128x128 cache-line pairs or CCS 25501e04c3fSmrg * elements. On older hardware, each CCS element is 1 bit and the tile 25601e04c3fSmrg * is 128x256 elements. 25701e04c3fSmrg */ 25801e04c3fSmrg assert(format_bpb == 1 || format_bpb == 2); 25901e04c3fSmrg logical_el = isl_extent2d(128, 256 / format_bpb); 26001e04c3fSmrg phys_B = isl_extent2d(128, 32); 26101e04c3fSmrg break; 26201e04c3fSmrg 26301e04c3fSmrg default: 26401e04c3fSmrg unreachable("not reached"); 26501e04c3fSmrg } /* end switch */ 26601e04c3fSmrg 26701e04c3fSmrg *tile_info = (struct isl_tile_info) { 26801e04c3fSmrg .tiling = tiling, 26901e04c3fSmrg .format_bpb = format_bpb, 27001e04c3fSmrg .logical_extent_el = logical_el, 27101e04c3fSmrg .phys_extent_B = phys_B, 27201e04c3fSmrg }; 27301e04c3fSmrg} 27401e04c3fSmrg 27501e04c3fSmrgbool 27601e04c3fSmrgisl_color_value_is_zero(union isl_color_value value, 27701e04c3fSmrg enum isl_format format) 27801e04c3fSmrg{ 27901e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(format); 28001e04c3fSmrg 28101e04c3fSmrg#define RETURN_FALSE_IF_NOT_0(c, i) \ 28201e04c3fSmrg if (fmtl->channels.c.bits && value.u32[i] != 0) \ 28301e04c3fSmrg return false 28401e04c3fSmrg 28501e04c3fSmrg RETURN_FALSE_IF_NOT_0(r, 0); 28601e04c3fSmrg RETURN_FALSE_IF_NOT_0(g, 1); 28701e04c3fSmrg RETURN_FALSE_IF_NOT_0(b, 2); 28801e04c3fSmrg RETURN_FALSE_IF_NOT_0(a, 3); 28901e04c3fSmrg 29001e04c3fSmrg#undef RETURN_FALSE_IF_NOT_0 29101e04c3fSmrg 29201e04c3fSmrg return true; 29301e04c3fSmrg} 29401e04c3fSmrg 29501e04c3fSmrgbool 29601e04c3fSmrgisl_color_value_is_zero_one(union isl_color_value value, 29701e04c3fSmrg enum isl_format format) 29801e04c3fSmrg{ 29901e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(format); 30001e04c3fSmrg 30101e04c3fSmrg#define RETURN_FALSE_IF_NOT_0_1(c, i, field) \ 30201e04c3fSmrg if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \ 30301e04c3fSmrg return false 30401e04c3fSmrg 30501e04c3fSmrg if (isl_format_has_int_channel(format)) { 30601e04c3fSmrg RETURN_FALSE_IF_NOT_0_1(r, 0, u32); 30701e04c3fSmrg RETURN_FALSE_IF_NOT_0_1(g, 1, u32); 30801e04c3fSmrg RETURN_FALSE_IF_NOT_0_1(b, 2, u32); 30901e04c3fSmrg RETURN_FALSE_IF_NOT_0_1(a, 3, u32); 31001e04c3fSmrg } else { 31101e04c3fSmrg RETURN_FALSE_IF_NOT_0_1(r, 0, f32); 31201e04c3fSmrg RETURN_FALSE_IF_NOT_0_1(g, 1, f32); 31301e04c3fSmrg RETURN_FALSE_IF_NOT_0_1(b, 2, f32); 31401e04c3fSmrg RETURN_FALSE_IF_NOT_0_1(a, 3, f32); 31501e04c3fSmrg } 31601e04c3fSmrg 31701e04c3fSmrg#undef RETURN_FALSE_IF_NOT_0_1 31801e04c3fSmrg 31901e04c3fSmrg return true; 32001e04c3fSmrg} 32101e04c3fSmrg 32201e04c3fSmrg/** 32301e04c3fSmrg * @param[out] tiling is set only on success 32401e04c3fSmrg */ 32501e04c3fSmrgstatic bool 32601e04c3fSmrgisl_surf_choose_tiling(const struct isl_device *dev, 32701e04c3fSmrg const struct isl_surf_init_info *restrict info, 32801e04c3fSmrg enum isl_tiling *tiling) 32901e04c3fSmrg{ 33001e04c3fSmrg isl_tiling_flags_t tiling_flags = info->tiling_flags; 33101e04c3fSmrg 33201e04c3fSmrg /* HiZ surfaces always use the HiZ tiling */ 33301e04c3fSmrg if (info->usage & ISL_SURF_USAGE_HIZ_BIT) { 33401e04c3fSmrg assert(info->format == ISL_FORMAT_HIZ); 33501e04c3fSmrg assert(tiling_flags == ISL_TILING_HIZ_BIT); 33601e04c3fSmrg *tiling = ISL_TILING_HIZ; 33701e04c3fSmrg return true; 33801e04c3fSmrg } 33901e04c3fSmrg 34001e04c3fSmrg /* CCS surfaces always use the CCS tiling */ 34101e04c3fSmrg if (info->usage & ISL_SURF_USAGE_CCS_BIT) { 34201e04c3fSmrg assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS); 34301e04c3fSmrg assert(tiling_flags == ISL_TILING_CCS_BIT); 34401e04c3fSmrg *tiling = ISL_TILING_CCS; 34501e04c3fSmrg return true; 34601e04c3fSmrg } 34701e04c3fSmrg 34801e04c3fSmrg if (ISL_DEV_GEN(dev) >= 6) { 34901e04c3fSmrg isl_gen6_filter_tiling(dev, info, &tiling_flags); 35001e04c3fSmrg } else { 35101e04c3fSmrg isl_gen4_filter_tiling(dev, info, &tiling_flags); 35201e04c3fSmrg } 35301e04c3fSmrg 35401e04c3fSmrg #define CHOOSE(__tiling) \ 35501e04c3fSmrg do { \ 35601e04c3fSmrg if (tiling_flags & (1u << (__tiling))) { \ 35701e04c3fSmrg *tiling = (__tiling); \ 35801e04c3fSmrg return true; \ 35901e04c3fSmrg } \ 36001e04c3fSmrg } while (0) 36101e04c3fSmrg 36201e04c3fSmrg /* Of the tiling modes remaining, choose the one that offers the best 36301e04c3fSmrg * performance. 36401e04c3fSmrg */ 36501e04c3fSmrg 36601e04c3fSmrg if (info->dim == ISL_SURF_DIM_1D) { 36701e04c3fSmrg /* Prefer linear for 1D surfaces because they do not benefit from 36801e04c3fSmrg * tiling. To the contrary, tiling leads to wasted memory and poor 36901e04c3fSmrg * memory locality due to the swizzling and alignment restrictions 37001e04c3fSmrg * required in tiled surfaces. 37101e04c3fSmrg */ 37201e04c3fSmrg CHOOSE(ISL_TILING_LINEAR); 37301e04c3fSmrg } 37401e04c3fSmrg 37501e04c3fSmrg CHOOSE(ISL_TILING_Ys); 37601e04c3fSmrg CHOOSE(ISL_TILING_Yf); 37701e04c3fSmrg CHOOSE(ISL_TILING_Y0); 37801e04c3fSmrg CHOOSE(ISL_TILING_X); 37901e04c3fSmrg CHOOSE(ISL_TILING_W); 38001e04c3fSmrg CHOOSE(ISL_TILING_LINEAR); 38101e04c3fSmrg 38201e04c3fSmrg #undef CHOOSE 38301e04c3fSmrg 38401e04c3fSmrg /* No tiling mode accomodates the inputs. */ 38501e04c3fSmrg return false; 38601e04c3fSmrg} 38701e04c3fSmrg 38801e04c3fSmrgstatic bool 38901e04c3fSmrgisl_choose_msaa_layout(const struct isl_device *dev, 39001e04c3fSmrg const struct isl_surf_init_info *info, 39101e04c3fSmrg enum isl_tiling tiling, 39201e04c3fSmrg enum isl_msaa_layout *msaa_layout) 39301e04c3fSmrg{ 39401e04c3fSmrg if (ISL_DEV_GEN(dev) >= 8) { 39501e04c3fSmrg return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout); 39601e04c3fSmrg } else if (ISL_DEV_GEN(dev) >= 7) { 39701e04c3fSmrg return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout); 39801e04c3fSmrg } else if (ISL_DEV_GEN(dev) >= 6) { 39901e04c3fSmrg return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout); 40001e04c3fSmrg } else { 40101e04c3fSmrg return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout); 40201e04c3fSmrg } 40301e04c3fSmrg} 40401e04c3fSmrg 40501e04c3fSmrgstruct isl_extent2d 40601e04c3fSmrgisl_get_interleaved_msaa_px_size_sa(uint32_t samples) 40701e04c3fSmrg{ 40801e04c3fSmrg assert(isl_is_pow2(samples)); 40901e04c3fSmrg 41001e04c3fSmrg /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level 41101e04c3fSmrg * Sizes (p133): 41201e04c3fSmrg * 41301e04c3fSmrg * If the surface is multisampled and it is a depth or stencil surface 41401e04c3fSmrg * or Multisampled Surface StorageFormat in SURFACE_STATE is 41501e04c3fSmrg * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before 41601e04c3fSmrg * proceeding: [...] 41701e04c3fSmrg */ 41801e04c3fSmrg return (struct isl_extent2d) { 41901e04c3fSmrg .width = 1 << ((ffs(samples) - 0) / 2), 42001e04c3fSmrg .height = 1 << ((ffs(samples) - 1) / 2), 42101e04c3fSmrg }; 42201e04c3fSmrg} 42301e04c3fSmrg 42401e04c3fSmrgstatic void 42501e04c3fSmrgisl_msaa_interleaved_scale_px_to_sa(uint32_t samples, 42601e04c3fSmrg uint32_t *width, uint32_t *height) 42701e04c3fSmrg{ 42801e04c3fSmrg const struct isl_extent2d px_size_sa = 42901e04c3fSmrg isl_get_interleaved_msaa_px_size_sa(samples); 43001e04c3fSmrg 43101e04c3fSmrg if (width) 43201e04c3fSmrg *width = isl_align(*width, 2) * px_size_sa.width; 43301e04c3fSmrg if (height) 43401e04c3fSmrg *height = isl_align(*height, 2) * px_size_sa.height; 43501e04c3fSmrg} 43601e04c3fSmrg 43701e04c3fSmrgstatic enum isl_array_pitch_span 43801e04c3fSmrgisl_choose_array_pitch_span(const struct isl_device *dev, 43901e04c3fSmrg const struct isl_surf_init_info *restrict info, 44001e04c3fSmrg enum isl_dim_layout dim_layout, 44101e04c3fSmrg const struct isl_extent4d *phys_level0_sa) 44201e04c3fSmrg{ 44301e04c3fSmrg switch (dim_layout) { 44401e04c3fSmrg case ISL_DIM_LAYOUT_GEN9_1D: 44501e04c3fSmrg case ISL_DIM_LAYOUT_GEN4_2D: 44601e04c3fSmrg if (ISL_DEV_GEN(dev) >= 8) { 44701e04c3fSmrg /* QPitch becomes programmable in Broadwell. So choose the 44801e04c3fSmrg * most compact QPitch possible in order to conserve memory. 44901e04c3fSmrg * 45001e04c3fSmrg * From the Broadwell PRM >> Volume 2d: Command Reference: Structures 45101e04c3fSmrg * >> RENDER_SURFACE_STATE Surface QPitch (p325): 45201e04c3fSmrg * 45301e04c3fSmrg * - Software must ensure that this field is set to a value 45401e04c3fSmrg * sufficiently large such that the array slices in the surface 45501e04c3fSmrg * do not overlap. Refer to the Memory Data Formats section for 45601e04c3fSmrg * information on how surfaces are stored in memory. 45701e04c3fSmrg * 45801e04c3fSmrg * - This field specifies the distance in rows between array 45901e04c3fSmrg * slices. It is used only in the following cases: 46001e04c3fSmrg * 46101e04c3fSmrg * - Surface Array is enabled OR 46201e04c3fSmrg * - Number of Mulitsamples is not NUMSAMPLES_1 and 46301e04c3fSmrg * Multisampled Surface Storage Format set to MSFMT_MSS OR 46401e04c3fSmrg * - Surface Type is SURFTYPE_CUBE 46501e04c3fSmrg */ 46601e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 46701e04c3fSmrg } else if (ISL_DEV_GEN(dev) >= 7) { 46801e04c3fSmrg /* Note that Ivybridge introduces 46901e04c3fSmrg * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the 47001e04c3fSmrg * driver more control over the QPitch. 47101e04c3fSmrg */ 47201e04c3fSmrg 47301e04c3fSmrg if (phys_level0_sa->array_len == 1) { 47401e04c3fSmrg /* The hardware will never use the QPitch. So choose the most 47501e04c3fSmrg * compact QPitch possible in order to conserve memory. 47601e04c3fSmrg */ 47701e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 47801e04c3fSmrg } 47901e04c3fSmrg 48001e04c3fSmrg if (isl_surf_usage_is_depth_or_stencil(info->usage) || 48101e04c3fSmrg (info->usage & ISL_SURF_USAGE_HIZ_BIT)) { 48201e04c3fSmrg /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >> 48301e04c3fSmrg * Section 6.18.4.7: Surface Arrays (p112): 48401e04c3fSmrg * 48501e04c3fSmrg * If Surface Array Spacing is set to ARYSPC_FULL (note that 48601e04c3fSmrg * the depth buffer and stencil buffer have an implied value of 48701e04c3fSmrg * ARYSPC_FULL): 48801e04c3fSmrg */ 48901e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_FULL; 49001e04c3fSmrg } 49101e04c3fSmrg 49201e04c3fSmrg if (info->levels == 1) { 49301e04c3fSmrg /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing 49401e04c3fSmrg * to ARYSPC_LOD0. 49501e04c3fSmrg */ 49601e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 49701e04c3fSmrg } 49801e04c3fSmrg 49901e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_FULL; 50001e04c3fSmrg } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && 50101e04c3fSmrg ISL_DEV_USE_SEPARATE_STENCIL(dev) && 50201e04c3fSmrg isl_surf_usage_is_stencil(info->usage)) { 50301e04c3fSmrg /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 50401e04c3fSmrg * Graphics Core >> Section 7.18.3.7: Surface Arrays: 50501e04c3fSmrg * 50601e04c3fSmrg * The separate stencil buffer does not support mip mapping, thus 50701e04c3fSmrg * the storage for LODs other than LOD 0 is not needed. 50801e04c3fSmrg */ 50901e04c3fSmrg assert(info->levels == 1); 51001e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 51101e04c3fSmrg } else { 51201e04c3fSmrg if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && 51301e04c3fSmrg ISL_DEV_USE_SEPARATE_STENCIL(dev) && 51401e04c3fSmrg isl_surf_usage_is_stencil(info->usage)) { 51501e04c3fSmrg /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 51601e04c3fSmrg * Graphics Core >> Section 7.18.3.7: Surface Arrays: 51701e04c3fSmrg * 51801e04c3fSmrg * The separate stencil buffer does not support mip mapping, 51901e04c3fSmrg * thus the storage for LODs other than LOD 0 is not needed. 52001e04c3fSmrg */ 52101e04c3fSmrg assert(info->levels == 1); 52201e04c3fSmrg assert(phys_level0_sa->array_len == 1); 52301e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 52401e04c3fSmrg } 52501e04c3fSmrg 52601e04c3fSmrg if (phys_level0_sa->array_len == 1) { 52701e04c3fSmrg /* The hardware will never use the QPitch. So choose the most 52801e04c3fSmrg * compact QPitch possible in order to conserve memory. 52901e04c3fSmrg */ 53001e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 53101e04c3fSmrg } 53201e04c3fSmrg 53301e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_FULL; 53401e04c3fSmrg } 53501e04c3fSmrg 53601e04c3fSmrg case ISL_DIM_LAYOUT_GEN4_3D: 53701e04c3fSmrg /* The hardware will never use the QPitch. So choose the most 53801e04c3fSmrg * compact QPitch possible in order to conserve memory. 53901e04c3fSmrg */ 54001e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 54101e04c3fSmrg 54201e04c3fSmrg case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 54301e04c3fSmrg /* Each array image in the gen6 stencil of HiZ surface is compact in the 54401e04c3fSmrg * sense that every LOD is a compact array of the same size as LOD0. 54501e04c3fSmrg */ 54601e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_COMPACT; 54701e04c3fSmrg } 54801e04c3fSmrg 54901e04c3fSmrg unreachable("bad isl_dim_layout"); 55001e04c3fSmrg return ISL_ARRAY_PITCH_SPAN_FULL; 55101e04c3fSmrg} 55201e04c3fSmrg 55301e04c3fSmrgstatic void 55401e04c3fSmrgisl_choose_image_alignment_el(const struct isl_device *dev, 55501e04c3fSmrg const struct isl_surf_init_info *restrict info, 55601e04c3fSmrg enum isl_tiling tiling, 55701e04c3fSmrg enum isl_dim_layout dim_layout, 55801e04c3fSmrg enum isl_msaa_layout msaa_layout, 55901e04c3fSmrg struct isl_extent3d *image_align_el) 56001e04c3fSmrg{ 56101e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 56201e04c3fSmrg if (fmtl->txc == ISL_TXC_MCS) { 56301e04c3fSmrg assert(tiling == ISL_TILING_Y0); 56401e04c3fSmrg 56501e04c3fSmrg /* 56601e04c3fSmrg * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)": 56701e04c3fSmrg * 56801e04c3fSmrg * Height, width, and layout of MCS buffer in this case must match with 56901e04c3fSmrg * Render Target height, width, and layout. MCS buffer is tiledY. 57001e04c3fSmrg * 57101e04c3fSmrg * To avoid wasting memory, choose the smallest alignment possible: 57201e04c3fSmrg * HALIGN_4 and VALIGN_4. 57301e04c3fSmrg */ 57401e04c3fSmrg *image_align_el = isl_extent3d(4, 4, 1); 57501e04c3fSmrg return; 57601e04c3fSmrg } else if (info->format == ISL_FORMAT_HIZ) { 57701e04c3fSmrg assert(ISL_DEV_GEN(dev) >= 6); 57801e04c3fSmrg if (ISL_DEV_GEN(dev) == 6) { 57901e04c3fSmrg /* HiZ surfaces on Sandy Bridge are packed tightly. */ 58001e04c3fSmrg *image_align_el = isl_extent3d(1, 1, 1); 58101e04c3fSmrg } else { 58201e04c3fSmrg /* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the 58301e04c3fSmrg * primary surface which works out to 2x2 HiZ elments. 58401e04c3fSmrg */ 58501e04c3fSmrg *image_align_el = isl_extent3d(2, 2, 1); 58601e04c3fSmrg } 58701e04c3fSmrg return; 58801e04c3fSmrg } 58901e04c3fSmrg 59001e04c3fSmrg if (ISL_DEV_GEN(dev) >= 9) { 59101e04c3fSmrg isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout, 59201e04c3fSmrg msaa_layout, image_align_el); 59301e04c3fSmrg } else if (ISL_DEV_GEN(dev) >= 8) { 59401e04c3fSmrg isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout, 59501e04c3fSmrg msaa_layout, image_align_el); 59601e04c3fSmrg } else if (ISL_DEV_GEN(dev) >= 7) { 59701e04c3fSmrg isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout, 59801e04c3fSmrg msaa_layout, image_align_el); 59901e04c3fSmrg } else if (ISL_DEV_GEN(dev) >= 6) { 60001e04c3fSmrg isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout, 60101e04c3fSmrg msaa_layout, image_align_el); 60201e04c3fSmrg } else { 60301e04c3fSmrg isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout, 60401e04c3fSmrg msaa_layout, image_align_el); 60501e04c3fSmrg } 60601e04c3fSmrg} 60701e04c3fSmrg 60801e04c3fSmrgstatic enum isl_dim_layout 60901e04c3fSmrgisl_surf_choose_dim_layout(const struct isl_device *dev, 61001e04c3fSmrg enum isl_surf_dim logical_dim, 61101e04c3fSmrg enum isl_tiling tiling, 61201e04c3fSmrg isl_surf_usage_flags_t usage) 61301e04c3fSmrg{ 61401e04c3fSmrg /* Sandy bridge needs a special layout for HiZ and stencil. */ 61501e04c3fSmrg if (ISL_DEV_GEN(dev) == 6 && 61601e04c3fSmrg (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ)) 61701e04c3fSmrg return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ; 61801e04c3fSmrg 61901e04c3fSmrg if (ISL_DEV_GEN(dev) >= 9) { 62001e04c3fSmrg switch (logical_dim) { 62101e04c3fSmrg case ISL_SURF_DIM_1D: 62201e04c3fSmrg /* From the Sky Lake PRM Vol. 5, "1D Surfaces": 62301e04c3fSmrg * 62401e04c3fSmrg * One-dimensional surfaces use a tiling mode of linear. 62501e04c3fSmrg * Technically, they are not tiled resources, but the Tiled 62601e04c3fSmrg * Resource Mode field in RENDER_SURFACE_STATE is still used to 62701e04c3fSmrg * indicate the alignment requirements for this linear surface 62801e04c3fSmrg * (See 1D Alignment requirements for how 4K and 64KB Tiled 62901e04c3fSmrg * Resource Modes impact alignment). Alternatively, a 1D surface 63001e04c3fSmrg * can be defined as a 2D tiled surface (e.g. TileY or TileX) with 63101e04c3fSmrg * a height of 0. 63201e04c3fSmrg * 63301e04c3fSmrg * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear 63401e04c3fSmrg * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used. 63501e04c3fSmrg */ 63601e04c3fSmrg if (tiling == ISL_TILING_LINEAR) 63701e04c3fSmrg return ISL_DIM_LAYOUT_GEN9_1D; 63801e04c3fSmrg else 63901e04c3fSmrg return ISL_DIM_LAYOUT_GEN4_2D; 64001e04c3fSmrg case ISL_SURF_DIM_2D: 64101e04c3fSmrg case ISL_SURF_DIM_3D: 64201e04c3fSmrg return ISL_DIM_LAYOUT_GEN4_2D; 64301e04c3fSmrg } 64401e04c3fSmrg } else { 64501e04c3fSmrg switch (logical_dim) { 64601e04c3fSmrg case ISL_SURF_DIM_1D: 64701e04c3fSmrg case ISL_SURF_DIM_2D: 64801e04c3fSmrg /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout": 64901e04c3fSmrg * 65001e04c3fSmrg * The cube face textures are stored in the same way as 3D surfaces 65101e04c3fSmrg * are stored (see section 6.17.5 for details). For cube surfaces, 65201e04c3fSmrg * however, the depth is equal to the number of faces (always 6) and 65301e04c3fSmrg * is not reduced for each MIP. 65401e04c3fSmrg */ 65501e04c3fSmrg if (ISL_DEV_GEN(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT)) 65601e04c3fSmrg return ISL_DIM_LAYOUT_GEN4_3D; 65701e04c3fSmrg 65801e04c3fSmrg return ISL_DIM_LAYOUT_GEN4_2D; 65901e04c3fSmrg case ISL_SURF_DIM_3D: 66001e04c3fSmrg return ISL_DIM_LAYOUT_GEN4_3D; 66101e04c3fSmrg } 66201e04c3fSmrg } 66301e04c3fSmrg 66401e04c3fSmrg unreachable("bad isl_surf_dim"); 66501e04c3fSmrg return ISL_DIM_LAYOUT_GEN4_2D; 66601e04c3fSmrg} 66701e04c3fSmrg 66801e04c3fSmrg/** 66901e04c3fSmrg * Calculate the physical extent of the surface's first level, in units of 67001e04c3fSmrg * surface samples. The result is aligned to the format's compression block. 67101e04c3fSmrg */ 67201e04c3fSmrgstatic void 67301e04c3fSmrgisl_calc_phys_level0_extent_sa(const struct isl_device *dev, 67401e04c3fSmrg const struct isl_surf_init_info *restrict info, 67501e04c3fSmrg enum isl_dim_layout dim_layout, 67601e04c3fSmrg enum isl_tiling tiling, 67701e04c3fSmrg enum isl_msaa_layout msaa_layout, 67801e04c3fSmrg struct isl_extent4d *phys_level0_sa) 67901e04c3fSmrg{ 68001e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 68101e04c3fSmrg 68201e04c3fSmrg if (isl_format_is_yuv(info->format)) 68301e04c3fSmrg isl_finishme("%s:%s: YUV format", __FILE__, __func__); 68401e04c3fSmrg 68501e04c3fSmrg switch (info->dim) { 68601e04c3fSmrg case ISL_SURF_DIM_1D: 68701e04c3fSmrg assert(info->height == 1); 68801e04c3fSmrg assert(info->depth == 1); 68901e04c3fSmrg assert(info->samples == 1); 69001e04c3fSmrg 69101e04c3fSmrg switch (dim_layout) { 69201e04c3fSmrg case ISL_DIM_LAYOUT_GEN4_3D: 69301e04c3fSmrg unreachable("bad isl_dim_layout"); 69401e04c3fSmrg 69501e04c3fSmrg case ISL_DIM_LAYOUT_GEN9_1D: 69601e04c3fSmrg case ISL_DIM_LAYOUT_GEN4_2D: 69701e04c3fSmrg case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 69801e04c3fSmrg *phys_level0_sa = (struct isl_extent4d) { 69901e04c3fSmrg .w = isl_align_npot(info->width, fmtl->bw), 70001e04c3fSmrg .h = fmtl->bh, 70101e04c3fSmrg .d = 1, 70201e04c3fSmrg .a = info->array_len, 70301e04c3fSmrg }; 70401e04c3fSmrg break; 70501e04c3fSmrg } 70601e04c3fSmrg break; 70701e04c3fSmrg 70801e04c3fSmrg case ISL_SURF_DIM_2D: 70901e04c3fSmrg if (ISL_DEV_GEN(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT)) 71001e04c3fSmrg assert(dim_layout == ISL_DIM_LAYOUT_GEN4_3D); 71101e04c3fSmrg else 71201e04c3fSmrg assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D || 71301e04c3fSmrg dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ); 71401e04c3fSmrg 71501e04c3fSmrg if (tiling == ISL_TILING_Ys && info->samples > 1) 71601e04c3fSmrg isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); 71701e04c3fSmrg 71801e04c3fSmrg switch (msaa_layout) { 71901e04c3fSmrg case ISL_MSAA_LAYOUT_NONE: 72001e04c3fSmrg assert(info->depth == 1); 72101e04c3fSmrg assert(info->samples == 1); 72201e04c3fSmrg 72301e04c3fSmrg *phys_level0_sa = (struct isl_extent4d) { 72401e04c3fSmrg .w = isl_align_npot(info->width, fmtl->bw), 72501e04c3fSmrg .h = isl_align_npot(info->height, fmtl->bh), 72601e04c3fSmrg .d = 1, 72701e04c3fSmrg .a = info->array_len, 72801e04c3fSmrg }; 72901e04c3fSmrg break; 73001e04c3fSmrg 73101e04c3fSmrg case ISL_MSAA_LAYOUT_ARRAY: 73201e04c3fSmrg assert(info->depth == 1); 73301e04c3fSmrg assert(info->levels == 1); 73401e04c3fSmrg assert(isl_format_supports_multisampling(dev->info, info->format)); 73501e04c3fSmrg assert(fmtl->bw == 1 && fmtl->bh == 1); 73601e04c3fSmrg 73701e04c3fSmrg *phys_level0_sa = (struct isl_extent4d) { 73801e04c3fSmrg .w = info->width, 73901e04c3fSmrg .h = info->height, 74001e04c3fSmrg .d = 1, 74101e04c3fSmrg .a = info->array_len * info->samples, 74201e04c3fSmrg }; 74301e04c3fSmrg break; 74401e04c3fSmrg 74501e04c3fSmrg case ISL_MSAA_LAYOUT_INTERLEAVED: 74601e04c3fSmrg assert(info->depth == 1); 74701e04c3fSmrg assert(info->levels == 1); 74801e04c3fSmrg assert(isl_format_supports_multisampling(dev->info, info->format)); 74901e04c3fSmrg 75001e04c3fSmrg *phys_level0_sa = (struct isl_extent4d) { 75101e04c3fSmrg .w = info->width, 75201e04c3fSmrg .h = info->height, 75301e04c3fSmrg .d = 1, 75401e04c3fSmrg .a = info->array_len, 75501e04c3fSmrg }; 75601e04c3fSmrg 75701e04c3fSmrg isl_msaa_interleaved_scale_px_to_sa(info->samples, 75801e04c3fSmrg &phys_level0_sa->w, 75901e04c3fSmrg &phys_level0_sa->h); 76001e04c3fSmrg 76101e04c3fSmrg phys_level0_sa->w = isl_align(phys_level0_sa->w, fmtl->bw); 76201e04c3fSmrg phys_level0_sa->h = isl_align(phys_level0_sa->h, fmtl->bh); 76301e04c3fSmrg break; 76401e04c3fSmrg } 76501e04c3fSmrg break; 76601e04c3fSmrg 76701e04c3fSmrg case ISL_SURF_DIM_3D: 76801e04c3fSmrg assert(info->array_len == 1); 76901e04c3fSmrg assert(info->samples == 1); 77001e04c3fSmrg 77101e04c3fSmrg if (fmtl->bd > 1) { 77201e04c3fSmrg isl_finishme("%s:%s: compression block with depth > 1", 77301e04c3fSmrg __FILE__, __func__); 77401e04c3fSmrg } 77501e04c3fSmrg 77601e04c3fSmrg switch (dim_layout) { 77701e04c3fSmrg case ISL_DIM_LAYOUT_GEN9_1D: 77801e04c3fSmrg case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 77901e04c3fSmrg unreachable("bad isl_dim_layout"); 78001e04c3fSmrg 78101e04c3fSmrg case ISL_DIM_LAYOUT_GEN4_2D: 78201e04c3fSmrg assert(ISL_DEV_GEN(dev) >= 9); 78301e04c3fSmrg 78401e04c3fSmrg *phys_level0_sa = (struct isl_extent4d) { 78501e04c3fSmrg .w = isl_align_npot(info->width, fmtl->bw), 78601e04c3fSmrg .h = isl_align_npot(info->height, fmtl->bh), 78701e04c3fSmrg .d = 1, 78801e04c3fSmrg .a = info->depth, 78901e04c3fSmrg }; 79001e04c3fSmrg break; 79101e04c3fSmrg 79201e04c3fSmrg case ISL_DIM_LAYOUT_GEN4_3D: 79301e04c3fSmrg assert(ISL_DEV_GEN(dev) < 9); 79401e04c3fSmrg *phys_level0_sa = (struct isl_extent4d) { 79501e04c3fSmrg .w = isl_align(info->width, fmtl->bw), 79601e04c3fSmrg .h = isl_align(info->height, fmtl->bh), 79701e04c3fSmrg .d = info->depth, 79801e04c3fSmrg .a = 1, 79901e04c3fSmrg }; 80001e04c3fSmrg break; 80101e04c3fSmrg } 80201e04c3fSmrg break; 80301e04c3fSmrg } 80401e04c3fSmrg} 80501e04c3fSmrg 80601e04c3fSmrg/** 80701e04c3fSmrg * Calculate the pitch between physical array slices, in units of rows of 80801e04c3fSmrg * surface elements. 80901e04c3fSmrg */ 81001e04c3fSmrgstatic uint32_t 81101e04c3fSmrgisl_calc_array_pitch_el_rows_gen4_2d( 81201e04c3fSmrg const struct isl_device *dev, 81301e04c3fSmrg const struct isl_surf_init_info *restrict info, 81401e04c3fSmrg const struct isl_tile_info *tile_info, 81501e04c3fSmrg const struct isl_extent3d *image_align_sa, 81601e04c3fSmrg const struct isl_extent4d *phys_level0_sa, 81701e04c3fSmrg enum isl_array_pitch_span array_pitch_span, 81801e04c3fSmrg const struct isl_extent2d *phys_slice0_sa) 81901e04c3fSmrg{ 82001e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 82101e04c3fSmrg uint32_t pitch_sa_rows = 0; 82201e04c3fSmrg 82301e04c3fSmrg switch (array_pitch_span) { 82401e04c3fSmrg case ISL_ARRAY_PITCH_SPAN_COMPACT: 82501e04c3fSmrg pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); 82601e04c3fSmrg break; 82701e04c3fSmrg case ISL_ARRAY_PITCH_SPAN_FULL: { 82801e04c3fSmrg /* The QPitch equation is found in the Broadwell PRM >> Volume 5: 82901e04c3fSmrg * Memory Views >> Common Surface Formats >> Surface Layout >> 2D 83001e04c3fSmrg * Surfaces >> Surface Arrays. 83101e04c3fSmrg */ 83201e04c3fSmrg uint32_t H0_sa = phys_level0_sa->h; 83301e04c3fSmrg uint32_t H1_sa = isl_minify(H0_sa, 1); 83401e04c3fSmrg 83501e04c3fSmrg uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); 83601e04c3fSmrg uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); 83701e04c3fSmrg 83801e04c3fSmrg uint32_t m; 83901e04c3fSmrg if (ISL_DEV_GEN(dev) >= 7) { 84001e04c3fSmrg /* The QPitch equation changed slightly in Ivybridge. */ 84101e04c3fSmrg m = 12; 84201e04c3fSmrg } else { 84301e04c3fSmrg m = 11; 84401e04c3fSmrg } 84501e04c3fSmrg 84601e04c3fSmrg pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); 84701e04c3fSmrg 84801e04c3fSmrg if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && 84901e04c3fSmrg (info->height % 4 == 1)) { 85001e04c3fSmrg /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 85101e04c3fSmrg * Graphics Core >> Section 7.18.3.7: Surface Arrays: 85201e04c3fSmrg * 85301e04c3fSmrg * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than 85401e04c3fSmrg * the value calculated in the equation above , for every 85501e04c3fSmrg * other odd Surface Height starting from 1 i.e. 1,5,9,13. 85601e04c3fSmrg * 85701e04c3fSmrg * XXX(chadv): Is the errata natural corollary of the physical 85801e04c3fSmrg * layout of interleaved samples? 85901e04c3fSmrg */ 86001e04c3fSmrg pitch_sa_rows += 4; 86101e04c3fSmrg } 86201e04c3fSmrg 86301e04c3fSmrg pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); 86401e04c3fSmrg } /* end case */ 86501e04c3fSmrg break; 86601e04c3fSmrg } 86701e04c3fSmrg 86801e04c3fSmrg assert(pitch_sa_rows % fmtl->bh == 0); 86901e04c3fSmrg uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh; 87001e04c3fSmrg 87101e04c3fSmrg if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) { 87201e04c3fSmrg /* 87301e04c3fSmrg * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632): 87401e04c3fSmrg * 87501e04c3fSmrg * "Mip-mapped and arrayed surfaces are supported with MCS buffer 87601e04c3fSmrg * layout with these alignments in the RT space: Horizontal 87701e04c3fSmrg * Alignment = 128 and Vertical Alignment = 64." 87801e04c3fSmrg * 87901e04c3fSmrg * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435): 88001e04c3fSmrg * 88101e04c3fSmrg * "For non-multisampled render target's CCS auxiliary surface, 88201e04c3fSmrg * QPitch must be computed with Horizontal Alignment = 128 and 88301e04c3fSmrg * Surface Vertical Alignment = 256. These alignments are only for 88401e04c3fSmrg * CCS buffer and not for associated render target." 88501e04c3fSmrg * 88601e04c3fSmrg * The first restriction is already handled by isl_choose_image_alignment_el 88701e04c3fSmrg * but the second restriction, which is an extension of the first, only 88801e04c3fSmrg * applies to qpitch and must be applied here. 88901e04c3fSmrg */ 89001e04c3fSmrg assert(fmtl->bh == 4); 89101e04c3fSmrg pitch_el_rows = isl_align(pitch_el_rows, 256 / 4); 89201e04c3fSmrg } 89301e04c3fSmrg 89401e04c3fSmrg if (ISL_DEV_GEN(dev) >= 9 && 89501e04c3fSmrg info->dim == ISL_SURF_DIM_3D && 89601e04c3fSmrg tile_info->tiling != ISL_TILING_LINEAR) { 89701e04c3fSmrg /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: 89801e04c3fSmrg * 89901e04c3fSmrg * Tile Mode != Linear: This field must be set to an integer multiple 90001e04c3fSmrg * of the tile height 90101e04c3fSmrg */ 90201e04c3fSmrg pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height); 90301e04c3fSmrg } 90401e04c3fSmrg 90501e04c3fSmrg return pitch_el_rows; 90601e04c3fSmrg} 90701e04c3fSmrg 90801e04c3fSmrg/** 90901e04c3fSmrg * A variant of isl_calc_phys_slice0_extent_sa() specific to 91001e04c3fSmrg * ISL_DIM_LAYOUT_GEN4_2D. 91101e04c3fSmrg */ 91201e04c3fSmrgstatic void 91301e04c3fSmrgisl_calc_phys_slice0_extent_sa_gen4_2d( 91401e04c3fSmrg const struct isl_device *dev, 91501e04c3fSmrg const struct isl_surf_init_info *restrict info, 91601e04c3fSmrg enum isl_msaa_layout msaa_layout, 91701e04c3fSmrg const struct isl_extent3d *image_align_sa, 91801e04c3fSmrg const struct isl_extent4d *phys_level0_sa, 91901e04c3fSmrg struct isl_extent2d *phys_slice0_sa) 92001e04c3fSmrg{ 92101e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 92201e04c3fSmrg 92301e04c3fSmrg assert(phys_level0_sa->depth == 1); 92401e04c3fSmrg 92501e04c3fSmrg if (info->levels == 1) { 92601e04c3fSmrg /* Do not pad the surface to the image alignment. Instead, pad it only 92701e04c3fSmrg * to the pixel format's block alignment. 92801e04c3fSmrg * 92901e04c3fSmrg * For tiled surfaces, using a reduced alignment here avoids wasting CPU 93001e04c3fSmrg * cycles on the below mipmap layout caluclations. Reducing the 93101e04c3fSmrg * alignment here is safe because we later align the row pitch and array 93201e04c3fSmrg * pitch to the tile boundary. It is safe even for 93301e04c3fSmrg * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled 93401e04c3fSmrg * to accomodate the interleaved samples. 93501e04c3fSmrg * 93601e04c3fSmrg * For linear surfaces, reducing the alignment here permits us to later 93701e04c3fSmrg * choose an arbitrary, non-aligned row pitch. If the surface backs 93801e04c3fSmrg * a VkBuffer, then an arbitrary pitch may be needed to accomodate 93901e04c3fSmrg * VkBufferImageCopy::bufferRowLength. 94001e04c3fSmrg */ 94101e04c3fSmrg *phys_slice0_sa = (struct isl_extent2d) { 94201e04c3fSmrg .w = isl_align_npot(phys_level0_sa->w, fmtl->bw), 94301e04c3fSmrg .h = isl_align_npot(phys_level0_sa->h, fmtl->bh), 94401e04c3fSmrg }; 94501e04c3fSmrg return; 94601e04c3fSmrg } 94701e04c3fSmrg 94801e04c3fSmrg uint32_t slice_top_w = 0; 94901e04c3fSmrg uint32_t slice_bottom_w = 0; 95001e04c3fSmrg uint32_t slice_left_h = 0; 95101e04c3fSmrg uint32_t slice_right_h = 0; 95201e04c3fSmrg 95301e04c3fSmrg uint32_t W0 = phys_level0_sa->w; 95401e04c3fSmrg uint32_t H0 = phys_level0_sa->h; 95501e04c3fSmrg 95601e04c3fSmrg for (uint32_t l = 0; l < info->levels; ++l) { 95701e04c3fSmrg uint32_t W = isl_minify(W0, l); 95801e04c3fSmrg uint32_t H = isl_minify(H0, l); 95901e04c3fSmrg 96001e04c3fSmrg uint32_t w = isl_align_npot(W, image_align_sa->w); 96101e04c3fSmrg uint32_t h = isl_align_npot(H, image_align_sa->h); 96201e04c3fSmrg 96301e04c3fSmrg if (l == 0) { 96401e04c3fSmrg slice_top_w = w; 96501e04c3fSmrg slice_left_h = h; 96601e04c3fSmrg slice_right_h = h; 96701e04c3fSmrg } else if (l == 1) { 96801e04c3fSmrg slice_bottom_w = w; 96901e04c3fSmrg slice_left_h += h; 97001e04c3fSmrg } else if (l == 2) { 97101e04c3fSmrg slice_bottom_w += w; 97201e04c3fSmrg slice_right_h += h; 97301e04c3fSmrg } else { 97401e04c3fSmrg slice_right_h += h; 97501e04c3fSmrg } 97601e04c3fSmrg } 97701e04c3fSmrg 97801e04c3fSmrg *phys_slice0_sa = (struct isl_extent2d) { 97901e04c3fSmrg .w = MAX(slice_top_w, slice_bottom_w), 98001e04c3fSmrg .h = MAX(slice_left_h, slice_right_h), 98101e04c3fSmrg }; 98201e04c3fSmrg} 98301e04c3fSmrg 98401e04c3fSmrgstatic void 98501e04c3fSmrgisl_calc_phys_total_extent_el_gen4_2d( 98601e04c3fSmrg const struct isl_device *dev, 98701e04c3fSmrg const struct isl_surf_init_info *restrict info, 98801e04c3fSmrg const struct isl_tile_info *tile_info, 98901e04c3fSmrg enum isl_msaa_layout msaa_layout, 99001e04c3fSmrg const struct isl_extent3d *image_align_sa, 99101e04c3fSmrg const struct isl_extent4d *phys_level0_sa, 99201e04c3fSmrg enum isl_array_pitch_span array_pitch_span, 99301e04c3fSmrg uint32_t *array_pitch_el_rows, 99401e04c3fSmrg struct isl_extent2d *total_extent_el) 99501e04c3fSmrg{ 99601e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 99701e04c3fSmrg 99801e04c3fSmrg struct isl_extent2d phys_slice0_sa; 99901e04c3fSmrg isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, 100001e04c3fSmrg image_align_sa, phys_level0_sa, 100101e04c3fSmrg &phys_slice0_sa); 100201e04c3fSmrg *array_pitch_el_rows = 100301e04c3fSmrg isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info, 100401e04c3fSmrg image_align_sa, phys_level0_sa, 100501e04c3fSmrg array_pitch_span, 100601e04c3fSmrg &phys_slice0_sa); 100701e04c3fSmrg *total_extent_el = (struct isl_extent2d) { 100801e04c3fSmrg .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw), 100901e04c3fSmrg .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) + 101001e04c3fSmrg isl_assert_div(phys_slice0_sa.h, fmtl->bh), 101101e04c3fSmrg }; 101201e04c3fSmrg} 101301e04c3fSmrg 101401e04c3fSmrg/** 101501e04c3fSmrg * A variant of isl_calc_phys_slice0_extent_sa() specific to 101601e04c3fSmrg * ISL_DIM_LAYOUT_GEN4_3D. 101701e04c3fSmrg */ 101801e04c3fSmrgstatic void 101901e04c3fSmrgisl_calc_phys_total_extent_el_gen4_3d( 102001e04c3fSmrg const struct isl_device *dev, 102101e04c3fSmrg const struct isl_surf_init_info *restrict info, 102201e04c3fSmrg const struct isl_extent3d *image_align_sa, 102301e04c3fSmrg const struct isl_extent4d *phys_level0_sa, 102401e04c3fSmrg uint32_t *array_pitch_el_rows, 102501e04c3fSmrg struct isl_extent2d *phys_total_el) 102601e04c3fSmrg{ 102701e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 102801e04c3fSmrg 102901e04c3fSmrg assert(info->samples == 1); 103001e04c3fSmrg 103101e04c3fSmrg if (info->dim != ISL_SURF_DIM_3D) { 103201e04c3fSmrg /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout": 103301e04c3fSmrg * 103401e04c3fSmrg * The cube face textures are stored in the same way as 3D surfaces 103501e04c3fSmrg * are stored (see section 6.17.5 for details). For cube surfaces, 103601e04c3fSmrg * however, the depth is equal to the number of faces (always 6) and 103701e04c3fSmrg * is not reduced for each MIP. 103801e04c3fSmrg */ 103901e04c3fSmrg assert(ISL_DEV_GEN(dev) == 4); 104001e04c3fSmrg assert(info->usage & ISL_SURF_USAGE_CUBE_BIT); 104101e04c3fSmrg assert(phys_level0_sa->array_len == 6); 104201e04c3fSmrg } else { 104301e04c3fSmrg assert(phys_level0_sa->array_len == 1); 104401e04c3fSmrg } 104501e04c3fSmrg 104601e04c3fSmrg uint32_t total_w = 0; 104701e04c3fSmrg uint32_t total_h = 0; 104801e04c3fSmrg 104901e04c3fSmrg uint32_t W0 = phys_level0_sa->w; 105001e04c3fSmrg uint32_t H0 = phys_level0_sa->h; 105101e04c3fSmrg uint32_t D0 = phys_level0_sa->d; 105201e04c3fSmrg uint32_t A0 = phys_level0_sa->a; 105301e04c3fSmrg 105401e04c3fSmrg for (uint32_t l = 0; l < info->levels; ++l) { 105501e04c3fSmrg uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); 105601e04c3fSmrg uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); 105701e04c3fSmrg uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0; 105801e04c3fSmrg 105901e04c3fSmrg uint32_t max_layers_horiz = MIN(level_d, 1u << l); 106001e04c3fSmrg uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); 106101e04c3fSmrg 106201e04c3fSmrg total_w = MAX(total_w, level_w * max_layers_horiz); 106301e04c3fSmrg total_h += level_h * max_layers_vert; 106401e04c3fSmrg } 106501e04c3fSmrg 106601e04c3fSmrg /* GEN4_3D layouts don't really have an array pitch since each LOD has a 106701e04c3fSmrg * different number of horizontal and vertical layers. We have to set it 106801e04c3fSmrg * to something, so at least make it true for LOD0. 106901e04c3fSmrg */ 107001e04c3fSmrg *array_pitch_el_rows = 107101e04c3fSmrg isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw; 107201e04c3fSmrg *phys_total_el = (struct isl_extent2d) { 107301e04c3fSmrg .w = isl_assert_div(total_w, fmtl->bw), 107401e04c3fSmrg .h = isl_assert_div(total_h, fmtl->bh), 107501e04c3fSmrg }; 107601e04c3fSmrg} 107701e04c3fSmrg 107801e04c3fSmrg/** 107901e04c3fSmrg * A variant of isl_calc_phys_slice0_extent_sa() specific to 108001e04c3fSmrg * ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ. 108101e04c3fSmrg */ 108201e04c3fSmrgstatic void 108301e04c3fSmrgisl_calc_phys_total_extent_el_gen6_stencil_hiz( 108401e04c3fSmrg const struct isl_device *dev, 108501e04c3fSmrg const struct isl_surf_init_info *restrict info, 108601e04c3fSmrg const struct isl_tile_info *tile_info, 108701e04c3fSmrg const struct isl_extent3d *image_align_sa, 108801e04c3fSmrg const struct isl_extent4d *phys_level0_sa, 108901e04c3fSmrg uint32_t *array_pitch_el_rows, 109001e04c3fSmrg struct isl_extent2d *phys_total_el) 109101e04c3fSmrg{ 109201e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 109301e04c3fSmrg 109401e04c3fSmrg const struct isl_extent2d tile_extent_sa = { 109501e04c3fSmrg .w = tile_info->logical_extent_el.w * fmtl->bw, 109601e04c3fSmrg .h = tile_info->logical_extent_el.h * fmtl->bh, 109701e04c3fSmrg }; 109801e04c3fSmrg /* Tile size is a multiple of image alignment */ 109901e04c3fSmrg assert(tile_extent_sa.w % image_align_sa->w == 0); 110001e04c3fSmrg assert(tile_extent_sa.h % image_align_sa->h == 0); 110101e04c3fSmrg 110201e04c3fSmrg const uint32_t W0 = phys_level0_sa->w; 110301e04c3fSmrg const uint32_t H0 = phys_level0_sa->h; 110401e04c3fSmrg 110501e04c3fSmrg /* Each image has the same height as LOD0 because the hardware thinks 110601e04c3fSmrg * everything is LOD0 110701e04c3fSmrg */ 110801e04c3fSmrg const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a; 110901e04c3fSmrg 111001e04c3fSmrg uint32_t total_top_w = 0; 111101e04c3fSmrg uint32_t total_bottom_w = 0; 111201e04c3fSmrg uint32_t total_h = 0; 111301e04c3fSmrg 111401e04c3fSmrg for (uint32_t l = 0; l < info->levels; ++l) { 111501e04c3fSmrg const uint32_t W = isl_minify(W0, l); 111601e04c3fSmrg 111701e04c3fSmrg const uint32_t w = isl_align(W, tile_extent_sa.w); 111801e04c3fSmrg const uint32_t h = isl_align(H, tile_extent_sa.h); 111901e04c3fSmrg 112001e04c3fSmrg if (l == 0) { 112101e04c3fSmrg total_top_w = w; 112201e04c3fSmrg total_h = h; 112301e04c3fSmrg } else if (l == 1) { 112401e04c3fSmrg total_bottom_w = w; 112501e04c3fSmrg total_h += h; 112601e04c3fSmrg } else { 112701e04c3fSmrg total_bottom_w += w; 112801e04c3fSmrg } 112901e04c3fSmrg } 113001e04c3fSmrg 113101e04c3fSmrg *array_pitch_el_rows = 113201e04c3fSmrg isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh); 113301e04c3fSmrg *phys_total_el = (struct isl_extent2d) { 113401e04c3fSmrg .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw), 113501e04c3fSmrg .h = isl_assert_div(total_h, fmtl->bh), 113601e04c3fSmrg }; 113701e04c3fSmrg} 113801e04c3fSmrg 113901e04c3fSmrg/** 114001e04c3fSmrg * A variant of isl_calc_phys_slice0_extent_sa() specific to 114101e04c3fSmrg * ISL_DIM_LAYOUT_GEN9_1D. 114201e04c3fSmrg */ 114301e04c3fSmrgstatic void 114401e04c3fSmrgisl_calc_phys_total_extent_el_gen9_1d( 114501e04c3fSmrg const struct isl_device *dev, 114601e04c3fSmrg const struct isl_surf_init_info *restrict info, 114701e04c3fSmrg const struct isl_extent3d *image_align_sa, 114801e04c3fSmrg const struct isl_extent4d *phys_level0_sa, 114901e04c3fSmrg uint32_t *array_pitch_el_rows, 115001e04c3fSmrg struct isl_extent2d *phys_total_el) 115101e04c3fSmrg{ 115201e04c3fSmrg MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 115301e04c3fSmrg 115401e04c3fSmrg assert(phys_level0_sa->height / fmtl->bh == 1); 115501e04c3fSmrg assert(phys_level0_sa->depth == 1); 115601e04c3fSmrg assert(info->samples == 1); 115701e04c3fSmrg assert(image_align_sa->w >= fmtl->bw); 115801e04c3fSmrg 115901e04c3fSmrg uint32_t slice_w = 0; 116001e04c3fSmrg const uint32_t W0 = phys_level0_sa->w; 116101e04c3fSmrg 116201e04c3fSmrg for (uint32_t l = 0; l < info->levels; ++l) { 116301e04c3fSmrg uint32_t W = isl_minify(W0, l); 116401e04c3fSmrg uint32_t w = isl_align_npot(W, image_align_sa->w); 116501e04c3fSmrg 116601e04c3fSmrg slice_w += w; 116701e04c3fSmrg } 116801e04c3fSmrg 116901e04c3fSmrg *array_pitch_el_rows = 1; 117001e04c3fSmrg *phys_total_el = (struct isl_extent2d) { 117101e04c3fSmrg .w = isl_assert_div(slice_w, fmtl->bw), 117201e04c3fSmrg .h = phys_level0_sa->array_len, 117301e04c3fSmrg }; 117401e04c3fSmrg} 117501e04c3fSmrg 117601e04c3fSmrg/** 117701e04c3fSmrg * Calculate the two-dimensional total physical extent of the surface, in 117801e04c3fSmrg * units of surface elements. 117901e04c3fSmrg */ 118001e04c3fSmrgstatic void 118101e04c3fSmrgisl_calc_phys_total_extent_el(const struct isl_device *dev, 118201e04c3fSmrg const struct isl_surf_init_info *restrict info, 118301e04c3fSmrg const struct isl_tile_info *tile_info, 118401e04c3fSmrg enum isl_dim_layout dim_layout, 118501e04c3fSmrg enum isl_msaa_layout msaa_layout, 118601e04c3fSmrg const struct isl_extent3d *image_align_sa, 118701e04c3fSmrg const struct isl_extent4d *phys_level0_sa, 118801e04c3fSmrg enum isl_array_pitch_span array_pitch_span, 118901e04c3fSmrg uint32_t *array_pitch_el_rows, 119001e04c3fSmrg struct isl_extent2d *total_extent_el) 119101e04c3fSmrg{ 119201e04c3fSmrg switch (dim_layout) { 119301e04c3fSmrg case ISL_DIM_LAYOUT_GEN9_1D: 119401e04c3fSmrg assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 119501e04c3fSmrg isl_calc_phys_total_extent_el_gen9_1d(dev, info, 119601e04c3fSmrg image_align_sa, phys_level0_sa, 119701e04c3fSmrg array_pitch_el_rows, 119801e04c3fSmrg total_extent_el); 119901e04c3fSmrg return; 120001e04c3fSmrg case ISL_DIM_LAYOUT_GEN4_2D: 120101e04c3fSmrg isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout, 120201e04c3fSmrg image_align_sa, phys_level0_sa, 120301e04c3fSmrg array_pitch_span, 120401e04c3fSmrg array_pitch_el_rows, 120501e04c3fSmrg total_extent_el); 120601e04c3fSmrg return; 120701e04c3fSmrg case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 120801e04c3fSmrg assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 120901e04c3fSmrg isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info, 121001e04c3fSmrg image_align_sa, 121101e04c3fSmrg phys_level0_sa, 121201e04c3fSmrg array_pitch_el_rows, 121301e04c3fSmrg total_extent_el); 121401e04c3fSmrg return; 121501e04c3fSmrg case ISL_DIM_LAYOUT_GEN4_3D: 121601e04c3fSmrg assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 121701e04c3fSmrg isl_calc_phys_total_extent_el_gen4_3d(dev, info, 121801e04c3fSmrg image_align_sa, phys_level0_sa, 121901e04c3fSmrg array_pitch_el_rows, 122001e04c3fSmrg total_extent_el); 122101e04c3fSmrg return; 122201e04c3fSmrg } 122301e04c3fSmrg} 122401e04c3fSmrg 122501e04c3fSmrgstatic uint32_t 122601e04c3fSmrgisl_calc_row_pitch_alignment(const struct isl_surf_init_info *surf_info, 122701e04c3fSmrg const struct isl_tile_info *tile_info) 122801e04c3fSmrg{ 122901e04c3fSmrg if (tile_info->tiling != ISL_TILING_LINEAR) 123001e04c3fSmrg return tile_info->phys_extent_B.width; 123101e04c3fSmrg 123201e04c3fSmrg /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> 123301e04c3fSmrg * RENDER_SURFACE_STATE Surface Pitch (p349): 123401e04c3fSmrg * 123501e04c3fSmrg * - For linear render target surfaces and surfaces accessed with the 123601e04c3fSmrg * typed data port messages, the pitch must be a multiple of the 123701e04c3fSmrg * element size for non-YUV surface formats. Pitch must be 123801e04c3fSmrg * a multiple of 2 * element size for YUV surface formats. 123901e04c3fSmrg * 124001e04c3fSmrg * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we 124101e04c3fSmrg * ignore because isl doesn't do buffers.] 124201e04c3fSmrg * 124301e04c3fSmrg * - For other linear surfaces, the pitch can be any multiple of 124401e04c3fSmrg * bytes. 124501e04c3fSmrg */ 124601e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format); 124701e04c3fSmrg const uint32_t bs = fmtl->bpb / 8; 124801e04c3fSmrg 124901e04c3fSmrg if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { 125001e04c3fSmrg if (isl_format_is_yuv(surf_info->format)) { 125101e04c3fSmrg return 2 * bs; 125201e04c3fSmrg } else { 125301e04c3fSmrg return bs; 125401e04c3fSmrg } 125501e04c3fSmrg } 125601e04c3fSmrg 125701e04c3fSmrg return 1; 125801e04c3fSmrg} 125901e04c3fSmrg 126001e04c3fSmrgstatic uint32_t 126101e04c3fSmrgisl_calc_linear_min_row_pitch(const struct isl_device *dev, 126201e04c3fSmrg const struct isl_surf_init_info *info, 126301e04c3fSmrg const struct isl_extent2d *phys_total_el, 126401e04c3fSmrg uint32_t alignment_B) 126501e04c3fSmrg{ 126601e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 126701e04c3fSmrg const uint32_t bs = fmtl->bpb / 8; 126801e04c3fSmrg 126901e04c3fSmrg return isl_align_npot(bs * phys_total_el->w, alignment_B); 127001e04c3fSmrg} 127101e04c3fSmrg 127201e04c3fSmrgstatic uint32_t 127301e04c3fSmrgisl_calc_tiled_min_row_pitch(const struct isl_device *dev, 127401e04c3fSmrg const struct isl_surf_init_info *surf_info, 127501e04c3fSmrg const struct isl_tile_info *tile_info, 127601e04c3fSmrg const struct isl_extent2d *phys_total_el, 127701e04c3fSmrg uint32_t alignment_B) 127801e04c3fSmrg{ 127901e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format); 128001e04c3fSmrg 128101e04c3fSmrg assert(fmtl->bpb % tile_info->format_bpb == 0); 128201e04c3fSmrg 128301e04c3fSmrg const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb; 128401e04c3fSmrg const uint32_t total_w_tl = 128501e04c3fSmrg isl_align_div(phys_total_el->w * tile_el_scale, 128601e04c3fSmrg tile_info->logical_extent_el.width); 128701e04c3fSmrg 128801e04c3fSmrg assert(alignment_B == tile_info->phys_extent_B.width); 128901e04c3fSmrg return total_w_tl * tile_info->phys_extent_B.width; 129001e04c3fSmrg} 129101e04c3fSmrg 129201e04c3fSmrgstatic uint32_t 129301e04c3fSmrgisl_calc_min_row_pitch(const struct isl_device *dev, 129401e04c3fSmrg const struct isl_surf_init_info *surf_info, 129501e04c3fSmrg const struct isl_tile_info *tile_info, 129601e04c3fSmrg const struct isl_extent2d *phys_total_el, 129701e04c3fSmrg uint32_t alignment_B) 129801e04c3fSmrg{ 129901e04c3fSmrg if (tile_info->tiling == ISL_TILING_LINEAR) { 130001e04c3fSmrg return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el, 130101e04c3fSmrg alignment_B); 130201e04c3fSmrg } else { 130301e04c3fSmrg return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info, 130401e04c3fSmrg phys_total_el, alignment_B); 130501e04c3fSmrg } 130601e04c3fSmrg} 130701e04c3fSmrg 130801e04c3fSmrg/** 130901e04c3fSmrg * Is `pitch` in the valid range for a hardware bitfield, if the bitfield's 131001e04c3fSmrg * size is `bits` bits? 131101e04c3fSmrg * 131201e04c3fSmrg * Hardware pitch fields are offset by 1. For example, if the size of 131301e04c3fSmrg * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid 131401e04c3fSmrg * pitches is [1, 2^b] inclusive. If the surface pitch is N, then 131501e04c3fSmrg * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1. 131601e04c3fSmrg */ 131701e04c3fSmrgstatic bool 131801e04c3fSmrgpitch_in_range(uint32_t n, uint32_t bits) 131901e04c3fSmrg{ 132001e04c3fSmrg assert(n != 0); 132101e04c3fSmrg return likely(bits != 0 && 1 <= n && n <= (1 << bits)); 132201e04c3fSmrg} 132301e04c3fSmrg 132401e04c3fSmrgstatic bool 132501e04c3fSmrgisl_calc_row_pitch(const struct isl_device *dev, 132601e04c3fSmrg const struct isl_surf_init_info *surf_info, 132701e04c3fSmrg const struct isl_tile_info *tile_info, 132801e04c3fSmrg enum isl_dim_layout dim_layout, 132901e04c3fSmrg const struct isl_extent2d *phys_total_el, 133001e04c3fSmrg uint32_t *out_row_pitch_B) 133101e04c3fSmrg{ 133201e04c3fSmrg uint32_t alignment_B = 133301e04c3fSmrg isl_calc_row_pitch_alignment(surf_info, tile_info); 133401e04c3fSmrg 133501e04c3fSmrg /* If pitch isn't given and it can be chosen freely, align it by cache line 133601e04c3fSmrg * allowing one to use blit engine on the surface. 133701e04c3fSmrg */ 133801e04c3fSmrg if (surf_info->row_pitch_B == 0 && tile_info->tiling == ISL_TILING_LINEAR) { 133901e04c3fSmrg /* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress: 134001e04c3fSmrg * 134101e04c3fSmrg * "Base address of the destination surface: X=0, Y=0. Lower 32bits 134201e04c3fSmrg * of the 48bit addressing. When Src Tiling is enabled (Bit_15 134301e04c3fSmrg * enabled), this address must be 4KB-aligned. When Tiling is not 134401e04c3fSmrg * enabled, this address should be CL (64byte) aligned." 134501e04c3fSmrg */ 134601e04c3fSmrg alignment_B = MAX2(alignment_B, 64); 134701e04c3fSmrg } 134801e04c3fSmrg 134901e04c3fSmrg const uint32_t min_row_pitch_B = 135001e04c3fSmrg isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el, 135101e04c3fSmrg alignment_B); 135201e04c3fSmrg 135301e04c3fSmrg uint32_t row_pitch_B = min_row_pitch_B; 135401e04c3fSmrg 135501e04c3fSmrg if (surf_info->row_pitch_B != 0) { 135601e04c3fSmrg row_pitch_B = surf_info->row_pitch_B; 135701e04c3fSmrg 135801e04c3fSmrg if (row_pitch_B < min_row_pitch_B) 135901e04c3fSmrg return false; 136001e04c3fSmrg 136101e04c3fSmrg if (row_pitch_B % alignment_B != 0) 136201e04c3fSmrg return false; 136301e04c3fSmrg } 136401e04c3fSmrg 136501e04c3fSmrg const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width; 136601e04c3fSmrg 136701e04c3fSmrg if (row_pitch_B == 0) 136801e04c3fSmrg return false; 136901e04c3fSmrg 137001e04c3fSmrg if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { 137101e04c3fSmrg /* SurfacePitch is ignored for this layout. */ 137201e04c3fSmrg goto done; 137301e04c3fSmrg } 137401e04c3fSmrg 137501e04c3fSmrg if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | 137601e04c3fSmrg ISL_SURF_USAGE_TEXTURE_BIT | 137701e04c3fSmrg ISL_SURF_USAGE_STORAGE_BIT)) && 137801e04c3fSmrg !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info))) 137901e04c3fSmrg return false; 138001e04c3fSmrg 138101e04c3fSmrg if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT | 138201e04c3fSmrg ISL_SURF_USAGE_MCS_BIT)) && 138301e04c3fSmrg !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info))) 138401e04c3fSmrg return false; 138501e04c3fSmrg 138601e04c3fSmrg if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) && 138701e04c3fSmrg !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) 138801e04c3fSmrg return false; 138901e04c3fSmrg 139001e04c3fSmrg if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) && 139101e04c3fSmrg !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) 139201e04c3fSmrg return false; 139301e04c3fSmrg 139401e04c3fSmrg const uint32_t stencil_pitch_bits = dev->use_separate_stencil ? 139501e04c3fSmrg _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) : 139601e04c3fSmrg _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info); 139701e04c3fSmrg 139801e04c3fSmrg if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) && 139901e04c3fSmrg !pitch_in_range(row_pitch_B, stencil_pitch_bits)) 140001e04c3fSmrg return false; 140101e04c3fSmrg 140201e04c3fSmrg done: 140301e04c3fSmrg *out_row_pitch_B = row_pitch_B; 140401e04c3fSmrg return true; 140501e04c3fSmrg} 140601e04c3fSmrg 140701e04c3fSmrgbool 140801e04c3fSmrgisl_surf_init_s(const struct isl_device *dev, 140901e04c3fSmrg struct isl_surf *surf, 141001e04c3fSmrg const struct isl_surf_init_info *restrict info) 141101e04c3fSmrg{ 141201e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 141301e04c3fSmrg 141401e04c3fSmrg const struct isl_extent4d logical_level0_px = { 141501e04c3fSmrg .w = info->width, 141601e04c3fSmrg .h = info->height, 141701e04c3fSmrg .d = info->depth, 141801e04c3fSmrg .a = info->array_len, 141901e04c3fSmrg }; 142001e04c3fSmrg 142101e04c3fSmrg enum isl_tiling tiling; 142201e04c3fSmrg if (!isl_surf_choose_tiling(dev, info, &tiling)) 142301e04c3fSmrg return false; 142401e04c3fSmrg 142501e04c3fSmrg struct isl_tile_info tile_info; 142601e04c3fSmrg isl_tiling_get_info(tiling, fmtl->bpb, &tile_info); 142701e04c3fSmrg 142801e04c3fSmrg const enum isl_dim_layout dim_layout = 142901e04c3fSmrg isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage); 143001e04c3fSmrg 143101e04c3fSmrg enum isl_msaa_layout msaa_layout; 143201e04c3fSmrg if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) 143301e04c3fSmrg return false; 143401e04c3fSmrg 143501e04c3fSmrg struct isl_extent3d image_align_el; 143601e04c3fSmrg isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout, 143701e04c3fSmrg &image_align_el); 143801e04c3fSmrg 143901e04c3fSmrg struct isl_extent3d image_align_sa = 144001e04c3fSmrg isl_extent3d_el_to_sa(info->format, image_align_el); 144101e04c3fSmrg 144201e04c3fSmrg struct isl_extent4d phys_level0_sa; 144301e04c3fSmrg isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, 144401e04c3fSmrg &phys_level0_sa); 144501e04c3fSmrg assert(phys_level0_sa.w % fmtl->bw == 0); 144601e04c3fSmrg assert(phys_level0_sa.h % fmtl->bh == 0); 144701e04c3fSmrg 144801e04c3fSmrg enum isl_array_pitch_span array_pitch_span = 144901e04c3fSmrg isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); 145001e04c3fSmrg 145101e04c3fSmrg uint32_t array_pitch_el_rows; 145201e04c3fSmrg struct isl_extent2d phys_total_el; 145301e04c3fSmrg isl_calc_phys_total_extent_el(dev, info, &tile_info, 145401e04c3fSmrg dim_layout, msaa_layout, 145501e04c3fSmrg &image_align_sa, &phys_level0_sa, 145601e04c3fSmrg array_pitch_span, &array_pitch_el_rows, 145701e04c3fSmrg &phys_total_el); 145801e04c3fSmrg 145901e04c3fSmrg uint32_t row_pitch_B; 146001e04c3fSmrg if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout, 146101e04c3fSmrg &phys_total_el, &row_pitch_B)) 146201e04c3fSmrg return false; 146301e04c3fSmrg 146401e04c3fSmrg uint32_t base_alignment_B; 146501e04c3fSmrg uint64_t size_B; 146601e04c3fSmrg if (tiling == ISL_TILING_LINEAR) { 146701e04c3fSmrg size_B = (uint64_t) row_pitch_B * phys_total_el.h; 146801e04c3fSmrg 146901e04c3fSmrg /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress: 147001e04c3fSmrg * 147101e04c3fSmrg * "The Base Address for linear render target surfaces and surfaces 147201e04c3fSmrg * accessed with the typed surface read/write data port messages must 147301e04c3fSmrg * be element-size aligned, for non-YUV surface formats, or a 147401e04c3fSmrg * multiple of 2 element-sizes for YUV surface formats. Other linear 147501e04c3fSmrg * surfaces have no alignment requirements (byte alignment is 147601e04c3fSmrg * sufficient.)" 147701e04c3fSmrg */ 147801e04c3fSmrg base_alignment_B = MAX(1, info->min_alignment_B); 147901e04c3fSmrg if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { 148001e04c3fSmrg if (isl_format_is_yuv(info->format)) { 148101e04c3fSmrg base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 4); 148201e04c3fSmrg } else { 148301e04c3fSmrg base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 8); 148401e04c3fSmrg } 148501e04c3fSmrg } 148601e04c3fSmrg base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B); 148701e04c3fSmrg } else { 148801e04c3fSmrg const uint32_t total_h_tl = 148901e04c3fSmrg isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height); 149001e04c3fSmrg 149101e04c3fSmrg size_B = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch_B; 149201e04c3fSmrg 149301e04c3fSmrg const uint32_t tile_size_B = tile_info.phys_extent_B.width * 149401e04c3fSmrg tile_info.phys_extent_B.height; 149501e04c3fSmrg assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B)); 149601e04c3fSmrg base_alignment_B = MAX(info->min_alignment_B, tile_size_B); 149701e04c3fSmrg } 149801e04c3fSmrg 149901e04c3fSmrg if (ISL_DEV_GEN(dev) < 9) { 150001e04c3fSmrg /* From the Broadwell PRM Vol 5, Surface Layout: 150101e04c3fSmrg * 150201e04c3fSmrg * "In addition to restrictions on maximum height, width, and depth, 150301e04c3fSmrg * surfaces are also restricted to a maximum size in bytes. This 150401e04c3fSmrg * maximum is 2 GB for all products and all surface types." 150501e04c3fSmrg * 150601e04c3fSmrg * This comment is applicable to all Pre-gen9 platforms. 150701e04c3fSmrg */ 150801e04c3fSmrg if (size_B > (uint64_t) 1 << 31) 150901e04c3fSmrg return false; 151001e04c3fSmrg } else if (ISL_DEV_GEN(dev) < 11) { 151101e04c3fSmrg /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes: 151201e04c3fSmrg * "In addition to restrictions on maximum height, width, and depth, 151301e04c3fSmrg * surfaces are also restricted to a maximum size of 2^38 bytes. 151401e04c3fSmrg * All pixels within the surface must be contained within 2^38 bytes 151501e04c3fSmrg * of the base address." 151601e04c3fSmrg */ 151701e04c3fSmrg if (size_B > (uint64_t) 1 << 38) 151801e04c3fSmrg return false; 151901e04c3fSmrg } else { 152001e04c3fSmrg /* gen11+ platforms raised this limit to 2^44 bytes. */ 152101e04c3fSmrg if (size_B > (uint64_t) 1 << 44) 152201e04c3fSmrg return false; 152301e04c3fSmrg } 152401e04c3fSmrg 152501e04c3fSmrg *surf = (struct isl_surf) { 152601e04c3fSmrg .dim = info->dim, 152701e04c3fSmrg .dim_layout = dim_layout, 152801e04c3fSmrg .msaa_layout = msaa_layout, 152901e04c3fSmrg .tiling = tiling, 153001e04c3fSmrg .format = info->format, 153101e04c3fSmrg 153201e04c3fSmrg .levels = info->levels, 153301e04c3fSmrg .samples = info->samples, 153401e04c3fSmrg 153501e04c3fSmrg .image_alignment_el = image_align_el, 153601e04c3fSmrg .logical_level0_px = logical_level0_px, 153701e04c3fSmrg .phys_level0_sa = phys_level0_sa, 153801e04c3fSmrg 153901e04c3fSmrg .size_B = size_B, 154001e04c3fSmrg .alignment_B = base_alignment_B, 154101e04c3fSmrg .row_pitch_B = row_pitch_B, 154201e04c3fSmrg .array_pitch_el_rows = array_pitch_el_rows, 154301e04c3fSmrg .array_pitch_span = array_pitch_span, 154401e04c3fSmrg 154501e04c3fSmrg .usage = info->usage, 154601e04c3fSmrg }; 154701e04c3fSmrg 154801e04c3fSmrg return true; 154901e04c3fSmrg} 155001e04c3fSmrg 155101e04c3fSmrgvoid 155201e04c3fSmrgisl_surf_get_tile_info(const struct isl_surf *surf, 155301e04c3fSmrg struct isl_tile_info *tile_info) 155401e04c3fSmrg{ 155501e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 155601e04c3fSmrg isl_tiling_get_info(surf->tiling, fmtl->bpb, tile_info); 155701e04c3fSmrg} 155801e04c3fSmrg 155901e04c3fSmrgbool 156001e04c3fSmrgisl_surf_get_hiz_surf(const struct isl_device *dev, 156101e04c3fSmrg const struct isl_surf *surf, 156201e04c3fSmrg struct isl_surf *hiz_surf) 156301e04c3fSmrg{ 156401e04c3fSmrg assert(ISL_DEV_GEN(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev)); 156501e04c3fSmrg 156601e04c3fSmrg /* Multisampled depth is always interleaved */ 156701e04c3fSmrg assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE || 156801e04c3fSmrg surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED); 156901e04c3fSmrg 157001e04c3fSmrg /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer": 157101e04c3fSmrg * 157201e04c3fSmrg * "The Surface Type, Height, Width, Depth, Minimum Array Element, Render 157301e04c3fSmrg * Target View Extent, and Depth Coordinate Offset X/Y of the 157401e04c3fSmrg * hierarchical depth buffer are inherited from the depth buffer. The 157501e04c3fSmrg * height and width of the hierarchical depth buffer that must be 157601e04c3fSmrg * allocated are computed by the following formulas, where HZ is the 157701e04c3fSmrg * hierarchical depth buffer and Z is the depth buffer. The Z_Height, 157801e04c3fSmrg * Z_Width, and Z_Depth values given in these formulas are those present 157901e04c3fSmrg * in 3DSTATE_DEPTH_BUFFER incremented by one. 158001e04c3fSmrg * 158101e04c3fSmrg * "The value of Z_Height and Z_Width must each be multiplied by 2 before 158201e04c3fSmrg * being applied to the table below if Number of Multisamples is set to 158301e04c3fSmrg * NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and 158401e04c3fSmrg * Z_Width must be multiplied by 4 before being applied to the table 158501e04c3fSmrg * below if Number of Multisamples is set to NUMSAMPLES_8." 158601e04c3fSmrg * 158701e04c3fSmrg * In the Sky Lake PRM, the second paragraph is replaced with this: 158801e04c3fSmrg * 158901e04c3fSmrg * "The Z_Height and Z_Width values must equal those present in 159001e04c3fSmrg * 3DSTATE_DEPTH_BUFFER incremented by one." 159101e04c3fSmrg * 159201e04c3fSmrg * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ 159301e04c3fSmrg * block corresponds to a region of 8x4 samples in the primary depth 159401e04c3fSmrg * surface. On Sky Lake, on the other hand, each HiZ block corresponds to 159501e04c3fSmrg * a region of 8x4 pixels in the primary depth surface regardless of the 159601e04c3fSmrg * number of samples. The dimensions of a HiZ block in both pixels and 159701e04c3fSmrg * samples are given in the table below: 159801e04c3fSmrg * 159901e04c3fSmrg * | SNB - BDW | SKL+ 160001e04c3fSmrg * ------+-----------+------------- 160101e04c3fSmrg * 1x | 8 x 4 sa | 8 x 4 sa 160201e04c3fSmrg * MSAA | 8 x 4 px | 8 x 4 px 160301e04c3fSmrg * ------+-----------+------------- 160401e04c3fSmrg * 2x | 8 x 4 sa | 16 x 4 sa 160501e04c3fSmrg * MSAA | 4 x 4 px | 8 x 4 px 160601e04c3fSmrg * ------+-----------+------------- 160701e04c3fSmrg * 4x | 8 x 4 sa | 16 x 8 sa 160801e04c3fSmrg * MSAA | 4 x 2 px | 8 x 4 px 160901e04c3fSmrg * ------+-----------+------------- 161001e04c3fSmrg * 8x | 8 x 4 sa | 32 x 8 sa 161101e04c3fSmrg * MSAA | 2 x 2 px | 8 x 4 px 161201e04c3fSmrg * ------+-----------+------------- 161301e04c3fSmrg * 16x | N/A | 32 x 16 sa 161401e04c3fSmrg * MSAA | N/A | 8 x 4 px 161501e04c3fSmrg * ------+-----------+------------- 161601e04c3fSmrg * 161701e04c3fSmrg * There are a number of different ways that this discrepency could be 161801e04c3fSmrg * handled. The way we have chosen is to simply make MSAA HiZ have the 161901e04c3fSmrg * same number of samples as the parent surface pre-Sky Lake and always be 162001e04c3fSmrg * single-sampled on Sky Lake and above. Since the block sizes of 162101e04c3fSmrg * compressed formats are given in samples, this neatly handles everything 162201e04c3fSmrg * without the need for additional HiZ formats with different block sizes 162301e04c3fSmrg * on SKL+. 162401e04c3fSmrg */ 162501e04c3fSmrg const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples; 162601e04c3fSmrg 162701e04c3fSmrg return isl_surf_init(dev, hiz_surf, 162801e04c3fSmrg .dim = surf->dim, 162901e04c3fSmrg .format = ISL_FORMAT_HIZ, 163001e04c3fSmrg .width = surf->logical_level0_px.width, 163101e04c3fSmrg .height = surf->logical_level0_px.height, 163201e04c3fSmrg .depth = surf->logical_level0_px.depth, 163301e04c3fSmrg .levels = surf->levels, 163401e04c3fSmrg .array_len = surf->logical_level0_px.array_len, 163501e04c3fSmrg .samples = samples, 163601e04c3fSmrg .usage = ISL_SURF_USAGE_HIZ_BIT, 163701e04c3fSmrg .tiling_flags = ISL_TILING_HIZ_BIT); 163801e04c3fSmrg} 163901e04c3fSmrg 164001e04c3fSmrgbool 164101e04c3fSmrgisl_surf_get_mcs_surf(const struct isl_device *dev, 164201e04c3fSmrg const struct isl_surf *surf, 164301e04c3fSmrg struct isl_surf *mcs_surf) 164401e04c3fSmrg{ 164501e04c3fSmrg assert(ISL_DEV_GEN(dev) >= 7); 164601e04c3fSmrg 164701e04c3fSmrg /* It must be multisampled with an array layout */ 164801e04c3fSmrg assert(surf->samples > 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 164901e04c3fSmrg 165001e04c3fSmrg /* The following are true of all multisampled surfaces */ 165101e04c3fSmrg assert(surf->dim == ISL_SURF_DIM_2D); 165201e04c3fSmrg assert(surf->levels == 1); 165301e04c3fSmrg assert(surf->logical_level0_px.depth == 1); 165401e04c3fSmrg 165501e04c3fSmrg /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9 165601e04c3fSmrg * bits which means the maximum pitch of a compression surface is 512 165701e04c3fSmrg * tiles or 64KB (since MCS is always Y-tiled). Since a 16x MCS buffer is 165801e04c3fSmrg * 64bpp, this gives us a maximum width of 8192 pixels. We can create 165901e04c3fSmrg * larger multisampled surfaces, we just can't compress them. For 2x, 4x, 166001e04c3fSmrg * and 8x, we have enough room for the full 16k supported by the hardware. 166101e04c3fSmrg */ 166201e04c3fSmrg if (surf->samples == 16 && surf->logical_level0_px.width > 8192) 166301e04c3fSmrg return false; 166401e04c3fSmrg 166501e04c3fSmrg enum isl_format mcs_format; 166601e04c3fSmrg switch (surf->samples) { 166701e04c3fSmrg case 2: mcs_format = ISL_FORMAT_MCS_2X; break; 166801e04c3fSmrg case 4: mcs_format = ISL_FORMAT_MCS_4X; break; 166901e04c3fSmrg case 8: mcs_format = ISL_FORMAT_MCS_8X; break; 167001e04c3fSmrg case 16: mcs_format = ISL_FORMAT_MCS_16X; break; 167101e04c3fSmrg default: 167201e04c3fSmrg unreachable("Invalid sample count"); 167301e04c3fSmrg } 167401e04c3fSmrg 167501e04c3fSmrg return isl_surf_init(dev, mcs_surf, 167601e04c3fSmrg .dim = ISL_SURF_DIM_2D, 167701e04c3fSmrg .format = mcs_format, 167801e04c3fSmrg .width = surf->logical_level0_px.width, 167901e04c3fSmrg .height = surf->logical_level0_px.height, 168001e04c3fSmrg .depth = 1, 168101e04c3fSmrg .levels = 1, 168201e04c3fSmrg .array_len = surf->logical_level0_px.array_len, 168301e04c3fSmrg .samples = 1, /* MCS surfaces are really single-sampled */ 168401e04c3fSmrg .usage = ISL_SURF_USAGE_MCS_BIT, 168501e04c3fSmrg .tiling_flags = ISL_TILING_Y0_BIT); 168601e04c3fSmrg} 168701e04c3fSmrg 168801e04c3fSmrgbool 168901e04c3fSmrgisl_surf_get_ccs_surf(const struct isl_device *dev, 169001e04c3fSmrg const struct isl_surf *surf, 169101e04c3fSmrg struct isl_surf *ccs_surf, 169201e04c3fSmrg uint32_t row_pitch_B) 169301e04c3fSmrg{ 169401e04c3fSmrg assert(surf->samples == 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_NONE); 169501e04c3fSmrg assert(ISL_DEV_GEN(dev) >= 7); 169601e04c3fSmrg 169701e04c3fSmrg if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) 169801e04c3fSmrg return false; 169901e04c3fSmrg 170001e04c3fSmrg /* The PRM doesn't say this explicitly, but fast-clears don't appear to 170101e04c3fSmrg * work for 3D textures until gen9 where the layout of 3D textures changes 170201e04c3fSmrg * to match 2D array textures. 170301e04c3fSmrg */ 170401e04c3fSmrg if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D) 170501e04c3fSmrg return false; 170601e04c3fSmrg 170701e04c3fSmrg /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of 170801e04c3fSmrg * Non-MultiSampler Render Target Restrictions): 170901e04c3fSmrg * 171001e04c3fSmrg * "Support is for non-mip-mapped and non-array surface types only." 171101e04c3fSmrg * 171201e04c3fSmrg * This restriction is lifted on gen8+. Technically, it may be possible to 171301e04c3fSmrg * create a CCS for an arrayed or mipmapped image and only enable CCS_D 171401e04c3fSmrg * when rendering to the base slice. However, there is no documentation 171501e04c3fSmrg * tell us what the hardware would do in that case or what it does if you 171601e04c3fSmrg * walk off the bases slice. (Does it ignore CCS or does it start 171701e04c3fSmrg * scribbling over random memory?) We play it safe and just follow the 171801e04c3fSmrg * docs and don't allow CCS_D for arrayed or mip-mapped surfaces. 171901e04c3fSmrg */ 172001e04c3fSmrg if (ISL_DEV_GEN(dev) <= 7 && 172101e04c3fSmrg (surf->levels > 1 || surf->logical_level0_px.array_len > 1)) 172201e04c3fSmrg return false; 172301e04c3fSmrg 172401e04c3fSmrg if (isl_format_is_compressed(surf->format)) 172501e04c3fSmrg return false; 172601e04c3fSmrg 172701e04c3fSmrg /* TODO: More conditions where it can fail. */ 172801e04c3fSmrg 172901e04c3fSmrg enum isl_format ccs_format; 173001e04c3fSmrg if (ISL_DEV_GEN(dev) >= 9) { 173101e04c3fSmrg if (!isl_tiling_is_any_y(surf->tiling)) 173201e04c3fSmrg return false; 173301e04c3fSmrg 173401e04c3fSmrg switch (isl_format_get_layout(surf->format)->bpb) { 173501e04c3fSmrg case 32: ccs_format = ISL_FORMAT_GEN9_CCS_32BPP; break; 173601e04c3fSmrg case 64: ccs_format = ISL_FORMAT_GEN9_CCS_64BPP; break; 173701e04c3fSmrg case 128: ccs_format = ISL_FORMAT_GEN9_CCS_128BPP; break; 173801e04c3fSmrg default: 173901e04c3fSmrg return false; 174001e04c3fSmrg } 174101e04c3fSmrg } else if (surf->tiling == ISL_TILING_Y0) { 174201e04c3fSmrg switch (isl_format_get_layout(surf->format)->bpb) { 174301e04c3fSmrg case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_Y; break; 174401e04c3fSmrg case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_Y; break; 174501e04c3fSmrg case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_Y; break; 174601e04c3fSmrg default: 174701e04c3fSmrg return false; 174801e04c3fSmrg } 174901e04c3fSmrg } else if (surf->tiling == ISL_TILING_X) { 175001e04c3fSmrg switch (isl_format_get_layout(surf->format)->bpb) { 175101e04c3fSmrg case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_X; break; 175201e04c3fSmrg case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_X; break; 175301e04c3fSmrg case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_X; break; 175401e04c3fSmrg default: 175501e04c3fSmrg return false; 175601e04c3fSmrg } 175701e04c3fSmrg } else { 175801e04c3fSmrg return false; 175901e04c3fSmrg } 176001e04c3fSmrg 176101e04c3fSmrg return isl_surf_init(dev, ccs_surf, 176201e04c3fSmrg .dim = surf->dim, 176301e04c3fSmrg .format = ccs_format, 176401e04c3fSmrg .width = surf->logical_level0_px.width, 176501e04c3fSmrg .height = surf->logical_level0_px.height, 176601e04c3fSmrg .depth = surf->logical_level0_px.depth, 176701e04c3fSmrg .levels = surf->levels, 176801e04c3fSmrg .array_len = surf->logical_level0_px.array_len, 176901e04c3fSmrg .samples = 1, 177001e04c3fSmrg .row_pitch_B = row_pitch_B, 177101e04c3fSmrg .usage = ISL_SURF_USAGE_CCS_BIT, 177201e04c3fSmrg .tiling_flags = ISL_TILING_CCS_BIT); 177301e04c3fSmrg} 177401e04c3fSmrg 177501e04c3fSmrg#define isl_genX_call(dev, func, ...) \ 177601e04c3fSmrg switch (ISL_DEV_GEN(dev)) { \ 177701e04c3fSmrg case 4: \ 177801e04c3fSmrg /* G45 surface state is the same as gen5 */ \ 177901e04c3fSmrg if (ISL_DEV_IS_G4X(dev)) { \ 178001e04c3fSmrg isl_gen5_##func(__VA_ARGS__); \ 178101e04c3fSmrg } else { \ 178201e04c3fSmrg isl_gen4_##func(__VA_ARGS__); \ 178301e04c3fSmrg } \ 178401e04c3fSmrg break; \ 178501e04c3fSmrg case 5: \ 178601e04c3fSmrg isl_gen5_##func(__VA_ARGS__); \ 178701e04c3fSmrg break; \ 178801e04c3fSmrg case 6: \ 178901e04c3fSmrg isl_gen6_##func(__VA_ARGS__); \ 179001e04c3fSmrg break; \ 179101e04c3fSmrg case 7: \ 179201e04c3fSmrg if (ISL_DEV_IS_HASWELL(dev)) { \ 179301e04c3fSmrg isl_gen75_##func(__VA_ARGS__); \ 179401e04c3fSmrg } else { \ 179501e04c3fSmrg isl_gen7_##func(__VA_ARGS__); \ 179601e04c3fSmrg } \ 179701e04c3fSmrg break; \ 179801e04c3fSmrg case 8: \ 179901e04c3fSmrg isl_gen8_##func(__VA_ARGS__); \ 180001e04c3fSmrg break; \ 180101e04c3fSmrg case 9: \ 180201e04c3fSmrg isl_gen9_##func(__VA_ARGS__); \ 180301e04c3fSmrg break; \ 180401e04c3fSmrg case 10: \ 180501e04c3fSmrg isl_gen10_##func(__VA_ARGS__); \ 180601e04c3fSmrg break; \ 180701e04c3fSmrg case 11: \ 180801e04c3fSmrg isl_gen11_##func(__VA_ARGS__); \ 180901e04c3fSmrg break; \ 181001e04c3fSmrg default: \ 181101e04c3fSmrg assert(!"Unknown hardware generation"); \ 181201e04c3fSmrg } 181301e04c3fSmrg 181401e04c3fSmrgvoid 181501e04c3fSmrgisl_surf_fill_state_s(const struct isl_device *dev, void *state, 181601e04c3fSmrg const struct isl_surf_fill_state_info *restrict info) 181701e04c3fSmrg{ 181801e04c3fSmrg#ifndef NDEBUG 181901e04c3fSmrg isl_surf_usage_flags_t _base_usage = 182001e04c3fSmrg info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | 182101e04c3fSmrg ISL_SURF_USAGE_TEXTURE_BIT | 182201e04c3fSmrg ISL_SURF_USAGE_STORAGE_BIT); 182301e04c3fSmrg /* They may only specify one of the above bits at a time */ 182401e04c3fSmrg assert(__builtin_popcount(_base_usage) == 1); 182501e04c3fSmrg /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */ 182601e04c3fSmrg assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage); 182701e04c3fSmrg#endif 182801e04c3fSmrg 182901e04c3fSmrg if (info->surf->dim == ISL_SURF_DIM_3D) { 183001e04c3fSmrg assert(info->view->base_array_layer + info->view->array_len <= 183101e04c3fSmrg info->surf->logical_level0_px.depth); 183201e04c3fSmrg } else { 183301e04c3fSmrg assert(info->view->base_array_layer + info->view->array_len <= 183401e04c3fSmrg info->surf->logical_level0_px.array_len); 183501e04c3fSmrg } 183601e04c3fSmrg 183701e04c3fSmrg isl_genX_call(dev, surf_fill_state_s, dev, state, info); 183801e04c3fSmrg} 183901e04c3fSmrg 184001e04c3fSmrgvoid 184101e04c3fSmrgisl_buffer_fill_state_s(const struct isl_device *dev, void *state, 184201e04c3fSmrg const struct isl_buffer_fill_state_info *restrict info) 184301e04c3fSmrg{ 184401e04c3fSmrg isl_genX_call(dev, buffer_fill_state_s, state, info); 184501e04c3fSmrg} 184601e04c3fSmrg 184701e04c3fSmrgvoid 184801e04c3fSmrgisl_null_fill_state(const struct isl_device *dev, void *state, 184901e04c3fSmrg struct isl_extent3d size) 185001e04c3fSmrg{ 185101e04c3fSmrg isl_genX_call(dev, null_fill_state, state, size); 185201e04c3fSmrg} 185301e04c3fSmrg 185401e04c3fSmrgvoid 185501e04c3fSmrgisl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, 185601e04c3fSmrg const struct isl_depth_stencil_hiz_emit_info *restrict info) 185701e04c3fSmrg{ 185801e04c3fSmrg if (info->depth_surf && info->stencil_surf) { 185901e04c3fSmrg if (!dev->info->has_hiz_and_separate_stencil) { 186001e04c3fSmrg assert(info->depth_surf == info->stencil_surf); 186101e04c3fSmrg assert(info->depth_address == info->stencil_address); 186201e04c3fSmrg } 186301e04c3fSmrg assert(info->depth_surf->dim == info->stencil_surf->dim); 186401e04c3fSmrg } 186501e04c3fSmrg 186601e04c3fSmrg if (info->depth_surf) { 186701e04c3fSmrg assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT)); 186801e04c3fSmrg if (info->depth_surf->dim == ISL_SURF_DIM_3D) { 186901e04c3fSmrg assert(info->view->base_array_layer + info->view->array_len <= 187001e04c3fSmrg info->depth_surf->logical_level0_px.depth); 187101e04c3fSmrg } else { 187201e04c3fSmrg assert(info->view->base_array_layer + info->view->array_len <= 187301e04c3fSmrg info->depth_surf->logical_level0_px.array_len); 187401e04c3fSmrg } 187501e04c3fSmrg } 187601e04c3fSmrg 187701e04c3fSmrg if (info->stencil_surf) { 187801e04c3fSmrg assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT)); 187901e04c3fSmrg if (info->stencil_surf->dim == ISL_SURF_DIM_3D) { 188001e04c3fSmrg assert(info->view->base_array_layer + info->view->array_len <= 188101e04c3fSmrg info->stencil_surf->logical_level0_px.depth); 188201e04c3fSmrg } else { 188301e04c3fSmrg assert(info->view->base_array_layer + info->view->array_len <= 188401e04c3fSmrg info->stencil_surf->logical_level0_px.array_len); 188501e04c3fSmrg } 188601e04c3fSmrg } 188701e04c3fSmrg 188801e04c3fSmrg isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info); 188901e04c3fSmrg} 189001e04c3fSmrg 189101e04c3fSmrg/** 189201e04c3fSmrg * A variant of isl_surf_get_image_offset_sa() specific to 189301e04c3fSmrg * ISL_DIM_LAYOUT_GEN4_2D. 189401e04c3fSmrg */ 189501e04c3fSmrgstatic void 189601e04c3fSmrgget_image_offset_sa_gen4_2d(const struct isl_surf *surf, 189701e04c3fSmrg uint32_t level, uint32_t logical_array_layer, 189801e04c3fSmrg uint32_t *x_offset_sa, 189901e04c3fSmrg uint32_t *y_offset_sa) 190001e04c3fSmrg{ 190101e04c3fSmrg assert(level < surf->levels); 190201e04c3fSmrg if (surf->dim == ISL_SURF_DIM_3D) 190301e04c3fSmrg assert(logical_array_layer < surf->logical_level0_px.depth); 190401e04c3fSmrg else 190501e04c3fSmrg assert(logical_array_layer < surf->logical_level0_px.array_len); 190601e04c3fSmrg 190701e04c3fSmrg const struct isl_extent3d image_align_sa = 190801e04c3fSmrg isl_surf_get_image_alignment_sa(surf); 190901e04c3fSmrg 191001e04c3fSmrg const uint32_t W0 = surf->phys_level0_sa.width; 191101e04c3fSmrg const uint32_t H0 = surf->phys_level0_sa.height; 191201e04c3fSmrg 191301e04c3fSmrg const uint32_t phys_layer = logical_array_layer * 191401e04c3fSmrg (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1); 191501e04c3fSmrg 191601e04c3fSmrg uint32_t x = 0; 191701e04c3fSmrg uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf); 191801e04c3fSmrg 191901e04c3fSmrg for (uint32_t l = 0; l < level; ++l) { 192001e04c3fSmrg if (l == 1) { 192101e04c3fSmrg uint32_t W = isl_minify(W0, l); 192201e04c3fSmrg x += isl_align_npot(W, image_align_sa.w); 192301e04c3fSmrg } else { 192401e04c3fSmrg uint32_t H = isl_minify(H0, l); 192501e04c3fSmrg y += isl_align_npot(H, image_align_sa.h); 192601e04c3fSmrg } 192701e04c3fSmrg } 192801e04c3fSmrg 192901e04c3fSmrg *x_offset_sa = x; 193001e04c3fSmrg *y_offset_sa = y; 193101e04c3fSmrg} 193201e04c3fSmrg 193301e04c3fSmrg/** 193401e04c3fSmrg * A variant of isl_surf_get_image_offset_sa() specific to 193501e04c3fSmrg * ISL_DIM_LAYOUT_GEN4_3D. 193601e04c3fSmrg */ 193701e04c3fSmrgstatic void 193801e04c3fSmrgget_image_offset_sa_gen4_3d(const struct isl_surf *surf, 193901e04c3fSmrg uint32_t level, uint32_t logical_z_offset_px, 194001e04c3fSmrg uint32_t *x_offset_sa, 194101e04c3fSmrg uint32_t *y_offset_sa) 194201e04c3fSmrg{ 194301e04c3fSmrg assert(level < surf->levels); 194401e04c3fSmrg if (surf->dim == ISL_SURF_DIM_3D) { 194501e04c3fSmrg assert(surf->phys_level0_sa.array_len == 1); 194601e04c3fSmrg assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level)); 194701e04c3fSmrg } else { 194801e04c3fSmrg assert(surf->dim == ISL_SURF_DIM_2D); 194901e04c3fSmrg assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT); 195001e04c3fSmrg assert(surf->phys_level0_sa.array_len == 6); 195101e04c3fSmrg assert(logical_z_offset_px < surf->phys_level0_sa.array_len); 195201e04c3fSmrg } 195301e04c3fSmrg 195401e04c3fSmrg const struct isl_extent3d image_align_sa = 195501e04c3fSmrg isl_surf_get_image_alignment_sa(surf); 195601e04c3fSmrg 195701e04c3fSmrg const uint32_t W0 = surf->phys_level0_sa.width; 195801e04c3fSmrg const uint32_t H0 = surf->phys_level0_sa.height; 195901e04c3fSmrg const uint32_t D0 = surf->phys_level0_sa.depth; 196001e04c3fSmrg const uint32_t AL = surf->phys_level0_sa.array_len; 196101e04c3fSmrg 196201e04c3fSmrg uint32_t x = 0; 196301e04c3fSmrg uint32_t y = 0; 196401e04c3fSmrg 196501e04c3fSmrg for (uint32_t l = 0; l < level; ++l) { 196601e04c3fSmrg const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h); 196701e04c3fSmrg const uint32_t level_d = 196801e04c3fSmrg isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL, 196901e04c3fSmrg image_align_sa.d); 197001e04c3fSmrg const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); 197101e04c3fSmrg 197201e04c3fSmrg y += level_h * max_layers_vert; 197301e04c3fSmrg } 197401e04c3fSmrg 197501e04c3fSmrg const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w); 197601e04c3fSmrg const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h); 197701e04c3fSmrg const uint32_t level_d = 197801e04c3fSmrg isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL, 197901e04c3fSmrg image_align_sa.d); 198001e04c3fSmrg 198101e04c3fSmrg const uint32_t max_layers_horiz = MIN(level_d, 1u << level); 198201e04c3fSmrg 198301e04c3fSmrg x += level_w * (logical_z_offset_px % max_layers_horiz); 198401e04c3fSmrg y += level_h * (logical_z_offset_px / max_layers_horiz); 198501e04c3fSmrg 198601e04c3fSmrg *x_offset_sa = x; 198701e04c3fSmrg *y_offset_sa = y; 198801e04c3fSmrg} 198901e04c3fSmrg 199001e04c3fSmrgstatic void 199101e04c3fSmrgget_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf, 199201e04c3fSmrg uint32_t level, 199301e04c3fSmrg uint32_t logical_array_layer, 199401e04c3fSmrg uint32_t *x_offset_sa, 199501e04c3fSmrg uint32_t *y_offset_sa) 199601e04c3fSmrg{ 199701e04c3fSmrg assert(level < surf->levels); 199801e04c3fSmrg assert(surf->logical_level0_px.depth == 1); 199901e04c3fSmrg assert(logical_array_layer < surf->logical_level0_px.array_len); 200001e04c3fSmrg 200101e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 200201e04c3fSmrg 200301e04c3fSmrg const struct isl_extent3d image_align_sa = 200401e04c3fSmrg isl_surf_get_image_alignment_sa(surf); 200501e04c3fSmrg 200601e04c3fSmrg struct isl_tile_info tile_info; 200701e04c3fSmrg isl_tiling_get_info(surf->tiling, fmtl->bpb, &tile_info); 200801e04c3fSmrg const struct isl_extent2d tile_extent_sa = { 200901e04c3fSmrg .w = tile_info.logical_extent_el.w * fmtl->bw, 201001e04c3fSmrg .h = tile_info.logical_extent_el.h * fmtl->bh, 201101e04c3fSmrg }; 201201e04c3fSmrg /* Tile size is a multiple of image alignment */ 201301e04c3fSmrg assert(tile_extent_sa.w % image_align_sa.w == 0); 201401e04c3fSmrg assert(tile_extent_sa.h % image_align_sa.h == 0); 201501e04c3fSmrg 201601e04c3fSmrg const uint32_t W0 = surf->phys_level0_sa.w; 201701e04c3fSmrg const uint32_t H0 = surf->phys_level0_sa.h; 201801e04c3fSmrg 201901e04c3fSmrg /* Each image has the same height as LOD0 because the hardware thinks 202001e04c3fSmrg * everything is LOD0 202101e04c3fSmrg */ 202201e04c3fSmrg const uint32_t H = isl_align(H0, image_align_sa.h); 202301e04c3fSmrg 202401e04c3fSmrg /* Quick sanity check for consistency */ 202501e04c3fSmrg if (surf->phys_level0_sa.array_len > 1) 202601e04c3fSmrg assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh)); 202701e04c3fSmrg 202801e04c3fSmrg uint32_t x = 0, y = 0; 202901e04c3fSmrg for (uint32_t l = 0; l < level; ++l) { 203001e04c3fSmrg const uint32_t W = isl_minify(W0, l); 203101e04c3fSmrg 203201e04c3fSmrg const uint32_t w = isl_align(W, tile_extent_sa.w); 203301e04c3fSmrg const uint32_t h = isl_align(H * surf->phys_level0_sa.a, 203401e04c3fSmrg tile_extent_sa.h); 203501e04c3fSmrg 203601e04c3fSmrg if (l == 0) { 203701e04c3fSmrg y += h; 203801e04c3fSmrg } else { 203901e04c3fSmrg x += w; 204001e04c3fSmrg } 204101e04c3fSmrg } 204201e04c3fSmrg 204301e04c3fSmrg y += H * logical_array_layer; 204401e04c3fSmrg 204501e04c3fSmrg *x_offset_sa = x; 204601e04c3fSmrg *y_offset_sa = y; 204701e04c3fSmrg} 204801e04c3fSmrg 204901e04c3fSmrg/** 205001e04c3fSmrg * A variant of isl_surf_get_image_offset_sa() specific to 205101e04c3fSmrg * ISL_DIM_LAYOUT_GEN9_1D. 205201e04c3fSmrg */ 205301e04c3fSmrgstatic void 205401e04c3fSmrgget_image_offset_sa_gen9_1d(const struct isl_surf *surf, 205501e04c3fSmrg uint32_t level, uint32_t layer, 205601e04c3fSmrg uint32_t *x_offset_sa, 205701e04c3fSmrg uint32_t *y_offset_sa) 205801e04c3fSmrg{ 205901e04c3fSmrg assert(level < surf->levels); 206001e04c3fSmrg assert(layer < surf->phys_level0_sa.array_len); 206101e04c3fSmrg assert(surf->phys_level0_sa.height == 1); 206201e04c3fSmrg assert(surf->phys_level0_sa.depth == 1); 206301e04c3fSmrg assert(surf->samples == 1); 206401e04c3fSmrg 206501e04c3fSmrg const uint32_t W0 = surf->phys_level0_sa.width; 206601e04c3fSmrg const struct isl_extent3d image_align_sa = 206701e04c3fSmrg isl_surf_get_image_alignment_sa(surf); 206801e04c3fSmrg 206901e04c3fSmrg uint32_t x = 0; 207001e04c3fSmrg 207101e04c3fSmrg for (uint32_t l = 0; l < level; ++l) { 207201e04c3fSmrg uint32_t W = isl_minify(W0, l); 207301e04c3fSmrg uint32_t w = isl_align_npot(W, image_align_sa.w); 207401e04c3fSmrg 207501e04c3fSmrg x += w; 207601e04c3fSmrg } 207701e04c3fSmrg 207801e04c3fSmrg *x_offset_sa = x; 207901e04c3fSmrg *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf); 208001e04c3fSmrg} 208101e04c3fSmrg 208201e04c3fSmrg/** 208301e04c3fSmrg * Calculate the offset, in units of surface samples, to a subimage in the 208401e04c3fSmrg * surface. 208501e04c3fSmrg * 208601e04c3fSmrg * @invariant level < surface levels 208701e04c3fSmrg * @invariant logical_array_layer < logical array length of surface 208801e04c3fSmrg * @invariant logical_z_offset_px < logical depth of surface at level 208901e04c3fSmrg */ 209001e04c3fSmrgvoid 209101e04c3fSmrgisl_surf_get_image_offset_sa(const struct isl_surf *surf, 209201e04c3fSmrg uint32_t level, 209301e04c3fSmrg uint32_t logical_array_layer, 209401e04c3fSmrg uint32_t logical_z_offset_px, 209501e04c3fSmrg uint32_t *x_offset_sa, 209601e04c3fSmrg uint32_t *y_offset_sa) 209701e04c3fSmrg{ 209801e04c3fSmrg assert(level < surf->levels); 209901e04c3fSmrg assert(logical_array_layer < surf->logical_level0_px.array_len); 210001e04c3fSmrg assert(logical_z_offset_px 210101e04c3fSmrg < isl_minify(surf->logical_level0_px.depth, level)); 210201e04c3fSmrg 210301e04c3fSmrg switch (surf->dim_layout) { 210401e04c3fSmrg case ISL_DIM_LAYOUT_GEN9_1D: 210501e04c3fSmrg get_image_offset_sa_gen9_1d(surf, level, logical_array_layer, 210601e04c3fSmrg x_offset_sa, y_offset_sa); 210701e04c3fSmrg break; 210801e04c3fSmrg case ISL_DIM_LAYOUT_GEN4_2D: 210901e04c3fSmrg get_image_offset_sa_gen4_2d(surf, level, logical_array_layer 211001e04c3fSmrg + logical_z_offset_px, 211101e04c3fSmrg x_offset_sa, y_offset_sa); 211201e04c3fSmrg break; 211301e04c3fSmrg case ISL_DIM_LAYOUT_GEN4_3D: 211401e04c3fSmrg get_image_offset_sa_gen4_3d(surf, level, logical_array_layer + 211501e04c3fSmrg logical_z_offset_px, 211601e04c3fSmrg x_offset_sa, y_offset_sa); 211701e04c3fSmrg break; 211801e04c3fSmrg case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: 211901e04c3fSmrg get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer + 212001e04c3fSmrg logical_z_offset_px, 212101e04c3fSmrg x_offset_sa, y_offset_sa); 212201e04c3fSmrg break; 212301e04c3fSmrg 212401e04c3fSmrg default: 212501e04c3fSmrg unreachable("not reached"); 212601e04c3fSmrg } 212701e04c3fSmrg} 212801e04c3fSmrg 212901e04c3fSmrgvoid 213001e04c3fSmrgisl_surf_get_image_offset_el(const struct isl_surf *surf, 213101e04c3fSmrg uint32_t level, 213201e04c3fSmrg uint32_t logical_array_layer, 213301e04c3fSmrg uint32_t logical_z_offset_px, 213401e04c3fSmrg uint32_t *x_offset_el, 213501e04c3fSmrg uint32_t *y_offset_el) 213601e04c3fSmrg{ 213701e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 213801e04c3fSmrg 213901e04c3fSmrg assert(level < surf->levels); 214001e04c3fSmrg assert(logical_array_layer < surf->logical_level0_px.array_len); 214101e04c3fSmrg assert(logical_z_offset_px 214201e04c3fSmrg < isl_minify(surf->logical_level0_px.depth, level)); 214301e04c3fSmrg 214401e04c3fSmrg uint32_t x_offset_sa, y_offset_sa; 214501e04c3fSmrg isl_surf_get_image_offset_sa(surf, level, 214601e04c3fSmrg logical_array_layer, 214701e04c3fSmrg logical_z_offset_px, 214801e04c3fSmrg &x_offset_sa, 214901e04c3fSmrg &y_offset_sa); 215001e04c3fSmrg 215101e04c3fSmrg *x_offset_el = x_offset_sa / fmtl->bw; 215201e04c3fSmrg *y_offset_el = y_offset_sa / fmtl->bh; 215301e04c3fSmrg} 215401e04c3fSmrg 215501e04c3fSmrgvoid 215601e04c3fSmrgisl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf, 215701e04c3fSmrg uint32_t level, 215801e04c3fSmrg uint32_t logical_array_layer, 215901e04c3fSmrg uint32_t logical_z_offset_px, 216001e04c3fSmrg uint32_t *offset_B, 216101e04c3fSmrg uint32_t *x_offset_sa, 216201e04c3fSmrg uint32_t *y_offset_sa) 216301e04c3fSmrg{ 216401e04c3fSmrg const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 216501e04c3fSmrg 216601e04c3fSmrg uint32_t total_x_offset_el, total_y_offset_el; 216701e04c3fSmrg isl_surf_get_image_offset_el(surf, level, logical_array_layer, 216801e04c3fSmrg logical_z_offset_px, 216901e04c3fSmrg &total_x_offset_el, 217001e04c3fSmrg &total_y_offset_el); 217101e04c3fSmrg 217201e04c3fSmrg uint32_t x_offset_el, y_offset_el; 217301e04c3fSmrg isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb, 217401e04c3fSmrg surf->row_pitch_B, 217501e04c3fSmrg total_x_offset_el, 217601e04c3fSmrg total_y_offset_el, 217701e04c3fSmrg offset_B, 217801e04c3fSmrg &x_offset_el, 217901e04c3fSmrg &y_offset_el); 218001e04c3fSmrg 218101e04c3fSmrg if (x_offset_sa) { 218201e04c3fSmrg *x_offset_sa = x_offset_el * fmtl->bw; 218301e04c3fSmrg } else { 218401e04c3fSmrg assert(x_offset_el == 0); 218501e04c3fSmrg } 218601e04c3fSmrg 218701e04c3fSmrg if (y_offset_sa) { 218801e04c3fSmrg *y_offset_sa = y_offset_el * fmtl->bh; 218901e04c3fSmrg } else { 219001e04c3fSmrg assert(y_offset_el == 0); 219101e04c3fSmrg } 219201e04c3fSmrg} 219301e04c3fSmrg 219401e04c3fSmrgvoid 219501e04c3fSmrgisl_surf_get_image_surf(const struct isl_device *dev, 219601e04c3fSmrg const struct isl_surf *surf, 219701e04c3fSmrg uint32_t level, 219801e04c3fSmrg uint32_t logical_array_layer, 219901e04c3fSmrg uint32_t logical_z_offset_px, 220001e04c3fSmrg struct isl_surf *image_surf, 220101e04c3fSmrg uint32_t *offset_B, 220201e04c3fSmrg uint32_t *x_offset_sa, 220301e04c3fSmrg uint32_t *y_offset_sa) 220401e04c3fSmrg{ 220501e04c3fSmrg isl_surf_get_image_offset_B_tile_sa(surf, 220601e04c3fSmrg level, 220701e04c3fSmrg logical_array_layer, 220801e04c3fSmrg logical_z_offset_px, 220901e04c3fSmrg offset_B, 221001e04c3fSmrg x_offset_sa, 221101e04c3fSmrg y_offset_sa); 221201e04c3fSmrg 221301e04c3fSmrg /* Even for cube maps there will be only single face, therefore drop the 221401e04c3fSmrg * corresponding flag if present. 221501e04c3fSmrg */ 221601e04c3fSmrg const isl_surf_usage_flags_t usage = 221701e04c3fSmrg surf->usage & (~ISL_SURF_USAGE_CUBE_BIT); 221801e04c3fSmrg 221901e04c3fSmrg bool ok UNUSED; 222001e04c3fSmrg ok = isl_surf_init(dev, image_surf, 222101e04c3fSmrg .dim = ISL_SURF_DIM_2D, 222201e04c3fSmrg .format = surf->format, 222301e04c3fSmrg .width = isl_minify(surf->logical_level0_px.w, level), 222401e04c3fSmrg .height = isl_minify(surf->logical_level0_px.h, level), 222501e04c3fSmrg .depth = 1, 222601e04c3fSmrg .levels = 1, 222701e04c3fSmrg .array_len = 1, 222801e04c3fSmrg .samples = surf->samples, 222901e04c3fSmrg .row_pitch_B = surf->row_pitch_B, 223001e04c3fSmrg .usage = usage, 223101e04c3fSmrg .tiling_flags = (1 << surf->tiling)); 223201e04c3fSmrg assert(ok); 223301e04c3fSmrg} 223401e04c3fSmrg 223501e04c3fSmrgvoid 223601e04c3fSmrgisl_tiling_get_intratile_offset_el(enum isl_tiling tiling, 223701e04c3fSmrg uint32_t bpb, 223801e04c3fSmrg uint32_t row_pitch_B, 223901e04c3fSmrg uint32_t total_x_offset_el, 224001e04c3fSmrg uint32_t total_y_offset_el, 224101e04c3fSmrg uint32_t *base_address_offset, 224201e04c3fSmrg uint32_t *x_offset_el, 224301e04c3fSmrg uint32_t *y_offset_el) 224401e04c3fSmrg{ 224501e04c3fSmrg if (tiling == ISL_TILING_LINEAR) { 224601e04c3fSmrg assert(bpb % 8 == 0); 224701e04c3fSmrg *base_address_offset = total_y_offset_el * row_pitch_B + 224801e04c3fSmrg total_x_offset_el * (bpb / 8); 224901e04c3fSmrg *x_offset_el = 0; 225001e04c3fSmrg *y_offset_el = 0; 225101e04c3fSmrg return; 225201e04c3fSmrg } 225301e04c3fSmrg 225401e04c3fSmrg struct isl_tile_info tile_info; 225501e04c3fSmrg isl_tiling_get_info(tiling, bpb, &tile_info); 225601e04c3fSmrg 225701e04c3fSmrg assert(row_pitch_B % tile_info.phys_extent_B.width == 0); 225801e04c3fSmrg 225901e04c3fSmrg /* For non-power-of-two formats, we need the address to be both tile and 226001e04c3fSmrg * element-aligned. The easiest way to achieve this is to work with a tile 226101e04c3fSmrg * that is three times as wide as the regular tile. 226201e04c3fSmrg * 226301e04c3fSmrg * The tile info returned by get_tile_info has a logical size that is an 226401e04c3fSmrg * integer number of tile_info.format_bpb size elements. To scale the 226501e04c3fSmrg * tile, we scale up the physical width and then treat the logical tile 226601e04c3fSmrg * size as if it has bpb size elements. 226701e04c3fSmrg */ 226801e04c3fSmrg const uint32_t tile_el_scale = bpb / tile_info.format_bpb; 226901e04c3fSmrg tile_info.phys_extent_B.width *= tile_el_scale; 227001e04c3fSmrg 227101e04c3fSmrg /* Compute the offset into the tile */ 227201e04c3fSmrg *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w; 227301e04c3fSmrg *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h; 227401e04c3fSmrg 227501e04c3fSmrg /* Compute the offset of the tile in units of whole tiles */ 227601e04c3fSmrg uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w; 227701e04c3fSmrg uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h; 227801e04c3fSmrg 227901e04c3fSmrg *base_address_offset = 228001e04c3fSmrg y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B + 228101e04c3fSmrg x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w; 228201e04c3fSmrg} 228301e04c3fSmrg 228401e04c3fSmrguint32_t 228501e04c3fSmrgisl_surf_get_depth_format(const struct isl_device *dev, 228601e04c3fSmrg const struct isl_surf *surf) 228701e04c3fSmrg{ 228801e04c3fSmrg /* Support for separate stencil buffers began in gen5. Support for 228901e04c3fSmrg * interleaved depthstencil buffers ceased in gen7. The intermediate gens, 229001e04c3fSmrg * those that supported separate and interleaved stencil, were gen5 and 229101e04c3fSmrg * gen6. 229201e04c3fSmrg * 229301e04c3fSmrg * For a list of all available formats, see the Sandybridge PRM >> Volume 229401e04c3fSmrg * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface 229501e04c3fSmrg * Format (p321). 229601e04c3fSmrg */ 229701e04c3fSmrg 229801e04c3fSmrg bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT; 229901e04c3fSmrg 230001e04c3fSmrg assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT); 230101e04c3fSmrg 230201e04c3fSmrg if (has_stencil) 230301e04c3fSmrg assert(ISL_DEV_GEN(dev) < 7); 230401e04c3fSmrg 230501e04c3fSmrg switch (surf->format) { 230601e04c3fSmrg default: 230701e04c3fSmrg unreachable("bad isl depth format"); 230801e04c3fSmrg case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS: 230901e04c3fSmrg assert(ISL_DEV_GEN(dev) < 7); 231001e04c3fSmrg return 0; /* D32_FLOAT_S8X24_UINT */ 231101e04c3fSmrg case ISL_FORMAT_R32_FLOAT: 231201e04c3fSmrg assert(!has_stencil); 231301e04c3fSmrg return 1; /* D32_FLOAT */ 231401e04c3fSmrg case ISL_FORMAT_R24_UNORM_X8_TYPELESS: 231501e04c3fSmrg if (has_stencil) { 231601e04c3fSmrg assert(ISL_DEV_GEN(dev) < 7); 231701e04c3fSmrg return 2; /* D24_UNORM_S8_UINT */ 231801e04c3fSmrg } else { 231901e04c3fSmrg assert(ISL_DEV_GEN(dev) >= 5); 232001e04c3fSmrg return 3; /* D24_UNORM_X8_UINT */ 232101e04c3fSmrg } 232201e04c3fSmrg case ISL_FORMAT_R16_UNORM: 232301e04c3fSmrg assert(!has_stencil); 232401e04c3fSmrg return 5; /* D16_UNORM */ 232501e04c3fSmrg } 232601e04c3fSmrg} 232701e04c3fSmrg 232801e04c3fSmrgbool 232901e04c3fSmrgisl_swizzle_supports_rendering(const struct gen_device_info *devinfo, 233001e04c3fSmrg struct isl_swizzle swizzle) 233101e04c3fSmrg{ 233201e04c3fSmrg if (devinfo->is_haswell) { 233301e04c3fSmrg /* From the Haswell PRM, 233401e04c3fSmrg * RENDER_SURFACE_STATE::Shader Channel Select Red 233501e04c3fSmrg * 233601e04c3fSmrg * "The Shader channel selects also define which shader channels are 233701e04c3fSmrg * written to which surface channel. If the Shader channel select is 233801e04c3fSmrg * SCS_ZERO or SCS_ONE then it is not written to the surface. If the 233901e04c3fSmrg * shader channel select is SCS_RED it is written to the surface red 234001e04c3fSmrg * channel and so on. If more than one shader channel select is set 234101e04c3fSmrg * to the same surface channel only the first shader channel in RGBA 234201e04c3fSmrg * order will be written." 234301e04c3fSmrg */ 234401e04c3fSmrg return true; 234501e04c3fSmrg } else if (devinfo->gen <= 7) { 234601e04c3fSmrg /* Ivy Bridge and early doesn't have any swizzling */ 234701e04c3fSmrg return isl_swizzle_is_identity(swizzle); 234801e04c3fSmrg } else { 234901e04c3fSmrg /* From the Sky Lake PRM Vol. 2d, 235001e04c3fSmrg * RENDER_SURFACE_STATE::Shader Channel Select Red 235101e04c3fSmrg * 235201e04c3fSmrg * "For Render Target, Red, Green and Blue Shader Channel Selects 235301e04c3fSmrg * MUST be such that only valid components can be swapped i.e. only 235401e04c3fSmrg * change the order of components in the pixel. Any other values for 235501e04c3fSmrg * these Shader Channel Select fields are not valid for Render 235601e04c3fSmrg * Targets. This also means that there MUST not be multiple shader 235701e04c3fSmrg * channels mapped to the same RT channel." 235801e04c3fSmrg * 235901e04c3fSmrg * From the Sky Lake PRM Vol. 2d, 236001e04c3fSmrg * RENDER_SURFACE_STATE::Shader Channel Select Alpha 236101e04c3fSmrg * 236201e04c3fSmrg * "For Render Target, this field MUST be programmed to 236301e04c3fSmrg * value = SCS_ALPHA." 236401e04c3fSmrg */ 236501e04c3fSmrg return (swizzle.r == ISL_CHANNEL_SELECT_RED || 236601e04c3fSmrg swizzle.r == ISL_CHANNEL_SELECT_GREEN || 236701e04c3fSmrg swizzle.r == ISL_CHANNEL_SELECT_BLUE) && 236801e04c3fSmrg (swizzle.g == ISL_CHANNEL_SELECT_RED || 236901e04c3fSmrg swizzle.g == ISL_CHANNEL_SELECT_GREEN || 237001e04c3fSmrg swizzle.g == ISL_CHANNEL_SELECT_BLUE) && 237101e04c3fSmrg (swizzle.b == ISL_CHANNEL_SELECT_RED || 237201e04c3fSmrg swizzle.b == ISL_CHANNEL_SELECT_GREEN || 237301e04c3fSmrg swizzle.b == ISL_CHANNEL_SELECT_BLUE) && 237401e04c3fSmrg swizzle.r != swizzle.g && 237501e04c3fSmrg swizzle.r != swizzle.b && 237601e04c3fSmrg swizzle.g != swizzle.b && 237701e04c3fSmrg swizzle.a == ISL_CHANNEL_SELECT_ALPHA; 237801e04c3fSmrg } 237901e04c3fSmrg} 238001e04c3fSmrg 238101e04c3fSmrgstatic enum isl_channel_select 238201e04c3fSmrgswizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle) 238301e04c3fSmrg{ 238401e04c3fSmrg switch (chan) { 238501e04c3fSmrg case ISL_CHANNEL_SELECT_ZERO: 238601e04c3fSmrg case ISL_CHANNEL_SELECT_ONE: 238701e04c3fSmrg return chan; 238801e04c3fSmrg case ISL_CHANNEL_SELECT_RED: 238901e04c3fSmrg return swizzle.r; 239001e04c3fSmrg case ISL_CHANNEL_SELECT_GREEN: 239101e04c3fSmrg return swizzle.g; 239201e04c3fSmrg case ISL_CHANNEL_SELECT_BLUE: 239301e04c3fSmrg return swizzle.b; 239401e04c3fSmrg case ISL_CHANNEL_SELECT_ALPHA: 239501e04c3fSmrg return swizzle.a; 239601e04c3fSmrg default: 239701e04c3fSmrg unreachable("Invalid swizzle component"); 239801e04c3fSmrg } 239901e04c3fSmrg} 240001e04c3fSmrg 240101e04c3fSmrg/** 240201e04c3fSmrg * Returns the single swizzle that is equivalent to applying the two given 240301e04c3fSmrg * swizzles in sequence. 240401e04c3fSmrg */ 240501e04c3fSmrgstruct isl_swizzle 240601e04c3fSmrgisl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second) 240701e04c3fSmrg{ 240801e04c3fSmrg return (struct isl_swizzle) { 240901e04c3fSmrg .r = swizzle_select(first.r, second), 241001e04c3fSmrg .g = swizzle_select(first.g, second), 241101e04c3fSmrg .b = swizzle_select(first.b, second), 241201e04c3fSmrg .a = swizzle_select(first.a, second), 241301e04c3fSmrg }; 241401e04c3fSmrg} 241501e04c3fSmrg 241601e04c3fSmrg/** 241701e04c3fSmrg * Returns a swizzle that is the pseudo-inverse of this swizzle. 241801e04c3fSmrg */ 241901e04c3fSmrgstruct isl_swizzle 242001e04c3fSmrgisl_swizzle_invert(struct isl_swizzle swizzle) 242101e04c3fSmrg{ 242201e04c3fSmrg /* Default to zero for channels which do not show up in the swizzle */ 242301e04c3fSmrg enum isl_channel_select chans[4] = { 242401e04c3fSmrg ISL_CHANNEL_SELECT_ZERO, 242501e04c3fSmrg ISL_CHANNEL_SELECT_ZERO, 242601e04c3fSmrg ISL_CHANNEL_SELECT_ZERO, 242701e04c3fSmrg ISL_CHANNEL_SELECT_ZERO, 242801e04c3fSmrg }; 242901e04c3fSmrg 243001e04c3fSmrg /* We go in ABGR order so that, if there are any duplicates, the first one 243101e04c3fSmrg * is taken if you look at it in RGBA order. This is what Haswell hardware 243201e04c3fSmrg * does for render target swizzles. 243301e04c3fSmrg */ 243401e04c3fSmrg if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4) 243501e04c3fSmrg chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA; 243601e04c3fSmrg if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4) 243701e04c3fSmrg chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE; 243801e04c3fSmrg if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4) 243901e04c3fSmrg chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN; 244001e04c3fSmrg if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4) 244101e04c3fSmrg chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED; 244201e04c3fSmrg 244301e04c3fSmrg return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] }; 244401e04c3fSmrg} 2445