1/* 2 * Copyright © 2011 Red Hat All Rights Reserved. 3 * Copyright © 2017 Advanced Micro Devices, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS 18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * The above copyright notice and this permission notice (including the 24 * next paragraph) shall be included in all copies or substantial portions 25 * of the Software. 
 */

#define AC_SURFACE_INCLUDE_NIR
#include "ac_surface.h"

#include "ac_drm_fourcc.h"
#include "ac_gpu_info.h"
#include "addrlib/inc/addrinterface.h"
#include "addrlib/src/amdgpu_asic_addr.h"
#include "amd_family.h"
#include "sid.h"
#include "util/hash_table.h"
#include "util/macros.h"
#include "util/simple_mtx.h"
#include "util/u_atomic.h"
#include "util/format/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#ifdef _WIN32
/* Local copy of the AMDGPU_TILING_* bitfield layout normally provided by
 * drm-uapi/amdgpu_drm.h, which is not available when building for Windows.
 * NOTE(review): must be kept in sync with the kernel uapi header. */
#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0
#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf
#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 4
#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f
#define AMDGPU_TILING_TILE_SPLIT_SHIFT 9
#define AMDGPU_TILING_TILE_SPLIT_MASK 0x7
#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 12
#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x7
#define AMDGPU_TILING_BANK_WIDTH_SHIFT 15
#define AMDGPU_TILING_BANK_WIDTH_MASK 0x3
#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 17
#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x3
#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 19
#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x3
#define AMDGPU_TILING_NUM_BANKS_SHIFT 21
#define AMDGPU_TILING_NUM_BANKS_MASK 0x3
#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0
#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f
#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5
#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF
#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29
#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF
#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43
#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1
#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44
#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1
#define AMDGPU_TILING_SCANOUT_SHIFT 63
#define AMDGPU_TILING_SCANOUT_MASK 0x1
#define AMDGPU_TILING_SET(field, value) \
   (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
#define AMDGPU_TILING_GET(value, field) \
   (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
#else
#include "drm-uapi/amdgpu_drm.h"
#endif

/* addrlib chip-engine IDs, defined here in case the addrlib headers
 * don't provide them. */
#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
#endif

#ifndef CIASICIDGFXENGINE_ARCTICISLAND
#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
#endif

/* Opaque wrapper around an addrlib handle. */
struct ac_addrlib {
   ADDR_HANDLE handle;
};

/* Return true if \p modifier is an AMD format modifier with DCC enabled. */
bool ac_modifier_has_dcc(uint64_t modifier)
{
   return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
}

/* Return true if \p modifier has the DCC_RETILE bit set (a separate
 * displayable DCC plane exists alongside the main one). */
bool ac_modifier_has_dcc_retile(uint64_t modifier)
{
   return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC_RETILE, modifier);
}

/* Return true if shader image stores can write through DCC with this
 * modifier. Mirrors the settings accepted by
 * ac_surface_supports_dcc_image_stores() below. */
bool ac_modifier_supports_dcc_image_stores(uint64_t modifier)
{
   if (!ac_modifier_has_dcc(modifier))
      return false;

   /* Either INDEP_64B=0, INDEP_128B=1, MAX_COMPRESSED_BLOCK=128B,
    * or (TILE_VERSION >= GFX10_RBPLUS, i.e. gfx10.3) INDEP_64B=1,
    * INDEP_128B=1, MAX_COMPRESSED_BLOCK=64B. */
   return (!AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) &&
           AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) &&
           AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_128B) ||
          (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && /* gfx10.3 */
           AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) &&
           AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) &&
           AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_64B);

}


bool ac_surface_supports_dcc_image_stores(enum chip_class chip_class,
                                          const struct radeon_surf *surf)
{
   /* DCC image stores is only available for GFX10+.
 */
   if (chip_class < GFX10)
      return false;

   /* DCC image stores support the following settings:
    * - INDEPENDENT_64B_BLOCKS = 0
    * - INDEPENDENT_128B_BLOCKS = 1
    * - MAX_COMPRESSED_BLOCK_SIZE = 128B
    * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
    *
    * gfx10.3 also supports the following setting:
    * - INDEPENDENT_64B_BLOCKS = 1
    * - INDEPENDENT_128B_BLOCKS = 1
    * - MAX_COMPRESSED_BLOCK_SIZE = 64B
    * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
    *
    * The compressor only looks at MAX_COMPRESSED_BLOCK_SIZE to determine
    * the INDEPENDENT_xx_BLOCKS settings. 128B implies INDEP_128B, while 64B
    * implies INDEP_64B && INDEP_128B.
    *
    * The same limitations apply to SDMA compressed stores because
    * SDMA uses the same DCC codec.
    */
   return (!surf->u.gfx9.color.dcc.independent_64B_blocks &&
           surf->u.gfx9.color.dcc.independent_128B_blocks &&
           surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_128B) ||
          (chip_class >= GFX10_3 && /* gfx10.3 */
           surf->u.gfx9.color.dcc.independent_64B_blocks &&
           surf->u.gfx9.color.dcc.independent_128B_blocks &&
           surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
}

/* Translate a DRM format modifier into the addrlib swizzle mode it encodes.
 * Linear maps to ADDR_SW_LINEAR; otherwise the TILE field is the swizzle. */
static
AddrSwizzleMode ac_modifier_gfx9_swizzle_mode(uint64_t modifier)
{
   if (modifier == DRM_FORMAT_MOD_LINEAR)
      return ADDR_SW_LINEAR;

   return AMD_FMT_MOD_GET(TILE, modifier);
}

/* Fill the DCC-related fields of \p surf and the addrlib input flags in
 * \p surf_info from a DCC-capable modifier. Caller must ensure
 * ac_modifier_has_dcc(modifier). */
static void
ac_modifier_fill_dcc_params(uint64_t modifier, struct radeon_surf *surf,
                            ADDR2_COMPUTE_SURFACE_INFO_INPUT *surf_info)
{
   assert(ac_modifier_has_dcc(modifier));

   if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
      /* Retiled DCC: the main DCC plane is pipe-aligned. */
      surf_info->flags.metaPipeUnaligned = 0;
   } else {
      surf_info->flags.metaPipeUnaligned = !AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);
   }

   /* The metaPipeUnaligned is not strictly necessary, but ensure we don't set metaRbUnaligned on
    * non-displayable DCC surfaces
    * just because num_render_backends = 1 */
   surf_info->flags.metaRbUnaligned = AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
                                      AMD_FMT_MOD_GET(RB, modifier) == 0 &&
                                      surf_info->flags.metaPipeUnaligned;

   surf->u.gfx9.color.dcc.independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
   surf->u.gfx9.color.dcc.independent_128B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier);
   surf->u.gfx9.color.dcc.max_compressed_block_size = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier);
}

/* Return whether \p modifier may be used for \p format on this GPU,
 * honoring the driver-supplied opt-outs in \p options (dcc, dcc_retile). */
bool ac_is_modifier_supported(const struct radeon_info *info,
                              const struct ac_modifier_options *options,
                              enum pipe_format format,
                              uint64_t modifier)
{

   /* Modifiers are only supported for plain color formats up to 64 bpp. */
   if (util_format_is_compressed(format) ||
       util_format_is_depth_or_stencil(format) ||
       util_format_get_blocksizebits(format) > 64)
      return false;

   if (info->chip_class < GFX9)
      return false;

   if(modifier == DRM_FORMAT_MOD_LINEAR)
      return true;

   /* GFX8 may need a different modifier for each plane */
   /* NOTE(review): this branch is unreachable — chip_class < GFX9 already
    * returned false above, so the multi-planar GFX8 check never fires. */
   if (info->chip_class < GFX9 && util_format_get_num_planes(format) > 1)
      return false;

   /* Bitmask over swizzle-mode indices allowed per generation; the bit for
    * ac_modifier_gfx9_swizzle_mode(modifier) must be set. */
   uint32_t allowed_swizzles = 0xFFFFFFFF;
   switch(info->chip_class) {
   case GFX9:
      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x06000000 : 0x06660660;
      break;
   case GFX10:
   case GFX10_3:
      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x08000000 : 0x0E660660;
      break;
   default:
      return false;
   }

   if (!((1u << ac_modifier_gfx9_swizzle_mode(modifier)) & allowed_swizzles))
      return false;

   if (ac_modifier_has_dcc(modifier)) {
      /* TODO: support multi-planar formats with DCC */
      if (util_format_get_num_planes(format) > 1)
         return false;

      /* DCC needs the graphics engine and must be allowed by the driver. */
      if (!info->has_graphics)
         return false;

      if (!options->dcc)
         return false;

      if (ac_modifier_has_dcc_retile(modifier) && !options->dcc_retile)
         return false;
   }

   return true;
}

/* List the modifiers supported for \p format.
 *
 * If \p mods is NULL, writes the number of supported modifiers to
 * \p mod_count and returns true. Otherwise fills up to *mod_count entries,
 * updates *mod_count to the number written, and returns false if the
 * caller's array was too small to hold the full list. */
bool ac_get_supported_modifiers(const struct radeon_info *info,
                                const struct ac_modifier_options *options,
                                enum pipe_format format,
                                unsigned *mod_count,
                                uint64_t *mods)
{
   unsigned current_mod = 0;

/* Append a modifier if supported; counts even when the output array is full
 * so that truncation can be detected. */
#define ADD_MOD(name) \
   if (ac_is_modifier_supported(info, options, format, (name))) { \
      if (mods && current_mod < *mod_count) \
         mods[current_mod] = (name); \
      ++current_mod; \
   }

   /* The modifiers have to be added in descending order of estimated
    * performance. The drivers will prefer modifiers that come earlier
    * in the list.
    */
   switch (info->chip_class) {
   case GFX9: {
      unsigned pipe_xor_bits = MIN2(G_0098F8_NUM_PIPES(info->gb_addr_config) +
                                    G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config), 8);
      unsigned bank_xor_bits = MIN2(G_0098F8_NUM_BANKS(info->gb_addr_config), 8 - pipe_xor_bits);
      unsigned pipes = G_0098F8_NUM_PIPES(info->gb_addr_config);
      unsigned rb = G_0098F8_NUM_RB_PER_SE(info->gb_addr_config) +
                    G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config);

      /* DCC fields shared by every GFX9 DCC modifier below. */
      uint64_t common_dcc = AMD_FMT_MOD_SET(DCC, 1) |
                            AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
                            AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
                            AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, info->has_dcc_constant_encode) |
                            AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
                            AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits);

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
              common_dcc |
              AMD_FMT_MOD_SET(PIPE, pipes) |
              AMD_FMT_MOD_SET(RB, rb))

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
              common_dcc |
              AMD_FMT_MOD_SET(PIPE, pipes) |
              AMD_FMT_MOD_SET(RB, rb))

      /* Non-pipe-aligned / retiled DCC variants, only offered for 32 bpp. */
      if (util_format_get_blocksizebits(format) == 32) {
         if (info->max_render_backends == 1) {
            ADD_MOD(AMD_FMT_MOD |
                    AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
                    AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
                    common_dcc);
         }


         ADD_MOD(AMD_FMT_MOD |
                 AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
                 common_dcc |
                 AMD_FMT_MOD_SET(PIPE, pipes) |
                 AMD_FMT_MOD_SET(RB, rb))
      }


      /* Non-DCC tiled modifiers, then linear as last resort. */
      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
              AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
              AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));

      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
      break;
   }
   case GFX10:
   case GFX10_3: {
      bool rbplus = info->chip_class >= GFX10_3;
      unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config);
      unsigned pkrs = rbplus ? G_0098F8_NUM_PKRS(info->gb_addr_config) : 0;

      unsigned version = rbplus ? AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS : AMD_FMT_MOD_TILE_VER_GFX10;
      /* Common fields for all GFX10/10.3 DCC modifiers (64K_R_X). */
      uint64_t common_dcc = AMD_FMT_MOD_SET(TILE_VERSION, version) |
                            AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
                            AMD_FMT_MOD_SET(DCC, 1) |
                            AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
                            AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
                            AMD_FMT_MOD_SET(PACKERS, pkrs);

      ADD_MOD(AMD_FMT_MOD | common_dcc |
              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
              AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
              AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))

      if (info->chip_class >= GFX10_3) {
         if (info->max_render_backends == 1) {
            ADD_MOD(AMD_FMT_MOD | common_dcc |
                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
                    AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))
         }

         ADD_MOD(AMD_FMT_MOD | common_dcc |
                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
                 AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))
      }

      /* 64B-block DCC variants for Navi12/Navi14 and gfx10.3+. */
      if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14 || info->chip_class >= GFX10_3) {
         bool independent_128b = info->chip_class >= GFX10_3;

         if (info->max_render_backends == 1) {
            ADD_MOD(AMD_FMT_MOD | common_dcc |
                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, independent_128b) |
                    AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B))
         }

         ADD_MOD(AMD_FMT_MOD | common_dcc |
                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, independent_128b) |
                 AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B))
      }

      /* Non-DCC tiled modifiers. */
      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE_VERSION, version) |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
              AMD_FMT_MOD_SET(PACKERS, pkrs))

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits))

      if (util_format_get_blocksizebits(format) != 32) {
         ADD_MOD(AMD_FMT_MOD |
                 AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
      }

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));

      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
      break;
   }
   default:
      break;
   }

#undef ADD_MOD

   /* Count-query mode: no output array, just report how many we have. */
   if (!mods) {
      *mod_count = current_mod;
      return true;
   }

   /* Report whether the caller's array could hold the whole list. */
   bool complete = current_mod <= *mod_count;
   *mod_count = MIN2(*mod_count, current_mod);
   return complete;
}

/* addrlib system-memory allocation callback. */
static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT *pInput)
{
   return malloc(pInput->sizeInBytes);
}

/* addrlib system-memory free callback. */
static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT *pInput)
{
   free(pInput->pVirtAddr);
   return ADDR_OK;
}

/* Create an addrlib instance for the GPU described by \p info.
 *
 * If \p max_alignment is non-NULL and addrlib reports it successfully, the
 * maximum base alignment is stored there. Returns NULL on failure. */
struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
                                     uint64_t *max_alignment)
{
   ADDR_CREATE_INPUT addrCreateInput = {0};
   ADDR_CREATE_OUTPUT addrCreateOutput = {0};
   ADDR_REGISTER_VALUE regValue = {0};
   ADDR_CREATE_FLAGS createFlags = {{0}};
   ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
   ADDR_E_RETURNCODE addrRet;

   addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
   addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);

   regValue.gbAddrConfig = info->gb_addr_config;
   createFlags.value = 0;

   addrCreateInput.chipFamily = info->family_id;
   addrCreateInput.chipRevision = info->chip_external_rev;

   if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
      return NULL;

   if (addrCreateInput.chipFamily >= FAMILY_AI) {
      addrCreateInput.chipEngine =
         CIASICIDGFXENGINE_ARCTICISLAND;
   } else {
      /* Pre-GFX9: addrlib needs the raw tiling register values. */
      regValue.noOfBanks = info->mc_arb_ramcfg & 0x3;
      regValue.noOfRanks = (info->mc_arb_ramcfg & 0x4) >> 2;

      regValue.backendDisables = info->enabled_rb_mask;
      regValue.pTileConfig = info->si_tile_mode_array;
      regValue.noOfEntries = ARRAY_SIZE(info->si_tile_mode_array);
      if (addrCreateInput.chipFamily == FAMILY_SI) {
         /* SI has no macrotile mode array. */
         regValue.pMacroTileConfig = NULL;
         regValue.noOfMacroEntries = 0;
      } else {
         regValue.pMacroTileConfig = info->cik_macrotile_mode_array;
         regValue.noOfMacroEntries = ARRAY_SIZE(info->cik_macrotile_mode_array);
      }

      createFlags.useTileIndex = 1;
      createFlags.useHtileSliceAlign = 1;

      addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
   }

   addrCreateInput.callbacks.allocSysMem = allocSysMem;
   addrCreateInput.callbacks.freeSysMem = freeSysMem;
   addrCreateInput.callbacks.debugPrint = 0;
   addrCreateInput.createFlags = createFlags;
   addrCreateInput.regValue = regValue;

   addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
   if (addrRet != ADDR_OK)
      return NULL;

   if (max_alignment) {
      addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
      if (addrRet == ADDR_OK) {
         *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
      }
   }

   struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));
   if (!addrlib) {
      /* Don't leak the addrlib handle on allocation failure. */
      AddrDestroy(addrCreateOutput.hLib);
      return NULL;
   }

   addrlib->handle = addrCreateOutput.hLib;
   return addrlib;
}

/* Destroy an instance created by ac_addrlib_create(). */
void ac_addrlib_destroy(struct ac_addrlib *addrlib)
{
   AddrDestroy(addrlib->handle);
   free(addrlib);
}

/* Return the raw ADDR_HANDLE for callers that use addrlib directly. */
void *ac_addrlib_get_handle(struct ac_addrlib *addrlib)
{
   return addrlib->handle;
}

/* Validate an ac_surf_config against \p flags; returns 0 or -EINVAL. */
static int surf_config_sanity(const struct ac_surf_config *config, unsigned flags)
{
   /* FMASK is allocated together with the color surface and can't be
    * allocated separately.
    */
   assert(!(flags & RADEON_SURF_FMASK));
   if (flags & RADEON_SURF_FMASK)
      return -EINVAL;

   /* all dimension must be at least 1 ! */
   if (!config->info.width || !config->info.height || !config->info.depth ||
       !config->info.array_size || !config->info.levels)
      return -EINVAL;

   switch (config->info.samples) {
   case 0:
   case 1:
   case 2:
   case 4:
   case 8:
      break;
   case 16:
      /* 16 samples are rejected for depth/stencil surfaces. */
      if (flags & RADEON_SURF_Z_OR_SBUFFER)
         return -EINVAL;
      break;
   default:
      return -EINVAL;
   }

   if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
      switch (config->info.storage_samples) {
      case 0:
      case 1:
      case 2:
      case 4:
      case 8:
         break;
      default:
         return -EINVAL;
      }
   }

   /* 3D textures can't be arrays; cube maps can't have depth. */
   if (config->is_3d && config->info.array_size > 1)
      return -EINVAL;
   if (config->is_cube && config->info.depth > 1)
      return -EINVAL;

   return 0;
}

/* Compute one mip level of a GFX6-8 surface through addrlib, accumulating
 * level offsets/sizes and DCC/HTILE metadata into \p surf. Returns 0 or an
 * addrlib error code. */
static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config *config,
                              struct radeon_surf *surf, bool is_stencil, unsigned level,
                              bool compressed, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
                              ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
                              ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
                              ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
                              ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
                              ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
{
   struct legacy_surf_level *surf_level;
   struct legacy_surf_dcc_level *dcc_level;
   ADDR_E_RETURNCODE ret;

   AddrSurfInfoIn->mipLevel = level;
   AddrSurfInfoIn->width = u_minify(config->info.width, level);
   AddrSurfInfoIn->height = u_minify(config->info.height, level);

   /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
    * because GFX9 needs linear alignment of 256 bytes.
    */
   if (config->info.levels == 1 && AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
       AddrSurfInfoIn->bpp && util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {
      /* Pad the pitch so one row spans a multiple of 256 bytes. */
      unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);

      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
   }

   /* addrlib assumes the bytes/pixel is a divisor of 64, which is not
    * true for r32g32b32 formats. */
   if (AddrSurfInfoIn->bpp == 96) {
      assert(config->info.levels == 1);
      assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);

      /* The least common multiple of 64 bytes and 12 bytes/pixel is
       * 192 bytes, or 16 pixels. */
      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);
   }

   if (config->is_3d)
      AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
   else if (config->is_cube)
      AddrSurfInfoIn->numSlices = 6;
   else
      AddrSurfInfoIn->numSlices = config->info.array_size;

   if (level > 0) {
      /* Set the base level pitch. This is needed for calculation
       * of non-zero levels. */
      if (is_stencil)
         AddrSurfInfoIn->basePitch = surf->u.legacy.zs.stencil_level[0].nblk_x;
      else
         AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;

      /* Convert blocks to pixels for compressed formats. */
      if (compressed)
         AddrSurfInfoIn->basePitch *= surf->blk_w;
   }

   ret = AddrComputeSurfaceInfo(addrlib, AddrSurfInfoIn, AddrSurfInfoOut);
   if (ret != ADDR_OK) {
      return ret;
   }

   surf_level = is_stencil ? &surf->u.legacy.zs.stencil_level[level] : &surf->u.legacy.level[level];
   dcc_level = &surf->u.legacy.color.dcc_level[level];
   /* Level offsets are stored in units of 256 bytes. */
   surf_level->offset_256B = align64(surf->surf_size, AddrSurfInfoOut->baseAlign) / 256;
   surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
   surf_level->nblk_x = AddrSurfInfoOut->pitch;
   surf_level->nblk_y = AddrSurfInfoOut->height;

   /* Map the addrlib tile mode back to the radeon_surf mode enum. */
   switch (AddrSurfInfoOut->tileMode) {
   case ADDR_TM_LINEAR_ALIGNED:
      surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
      break;
   case ADDR_TM_1D_TILED_THIN1:
   case ADDR_TM_PRT_TILED_THIN1:
      surf_level->mode = RADEON_SURF_MODE_1D;
      break;
   case ADDR_TM_2D_TILED_THIN1:
   case ADDR_TM_PRT_2D_TILED_THIN1:
      surf_level->mode = RADEON_SURF_MODE_2D;
      break;
   default:
      assert(0);
   }

   if (is_stencil)
      surf->u.legacy.zs.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
   else
      surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;

   if (AddrSurfInfoIn->flags.prt) {
      if (level == 0) {
         surf->prt_tile_width = AddrSurfInfoOut->pitchAlign;
         surf->prt_tile_height = AddrSurfInfoOut->heightAlign;
      }
      if (surf_level->nblk_x >= surf->prt_tile_width &&
          surf_level->nblk_y >= surf->prt_tile_height) {
         /* +1 because the current level is not in the miptail */
         surf->first_mip_tail_level = level + 1;
      }
   }

   surf->surf_size = (uint64_t)surf_level->offset_256B * 256 + AddrSurfInfoOut->surfSize;

   /* Clear DCC fields at the beginning. */
   if (!AddrSurfInfoIn->flags.depth && !AddrSurfInfoIn->flags.stencil)
      dcc_level->dcc_offset = 0;

   /* The previous level's flag tells us if we can use DCC for this level. */
   if (AddrSurfInfoIn->flags.dccCompatible && (level == 0 || AddrDccOut->subLvlCompressible)) {
      bool prev_level_clearable = level == 0 || AddrDccOut->dccRamSizeAligned;

      AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
      AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
      AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
      AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
      AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

      ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);

      if (ret == ADDR_OK) {
         dcc_level->dcc_offset = surf->meta_size;
         surf->num_meta_levels = level + 1;
         surf->meta_size = dcc_level->dcc_offset + AddrDccOut->dccRamSize;
         surf->meta_alignment_log2 = MAX2(surf->meta_alignment_log2, util_logbase2(AddrDccOut->dccRamBaseAlign));

         /* If the DCC size of a subresource (1 mip level or 1 slice)
          * is not aligned, the DCC memory layout is not contiguous for
          * that subresource, which means we can't use fast clear.
          *
          * We only do fast clears for whole mipmap levels. If we did
          * per-slice fast clears, the same restriction would apply.
          * (i.e. only compute the slice size and see if it's aligned)
          *
          * The last level can be non-contiguous and still be clearable
          * if it's interleaved with the next level that doesn't exist.
          */
         if (AddrDccOut->dccRamSizeAligned ||
             (prev_level_clearable && level == config->info.levels - 1))
            dcc_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
         else
            dcc_level->dcc_fast_clear_size = 0;

         /* Compute the DCC slice size because addrlib doesn't
          * provide this info. As DCC memory is linear (each
          * slice is the same size) it's easy to compute.
          */
         surf->meta_slice_size = AddrDccOut->dccRamSize / config->info.array_size;

         /* For arrays, we have to compute the DCC info again
          * with one slice size to get a correct fast clear
          * size.
          */
         if (config->info.array_size > 1) {
            AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize;
            AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
            AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
            AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
            AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

            ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);
            if (ret == ADDR_OK) {
               /* If the DCC memory isn't properly
                * aligned, the data are interleaved
                * accross slices.
                */
               if (AddrDccOut->dccRamSizeAligned)
                  dcc_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;
               else
                  dcc_level->dcc_slice_fast_clear_size = 0;
            }

            /* Contiguous layers were requested but slices interleave:
             * disable DCC entirely for this surface. */
            if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS &&
                surf->meta_slice_size != dcc_level->dcc_slice_fast_clear_size) {
               surf->meta_size = 0;
               surf->num_meta_levels = 0;
               AddrDccOut->subLvlCompressible = false;
            }
         } else {
            dcc_level->dcc_slice_fast_clear_size = dcc_level->dcc_fast_clear_size;
         }
      }
   }

   /* HTILE.
    */
   if (!is_stencil && AddrSurfInfoIn->flags.depth && surf_level->mode == RADEON_SURF_MODE_2D &&
       level == 0 && !(surf->flags & RADEON_SURF_NO_HTILE)) {
      AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible;
      AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
      AddrHtileIn->height = AddrSurfInfoOut->height;
      AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
      AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
      AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
      AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
      AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
      AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

      ret = AddrComputeHtileInfo(addrlib, AddrHtileIn, AddrHtileOut);

      if (ret == ADDR_OK) {
         surf->meta_size = AddrHtileOut->htileBytes;
         surf->meta_slice_size = AddrHtileOut->sliceSize;
         surf->meta_alignment_log2 = util_logbase2(AddrHtileOut->baseAlign);
         surf->meta_pitch = AddrHtileOut->pitch;
         surf->num_meta_levels = level + 1;
      }
   }

   return 0;
}

/* Derive surf->micro_tile_mode from the level-0 tile-mode register value.
 * GFX7+ uses the MICRO_TILE_MODE_NEW field, GFX6 the legacy one. */
static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, const struct radeon_info *info)
{
   uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];

   if (info->chip_class >= GFX7)
      surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
   else
      surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
}

/* Compute the CIK macro tile index from bpe and the tile-split setting. */
static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
{
   unsigned index, tileb;

   /* Bytes per 8x8 micro tile, clamped by the tile split. */
   tileb = 8 * 8 * surf->bpe;
   tileb = MIN2(surf->u.legacy.tile_split, tileb);

   for (index = 0; tileb > 64; index++)
      tileb >>= 1;

   assert(index < 16);
   return index;
}

/* Return whether this surface should be laid out as displayable (scanout). */
static bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf)
{
   unsigned num_channels = config->info.num_channels;
   unsigned bpe = surf->bpe;

   /* With modifiers the kernel is in charge of
    * whether it is displayable.
    * We need to ensure at least 32 pixels pitch alignment, but this is
    * always the case when the blocksize >= 4K.
    */
   if (surf->modifier != DRM_FORMAT_MOD_INVALID)
      return false;

   if (!config->is_3d && !config->is_cube && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
       surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 &&
       surf->blk_h == 1) {
      /* subsampled */
      if (surf->blk_w == 2 && surf->blk_h == 1)
         return true;

      if (/* RGBA8 or RGBA16F */
          (bpe >= 4 && bpe <= 8 && num_channels == 4) ||
          /* R5G6B5 or R5G5B5A1 */
          (bpe == 2 && num_channels >= 3) ||
          /* C8 palette */
          (bpe == 1 && num_channels == 1))
         return true;
   }
   return false;
}

/**
 * This must be called after the first level is computed.
 *
 * Copy surface-global settings like pipe/bank config from level 0 surface
 * computation, and compute tile swizzle.
 */
static int gfx6_surface_settings(ADDR_HANDLE addrlib, const struct radeon_info *info,
                                 const struct ac_surf_config *config,
                                 ADDR_COMPUTE_SURFACE_INFO_OUTPUT *csio, struct radeon_surf *surf)
{
   surf->surf_alignment_log2 = util_logbase2(csio->baseAlign);
   surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
   gfx6_set_micro_tile_mode(surf, info);

   /* For 2D modes only. */
   if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
      surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
      surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
      surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
      surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
      surf->u.legacy.num_banks = csio->pTileInfo->banks;
      surf->u.legacy.macro_tile_index = csio->macroModeIndex;
   } else {
      surf->u.legacy.macro_tile_index = 0;
   }

   /* Compute tile swizzle.
    */
   /* TODO: fix tile swizzle with mipmapping for GFX6 */
   if ((info->chip_class >= GFX7 || config->info.levels == 1) && config->info.surf_index &&
       surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
       !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
       !get_display_flag(config, surf)) {
      ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
      ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};

      AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
      AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);

      /* Each surface gets a distinct swizzle from the shared counter. */
      AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
      AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
      AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
      AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
      AddrBaseSwizzleIn.tileMode = csio->tileMode;

      int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
      if (r != ADDR_OK)
         return r;

      assert(AddrBaseSwizzleOut.tileSwizzle <=
             u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
      surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
   }
   return 0;
}

/* Compute the CMASK layout for a GFX6-8 color surface into \p surf.
 * Skipped for depth/stencil, linear surfaces, and MSAA without FMASK. */
static void ac_compute_cmask(const struct radeon_info *info, const struct ac_surf_config *config,
                             struct radeon_surf *surf)
{
   unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
   unsigned num_pipes = info->num_tile_pipes;
   unsigned cl_width, cl_height;

   if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear ||
       (config->info.samples >= 2 && !surf->fmask_size))
      return;

   assert(info->chip_class <= GFX8);

   /* cl_width/cl_height scale with the pipe count; presumably cache-line
    * tile dimensions — TODO confirm against hw docs. */
   switch (num_pipes) {
   case 2:
      cl_width = 32;
      cl_height = 16;
      break;
   case 4:
      cl_width = 32;
      cl_height = 32;
      break;
   case 8:
      cl_width = 64;
      cl_height = 32;
      break;
   case 16: /* Hawaii */
      cl_width = 64;
      cl_height = 64;
      break;
   default:
      assert(0);
923 return; 924 } 925 926 unsigned base_align = num_pipes * pipe_interleave_bytes; 927 928 unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8); 929 unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8); 930 unsigned slice_elements = (width * height) / (8 * 8); 931 932 /* Each element of CMASK is a nibble. */ 933 unsigned slice_bytes = slice_elements / 2; 934 935 surf->u.legacy.color.cmask_slice_tile_max = (width * height) / (128 * 128); 936 if (surf->u.legacy.color.cmask_slice_tile_max) 937 surf->u.legacy.color.cmask_slice_tile_max -= 1; 938 939 unsigned num_layers; 940 if (config->is_3d) 941 num_layers = config->info.depth; 942 else if (config->is_cube) 943 num_layers = 6; 944 else 945 num_layers = config->info.array_size; 946 947 surf->cmask_alignment_log2 = util_logbase2(MAX2(256, base_align)); 948 surf->cmask_slice_size = align(slice_bytes, base_align); 949 surf->cmask_size = surf->cmask_slice_size * num_layers; 950} 951 952/** 953 * Fill in the tiling information in \p surf based on the given surface config. 954 * 955 * The following fields of \p surf must be initialized by the caller: 956 * blk_w, blk_h, bpe, flags. 
957 */ 958static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info, 959 const struct ac_surf_config *config, enum radeon_surf_mode mode, 960 struct radeon_surf *surf) 961{ 962 unsigned level; 963 bool compressed; 964 ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; 965 ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0}; 966 ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0}; 967 ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0}; 968 ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0}; 969 ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0}; 970 ADDR_TILEINFO AddrTileInfoIn = {0}; 971 ADDR_TILEINFO AddrTileInfoOut = {0}; 972 int r; 973 974 AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT); 975 AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT); 976 AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT); 977 AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT); 978 AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT); 979 AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT); 980 AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut; 981 982 compressed = surf->blk_w == 4 && surf->blk_h == 4; 983 984 /* MSAA requires 2D tiling. */ 985 if (config->info.samples > 1) 986 mode = RADEON_SURF_MODE_2D; 987 988 /* DB doesn't support linear layouts. */ 989 if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && mode < RADEON_SURF_MODE_1D) 990 mode = RADEON_SURF_MODE_1D; 991 992 /* Set the requested tiling mode. 
*/ 993 switch (mode) { 994 case RADEON_SURF_MODE_LINEAR_ALIGNED: 995 AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED; 996 break; 997 case RADEON_SURF_MODE_1D: 998 if (surf->flags & RADEON_SURF_PRT) 999 AddrSurfInfoIn.tileMode = ADDR_TM_PRT_TILED_THIN1; 1000 else 1001 AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1; 1002 break; 1003 case RADEON_SURF_MODE_2D: 1004 if (surf->flags & RADEON_SURF_PRT) 1005 AddrSurfInfoIn.tileMode = ADDR_TM_PRT_2D_TILED_THIN1; 1006 else 1007 AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1; 1008 break; 1009 default: 1010 assert(0); 1011 } 1012 1013 /* The format must be set correctly for the allocation of compressed 1014 * textures to work. In other cases, setting the bpp is sufficient. 1015 */ 1016 if (compressed) { 1017 switch (surf->bpe) { 1018 case 8: 1019 AddrSurfInfoIn.format = ADDR_FMT_BC1; 1020 break; 1021 case 16: 1022 AddrSurfInfoIn.format = ADDR_FMT_BC3; 1023 break; 1024 default: 1025 assert(0); 1026 } 1027 } else { 1028 AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8; 1029 } 1030 1031 AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples); 1032 AddrSurfInfoIn.tileIndex = -1; 1033 1034 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) { 1035 AddrDccIn.numSamples = AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples); 1036 } 1037 1038 /* Set the micro tile type. 
*/ 1039 if (surf->flags & RADEON_SURF_SCANOUT) 1040 AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE; 1041 else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) 1042 AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER; 1043 else 1044 AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE; 1045 1046 AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 1047 AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; 1048 AddrSurfInfoIn.flags.cube = config->is_cube; 1049 AddrSurfInfoIn.flags.display = get_display_flag(config, surf); 1050 AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1; 1051 AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0; 1052 AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0; 1053 1054 /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been 1055 * requested, because TC-compatible HTILE requires 2D tiling. 1056 */ 1057 AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible && 1058 !AddrSurfInfoIn.flags.fmask && config->info.samples <= 1 && 1059 !(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE); 1060 1061 /* DCC notes: 1062 * - If we add MSAA support, keep in mind that CB can't decompress 8bpp 1063 * with samples >= 4. 1064 * - Mipmapped array textures have low performance (discovered by a closed 1065 * driver team). 1066 */ 1067 AddrSurfInfoIn.flags.dccCompatible = 1068 info->chip_class >= GFX8 && info->has_graphics && /* disable DCC on compute-only chips */ 1069 !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) && 1070 !compressed && 1071 ((config->info.array_size == 1 && config->info.depth == 1) || config->info.levels == 1); 1072 1073 AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0; 1074 AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 1075 1076 /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit) 1077 * for Z and stencil. 
This can cause a number of problems which we work 1078 * around here: 1079 * 1080 * - a depth part that is incompatible with mipmapped texturing 1081 * - at least on Stoney, entirely incompatible Z/S aspects (e.g. 1082 * incorrect tiling applied to the stencil part, stencil buffer 1083 * memory accesses that go out of bounds) even without mipmapping 1084 * 1085 * Some piglit tests that are prone to different types of related 1086 * failures: 1087 * ./bin/ext_framebuffer_multisample-upsample 2 stencil 1088 * ./bin/framebuffer-blit-levels {draw,read} stencil 1089 * ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample} 1090 * ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw} 1091 * ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8 1092 */ 1093 int stencil_tile_idx = -1; 1094 1095 if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil && 1096 (config->info.levels > 1 || info->family == CHIP_STONEY)) { 1097 /* Compute stencilTileIdx that is compatible with the (depth) 1098 * tileIdx. This degrades the depth surface if necessary to 1099 * ensure that a matching stencilTileIdx exists. */ 1100 AddrSurfInfoIn.flags.matchStencilTileCfg = 1; 1101 1102 /* Keep the depth mip-tail compatible with texturing. */ 1103 AddrSurfInfoIn.flags.noStencil = 1; 1104 } 1105 1106 /* Set preferred macrotile parameters. This is usually required 1107 * for shared resources. This is for 2D tiling only. */ 1108 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && 1109 AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && surf->u.legacy.bankw && 1110 surf->u.legacy.bankh && surf->u.legacy.mtilea && surf->u.legacy.tile_split) { 1111 /* If any of these parameters are incorrect, the calculation 1112 * will fail. 
*/ 1113 AddrTileInfoIn.banks = surf->u.legacy.num_banks; 1114 AddrTileInfoIn.bankWidth = surf->u.legacy.bankw; 1115 AddrTileInfoIn.bankHeight = surf->u.legacy.bankh; 1116 AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea; 1117 AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split; 1118 AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */ 1119 AddrSurfInfoIn.flags.opt4Space = 0; 1120 AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn; 1121 1122 /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set 1123 * the tile index, because we are expected to know it if 1124 * we know the other parameters. 1125 * 1126 * This is something that can easily be fixed in Addrlib. 1127 * For now, just figure it out here. 1128 * Note that only 2D_TILE_THIN1 is handled here. 1129 */ 1130 assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 1131 assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1); 1132 1133 if (info->chip_class == GFX6) { 1134 if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) { 1135 if (surf->bpe == 2) 1136 AddrSurfInfoIn.tileIndex = 11; /* 16bpp */ 1137 else 1138 AddrSurfInfoIn.tileIndex = 12; /* 32bpp */ 1139 } else { 1140 if (surf->bpe == 1) 1141 AddrSurfInfoIn.tileIndex = 14; /* 8bpp */ 1142 else if (surf->bpe == 2) 1143 AddrSurfInfoIn.tileIndex = 15; /* 16bpp */ 1144 else if (surf->bpe == 4) 1145 AddrSurfInfoIn.tileIndex = 16; /* 32bpp */ 1146 else 1147 AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */ 1148 } 1149 } else { 1150 /* GFX7 - GFX8 */ 1151 if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) 1152 AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */ 1153 else 1154 AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */ 1155 1156 /* Addrlib doesn't set this if tileIndex is forced like above. 
*/ 1157 AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf); 1158 } 1159 } 1160 1161 surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); 1162 surf->num_meta_levels = 0; 1163 surf->surf_size = 0; 1164 surf->meta_size = 0; 1165 surf->meta_slice_size = 0; 1166 surf->meta_alignment_log2 = 0; 1167 1168 const bool only_stencil = 1169 (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER); 1170 1171 /* Calculate texture layout information. */ 1172 if (!only_stencil) { 1173 for (level = 0; level < config->info.levels; level++) { 1174 r = gfx6_compute_level(addrlib, config, surf, false, level, compressed, &AddrSurfInfoIn, 1175 &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, &AddrHtileIn, 1176 &AddrHtileOut); 1177 if (r) 1178 return r; 1179 1180 if (level > 0) 1181 continue; 1182 1183 if (!AddrSurfInfoOut.tcCompatible) { 1184 AddrSurfInfoIn.flags.tcCompatible = 0; 1185 surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE; 1186 } 1187 1188 if (AddrSurfInfoIn.flags.matchStencilTileCfg) { 1189 AddrSurfInfoIn.flags.matchStencilTileCfg = 0; 1190 AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex; 1191 stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx; 1192 1193 assert(stencil_tile_idx >= 0); 1194 } 1195 1196 r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf); 1197 if (r) 1198 return r; 1199 } 1200 } 1201 1202 /* Calculate texture layout information for stencil. */ 1203 if (surf->flags & RADEON_SURF_SBUFFER) { 1204 AddrSurfInfoIn.tileIndex = stencil_tile_idx; 1205 AddrSurfInfoIn.bpp = 8; 1206 AddrSurfInfoIn.flags.depth = 0; 1207 AddrSurfInfoIn.flags.stencil = 1; 1208 AddrSurfInfoIn.flags.tcCompatible = 0; 1209 /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. 
*/ 1210 AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split; 1211 1212 for (level = 0; level < config->info.levels; level++) { 1213 r = gfx6_compute_level(addrlib, config, surf, true, level, compressed, &AddrSurfInfoIn, 1214 &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, NULL, NULL); 1215 if (r) 1216 return r; 1217 1218 /* DB uses the depth pitch for both stencil and depth. */ 1219 if (!only_stencil) { 1220 if (surf->u.legacy.zs.stencil_level[level].nblk_x != surf->u.legacy.level[level].nblk_x) 1221 surf->u.legacy.stencil_adjusted = true; 1222 } else { 1223 surf->u.legacy.level[level].nblk_x = surf->u.legacy.zs.stencil_level[level].nblk_x; 1224 } 1225 1226 if (level == 0) { 1227 if (only_stencil) { 1228 r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf); 1229 if (r) 1230 return r; 1231 } 1232 1233 /* For 2D modes only. */ 1234 if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) { 1235 surf->u.legacy.stencil_tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes; 1236 } 1237 } 1238 } 1239 } 1240 1241 /* Compute FMASK. 
*/ 1242 if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color && info->has_graphics && 1243 !(surf->flags & RADEON_SURF_NO_FMASK)) { 1244 ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0}; 1245 ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0}; 1246 ADDR_TILEINFO fmask_tile_info = {0}; 1247 1248 fin.size = sizeof(fin); 1249 fout.size = sizeof(fout); 1250 1251 fin.tileMode = AddrSurfInfoOut.tileMode; 1252 fin.pitch = AddrSurfInfoOut.pitch; 1253 fin.height = config->info.height; 1254 fin.numSlices = AddrSurfInfoIn.numSlices; 1255 fin.numSamples = AddrSurfInfoIn.numSamples; 1256 fin.numFrags = AddrSurfInfoIn.numFrags; 1257 fin.tileIndex = -1; 1258 fout.pTileInfo = &fmask_tile_info; 1259 1260 r = AddrComputeFmaskInfo(addrlib, &fin, &fout); 1261 if (r) 1262 return r; 1263 1264 surf->fmask_size = fout.fmaskBytes; 1265 surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign); 1266 surf->fmask_slice_size = fout.sliceSize; 1267 surf->fmask_tile_swizzle = 0; 1268 1269 surf->u.legacy.color.fmask.slice_tile_max = (fout.pitch * fout.height) / 64; 1270 if (surf->u.legacy.color.fmask.slice_tile_max) 1271 surf->u.legacy.color.fmask.slice_tile_max -= 1; 1272 1273 surf->u.legacy.color.fmask.tiling_index = fout.tileIndex; 1274 surf->u.legacy.color.fmask.bankh = fout.pTileInfo->bankHeight; 1275 surf->u.legacy.color.fmask.pitch_in_pixels = fout.pitch; 1276 1277 /* Compute tile swizzle for FMASK. */ 1278 if (config->info.fmask_surf_index && !(surf->flags & RADEON_SURF_SHAREABLE)) { 1279 ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0}; 1280 ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0}; 1281 1282 xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT); 1283 xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT); 1284 1285 /* This counter starts from 1 instead of 0. 
*/ 1286 xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index); 1287 xin.tileIndex = fout.tileIndex; 1288 xin.macroModeIndex = fout.macroModeIndex; 1289 xin.pTileInfo = fout.pTileInfo; 1290 xin.tileMode = fin.tileMode; 1291 1292 int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout); 1293 if (r != ADDR_OK) 1294 return r; 1295 1296 assert(xout.tileSwizzle <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 1297 surf->fmask_tile_swizzle = xout.tileSwizzle; 1298 } 1299 } 1300 1301 /* Recalculate the whole DCC miptree size including disabled levels. 1302 * This is what addrlib does, but calling addrlib would be a lot more 1303 * complicated. 1304 */ 1305 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) { 1306 /* The smallest miplevels that are never compressed by DCC 1307 * still read the DCC buffer via TC if the base level uses DCC, 1308 * and for some reason the DCC buffer needs to be larger if 1309 * the miptree uses non-zero tile_swizzle. Otherwise there are 1310 * VM faults. 1311 * 1312 * "dcc_alignment * 4" was determined by trial and error. 1313 */ 1314 surf->meta_size = align64(surf->surf_size >> 8, (1 << surf->meta_alignment_log2) * 4); 1315 } 1316 1317 /* Make sure HTILE covers the whole miptree, because the shader reads 1318 * TC-compatible HTILE even for levels where it's disabled by DB. 1319 */ 1320 if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_TC_COMPATIBLE_HTILE) && 1321 surf->meta_size && config->info.levels > 1) { 1322 /* MSAA can't occur with levels > 1, so ignore the sample count. 
*/ 1323 const unsigned total_pixels = surf->surf_size / surf->bpe; 1324 const unsigned htile_block_size = 8 * 8; 1325 const unsigned htile_element_size = 4; 1326 1327 surf->meta_size = (total_pixels / htile_block_size) * htile_element_size; 1328 surf->meta_size = align(surf->meta_size, 1 << surf->meta_alignment_log2); 1329 } else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && !surf->meta_size) { 1330 /* Unset this if HTILE is not present. */ 1331 surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE; 1332 } 1333 1334 surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED; 1335 surf->is_displayable = surf->is_linear || surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY || 1336 surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER; 1337 1338 /* The rotated micro tile mode doesn't work if both CMASK and RB+ are 1339 * used at the same time. This case is not currently expected to occur 1340 * because we don't use rotated. Enforce this restriction on all chips 1341 * to facilitate testing. 1342 */ 1343 if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) { 1344 assert(!"rotate micro tile mode is unsupported"); 1345 return ADDR_ERROR; 1346 } 1347 1348 ac_compute_cmask(info, config, surf); 1349 return 0; 1350} 1351 1352/* This is only called when expecting a tiled layout. 
 */
static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct radeon_info *info,
                                           struct radeon_surf *surf,
                                           ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, bool is_fmask,
                                           AddrSwizzleMode *swizzle_mode)
{
   /* Ask addrlib for the preferred swizzle mode for the surface described
    * by *in, optionally constrained by PRT / forced micro tile mode. */
   ADDR_E_RETURNCODE ret;
   ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
   ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};

   sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
   sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);

   sin.flags = in->flags;
   sin.resourceType = in->resourceType;
   sin.format = in->format;
   sin.resourceLoction = ADDR_RSRC_LOC_INVIS; /* [sic] field name is misspelled in addrlib */
   /* TODO: We could allow some of these: */
   sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
   sin.forbiddenBlock.var = 1;   /* don't allow the variable-sized swizzle modes */
   sin.bpp = in->bpp;
   sin.width = in->width;
   sin.height = in->height;
   sin.numSlices = in->numSlices;
   sin.numMipLevels = in->numMipLevels;
   sin.numSamples = in->numSamples;
   sin.numFrags = in->numFrags;

   if (is_fmask) {
      sin.flags.display = 0;
      sin.flags.color = 0;
      sin.flags.fmask = 1;
   }

   /* With PRT images we want to force 64 KiB block size so that the image
    * created is consistent with the format properties returned in Vulkan
    * independent of the image. */
   if (sin.flags.prt) {
      sin.forbiddenBlock.macroThin4KB = 1;
      sin.forbiddenBlock.macroThick4KB = 1;
      sin.forbiddenBlock.linear = 1;
   }

   if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {
      sin.forbiddenBlock.linear = 1;

      if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
         sin.preferredSwSet.sw_D = 1;
      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)
         sin.preferredSwSet.sw_S = 1;
      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)
         sin.preferredSwSet.sw_Z = 1;
      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)
         sin.preferredSwSet.sw_R = 1;
   }

   if (info->chip_class >= GFX10 && in->resourceType == ADDR_RSRC_TEX_3D && in->numSlices > 1) {
      /* 3D textures should use S swizzle modes for the best performance.
       * The only exception is 3D render targets, which prefer 64KB_D_X.
       *
       * 3D texture sampler performance with a very large 3D texture:
       *   ADDR_SW_64KB_R_X = 19 FPS (DCC on), 26 FPS (DCC off)
       *   ADDR_SW_64KB_Z_X = 25 FPS
       *   ADDR_SW_64KB_D_X = 53 FPS
       *   ADDR_SW_4KB_S = 53 FPS
       *   ADDR_SW_64KB_S = 53 FPS
       *   ADDR_SW_64KB_S_T = 61 FPS
       *   ADDR_SW_4KB_S_X = 63 FPS
       *   ADDR_SW_64KB_S_X = 62 FPS
       */
      sin.preferredSwSet.sw_S = 1;
   }

   ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
   if (ret != ADDR_OK)
      return ret;

   *swizzle_mode = sout.swizzleMode;
   return 0;
}

/* Whether the CB (color block) supports DCC with the given swizzle mode. */
static bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode)
{
   if (info->chip_class >= GFX10)
      return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;

   return sw_mode != ADDR_SW_LINEAR;
}

/* Check the DCC block settings against the per-generation constraints of
 * the L2 cache (see the per-branch comments below). */
ASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info,
                                            const struct radeon_surf *surf)
{
   if (info->chip_class <= GFX9) {
      /* Only independent 64B blocks are supported. */
      return surf->u.gfx9.color.dcc.independent_64B_blocks && !surf->u.gfx9.color.dcc.independent_128B_blocks &&
             surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;
   }

   if (info->family == CHIP_NAVI10) {
      /* Only independent 128B blocks are supported. */
      return !surf->u.gfx9.color.dcc.independent_64B_blocks && surf->u.gfx9.color.dcc.independent_128B_blocks &&
             surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
   }

   if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
      /* Either 64B or 128B can be used, but not both.
       * If 64B is used, DCC image stores are unsupported.
       */
      return surf->u.gfx9.color.dcc.independent_64B_blocks != surf->u.gfx9.color.dcc.independent_128B_blocks &&
             (!surf->u.gfx9.color.dcc.independent_64B_blocks ||
              surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B) &&
             (!surf->u.gfx9.color.dcc.independent_128B_blocks ||
              surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B);
   }

   /* 128B is recommended, but 64B can be set too if needed for 4K by DCN.
    * Since there is no reason to ever disable 128B, require it.
    * If 64B is used, DCC image stores are unsupported.
    */
   return surf->u.gfx9.color.dcc.independent_128B_blocks &&
          surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
}

static bool gfx10_DCN_requires_independent_64B_blocks(const struct radeon_info *info,
                                                      const struct ac_surf_config *config)
{
   assert(info->chip_class >= GFX10);

   /* Older kernels have buggy DAL. */
   if (info->drm_minor <= 43)
      return true;

   /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B.
 */
   return config->info.width > 2560 || config->info.height > 2560;
}

/**
 * Return the maximum surface extent supported with the given DRM format
 * modifier, based on whether the modifier carries DCC and, if so, whether
 * it uses independent 64B blocks.
 */
void ac_modifier_max_extent(const struct radeon_info *info,
                            uint64_t modifier, uint32_t *width, uint32_t *height)
{
   if (ac_modifier_has_dcc(modifier)) {
      bool independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);

      if (info->chip_class >= GFX10 && !independent_64B_blocks) {
         /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */
         *width = 2560;
         *height = 2560;
      } else {
         /* DCC is not supported on surfaces above resolutions of 5760. */
         *width = 5760;
         *height = 5760;
      }
   } else {
      /* Non-dcc modifiers */
      *width = 16384;
      *height = 16384;
   }
}

/* Whether the display engine (DCN) can scan out this DCC configuration. */
static bool is_dcc_supported_by_DCN(const struct radeon_info *info,
                                    const struct ac_surf_config *config,
                                    const struct radeon_surf *surf, bool rb_aligned,
                                    bool pipe_aligned)
{
   if (!info->use_display_dcc_unaligned && !info->use_display_dcc_with_retile_blit)
      return false;

   /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
   if (surf->bpe != 4)
      return false;

   /* Handle unaligned DCC. */
   if (info->use_display_dcc_unaligned && (rb_aligned || pipe_aligned))
      return false;

   /* Big resolutions don't support DCC. */
   if (config->info.width > 5760 || config->info.height > 5760)
      return false;

   switch (info->chip_class) {
   case GFX9:
      /* There are more constraints, but we always set
       * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
       * which always works.
       */
      assert(surf->u.gfx9.color.dcc.independent_64B_blocks &&
             surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
      return true;
   case GFX10:
   case GFX10_3:
      /* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */
      if (info->chip_class == GFX10 && surf->u.gfx9.color.dcc.independent_128B_blocks)
         return false;

      return (!gfx10_DCN_requires_independent_64B_blocks(info, config) ||
              (surf->u.gfx9.color.dcc.independent_64B_blocks &&
               surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
   default:
      unreachable("unhandled chip");
      return false;
   }
}

/* Copy the DCC addressing equation computed by addrlib into the compact
 * in-driver gfx9_meta_equation representation. */
static void ac_copy_dcc_equation(const struct radeon_info *info,
                                 ADDR2_COMPUTE_DCCINFO_OUTPUT *dcc,
                                 struct gfx9_meta_equation *equation)
{
   equation->meta_block_width = dcc->metaBlkWidth;
   equation->meta_block_height = dcc->metaBlkHeight;
   equation->meta_block_depth = dcc->metaBlkDepth;

   if (info->chip_class >= GFX10) {
      /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */
      for (unsigned i = 0; i < 4; i++)
         assert(dcc->equation.gfx10_bits[i] == 0);

      for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 4; i < 68; i++)
         assert(dcc->equation.gfx10_bits[i] == 0);

      memcpy(equation->u.gfx10_bits, dcc->equation.gfx10_bits + 4,
             sizeof(equation->u.gfx10_bits));
   } else {
      /* GFX9 stores the equation as per-bit coordinate references. */
      assert(dcc->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit));

      equation->u.gfx9.num_bits = dcc->equation.gfx9.num_bits;
      equation->u.gfx9.num_pipe_bits = dcc->equation.gfx9.numPipeBits;
      for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) {
         for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) {
            equation->u.gfx9.bit[b].coord[c].dim = dcc->equation.gfx9.bit[b].coord[c].dim;
            equation->u.gfx9.bit[b].coord[c].ord = dcc->equation.gfx9.bit[b].coord[c].ord;
         }
      }
   }
}

/* Copy the CMASK addressing equation computed by addrlib into the compact
 * in-driver gfx9_meta_equation representation. */
static void ac_copy_cmask_equation(const struct radeon_info *info,
                                   ADDR2_COMPUTE_CMASK_INFO_OUTPUT *cmask,
                                   struct gfx9_meta_equation *equation)
{
   equation->meta_block_width = cmask->metaBlkWidth;
   equation->meta_block_height = cmask->metaBlkHeight;
   /* CMASK is 2D-only metadata. */
   equation->meta_block_depth = 1;

   if (info->chip_class == GFX9) {
      assert(cmask->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit));

      equation->u.gfx9.num_bits = cmask->equation.gfx9.num_bits;
      equation->u.gfx9.num_pipe_bits = cmask->equation.gfx9.numPipeBits;
      for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) {
         for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) {
            equation->u.gfx9.bit[b].coord[c].dim = cmask->equation.gfx9.bit[b].coord[c].dim;
            equation->u.gfx9.bit[b].coord[c].ord = cmask->equation.gfx9.bit[b].coord[c].ord;
         }
      }
   }
}

/* Copy the HTILE addressing equation (GFX10 bit form) computed by addrlib
 * into the compact in-driver gfx9_meta_equation representation. */
static void ac_copy_htile_equation(const struct radeon_info *info,
                                   ADDR2_COMPUTE_HTILE_INFO_OUTPUT *htile,
                                   struct gfx9_meta_equation *equation)
{
   equation->meta_block_width = htile->metaBlkWidth;
   equation->meta_block_height = htile->metaBlkHeight;

   /* gfx9_meta_equation doesn't store the first 8 and the last 4 elements. They must be 0. */
   for (unsigned i = 0; i < 8; i++)
      assert(htile->equation.gfx10_bits[i] == 0);

   for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 8; i < 72; i++)
      assert(htile->equation.gfx10_bits[i] == 0);

   memcpy(equation->u.gfx10_bits, htile->equation.gfx10_bits + 8,
          sizeof(equation->u.gfx10_bits));
}

/* Compute the GFX9+ miptree layout (and its metadata) via addrlib. */
static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_info *info,
                                const struct ac_surf_config *config, struct radeon_surf *surf,
                                bool compressed, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
{
   ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {0};
   ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
   ADDR_E_RETURNCODE ret;

   out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
   out.pMipInfo = mip_info;

   ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out);
   if (ret != ADDR_OK)
      return ret;

   if (in->flags.prt) {
      surf->prt_tile_width = out.blockWidth;
      surf->prt_tile_height = out.blockHeight;

      /* The mip tail starts at the first level smaller than a block. */
      for (surf->first_mip_tail_level = 0; surf->first_mip_tail_level < in->numMipLevels;
           ++surf->first_mip_tail_level) {
         if(mip_info[surf->first_mip_tail_level].pitch < out.blockWidth ||
            mip_info[surf->first_mip_tail_level].height < out.blockHeight)
            break;
      }

      for (unsigned i = 0; i < in->numMipLevels; i++) {
         surf->u.gfx9.prt_level_offset[i] = mip_info[i].macroBlockOffset + mip_info[i].mipTailOffset;

         if (info->chip_class >= GFX10)
            surf->u.gfx9.prt_level_pitch[i] = mip_info[i].pitch;
         else
            surf->u.gfx9.prt_level_pitch[i] = out.mipChainPitch;
      }
   }

   if (in->flags.stencil) {
      /* Stencil is appended after the depth part of the same surface. */
      surf->u.gfx9.zs.stencil_swizzle_mode = in->swizzleMode;
      surf->u.gfx9.zs.stencil_epitch =
         out.epitchIsHeight ?
out.mipChainHeight - 1 : out.mipChainPitch - 1; 1666 surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign)); 1667 surf->u.gfx9.zs.stencil_offset = align(surf->surf_size, out.baseAlign); 1668 surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize; 1669 return 0; 1670 } 1671 1672 surf->u.gfx9.swizzle_mode = in->swizzleMode; 1673 surf->u.gfx9.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1; 1674 1675 /* CMASK fast clear uses these even if FMASK isn't allocated. 1676 * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4. 1677 */ 1678 if (!in->flags.depth) { 1679 surf->u.gfx9.color.fmask_swizzle_mode = surf->u.gfx9.swizzle_mode & ~0x3; 1680 surf->u.gfx9.color.fmask_epitch = surf->u.gfx9.epitch; 1681 } 1682 1683 surf->u.gfx9.surf_slice_size = out.sliceSize; 1684 surf->u.gfx9.surf_pitch = out.pitch; 1685 surf->u.gfx9.surf_height = out.height; 1686 surf->surf_size = out.surfSize; 1687 surf->surf_alignment_log2 = util_logbase2(out.baseAlign); 1688 1689 if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch && 1690 surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) { 1691 /* Adjust surf_pitch to be in elements units not in pixels */ 1692 surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe); 1693 surf->u.gfx9.epitch = 1694 MAX2(surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch * surf->blk_w - 1); 1695 /* The surface is really a surf->bpe bytes per pixel surface even if we 1696 * use it as a surf->bpe bytes per element one. 1697 * Adjust surf_slice_size and surf_size to reflect the change 1698 * made to surf_pitch. 
1699 */ 1700 surf->u.gfx9.surf_slice_size = 1701 MAX2(surf->u.gfx9.surf_slice_size, 1702 surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w); 1703 surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices; 1704 } 1705 1706 if (in->swizzleMode == ADDR_SW_LINEAR) { 1707 for (unsigned i = 0; i < in->numMipLevels; i++) { 1708 surf->u.gfx9.offset[i] = mip_info[i].offset; 1709 surf->u.gfx9.pitch[i] = mip_info[i].pitch; 1710 } 1711 } 1712 1713 surf->u.gfx9.base_mip_width = mip_info[0].pitch; 1714 surf->u.gfx9.base_mip_height = mip_info[0].height; 1715 1716 if (in->flags.depth) { 1717 assert(in->swizzleMode != ADDR_SW_LINEAR); 1718 1719 if (surf->flags & RADEON_SURF_NO_HTILE) 1720 return 0; 1721 1722 /* HTILE */ 1723 ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0}; 1724 ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0}; 1725 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 1726 1727 hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT); 1728 hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT); 1729 hout.pMipInfo = meta_mip_info; 1730 1731 assert(in->flags.metaPipeUnaligned == 0); 1732 assert(in->flags.metaRbUnaligned == 0); 1733 1734 hin.hTileFlags.pipeAligned = 1; 1735 hin.hTileFlags.rbAligned = 1; 1736 hin.depthFlags = in->flags; 1737 hin.swizzleMode = in->swizzleMode; 1738 hin.unalignedWidth = in->width; 1739 hin.unalignedHeight = in->height; 1740 hin.numSlices = in->numSlices; 1741 hin.numMipLevels = in->numMipLevels; 1742 hin.firstMipIdInTail = out.firstMipIdInTail; 1743 1744 ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout); 1745 if (ret != ADDR_OK) 1746 return ret; 1747 1748 surf->meta_size = hout.htileBytes; 1749 surf->meta_slice_size = hout.sliceSize; 1750 surf->meta_alignment_log2 = util_logbase2(hout.baseAlign); 1751 surf->meta_pitch = hout.pitch; 1752 surf->num_meta_levels = in->numMipLevels; 1753 1754 for (unsigned i = 0; i < in->numMipLevels; i++) { 1755 surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset; 1756 
surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize; 1757 1758 if (meta_mip_info[i].inMiptail) { 1759 /* GFX10 can only compress the first level 1760 * in the mip tail. 1761 */ 1762 surf->num_meta_levels = i + 1; 1763 break; 1764 } 1765 } 1766 1767 if (!surf->num_meta_levels) 1768 surf->meta_size = 0; 1769 1770 if (info->chip_class >= GFX10) 1771 ac_copy_htile_equation(info, &hout, &surf->u.gfx9.zs.htile_equation); 1772 return 0; 1773 } 1774 1775 { 1776 /* Compute tile swizzle for the color surface. 1777 * All *_X and *_T modes can use the swizzle. 1778 */ 1779 if (config->info.surf_index && in->swizzleMode >= ADDR_SW_64KB_Z_T && !out.mipChainInTail && 1780 !(surf->flags & RADEON_SURF_SHAREABLE) && !in->flags.display) { 1781 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; 1782 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; 1783 1784 xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); 1785 xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); 1786 1787 xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1; 1788 xin.flags = in->flags; 1789 xin.swizzleMode = in->swizzleMode; 1790 xin.resourceType = in->resourceType; 1791 xin.format = in->format; 1792 xin.numSamples = in->numSamples; 1793 xin.numFrags = in->numFrags; 1794 1795 ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout); 1796 if (ret != ADDR_OK) 1797 return ret; 1798 1799 assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 1800 surf->tile_swizzle = xout.pipeBankXor; 1801 } 1802 1803 /* DCC */ 1804 if (info->has_graphics && !(surf->flags & RADEON_SURF_DISABLE_DCC) && !compressed && 1805 is_dcc_supported_by_CB(info, in->swizzleMode) && 1806 (!in->flags.display || 1807 is_dcc_supported_by_DCN(info, config, surf, !in->flags.metaRbUnaligned, 1808 !in->flags.metaPipeUnaligned)) && 1809 (surf->modifier == DRM_FORMAT_MOD_INVALID || 1810 ac_modifier_has_dcc(surf->modifier))) { 1811 ADDR2_COMPUTE_DCCINFO_INPUT din = {0}; 1812 ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0}; 
1813 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 1814 1815 din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT); 1816 dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT); 1817 dout.pMipInfo = meta_mip_info; 1818 1819 din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned; 1820 din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned; 1821 din.resourceType = in->resourceType; 1822 din.swizzleMode = in->swizzleMode; 1823 din.bpp = in->bpp; 1824 din.unalignedWidth = in->width; 1825 din.unalignedHeight = in->height; 1826 din.numSlices = in->numSlices; 1827 din.numFrags = in->numFrags; 1828 din.numMipLevels = in->numMipLevels; 1829 din.dataSurfaceSize = out.surfSize; 1830 din.firstMipIdInTail = out.firstMipIdInTail; 1831 1832 ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout); 1833 if (ret != ADDR_OK) 1834 return ret; 1835 1836 surf->u.gfx9.color.dcc.rb_aligned = din.dccKeyFlags.rbAligned; 1837 surf->u.gfx9.color.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned; 1838 surf->u.gfx9.color.dcc_block_width = dout.compressBlkWidth; 1839 surf->u.gfx9.color.dcc_block_height = dout.compressBlkHeight; 1840 surf->u.gfx9.color.dcc_block_depth = dout.compressBlkDepth; 1841 surf->u.gfx9.color.dcc_pitch_max = dout.pitch - 1; 1842 surf->u.gfx9.color.dcc_height = dout.height; 1843 surf->meta_size = dout.dccRamSize; 1844 surf->meta_slice_size = dout.dccRamSliceSize; 1845 surf->meta_alignment_log2 = util_logbase2(dout.dccRamBaseAlign); 1846 surf->num_meta_levels = in->numMipLevels; 1847 1848 /* Disable DCC for levels that are in the mip tail. 1849 * 1850 * There are two issues that this is intended to 1851 * address: 1852 * 1853 * 1. Multiple mip levels may share a cache line. This 1854 * can lead to corruption when switching between 1855 * rendering to different mip levels because the 1856 * RBs don't maintain coherency. 1857 * 1858 * 2. Texturing with metadata after rendering sometimes 1859 * fails with corruption, probably for a similar 1860 * reason. 
1861 * 1862 * Working around these issues for all levels in the 1863 * mip tail may be overly conservative, but it's what 1864 * Vulkan does. 1865 * 1866 * Alternative solutions that also work but are worse: 1867 * - Disable DCC entirely. 1868 * - Flush TC L2 after rendering. 1869 */ 1870 for (unsigned i = 0; i < in->numMipLevels; i++) { 1871 surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset; 1872 surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize; 1873 1874 if (meta_mip_info[i].inMiptail) { 1875 /* GFX10 can only compress the first level 1876 * in the mip tail. 1877 * 1878 * TODO: Try to do the same thing for gfx9 1879 * if there are no regressions. 1880 */ 1881 if (info->chip_class >= GFX10) 1882 surf->num_meta_levels = i + 1; 1883 else 1884 surf->num_meta_levels = i; 1885 break; 1886 } 1887 } 1888 1889 if (!surf->num_meta_levels) 1890 surf->meta_size = 0; 1891 1892 surf->u.gfx9.color.display_dcc_size = surf->meta_size; 1893 surf->u.gfx9.color.display_dcc_alignment_log2 = surf->meta_alignment_log2; 1894 surf->u.gfx9.color.display_dcc_pitch_max = surf->u.gfx9.color.dcc_pitch_max; 1895 surf->u.gfx9.color.display_dcc_height = surf->u.gfx9.color.dcc_height; 1896 1897 if (in->resourceType == ADDR_RSRC_TEX_2D) 1898 ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.dcc_equation); 1899 1900 /* Compute displayable DCC. */ 1901 if (((in->flags.display && info->use_display_dcc_with_retile_blit) || 1902 ac_modifier_has_dcc_retile(surf->modifier)) && surf->num_meta_levels) { 1903 /* Compute displayable DCC info. 
*/ 1904 din.dccKeyFlags.pipeAligned = 0; 1905 din.dccKeyFlags.rbAligned = 0; 1906 1907 assert(din.numSlices == 1); 1908 assert(din.numMipLevels == 1); 1909 assert(din.numFrags == 1); 1910 assert(surf->tile_swizzle == 0); 1911 assert(surf->u.gfx9.color.dcc.pipe_aligned || surf->u.gfx9.color.dcc.rb_aligned); 1912 1913 ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout); 1914 if (ret != ADDR_OK) 1915 return ret; 1916 1917 surf->u.gfx9.color.display_dcc_size = dout.dccRamSize; 1918 surf->u.gfx9.color.display_dcc_alignment_log2 = util_logbase2(dout.dccRamBaseAlign); 1919 surf->u.gfx9.color.display_dcc_pitch_max = dout.pitch - 1; 1920 surf->u.gfx9.color.display_dcc_height = dout.height; 1921 assert(surf->u.gfx9.color.display_dcc_size <= surf->meta_size); 1922 1923 ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.display_dcc_equation); 1924 surf->u.gfx9.color.dcc.display_equation_valid = true; 1925 } 1926 } 1927 1928 /* FMASK */ 1929 if (in->numSamples > 1 && info->has_graphics && !(surf->flags & RADEON_SURF_NO_FMASK)) { 1930 ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0}; 1931 ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0}; 1932 1933 fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT); 1934 fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT); 1935 1936 ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, in, true, &fin.swizzleMode); 1937 if (ret != ADDR_OK) 1938 return ret; 1939 1940 fin.unalignedWidth = in->width; 1941 fin.unalignedHeight = in->height; 1942 fin.numSlices = in->numSlices; 1943 fin.numSamples = in->numSamples; 1944 fin.numFrags = in->numFrags; 1945 1946 ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout); 1947 if (ret != ADDR_OK) 1948 return ret; 1949 1950 surf->u.gfx9.color.fmask_swizzle_mode = fin.swizzleMode; 1951 surf->u.gfx9.color.fmask_epitch = fout.pitch - 1; 1952 surf->fmask_size = fout.fmaskBytes; 1953 surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign); 1954 surf->fmask_slice_size = fout.sliceSize; 1955 1956 /* Compute 
tile swizzle for the FMASK surface. */ 1957 if (config->info.fmask_surf_index && fin.swizzleMode >= ADDR_SW_64KB_Z_T && 1958 !(surf->flags & RADEON_SURF_SHAREABLE)) { 1959 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; 1960 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; 1961 1962 xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); 1963 xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); 1964 1965 /* This counter starts from 1 instead of 0. */ 1966 xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index); 1967 xin.flags = in->flags; 1968 xin.swizzleMode = fin.swizzleMode; 1969 xin.resourceType = in->resourceType; 1970 xin.format = in->format; 1971 xin.numSamples = in->numSamples; 1972 xin.numFrags = in->numFrags; 1973 1974 ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout); 1975 if (ret != ADDR_OK) 1976 return ret; 1977 1978 assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8)); 1979 surf->fmask_tile_swizzle = xout.pipeBankXor; 1980 } 1981 } 1982 1983 /* CMASK -- on GFX10 only for FMASK */ 1984 if (in->swizzleMode != ADDR_SW_LINEAR && in->resourceType == ADDR_RSRC_TEX_2D && 1985 ((info->chip_class <= GFX9 && in->numSamples == 1 && in->flags.metaPipeUnaligned == 0 && 1986 in->flags.metaRbUnaligned == 0) || 1987 (surf->fmask_size && in->numSamples >= 2))) { 1988 ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0}; 1989 ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0}; 1990 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 1991 1992 cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT); 1993 cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT); 1994 cout.pMipInfo = meta_mip_info; 1995 1996 assert(in->flags.metaPipeUnaligned == 0); 1997 assert(in->flags.metaRbUnaligned == 0); 1998 1999 cin.cMaskFlags.pipeAligned = 1; 2000 cin.cMaskFlags.rbAligned = 1; 2001 cin.resourceType = in->resourceType; 2002 cin.unalignedWidth = in->width; 2003 cin.unalignedHeight = in->height; 2004 cin.numSlices = in->numSlices; 2005 cin.numMipLevels = 
            in->numMipLevels;
      cin.firstMipIdInTail = out.firstMipIdInTail;

      /* Multisample CMASK is tied to FMASK, so it must use the FMASK
       * swizzle mode chosen earlier; single-sample CMASK follows the
       * color surface itself.
       */
      if (in->numSamples > 1)
         cin.swizzleMode = surf->u.gfx9.color.fmask_swizzle_mode;
      else
         cin.swizzleMode = in->swizzleMode;

      ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);
      if (ret != ADDR_OK)
         return ret;

      surf->cmask_size = cout.cmaskBytes;
      surf->cmask_alignment_log2 = util_logbase2(cout.baseAlign);
      surf->cmask_slice_size = cout.sliceSize;
      surf->cmask_pitch = cout.pitch;
      surf->cmask_height = cout.height;
      /* Only level 0 metadata is kept for CMASK. */
      surf->u.gfx9.color.cmask_level0.offset = meta_mip_info[0].offset;
      surf->u.gfx9.color.cmask_level0.size = meta_mip_info[0].sliceSize;

      ac_copy_cmask_equation(info, &cout, &surf->u.gfx9.color.cmask_equation);
      }
   }

   return 0;
}

/* Compute the full GFX9+ (addrlib v2) layout for a surface.
 *
 * Chooses the addrlib format from surf->bpe / surf->flags, picks a swizzle
 * mode (from the requested mode, from an imported surface, or from a DRM
 * format modifier), runs gfx9_compute_miptree for the main plane and — when
 * RADEON_SURF_SBUFFER is set — again for stencil, then derives
 * surf->is_displayable and surf->micro_tile_mode from the final swizzle mode.
 * Returns 0 on success or a non-zero addrlib error code.
 */
static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
                                const struct ac_surf_config *config, enum radeon_surf_mode mode,
                                struct radeon_surf *surf)
{
   bool compressed;
   ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
   int r;

   AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);

   /* NOTE(review): block size 4x4 is used as the "is BCn-compressed" test;
    * this only distinguishes BC1/BC3 by bytes-per-element below.
    */
   compressed = surf->blk_w == 4 && surf->blk_h == 4;

   /* The format must be set correctly for the allocation of compressed
    * textures to work. In other cases, setting the bpp is sufficient. */
   if (compressed) {
      switch (surf->bpe) {
      case 8:
         AddrSurfInfoIn.format = ADDR_FMT_BC1;
         break;
      case 16:
         AddrSurfInfoIn.format = ADDR_FMT_BC3;
         break;
      default:
         assert(0);
      }
   } else {
      switch (surf->bpe) {
      case 1:
         assert(!(surf->flags & RADEON_SURF_ZBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_8;
         break;
      case 2:
         assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_16;
         break;
      case 4:
         assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32;
         break;
      case 8:
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32_32;
         break;
      case 12:
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32_32_32;
         break;
      case 16:
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32;
         break;
      default:
         assert(0);
      }
      AddrSurfInfoIn.bpp = surf->bpe * 8;
   }

   bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
   AddrSurfInfoIn.flags.color = is_color_surface && !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
   AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
   AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
   /* flags.texture currently refers to TC-compatible HTILE */
   AddrSurfInfoIn.flags.texture = is_color_surface || surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
   AddrSurfInfoIn.flags.opt4space = 1;
   AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;

   AddrSurfInfoIn.numMipLevels = config->info.levels;
   AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
   AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;

   /* Color surfaces may use fewer stored (storage) samples than coverage
    * samples (EQAA); depth/stencil always use numSamples fragments.
    */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
      AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);

   /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
    * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
    * must sample 1D textures as 2D. */
   if (config->is_3d)
      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
   else if (info->chip_class != GFX9 && config->is_1d)
      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
   else
      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;

   AddrSurfInfoIn.width = config->info.width;
   AddrSurfInfoIn.height = config->info.height;

   if (config->is_3d)
      AddrSurfInfoIn.numSlices = config->info.depth;
   else if (config->is_cube)
      AddrSurfInfoIn.numSlices = 6;
   else
      AddrSurfInfoIn.numSlices = config->info.array_size;

   /* This is propagated to DCC. It must be 0 for HTILE and CMASK. */
   AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
   AddrSurfInfoIn.flags.metaRbUnaligned = 0;

   if (ac_modifier_has_dcc(surf->modifier)) {
      /* The modifier fully determines the DCC parameters. */
      ac_modifier_fill_dcc_params(surf->modifier, surf, &AddrSurfInfoIn);
   } else if (!AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.stencil) {
      /* Optimal values for the L2 cache. */
      /* Don't change the DCC settings for imported buffers - they might differ. */
      if (!(surf->flags & RADEON_SURF_IMPORTED)) {
         if (info->chip_class == GFX9) {
            surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
            surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
            surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
         } else if (info->chip_class >= GFX10) {
            surf->u.gfx9.color.dcc.independent_64B_blocks = 0;
            surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
            surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
         }
      }

      if (AddrSurfInfoIn.flags.display) {
         /* The display hardware can only read DCC with RB_ALIGNED=0 and
          * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
          *
          * The CB block requires RB_ALIGNED=1 except 1 RB chips.
          * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
          * after rendering, so PIPE_ALIGNED=1 is recommended.
          */
         if (info->use_display_dcc_unaligned) {
            AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
            AddrSurfInfoIn.flags.metaRbUnaligned = 1;
         }

         /* Adjust DCC settings to meet DCN requirements. */
         /* Don't change the DCC settings for imported buffers - they might differ. */
         if (!(surf->flags & RADEON_SURF_IMPORTED) &&
             (info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit)) {
            /* Only Navi12/14 support independent 64B blocks in L2,
             * but without DCC image stores.
             */
            if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
               surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
               surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
               surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
            }

            if ((info->chip_class >= GFX10_3 && info->family <= CHIP_YELLOW_CARP) ||
                /* Newer chips will skip this when possible to get better performance.
                 * This is also possible for other gfx10.3 chips, but is disabled for
                 * interoperability between different Mesa versions.
                 */
                (info->family > CHIP_YELLOW_CARP &&
                 gfx10_DCN_requires_independent_64B_blocks(info, config))) {
               surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
               surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
               surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
            }
         }
      }
   }

   if (surf->modifier == DRM_FORMAT_MOD_INVALID) {
      switch (mode) {
      case RADEON_SURF_MODE_LINEAR_ALIGNED:
         assert(config->info.samples <= 1);
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
         break;

      case RADEON_SURF_MODE_1D:
      case RADEON_SURF_MODE_2D:
         /* Imported/forced surfaces already carry their swizzle mode. */
         if (surf->flags & RADEON_SURF_IMPORTED ||
             (info->chip_class >= GFX10 && surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
            AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode;
            break;
         }

         r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
                                             &AddrSurfInfoIn.swizzleMode);
         if (r)
            return r;
         break;

      default:
         assert(0);
      }
   } else {
      /* We have a valid and required modifier here. */
      assert(!compressed);
      assert(!ac_modifier_has_dcc(surf->modifier) ||
             !(surf->flags & RADEON_SURF_DISABLE_DCC));

      AddrSurfInfoIn.swizzleMode = ac_modifier_gfx9_swizzle_mode(surf->modifier);
   }

   surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;
   surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);

   /* Reset outputs before the miptree computation fills them in. */
   surf->num_meta_levels = 0;
   surf->surf_size = 0;
   surf->fmask_size = 0;
   surf->meta_size = 0;
   surf->meta_slice_size = 0;
   surf->u.gfx9.surf_offset = 0;
   if (AddrSurfInfoIn.flags.stencil)
      surf->u.gfx9.zs.stencil_offset = 0;
   surf->cmask_size = 0;

   const bool only_stencil =
      (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);

   /* Calculate texture layout information. */
   if (!only_stencil) {
      r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
      if (r)
         return r;
   }

   /* Calculate texture layout information for stencil. */
   if (surf->flags & RADEON_SURF_SBUFFER) {
      AddrSurfInfoIn.flags.stencil = 1;
      AddrSurfInfoIn.bpp = 8;
      AddrSurfInfoIn.format = ADDR_FMT_8;

      if (!AddrSurfInfoIn.flags.depth) {
         /* Stencil-only: pick a swizzle mode for the stencil plane itself. */
         r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
                                             &AddrSurfInfoIn.swizzleMode);
         if (r)
            return r;
      } else
         AddrSurfInfoIn.flags.depth = 0;

      r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
      if (r)
         return r;
   }

   surf->is_linear = surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR;

   /* Query whether the surface is displayable. */
   /* This is only useful for surfaces that are allocated without SCANOUT. */
   BOOL_32 displayable = false;
   if (!config->is_3d && !config->is_cube) {
      r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.swizzle_mode,
                                         surf->bpe * 8, &displayable);
      if (r)
         return r;

      /* Display needs unaligned DCC. */
      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
          surf->num_meta_levels &&
          (!is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
                                    surf->u.gfx9.color.dcc.pipe_aligned) ||
           /* Don't set is_displayable if displayable DCC is missing. */
           (info->use_display_dcc_with_retile_blit && !surf->u.gfx9.color.dcc.display_equation_valid)))
         displayable = false;
   }
   surf->is_displayable = displayable;

   /* Validate that we allocated a displayable surface if requested. */
   assert(!AddrSurfInfoIn.flags.display || surf->is_displayable);

   /* Validate that DCC is set up correctly. */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->num_meta_levels) {
      assert(is_dcc_supported_by_L2(info, surf));
      if (AddrSurfInfoIn.flags.color)
         assert(is_dcc_supported_by_CB(info, surf->u.gfx9.swizzle_mode));
      if (AddrSurfInfoIn.flags.display) {
         assert(is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
                                        surf->u.gfx9.color.dcc.pipe_aligned));
      }
   }

   if (info->has_graphics && !compressed && !config->is_3d && config->info.levels == 1 &&
       AddrSurfInfoIn.flags.color && !surf->is_linear &&
       (1 << surf->surf_alignment_log2) >= 64 * 1024 && /* 64KB tiling */
       !(surf->flags & (RADEON_SURF_DISABLE_DCC | RADEON_SURF_FORCE_SWIZZLE_MODE |
                        RADEON_SURF_FORCE_MICRO_TILE_MODE)) &&
       (surf->modifier == DRM_FORMAT_MOD_INVALID ||
        ac_modifier_has_dcc(surf->modifier)) &&
       is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
                               surf->u.gfx9.color.dcc.pipe_aligned)) {
      /* Validate that DCC is enabled if DCN can do it. */
      if ((info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) &&
          AddrSurfInfoIn.flags.display && surf->bpe == 4) {
         assert(surf->num_meta_levels);
      }

      /* Validate that non-scanout DCC is always enabled. */
      if (!AddrSurfInfoIn.flags.display)
         assert(surf->num_meta_levels);
   }

   if (!surf->meta_size) {
      /* Unset this if HTILE is not present. */
      surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
   }

   /* Derive the micro tile mode from the final swizzle mode. */
   switch (surf->u.gfx9.swizzle_mode) {
   /* S = standard. */
   case ADDR_SW_256B_S:
   case ADDR_SW_4KB_S:
   case ADDR_SW_64KB_S:
   case ADDR_SW_64KB_S_T:
   case ADDR_SW_4KB_S_X:
   case ADDR_SW_64KB_S_X:
      surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD;
      break;

   /* D = display. */
   case ADDR_SW_LINEAR:
   case ADDR_SW_256B_D:
   case ADDR_SW_4KB_D:
   case ADDR_SW_64KB_D:
   case ADDR_SW_64KB_D_T:
   case ADDR_SW_4KB_D_X:
   case ADDR_SW_64KB_D_X:
      surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
      break;

   /* R = rotated (gfx9), render target (gfx10). */
   case ADDR_SW_256B_R:
   case ADDR_SW_4KB_R:
   case ADDR_SW_64KB_R:
   case ADDR_SW_64KB_R_T:
   case ADDR_SW_4KB_R_X:
   case ADDR_SW_64KB_R_X:
   case ADDR_SW_VAR_R_X:
      /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
       * used at the same time. We currently do not use rotated
       * in gfx9.
       */
      assert(info->chip_class >= GFX10 || !"rotate micro tile mode is unsupported");
      surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;
      break;

   /* Z = depth. */
   case ADDR_SW_4KB_Z:
   case ADDR_SW_64KB_Z:
   case ADDR_SW_64KB_Z_T:
   case ADDR_SW_4KB_Z_X:
   case ADDR_SW_64KB_Z_X:
   case ADDR_SW_VAR_Z_X:
      surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
      break;

   default:
      assert(0);
   }

   return 0;
}

/* Public entry point: validate the config, compute the main surface via the
 * gfx6 or gfx9+ path, then lay out the additional sub-allocations (FMASK,
 * CMASK, displayable DCC, metadata) after the main surface inside a single
 * buffer, updating total_size and alignment_log2 as each one is appended.
 * Returns 0 on success, non-zero otherwise.
 */
int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
                       const struct ac_surf_config *config, enum radeon_surf_mode mode,
                       struct radeon_surf *surf)
{
   int r;

   r = surf_config_sanity(config, surf->flags);
   if (r)
      return r;

   if (info->family_id >= FAMILY_AI)
      r = gfx9_compute_surface(addrlib, info, config, mode, surf);
   else
      r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf);

   if (r)
      return r;

   /* Determine the memory layout of multiple allocations in one buffer. */
   surf->total_size = surf->surf_size;
   surf->alignment_log2 = surf->surf_alignment_log2;

   /* Ensure the offsets are always 0 if not available. */
   surf->meta_offset = surf->display_dcc_offset = surf->fmask_offset = surf->cmask_offset = 0;

   if (surf->fmask_size) {
      assert(config->info.samples >= 2);
      surf->fmask_offset = align64(surf->total_size, 1 << surf->fmask_alignment_log2);
      surf->total_size = surf->fmask_offset + surf->fmask_size;
      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->fmask_alignment_log2);
   }

   /* Single-sample CMASK is in a separate buffer. */
   if (surf->cmask_size && config->info.samples >= 2) {
      surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
      surf->total_size = surf->cmask_offset + surf->cmask_size;
      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
   }

   if (surf->is_displayable)
      surf->flags |= RADEON_SURF_SCANOUT;

   if (surf->meta_size &&
       /* dcc_size is computed on GFX9+ only if it's displayable. */
       (info->chip_class >= GFX9 || !get_display_flag(config, surf))) {
      /* It's better when displayable DCC is immediately after
       * the image due to hw-specific reasons.
       */
      if (info->chip_class >= GFX9 &&
          !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
          surf->u.gfx9.color.dcc.display_equation_valid) {
         /* Add space for the displayable DCC buffer. */
         surf->display_dcc_offset = align64(surf->total_size, 1 << surf->u.gfx9.color.display_dcc_alignment_log2);
         surf->total_size = surf->display_dcc_offset + surf->u.gfx9.color.display_dcc_size;
      }

      surf->meta_offset = align64(surf->total_size, 1 << surf->meta_alignment_log2);
      surf->total_size = surf->meta_offset + surf->meta_size;
      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->meta_alignment_log2);
   }

   return 0;
}

/* This is meant to be used for disabling DCC.
*/ 2445void ac_surface_zero_dcc_fields(struct radeon_surf *surf) 2446{ 2447 if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) 2448 return; 2449 2450 surf->meta_offset = 0; 2451 surf->display_dcc_offset = 0; 2452 if (!surf->fmask_offset && !surf->cmask_offset) { 2453 surf->total_size = surf->surf_size; 2454 surf->alignment_log2 = surf->surf_alignment_log2; 2455 } 2456} 2457 2458static unsigned eg_tile_split(unsigned tile_split) 2459{ 2460 switch (tile_split) { 2461 case 0: 2462 tile_split = 64; 2463 break; 2464 case 1: 2465 tile_split = 128; 2466 break; 2467 case 2: 2468 tile_split = 256; 2469 break; 2470 case 3: 2471 tile_split = 512; 2472 break; 2473 default: 2474 case 4: 2475 tile_split = 1024; 2476 break; 2477 case 5: 2478 tile_split = 2048; 2479 break; 2480 case 6: 2481 tile_split = 4096; 2482 break; 2483 } 2484 return tile_split; 2485} 2486 2487static unsigned eg_tile_split_rev(unsigned eg_tile_split) 2488{ 2489 switch (eg_tile_split) { 2490 case 64: 2491 return 0; 2492 case 128: 2493 return 1; 2494 case 256: 2495 return 2; 2496 case 512: 2497 return 3; 2498 default: 2499 case 1024: 2500 return 4; 2501 case 2048: 2502 return 5; 2503 case 4096: 2504 return 6; 2505 } 2506} 2507 2508#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45 2509#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3 2510 2511/* This should be called before ac_compute_surface. 
*/ 2512void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf, 2513 uint64_t tiling_flags, enum radeon_surf_mode *mode) 2514{ 2515 bool scanout; 2516 2517 if (info->chip_class >= GFX9) { 2518 surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE); 2519 surf->u.gfx9.color.dcc.independent_64B_blocks = 2520 AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B); 2521 surf->u.gfx9.color.dcc.independent_128B_blocks = 2522 AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B); 2523 surf->u.gfx9.color.dcc.max_compressed_block_size = 2524 AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE); 2525 surf->u.gfx9.color.display_dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX); 2526 scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT); 2527 *mode = 2528 surf->u.gfx9.swizzle_mode > 0 ? RADEON_SURF_MODE_2D : RADEON_SURF_MODE_LINEAR_ALIGNED; 2529 } else { 2530 surf->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); 2531 surf->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); 2532 surf->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); 2533 surf->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT)); 2534 surf->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); 2535 surf->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); 2536 scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */ 2537 2538 if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */ 2539 *mode = RADEON_SURF_MODE_2D; 2540 else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */ 2541 *mode = RADEON_SURF_MODE_1D; 2542 else 2543 *mode = RADEON_SURF_MODE_LINEAR_ALIGNED; 2544 } 2545 2546 if (scanout) 2547 surf->flags |= RADEON_SURF_SCANOUT; 2548 else 2549 surf->flags &= ~RADEON_SURF_SCANOUT; 2550} 2551 2552void ac_surface_get_bo_metadata(const struct radeon_info 
*info, struct radeon_surf *surf, 2553 uint64_t *tiling_flags) 2554{ 2555 *tiling_flags = 0; 2556 2557 if (info->chip_class >= GFX9) { 2558 uint64_t dcc_offset = 0; 2559 2560 if (surf->meta_offset) { 2561 dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset : surf->meta_offset; 2562 assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24)); 2563 } 2564 2565 *tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, surf->u.gfx9.swizzle_mode); 2566 *tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, dcc_offset >> 8); 2567 *tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, surf->u.gfx9.color.display_dcc_pitch_max); 2568 *tiling_flags |= 2569 AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, surf->u.gfx9.color.dcc.independent_64B_blocks); 2570 *tiling_flags |= 2571 AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, surf->u.gfx9.color.dcc.independent_128B_blocks); 2572 *tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, 2573 surf->u.gfx9.color.dcc.max_compressed_block_size); 2574 *tiling_flags |= AMDGPU_TILING_SET(SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0); 2575 } else { 2576 if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D) 2577 *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */ 2578 else if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D) 2579 *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */ 2580 else 2581 *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */ 2582 2583 *tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, surf->u.legacy.pipe_config); 2584 *tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(surf->u.legacy.bankw)); 2585 *tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(surf->u.legacy.bankh)); 2586 if (surf->u.legacy.tile_split) 2587 *tiling_flags |= 2588 AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(surf->u.legacy.tile_split)); 2589 *tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(surf->u.legacy.mtilea)); 2590 *tiling_flags |= 
         AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(surf->u.legacy.num_banks) - 1);

      if (surf->flags & RADEON_SURF_SCANOUT)
         *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
      else
         *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
   }
}

/* Word 1 of the UMD metadata: PCI device ID prefixed by the ATI vendor ID.
 * Used to reject metadata produced for a different GPU, because tiling
 * modes are ambiguous without the exact device.
 */
static uint32_t ac_get_umd_metadata_word1(const struct radeon_info *info)
{
   return (ATI_VENDOR_ID << 16) | info->pci_id;
}

/* Parse UMD metadata attached to an imported texture and apply it to "surf".
 *
 * This should be called after ac_compute_surface.
 *
 * Returns false only for provably invalid imports (sample-count or mip-count
 * mismatch, unknown chip class with DCC enabled). Missing or incompatible
 * metadata merely disables DCC on the surface and returns true.
 */
bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                 unsigned num_storage_samples, unsigned num_mipmap_levels,
                                 unsigned size_metadata, const uint32_t metadata[64])
{
   /* Dwords [2:9] of the metadata hold the image descriptor. */
   const uint32_t *desc = &metadata[2];
   uint64_t offset;

   /* Modifier-based imports carry the layout in the modifier itself,
    * not in UMD metadata.
    */
   if (surf->modifier != DRM_FORMAT_MOD_INVALID)
      return true;

   if (info->chip_class >= GFX9)
      offset = surf->u.gfx9.surf_offset;
   else
      offset = (uint64_t)surf->u.legacy.level[0].offset_256B * 256;

   if (offset || /* Non-zero planes ignore metadata. */
       size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */
       metadata[0] == 0 || /* invalid version number */
       metadata[1] != ac_get_umd_metadata_word1(info)) /* invalid PCI ID */ {
      /* Disable DCC because it might not be enabled. */
      ac_surface_zero_dcc_fields(surf);

      /* Don't report an error if the texture comes from an incompatible driver,
       * but this might not work.
       */
      return true;
   }

   /* Validate that sample counts and the number of mipmap levels match.
    */
   unsigned desc_last_level = G_008F1C_LAST_LEVEL(desc[3]);
   unsigned type = G_008F1C_TYPE(desc[3]);

   if (type == V_008F1C_SQ_RSRC_IMG_2D_MSAA || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      /* For MSAA resources, LAST_LEVEL holds log2(samples) rather than a
       * mip level.
       */
      unsigned log_samples = util_logbase2(MAX2(1, num_storage_samples));

      if (desc_last_level != log_samples) {
         fprintf(stderr,
                 "amdgpu: invalid MSAA texture import, "
                 "metadata has log2(samples) = %u, the caller set %u\n",
                 desc_last_level, log_samples);
         return false;
      }
   } else {
      if (desc_last_level != num_mipmap_levels - 1) {
         fprintf(stderr,
                 "amdgpu: invalid mipmapped texture import, "
                 "metadata has last_level = %u, the caller set %u\n",
                 desc_last_level, num_mipmap_levels - 1);
         return false;
      }
   }

   if (info->chip_class >= GFX8 && G_008F28_COMPRESSION_EN(desc[6])) {
      /* Read DCC information. The DCC address layout in the descriptor
       * differs per generation.
       */
      switch (info->chip_class) {
      case GFX8:
         surf->meta_offset = (uint64_t)desc[7] << 8;
         break;

      case GFX9:
         surf->meta_offset =
            ((uint64_t)desc[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40);
         surf->u.gfx9.color.dcc.pipe_aligned = G_008F24_META_PIPE_ALIGNED(desc[5]);
         surf->u.gfx9.color.dcc.rb_aligned = G_008F24_META_RB_ALIGNED(desc[5]);

         /* If DCC is unaligned, this can only be a displayable image. */
         if (!surf->u.gfx9.color.dcc.pipe_aligned && !surf->u.gfx9.color.dcc.rb_aligned)
            assert(surf->is_displayable);
         break;

      case GFX10:
      case GFX10_3:
         surf->meta_offset =
            ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
         surf->u.gfx9.color.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
         break;

      default:
         assert(0);
         return false;
      }
   } else {
      /* Disable DCC. dcc_offset is always set by texture_from_handle
       * and must be cleared here.
       */
      ac_surface_zero_dcc_fields(surf);
   }

   return true;
}

/* Write UMD metadata describing "surf" so another process/driver can import
 * it via ac_surface_set_umd_metadata. "desc" is the image descriptor of the
 * whole resource and is modified in place (base address cleared, relative
 * DCC offset stored).
 */
void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                 unsigned num_mipmap_levels, uint32_t desc[8],
                                 unsigned *size_metadata, uint32_t metadata[64])
{
   /* Clear the base address and set the relative DCC offset. */
   desc[0] = 0;
   desc[1] &= C_008F14_BASE_ADDRESS_HI;

   switch (info->chip_class) {
   case GFX6:
   case GFX7:
      break;
   case GFX8:
      desc[7] = surf->meta_offset >> 8;
      break;
   case GFX9:
      desc[7] = surf->meta_offset >> 8;
      desc[5] &= C_008F24_META_DATA_ADDRESS;
      desc[5] |= S_008F24_META_DATA_ADDRESS(surf->meta_offset >> 40);
      break;
   case GFX10:
   case GFX10_3:
      desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
      desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->meta_offset >> 8);
      desc[7] = surf->meta_offset >> 16;
      break;
   default:
      assert(0);
   }

   /* Metadata image format version 1:
    * [0] = 1 (metadata format identifier)
    * [1] = (VENDOR_ID << 16) | PCI_ID
    * [2:9] = image descriptor for the whole resource
    *         [2] is always 0, because the base address is cleared
    *         [9] is the DCC offset bits [39:8] from the beginning of
    *             the buffer
    * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
    */

   metadata[0] = 1; /* metadata image format version 1 */

   /* Tiling modes are ambiguous without a PCI ID. */
   metadata[1] = ac_get_umd_metadata_word1(info);

   /* Dwords [2:9] contain the image descriptor. */
   memcpy(&metadata[2], desc, 8 * 4);
   *size_metadata = 10 * 4;

   /* Dwords [10:..] contain the mipmap level offsets.
    */
   if (info->chip_class <= GFX8) {
      /* Only GFX6-8 store explicit per-level offsets in the surface. */
      for (unsigned i = 0; i < num_mipmap_levels; i++)
         metadata[10 + i] = surf->u.legacy.level[i].offset_256B;

      *size_metadata += num_mipmap_levels * 4;
   }
}

/* Return the pitch alignment (in elements) that a GFX9+ surface pitch
 * override must satisfy, derived from the swizzle-mode block size and bpe.
 */
static uint32_t ac_surface_get_gfx9_pitch_align(struct radeon_surf *surf)
{
   if (surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR)
      return 256 / surf->bpe;

   if (surf->u.gfx9.resource_type == RADEON_RESOURCE_3D)
      return 1; /* TODO */

   unsigned bpe_shift = util_logbase2(surf->bpe) / 2;
   /* The low 2 bits of the swizzle mode select the variant (Z/S/D/R);
    * mask them off so each block-size group is matched once.
    */
   switch(surf->u.gfx9.swizzle_mode & ~3) {
   case ADDR_SW_LINEAR: /* 256B block. */
      return 16 >> bpe_shift;
   case ADDR_SW_4KB_Z:
   case ADDR_SW_4KB_Z_X:
      return 64 >> bpe_shift;
   case ADDR_SW_64KB_Z:
   case ADDR_SW_64KB_Z_T:
   case ADDR_SW_64KB_Z_X:
      return 256 >> bpe_shift;
   case ADDR_SW_VAR_Z_X:
   default:
      return 1; /* TODO */
   }
}

/* Override the offset and pitch of a surface, e.g. for imported buffers with
 * an externally imposed stride. Returns false when the requested override is
 * not representable for this surface.
 */
bool ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
                                       unsigned num_mipmap_levels, uint64_t offset, unsigned pitch)
{
   /*
    * GFX10 and newer don't support custom strides. Furthermore, for
    * multiple miplevels or compression data we'd really need to rerun
    * addrlib to update all the fields in the surface. That, however, is a
    * software limitation and could be relaxed later.
    */
   bool require_equal_pitch = surf->surf_size != surf->total_size ||
                              num_mipmap_levels != 1 ||
                              info->chip_class >= GFX10;

   if (info->chip_class >= GFX9) {
      if (pitch) {
         if (surf->u.gfx9.surf_pitch != pitch && require_equal_pitch)
            return false;

         /* The requested pitch must honor the swizzle-mode alignment. */
         if ((ac_surface_get_gfx9_pitch_align(surf) - 1) & pitch)
            return false;

         if (pitch != surf->u.gfx9.surf_pitch) {
            /* Recompute slice and total size from the overridden pitch,
             * keeping the slice count unchanged.
             */
            unsigned slices = surf->surf_size / surf->u.gfx9.surf_slice_size;

            surf->u.gfx9.surf_pitch = pitch;
            surf->u.gfx9.epitch = pitch - 1;
            surf->u.gfx9.surf_slice_size = (uint64_t)pitch * surf->u.gfx9.surf_height * surf->bpe;
            surf->total_size = surf->surf_size = surf->u.gfx9.surf_slice_size * slices;
         }
      }
      surf->u.gfx9.surf_offset = offset;
      if (surf->u.gfx9.zs.stencil_offset)
         surf->u.gfx9.zs.stencil_offset += offset;
   } else {
      if (pitch) {
         if (surf->u.legacy.level[0].nblk_x != pitch && require_equal_pitch)
            return false;

         surf->u.legacy.level[0].nblk_x = pitch;
         surf->u.legacy.level[0].slice_size_dw =
            ((uint64_t)pitch * surf->u.legacy.level[0].nblk_y * surf->bpe) / 4;
      }

      if (offset) {
         for (unsigned i = 0; i < ARRAY_SIZE(surf->u.legacy.level); ++i)
            surf->u.legacy.level[i].offset_256B += offset / 256;
      }
   }

   /* NOTE(review): these validity checks run after the surface was already
    * modified above, so on failure the surface is left partially updated —
    * confirm callers treat "false" as fatal for the surface.
    */
   if (offset & ((1 << surf->alignment_log2) - 1) ||
       offset >= UINT64_MAX - surf->total_size)
      return false;

   /* Metadata offsets are relative to the buffer start, so shift them too. */
   if (surf->meta_offset)
      surf->meta_offset += offset;
   if (surf->fmask_offset)
      surf->fmask_offset += offset;
   if (surf->cmask_offset)
      surf->cmask_offset += offset;
   if (surf->display_dcc_offset)
      surf->display_dcc_offset += offset;
   return true;
}

/* Number of DRM format planes exposed for this surface: 1 without a
 * modifier, 2 when DCC metadata is shared, 3 when display DCC exists too.
 */
unsigned ac_surface_get_nplanes(const struct radeon_surf *surf)
{
   if (surf->modifier == DRM_FORMAT_MOD_INVALID)
      return 1;
   else if (surf->display_dcc_offset)
      return 3;
   else if
(surf->meta_offset)
      return 2;
   else
      return 1;
}

/* Return the byte offset of a DRM format plane within the buffer.
 * Plane 0 is the image itself; planes 1/2 are compression metadata.
 */
uint64_t ac_surface_get_plane_offset(enum chip_class chip_class,
                                     const struct radeon_surf *surf,
                                     unsigned plane, unsigned layer)
{
   switch (plane) {
   case 0:
      if (chip_class >= GFX9) {
         return surf->u.gfx9.surf_offset +
                layer * surf->u.gfx9.surf_slice_size;
      } else {
         return (uint64_t)surf->u.legacy.level[0].offset_256B * 256 +
                layer * (uint64_t)surf->u.legacy.level[0].slice_size_dw * 4;
      }
   case 1:
      /* Display DCC when present, otherwise the regular metadata offset. */
      assert(!layer);
      return surf->display_dcc_offset ?
             surf->display_dcc_offset : surf->meta_offset;
   case 2:
      assert(!layer);
      return surf->meta_offset;
   default:
      unreachable("Invalid plane index");
   }
}

/* Return the row stride of a DRM format plane (bytes for plane 0; the
 * metadata planes use the DCC pitch, i.e. *_pitch_max + 1).
 */
uint64_t ac_surface_get_plane_stride(enum chip_class chip_class,
                                     const struct radeon_surf *surf,
                                     unsigned plane)
{
   switch (plane) {
   case 0:
      if (chip_class >= GFX9) {
         return surf->u.gfx9.surf_pitch * surf->bpe;
      } else {
         return surf->u.legacy.level[0].nblk_x * surf->bpe;
      }
   case 1:
      /* *_pitch_max fields store "pitch - 1". */
      return 1 + (surf->display_dcc_offset ?
             surf->u.gfx9.color.display_dcc_pitch_max : surf->u.gfx9.color.dcc_pitch_max);
   case 2:
      return surf->u.gfx9.color.dcc_pitch_max + 1;
   default:
      unreachable("Invalid plane index");
   }
}

/* Return the size in bytes of a DRM format plane. */
uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf,
                                   unsigned plane)
{
   switch (plane) {
   case 0:
      return surf->surf_size;
   case 1:
      return surf->display_dcc_offset ?
             surf->u.gfx9.color.display_dcc_size : surf->meta_size;
   case 2:
      return surf->meta_size;
   default:
      unreachable("Invalid plane index");
   }
}

/* Dump a human-readable description of the surface layout to "out".
 * meta_offset/meta_size hold HTILE for depth/stencil surfaces and DCC for
 * color surfaces, hence the RADEON_SURF_Z_OR_SBUFFER checks below.
 */
void ac_surface_print_info(FILE *out, const struct radeon_info *info,
                           const struct radeon_surf *surf)
{
   if (info->chip_class >= GFX9) {
      fprintf(out,
              "    Surf: size=%" PRIu64 ", slice_size=%" PRIu64 ", "
              "alignment=%u, swmode=%u, epitch=%u, pitch=%u, blk_w=%u, "
              "blk_h=%u, bpe=%u, flags=0x%"PRIx64"\n",
              surf->surf_size, surf->u.gfx9.surf_slice_size,
              1 << surf->surf_alignment_log2, surf->u.gfx9.swizzle_mode,
              surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch,
              surf->blk_w, surf->blk_h, surf->bpe, surf->flags);

      if (surf->fmask_offset)
         fprintf(out,
                 "    FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
                 "alignment=%u, swmode=%u, epitch=%u\n",
                 surf->fmask_offset, surf->fmask_size,
                 1 << surf->fmask_alignment_log2, surf->u.gfx9.color.fmask_swizzle_mode,
                 surf->u.gfx9.color.fmask_epitch);

      if (surf->cmask_offset)
         fprintf(out,
                 "    CMask: offset=%" PRIu64 ", size=%u, "
                 "alignment=%u\n",
                 surf->cmask_offset, surf->cmask_size,
                 1 << surf->cmask_alignment_log2);

      if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
         fprintf(out,
                 "    HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
                 surf->meta_offset, surf->meta_size,
                 1 << surf->meta_alignment_log2);

      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
         fprintf(out,
                 "    DCC: offset=%" PRIu64 ", size=%u, "
                 "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
                 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2,
                 surf->u.gfx9.color.display_dcc_pitch_max, surf->num_meta_levels);

      if (surf->has_stencil)
         fprintf(out,
                 "    Stencil: offset=%" PRIu64 ", swmode=%u, epitch=%u\n",
                 surf->u.gfx9.zs.stencil_offset,
                 surf->u.gfx9.zs.stencil_swizzle_mode,
                 surf->u.gfx9.zs.stencil_epitch);
   } else {
      fprintf(out,
              "    Surf: size=%" PRIu64 ", alignment=%u, blk_w=%u, blk_h=%u, "
              "bpe=%u, flags=0x%"PRIx64"\n",
              surf->surf_size, 1 << surf->surf_alignment_log2, surf->blk_w,
              surf->blk_h, surf->bpe, surf->flags);

      fprintf(out,
              "    Layout: size=%" PRIu64 ", alignment=%u, bankw=%u, bankh=%u, "
              "nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
              surf->surf_size, 1 << surf->surf_alignment_log2,
              surf->u.legacy.bankw, surf->u.legacy.bankh,
              surf->u.legacy.num_banks, surf->u.legacy.mtilea,
              surf->u.legacy.tile_split, surf->u.legacy.pipe_config,
              (surf->flags & RADEON_SURF_SCANOUT) != 0);

      if (surf->fmask_offset)
         fprintf(out,
                 "    FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
                 "alignment=%u, pitch_in_pixels=%u, bankh=%u, "
                 "slice_tile_max=%u, tile_mode_index=%u\n",
                 surf->fmask_offset, surf->fmask_size,
                 1 << surf->fmask_alignment_log2, surf->u.legacy.color.fmask.pitch_in_pixels,
                 surf->u.legacy.color.fmask.bankh,
                 surf->u.legacy.color.fmask.slice_tile_max,
                 surf->u.legacy.color.fmask.tiling_index);

      if (surf->cmask_offset)
         fprintf(out,
                 "    CMask: offset=%" PRIu64 ", size=%u, alignment=%u, "
                 "slice_tile_max=%u\n",
                 surf->cmask_offset, surf->cmask_size,
                 1 << surf->cmask_alignment_log2, surf->u.legacy.color.cmask_slice_tile_max);

      if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
         fprintf(out, "    HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
                 surf->meta_offset, surf->meta_size,
                 1 << surf->meta_alignment_log2);

      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
         fprintf(out, "    DCC: offset=%" PRIu64 ", size=%u, alignment=%u\n",
                 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2);

      if (surf->has_stencil)
         fprintf(out, "    StencilLayout: tilesplit=%u\n",
                 surf->u.legacy.stencil_tile_split);
   }
}

/* Emit NIR computing the address of a metadata element (DCC/CMASK/HTILE)
 * for a texel coordinate using the gfx10+ addressing equation.
 * blkSizeBias adjusts the log2 of the meta block size; blkStart is the
 * first equation bit to evaluate. If bit_position is non-NULL it receives
 * (address & 1) * 4 — presumably the bit offset within the byte for 4-bit
 * metadata elements (TODO confirm against callers).
 */
static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                                   struct gfx9_meta_equation *equation,
                                                   int blkSizeBias, unsigned blkStart,
                                                   nir_ssa_def *meta_pitch, nir_ssa_def *meta_slice_size,
                                                   nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                                   nir_ssa_def *pipe_xor,
                                                   nir_ssa_def **bit_position)
{
   nir_ssa_def *zero = nir_imm_int(b, 0);
   nir_ssa_def *one = nir_imm_int(b, 1);

   assert(info->chip_class >= GFX10);

   unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
   unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
   unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + blkSizeBias;

   nir_ssa_def *coord[] = {x, y, z, 0};
   nir_ssa_def *address = zero;

   /* Each address bit is the XOR of the coordinate bits selected by the
    * equation's per-bit masks.
    */
   for (unsigned i = blkStart; i < blkSizeLog2 + 1; i++) {
      nir_ssa_def *v = zero;

      for (unsigned c = 0; c < 4; c++) {
         unsigned index = i * 4 + c - (blkStart * 4);
         if (equation->u.gfx10_bits[index]) {
            unsigned mask = equation->u.gfx10_bits[index];
            nir_ssa_def *bits = coord[c];

            while (mask)
               v = nir_ixor(b, v, nir_iand(b, nir_ushr_imm(b, bits, u_bit_scan(&mask)), one));
         }
      }

      address = nir_ior(b, address, nir_ishl(b, v, nir_imm_int(b, i)));
   }

   unsigned blkMask = (1 << blkSizeLog2) - 1;
   unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
   nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
   nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
   nir_ssa_def *pb = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
   nir_ssa_def *blkIndex = nir_iadd(b, nir_imul(b, yb, pb), xb);
   nir_ssa_def *pipeXor = nir_iand_imm(b, nir_ishl(b,
nir_iand_imm(b, pipe_xor, pipeMask),
                                                   nir_imm_int(b, m_pipeInterleaveLog2)), blkMask);

   /* Optional output: (address & 1) * 4. */
   if (bit_position)
      *bit_position = nir_ishl(b, nir_iand(b, address, nir_imm_int(b, 1)),
                               nir_imm_int(b, 2));

   /* slice base + block offset + pipe-xored intra-block address. */
   return nir_iadd(b, nir_iadd(b, nir_imul(b, meta_slice_size, z),
                   nir_imul(b, blkIndex, nir_ishl(b, one, nir_imm_int(b, blkSizeLog2)))),
                   nir_ixor(b, nir_ushr(b, address, one), pipeXor));
}

/* Emit NIR computing the address of a metadata element for a texel
 * coordinate using the gfx9 addressing equation (per-bit coordinate
 * selectors rather than the gfx10 bit masks).
 */
static nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                                  struct gfx9_meta_equation *equation,
                                                  nir_ssa_def *meta_pitch, nir_ssa_def *meta_height,
                                                  nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                                  nir_ssa_def *sample, nir_ssa_def *pipe_xor,
                                                  nir_ssa_def **bit_position)
{
   nir_ssa_def *zero = nir_imm_int(b, 0);
   nir_ssa_def *one = nir_imm_int(b, 1);

   assert(info->chip_class >= GFX9);

   unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
   unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
   unsigned meta_block_depth_log2 = util_logbase2(equation->meta_block_depth);

   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
   unsigned numPipeBits = equation->u.gfx9.num_pipe_bits;
   nir_ssa_def *pitchInBlock = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
   nir_ssa_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, meta_height, meta_block_height_log2),
                                            pitchInBlock);

   /* Coordinates in units of meta blocks. */
   nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
   nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
   nir_ssa_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2);

   nir_ssa_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock),
                                      nir_imul(b, yb, pitchInBlock)), xb);
   /* Index 4 ("dim" == 4) selects the block index; 0-3 select x/y/z/sample. */
   nir_ssa_def *coords[] = {x, y, z, sample, blockIndex};

   nir_ssa_def *address = zero;
   unsigned num_bits =
equation->u.gfx9.num_bits;
   assert(num_bits <= 32);

   /* Compute the address up until the last bit that doesn't use the block index. */
   for (unsigned i = 0; i < num_bits - 1; i++) {
      nir_ssa_def *xor = zero;

      for (unsigned c = 0; c < 5; c++) {
         /* dim >= 5 marks an unused coordinate slot. */
         if (equation->u.gfx9.bit[i].coord[c].dim >= 5)
            continue;

         assert(equation->u.gfx9.bit[i].coord[c].ord < 32);
         nir_ssa_def *ison =
            nir_iand(b, nir_ushr_imm(b, coords[equation->u.gfx9.bit[i].coord[c].dim],
                                     equation->u.gfx9.bit[i].coord[c].ord), one);

         xor = nir_ixor(b, xor, ison);
      }
      address = nir_ior(b, address, nir_ishl(b, xor, nir_imm_int(b, i)));
   }

   /* Fill the remaining bits with the block index. */
   unsigned last = num_bits - 1;
   address = nir_ior(b, address,
                     nir_ishl(b, nir_ushr_imm(b, blockIndex,
                                              equation->u.gfx9.bit[last].coord[0].ord),
                              nir_imm_int(b, last)));

   /* Optional output: (address & 1) * 4. */
   if (bit_position)
      *bit_position = nir_ishl(b, nir_iand(b, address, nir_imm_int(b, 1)),
                               nir_imm_int(b, 2));

   nir_ssa_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1);
   return nir_ixor(b, nir_ushr(b, address, one),
                   nir_ishl(b, pipeXor, nir_imm_int(b, m_pipeInterleaveLog2)));
}

/* Emit NIR computing the DCC element address for a texel coordinate,
 * dispatching to the gfx10 or gfx9 equation based on the chip class.
 */
nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                        unsigned bpe, struct gfx9_meta_equation *equation,
                                        nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,
                                        nir_ssa_def *dcc_slice_size,
                                        nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                        nir_ssa_def *sample, nir_ssa_def *pipe_xor)
{
   if (info->chip_class >= GFX10) {
      unsigned bpp_log2 = util_logbase2(bpe);

      return gfx10_nir_meta_addr_from_coord(b, info, equation, bpp_log2 - 8, 1,
                                            dcc_pitch, dcc_slice_size,
                                            x, y, z, pipe_xor, NULL);
   } else {
      return gfx9_nir_meta_addr_from_coord(b, info, equation, dcc_pitch,
                                           dcc_height, x, y, z,
                                           sample, pipe_xor, NULL);
   }
3155} 3156 3157nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info, 3158 struct gfx9_meta_equation *equation, 3159 nir_ssa_def *cmask_pitch, nir_ssa_def *cmask_height, 3160 nir_ssa_def *cmask_slice_size, 3161 nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z, 3162 nir_ssa_def *pipe_xor, 3163 nir_ssa_def **bit_position) 3164{ 3165 nir_ssa_def *zero = nir_imm_int(b, 0); 3166 3167 if (info->chip_class >= GFX10) { 3168 return gfx10_nir_meta_addr_from_coord(b, info, equation, -7, 1, 3169 cmask_pitch, cmask_slice_size, 3170 x, y, z, pipe_xor, bit_position); 3171 } else { 3172 return gfx9_nir_meta_addr_from_coord(b, info, equation, cmask_pitch, 3173 cmask_height, x, y, z, zero, 3174 pipe_xor, bit_position); 3175 } 3176} 3177 3178nir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info, 3179 struct gfx9_meta_equation *equation, 3180 nir_ssa_def *htile_pitch, 3181 nir_ssa_def *htile_slice_size, 3182 nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z, 3183 nir_ssa_def *pipe_xor) 3184{ 3185 return gfx10_nir_meta_addr_from_coord(b, info, equation, -4, 2, 3186 htile_pitch, htile_slice_size, 3187 x, y, z, pipe_xor, NULL); 3188} 3189