1/* 2 * Copyright © 2021 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining 6 * a copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS 17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 * USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * The above copyright notice and this permission notice (including the 23 * next paragraph) shall be included in all copies or substantial portions 24 * of the Software. 25 */ 26 27/* Make the test not meaningless when asserts are disabled. */ 28#undef NDEBUG 29 30#include <assert.h> 31#include <inttypes.h> 32#include <stdio.h> 33#include <stdlib.h> 34 35#include <amdgpu.h> 36#include "drm-uapi/amdgpu_drm.h" 37#include "drm-uapi/drm_fourcc.h" 38 39#include "ac_surface.h" 40#include "util/macros.h" 41#include "util/u_atomic.h" 42#include "util/u_math.h" 43#include "util/u_vector.h" 44#include "util/mesa-sha1.h" 45#include "addrlib/inc/addrinterface.h" 46 47#include "ac_surface_test_common.h" 48 49/* 50 * The main goal of this test is to validate that our dcc/htile addressing 51 * functions match addrlib behavior. 52 */ 53 54/* DCC address computation without mipmapping. 55 * CMASK address computation without mipmapping and without multisampling. 56 */ 57static unsigned gfx9_meta_addr_from_coord(const struct radeon_info *info, 58 /* Shader key inputs: */ 59 /* equation varies with resource_type, swizzle_mode, 60 * bpp, number of fragments, pipe_aligned, rb_aligned */ 61 const struct gfx9_addr_meta_equation *eq, 62 unsigned meta_block_width, unsigned meta_block_height, 63 unsigned meta_block_depth, 64 /* Shader inputs: */ 65 unsigned meta_pitch, unsigned meta_height, 66 unsigned x, unsigned y, unsigned z, 67 unsigned sample, unsigned pipe_xor, 68 /* Shader outputs (CMASK only): */ 69 unsigned *bit_position) 70{ 71 /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */ 72 unsigned meta_block_width_log2 = util_logbase2(meta_block_width); 73 unsigned meta_block_height_log2 = util_logbase2(meta_block_height); 74 unsigned meta_block_depth_log2 = util_logbase2(meta_block_depth); 75 76 unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config); 77 unsigned numPipeBits = eq->numPipeBits; 78 unsigned pitchInBlock = meta_pitch >> meta_block_width_log2; 79 unsigned sliceSizeInBlock = (meta_height >> meta_block_height_log2) * pitchInBlock; 80 81 unsigned xb = x >> meta_block_width_log2; 82 unsigned yb = y >> meta_block_height_log2; 83 unsigned zb = z >> meta_block_depth_log2; 84 85 unsigned blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; 86 unsigned coords[] = {x, y, z, sample, blockIndex}; 87 88 unsigned address = 0; 89 unsigned num_bits = eq->num_bits; 90 assert(num_bits <= 32); 91 92 /* Compute the address up until the last bit that doesn't use the block index. */ 93 for (unsigned b = 0; b < num_bits - 1; b++) { 94 unsigned xor = 0; 95 for (unsigned c = 0; c < 5; c++) { 96 if (eq->bit[b].coord[c].dim >= 5) 97 continue; 98 99 assert(eq->bit[b].coord[c].ord < 32); 100 unsigned ison = (coords[eq->bit[b].coord[c].dim] >> 101 eq->bit[b].coord[c].ord) & 0x1; 102 103 xor ^= ison; 104 } 105 address |= xor << b; 106 } 107 108 /* Fill the remaining bits with the block index. */ 109 unsigned last = num_bits - 1; 110 address |= (blockIndex >> eq->bit[last].coord[0].ord) << last; 111 112 if (bit_position) 113 *bit_position = (address & 1) << 2; 114 115 unsigned pipeXor = pipe_xor & ((1 << numPipeBits) - 1); 116 return (address >> 1) ^ (pipeXor << m_pipeInterleaveLog2); 117} 118 119/* DCC/CMASK/HTILE address computation for GFX10. */ 120static unsigned gfx10_meta_addr_from_coord(const struct radeon_info *info, 121 /* Shader key inputs: */ 122 const uint16_t *equation, 123 unsigned meta_block_width, unsigned meta_block_height, 124 unsigned blkSizeLog2, 125 /* Shader inputs: */ 126 unsigned meta_pitch, unsigned meta_slice_size, 127 unsigned x, unsigned y, unsigned z, 128 unsigned pipe_xor, 129 /* Shader outputs: (CMASK only) */ 130 unsigned *bit_position) 131{ 132 /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */ 133 unsigned meta_block_width_log2 = util_logbase2(meta_block_width); 134 unsigned meta_block_height_log2 = util_logbase2(meta_block_height); 135 136 unsigned coord[] = {x, y, z, 0}; 137 unsigned address = 0; 138 139 for (unsigned i = 0; i < blkSizeLog2 + 1; i++) { 140 unsigned v = 0; 141 142 for (unsigned c = 0; c < 4; c++) { 143 if (equation[i*4+c] != 0) { 144 unsigned mask = equation[i*4+c]; 145 unsigned bits = coord[c]; 146 147 while (mask) 148 v ^= (bits >> u_bit_scan(&mask)) & 0x1; 149 } 150 } 151 152 address |= v << i; 153 } 154 155 unsigned blkMask = (1 << blkSizeLog2) - 1; 156 unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1; 157 unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config); 158 unsigned xb = x >> meta_block_width_log2; 159 unsigned yb = y >> meta_block_height_log2; 160 unsigned pb = meta_pitch >> meta_block_width_log2; 161 unsigned blkIndex = (yb * pb) + xb; 162 unsigned pipeXor = ((pipe_xor & pipeMask) << m_pipeInterleaveLog2) & blkMask; 163 164 if (bit_position) 165 *bit_position = (address & 1) << 2; 166 167 return (meta_slice_size * z) + 168 (blkIndex * (1 << blkSizeLog2)) + 169 ((address >> 1) ^ pipeXor); 170} 171 172/* DCC address computation without mipmapping and MSAA. */ 173static unsigned gfx10_dcc_addr_from_coord(const struct radeon_info *info, 174 /* Shader key inputs: */ 175 /* equation varies with bpp and pipe_aligned */ 176 const uint16_t *equation, unsigned bpp, 177 unsigned meta_block_width, unsigned meta_block_height, 178 /* Shader inputs: */ 179 unsigned dcc_pitch, unsigned dcc_slice_size, 180 unsigned x, unsigned y, unsigned z, 181 unsigned pipe_xor) 182{ 183 unsigned bpp_log2 = util_logbase2(bpp >> 3); 184 unsigned meta_block_width_log2 = util_logbase2(meta_block_width); 185 unsigned meta_block_height_log2 = util_logbase2(meta_block_height); 186 unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + bpp_log2 - 8; 187 188 return gfx10_meta_addr_from_coord(info, equation, 189 meta_block_width, meta_block_height, 190 blkSizeLog2, 191 dcc_pitch, dcc_slice_size, 192 x, y, z, pipe_xor, NULL); 193} 194 195static bool one_dcc_address_test(const char *name, const char *test, ADDR_HANDLE addrlib, 196 const struct radeon_info *info, unsigned width, unsigned height, 197 unsigned depth, unsigned samples, unsigned bpp, 198 unsigned swizzle_mode, bool pipe_aligned, bool rb_aligned, 199 unsigned mrt_index, 200 unsigned start_x, unsigned start_y, unsigned start_z, 201 unsigned start_sample) 202{ 203 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)}; 204 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)}; 205 ADDR2_COMPUTE_DCCINFO_INPUT din = {sizeof(din)}; 206 ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {sizeof(dout)}; 207 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT in = {sizeof(in)}; 208 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT out = {sizeof(out)}; 209 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 210 211 dout.pMipInfo = meta_mip_info; 212 213 /* Compute DCC info. */ 214 in.dccKeyFlags.pipeAligned = din.dccKeyFlags.pipeAligned = pipe_aligned; 215 in.dccKeyFlags.rbAligned = din.dccKeyFlags.rbAligned = rb_aligned; 216 xin.resourceType = in.resourceType = din.resourceType = ADDR_RSRC_TEX_2D; 217 xin.swizzleMode = in.swizzleMode = din.swizzleMode = swizzle_mode; 218 in.bpp = din.bpp = bpp; 219 xin.numFrags = xin.numSamples = in.numFrags = din.numFrags = samples; 220 in.numMipLevels = din.numMipLevels = 1; /* addrlib can't do DccAddrFromCoord with mipmapping */ 221 din.unalignedWidth = width; 222 din.unalignedHeight = height; 223 din.numSlices = depth; 224 din.firstMipIdInTail = 1; 225 226 int ret = Addr2ComputeDccInfo(addrlib, &din, &dout); 227 assert(ret == ADDR_OK); 228 229 /* Compute xor. */ 230 static AddrFormat format[] = { 231 ADDR_FMT_8, 232 ADDR_FMT_16, 233 ADDR_FMT_32, 234 ADDR_FMT_32_32, 235 ADDR_FMT_32_32_32_32, 236 }; 237 xin.flags.color = 1; 238 xin.flags.texture = 1; 239 xin.flags.opt4space = 1; 240 xin.flags.metaRbUnaligned = !rb_aligned; 241 xin.flags.metaPipeUnaligned = !pipe_aligned; 242 xin.format = format[util_logbase2(bpp / 8)]; 243 xin.surfIndex = mrt_index; 244 245 ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout); 246 assert(ret == ADDR_OK); 247 248 /* Compute addresses */ 249 in.compressBlkWidth = dout.compressBlkWidth; 250 in.compressBlkHeight = dout.compressBlkHeight; 251 in.compressBlkDepth = dout.compressBlkDepth; 252 in.metaBlkWidth = dout.metaBlkWidth; 253 in.metaBlkHeight = dout.metaBlkHeight; 254 in.metaBlkDepth = dout.metaBlkDepth; 255 in.dccRamSliceSize = dout.dccRamSliceSize; 256 257 in.mipId = 0; 258 in.pitch = dout.pitch; 259 in.height = dout.height; 260 in.pipeXor = xout.pipeBankXor; 261 262 /* Validate that the packed gfx9_meta_equation structure can fit all fields. */ 263 const struct gfx9_meta_equation eq; 264 if (info->chip_class == GFX9) { 265 /* The bit array is smaller in gfx9_meta_equation than in addrlib. */ 266 assert(dout.equation.gfx9.num_bits <= ARRAY_SIZE(eq.u.gfx9.bit)); 267 } else { 268 /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */ 269 for (unsigned i = 0; i < 4; i++) 270 assert(dout.equation.gfx10_bits[i] == 0); 271 272 for (unsigned i = ARRAY_SIZE(eq.u.gfx10_bits) + 4; i < 68; i++) 273 assert(dout.equation.gfx10_bits[i] == 0); 274 } 275 276 for (in.x = start_x; in.x < in.pitch; in.x += dout.compressBlkWidth) { 277 for (in.y = start_y; in.y < in.height; in.y += dout.compressBlkHeight) { 278 for (in.slice = start_z; in.slice < depth; in.slice += dout.compressBlkDepth) { 279 for (in.sample = start_sample; in.sample < samples; in.sample++) { 280 int r = Addr2ComputeDccAddrFromCoord(addrlib, &in, &out); 281 if (r != ADDR_OK) { 282 printf("%s addrlib error: %s\n", name, test); 283 abort(); 284 } 285 286 unsigned addr; 287 if (info->chip_class == GFX9) { 288 addr = gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight, 289 dout.metaBlkDepth, dout.pitch, dout.height, 290 in.x, in.y, in.slice, in.sample, in.pipeXor, NULL); 291 if (in.sample == 1) { 292 /* Sample 0 should be one byte before sample 1. The DCC MSAA clear relies on it. */ 293 assert(addr - 1 == 294 gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight, 295 dout.metaBlkDepth, dout.pitch, dout.height, 296 in.x, in.y, in.slice, 0, in.pipeXor, NULL)); 297 } 298 } else { 299 addr = gfx10_dcc_addr_from_coord(info, dout.equation.gfx10_bits, 300 in.bpp, dout.metaBlkWidth, dout.metaBlkHeight, 301 dout.pitch, dout.dccRamSliceSize, 302 in.x, in.y, in.slice, in.pipeXor); 303 } 304 305 if (out.addr != addr) { 306 printf("%s fail (%s) at %ux%ux%u@%u: expected = %llu, got = %u\n", 307 name, test, in.x, in.y, in.slice, in.sample, out.addr, addr); 308 return false; 309 } 310 } 311 } 312 } 313 } 314 return true; 315} 316 317static void run_dcc_address_test(const char *name, const struct radeon_info *info, bool full) 318{ 319 unsigned total = 0; 320 unsigned fails = 0; 321 unsigned swizzle_mode = info->chip_class == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_R_X; 322 unsigned last_size, max_samples, min_bpp, max_bpp; 323 324 if (full) { 325 last_size = 6*6 - 1; 326 max_samples = 8; 327 min_bpp = 8; 328 max_bpp = 128; 329 } else { 330 /* The test coverage is reduced for Gitlab CI because it timeouts. */ 331 last_size = 0; 332 max_samples = 2; 333 min_bpp = 32; 334 max_bpp = 64; 335 } 336 337#ifdef HAVE_OPENMP 338#pragma omp parallel for 339#endif 340 for (unsigned size = 0; size <= last_size; size++) { 341 unsigned width = 8 + 379 * (size % 6); 342 unsigned height = 8 + 379 * ((size / 6) % 6); 343 344 struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL); 345 ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib); 346 347 unsigned local_fails = 0; 348 unsigned local_total = 0; 349 350 for (unsigned bpp = min_bpp; bpp <= max_bpp; bpp *= 2) { 351 /* addrlib can do DccAddrFromCoord with MSAA images only on gfx9 */ 352 for (unsigned samples = 1; samples <= (info->chip_class == GFX9 ? max_samples : 1); samples *= 2) { 353 for (int rb_aligned = true; rb_aligned >= (samples > 1 ? true : false); rb_aligned--) { 354 for (int pipe_aligned = true; pipe_aligned >= (samples > 1 ? true : false); pipe_aligned--) { 355 for (unsigned mrt_index = 0; mrt_index < 2; mrt_index++) { 356 unsigned depth = 2; 357 char test[256]; 358 359 snprintf(test, sizeof(test), "%ux%ux%u %ubpp %u samples rb:%u pipe:%u", 360 width, height, depth, bpp, samples, rb_aligned, pipe_aligned); 361 362 if (one_dcc_address_test(name, test, addrlib, info, width, height, depth, samples, 363 bpp, swizzle_mode, pipe_aligned, rb_aligned, mrt_index, 364 0, 0, 0, 0)) { 365 } else { 366 local_fails++; 367 } 368 local_total++; 369 } 370 } 371 } 372 } 373 } 374 375 ac_addrlib_destroy(ac_addrlib); 376 p_atomic_add(&fails, local_fails); 377 p_atomic_add(&total, local_total); 378 } 379 printf("%16s total: %u, fail: %u\n", name, total, fails); 380} 381 382/* HTILE address computation without mipmapping. */ 383static unsigned gfx10_htile_addr_from_coord(const struct radeon_info *info, 384 const uint16_t *equation, 385 unsigned meta_block_width, 386 unsigned meta_block_height, 387 unsigned htile_pitch, unsigned htile_slice_size, 388 unsigned x, unsigned y, unsigned z, 389 unsigned pipe_xor) 390{ 391 unsigned meta_block_width_log2 = util_logbase2(meta_block_width); 392 unsigned meta_block_height_log2 = util_logbase2(meta_block_height); 393 unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 4; 394 395 return gfx10_meta_addr_from_coord(info, equation, 396 meta_block_width, meta_block_height, 397 blkSizeLog2, 398 htile_pitch, htile_slice_size, 399 x, y, z, pipe_xor, NULL); 400} 401 402static bool one_htile_address_test(const char *name, const char *test, ADDR_HANDLE addrlib, 403 const struct radeon_info *info, 404 unsigned width, unsigned height, unsigned depth, 405 unsigned bpp, unsigned swizzle_mode, 406 unsigned start_x, unsigned start_y, unsigned start_z) 407{ 408 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; 409 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; 410 ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0}; 411 ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0}; 412 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT in = {0}; 413 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT out = {0}; 414 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 415 416 hout.pMipInfo = meta_mip_info; 417 418 /* Compute HTILE info. */ 419 hin.hTileFlags.pipeAligned = 1; 420 hin.hTileFlags.rbAligned = 1; 421 hin.depthFlags.depth = 1; 422 hin.depthFlags.texture = 1; 423 hin.depthFlags.opt4space = 1; 424 hin.swizzleMode = in.swizzleMode = xin.swizzleMode = swizzle_mode; 425 hin.unalignedWidth = in.unalignedWidth = width; 426 hin.unalignedHeight = in.unalignedHeight = height; 427 hin.numSlices = in.numSlices = depth; 428 hin.numMipLevels = in.numMipLevels = 1; /* addrlib can't do HtileAddrFromCoord with mipmapping. */ 429 hin.firstMipIdInTail = 1; 430 431 int ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout); 432 assert(ret == ADDR_OK); 433 434 /* Compute xor. */ 435 static AddrFormat format[] = { 436 ADDR_FMT_8, /* unused */ 437 ADDR_FMT_16, 438 ADDR_FMT_32, 439 }; 440 xin.flags = hin.depthFlags; 441 xin.resourceType = ADDR_RSRC_TEX_2D; 442 xin.format = format[util_logbase2(bpp / 8)]; 443 xin.numFrags = xin.numSamples = in.numSamples = 1; 444 445 ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout); 446 assert(ret == ADDR_OK); 447 448 in.hTileFlags = hin.hTileFlags; 449 in.depthflags = xin.flags; 450 in.bpp = bpp; 451 in.pipeXor = xout.pipeBankXor; 452 453 for (in.x = start_x; in.x < width; in.x++) { 454 for (in.y = start_y; in.y < height; in.y++) { 455 for (in.slice = start_z; in.slice < depth; in.slice++) { 456 int r = Addr2ComputeHtileAddrFromCoord(addrlib, &in, &out); 457 if (r != ADDR_OK) { 458 printf("%s addrlib error: %s\n", name, test); 459 abort(); 460 } 461 462 unsigned addr = 463 gfx10_htile_addr_from_coord(info, hout.equation.gfx10_bits, 464 hout.metaBlkWidth, hout.metaBlkHeight, 465 hout.pitch, hout.sliceSize, 466 in.x, in.y, in.slice, in.pipeXor); 467 if (out.addr != addr) { 468 printf("%s fail (%s) at %ux%ux%u: expected = %llu, got = %u\n", 469 name, test, in.x, in.y, in.slice, out.addr, addr); 470 return false; 471 } 472 } 473 } 474 } 475 476 return true; 477} 478 479static void run_htile_address_test(const char *name, const struct radeon_info *info, bool full) 480{ 481 unsigned total = 0; 482 unsigned fails = 0; 483 unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32; 484 485 /* The test coverage is reduced for Gitlab CI because it timeouts. */ 486 if (!full) { 487 first_size = last_size = 0; 488 } 489 490#ifdef HAVE_OPENMP 491#pragma omp parallel for 492#endif 493 for (unsigned size = first_size; size <= last_size; size++) { 494 unsigned width = 8 + 379 * (size % 6); 495 unsigned height = 8 + 379 * (size / 6); 496 497 struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL); 498 ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib); 499 500 for (unsigned depth = 1; depth <= 2; depth *= 2) { 501 for (unsigned bpp = 16; bpp <= max_bpp; bpp *= 2) { 502 if (one_htile_address_test(name, name, addrlib, info, width, height, depth, 503 bpp, ADDR_SW_64KB_Z_X, 0, 0, 0)) { 504 } else { 505 p_atomic_inc(&fails); 506 } 507 p_atomic_inc(&total); 508 } 509 } 510 511 ac_addrlib_destroy(ac_addrlib); 512 } 513 printf("%16s total: %u, fail: %u\n", name, total, fails); 514} 515 516/* CMASK address computation without mipmapping and MSAA. */ 517static unsigned gfx10_cmask_addr_from_coord(const struct radeon_info *info, 518 /* Shader key inputs: */ 519 /* equation varies with bpp and pipe_aligned */ 520 const uint16_t *equation, unsigned bpp, 521 unsigned meta_block_width, unsigned meta_block_height, 522 /* Shader inputs: */ 523 unsigned cmask_pitch, unsigned cmask_slice_size, 524 unsigned x, unsigned y, unsigned z, 525 unsigned pipe_xor, 526 /* Shader outputs: */ 527 unsigned *bit_position) 528 529{ 530 unsigned meta_block_width_log2 = util_logbase2(meta_block_width); 531 unsigned meta_block_height_log2 = util_logbase2(meta_block_height); 532 unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 7; 533 534 return gfx10_meta_addr_from_coord(info, equation, 535 meta_block_width, meta_block_height, 536 blkSizeLog2, 537 cmask_pitch, cmask_slice_size, 538 x, y, z, pipe_xor, bit_position); 539} 540 541static bool one_cmask_address_test(const char *name, const char *test, ADDR_HANDLE addrlib, 542 const struct radeon_info *info, 543 unsigned width, unsigned height, unsigned depth, 544 unsigned bpp, unsigned swizzle_mode, 545 bool pipe_aligned, bool rb_aligned, unsigned mrt_index, 546 unsigned start_x, unsigned start_y, unsigned start_z) 547{ 548 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(xin)}; 549 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(xout)}; 550 ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {sizeof(cin)}; 551 ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {sizeof(cout)}; 552 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT in = {sizeof(in)}; 553 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT out = {sizeof(out)}; 554 555 /* Compute CMASK info. */ 556 cin.resourceType = xin.resourceType = in.resourceType = ADDR_RSRC_TEX_2D; 557 cin.swizzleMode = xin.swizzleMode = in.swizzleMode = swizzle_mode; 558 cin.unalignedWidth = in.unalignedWidth = width; 559 cin.unalignedHeight = in.unalignedHeight = height; 560 cin.numSlices = in.numSlices = depth; 561 cin.numMipLevels = 1; 562 cin.firstMipIdInTail = 1; 563 cin.cMaskFlags.pipeAligned = pipe_aligned; 564 cin.cMaskFlags.rbAligned = rb_aligned; 565 cin.cMaskFlags.linear = false; 566 cin.colorFlags.color = 1; 567 cin.colorFlags.texture = 1; 568 cin.colorFlags.opt4space = 1; 569 cin.colorFlags.metaRbUnaligned = !rb_aligned; 570 cin.colorFlags.metaPipeUnaligned = !pipe_aligned; 571 572 int ret = Addr2ComputeCmaskInfo(addrlib, &cin, &cout); 573 assert(ret == ADDR_OK); 574 575 /* Compute xor. */ 576 static AddrFormat format[] = { 577 ADDR_FMT_8, 578 ADDR_FMT_16, 579 ADDR_FMT_32, 580 ADDR_FMT_32_32, 581 ADDR_FMT_32_32_32_32, 582 }; 583 xin.flags = cin.colorFlags; 584 xin.format = format[util_logbase2(bpp / 8)]; 585 xin.surfIndex = mrt_index; 586 xin.numSamples = in.numSamples = xin.numFrags = in.numFrags = 1; 587 588 ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout); 589 assert(ret == ADDR_OK); 590 591 in.cMaskFlags = cin.cMaskFlags; 592 in.colorFlags = cin.colorFlags; 593 in.pipeXor = xout.pipeBankXor; 594 595 for (in.x = start_x; in.x < width; in.x++) { 596 for (in.y = start_y; in.y < height; in.y++) { 597 for (in.slice = start_z; in.slice < depth; in.slice++) { 598 int r = Addr2ComputeCmaskAddrFromCoord(addrlib, &in, &out); 599 if (r != ADDR_OK) { 600 printf("%s addrlib error: %s\n", name, test); 601 abort(); 602 } 603 604 unsigned addr, bit_position; 605 606 if (info->chip_class == GFX9) { 607 addr = gfx9_meta_addr_from_coord(info, &cout.equation.gfx9, 608 cout.metaBlkWidth, cout.metaBlkHeight, 1, 609 cout.pitch, cout.height, 610 in.x, in.y, in.slice, 0, in.pipeXor, 611 &bit_position); 612 } else { 613 addr = gfx10_cmask_addr_from_coord(info, cout.equation.gfx10_bits, 614 bpp, cout.metaBlkWidth, 615 cout.metaBlkHeight, 616 cout.pitch, cout.sliceSize, 617 in.x, in.y, in.slice, 618 in.pipeXor, 619 &bit_position); 620 } 621 622 if (out.addr != addr || out.bitPosition != bit_position) { 623 printf("%s fail (%s) at %ux%ux%u: expected (addr) = %llu, got = %u, " 624 "expected (bit_position) = %u, got = %u\n", 625 name, test, in.x, in.y, in.slice, out.addr, addr, 626 out.bitPosition, bit_position); 627 return false; 628 } 629 } 630 } 631 } 632 633 return true; 634} 635 636static void run_cmask_address_test(const char *name, const struct radeon_info *info, bool full) 637{ 638 unsigned total = 0; 639 unsigned fails = 0; 640 unsigned swizzle_mode = info->chip_class == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_Z_X; 641 unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32; 642 643 /* The test coverage is reduced for Gitlab CI because it timeouts. */ 644 if (!full) { 645 first_size = last_size = 0; 646 } 647 648#ifdef HAVE_OPENMP 649#pragma omp parallel for 650#endif 651 for (unsigned size = first_size; size <= last_size; size++) { 652 unsigned width = 8 + 379 * (size % 6); 653 unsigned height = 8 + 379 * (size / 6); 654 655 struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL); 656 ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib); 657 658 for (unsigned depth = 1; depth <= 2; depth *= 2) { 659 for (unsigned bpp = 16; bpp <= max_bpp; bpp *= 2) { 660 for (int rb_aligned = true; rb_aligned >= true; rb_aligned--) { 661 for (int pipe_aligned = true; pipe_aligned >= true; pipe_aligned--) { 662 if (one_cmask_address_test(name, name, addrlib, info, 663 width, height, depth, bpp, 664 swizzle_mode, 665 pipe_aligned, rb_aligned, 666 0, 0, 0, 0)) { 667 } else { 668 p_atomic_inc(&fails); 669 } 670 p_atomic_inc(&total); 671 } 672 } 673 } 674 } 675 676 ac_addrlib_destroy(ac_addrlib); 677 } 678 printf("%16s total: %u, fail: %u\n", name, total, fails); 679} 680 681int main(int argc, char **argv) 682{ 683 bool full = false; 684 685 if (argc == 2 && !strcmp(argv[1], "--full")) 686 full = true; 687 else 688 puts("Specify --full to run the full test."); 689 690 puts("DCC:"); 691 for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) { 692 struct radeon_info info = get_radeon_info(&testcases[i]); 693 694 run_dcc_address_test(testcases[i].name, &info, full); 695 } 696 697 puts("HTILE:"); 698 for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) { 699 struct radeon_info info = get_radeon_info(&testcases[i]); 700 701 /* Only GFX10+ is currently supported. */ 702 if (info.chip_class < GFX10) 703 continue; 704 705 run_htile_address_test(testcases[i].name, &info, full); 706 } 707 708 puts("CMASK:"); 709 for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) { 710 struct radeon_info info = get_radeon_info(&testcases[i]); 711 712 run_cmask_address_test(testcases[i].name, &info, full); 713 } 714 715 return 0; 716} 717