1/*
2 * Copyright © 2021 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27/* Make the test not meaningless when asserts are disabled. */
28#undef NDEBUG
29
30#include <assert.h>
31#include <inttypes.h>
32#include <stdio.h>
33#include <stdlib.h>
34
35#include <amdgpu.h>
36#include "drm-uapi/amdgpu_drm.h"
37#include "drm-uapi/drm_fourcc.h"
38
39#include "ac_surface.h"
40#include "util/macros.h"
41#include "util/u_atomic.h"
42#include "util/u_math.h"
43#include "util/u_vector.h"
44#include "util/mesa-sha1.h"
45#include "addrlib/inc/addrinterface.h"
46
47#include "ac_surface_test_common.h"
48
49/*
50 * The main goal of this test is to validate that our dcc/htile addressing
51 * functions match addrlib behavior.
52 */
53
54/* DCC address computation without mipmapping.
55 * CMASK address computation without mipmapping and without multisampling.
56 */
57static unsigned gfx9_meta_addr_from_coord(const struct radeon_info *info,
58                                          /* Shader key inputs: */
59                                          /* equation varies with resource_type, swizzle_mode,
60                                           * bpp, number of fragments, pipe_aligned, rb_aligned */
61                                          const struct gfx9_addr_meta_equation *eq,
62                                          unsigned meta_block_width, unsigned meta_block_height,
63                                          unsigned meta_block_depth,
64                                          /* Shader inputs: */
65                                          unsigned meta_pitch, unsigned meta_height,
66                                          unsigned x, unsigned y, unsigned z,
67                                          unsigned sample, unsigned pipe_xor,
68                                          /* Shader outputs (CMASK only): */
69                                          unsigned *bit_position)
70{
71   /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
72   unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
73   unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
74   unsigned meta_block_depth_log2 = util_logbase2(meta_block_depth);
75
76   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
77   unsigned numPipeBits = eq->numPipeBits;
78   unsigned pitchInBlock = meta_pitch >> meta_block_width_log2;
79   unsigned sliceSizeInBlock = (meta_height >> meta_block_height_log2) * pitchInBlock;
80
81   unsigned xb = x >> meta_block_width_log2;
82   unsigned yb = y >> meta_block_height_log2;
83   unsigned zb = z >> meta_block_depth_log2;
84
85   unsigned blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
86   unsigned coords[] = {x, y, z, sample, blockIndex};
87
88   unsigned address = 0;
89   unsigned num_bits = eq->num_bits;
90   assert(num_bits <= 32);
91
92   /* Compute the address up until the last bit that doesn't use the block index. */
93   for (unsigned b = 0; b < num_bits - 1; b++) {
94      unsigned xor = 0;
95      for (unsigned c = 0; c < 5; c++) {
96         if (eq->bit[b].coord[c].dim >= 5)
97            continue;
98
99         assert(eq->bit[b].coord[c].ord < 32);
100         unsigned ison = (coords[eq->bit[b].coord[c].dim] >>
101                                 eq->bit[b].coord[c].ord) & 0x1;
102
103         xor ^= ison;
104      }
105      address |= xor << b;
106   }
107
108   /* Fill the remaining bits with the block index. */
109   unsigned last = num_bits - 1;
110   address |= (blockIndex >> eq->bit[last].coord[0].ord) << last;
111
112   if (bit_position)
113      *bit_position = (address & 1) << 2;
114
115   unsigned pipeXor = pipe_xor & ((1 << numPipeBits) - 1);
116   return (address >> 1) ^ (pipeXor << m_pipeInterleaveLog2);
117}
118
119/* DCC/CMASK/HTILE address computation for GFX10. */
120static unsigned gfx10_meta_addr_from_coord(const struct radeon_info *info,
121                                           /* Shader key inputs: */
122                                           const uint16_t *equation,
123                                           unsigned meta_block_width, unsigned meta_block_height,
124                                           unsigned blkSizeLog2,
125                                           /* Shader inputs: */
126                                           unsigned meta_pitch, unsigned meta_slice_size,
127                                           unsigned x, unsigned y, unsigned z,
128                                           unsigned pipe_xor,
129                                           /* Shader outputs: (CMASK only) */
130                                           unsigned *bit_position)
131{
132   /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
133   unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
134   unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
135
136   unsigned coord[] = {x, y, z, 0};
137   unsigned address = 0;
138
139   for (unsigned i = 0; i < blkSizeLog2 + 1; i++) {
140      unsigned v = 0;
141
142      for (unsigned c = 0; c < 4; c++) {
143         if (equation[i*4+c] != 0) {
144            unsigned mask = equation[i*4+c];
145            unsigned bits = coord[c];
146
147            while (mask)
148               v ^= (bits >> u_bit_scan(&mask)) & 0x1;
149         }
150      }
151
152      address |= v << i;
153   }
154
155   unsigned blkMask = (1 << blkSizeLog2) - 1;
156   unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
157   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
158   unsigned xb = x >> meta_block_width_log2;
159   unsigned yb = y >> meta_block_height_log2;
160   unsigned pb = meta_pitch >> meta_block_width_log2;
161   unsigned blkIndex = (yb * pb) + xb;
162   unsigned pipeXor = ((pipe_xor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
163
164   if (bit_position)
165      *bit_position = (address & 1) << 2;
166
167   return (meta_slice_size * z) +
168          (blkIndex * (1 << blkSizeLog2)) +
169          ((address >> 1) ^ pipeXor);
170}
171
/* DCC address computation without mipmapping and MSAA.
 *
 * Thin wrapper around gfx10_meta_addr_from_coord that derives the block size from the
 * meta block dimensions and the bytes per pixel.
 */
static unsigned gfx10_dcc_addr_from_coord(const struct radeon_info *info,
                                          /* Shader key inputs: */
                                          /* equation varies with bpp and pipe_aligned */
                                          const uint16_t *equation, unsigned bpp,
                                          unsigned meta_block_width, unsigned meta_block_height,
                                          /* Shader inputs: */
                                          unsigned dcc_pitch, unsigned dcc_slice_size,
                                          unsigned x, unsigned y, unsigned z,
                                          unsigned pipe_xor)
{
   /* log2(block size) = log2(width) + log2(height) + log2(bytes per pixel) - 8 */
   const unsigned blkSizeLog2 = util_logbase2(meta_block_width) +
                                util_logbase2(meta_block_height) +
                                util_logbase2(bpp >> 3) - 8;

   /* DCC has no bit position output. */
   return gfx10_meta_addr_from_coord(info, equation, meta_block_width, meta_block_height,
                                     blkSizeLog2, dcc_pitch, dcc_slice_size, x, y, z,
                                     pipe_xor, NULL);
}
194
195static bool one_dcc_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
196                                 const struct radeon_info *info, unsigned width, unsigned height,
197                                 unsigned depth, unsigned samples, unsigned bpp,
198                                 unsigned swizzle_mode, bool pipe_aligned, bool rb_aligned,
199                                 unsigned mrt_index,
200                                 unsigned start_x, unsigned start_y, unsigned start_z,
201                                 unsigned start_sample)
202{
203   ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)};
204   ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)};
205   ADDR2_COMPUTE_DCCINFO_INPUT din = {sizeof(din)};
206   ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {sizeof(dout)};
207   ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT in = {sizeof(in)};
208   ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
209   ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
210
211   dout.pMipInfo = meta_mip_info;
212
213   /* Compute DCC info. */
214   in.dccKeyFlags.pipeAligned = din.dccKeyFlags.pipeAligned = pipe_aligned;
215   in.dccKeyFlags.rbAligned = din.dccKeyFlags.rbAligned = rb_aligned;
216   xin.resourceType = in.resourceType = din.resourceType = ADDR_RSRC_TEX_2D;
217   xin.swizzleMode = in.swizzleMode = din.swizzleMode = swizzle_mode;
218   in.bpp = din.bpp = bpp;
219   xin.numFrags = xin.numSamples = in.numFrags = din.numFrags = samples;
220   in.numMipLevels = din.numMipLevels = 1; /* addrlib can't do DccAddrFromCoord with mipmapping */
221   din.unalignedWidth = width;
222   din.unalignedHeight = height;
223   din.numSlices = depth;
224   din.firstMipIdInTail = 1;
225
226   int ret = Addr2ComputeDccInfo(addrlib, &din, &dout);
227   assert(ret == ADDR_OK);
228
229   /* Compute xor. */
230   static AddrFormat format[] = {
231      ADDR_FMT_8,
232      ADDR_FMT_16,
233      ADDR_FMT_32,
234      ADDR_FMT_32_32,
235      ADDR_FMT_32_32_32_32,
236   };
237   xin.flags.color = 1;
238   xin.flags.texture = 1;
239   xin.flags.opt4space = 1;
240   xin.flags.metaRbUnaligned = !rb_aligned;
241   xin.flags.metaPipeUnaligned = !pipe_aligned;
242   xin.format = format[util_logbase2(bpp / 8)];
243   xin.surfIndex = mrt_index;
244
245   ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
246   assert(ret == ADDR_OK);
247
248   /* Compute addresses */
249   in.compressBlkWidth = dout.compressBlkWidth;
250   in.compressBlkHeight = dout.compressBlkHeight;
251   in.compressBlkDepth = dout.compressBlkDepth;
252   in.metaBlkWidth = dout.metaBlkWidth;
253   in.metaBlkHeight = dout.metaBlkHeight;
254   in.metaBlkDepth = dout.metaBlkDepth;
255   in.dccRamSliceSize = dout.dccRamSliceSize;
256
257   in.mipId = 0;
258   in.pitch = dout.pitch;
259   in.height = dout.height;
260   in.pipeXor = xout.pipeBankXor;
261
262   /* Validate that the packed gfx9_meta_equation structure can fit all fields. */
263   const struct gfx9_meta_equation eq;
264   if (info->chip_class == GFX9) {
265      /* The bit array is smaller in gfx9_meta_equation than in addrlib. */
266      assert(dout.equation.gfx9.num_bits <= ARRAY_SIZE(eq.u.gfx9.bit));
267   } else {
268      /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */
269      for (unsigned i = 0; i < 4; i++)
270         assert(dout.equation.gfx10_bits[i] == 0);
271
272      for (unsigned i = ARRAY_SIZE(eq.u.gfx10_bits) + 4; i < 68; i++)
273         assert(dout.equation.gfx10_bits[i] == 0);
274   }
275
276   for (in.x = start_x; in.x < in.pitch; in.x += dout.compressBlkWidth) {
277      for (in.y = start_y; in.y < in.height; in.y += dout.compressBlkHeight) {
278         for (in.slice = start_z; in.slice < depth; in.slice += dout.compressBlkDepth) {
279            for (in.sample = start_sample; in.sample < samples; in.sample++) {
280               int r = Addr2ComputeDccAddrFromCoord(addrlib, &in, &out);
281               if (r != ADDR_OK) {
282                  printf("%s addrlib error: %s\n", name, test);
283                  abort();
284               }
285
286               unsigned addr;
287               if (info->chip_class == GFX9) {
288                  addr = gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
289                                                   dout.metaBlkDepth, dout.pitch, dout.height,
290                                                   in.x, in.y, in.slice, in.sample, in.pipeXor, NULL);
291                  if (in.sample == 1) {
292                     /* Sample 0 should be one byte before sample 1. The DCC MSAA clear relies on it. */
293                     assert(addr - 1 ==
294                            gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
295                                                      dout.metaBlkDepth, dout.pitch, dout.height,
296                                                      in.x, in.y, in.slice, 0, in.pipeXor, NULL));
297                  }
298               } else {
299                  addr = gfx10_dcc_addr_from_coord(info, dout.equation.gfx10_bits,
300                                                   in.bpp, dout.metaBlkWidth, dout.metaBlkHeight,
301                                                   dout.pitch, dout.dccRamSliceSize,
302                                                   in.x, in.y, in.slice, in.pipeXor);
303               }
304
305               if (out.addr != addr) {
306                  printf("%s fail (%s) at %ux%ux%u@%u: expected = %llu, got = %u\n",
307                         name, test, in.x, in.y, in.slice, in.sample, out.addr, addr);
308                  return false;
309               }
310            }
311         }
312      }
313   }
314   return true;
315}
316
317static void run_dcc_address_test(const char *name, const struct radeon_info *info, bool full)
318{
319   unsigned total = 0;
320   unsigned fails = 0;
321   unsigned swizzle_mode = info->chip_class == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_R_X;
322   unsigned last_size, max_samples, min_bpp, max_bpp;
323
324   if (full) {
325      last_size = 6*6 - 1;
326      max_samples = 8;
327      min_bpp = 8;
328      max_bpp = 128;
329   } else {
330      /* The test coverage is reduced for Gitlab CI because it timeouts. */
331      last_size = 0;
332      max_samples = 2;
333      min_bpp = 32;
334      max_bpp = 64;
335   }
336
337#ifdef HAVE_OPENMP
338#pragma omp parallel for
339#endif
340   for (unsigned size = 0; size <= last_size; size++) {
341      unsigned width = 8 + 379 * (size % 6);
342      unsigned height = 8 + 379 * ((size / 6) % 6);
343
344      struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
345      ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
346
347      unsigned local_fails = 0;
348      unsigned local_total = 0;
349
350      for (unsigned bpp = min_bpp; bpp <= max_bpp; bpp *= 2) {
351         /* addrlib can do DccAddrFromCoord with MSAA images only on gfx9 */
352         for (unsigned samples = 1; samples <= (info->chip_class == GFX9 ? max_samples : 1); samples *= 2) {
353            for (int rb_aligned = true; rb_aligned >= (samples > 1 ? true : false); rb_aligned--) {
354               for (int pipe_aligned = true; pipe_aligned >= (samples > 1 ? true : false); pipe_aligned--) {
355                  for (unsigned mrt_index = 0; mrt_index < 2; mrt_index++) {
356                     unsigned depth = 2;
357                     char test[256];
358
359                     snprintf(test, sizeof(test), "%ux%ux%u %ubpp %u samples rb:%u pipe:%u",
360                              width, height, depth, bpp, samples, rb_aligned, pipe_aligned);
361
362                     if (one_dcc_address_test(name, test, addrlib, info, width, height, depth, samples,
363                                              bpp, swizzle_mode, pipe_aligned, rb_aligned, mrt_index,
364                                              0, 0, 0, 0)) {
365                     } else {
366                        local_fails++;
367                     }
368                     local_total++;
369                  }
370               }
371            }
372         }
373      }
374
375      ac_addrlib_destroy(ac_addrlib);
376      p_atomic_add(&fails, local_fails);
377      p_atomic_add(&total, local_total);
378   }
379   printf("%16s total: %u, fail: %u\n", name, total, fails);
380}
381
/* HTILE address computation without mipmapping.
 *
 * Thin wrapper around gfx10_meta_addr_from_coord that derives the block size from the
 * meta block dimensions.
 */
static unsigned gfx10_htile_addr_from_coord(const struct radeon_info *info,
                                            const uint16_t *equation,
                                            unsigned meta_block_width,
                                            unsigned meta_block_height,
                                            unsigned htile_pitch, unsigned htile_slice_size,
                                            unsigned x, unsigned y, unsigned z,
                                            unsigned pipe_xor)
{
   /* log2(block size) = log2(width) + log2(height) - 4 */
   const unsigned blkSizeLog2 = util_logbase2(meta_block_width) +
                                util_logbase2(meta_block_height) - 4;

   /* HTILE has no bit position output. */
   return gfx10_meta_addr_from_coord(info, equation, meta_block_width, meta_block_height,
                                     blkSizeLog2, htile_pitch, htile_slice_size, x, y, z,
                                     pipe_xor, NULL);
}
401
402static bool one_htile_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
403                                   const struct radeon_info *info,
404                                   unsigned width, unsigned height, unsigned depth,
405                                   unsigned bpp, unsigned swizzle_mode,
406                                   unsigned start_x, unsigned start_y, unsigned start_z)
407{
408   ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
409   ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
410   ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
411   ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
412   ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT in = {0};
413   ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT out = {0};
414   ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
415
416   hout.pMipInfo = meta_mip_info;
417
418   /* Compute HTILE info. */
419   hin.hTileFlags.pipeAligned = 1;
420   hin.hTileFlags.rbAligned = 1;
421   hin.depthFlags.depth = 1;
422   hin.depthFlags.texture = 1;
423   hin.depthFlags.opt4space = 1;
424   hin.swizzleMode = in.swizzleMode = xin.swizzleMode = swizzle_mode;
425   hin.unalignedWidth = in.unalignedWidth = width;
426   hin.unalignedHeight = in.unalignedHeight = height;
427   hin.numSlices = in.numSlices = depth;
428   hin.numMipLevels = in.numMipLevels = 1; /* addrlib can't do HtileAddrFromCoord with mipmapping. */
429   hin.firstMipIdInTail = 1;
430
431   int ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout);
432   assert(ret == ADDR_OK);
433
434   /* Compute xor. */
435   static AddrFormat format[] = {
436      ADDR_FMT_8, /* unused */
437      ADDR_FMT_16,
438      ADDR_FMT_32,
439   };
440   xin.flags = hin.depthFlags;
441   xin.resourceType = ADDR_RSRC_TEX_2D;
442   xin.format = format[util_logbase2(bpp / 8)];
443   xin.numFrags = xin.numSamples = in.numSamples = 1;
444
445   ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
446   assert(ret == ADDR_OK);
447
448   in.hTileFlags = hin.hTileFlags;
449   in.depthflags = xin.flags;
450   in.bpp = bpp;
451   in.pipeXor = xout.pipeBankXor;
452
453   for (in.x = start_x; in.x < width; in.x++) {
454      for (in.y = start_y; in.y < height; in.y++) {
455         for (in.slice = start_z; in.slice < depth; in.slice++) {
456            int r = Addr2ComputeHtileAddrFromCoord(addrlib, &in, &out);
457            if (r != ADDR_OK) {
458               printf("%s addrlib error: %s\n", name, test);
459               abort();
460            }
461
462            unsigned addr =
463               gfx10_htile_addr_from_coord(info, hout.equation.gfx10_bits,
464                                           hout.metaBlkWidth, hout.metaBlkHeight,
465                                           hout.pitch, hout.sliceSize,
466                                           in.x, in.y, in.slice, in.pipeXor);
467            if (out.addr != addr) {
468               printf("%s fail (%s) at %ux%ux%u: expected = %llu, got = %u\n",
469                      name, test, in.x, in.y, in.slice, out.addr, addr);
470               return false;
471            }
472         }
473      }
474   }
475
476   return true;
477}
478
479static void run_htile_address_test(const char *name, const struct radeon_info *info, bool full)
480{
481   unsigned total = 0;
482   unsigned fails = 0;
483   unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32;
484
485   /* The test coverage is reduced for Gitlab CI because it timeouts. */
486   if (!full) {
487      first_size = last_size = 0;
488   }
489
490#ifdef HAVE_OPENMP
491#pragma omp parallel for
492#endif
493   for (unsigned size = first_size; size <= last_size; size++) {
494      unsigned width = 8 + 379 * (size % 6);
495      unsigned height = 8 + 379 * (size / 6);
496
497      struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
498      ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
499
500      for (unsigned depth = 1; depth <= 2; depth *= 2) {
501         for (unsigned bpp = 16; bpp <= max_bpp; bpp *= 2) {
502            if (one_htile_address_test(name, name, addrlib, info, width, height, depth,
503                                       bpp, ADDR_SW_64KB_Z_X, 0, 0, 0)) {
504            } else {
505               p_atomic_inc(&fails);
506            }
507            p_atomic_inc(&total);
508         }
509      }
510
511      ac_addrlib_destroy(ac_addrlib);
512   }
513   printf("%16s total: %u, fail: %u\n", name, total, fails);
514}
515
/* CMASK address computation without mipmapping and MSAA.
 *
 * Thin wrapper around gfx10_meta_addr_from_coord that derives the block size from the
 * meta block dimensions. "bpp" is accepted but not used here.
 */
static unsigned gfx10_cmask_addr_from_coord(const struct radeon_info *info,
                                            /* Shader key inputs: */
                                            /* equation varies with bpp and pipe_aligned */
                                            const uint16_t *equation, unsigned bpp,
                                            unsigned meta_block_width, unsigned meta_block_height,
                                            /* Shader inputs: */
                                            unsigned cmask_pitch, unsigned cmask_slice_size,
                                            unsigned x, unsigned y, unsigned z,
                                            unsigned pipe_xor,
                                            /* Shader outputs: */
                                            unsigned *bit_position)
{
   /* log2(block size) = log2(width) + log2(height) - 7 */
   const unsigned blkSizeLog2 = util_logbase2(meta_block_width) +
                                util_logbase2(meta_block_height) - 7;

   return gfx10_meta_addr_from_coord(info, equation, meta_block_width, meta_block_height,
                                     blkSizeLog2, cmask_pitch, cmask_slice_size, x, y, z,
                                     pipe_xor, bit_position);
}
540
541static bool one_cmask_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
542                                   const struct radeon_info *info,
543                                   unsigned width, unsigned height, unsigned depth,
544                                   unsigned bpp, unsigned swizzle_mode,
545                                   bool pipe_aligned, bool rb_aligned, unsigned mrt_index,
546                                   unsigned start_x, unsigned start_y, unsigned start_z)
547{
548   ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(xin)};
549   ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(xout)};
550   ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {sizeof(cin)};
551   ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {sizeof(cout)};
552   ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT in = {sizeof(in)};
553   ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
554
555   /* Compute CMASK info. */
556   cin.resourceType = xin.resourceType = in.resourceType = ADDR_RSRC_TEX_2D;
557   cin.swizzleMode = xin.swizzleMode = in.swizzleMode = swizzle_mode;
558   cin.unalignedWidth = in.unalignedWidth = width;
559   cin.unalignedHeight = in.unalignedHeight = height;
560   cin.numSlices = in.numSlices = depth;
561   cin.numMipLevels = 1;
562   cin.firstMipIdInTail = 1;
563   cin.cMaskFlags.pipeAligned = pipe_aligned;
564   cin.cMaskFlags.rbAligned = rb_aligned;
565   cin.cMaskFlags.linear = false;
566   cin.colorFlags.color = 1;
567   cin.colorFlags.texture = 1;
568   cin.colorFlags.opt4space = 1;
569   cin.colorFlags.metaRbUnaligned = !rb_aligned;
570   cin.colorFlags.metaPipeUnaligned = !pipe_aligned;
571
572   int ret = Addr2ComputeCmaskInfo(addrlib, &cin, &cout);
573   assert(ret == ADDR_OK);
574
575   /* Compute xor. */
576   static AddrFormat format[] = {
577      ADDR_FMT_8,
578      ADDR_FMT_16,
579      ADDR_FMT_32,
580      ADDR_FMT_32_32,
581      ADDR_FMT_32_32_32_32,
582   };
583   xin.flags = cin.colorFlags;
584   xin.format = format[util_logbase2(bpp / 8)];
585   xin.surfIndex = mrt_index;
586   xin.numSamples = in.numSamples = xin.numFrags = in.numFrags = 1;
587
588   ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
589   assert(ret == ADDR_OK);
590
591   in.cMaskFlags = cin.cMaskFlags;
592   in.colorFlags = cin.colorFlags;
593   in.pipeXor = xout.pipeBankXor;
594
595   for (in.x = start_x; in.x < width; in.x++) {
596      for (in.y = start_y; in.y < height; in.y++) {
597         for (in.slice = start_z; in.slice < depth; in.slice++) {
598            int r = Addr2ComputeCmaskAddrFromCoord(addrlib, &in, &out);
599            if (r != ADDR_OK) {
600               printf("%s addrlib error: %s\n", name, test);
601               abort();
602            }
603
604            unsigned addr, bit_position;
605
606            if (info->chip_class == GFX9) {
607               addr = gfx9_meta_addr_from_coord(info, &cout.equation.gfx9,
608                                                cout.metaBlkWidth, cout.metaBlkHeight, 1,
609                                                cout.pitch, cout.height,
610                                                in.x, in.y, in.slice, 0, in.pipeXor,
611                                                &bit_position);
612            } else {
613               addr = gfx10_cmask_addr_from_coord(info, cout.equation.gfx10_bits,
614                                                  bpp, cout.metaBlkWidth,
615                                                  cout.metaBlkHeight,
616                                                  cout.pitch, cout.sliceSize,
617                                                  in.x, in.y, in.slice,
618                                                  in.pipeXor,
619                                                  &bit_position);
620            }
621
622            if (out.addr != addr || out.bitPosition != bit_position) {
623               printf("%s fail (%s) at %ux%ux%u: expected (addr) = %llu, got = %u, "
624                      "expected (bit_position) = %u, got = %u\n",
625                      name, test, in.x, in.y, in.slice, out.addr, addr,
626                      out.bitPosition, bit_position);
627               return false;
628            }
629         }
630      }
631   }
632
633   return true;
634}
635
636static void run_cmask_address_test(const char *name, const struct radeon_info *info, bool full)
637{
638   unsigned total = 0;
639   unsigned fails = 0;
640   unsigned swizzle_mode = info->chip_class == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_Z_X;
641   unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32;
642
643   /* The test coverage is reduced for Gitlab CI because it timeouts. */
644   if (!full) {
645      first_size = last_size = 0;
646   }
647
648#ifdef HAVE_OPENMP
649#pragma omp parallel for
650#endif
651   for (unsigned size = first_size; size <= last_size; size++) {
652      unsigned width = 8 + 379 * (size % 6);
653      unsigned height = 8 + 379 * (size / 6);
654
655      struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
656      ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
657
658      for (unsigned depth = 1; depth <= 2; depth *= 2) {
659         for (unsigned bpp = 16; bpp <= max_bpp; bpp *= 2) {
660            for (int rb_aligned = true; rb_aligned >= true; rb_aligned--) {
661               for (int pipe_aligned = true; pipe_aligned >= true; pipe_aligned--) {
662                  if (one_cmask_address_test(name, name, addrlib, info,
663                                             width, height, depth, bpp,
664                                             swizzle_mode,
665                                             pipe_aligned, rb_aligned,
666                                             0, 0, 0, 0)) {
667                  } else {
668                     p_atomic_inc(&fails);
669                  }
670                  p_atomic_inc(&total);
671               }
672            }
673         }
674      }
675
676      ac_addrlib_destroy(ac_addrlib);
677   }
678   printf("%16s total: %u, fail: %u\n", name, total, fails);
679}
680
681int main(int argc, char **argv)
682{
683   bool full = false;
684
685   if (argc == 2 && !strcmp(argv[1], "--full"))
686      full = true;
687   else
688      puts("Specify --full to run the full test.");
689
690   puts("DCC:");
691   for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
692      struct radeon_info info = get_radeon_info(&testcases[i]);
693
694      run_dcc_address_test(testcases[i].name, &info, full);
695   }
696
697   puts("HTILE:");
698   for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
699      struct radeon_info info = get_radeon_info(&testcases[i]);
700
701      /* Only GFX10+ is currently supported. */
702      if (info.chip_class < GFX10)
703         continue;
704
705      run_htile_address_test(testcases[i].name, &info, full);
706   }
707
708   puts("CMASK:");
709   for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
710      struct radeon_info info = get_radeon_info(&testcases[i]);
711
712      run_cmask_address_test(testcases[i].name, &info, full);
713   }
714
715   return 0;
716}
717