isl.c revision 7ec681f3
1/*
2 * Copyright 2015 Intel Corporation
3 *
4 *  Permission is hereby granted, free of charge, to any person obtaining a
5 *  copy of this software and associated documentation files (the "Software"),
6 *  to deal in the Software without restriction, including without limitation
7 *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 *  and/or sell copies of the Software, and to permit persons to whom the
9 *  Software is furnished to do so, subject to the following conditions:
10 *
11 *  The above copyright notice and this permission notice (including the next
12 *  paragraph) shall be included in all copies or substantial portions of the
13 *  Software.
14 *
15 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 *  IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdarg.h>
26#include <stdio.h>
27
28#include "genxml/genX_bits.h"
29
30#include "isl.h"
31#include "isl_gfx4.h"
32#include "isl_gfx6.h"
33#include "isl_gfx7.h"
34#include "isl_gfx8.h"
35#include "isl_gfx9.h"
36#include "isl_gfx12.h"
37#include "isl_priv.h"
38
39void
40isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2,
41                           uint32_t yt1, uint32_t yt2,
42                           char *dst, const char *src,
43                           uint32_t dst_pitch, int32_t src_pitch,
44                           bool has_swizzling,
45                           enum isl_tiling tiling,
46                           isl_memcpy_type copy_type)
47{
48#ifdef USE_SSE41
49   if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
50      _isl_memcpy_linear_to_tiled_sse41(
51         xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
52         tiling, copy_type);
53      return;
54   }
55#endif
56
57   _isl_memcpy_linear_to_tiled(
58      xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
59      tiling, copy_type);
60}
61
62void
63isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2,
64                           uint32_t yt1, uint32_t yt2,
65                           char *dst, const char *src,
66                           int32_t dst_pitch, uint32_t src_pitch,
67                           bool has_swizzling,
68                           enum isl_tiling tiling,
69                           isl_memcpy_type copy_type)
70{
71#ifdef USE_SSE41
72   if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
73      _isl_memcpy_tiled_to_linear_sse41(
74         xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
75         tiling, copy_type);
76      return;
77   }
78#endif
79
80   _isl_memcpy_tiled_to_linear(
81      xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
82      tiling, copy_type);
83}
84
85void PRINTFLIKE(3, 4) UNUSED
86__isl_finishme(const char *file, int line, const char *fmt, ...)
87{
88   va_list ap;
89   char buf[512];
90
91   va_start(ap, fmt);
92   vsnprintf(buf, sizeof(buf), fmt, ap);
93   va_end(ap);
94
95   fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
96}
97
98static void
99isl_device_setup_mocs(struct isl_device *dev)
100{
101   if (dev->info->ver >= 12) {
102      if (dev->info->is_dg2) {
103         /* L3CC=WB; BSpec: 45101 */
104         dev->mocs.internal = 3 << 1;
105         dev->mocs.external = 3 << 1;
106      } else if (dev->info->is_dg1) {
107         /* L3CC=WB */
108         dev->mocs.internal = 5 << 1;
109         /* Displayables on DG1 are free to cache in L3 since L3 is transient
110          * and flushed at bottom of each submission.
111          */
112         dev->mocs.external = 5 << 1;
113      } else {
114         /* TC=1/LLC Only, LeCC=1/UC, LRUM=0, L3CC=3/WB */
115         dev->mocs.external = 61 << 1;
116         /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
117         dev->mocs.internal = 2 << 1;
118
119         /* L1 - HDC:L1 + L3 + LLC */
120         dev->mocs.l1_hdc_l3_llc = 48 << 1;
121      }
122   } else if (dev->info->ver >= 9) {
123      /* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
124      dev->mocs.external = 1 << 1;
125      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
126      dev->mocs.internal = 2 << 1;
127   } else if (dev->info->ver >= 8) {
128      /* MEMORY_OBJECT_CONTROL_STATE:
129       * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,
130       * .TargetCache = L3DefertoPATforLLCeLLCselection,
131       * .AgeforQUADLRU = 0
132       */
133      dev->mocs.external = 0x18;
134      /* MEMORY_OBJECT_CONTROL_STATE:
135       * .MemoryTypeLLCeLLCCacheabilityControl = WB,
136       * .TargetCache = L3DefertoPATforLLCeLLCselection,
137       * .AgeforQUADLRU = 0
138       */
139      dev->mocs.internal = 0x78;
140   } else if (dev->info->ver >= 7) {
141      if (dev->info->is_haswell) {
142         /* MEMORY_OBJECT_CONTROL_STATE:
143          * .LLCeLLCCacheabilityControlLLCCC             = 0,
144          * .L3CacheabilityControlL3CC                   = 1,
145          */
146         dev->mocs.internal = 1;
147         dev->mocs.external = 1;
148      } else {
149         /* MEMORY_OBJECT_CONTROL_STATE:
150          * .GraphicsDataTypeGFDT                        = 0,
151          * .LLCCacheabilityControlLLCCC                 = 0,
152          * .L3CacheabilityControlL3CC                   = 1,
153          */
154         dev->mocs.internal = 1;
155         dev->mocs.external = 1;
156      }
157   } else {
158      dev->mocs.internal = 0;
159      dev->mocs.external = 0;
160   }
161}
162
163/**
164 * Return an appropriate MOCS entry for the given usage flags.
165 */
166uint32_t
167isl_mocs(const struct isl_device *dev, isl_surf_usage_flags_t usage,
168         bool external)
169{
170   if (external)
171      return dev->mocs.external;
172
173   if (dev->info->ver >= 12 && !dev->info->is_dg1) {
174      if (usage & ISL_SURF_USAGE_STAGING_BIT)
175         return dev->mocs.internal;
176
177      /* Using L1:HDC for storage buffers breaks Vulkan memory model
178       * tests that use shader atomics.  This isn't likely to work out,
179       * and we can't know a priori whether they'll be used.  So just
180       * continue with ordinary internal MOCS for now.
181       */
182      if (usage & ISL_SURF_USAGE_STORAGE_BIT)
183         return dev->mocs.internal;
184
185      if (usage & (ISL_SURF_USAGE_CONSTANT_BUFFER_BIT |
186                   ISL_SURF_USAGE_RENDER_TARGET_BIT |
187                   ISL_SURF_USAGE_TEXTURE_BIT))
188         return dev->mocs.l1_hdc_l3_llc;
189   }
190
191   return dev->mocs.internal;
192}
193
/**
 * Initialize an isl_device for the hardware described by @p info.
 *
 * Stores @p info (the pointer itself, not a copy) and @p has_bit6_swizzling
 * in @p dev, derives the surface-state and depth/stencil/HiZ packet sizes
 * and offsets from the genxml helpers, picks the maximum buffer size and
 * finally sets up the MOCS values via isl_device_setup_mocs().
 *
 * @param[out] dev                 Device to fill in
 * @param[in]  info                Device info recorded in dev->info
 * @param[in]  has_bit6_swizzling  Recorded in dev->has_bit6_swizzling; must
 *                                 be false on gfx8+ (asserted)
 */
void
isl_device_init(struct isl_device *dev,
                const struct intel_device_info *info,
                bool has_bit6_swizzling)
{
   /* Gfx8+ don't have bit6 swizzling, ensure callsite is not confused. */
   assert(!(has_bit6_swizzling && info->ver >= 8));

   /* dev->info has to be assigned first: the ISL_GFX_VER(dev) uses below
    * take dev as their argument.
    */
   dev->info = info;
   dev->use_separate_stencil = ISL_GFX_VER(dev) >= 6;
   dev->has_bit6_swizzling = has_bit6_swizzling;

   /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
    * device properties at buildtime. Verify that the macros agree with the
    * device properties chosen during runtime.
    */
   ISL_GFX_VER_SANITIZE(dev);
   ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);

   /* Did we break hiz or stencil? */
   if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
      assert(info->has_hiz_and_separate_stencil);
   if (info->must_use_separate_stencil)
      assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));

   /* NOTE(review): the *_length() helpers presumably return dwords, hence
    * the "* 4" to get bytes -- confirm against genX_bits.h.
    */
   dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
   dev->ss.align = isl_align(dev->ss.size, 32);

   dev->ss.clear_color_state_size =
      isl_align(CLEAR_COLOR_length(info) * 4, 64);
   /* "/ 32 * 4" rounds a start position down to a multiple of 32 and scales
    * it by 1/8 (presumably bit position -> byte offset of the containing
    * dword).
    */
   dev->ss.clear_color_state_offset =
      RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;

   /* Sum of the four clear-color channel widths, aligned to 32 and divided
    * by 8.
    */
   dev->ss.clear_value_size =
      isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
                RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
                RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
                RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;

   dev->ss.clear_value_offset =
      RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;

   assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
   dev->ss.addr_offset =
      RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;

   /* The "Auxiliary Surface Base Address" field starts a bit higher up
    * because the bottom 12 bits are used for other things.  Round down to
    * the nearest dword before.
    */
   dev->ss.aux_addr_offset =
      (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;

   /* dev->ds.size starts as the depth-buffer packet alone and grows below
    * when separate stencil is in use.
    */
   dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
   assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
   dev->ds.depth_offset =
      _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;

   if (dev->use_separate_stencil) {
      dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
                      _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
                      _3DSTATE_CLEAR_PARAMS_length(info) * 4;

      /* The stencil address offset is measured past the depth-buffer
       * packet.
       */
      assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
      dev->ds.stencil_offset =
         _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
         _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;

      /* The HiZ address offset is measured past both the depth and the
       * stencil packets.
       */
      assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
      dev->ds.hiz_offset =
         _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
         _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
         _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
   } else {
      dev->ds.stencil_offset = 0;
      dev->ds.hiz_offset = 0;
   }

   if (ISL_GFX_VER(dev) >= 7) {
      /* From the IVB PRM, SURFACE_STATE::Height,
       *
       *    For typed buffer and structured buffer surfaces, the number
       *    of entries in the buffer ranges from 1 to 2^27. For raw buffer
       *    surfaces, the number of entries in the buffer is the number of bytes
       *    which can range from 1 to 2^30.
       *
       * This limit is only concerned with raw buffers.
       */
      dev->max_buffer_size = 1ull << 30;
   } else {
      dev->max_buffer_size = 1ull << 27;
   }

   /* Reads dev->info, which was assigned at the top of this function. */
   isl_device_setup_mocs(dev);
}
289
290/**
291 * @brief Query the set of multisamples supported by the device.
292 *
293 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
294 * supported.
295 */
296isl_sample_count_mask_t ATTRIBUTE_CONST
297isl_device_get_sample_counts(struct isl_device *dev)
298{
299   if (ISL_GFX_VER(dev) >= 9) {
300      return ISL_SAMPLE_COUNT_1_BIT |
301             ISL_SAMPLE_COUNT_2_BIT |
302             ISL_SAMPLE_COUNT_4_BIT |
303             ISL_SAMPLE_COUNT_8_BIT |
304             ISL_SAMPLE_COUNT_16_BIT;
305   } else if (ISL_GFX_VER(dev) >= 8) {
306      return ISL_SAMPLE_COUNT_1_BIT |
307             ISL_SAMPLE_COUNT_2_BIT |
308             ISL_SAMPLE_COUNT_4_BIT |
309             ISL_SAMPLE_COUNT_8_BIT;
310   } else if (ISL_GFX_VER(dev) >= 7) {
311      return ISL_SAMPLE_COUNT_1_BIT |
312             ISL_SAMPLE_COUNT_4_BIT |
313             ISL_SAMPLE_COUNT_8_BIT;
314   } else if (ISL_GFX_VER(dev) >= 6) {
315      return ISL_SAMPLE_COUNT_1_BIT |
316             ISL_SAMPLE_COUNT_4_BIT;
317   } else {
318      return ISL_SAMPLE_COUNT_1_BIT;
319   }
320}
321
/**
 * Returns an isl_tile_info representation of the given isl_tiling when
 * used in the given configuration.
 *
 * For a tiled surface whose format_bpb is not a power of two, the result is
 * the one computed for format_bpb / 3 (see the comment at the top of the
 * function body); in that case tile_info->format_bpb also holds the divided
 * value.
 *
 * @param[in]  tiling      The tiling format to introspect
 * @param[in]  dim         The dimensionality of the surface being tiled
 * @param[in]  msaa_layout The layout of samples in the surface being tiled
 * @param[in]  format_bpb  The number of bits per surface element (block) for
 *                         the surface being tiled
 * @param[in]  samples     The samples in the surface being tiled
 * @param[out] tile_info   Return parameter for the tiling information
 */
void
isl_tiling_get_info(enum isl_tiling tiling,
                    enum isl_surf_dim dim,
                    enum isl_msaa_layout msaa_layout,
                    uint32_t format_bpb,
                    uint32_t samples,
                    struct isl_tile_info *tile_info)
{
   /* Block size in bytes.  Integer division: this is 0 for formats narrower
    * than 8 bits per block, which is why most cases below assert on it.
    */
   const uint32_t bs = format_bpb / 8;
   struct isl_extent4d logical_el;
   struct isl_extent2d phys_B;

   if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
      /* It is possible to have non-power-of-two formats in a tiled buffer.
       * The easiest way to handle this is to treat the tile as if it is three
       * times as wide.  This way no pixel will ever cross a tile boundary.
       * This really only works on a subset of tiling formats.
       */
      assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0 ||
             tiling == ISL_TILING_4);
      assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
      /* The recursive call fills in *tile_info completely; nothing below
       * runs for this case.
       */
      isl_tiling_get_info(tiling, dim, msaa_layout, format_bpb / 3, samples,
                          tile_info);
      return;
   }

   switch (tiling) {
   case ISL_TILING_LINEAR:
      /* A linear "tile" is a single element. */
      assert(bs > 0);
      logical_el = isl_extent4d(1, 1, 1, 1);
      phys_B = isl_extent2d(bs, 1);
      break;

   case ISL_TILING_X:
      /* 512 bytes wide, 8 rows tall. */
      assert(bs > 0);
      logical_el = isl_extent4d(512 / bs, 8, 1, 1);
      phys_B = isl_extent2d(512, 8);
      break;

   case ISL_TILING_Y0:
   case ISL_TILING_4:
      /* 128 bytes wide, 32 rows tall. */
      assert(bs > 0);
      logical_el = isl_extent4d(128 / bs, 32, 1, 1);
      phys_B = isl_extent2d(128, 32);
      break;

   case ISL_TILING_W:
      assert(bs == 1);
      logical_el = isl_extent4d(64, 64, 1, 1);
      /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
       *
       *    "If the surface is a stencil buffer (and thus has Tile Mode set
       *    to TILEMODE_WMAJOR), the pitch must be set to 2x the value
       *    computed based on width, as the stencil buffer is stored with two
       *    rows interleaved."
       *
       * This, together with the fact that stencil buffers are referred to as
       * being Y-tiled in the PRMs for older hardware implies that the
       * physical size of a W-tile is actually the same as for a Y-tile.
       */
      phys_B = isl_extent2d(128, 32);
      break;

   case ISL_TILING_Yf:
   case ISL_TILING_Ys: {
      bool is_Ys = tiling == ISL_TILING_Ys;

      assert(bs > 0);
      /* ffs(bs) / 2 shifts size from height to width as the block grows, so
       * width * height stays constant; Ys adds 2 to both exponents, i.e. it
       * is 4x wider and 4x taller than Yf for the same bs.
       */
      unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
      unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));

      logical_el = isl_extent4d(width / bs, height, 1, 1);
      phys_B = isl_extent2d(width, height);
      break;
   }
   case ISL_TILING_64:
      /* The tables below are taken from the "2D Surfaces" page in the Bspec
       * which are formulated in terms of the Cv and Cu constants. This is
       * different from the tables in the "Tile64 Format" page which should be
       * equivalent but are usually in terms of pixels. Also note that Cv and
       * Cu are HxW order to match the Bspec table, not WxH order like you
       * might expect.
       *
       * From the Bspec's "Tile64 Format" page:
       *
       *    MSAA Depth/Stencil surface use IMS (Interleaved Multi Samples)
       *    which means:
       *
       *    - Use the 1X MSAA (non-MSRT) version of the Tile64 equations and
       *      let the client unit do the swizzling internally
       *
       * Surfaces using the IMS layout will use the mapping for 1x MSAA.
       */
      /* tile_extent(): cu is log2 of the tile width in bytes (divided by bs
       * to get elements), cv is log2 of the tile height in rows, and a is
       * passed through as the fourth extent component.
       */
#define tile_extent(bs, cv, cu, a) \
      isl_extent4d((1 << cu) / bs, 1 << cv, 1, a)

      /* Only 2D surfaces are handled. */
      assert(dim == ISL_SURF_DIM_2D);

      if (samples == 1 || msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
         switch (format_bpb) {
         case 128: logical_el = tile_extent(bs, 6, 10, 1); break;
         case  64: logical_el = tile_extent(bs, 6, 10, 1); break;
         case  32: logical_el = tile_extent(bs, 7,  9, 1); break;
         case  16: logical_el = tile_extent(bs, 7,  9, 1); break;
         case   8: logical_el = tile_extent(bs, 8,  8, 1); break;
         default: unreachable("Unsupported format size.");
         }
      } else if (samples == 2) {
         switch (format_bpb) {
         case 128: logical_el = tile_extent(bs, 6,  9, 2); break;
         case  64: logical_el = tile_extent(bs, 6,  9, 2); break;
         case  32: logical_el = tile_extent(bs, 7,  8, 2); break;
         case  16: logical_el = tile_extent(bs, 7,  8, 2); break;
         case   8: logical_el = tile_extent(bs, 8,  7, 2); break;
         default: unreachable("Unsupported format size.");
         }
      } else {
         /* Every remaining sample count uses the 4-sample table. */
         switch (format_bpb) {
         case 128: logical_el = tile_extent(bs, 5,  9, 4); break;
         case  64: logical_el = tile_extent(bs, 5,  9, 4); break;
         case  32: logical_el = tile_extent(bs, 6,  8, 4); break;
         case  16: logical_el = tile_extent(bs, 6,  8, 4); break;
         case   8: logical_el = tile_extent(bs, 7,  7, 4); break;
         default: unreachable("Unsupported format size.");
         }
      }

#undef tile_extent

      /* The physical height is whatever makes width * height == 64 KiB. */
      phys_B.w = logical_el.w * bs;
      phys_B.h = 64 * 1024 / phys_B.w;
      break;

   case ISL_TILING_HIZ:
      /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4
       * 128bpb format.  The tiling has the same physical dimensions as
       * Y-tiling but actually has two HiZ columns per Y-tiled column.
       */
      assert(bs == 16);
      logical_el = isl_extent4d(16, 16, 1, 1);
      phys_B = isl_extent2d(128, 32);
      break;

   case ISL_TILING_CCS:
      /* CCS surfaces are required to have one of the GENX_CCS_* formats which
       * have a block size of 1 or 2 bits per block and each CCS element
       * corresponds to one cache-line pair in the main surface.  From the Sky
       * Lake PRM Vol. 12 in the section on planes:
       *
       *    "The Color Control Surface (CCS) contains the compression status
       *    of the cache-line pairs. The compression state of the cache-line
       *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
       *    represents an area on the main surface of 16x16 sets of 128 byte
       *    Y-tiled cache-line-pairs. CCS is always Y tiled."
       *
       * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
       * Since each cache line corresponds to a 16x16 set of cache-line pairs,
       * that yields total tile area of 128x128 cache-line pairs or CCS
       * elements.  On older hardware, each CCS element is 1 bit and the tile
       * is 128x256 elements.
       */
      assert(format_bpb == 1 || format_bpb == 2);
      logical_el = isl_extent4d(128, 256 / format_bpb, 1, 1);
      phys_B = isl_extent2d(128, 32);
      break;

   case ISL_TILING_GFX12_CCS:
      /* From the Bspec, Gen Graphics > Gfx12 > Memory Data Formats > Memory
       * Compression > Memory Compression - Gfx12:
       *
       *    4 bits of auxiliary plane data are required for 2 cachelines of
       *    main surface data. This results in a single cacheline of auxiliary
       *    plane data mapping to 4 4K pages of main surface data for the 4K
       *    pages (tile Y ) and 1 64K Tile Ys page.
       *
       * The Y-tiled pairing bit of 9 shown in the table below that Bspec
       * section expresses that the 2 cachelines of main surface data are
       * horizontally adjacent.
       *
       * TODO: Handle Ys, Yf and their pairing bits.
       *
       * Therefore, each CCS cacheline represents a 512Bx32 row area and each
       * element represents a 32Bx4 row area.
       */
      assert(format_bpb == 4);
      logical_el = isl_extent4d(16, 8, 1, 1);
      phys_B = isl_extent2d(64, 1);
      break;

   default:
      unreachable("not reached");
   } /* end switch */

   *tile_info = (struct isl_tile_info) {
      .tiling = tiling,
      .format_bpb = format_bpb,
      .logical_extent_el = logical_el,
      .phys_extent_B = phys_B,
   };
}
535
536bool
537isl_color_value_is_zero(union isl_color_value value,
538                        enum isl_format format)
539{
540   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
541
542#define RETURN_FALSE_IF_NOT_0(c, i) \
543   if (fmtl->channels.c.bits && value.u32[i] != 0) \
544      return false
545
546   RETURN_FALSE_IF_NOT_0(r, 0);
547   RETURN_FALSE_IF_NOT_0(g, 1);
548   RETURN_FALSE_IF_NOT_0(b, 2);
549   RETURN_FALSE_IF_NOT_0(a, 3);
550
551#undef RETURN_FALSE_IF_NOT_0
552
553   return true;
554}
555
556bool
557isl_color_value_is_zero_one(union isl_color_value value,
558                            enum isl_format format)
559{
560   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
561
562#define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
563   if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
564      return false
565
566   if (isl_format_has_int_channel(format)) {
567      RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
568      RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
569      RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
570      RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
571   } else {
572      RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
573      RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
574      RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
575      RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
576   }
577
578#undef RETURN_FALSE_IF_NOT_0_1
579
580   return true;
581}
582
583/**
584 * @param[out] tiling is set only on success
585 */
586static bool
587isl_surf_choose_tiling(const struct isl_device *dev,
588                       const struct isl_surf_init_info *restrict info,
589                       enum isl_tiling *tiling)
590{
591   isl_tiling_flags_t tiling_flags = info->tiling_flags;
592
593   /* HiZ surfaces always use the HiZ tiling */
594   if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
595      assert(info->format == ISL_FORMAT_HIZ);
596      assert(tiling_flags == ISL_TILING_HIZ_BIT);
597      *tiling = isl_tiling_flag_to_enum(tiling_flags);
598      return true;
599   }
600
601   /* CCS surfaces always use the CCS tiling */
602   if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
603      assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
604      UNUSED bool ivb_ccs = ISL_GFX_VER(dev) < 12 &&
605                            tiling_flags == ISL_TILING_CCS_BIT;
606      UNUSED bool tgl_ccs = ISL_GFX_VER(dev) >= 12 &&
607                            tiling_flags == ISL_TILING_GFX12_CCS_BIT;
608      assert(ivb_ccs != tgl_ccs);
609      *tiling = isl_tiling_flag_to_enum(tiling_flags);
610      return true;
611   }
612
613   if (ISL_GFX_VERX10(dev) >= 125) {
614      isl_gfx125_filter_tiling(dev, info, &tiling_flags);
615   } else if (ISL_GFX_VER(dev) >= 6) {
616      isl_gfx6_filter_tiling(dev, info, &tiling_flags);
617   } else {
618      isl_gfx4_filter_tiling(dev, info, &tiling_flags);
619   }
620
621   #define CHOOSE(__tiling) \
622      do { \
623         if (tiling_flags & (1u << (__tiling))) { \
624            *tiling = (__tiling); \
625            return true; \
626          } \
627      } while (0)
628
629   /* Of the tiling modes remaining, choose the one that offers the best
630    * performance.
631    */
632
633   if (info->dim == ISL_SURF_DIM_1D) {
634      /* Prefer linear for 1D surfaces because they do not benefit from
635       * tiling. To the contrary, tiling leads to wasted memory and poor
636       * memory locality due to the swizzling and alignment restrictions
637       * required in tiled surfaces.
638       */
639      CHOOSE(ISL_TILING_LINEAR);
640   }
641
642   CHOOSE(ISL_TILING_4);
643   CHOOSE(ISL_TILING_64);
644   CHOOSE(ISL_TILING_Ys);
645   CHOOSE(ISL_TILING_Yf);
646   CHOOSE(ISL_TILING_Y0);
647   CHOOSE(ISL_TILING_X);
648   CHOOSE(ISL_TILING_W);
649   CHOOSE(ISL_TILING_LINEAR);
650
651   #undef CHOOSE
652
653   /* No tiling mode accomodates the inputs. */
654   return false;
655}
656
657static bool
658isl_choose_msaa_layout(const struct isl_device *dev,
659                 const struct isl_surf_init_info *info,
660                 enum isl_tiling tiling,
661                 enum isl_msaa_layout *msaa_layout)
662{
663   if (ISL_GFX_VER(dev) >= 8) {
664      return isl_gfx8_choose_msaa_layout(dev, info, tiling, msaa_layout);
665   } else if (ISL_GFX_VER(dev) >= 7) {
666      return isl_gfx7_choose_msaa_layout(dev, info, tiling, msaa_layout);
667   } else if (ISL_GFX_VER(dev) >= 6) {
668      return isl_gfx6_choose_msaa_layout(dev, info, tiling, msaa_layout);
669   } else {
670      return isl_gfx4_choose_msaa_layout(dev, info, tiling, msaa_layout);
671   }
672}
673
674struct isl_extent2d
675isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
676{
677   assert(isl_is_pow2(samples));
678
679   /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
680    * Sizes (p133):
681    *
682    *    If the surface is multisampled and it is a depth or stencil surface
683    *    or Multisampled Surface StorageFormat in SURFACE_STATE is
684    *    MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
685    *    proceeding: [...]
686    */
687   return (struct isl_extent2d) {
688      .width = 1 << ((ffs(samples) - 0) / 2),
689      .height = 1 << ((ffs(samples) - 1) / 2),
690   };
691}
692
693static void
694isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
695                                    uint32_t *width, uint32_t *height)
696{
697   const struct isl_extent2d px_size_sa =
698      isl_get_interleaved_msaa_px_size_sa(samples);
699
700   if (width)
701      *width = isl_align(*width, 2) * px_size_sa.width;
702   if (height)
703      *height = isl_align(*height, 2) * px_size_sa.height;
704}
705
706static enum isl_array_pitch_span
707isl_choose_array_pitch_span(const struct isl_device *dev,
708                            const struct isl_surf_init_info *restrict info,
709                            enum isl_dim_layout dim_layout,
710                            const struct isl_extent4d *phys_level0_sa)
711{
712   switch (dim_layout) {
713   case ISL_DIM_LAYOUT_GFX9_1D:
714   case ISL_DIM_LAYOUT_GFX4_2D:
715      if (ISL_GFX_VER(dev) >= 8) {
716         /* QPitch becomes programmable in Broadwell. So choose the
717          * most compact QPitch possible in order to conserve memory.
718          *
719          * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
720          * >> RENDER_SURFACE_STATE Surface QPitch (p325):
721          *
722          *    - Software must ensure that this field is set to a value
723          *      sufficiently large such that the array slices in the surface
724          *      do not overlap. Refer to the Memory Data Formats section for
725          *      information on how surfaces are stored in memory.
726          *
727          *    - This field specifies the distance in rows between array
728          *      slices.  It is used only in the following cases:
729          *
730          *          - Surface Array is enabled OR
731          *          - Number of Mulitsamples is not NUMSAMPLES_1 and
732          *            Multisampled Surface Storage Format set to MSFMT_MSS OR
733          *          - Surface Type is SURFTYPE_CUBE
734          */
735         return ISL_ARRAY_PITCH_SPAN_COMPACT;
736      } else if (ISL_GFX_VER(dev) >= 7) {
737         /* Note that Ivybridge introduces
738          * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
739          * driver more control over the QPitch.
740          */
741
742         if (phys_level0_sa->array_len == 1) {
743            /* The hardware will never use the QPitch. So choose the most
744             * compact QPitch possible in order to conserve memory.
745             */
746            return ISL_ARRAY_PITCH_SPAN_COMPACT;
747         }
748
749         if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
750             (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
751            /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
752             * Section 6.18.4.7: Surface Arrays (p112):
753             *
754             *    If Surface Array Spacing is set to ARYSPC_FULL (note that
755             *    the depth buffer and stencil buffer have an implied value of
756             *    ARYSPC_FULL):
757             */
758            return ISL_ARRAY_PITCH_SPAN_FULL;
759         }
760
761         if (info->levels == 1) {
762            /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
763             * to ARYSPC_LOD0.
764             */
765            return ISL_ARRAY_PITCH_SPAN_COMPACT;
766         }
767
768         return ISL_ARRAY_PITCH_SPAN_FULL;
769      } else if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) &&
770                 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
771                 isl_surf_usage_is_stencil(info->usage)) {
772         /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
773          * Graphics Core >> Section 7.18.3.7: Surface Arrays:
774          *
775          *    The separate stencil buffer does not support mip mapping, thus
776          *    the storage for LODs other than LOD 0 is not needed.
777          */
778         assert(info->levels == 1);
779         return ISL_ARRAY_PITCH_SPAN_COMPACT;
780      } else {
781         if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) &&
782             ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
783             isl_surf_usage_is_stencil(info->usage)) {
784            /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
785             * Graphics Core >> Section 7.18.3.7: Surface Arrays:
786             *
787             *    The separate stencil buffer does not support mip mapping,
788             *    thus the storage for LODs other than LOD 0 is not needed.
789             */
790            assert(info->levels == 1);
791            assert(phys_level0_sa->array_len == 1);
792            return ISL_ARRAY_PITCH_SPAN_COMPACT;
793         }
794
795         if (phys_level0_sa->array_len == 1) {
796            /* The hardware will never use the QPitch. So choose the most
797             * compact QPitch possible in order to conserve memory.
798             */
799            return ISL_ARRAY_PITCH_SPAN_COMPACT;
800         }
801
802         return ISL_ARRAY_PITCH_SPAN_FULL;
803      }
804
805   case ISL_DIM_LAYOUT_GFX4_3D:
806      /* The hardware will never use the QPitch. So choose the most
807       * compact QPitch possible in order to conserve memory.
808       */
809      return ISL_ARRAY_PITCH_SPAN_COMPACT;
810
811   case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
812      /* Each array image in the gfx6 stencil of HiZ surface is compact in the
813       * sense that every LOD is a compact array of the same size as LOD0.
814       */
815      return ISL_ARRAY_PITCH_SPAN_COMPACT;
816   }
817
818   unreachable("bad isl_dim_layout");
819   return ISL_ARRAY_PITCH_SPAN_FULL;
820}
821
822static void
823isl_choose_image_alignment_el(const struct isl_device *dev,
824                              const struct isl_surf_init_info *restrict info,
825                              enum isl_tiling tiling,
826                              enum isl_dim_layout dim_layout,
827                              enum isl_msaa_layout msaa_layout,
828                              struct isl_extent3d *image_align_el)
829{
830   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
831   if (fmtl->txc == ISL_TXC_MCS) {
832      assert(tiling == ISL_TILING_Y0);
833
834      /*
835       * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
836       *
837       * Height, width, and layout of MCS buffer in this case must match with
838       * Render Target height, width, and layout. MCS buffer is tiledY.
839       *
840       * To avoid wasting memory, choose the smallest alignment possible:
841       * HALIGN_4 and VALIGN_4.
842       */
843      *image_align_el = isl_extent3d(4, 4, 1);
844      return;
845   } else if (info->format == ISL_FORMAT_HIZ) {
846      assert(ISL_GFX_VER(dev) >= 6);
847      if (ISL_GFX_VER(dev) == 6) {
848         /* HiZ surfaces on Sandy Bridge are packed tightly. */
849         *image_align_el = isl_extent3d(1, 1, 1);
850      } else if (ISL_GFX_VER(dev) < 12) {
851         /* On gfx7+, HiZ surfaces are always aligned to 16x8 pixels in the
852          * primary surface which works out to 2x2 HiZ elments.
853          */
854         *image_align_el = isl_extent3d(2, 2, 1);
855      } else {
856         /* On gfx12+, HiZ surfaces are always aligned to 16x16 pixels in the
857          * primary surface which works out to 2x4 HiZ elments.
858          * TODO: Verify
859          */
860         *image_align_el = isl_extent3d(2, 4, 1);
861      }
862      return;
863   }
864
865   if (ISL_GFX_VERX10(dev) >= 125) {
866      isl_gfx125_choose_image_alignment_el(dev, info, tiling, dim_layout,
867                                           msaa_layout, image_align_el);
868   } else if (ISL_GFX_VER(dev) >= 12) {
869      isl_gfx12_choose_image_alignment_el(dev, info, tiling, dim_layout,
870                                          msaa_layout, image_align_el);
871   } else if (ISL_GFX_VER(dev) >= 9) {
872      isl_gfx9_choose_image_alignment_el(dev, info, tiling, dim_layout,
873                                         msaa_layout, image_align_el);
874   } else if (ISL_GFX_VER(dev) >= 8) {
875      isl_gfx8_choose_image_alignment_el(dev, info, tiling, dim_layout,
876                                         msaa_layout, image_align_el);
877   } else if (ISL_GFX_VER(dev) >= 7) {
878      isl_gfx7_choose_image_alignment_el(dev, info, tiling, dim_layout,
879                                          msaa_layout, image_align_el);
880   } else if (ISL_GFX_VER(dev) >= 6) {
881      isl_gfx6_choose_image_alignment_el(dev, info, tiling, dim_layout,
882                                         msaa_layout, image_align_el);
883   } else {
884      isl_gfx4_choose_image_alignment_el(dev, info, tiling, dim_layout,
885                                         msaa_layout, image_align_el);
886   }
887}
888
889static enum isl_dim_layout
890isl_surf_choose_dim_layout(const struct isl_device *dev,
891                           enum isl_surf_dim logical_dim,
892                           enum isl_tiling tiling,
893                           isl_surf_usage_flags_t usage)
894{
895   /* Sandy bridge needs a special layout for HiZ and stencil. */
896   if (ISL_GFX_VER(dev) == 6 &&
897       (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
898      return ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ;
899
900   if (ISL_GFX_VER(dev) >= 9) {
901      switch (logical_dim) {
902      case ISL_SURF_DIM_1D:
903         /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
904          *
905          *    One-dimensional surfaces use a tiling mode of linear.
906          *    Technically, they are not tiled resources, but the Tiled
907          *    Resource Mode field in RENDER_SURFACE_STATE is still used to
908          *    indicate the alignment requirements for this linear surface
909          *    (See 1D Alignment requirements for how 4K and 64KB Tiled
910          *    Resource Modes impact alignment). Alternatively, a 1D surface
911          *    can be defined as a 2D tiled surface (e.g. TileY or TileX) with
912          *    a height of 0.
913          *
914          * In other words, ISL_DIM_LAYOUT_GFX9_1D is only used for linear
915          * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GFX4_2D is used.
916          */
917         if (tiling == ISL_TILING_LINEAR)
918            return ISL_DIM_LAYOUT_GFX9_1D;
919         else
920            return ISL_DIM_LAYOUT_GFX4_2D;
921      case ISL_SURF_DIM_2D:
922      case ISL_SURF_DIM_3D:
923         return ISL_DIM_LAYOUT_GFX4_2D;
924      }
925   } else {
926      switch (logical_dim) {
927      case ISL_SURF_DIM_1D:
928      case ISL_SURF_DIM_2D:
929         /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
930          *
931          * The cube face textures are stored in the same way as 3D surfaces
932          * are stored (see section 6.17.5 for details).  For cube surfaces,
933          * however, the depth is equal to the number of faces (always 6) and
934          * is not reduced for each MIP.
935          */
936         if (ISL_GFX_VER(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
937            return ISL_DIM_LAYOUT_GFX4_3D;
938
939         return ISL_DIM_LAYOUT_GFX4_2D;
940      case ISL_SURF_DIM_3D:
941         return ISL_DIM_LAYOUT_GFX4_3D;
942      }
943   }
944
945   unreachable("bad isl_surf_dim");
946   return ISL_DIM_LAYOUT_GFX4_2D;
947}
948
949/**
950 * Calculate the physical extent of the surface's first level, in units of
951 * surface samples.
952 */
953static void
954isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
955                               const struct isl_surf_init_info *restrict info,
956                               enum isl_dim_layout dim_layout,
957                               enum isl_tiling tiling,
958                               enum isl_msaa_layout msaa_layout,
959                               struct isl_extent4d *phys_level0_sa)
960{
961   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
962
963   if (isl_format_is_planar(info->format))
964      unreachable("Planar formats unsupported");
965
966   switch (info->dim) {
967   case ISL_SURF_DIM_1D:
968      assert(info->height == 1);
969      assert(info->depth == 1);
970      assert(info->samples == 1);
971
972      switch (dim_layout) {
973      case ISL_DIM_LAYOUT_GFX4_3D:
974         unreachable("bad isl_dim_layout");
975
976      case ISL_DIM_LAYOUT_GFX9_1D:
977      case ISL_DIM_LAYOUT_GFX4_2D:
978      case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
979         *phys_level0_sa = (struct isl_extent4d) {
980            .w = info->width,
981            .h = 1,
982            .d = 1,
983            .a = info->array_len,
984         };
985         break;
986      }
987      break;
988
989   case ISL_SURF_DIM_2D:
990      if (ISL_GFX_VER(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
991         assert(dim_layout == ISL_DIM_LAYOUT_GFX4_3D);
992      else
993         assert(dim_layout == ISL_DIM_LAYOUT_GFX4_2D ||
994                dim_layout == ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ);
995
996      if (tiling == ISL_TILING_Ys && info->samples > 1)
997         isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);
998
999      switch (msaa_layout) {
1000      case ISL_MSAA_LAYOUT_NONE:
1001         assert(info->depth == 1);
1002         assert(info->samples == 1);
1003
1004         *phys_level0_sa = (struct isl_extent4d) {
1005            .w = info->width,
1006            .h = info->height,
1007            .d = 1,
1008            .a = info->array_len,
1009         };
1010         break;
1011
1012      case ISL_MSAA_LAYOUT_ARRAY:
1013         assert(info->depth == 1);
1014         assert(info->levels == 1);
1015         assert(isl_format_supports_multisampling(dev->info, info->format));
1016         assert(fmtl->bw == 1 && fmtl->bh == 1);
1017
1018         *phys_level0_sa = (struct isl_extent4d) {
1019            .w = info->width,
1020            .h = info->height,
1021            .d = 1,
1022            .a = info->array_len * info->samples,
1023         };
1024         break;
1025
1026      case ISL_MSAA_LAYOUT_INTERLEAVED:
1027         assert(info->depth == 1);
1028         assert(info->levels == 1);
1029         assert(isl_format_supports_multisampling(dev->info, info->format));
1030
1031         *phys_level0_sa = (struct isl_extent4d) {
1032            .w = info->width,
1033            .h = info->height,
1034            .d = 1,
1035            .a = info->array_len,
1036         };
1037
1038         isl_msaa_interleaved_scale_px_to_sa(info->samples,
1039                                             &phys_level0_sa->w,
1040                                             &phys_level0_sa->h);
1041         break;
1042      }
1043      break;
1044
1045   case ISL_SURF_DIM_3D:
1046      assert(info->array_len == 1);
1047      assert(info->samples == 1);
1048
1049      if (fmtl->bd > 1) {
1050         isl_finishme("%s:%s: compression block with depth > 1",
1051                      __FILE__, __func__);
1052      }
1053
1054      switch (dim_layout) {
1055      case ISL_DIM_LAYOUT_GFX9_1D:
1056      case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1057         unreachable("bad isl_dim_layout");
1058
1059      case ISL_DIM_LAYOUT_GFX4_2D:
1060         assert(ISL_GFX_VER(dev) >= 9);
1061
1062         *phys_level0_sa = (struct isl_extent4d) {
1063            .w = info->width,
1064            .h = info->height,
1065            .d = 1,
1066            .a = info->depth,
1067         };
1068         break;
1069
1070      case ISL_DIM_LAYOUT_GFX4_3D:
1071         assert(ISL_GFX_VER(dev) < 9);
1072         *phys_level0_sa = (struct isl_extent4d) {
1073            .w = info->width,
1074            .h = info->height,
1075            .d = info->depth,
1076            .a = 1,
1077         };
1078         break;
1079      }
1080      break;
1081   }
1082}
1083
1084/**
1085 * Calculate the pitch between physical array slices, in units of rows of
1086 * surface elements.
1087 */
1088static uint32_t
1089isl_calc_array_pitch_el_rows_gfx4_2d(
1090      const struct isl_device *dev,
1091      const struct isl_surf_init_info *restrict info,
1092      const struct isl_tile_info *tile_info,
1093      const struct isl_extent3d *image_align_sa,
1094      const struct isl_extent4d *phys_level0_sa,
1095      enum isl_array_pitch_span array_pitch_span,
1096      const struct isl_extent2d *phys_slice0_sa)
1097{
1098   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1099   uint32_t pitch_sa_rows = 0;
1100
1101   switch (array_pitch_span) {
1102   case ISL_ARRAY_PITCH_SPAN_COMPACT:
1103      pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
1104      break;
1105   case ISL_ARRAY_PITCH_SPAN_FULL: {
1106      /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
1107       * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
1108       * Surfaces >> Surface Arrays.
1109       */
1110      uint32_t H0_sa = phys_level0_sa->h;
1111      uint32_t H1_sa = isl_minify(H0_sa, 1);
1112
1113      uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
1114      uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
1115
1116      uint32_t m;
1117      if (ISL_GFX_VER(dev) >= 7) {
1118         /* The QPitch equation changed slightly in Ivybridge. */
1119         m = 12;
1120      } else {
1121         m = 11;
1122      }
1123
1124      pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
1125
1126      if (ISL_GFX_VER(dev) == 6 && info->samples > 1 &&
1127          (info->height % 4 == 1)) {
1128         /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
1129          * Graphics Core >> Section 7.18.3.7: Surface Arrays:
1130          *
1131          *    [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
1132          *    the value calculated in the equation above , for every
1133          *    other odd Surface Height starting from 1 i.e. 1,5,9,13.
1134          *
1135          * XXX(chadv): Is the errata natural corollary of the physical
1136          * layout of interleaved samples?
1137          */
1138         pitch_sa_rows += 4;
1139      }
1140
1141      pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
1142      } /* end case */
1143      break;
1144   }
1145
1146   assert(pitch_sa_rows % fmtl->bh == 0);
1147   uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
1148
1149   if (ISL_GFX_VER(dev) >= 9 && ISL_GFX_VER(dev) <= 11 &&
1150       fmtl->txc == ISL_TXC_CCS) {
1151      /*
1152       * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
1153       *
1154       *    "Mip-mapped and arrayed surfaces are supported with MCS buffer
1155       *    layout with these alignments in the RT space: Horizontal
1156       *    Alignment = 128 and Vertical Alignment = 64."
1157       *
1158       * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
1159       *
1160       *    "For non-multisampled render target's CCS auxiliary surface,
1161       *    QPitch must be computed with Horizontal Alignment = 128 and
1162       *    Surface Vertical Alignment = 256. These alignments are only for
1163       *    CCS buffer and not for associated render target."
1164       *
1165       * The first restriction is already handled by isl_choose_image_alignment_el
1166       * but the second restriction, which is an extension of the first, only
1167       * applies to qpitch and must be applied here.
1168       *
1169       * The second restriction disappears on Gfx12.
1170       */
1171      assert(fmtl->bh == 4);
1172      pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
1173   }
1174
1175   if (ISL_GFX_VER(dev) >= 9 &&
1176       info->dim == ISL_SURF_DIM_3D &&
1177       tile_info->tiling != ISL_TILING_LINEAR) {
1178      /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
1179       *
1180       *    Tile Mode != Linear: This field must be set to an integer multiple
1181       *    of the tile height
1182       */
1183      pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
1184   }
1185
1186   return pitch_el_rows;
1187}
1188
1189/**
1190 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1191 * ISL_DIM_LAYOUT_GFX4_2D.
1192 */
1193static void
1194isl_calc_phys_slice0_extent_sa_gfx4_2d(
1195      const struct isl_device *dev,
1196      const struct isl_surf_init_info *restrict info,
1197      enum isl_msaa_layout msaa_layout,
1198      const struct isl_extent3d *image_align_sa,
1199      const struct isl_extent4d *phys_level0_sa,
1200      struct isl_extent2d *phys_slice0_sa)
1201{
1202   assert(phys_level0_sa->depth == 1);
1203
1204   if (info->levels == 1) {
1205      /* Do not pad the surface to the image alignment.
1206       *
1207       * For tiled surfaces, using a reduced alignment here avoids wasting CPU
1208       * cycles on the below mipmap layout caluclations. Reducing the
1209       * alignment here is safe because we later align the row pitch and array
1210       * pitch to the tile boundary. It is safe even for
1211       * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
1212       * to accomodate the interleaved samples.
1213       *
1214       * For linear surfaces, reducing the alignment here permits us to later
1215       * choose an arbitrary, non-aligned row pitch. If the surface backs
1216       * a VkBuffer, then an arbitrary pitch may be needed to accomodate
1217       * VkBufferImageCopy::bufferRowLength.
1218       */
1219      *phys_slice0_sa = (struct isl_extent2d) {
1220         .w = phys_level0_sa->w,
1221         .h = phys_level0_sa->h,
1222      };
1223      return;
1224   }
1225
1226   uint32_t slice_top_w = 0;
1227   uint32_t slice_bottom_w = 0;
1228   uint32_t slice_left_h = 0;
1229   uint32_t slice_right_h = 0;
1230
1231   uint32_t W0 = phys_level0_sa->w;
1232   uint32_t H0 = phys_level0_sa->h;
1233
1234   for (uint32_t l = 0; l < info->levels; ++l) {
1235      uint32_t W = isl_minify(W0, l);
1236      uint32_t H = isl_minify(H0, l);
1237
1238      uint32_t w = isl_align_npot(W, image_align_sa->w);
1239      uint32_t h = isl_align_npot(H, image_align_sa->h);
1240
1241      if (l == 0) {
1242         slice_top_w = w;
1243         slice_left_h = h;
1244         slice_right_h = h;
1245      } else if (l == 1) {
1246         slice_bottom_w = w;
1247         slice_left_h += h;
1248      } else if (l == 2) {
1249         slice_bottom_w += w;
1250         slice_right_h += h;
1251      } else {
1252         slice_right_h += h;
1253      }
1254   }
1255
1256   *phys_slice0_sa = (struct isl_extent2d) {
1257      .w = MAX(slice_top_w, slice_bottom_w),
1258      .h = MAX(slice_left_h, slice_right_h),
1259   };
1260}
1261
1262static void
1263isl_calc_phys_total_extent_el_gfx4_2d(
1264      const struct isl_device *dev,
1265      const struct isl_surf_init_info *restrict info,
1266      const struct isl_tile_info *tile_info,
1267      enum isl_msaa_layout msaa_layout,
1268      const struct isl_extent3d *image_align_sa,
1269      const struct isl_extent4d *phys_level0_sa,
1270      enum isl_array_pitch_span array_pitch_span,
1271      uint32_t *array_pitch_el_rows,
1272      struct isl_extent4d *phys_total_el)
1273{
1274   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1275
1276   struct isl_extent2d phys_slice0_sa;
1277   isl_calc_phys_slice0_extent_sa_gfx4_2d(dev, info, msaa_layout,
1278                                          image_align_sa, phys_level0_sa,
1279                                          &phys_slice0_sa);
1280   *array_pitch_el_rows =
1281      isl_calc_array_pitch_el_rows_gfx4_2d(dev, info, tile_info,
1282                                           image_align_sa, phys_level0_sa,
1283                                           array_pitch_span,
1284                                           &phys_slice0_sa);
1285
1286   if (tile_info->tiling == ISL_TILING_64) {
1287      *phys_total_el = (struct isl_extent4d) {
1288         .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1289         .h = isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1290         .d = isl_align_div_npot(phys_level0_sa->d, fmtl->bd),
1291         .a = phys_level0_sa->array_len,
1292      };
1293   } else {
1294      *phys_total_el = (struct isl_extent4d) {
1295         .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1296         .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
1297              isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1298         .d = 1,
1299         .a = 1,
1300      };
1301   }
1302}
1303
1304/**
1305 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1306 * ISL_DIM_LAYOUT_GFX4_3D.
1307 */
1308static void
1309isl_calc_phys_total_extent_el_gfx4_3d(
1310      const struct isl_device *dev,
1311      const struct isl_surf_init_info *restrict info,
1312      const struct isl_extent3d *image_align_sa,
1313      const struct isl_extent4d *phys_level0_sa,
1314      uint32_t *array_pitch_el_rows,
1315      struct isl_extent4d *phys_total_el)
1316{
1317   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1318
1319   assert(info->samples == 1);
1320
1321   if (info->dim != ISL_SURF_DIM_3D) {
1322      /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
1323       *
1324       * The cube face textures are stored in the same way as 3D surfaces
1325       * are stored (see section 6.17.5 for details).  For cube surfaces,
1326       * however, the depth is equal to the number of faces (always 6) and
1327       * is not reduced for each MIP.
1328       */
1329      assert(ISL_GFX_VER(dev) == 4);
1330      assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
1331      assert(phys_level0_sa->array_len == 6);
1332   } else {
1333      assert(phys_level0_sa->array_len == 1);
1334   }
1335
1336   uint32_t total_w = 0;
1337   uint32_t total_h = 0;
1338
1339   uint32_t W0 = phys_level0_sa->w;
1340   uint32_t H0 = phys_level0_sa->h;
1341   uint32_t D0 = phys_level0_sa->d;
1342   uint32_t A0 = phys_level0_sa->a;
1343
1344   for (uint32_t l = 0; l < info->levels; ++l) {
1345      uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
1346      uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
1347      uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
1348
1349      uint32_t max_layers_horiz = MIN(level_d, 1u << l);
1350      uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1351
1352      total_w = MAX(total_w, level_w * max_layers_horiz);
1353      total_h += level_h * max_layers_vert;
1354   }
1355
1356   /* GFX4_3D layouts don't really have an array pitch since each LOD has a
1357    * different number of horizontal and vertical layers.  We have to set it
1358    * to something, so at least make it true for LOD0.
1359    */
1360   *array_pitch_el_rows =
1361      isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
1362   *phys_total_el = (struct isl_extent4d) {
1363      .w = isl_assert_div(total_w, fmtl->bw),
1364      .h = isl_assert_div(total_h, fmtl->bh),
1365      .d = 1,
1366      .a = 1,
1367   };
1368}
1369
1370/**
1371 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1372 * ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ.
1373 */
1374static void
1375isl_calc_phys_total_extent_el_gfx6_stencil_hiz(
1376      const struct isl_device *dev,
1377      const struct isl_surf_init_info *restrict info,
1378      const struct isl_tile_info *tile_info,
1379      const struct isl_extent3d *image_align_sa,
1380      const struct isl_extent4d *phys_level0_sa,
1381      uint32_t *array_pitch_el_rows,
1382      struct isl_extent4d *phys_total_el)
1383{
1384   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1385
1386   const struct isl_extent2d tile_extent_sa = {
1387      .w = tile_info->logical_extent_el.w * fmtl->bw,
1388      .h = tile_info->logical_extent_el.h * fmtl->bh,
1389   };
1390   /* Tile size is a multiple of image alignment */
1391   assert(tile_extent_sa.w % image_align_sa->w == 0);
1392   assert(tile_extent_sa.h % image_align_sa->h == 0);
1393
1394   const uint32_t W0 = phys_level0_sa->w;
1395   const uint32_t H0 = phys_level0_sa->h;
1396
1397   /* Each image has the same height as LOD0 because the hardware thinks
1398    * everything is LOD0
1399    */
1400   const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;
1401
1402   uint32_t total_top_w = 0;
1403   uint32_t total_bottom_w = 0;
1404   uint32_t total_h = 0;
1405
1406   for (uint32_t l = 0; l < info->levels; ++l) {
1407      const uint32_t W = isl_minify(W0, l);
1408
1409      const uint32_t w = isl_align(W, tile_extent_sa.w);
1410      const uint32_t h = isl_align(H, tile_extent_sa.h);
1411
1412      if (l == 0) {
1413         total_top_w = w;
1414         total_h = h;
1415      } else if (l == 1) {
1416         total_bottom_w = w;
1417         total_h += h;
1418      } else {
1419         total_bottom_w += w;
1420      }
1421   }
1422
1423   *array_pitch_el_rows =
1424      isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
1425   *phys_total_el = (struct isl_extent4d) {
1426      .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
1427      .h = isl_assert_div(total_h, fmtl->bh),
1428      .d = 1,
1429      .a = 1,
1430   };
1431}
1432
1433/**
1434 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1435 * ISL_DIM_LAYOUT_GFX9_1D.
1436 */
1437static void
1438isl_calc_phys_total_extent_el_gfx9_1d(
1439      const struct isl_device *dev,
1440      const struct isl_surf_init_info *restrict info,
1441      const struct isl_extent3d *image_align_sa,
1442      const struct isl_extent4d *phys_level0_sa,
1443      uint32_t *array_pitch_el_rows,
1444      struct isl_extent4d *phys_total_el)
1445{
1446   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1447
1448   assert(phys_level0_sa->height == 1);
1449   assert(phys_level0_sa->depth == 1);
1450   assert(info->samples == 1);
1451   assert(image_align_sa->w >= fmtl->bw);
1452
1453   uint32_t slice_w = 0;
1454   const uint32_t W0 = phys_level0_sa->w;
1455
1456   for (uint32_t l = 0; l < info->levels; ++l) {
1457      uint32_t W = isl_minify(W0, l);
1458      uint32_t w = isl_align_npot(W, image_align_sa->w);
1459
1460      slice_w += w;
1461   }
1462
1463   *array_pitch_el_rows = 1;
1464   *phys_total_el = (struct isl_extent4d) {
1465      .w = isl_assert_div(slice_w, fmtl->bw),
1466      .h = phys_level0_sa->array_len,
1467      .d = 1,
1468      .a = 1,
1469   };
1470}
1471
1472/**
1473 * Calculate the two-dimensional total physical extent of the surface, in
1474 * units of surface elements.
1475 */
1476static void
1477isl_calc_phys_total_extent_el(const struct isl_device *dev,
1478                              const struct isl_surf_init_info *restrict info,
1479                              const struct isl_tile_info *tile_info,
1480                              enum isl_dim_layout dim_layout,
1481                              enum isl_msaa_layout msaa_layout,
1482                              const struct isl_extent3d *image_align_sa,
1483                              const struct isl_extent4d *phys_level0_sa,
1484                              enum isl_array_pitch_span array_pitch_span,
1485                              uint32_t *array_pitch_el_rows,
1486                              struct isl_extent4d *phys_total_el)
1487{
1488   switch (dim_layout) {
1489   case ISL_DIM_LAYOUT_GFX9_1D:
1490      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1491      isl_calc_phys_total_extent_el_gfx9_1d(dev, info,
1492                                            image_align_sa, phys_level0_sa,
1493                                            array_pitch_el_rows,
1494                                            phys_total_el);
1495      return;
1496   case ISL_DIM_LAYOUT_GFX4_2D:
1497      isl_calc_phys_total_extent_el_gfx4_2d(dev, info, tile_info, msaa_layout,
1498                                            image_align_sa, phys_level0_sa,
1499                                            array_pitch_span,
1500                                            array_pitch_el_rows,
1501                                            phys_total_el);
1502      return;
1503   case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1504      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1505      isl_calc_phys_total_extent_el_gfx6_stencil_hiz(dev, info, tile_info,
1506                                                     image_align_sa,
1507                                                     phys_level0_sa,
1508                                                     array_pitch_el_rows,
1509                                                     phys_total_el);
1510      return;
1511   case ISL_DIM_LAYOUT_GFX4_3D:
1512      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1513      isl_calc_phys_total_extent_el_gfx4_3d(dev, info,
1514                                            image_align_sa, phys_level0_sa,
1515                                            array_pitch_el_rows,
1516                                            phys_total_el);
1517      return;
1518   }
1519
1520   unreachable("invalid value for dim_layout");
1521}
1522
1523static uint32_t
1524isl_calc_row_pitch_alignment(const struct isl_device *dev,
1525                             const struct isl_surf_init_info *surf_info,
1526                             const struct isl_tile_info *tile_info)
1527{
1528   if (tile_info->tiling != ISL_TILING_LINEAR) {
1529      /* According to BSpec: 44930, Gfx12's CCS-compressed surface pitches must
1530       * be 512B-aligned. CCS is only support on Y tilings.
1531       *
1532       * Only consider 512B alignment when :
1533       *    - AUX is not explicitly disabled
1534       *    - the caller has specified no pitch
1535       *
1536       * isl_surf_get_ccs_surf() will check that the main surface alignment
1537       * matches CCS expectations.
1538       */
1539      if (ISL_GFX_VER(dev) >= 12 &&
1540          isl_format_supports_ccs_e(dev->info, surf_info->format) &&
1541          tile_info->tiling != ISL_TILING_X &&
1542          !(surf_info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) &&
1543          surf_info->row_pitch_B == 0) {
1544         return isl_align(tile_info->phys_extent_B.width, 512);
1545      }
1546
1547      return tile_info->phys_extent_B.width;
1548   }
1549
1550   /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
1551    * RENDER_SURFACE_STATE Surface Pitch (p349):
1552    *
1553    *    - For linear render target surfaces and surfaces accessed with the
1554    *      typed data port messages, the pitch must be a multiple of the
1555    *      element size for non-YUV surface formats.  Pitch must be
1556    *      a multiple of 2 * element size for YUV surface formats.
1557    *
1558    *    - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
1559    *      ignore because isl doesn't do buffers.]
1560    *
1561    *    - For other linear surfaces, the pitch can be any multiple of
1562    *      bytes.
1563    */
1564   const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1565   const uint32_t bs = fmtl->bpb / 8;
1566   uint32_t alignment;
1567
1568   if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1569      if (isl_format_is_yuv(surf_info->format)) {
1570         alignment = 2 * bs;
1571      } else  {
1572         alignment = bs;
1573      }
1574   } else {
1575      alignment = 1;
1576   }
1577
1578   /* From the Broadwell PRM >> Volume 2c: Command Reference: Registers >>
1579    * PRI_STRIDE Stride (p1254):
1580    *
1581    *    "When using linear memory, this must be at least 64 byte aligned."
1582    *
1583    * However, when displaying on NVIDIA and recent AMD GPUs via PRIME,
1584    * we need a larger pitch of 256 bytes.
1585    *
1586    * If the ISL caller didn't specify a row_pitch_B, then we should assume
1587    * the NVIDIA/AMD requirements. Otherwise, if we have a specified
1588    * row_pitch_B, this is probably because the caller is trying to import a
1589    * buffer. In that case we limit the minimum row pitch to the Intel HW
1590    * requirement.
1591    */
1592   if (surf_info->usage & ISL_SURF_USAGE_DISPLAY_BIT) {
1593      if (surf_info->row_pitch_B == 0)
1594         alignment = isl_align(alignment, 256);
1595      else
1596         alignment = isl_align(alignment, 64);
1597   }
1598
1599   return alignment;
1600}
1601
1602static uint32_t
1603isl_calc_linear_min_row_pitch(const struct isl_device *dev,
1604                              const struct isl_surf_init_info *info,
1605                              const struct isl_extent4d *phys_total_el,
1606                              uint32_t alignment_B)
1607{
1608   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1609   const uint32_t bs = fmtl->bpb / 8;
1610
1611   return isl_align_npot(bs * phys_total_el->w, alignment_B);
1612}
1613
1614static uint32_t
1615isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
1616                             const struct isl_surf_init_info *surf_info,
1617                             const struct isl_tile_info *tile_info,
1618                             const struct isl_extent4d *phys_total_el,
1619                             uint32_t alignment_B)
1620{
1621   const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1622
1623   assert(fmtl->bpb % tile_info->format_bpb == 0);
1624
1625   const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
1626   const uint32_t total_w_tl =
1627      isl_align_div(phys_total_el->w * tile_el_scale,
1628                    tile_info->logical_extent_el.width);
1629
1630   /* In some cases the alignment of the pitch might be > to the tile size
1631    * (for example Gfx12 CCS requires 512B alignment while the tile's width
1632    * can be 128B), so align the row pitch to the alignment.
1633    */
1634   assert(alignment_B >= tile_info->phys_extent_B.width);
1635   return isl_align(total_w_tl * tile_info->phys_extent_B.width, alignment_B);
1636}
1637
1638static uint32_t
1639isl_calc_min_row_pitch(const struct isl_device *dev,
1640                       const struct isl_surf_init_info *surf_info,
1641                       const struct isl_tile_info *tile_info,
1642                       const struct isl_extent4d *phys_total_el,
1643                       uint32_t alignment_B)
1644{
1645   if (tile_info->tiling == ISL_TILING_LINEAR) {
1646      return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
1647                                           alignment_B);
1648   } else {
1649      return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
1650                                          phys_total_el, alignment_B);
1651   }
1652}
1653
/**
 * Is the pitch `n` in the valid range for a hardware bitfield, if the
 * bitfield's size is `bits` bits?
 *
 * Hardware pitch fields are offset by 1. For example, if the size of
 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
 * pitches is [1, 2^B] inclusive.  If the surface pitch is N, then
 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
 *
 * \param n     pitch to test; callers must pass a non-zero value
 * \param bits  width of the hardware field; 0 means there is no such field
 *              and no pitch is accepted
 * \return true if 1 <= n <= 2^bits
 */
static bool
pitch_in_range(uint32_t n, uint32_t bits)
{
   assert(n != 0);

   /* A zero-width field cannot hold any pitch; a zero pitch is never valid
    * (this also keeps release builds, where the assert is compiled out,
    * rejecting it).
    */
   if (bits == 0 || n == 0)
      return false;

   /* Once the field is at least as wide as n itself, every non-zero 32-bit
    * pitch fits.  Handling this separately also keeps the shift below well
    * inside the width of its type.
    */
   if (bits >= 32)
      return true;

   /* Shift in 64 bits: a plain `1 << bits` is evaluated as a signed int and
    * is undefined once bits reaches 31.
    */
   return n <= ((uint64_t) 1 << bits);
}
1669
1670static bool
1671isl_calc_row_pitch(const struct isl_device *dev,
1672                   const struct isl_surf_init_info *surf_info,
1673                   const struct isl_tile_info *tile_info,
1674                   enum isl_dim_layout dim_layout,
1675                   const struct isl_extent4d *phys_total_el,
1676                   uint32_t *out_row_pitch_B)
1677{
1678   uint32_t alignment_B =
1679      isl_calc_row_pitch_alignment(dev, surf_info, tile_info);
1680
1681   const uint32_t min_row_pitch_B =
1682      isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
1683                             alignment_B);
1684
1685   if (surf_info->row_pitch_B != 0) {
1686      if (surf_info->row_pitch_B < min_row_pitch_B)
1687         return false;
1688
1689      if (surf_info->row_pitch_B % alignment_B != 0)
1690         return false;
1691   }
1692
1693   const uint32_t row_pitch_B =
1694      surf_info->row_pitch_B != 0 ? surf_info->row_pitch_B : min_row_pitch_B;
1695
1696   const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width;
1697
1698   if (row_pitch_B == 0)
1699      return false;
1700
1701   if (dim_layout == ISL_DIM_LAYOUT_GFX9_1D) {
1702      /* SurfacePitch is ignored for this layout. */
1703      goto done;
1704   }
1705
1706   if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1707                            ISL_SURF_USAGE_TEXTURE_BIT |
1708                            ISL_SURF_USAGE_STORAGE_BIT)) &&
1709       !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info)))
1710      return false;
1711
1712   if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
1713                            ISL_SURF_USAGE_MCS_BIT)) &&
1714       !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info)))
1715      return false;
1716
1717   if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
1718       !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1719      return false;
1720
1721   if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
1722       !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1723      return false;
1724
1725   const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
1726      _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
1727      _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
1728
1729   if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
1730       !pitch_in_range(row_pitch_B, stencil_pitch_bits))
1731      return false;
1732
1733 done:
1734   *out_row_pitch_B = row_pitch_B;
1735   return true;
1736}
1737
1738bool
1739isl_surf_init_s(const struct isl_device *dev,
1740                struct isl_surf *surf,
1741                const struct isl_surf_init_info *restrict info)
1742{
1743   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1744
1745   const struct isl_extent4d logical_level0_px = {
1746      .w = info->width,
1747      .h = info->height,
1748      .d = info->depth,
1749      .a = info->array_len,
1750   };
1751
1752   enum isl_tiling tiling;
1753   if (!isl_surf_choose_tiling(dev, info, &tiling))
1754      return false;
1755
1756   const enum isl_dim_layout dim_layout =
1757      isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);
1758
1759   enum isl_msaa_layout msaa_layout;
1760   if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
1761       return false;
1762
1763   struct isl_tile_info tile_info;
1764   isl_tiling_get_info(tiling, info->dim, msaa_layout, fmtl->bpb,
1765                       info->samples, &tile_info);
1766
1767   struct isl_extent3d image_align_el;
1768   isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
1769                                 &image_align_el);
1770
1771   struct isl_extent3d image_align_sa =
1772      isl_extent3d_el_to_sa(info->format, image_align_el);
1773
1774   struct isl_extent4d phys_level0_sa;
1775   isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
1776                                  &phys_level0_sa);
1777
1778   enum isl_array_pitch_span array_pitch_span =
1779      isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
1780
1781   uint32_t array_pitch_el_rows;
1782   struct isl_extent4d phys_total_el;
1783   isl_calc_phys_total_extent_el(dev, info, &tile_info,
1784                                 dim_layout, msaa_layout,
1785                                 &image_align_sa, &phys_level0_sa,
1786                                 array_pitch_span, &array_pitch_el_rows,
1787                                 &phys_total_el);
1788
1789   uint32_t row_pitch_B;
1790   if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
1791                           &phys_total_el, &row_pitch_B))
1792      return false;
1793
1794   uint32_t base_alignment_B;
1795   uint64_t size_B;
1796   if (tiling == ISL_TILING_LINEAR) {
1797      /* LINEAR tiling has no concept of intra-tile arrays */
1798      assert(phys_total_el.d == 1 && phys_total_el.a == 1);
1799
1800      size_B = (uint64_t) row_pitch_B * phys_total_el.h;
1801
1802      /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
1803       *
1804       *    "The Base Address for linear render target surfaces and surfaces
1805       *    accessed with the typed surface read/write data port messages must
1806       *    be element-size aligned, for non-YUV surface formats, or a
1807       *    multiple of 2 element-sizes for YUV surface formats. Other linear
1808       *    surfaces have no alignment requirements (byte alignment is
1809       *    sufficient.)"
1810       */
1811      base_alignment_B = MAX(1, info->min_alignment_B);
1812      if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1813         if (isl_format_is_yuv(info->format)) {
1814            base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 4);
1815         } else {
1816            base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 8);
1817         }
1818      }
1819      base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B);
1820
1821      /* From the Skylake PRM Vol 2c, PLANE_STRIDE::Stride:
1822       *
1823       *     "For Linear memory, this field specifies the stride in chunks of
1824       *     64 bytes (1 cache line)."
1825       */
1826      if (isl_surf_usage_is_display(info->usage))
1827         base_alignment_B = MAX(base_alignment_B, 64);
1828   } else {
1829      /* Pitches must make sense with the tiling */
1830      assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
1831
1832      uint32_t array_slices, array_pitch_tl_rows;
1833      if (phys_total_el.d > 1) {
1834         assert(phys_total_el.a == 1);
1835         array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows,
1836                                              tile_info.logical_extent_el.h);
1837         array_slices = isl_align_div(phys_total_el.d,
1838                                      tile_info.logical_extent_el.d);
1839      } else if (phys_total_el.a > 1) {
1840         assert(phys_total_el.d == 1);
1841         array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows,
1842                                              tile_info.logical_extent_el.h);
1843         array_slices = isl_align_div(phys_total_el.a,
1844                                      tile_info.logical_extent_el.a);
1845      } else {
1846         assert(phys_total_el.d == 1 && phys_total_el.a == 1);
1847         array_pitch_tl_rows = 0;
1848         array_slices = 1;
1849      }
1850
1851      const uint32_t total_h_tl =
1852         (array_slices - 1) * array_pitch_tl_rows +
1853         isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height);
1854
1855      size_B = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch_B;
1856
1857      const uint32_t tile_size_B = tile_info.phys_extent_B.width *
1858                                   tile_info.phys_extent_B.height;
1859      assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
1860      base_alignment_B = MAX(info->min_alignment_B, tile_size_B);
1861
1862      /* The diagram in the Bspec section Memory Compression - Gfx12, shows
1863       * that the CCS is indexed in 256B chunks. However, the
1864       * PLANE_AUX_DIST::Auxiliary Surface Distance field is in units of 4K
1865       * pages. We currently don't assign the usage field like we do for main
1866       * surfaces, so just use 4K for now.
1867       */
1868      if (tiling == ISL_TILING_GFX12_CCS)
1869         base_alignment_B = MAX(base_alignment_B, 4096);
1870
1871      /* Gfx12+ requires that images be 64K-aligned if they're going to used
1872       * with CCS.  This is because the Aux translation table maps main
1873       * surface addresses to aux addresses at a 64K (in the main surface)
1874       * granularity.  Because we don't know for sure in ISL if a surface will
1875       * use CCS, we have to guess based on the DISABLE_AUX usage bit.  The
1876       * one thing we do know is that we haven't enable CCS on linear images
1877       * yet so we can avoid the extra alignment there.
1878       */
1879      if (ISL_GFX_VER(dev) >= 12 &&
1880          !(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
1881         base_alignment_B = MAX(base_alignment_B, 64 * 1024);
1882      }
1883   }
1884
1885   if (ISL_GFX_VER(dev) < 9) {
1886      /* From the Broadwell PRM Vol 5, Surface Layout:
1887       *
1888       *    "In addition to restrictions on maximum height, width, and depth,
1889       *     surfaces are also restricted to a maximum size in bytes. This
1890       *     maximum is 2 GB for all products and all surface types."
1891       *
1892       * This comment is applicable to all Pre-gfx9 platforms.
1893       */
1894      if (size_B > (uint64_t) 1 << 31)
1895         return false;
1896   } else if (ISL_GFX_VER(dev) < 11) {
1897      /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
1898       *    "In addition to restrictions on maximum height, width, and depth,
1899       *     surfaces are also restricted to a maximum size of 2^38 bytes.
1900       *     All pixels within the surface must be contained within 2^38 bytes
1901       *     of the base address."
1902       */
1903      if (size_B > (uint64_t) 1 << 38)
1904         return false;
1905   } else {
1906      /* gfx11+ platforms raised this limit to 2^44 bytes. */
1907      if (size_B > (uint64_t) 1 << 44)
1908         return false;
1909   }
1910
1911   *surf = (struct isl_surf) {
1912      .dim = info->dim,
1913      .dim_layout = dim_layout,
1914      .msaa_layout = msaa_layout,
1915      .tiling = tiling,
1916      .format = info->format,
1917
1918      .levels = info->levels,
1919      .samples = info->samples,
1920
1921      .image_alignment_el = image_align_el,
1922      .logical_level0_px = logical_level0_px,
1923      .phys_level0_sa = phys_level0_sa,
1924
1925      .size_B = size_B,
1926      .alignment_B = base_alignment_B,
1927      .row_pitch_B = row_pitch_B,
1928      .array_pitch_el_rows = array_pitch_el_rows,
1929      .array_pitch_span = array_pitch_span,
1930
1931      .usage = info->usage,
1932   };
1933
1934   return true;
1935}
1936
1937void
1938isl_surf_get_tile_info(const struct isl_surf *surf,
1939                       struct isl_tile_info *tile_info)
1940{
1941   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1942   isl_tiling_get_info(surf->tiling, surf->dim, surf->msaa_layout, fmtl->bpb,
1943                       surf->samples, tile_info);
1944}
1945
1946bool
1947isl_surf_get_hiz_surf(const struct isl_device *dev,
1948                      const struct isl_surf *surf,
1949                      struct isl_surf *hiz_surf)
1950{
1951   assert(ISL_GFX_VER(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev));
1952
1953   if (!isl_surf_usage_is_depth(surf->usage))
1954      return false;
1955
1956   /* HiZ only works with Y-tiled depth buffers */
1957   if (!isl_tiling_is_any_y(surf->tiling))
1958      return false;
1959
1960   /* On SNB+, compressed depth buffers cannot be interleaved with stencil. */
1961   switch (surf->format) {
1962   case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
1963      if (isl_surf_usage_is_depth_and_stencil(surf->usage)) {
1964         assert(ISL_GFX_VER(dev) == 5);
1965         unreachable("This should work, but is untested");
1966      }
1967      FALLTHROUGH;
1968   case ISL_FORMAT_R16_UNORM:
1969   case ISL_FORMAT_R32_FLOAT:
1970      break;
1971   case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
1972      if (ISL_GFX_VER(dev) == 5) {
1973         assert(isl_surf_usage_is_depth_and_stencil(surf->usage));
1974         unreachable("This should work, but is untested");
1975      }
1976      FALLTHROUGH;
1977   default:
1978      return false;
1979   }
1980
1981   /* Multisampled depth is always interleaved */
1982   assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
1983          surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
1984
1985   /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
1986    *
1987    *    "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
1988    *    Target View Extent, and Depth Coordinate Offset X/Y of the
1989    *    hierarchical depth buffer are inherited from the depth buffer. The
1990    *    height and width of the hierarchical depth buffer that must be
1991    *    allocated are computed by the following formulas, where HZ is the
1992    *    hierarchical depth buffer and Z is the depth buffer. The Z_Height,
1993    *    Z_Width, and Z_Depth values given in these formulas are those present
1994    *    in 3DSTATE_DEPTH_BUFFER incremented by one.
1995    *
1996    *    "The value of Z_Height and Z_Width must each be multiplied by 2 before
1997    *    being applied to the table below if Number of Multisamples is set to
1998    *    NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
1999    *    Z_Width must be multiplied by 4 before being applied to the table
2000    *    below if Number of Multisamples is set to NUMSAMPLES_8."
2001    *
2002    * In the Sky Lake PRM, the second paragraph is replaced with this:
2003    *
2004    *    "The Z_Height and Z_Width values must equal those present in
2005    *    3DSTATE_DEPTH_BUFFER incremented by one."
2006    *
2007    * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ
2008    * block corresponds to a region of 8x4 samples in the primary depth
2009    * surface.  On Sky Lake, on the other hand, each HiZ block corresponds to
2010    * a region of 8x4 pixels in the primary depth surface regardless of the
2011    * number of samples.  The dimensions of a HiZ block in both pixels and
2012    * samples are given in the table below:
2013    *
2014    *                    | SNB - BDW |     SKL+
2015    *              ------+-----------+-------------
2016    *                1x  |  8 x 4 sa |   8 x 4 sa
2017    *               MSAA |  8 x 4 px |   8 x 4 px
2018    *              ------+-----------+-------------
2019    *                2x  |  8 x 4 sa |  16 x 4 sa
2020    *               MSAA |  4 x 4 px |   8 x 4 px
2021    *              ------+-----------+-------------
2022    *                4x  |  8 x 4 sa |  16 x 8 sa
2023    *               MSAA |  4 x 2 px |   8 x 4 px
2024    *              ------+-----------+-------------
2025    *                8x  |  8 x 4 sa |  32 x 8 sa
2026    *               MSAA |  2 x 2 px |   8 x 4 px
2027    *              ------+-----------+-------------
2028    *               16x  |    N/A    | 32 x 16 sa
2029    *               MSAA |    N/A    |  8 x  4 px
2030    *              ------+-----------+-------------
2031    *
2032    * There are a number of different ways that this discrepency could be
2033    * handled.  The way we have chosen is to simply make MSAA HiZ have the
2034    * same number of samples as the parent surface pre-Sky Lake and always be
2035    * single-sampled on Sky Lake and above.  Since the block sizes of
2036    * compressed formats are given in samples, this neatly handles everything
2037    * without the need for additional HiZ formats with different block sizes
2038    * on SKL+.
2039    */
2040   const unsigned samples = ISL_GFX_VER(dev) >= 9 ? 1 : surf->samples;
2041
2042   return isl_surf_init(dev, hiz_surf,
2043                        .dim = surf->dim,
2044                        .format = ISL_FORMAT_HIZ,
2045                        .width = surf->logical_level0_px.width,
2046                        .height = surf->logical_level0_px.height,
2047                        .depth = surf->logical_level0_px.depth,
2048                        .levels = surf->levels,
2049                        .array_len = surf->logical_level0_px.array_len,
2050                        .samples = samples,
2051                        .usage = ISL_SURF_USAGE_HIZ_BIT,
2052                        .tiling_flags = ISL_TILING_HIZ_BIT);
2053}
2054
2055bool
2056isl_surf_get_mcs_surf(const struct isl_device *dev,
2057                      const struct isl_surf *surf,
2058                      struct isl_surf *mcs_surf)
2059{
2060   /* It must be multisampled with an array layout */
2061   if (surf->msaa_layout != ISL_MSAA_LAYOUT_ARRAY)
2062      return false;
2063
2064   if (mcs_surf->size_B > 0)
2065      return false;
2066
2067   /* The following are true of all multisampled surfaces */
2068   assert(surf->samples > 1);
2069   assert(surf->dim == ISL_SURF_DIM_2D);
2070   assert(surf->levels == 1);
2071   assert(surf->logical_level0_px.depth == 1);
2072
2073   /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
2074    *
2075    *   This field must be set to 0 for all SINT MSRTs when all RT channels
2076    *   are not written
2077    *
2078    * In practice this means that we have to disable MCS for all signed
2079    * integer MSAA buffers.  The alternative, to disable MCS only when one
2080    * of the render target channels is disabled, is impractical because it
2081    * would require converting between CMS and UMS MSAA layouts on the fly,
2082    * which is expensive.
2083    */
2084   if (ISL_GFX_VER(dev) == 7 && isl_format_has_sint_channel(surf->format))
2085      return false;
2086
2087   /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9
2088    * bits which means the maximum pitch of a compression surface is 512
2089    * tiles or 64KB (since MCS is always Y-tiled).  Since a 16x MCS buffer is
2090    * 64bpp, this gives us a maximum width of 8192 pixels.  We can create
2091    * larger multisampled surfaces, we just can't compress them.   For 2x, 4x,
2092    * and 8x, we have enough room for the full 16k supported by the hardware.
2093    */
2094   if (surf->samples == 16 && surf->logical_level0_px.width > 8192)
2095      return false;
2096
2097   enum isl_format mcs_format;
2098   switch (surf->samples) {
2099   case 2:  mcs_format = ISL_FORMAT_MCS_2X;  break;
2100   case 4:  mcs_format = ISL_FORMAT_MCS_4X;  break;
2101   case 8:  mcs_format = ISL_FORMAT_MCS_8X;  break;
2102   case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
2103   default:
2104      unreachable("Invalid sample count");
2105   }
2106
2107   return isl_surf_init(dev, mcs_surf,
2108                        .dim = ISL_SURF_DIM_2D,
2109                        .format = mcs_format,
2110                        .width = surf->logical_level0_px.width,
2111                        .height = surf->logical_level0_px.height,
2112                        .depth = 1,
2113                        .levels = 1,
2114                        .array_len = surf->logical_level0_px.array_len,
2115                        .samples = 1, /* MCS surfaces are really single-sampled */
2116                        .usage = ISL_SURF_USAGE_MCS_BIT,
2117                        .tiling_flags = ISL_TILING_Y0_BIT);
2118}
2119
2120bool
2121isl_surf_supports_ccs(const struct isl_device *dev,
2122                      const struct isl_surf *surf,
2123                      const struct isl_surf *hiz_or_mcs_surf)
2124{
2125   /* CCS support does not exist prior to Gfx7 */
2126   if (ISL_GFX_VER(dev) <= 6)
2127      return false;
2128
2129   /* Wa_22011186057: Disable compression on ADL-P A0 */
2130   if (dev->info->is_alderlake && dev->info->gt == 2 &&
2131       dev->info->revision == 0)
2132      return false;
2133
2134   if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
2135      return false;
2136
2137   if (isl_format_is_compressed(surf->format))
2138      return false;
2139
2140   if (!isl_is_pow2(isl_format_get_layout(surf->format)->bpb))
2141      return false;
2142
2143   /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
2144    * Target(s)", beneath the "Fast Color Clear" bullet (p326):
2145    *
2146    *     - Support is limited to tiled render targets.
2147    *
2148    * From the Skylake documentation, it is made clear that X-tiling is no
2149    * longer supported:
2150    *
2151    *     - MCS and Lossless compression is supported for
2152    *       TiledY/TileYs/TileYf non-MSRTs only.
2153    *
2154    * From the BSpec (44930) for Gfx12:
2155    *
2156    *    Linear CCS is only allowed for Untyped Buffers but only via HDC
2157    *    Data-Port messages.
2158    *
2159    * We never use untyped messages on surfaces created by ISL on Gfx9+ so
2160    * this means linear is out on Gfx12+ as well.
2161    */
2162   if (surf->tiling == ISL_TILING_LINEAR)
2163      return false;
2164
2165   if (ISL_GFX_VER(dev) >= 12) {
2166      if (isl_surf_usage_is_stencil(surf->usage)) {
2167         /* HiZ and MCS aren't allowed with stencil */
2168         assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
2169
2170         /* Multi-sampled stencil cannot have CCS */
2171         if (surf->samples > 1)
2172            return false;
2173      } else if (isl_surf_usage_is_depth(surf->usage)) {
2174         const struct isl_surf *hiz_surf = hiz_or_mcs_surf;
2175
2176         /* With depth surfaces, HIZ is required for CCS. */
2177         if (hiz_surf == NULL || hiz_surf->size_B == 0)
2178            return false;
2179
2180         assert(hiz_surf->usage & ISL_SURF_USAGE_HIZ_BIT);
2181         assert(hiz_surf->tiling == ISL_TILING_HIZ);
2182         assert(hiz_surf->format == ISL_FORMAT_HIZ);
2183      } else if (surf->samples > 1) {
2184         const struct isl_surf *mcs_surf = hiz_or_mcs_surf;
2185
2186         /* With multisampled color, CCS requires MCS */
2187         if (mcs_surf == NULL || mcs_surf->size_B == 0)
2188            return false;
2189
2190         assert(mcs_surf->usage & ISL_SURF_USAGE_MCS_BIT);
2191         assert(isl_tiling_is_any_y(mcs_surf->tiling));
2192         assert(isl_format_is_mcs(mcs_surf->format));
2193      } else {
2194         /* Single-sampled color can't have MCS or HiZ */
2195         assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
2196      }
2197
2198      /* On Gfx12, all CCS-compressed surface pitches must be multiples of
2199       * 512B.
2200       */
2201      if (surf->row_pitch_B % 512 != 0)
2202         return false;
2203
2204      /* According to Wa_1406738321, 3D textures need a blit to a new
2205       * surface in order to perform a resolve. For now, just disable CCS.
2206       */
2207      if (surf->dim == ISL_SURF_DIM_3D) {
2208         isl_finishme("%s:%s: CCS for 3D textures is disabled, but a workaround"
2209                      " is available.", __FILE__, __func__);
2210         return false;
2211      }
2212
2213      /* Wa_1207137018
2214       *
2215       * TODO: implement following workaround currently covered by the
2216       * restriction above. If following conditions are met:
2217       *
2218       *    - RENDER_SURFACE_STATE.Surface Type == 3D
2219       *    - RENDER_SURFACE_STATE.Auxiliary Surface Mode != AUX_NONE
2220       *    - RENDER_SURFACE_STATE.Tiled ResourceMode is TYF or TYS
2221       *
2222       * Set the value of RENDER_SURFACE_STATE.Mip Tail Start LOD to a mip
2223       * that larger than those present in the surface (i.e. 15)
2224       */
2225
2226      /* TODO: Handle the other tiling formats */
2227      if (surf->tiling != ISL_TILING_Y0)
2228         return false;
2229   } else {
2230      /* ISL_GFX_VER(dev) < 12 */
2231      if (surf->samples > 1)
2232         return false;
2233
2234      /* CCS is only for color images on Gfx7-11 */
2235      if (isl_surf_usage_is_depth_or_stencil(surf->usage))
2236         return false;
2237
2238      /* We're single-sampled color so having HiZ or MCS makes no sense */
2239      assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
2240
2241      /* The PRM doesn't say this explicitly, but fast-clears don't appear to
2242       * work for 3D textures until gfx9 where the layout of 3D textures
2243       * changes to match 2D array textures.
2244       */
2245      if (ISL_GFX_VER(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
2246         return false;
2247
2248      /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of
2249       * Non-MultiSampler Render Target Restrictions):
2250       *
2251       *    "Support is for non-mip-mapped and non-array surface types only."
2252       *
2253       * This restriction is lifted on gfx8+.  Technically, it may be possible
2254       * to create a CCS for an arrayed or mipmapped image and only enable
2255       * CCS_D when rendering to the base slice.  However, there is no
2256       * documentation tell us what the hardware would do in that case or what
2257       * it does if you walk off the bases slice.  (Does it ignore CCS or does
2258       * it start scribbling over random memory?)  We play it safe and just
2259       * follow the docs and don't allow CCS_D for arrayed or mip-mapped
2260       * surfaces.
2261       */
2262      if (ISL_GFX_VER(dev) <= 7 &&
2263          (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
2264         return false;
2265
2266      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
2267       * Target(s)", beneath the "Fast Color Clear" bullet (p326):
2268       *
2269       *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
2270       *       64bpp, and 128bpp.
2271       */
2272      if (isl_format_get_layout(surf->format)->bpb < 32)
2273         return false;
2274
2275      /* From the Skylake documentation, it is made clear that X-tiling is no
2276       * longer supported:
2277       *
2278       *     - MCS and Lossless compression is supported for
2279       *     TiledY/TileYs/TileYf non-MSRTs only.
2280       */
2281      if (ISL_GFX_VER(dev) >= 9 && !isl_tiling_is_any_y(surf->tiling))
2282         return false;
2283   }
2284
2285   return true;
2286}
2287
2288bool
2289isl_surf_get_ccs_surf(const struct isl_device *dev,
2290                      const struct isl_surf *surf,
2291                      const struct isl_surf *hiz_or_mcs_surf,
2292                      struct isl_surf *ccs_surf,
2293                      uint32_t row_pitch_B)
2294{
2295   if (!isl_surf_supports_ccs(dev, surf, hiz_or_mcs_surf))
2296      return false;
2297
2298   if (ISL_GFX_VER(dev) >= 12) {
2299      enum isl_format ccs_format;
2300      switch (isl_format_get_layout(surf->format)->bpb) {
2301      case 8:     ccs_format = ISL_FORMAT_GFX12_CCS_8BPP_Y0;    break;
2302      case 16:    ccs_format = ISL_FORMAT_GFX12_CCS_16BPP_Y0;   break;
2303      case 32:    ccs_format = ISL_FORMAT_GFX12_CCS_32BPP_Y0;   break;
2304      case 64:    ccs_format = ISL_FORMAT_GFX12_CCS_64BPP_Y0;   break;
2305      case 128:   ccs_format = ISL_FORMAT_GFX12_CCS_128BPP_Y0;  break;
2306      default:
2307         return false;
2308      }
2309
2310      /* On Gfx12, the CCS is a scaled-down version of the main surface. We
2311       * model this as the CCS compressing a 2D-view of the entire surface.
2312       */
2313      const bool ok =
2314         isl_surf_init(dev, ccs_surf,
2315                       .dim = ISL_SURF_DIM_2D,
2316                       .format = ccs_format,
2317                       .width = isl_surf_get_row_pitch_el(surf),
2318                       .height = surf->size_B / surf->row_pitch_B,
2319                       .depth = 1,
2320                       .levels = 1,
2321                       .array_len = 1,
2322                       .samples = 1,
2323                       .row_pitch_B = row_pitch_B,
2324                       .usage = ISL_SURF_USAGE_CCS_BIT,
2325                       .tiling_flags = ISL_TILING_GFX12_CCS_BIT);
2326      assert(!ok || ccs_surf->size_B == surf->size_B / 256);
2327      return ok;
2328   } else {
2329      enum isl_format ccs_format;
2330      if (ISL_GFX_VER(dev) >= 9) {
2331         switch (isl_format_get_layout(surf->format)->bpb) {
2332         case 32:    ccs_format = ISL_FORMAT_GFX9_CCS_32BPP;   break;
2333         case 64:    ccs_format = ISL_FORMAT_GFX9_CCS_64BPP;   break;
2334         case 128:   ccs_format = ISL_FORMAT_GFX9_CCS_128BPP;  break;
2335         default:    unreachable("Unsupported CCS format");
2336            return false;
2337         }
2338      } else if (surf->tiling == ISL_TILING_Y0) {
2339         switch (isl_format_get_layout(surf->format)->bpb) {
2340         case 32:    ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_Y;    break;
2341         case 64:    ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_Y;    break;
2342         case 128:   ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_Y;   break;
2343         default:    unreachable("Unsupported CCS format");
2344         }
2345      } else if (surf->tiling == ISL_TILING_X) {
2346         switch (isl_format_get_layout(surf->format)->bpb) {
2347         case 32:    ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_X;    break;
2348         case 64:    ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_X;    break;
2349         case 128:   ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_X;   break;
2350         default:    unreachable("Unsupported CCS format");
2351         }
2352      } else {
2353         unreachable("Invalid tiling format");
2354      }
2355
2356      return isl_surf_init(dev, ccs_surf,
2357                           .dim = surf->dim,
2358                           .format = ccs_format,
2359                           .width = surf->logical_level0_px.width,
2360                           .height = surf->logical_level0_px.height,
2361                           .depth = surf->logical_level0_px.depth,
2362                           .levels = surf->levels,
2363                           .array_len = surf->logical_level0_px.array_len,
2364                           .samples = 1,
2365                           .row_pitch_B = row_pitch_B,
2366                           .usage = ISL_SURF_USAGE_CCS_BIT,
2367                           .tiling_flags = ISL_TILING_CCS_BIT);
2368   }
2369}
2370
/* Dispatch a call to the hardware-generation-specific variant of `func`.
 *
 * Expands to a switch on ISL_GFX_VERX10(dev) that calls
 * isl_gfx<ver>_<func>(__VA_ARGS__) for the matching generation.  The value
 * 45 deliberately falls through to the gfx5 variant.  An unrecognized value
 * trips the assert in the default case; when asserts are compiled out,
 * nothing is called.
 *
 * `dev` is only used to pick the variant.  It is not passed along
 * implicitly, so callers that want it forwarded must repeat it in the
 * variadic arguments.
 *
 * The expansion is a bare switch statement that yields no value, so the
 * macro can only be used in statement position.
 */
#define isl_genX_call(dev, func, ...)              \
   switch (ISL_GFX_VERX10(dev)) {                  \
   case 40:                                        \
      isl_gfx4_##func(__VA_ARGS__);                \
      break;                                       \
   case 45:                                        \
      /* G45 surface state is the same as gfx5 */  \
   case 50:                                        \
      isl_gfx5_##func(__VA_ARGS__);                \
      break;                                       \
   case 60:                                        \
      isl_gfx6_##func(__VA_ARGS__);                \
      break;                                       \
   case 70:                                        \
      isl_gfx7_##func(__VA_ARGS__);                \
      break;                                       \
   case 75:                                        \
      isl_gfx75_##func(__VA_ARGS__);               \
      break;                                       \
   case 80:                                        \
      isl_gfx8_##func(__VA_ARGS__);                \
      break;                                       \
   case 90:                                        \
      isl_gfx9_##func(__VA_ARGS__);                \
      break;                                       \
   case 110:                                       \
      isl_gfx11_##func(__VA_ARGS__);               \
      break;                                       \
   case 120:                                       \
      isl_gfx12_##func(__VA_ARGS__);               \
      break;                                       \
   case 125:                                       \
      isl_gfx125_##func(__VA_ARGS__);              \
      break;                                       \
   default:                                        \
      assert(!"Unknown hardware generation");      \
   }
2408
2409void
2410isl_surf_fill_state_s(const struct isl_device *dev, void *state,
2411                      const struct isl_surf_fill_state_info *restrict info)
2412{
2413#ifndef NDEBUG
2414   isl_surf_usage_flags_t _base_usage =
2415      info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
2416                           ISL_SURF_USAGE_TEXTURE_BIT |
2417                           ISL_SURF_USAGE_STORAGE_BIT);
2418   /* They may only specify one of the above bits at a time */
2419   assert(__builtin_popcount(_base_usage) == 1);
2420   /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */
2421   assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage);
2422#endif
2423
2424   if (info->surf->dim == ISL_SURF_DIM_3D) {
2425      assert(info->view->base_array_layer + info->view->array_len <=
2426             info->surf->logical_level0_px.depth);
2427   } else {
2428      assert(info->view->base_array_layer + info->view->array_len <=
2429             info->surf->logical_level0_px.array_len);
2430   }
2431
2432   isl_genX_call(dev, surf_fill_state_s, dev, state, info);
2433}
2434
/**
 * Generation-independent entry point for buffer_fill_state_s.
 *
 * Performs no validation of its own: `dev`, `state` and `info` are forwarded
 * unchanged to the buffer_fill_state_s variant that isl_genX_call() selects
 * from the device's hardware generation.
 */
void
isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
                        const struct isl_buffer_fill_state_info *restrict info)
{
   isl_genX_call(dev, buffer_fill_state_s, dev, state, info);
}
2441
/**
 * Generation-independent entry point for null_fill_state.
 *
 * `dev` is used only to select the hardware generation; it is not forwarded.
 * The per-generation null_fill_state functions are called with just `state`
 * and `info`.
 */
void
isl_null_fill_state_s(const struct isl_device *dev, void *state,
                      const struct isl_null_fill_state_info *restrict info)
{
   isl_genX_call(dev, null_fill_state, state, info);
}
2448
2449void
2450isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
2451                             const struct isl_depth_stencil_hiz_emit_info *restrict info)
2452{
2453   if (info->depth_surf && info->stencil_surf) {
2454      if (!dev->info->has_hiz_and_separate_stencil) {
2455         assert(info->depth_surf == info->stencil_surf);
2456         assert(info->depth_address == info->stencil_address);
2457      }
2458      assert(info->depth_surf->dim == info->stencil_surf->dim);
2459   }
2460
2461   if (info->depth_surf) {
2462      assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT));
2463      if (info->depth_surf->dim == ISL_SURF_DIM_3D) {
2464         assert(info->view->base_array_layer + info->view->array_len <=
2465                info->depth_surf->logical_level0_px.depth);
2466      } else {
2467         assert(info->view->base_array_layer + info->view->array_len <=
2468                info->depth_surf->logical_level0_px.array_len);
2469      }
2470   }
2471
2472   if (info->stencil_surf) {
2473      assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT));
2474      if (info->stencil_surf->dim == ISL_SURF_DIM_3D) {
2475         assert(info->view->base_array_layer + info->view->array_len <=
2476                info->stencil_surf->logical_level0_px.depth);
2477      } else {
2478         assert(info->view->base_array_layer + info->view->array_len <=
2479                info->stencil_surf->logical_level0_px.array_len);
2480      }
2481   }
2482
2483   isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info);
2484}
2485
2486/**
2487 * A variant of isl_surf_get_image_offset_sa() specific to
2488 * ISL_DIM_LAYOUT_GFX4_2D.
2489 */
2490static void
2491get_image_offset_sa_gfx4_2d(const struct isl_surf *surf,
2492                            uint32_t level, uint32_t logical_array_layer,
2493                            uint32_t *x_offset_sa,
2494                            uint32_t *y_offset_sa)
2495{
2496   assert(level < surf->levels);
2497   if (surf->dim == ISL_SURF_DIM_3D)
2498      assert(logical_array_layer < surf->logical_level0_px.depth);
2499   else
2500      assert(logical_array_layer < surf->logical_level0_px.array_len);
2501
2502   const struct isl_extent3d image_align_sa =
2503      isl_surf_get_image_alignment_sa(surf);
2504
2505   const uint32_t W0 = surf->phys_level0_sa.width;
2506   const uint32_t H0 = surf->phys_level0_sa.height;
2507
2508   const uint32_t phys_layer = logical_array_layer *
2509      (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
2510
2511   uint32_t x = 0;
2512   uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
2513
2514   for (uint32_t l = 0; l < level; ++l) {
2515      if (l == 1) {
2516         uint32_t W = isl_minify(W0, l);
2517         x += isl_align_npot(W, image_align_sa.w);
2518      } else {
2519         uint32_t H = isl_minify(H0, l);
2520         y += isl_align_npot(H, image_align_sa.h);
2521      }
2522   }
2523
2524   *x_offset_sa = x;
2525   *y_offset_sa = y;
2526}
2527
2528/**
2529 * A variant of isl_surf_get_image_offset_sa() specific to
2530 * ISL_DIM_LAYOUT_GFX4_3D.
2531 */
2532static void
2533get_image_offset_sa_gfx4_3d(const struct isl_surf *surf,
2534                            uint32_t level, uint32_t logical_z_offset_px,
2535                            uint32_t *x_offset_sa,
2536                            uint32_t *y_offset_sa)
2537{
2538   assert(level < surf->levels);
2539   if (surf->dim == ISL_SURF_DIM_3D) {
2540      assert(surf->phys_level0_sa.array_len == 1);
2541      assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
2542   } else {
2543      assert(surf->dim == ISL_SURF_DIM_2D);
2544      assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT);
2545      assert(surf->phys_level0_sa.array_len == 6);
2546      assert(logical_z_offset_px < surf->phys_level0_sa.array_len);
2547   }
2548
2549   const struct isl_extent3d image_align_sa =
2550      isl_surf_get_image_alignment_sa(surf);
2551
2552   const uint32_t W0 = surf->phys_level0_sa.width;
2553   const uint32_t H0 = surf->phys_level0_sa.height;
2554   const uint32_t D0 = surf->phys_level0_sa.depth;
2555   const uint32_t AL = surf->phys_level0_sa.array_len;
2556
2557   uint32_t x = 0;
2558   uint32_t y = 0;
2559
2560   for (uint32_t l = 0; l < level; ++l) {
2561      const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
2562      const uint32_t level_d =
2563         isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL,
2564                        image_align_sa.d);
2565      const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
2566
2567      y += level_h * max_layers_vert;
2568   }
2569
2570   const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
2571   const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
2572   const uint32_t level_d =
2573      isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL,
2574                     image_align_sa.d);
2575
2576   const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
2577
2578   x += level_w * (logical_z_offset_px % max_layers_horiz);
2579   y += level_h * (logical_z_offset_px / max_layers_horiz);
2580
2581   *x_offset_sa = x;
2582   *y_offset_sa = y;
2583}
2584
2585static void
2586get_image_offset_sa_gfx6_stencil_hiz(const struct isl_surf *surf,
2587                                     uint32_t level,
2588                                     uint32_t logical_array_layer,
2589                                     uint32_t *x_offset_sa,
2590                                     uint32_t *y_offset_sa)
2591{
2592   assert(level < surf->levels);
2593   assert(surf->logical_level0_px.depth == 1);
2594   assert(logical_array_layer < surf->logical_level0_px.array_len);
2595
2596   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2597
2598   const struct isl_extent3d image_align_sa =
2599      isl_surf_get_image_alignment_sa(surf);
2600
2601   struct isl_tile_info tile_info;
2602   isl_surf_get_tile_info(surf, &tile_info);
2603   const struct isl_extent2d tile_extent_sa = {
2604      .w = tile_info.logical_extent_el.w * fmtl->bw,
2605      .h = tile_info.logical_extent_el.h * fmtl->bh,
2606   };
2607   /* Tile size is a multiple of image alignment */
2608   assert(tile_extent_sa.w % image_align_sa.w == 0);
2609   assert(tile_extent_sa.h % image_align_sa.h == 0);
2610
2611   const uint32_t W0 = surf->phys_level0_sa.w;
2612   const uint32_t H0 = surf->phys_level0_sa.h;
2613
2614   /* Each image has the same height as LOD0 because the hardware thinks
2615    * everything is LOD0
2616    */
2617   const uint32_t H = isl_align(H0, image_align_sa.h);
2618
2619   /* Quick sanity check for consistency */
2620   if (surf->phys_level0_sa.array_len > 1)
2621      assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh));
2622
2623   uint32_t x = 0, y = 0;
2624   for (uint32_t l = 0; l < level; ++l) {
2625      const uint32_t W = isl_minify(W0, l);
2626
2627      const uint32_t w = isl_align(W, tile_extent_sa.w);
2628      const uint32_t h = isl_align(H * surf->phys_level0_sa.a,
2629                                   tile_extent_sa.h);
2630
2631      if (l == 0) {
2632         y += h;
2633      } else {
2634         x += w;
2635      }
2636   }
2637
2638   y += H * logical_array_layer;
2639
2640   *x_offset_sa = x;
2641   *y_offset_sa = y;
2642}
2643
2644/**
2645 * A variant of isl_surf_get_image_offset_sa() specific to
2646 * ISL_DIM_LAYOUT_GFX9_1D.
2647 */
2648static void
2649get_image_offset_sa_gfx9_1d(const struct isl_surf *surf,
2650                            uint32_t level, uint32_t layer,
2651                            uint32_t *x_offset_sa,
2652                            uint32_t *y_offset_sa)
2653{
2654   assert(level < surf->levels);
2655   assert(layer < surf->phys_level0_sa.array_len);
2656   assert(surf->phys_level0_sa.height == 1);
2657   assert(surf->phys_level0_sa.depth == 1);
2658   assert(surf->samples == 1);
2659
2660   const uint32_t W0 = surf->phys_level0_sa.width;
2661   const struct isl_extent3d image_align_sa =
2662      isl_surf_get_image_alignment_sa(surf);
2663
2664   uint32_t x = 0;
2665
2666   for (uint32_t l = 0; l < level; ++l) {
2667      uint32_t W = isl_minify(W0, l);
2668      uint32_t w = isl_align_npot(W, image_align_sa.w);
2669
2670      x += w;
2671   }
2672
2673   *x_offset_sa = x;
2674   *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
2675}
2676
2677/**
2678 * Calculate the offset, in units of surface samples, to a subimage in the
2679 * surface.
2680 *
2681 * @invariant level < surface levels
2682 * @invariant logical_array_layer < logical array length of surface
2683 * @invariant logical_z_offset_px < logical depth of surface at level
2684 */
2685void
2686isl_surf_get_image_offset_sa(const struct isl_surf *surf,
2687                             uint32_t level,
2688                             uint32_t logical_array_layer,
2689                             uint32_t logical_z_offset_px,
2690                             uint32_t *x_offset_sa,
2691                             uint32_t *y_offset_sa,
2692                             uint32_t *z_offset_sa,
2693                             uint32_t *array_offset)
2694{
2695   assert(level < surf->levels);
2696   assert(logical_array_layer < surf->logical_level0_px.array_len);
2697   assert(logical_z_offset_px
2698          < isl_minify(surf->logical_level0_px.depth, level));
2699
2700   switch (surf->dim_layout) {
2701   case ISL_DIM_LAYOUT_GFX9_1D:
2702      get_image_offset_sa_gfx9_1d(surf, level, logical_array_layer,
2703                                  x_offset_sa, y_offset_sa);
2704      *z_offset_sa = 0;
2705      *array_offset = 0;
2706      break;
2707   case ISL_DIM_LAYOUT_GFX4_2D:
2708      get_image_offset_sa_gfx4_2d(surf, level, logical_array_layer
2709                                  + logical_z_offset_px,
2710                                  x_offset_sa, y_offset_sa);
2711      *z_offset_sa = 0;
2712      *array_offset = 0;
2713      break;
2714   case ISL_DIM_LAYOUT_GFX4_3D:
2715      get_image_offset_sa_gfx4_3d(surf, level, logical_array_layer +
2716                                  logical_z_offset_px,
2717                                  x_offset_sa, y_offset_sa);
2718      *z_offset_sa = 0;
2719      *array_offset = 0;
2720      break;
2721   case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
2722      get_image_offset_sa_gfx6_stencil_hiz(surf, level, logical_array_layer +
2723                                           logical_z_offset_px,
2724                                           x_offset_sa, y_offset_sa);
2725      *z_offset_sa = 0;
2726      *array_offset = 0;
2727      break;
2728
2729   default:
2730      unreachable("not reached");
2731   }
2732}
2733
2734void
2735isl_surf_get_image_offset_el(const struct isl_surf *surf,
2736                             uint32_t level,
2737                             uint32_t logical_array_layer,
2738                             uint32_t logical_z_offset_px,
2739                             uint32_t *x_offset_el,
2740                             uint32_t *y_offset_el,
2741                             uint32_t *z_offset_el,
2742                             uint32_t *array_offset)
2743{
2744   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2745
2746   assert(level < surf->levels);
2747   assert(logical_array_layer < surf->logical_level0_px.array_len);
2748   assert(logical_z_offset_px
2749          < isl_minify(surf->logical_level0_px.depth, level));
2750
2751   uint32_t x_offset_sa, y_offset_sa, z_offset_sa;
2752   isl_surf_get_image_offset_sa(surf, level,
2753                                logical_array_layer,
2754                                logical_z_offset_px,
2755                                &x_offset_sa,
2756                                &y_offset_sa,
2757                                &z_offset_sa,
2758                                array_offset);
2759
2760   *x_offset_el = x_offset_sa / fmtl->bw;
2761   *y_offset_el = y_offset_sa / fmtl->bh;
2762   *z_offset_el = z_offset_sa / fmtl->bd;
2763}
2764
2765void
2766isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
2767                                    uint32_t level,
2768                                    uint32_t logical_array_layer,
2769                                    uint32_t logical_z_offset_px,
2770                                    uint64_t *offset_B,
2771                                    uint32_t *x_offset_sa,
2772                                    uint32_t *y_offset_sa)
2773{
2774   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2775
2776   uint32_t x_offset_el, y_offset_el;
2777   isl_surf_get_image_offset_B_tile_el(surf, level,
2778                                       logical_array_layer,
2779                                       logical_z_offset_px,
2780                                       offset_B,
2781                                       &x_offset_el,
2782                                       &y_offset_el);
2783
2784   if (x_offset_sa) {
2785      *x_offset_sa = x_offset_el * fmtl->bw;
2786   } else {
2787      assert(x_offset_el == 0);
2788   }
2789
2790   if (y_offset_sa) {
2791      *y_offset_sa = y_offset_el * fmtl->bh;
2792   } else {
2793      assert(y_offset_el == 0);
2794   }
2795}
2796
2797void
2798isl_surf_get_image_offset_B_tile_el(const struct isl_surf *surf,
2799                                    uint32_t level,
2800                                    uint32_t logical_array_layer,
2801                                    uint32_t logical_z_offset_px,
2802                                    uint64_t *offset_B,
2803                                    uint32_t *x_offset_el,
2804                                    uint32_t *y_offset_el)
2805{
2806   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2807
2808   uint32_t total_x_offset_el, total_y_offset_el;
2809   uint32_t total_z_offset_el, total_array_offset;
2810   isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2811                                logical_z_offset_px,
2812                                &total_x_offset_el,
2813                                &total_y_offset_el,
2814                                &total_z_offset_el,
2815                                &total_array_offset);
2816
2817   uint32_t z_offset_el, array_offset;
2818   isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
2819                                      surf->msaa_layout, fmtl->bpb,
2820                                      surf->samples,
2821                                      surf->row_pitch_B,
2822                                      surf->array_pitch_el_rows,
2823                                      total_x_offset_el,
2824                                      total_y_offset_el,
2825                                      total_z_offset_el,
2826                                      total_array_offset,
2827                                      offset_B,
2828                                      x_offset_el,
2829                                      y_offset_el,
2830                                      &z_offset_el,
2831                                      &array_offset);
2832   assert(z_offset_el == 0);
2833   assert(array_offset == 0);
2834}
2835
2836void
2837isl_surf_get_image_range_B_tile(const struct isl_surf *surf,
2838                                uint32_t level,
2839                                uint32_t logical_array_layer,
2840                                uint32_t logical_z_offset_px,
2841                                uint64_t *start_tile_B,
2842                                uint64_t *end_tile_B)
2843{
2844   uint32_t start_x_offset_el, start_y_offset_el;
2845   uint32_t start_z_offset_el, start_array_slice;
2846   isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2847                                logical_z_offset_px,
2848                                &start_x_offset_el,
2849                                &start_y_offset_el,
2850                                &start_z_offset_el,
2851                                &start_array_slice);
2852
2853   /* Compute the size of the subimage in surface elements */
2854   const uint32_t subimage_w_sa = isl_minify(surf->phys_level0_sa.w, level);
2855   const uint32_t subimage_h_sa = isl_minify(surf->phys_level0_sa.h, level);
2856   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2857   const uint32_t subimage_w_el = isl_align_div_npot(subimage_w_sa, fmtl->bw);
2858   const uint32_t subimage_h_el = isl_align_div_npot(subimage_h_sa, fmtl->bh);
2859
2860   /* Find the last pixel */
2861   uint32_t end_x_offset_el = start_x_offset_el + subimage_w_el - 1;
2862   uint32_t end_y_offset_el = start_y_offset_el + subimage_h_el - 1;
2863
2864   /* We only consider one Z or array slice */
2865   const uint32_t end_z_offset_el = start_z_offset_el;
2866   const uint32_t end_array_slice = start_array_slice;
2867
2868   UNUSED uint32_t x_offset_el, y_offset_el, z_offset_el, array_slice;
2869   isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
2870                                      surf->msaa_layout, fmtl->bpb,
2871                                      surf->samples,
2872                                      surf->row_pitch_B,
2873                                      surf->array_pitch_el_rows,
2874                                      start_x_offset_el,
2875                                      start_y_offset_el,
2876                                      start_z_offset_el,
2877                                      start_array_slice,
2878                                      start_tile_B,
2879                                      &x_offset_el,
2880                                      &y_offset_el,
2881                                      &z_offset_el,
2882                                      &array_slice);
2883
2884   isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
2885                                      surf->msaa_layout, fmtl->bpb,
2886                                      surf->samples,
2887                                      surf->row_pitch_B,
2888                                      surf->array_pitch_el_rows,
2889                                      end_x_offset_el,
2890                                      end_y_offset_el,
2891                                      end_z_offset_el,
2892                                      end_array_slice,
2893                                      end_tile_B,
2894                                      &x_offset_el,
2895                                      &y_offset_el,
2896                                      &z_offset_el,
2897                                      &array_slice);
2898
2899   /* We want the range we return to be exclusive but the tile containing the
2900    * last pixel (what we just calculated) is inclusive.  Add one.
2901    */
2902   (*end_tile_B)++;
2903
2904   assert(*end_tile_B <= surf->size_B);
2905}
2906
2907void
2908isl_surf_get_image_surf(const struct isl_device *dev,
2909                        const struct isl_surf *surf,
2910                        uint32_t level,
2911                        uint32_t logical_array_layer,
2912                        uint32_t logical_z_offset_px,
2913                        struct isl_surf *image_surf,
2914                        uint64_t *offset_B,
2915                        uint32_t *x_offset_sa,
2916                        uint32_t *y_offset_sa)
2917{
2918   isl_surf_get_image_offset_B_tile_sa(surf,
2919                                       level,
2920                                       logical_array_layer,
2921                                       logical_z_offset_px,
2922                                       offset_B,
2923                                       x_offset_sa,
2924                                       y_offset_sa);
2925
2926   /* Even for cube maps there will be only single face, therefore drop the
2927    * corresponding flag if present.
2928    */
2929   const isl_surf_usage_flags_t usage =
2930      surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
2931
2932   bool ok UNUSED;
2933   ok = isl_surf_init(dev, image_surf,
2934                      .dim = ISL_SURF_DIM_2D,
2935                      .format = surf->format,
2936                      .width = isl_minify(surf->logical_level0_px.w, level),
2937                      .height = isl_minify(surf->logical_level0_px.h, level),
2938                      .depth = 1,
2939                      .levels = 1,
2940                      .array_len = 1,
2941                      .samples = surf->samples,
2942                      .row_pitch_B = surf->row_pitch_B,
2943                      .usage = usage,
2944                      .tiling_flags = (1 << surf->tiling));
2945   assert(ok);
2946}
2947
2948bool
2949isl_surf_get_uncompressed_surf(const struct isl_device *dev,
2950                               const struct isl_surf *surf,
2951                               const struct isl_view *view,
2952                               struct isl_surf *ucompr_surf,
2953                               struct isl_view *ucompr_view,
2954                               uint64_t *offset_B,
2955                               uint32_t *x_offset_el,
2956                               uint32_t *y_offset_el)
2957{
2958   const struct isl_format_layout *fmtl =
2959      isl_format_get_layout(surf->format);
2960   const enum isl_format view_format = view->format;
2961
2962   assert(fmtl->bw > 1 || fmtl->bh > 1 || fmtl->bd > 1);
2963   assert(isl_format_is_compressed(surf->format));
2964   assert(!isl_format_is_compressed(view->format));
2965   assert(isl_format_get_layout(view->format)->bpb == fmtl->bpb);
2966   assert(view->levels == 1);
2967
2968   const uint32_t view_width_px =
2969      isl_minify(surf->logical_level0_px.width, view->base_level);
2970   const uint32_t view_height_px =
2971      isl_minify(surf->logical_level0_px.height, view->base_level);
2972
2973   assert(surf->samples == 1);
2974   const uint32_t view_width_el = isl_align_div_npot(view_width_px, fmtl->bw);
2975   const uint32_t view_height_el = isl_align_div_npot(view_height_px, fmtl->bh);
2976
2977   /* If we ever enable 3D block formats, we'll need to re-think this */
2978   assert(fmtl->bd == 1);
2979
2980   if (view->array_len > 1) {
2981      /* The Skylake PRM Vol. 2d, "RENDER_SURFACE_STATE::X Offset" says:
2982       *
2983       *    "If Surface Array is enabled, this field must be zero."
2984       *
2985       * The PRMs for other hardware have similar text.  This is also tricky
2986       * to handle with things like BLORP's SW offsetting because the
2987       * increased surface size required for the offset may result in an image
2988       * height greater than qpitch.
2989       */
2990      if (view->base_level > 0)
2991         return false;
2992
2993      /* On Haswell and earlier, RENDER_SURFACE_STATE doesn't have a QPitch
2994       * field; it only has "array pitch span" which means the QPitch is
2995       * automatically calculated.  Since we're smashing the surface format
2996       * (block formats are subtly different) and the number of miplevels,
2997       * that calculation will get thrown off.  This means we can't do arrays
2998       * even at LOD0
2999       *
3000       * On Broadwell, we do have a QPitch field which we can control.
3001       * However, HALIGN and VALIGN are specified in pixels and are
3002       * hard-coded to align to exactly the block size of the compressed
3003       * texture.  This means that, when reinterpreted as a non-compressed
3004       * the QPitch may be anything but the HW requires it to be properly
3005       * aligned.
3006       */
3007      if (ISL_GFX_VER(dev) < 9)
3008         return false;
3009
3010      *ucompr_surf = *surf;
3011      ucompr_surf->levels = 1;
3012      ucompr_surf->format = view_format;
3013
3014      /* We're making an uncompressed view here.  The image dimensions
3015       * need to be scaled down by the block size.
3016       */
3017      assert(ucompr_surf->logical_level0_px.width == view_width_px);
3018      assert(ucompr_surf->logical_level0_px.height == view_height_px);
3019      ucompr_surf->logical_level0_px.width = view_width_el;
3020      ucompr_surf->logical_level0_px.height = view_height_el;
3021      ucompr_surf->phys_level0_sa = isl_surf_get_phys_level0_el(surf);
3022
3023      /* The surface mostly stays as-is; there is no offset */
3024      *offset_B = 0;
3025      *x_offset_el = 0;
3026      *y_offset_el = 0;
3027
3028      /* The view remains the same */
3029      *ucompr_view = *view;
3030   } else {
3031      /* If only one array slice is requested, directly offset to that slice.
3032       * We could, in theory, still use arrays in some cases but BLORP isn't
3033       * prepared for this and everyone who calls this function should be
3034       * prepared to handle an X/Y offset.
3035       */
3036      isl_surf_get_image_offset_B_tile_el(surf,
3037                                          view->base_level,
3038                                          surf->dim == ISL_SURF_DIM_3D ?
3039                                             0 : view->base_array_layer,
3040                                          surf->dim == ISL_SURF_DIM_3D ?
3041                                             view->base_array_layer : 0,
3042                                          offset_B,
3043                                          x_offset_el,
3044                                          y_offset_el);
3045
3046      /* Even for cube maps there will be only single face, therefore drop the
3047       * corresponding flag if present.
3048       */
3049      const isl_surf_usage_flags_t usage =
3050         surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
3051
3052      bool ok UNUSED;
3053      ok = isl_surf_init(dev, ucompr_surf,
3054                         .dim = ISL_SURF_DIM_2D,
3055                         .format = view_format,
3056                         .width = view_width_el,
3057                         .height = view_height_el,
3058                         .depth = 1,
3059                         .levels = 1,
3060                         .array_len = 1,
3061                         .samples = 1,
3062                         .row_pitch_B = surf->row_pitch_B,
3063                         .usage = usage,
3064                         .tiling_flags = (1 << surf->tiling));
3065      assert(ok);
3066
3067      /* The newly created image represents the one subimage we're
3068       * referencing with this view so it only has one array slice and
3069       * miplevel.
3070       */
3071      *ucompr_view = *view;
3072      ucompr_view->base_array_layer = 0;
3073      ucompr_view->base_level = 0;
3074   }
3075
3076   return true;
3077}
3078
3079void
3080isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
3081                                   enum isl_surf_dim dim,
3082                                   enum isl_msaa_layout msaa_layout,
3083                                   uint32_t bpb,
3084                                   uint32_t samples,
3085                                   uint32_t row_pitch_B,
3086                                   uint32_t array_pitch_el_rows,
3087                                   uint32_t total_x_offset_el,
3088                                   uint32_t total_y_offset_el,
3089                                   uint32_t total_z_offset_el,
3090                                   uint32_t total_array_offset,
3091                                   uint64_t *tile_offset_B,
3092                                   uint32_t *x_offset_el,
3093                                   uint32_t *y_offset_el,
3094                                   uint32_t *z_offset_el,
3095                                   uint32_t *array_offset)
3096{
3097   if (tiling == ISL_TILING_LINEAR) {
3098      assert(bpb % 8 == 0);
3099      assert(samples == 1);
3100      assert(total_z_offset_el == 0 && total_array_offset == 0);
3101      *tile_offset_B = (uint64_t)total_y_offset_el * row_pitch_B +
3102                       (uint64_t)total_x_offset_el * (bpb / 8);
3103      *x_offset_el = 0;
3104      *y_offset_el = 0;
3105      *z_offset_el = 0;
3106      *array_offset = 0;
3107      return;
3108   }
3109
3110   struct isl_tile_info tile_info;
3111   isl_tiling_get_info(tiling, dim, msaa_layout, bpb, samples, &tile_info);
3112
3113   /* Pitches must make sense with the tiling */
3114   assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
3115   if (tile_info.logical_extent_el.d > 1 || tile_info.logical_extent_el.a > 1)
3116      assert(array_pitch_el_rows % tile_info.logical_extent_el.h == 0);
3117
3118   /* For non-power-of-two formats, we need the address to be both tile and
3119    * element-aligned.  The easiest way to achieve this is to work with a tile
3120    * that is three times as wide as the regular tile.
3121    *
3122    * The tile info returned by get_tile_info has a logical size that is an
3123    * integer number of tile_info.format_bpb size elements.  To scale the
3124    * tile, we scale up the physical width and then treat the logical tile
3125    * size as if it has bpb size elements.
3126    */
3127   const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
3128   tile_info.phys_extent_B.width *= tile_el_scale;
3129
3130   /* Compute the offset into the tile */
3131   *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
3132   *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
3133   *z_offset_el = total_z_offset_el % tile_info.logical_extent_el.d;
3134   *array_offset = total_array_offset % tile_info.logical_extent_el.a;
3135
3136   /* Compute the offset of the tile in units of whole tiles */
3137   uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
3138   uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
3139   uint32_t z_offset_tl = total_z_offset_el / tile_info.logical_extent_el.d;
3140   uint32_t a_offset_tl = total_array_offset / tile_info.logical_extent_el.a;
3141
3142   /* Compute an array pitch in number of tiles */
3143   uint32_t array_pitch_tl_rows =
3144      array_pitch_el_rows / tile_info.logical_extent_el.h;
3145
3146   /* Add the Z and array offset to the Y offset to get a 2D offset */
3147   y_offset_tl += (z_offset_tl + a_offset_tl) * array_pitch_tl_rows;
3148
3149   *tile_offset_B =
3150      (uint64_t)y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B +
3151      (uint64_t)x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
3152}
3153
3154uint32_t
3155isl_surf_get_depth_format(const struct isl_device *dev,
3156                          const struct isl_surf *surf)
3157{
3158   /* Support for separate stencil buffers began in gfx5. Support for
3159    * interleaved depthstencil buffers ceased in gfx7. The intermediate gens,
3160    * those that supported separate and interleaved stencil, were gfx5 and
3161    * gfx6.
3162    *
3163    * For a list of all available formats, see the Sandybridge PRM >> Volume
3164    * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
3165    * Format (p321).
3166    */
3167
3168   bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
3169
3170   assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
3171
3172   if (has_stencil)
3173      assert(ISL_GFX_VER(dev) < 7);
3174
3175   switch (surf->format) {
3176   default:
3177      unreachable("bad isl depth format");
3178   case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
3179      assert(ISL_GFX_VER(dev) < 7);
3180      return 0; /* D32_FLOAT_S8X24_UINT */
3181   case ISL_FORMAT_R32_FLOAT:
3182      assert(!has_stencil);
3183      return 1; /* D32_FLOAT */
3184   case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
3185      if (has_stencil) {
3186         assert(ISL_GFX_VER(dev) < 7);
3187         return 2; /* D24_UNORM_S8_UINT */
3188      } else {
3189         assert(ISL_GFX_VER(dev) >= 5);
3190         return 3; /* D24_UNORM_X8_UINT */
3191      }
3192   case ISL_FORMAT_R16_UNORM:
3193      assert(!has_stencil);
3194      return 5; /* D16_UNORM */
3195   }
3196}
3197
3198bool
3199isl_swizzle_supports_rendering(const struct intel_device_info *devinfo,
3200                               struct isl_swizzle swizzle)
3201{
3202   if (devinfo->is_haswell) {
3203      /* From the Haswell PRM,
3204       * RENDER_SURFACE_STATE::Shader Channel Select Red
3205       *
3206       *    "The Shader channel selects also define which shader channels are
3207       *    written to which surface channel. If the Shader channel select is
3208       *    SCS_ZERO or SCS_ONE then it is not written to the surface. If the
3209       *    shader channel select is SCS_RED it is written to the surface red
3210       *    channel and so on. If more than one shader channel select is set
3211       *    to the same surface channel only the first shader channel in RGBA
3212       *    order will be written."
3213       */
3214      return true;
3215   } else if (devinfo->ver <= 7) {
3216      /* Ivy Bridge and early doesn't have any swizzling */
3217      return isl_swizzle_is_identity(swizzle);
3218   } else {
3219      /* From the Sky Lake PRM Vol. 2d,
3220       * RENDER_SURFACE_STATE::Shader Channel Select Red
3221       *
3222       *    "For Render Target, Red, Green and Blue Shader Channel Selects
3223       *    MUST be such that only valid components can be swapped i.e. only
3224       *    change the order of components in the pixel. Any other values for
3225       *    these Shader Channel Select fields are not valid for Render
3226       *    Targets. This also means that there MUST not be multiple shader
3227       *    channels mapped to the same RT channel."
3228       *
3229       * From the Sky Lake PRM Vol. 2d,
3230       * RENDER_SURFACE_STATE::Shader Channel Select Alpha
3231       *
3232       *    "For Render Target, this field MUST be programmed to
3233       *    value = SCS_ALPHA."
3234       */
3235      return (swizzle.r == ISL_CHANNEL_SELECT_RED ||
3236              swizzle.r == ISL_CHANNEL_SELECT_GREEN ||
3237              swizzle.r == ISL_CHANNEL_SELECT_BLUE) &&
3238             (swizzle.g == ISL_CHANNEL_SELECT_RED ||
3239              swizzle.g == ISL_CHANNEL_SELECT_GREEN ||
3240              swizzle.g == ISL_CHANNEL_SELECT_BLUE) &&
3241             (swizzle.b == ISL_CHANNEL_SELECT_RED ||
3242              swizzle.b == ISL_CHANNEL_SELECT_GREEN ||
3243              swizzle.b == ISL_CHANNEL_SELECT_BLUE) &&
3244             swizzle.r != swizzle.g &&
3245             swizzle.r != swizzle.b &&
3246             swizzle.g != swizzle.b &&
3247             swizzle.a == ISL_CHANNEL_SELECT_ALPHA;
3248   }
3249}
3250
3251static enum isl_channel_select
3252swizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle)
3253{
3254   switch (chan) {
3255   case ISL_CHANNEL_SELECT_ZERO:
3256   case ISL_CHANNEL_SELECT_ONE:
3257      return chan;
3258   case ISL_CHANNEL_SELECT_RED:
3259      return swizzle.r;
3260   case ISL_CHANNEL_SELECT_GREEN:
3261      return swizzle.g;
3262   case ISL_CHANNEL_SELECT_BLUE:
3263      return swizzle.b;
3264   case ISL_CHANNEL_SELECT_ALPHA:
3265      return swizzle.a;
3266   default:
3267      unreachable("Invalid swizzle component");
3268   }
3269}
3270
3271/**
3272 * Returns the single swizzle that is equivalent to applying the two given
3273 * swizzles in sequence.
3274 */
3275struct isl_swizzle
3276isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second)
3277{
3278   return (struct isl_swizzle) {
3279      .r = swizzle_select(first.r, second),
3280      .g = swizzle_select(first.g, second),
3281      .b = swizzle_select(first.b, second),
3282      .a = swizzle_select(first.a, second),
3283   };
3284}
3285
3286/**
3287 * Returns a swizzle that is the pseudo-inverse of this swizzle.
3288 */
3289struct isl_swizzle
3290isl_swizzle_invert(struct isl_swizzle swizzle)
3291{
3292   /* Default to zero for channels which do not show up in the swizzle */
3293   enum isl_channel_select chans[4] = {
3294      ISL_CHANNEL_SELECT_ZERO,
3295      ISL_CHANNEL_SELECT_ZERO,
3296      ISL_CHANNEL_SELECT_ZERO,
3297      ISL_CHANNEL_SELECT_ZERO,
3298   };
3299
3300   /* We go in ABGR order so that, if there are any duplicates, the first one
3301    * is taken if you look at it in RGBA order.  This is what Haswell hardware
3302    * does for render target swizzles.
3303    */
3304   if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
3305      chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA;
3306   if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
3307      chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE;
3308   if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
3309      chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN;
3310   if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
3311      chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED;
3312
3313   return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] };
3314}
3315
3316/** Applies an inverse swizzle to a color value */
3317union isl_color_value
3318isl_color_value_swizzle_inv(union isl_color_value src,
3319                            struct isl_swizzle swizzle)
3320{
3321   union isl_color_value dst = { .u32 = { 0, } };
3322
3323   /* We assign colors in ABGR order so that the first one will be taken in
3324    * RGBA precedence order.  According to the PRM docs for shader channel
3325    * select, this matches Haswell hardware behavior.
3326    */
3327   if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
3328      dst.u32[swizzle.a - ISL_CHANNEL_SELECT_RED] = src.u32[3];
3329   if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
3330      dst.u32[swizzle.b - ISL_CHANNEL_SELECT_RED] = src.u32[2];
3331   if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
3332      dst.u32[swizzle.g - ISL_CHANNEL_SELECT_RED] = src.u32[1];
3333   if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
3334      dst.u32[swizzle.r - ISL_CHANNEL_SELECT_RED] = src.u32[0];
3335
3336   return dst;
3337}
3338
3339uint8_t
3340isl_format_get_aux_map_encoding(enum isl_format format)
3341{
3342   switch(format) {
3343   case ISL_FORMAT_R32G32B32A32_FLOAT: return 0x11;
3344   case ISL_FORMAT_R32G32B32X32_FLOAT: return 0x11;
3345   case ISL_FORMAT_R32G32B32A32_SINT: return 0x12;
3346   case ISL_FORMAT_R32G32B32A32_UINT: return 0x13;
3347   case ISL_FORMAT_R16G16B16A16_UNORM: return 0x14;
3348   case ISL_FORMAT_R16G16B16A16_SNORM: return 0x15;
3349   case ISL_FORMAT_R16G16B16A16_SINT: return 0x16;
3350   case ISL_FORMAT_R16G16B16A16_UINT: return 0x17;
3351   case ISL_FORMAT_R16G16B16A16_FLOAT: return 0x10;
3352   case ISL_FORMAT_R16G16B16X16_FLOAT: return 0x10;
3353   case ISL_FORMAT_R32G32_FLOAT: return 0x11;
3354   case ISL_FORMAT_R32G32_SINT: return 0x12;
3355   case ISL_FORMAT_R32G32_UINT: return 0x13;
3356   case ISL_FORMAT_B8G8R8A8_UNORM: return 0xA;
3357   case ISL_FORMAT_B8G8R8X8_UNORM: return 0xA;
3358   case ISL_FORMAT_B8G8R8A8_UNORM_SRGB: return 0xA;
3359   case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: return 0xA;
3360   case ISL_FORMAT_R10G10B10A2_UNORM: return 0x18;
3361   case ISL_FORMAT_R10G10B10A2_UNORM_SRGB: return 0x18;
3362   case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM: return 0x19;
3363   case ISL_FORMAT_R10G10B10A2_UINT: return 0x1A;
3364   case ISL_FORMAT_R8G8B8A8_UNORM: return 0xA;
3365   case ISL_FORMAT_R8G8B8A8_UNORM_SRGB: return 0xA;
3366   case ISL_FORMAT_R8G8B8A8_SNORM: return 0x1B;
3367   case ISL_FORMAT_R8G8B8A8_SINT: return 0x1C;
3368   case ISL_FORMAT_R8G8B8A8_UINT: return 0x1D;
3369   case ISL_FORMAT_R16G16_UNORM: return 0x14;
3370   case ISL_FORMAT_R16G16_SNORM: return 0x15;
3371   case ISL_FORMAT_R16G16_SINT: return 0x16;
3372   case ISL_FORMAT_R16G16_UINT: return 0x17;
3373   case ISL_FORMAT_R16G16_FLOAT: return 0x10;
3374   case ISL_FORMAT_B10G10R10A2_UNORM: return 0x18;
3375   case ISL_FORMAT_B10G10R10A2_UNORM_SRGB: return 0x18;
3376   case ISL_FORMAT_R11G11B10_FLOAT: return 0x1E;
3377   case ISL_FORMAT_R32_SINT: return 0x12;
3378   case ISL_FORMAT_R32_UINT: return 0x13;
3379   case ISL_FORMAT_R32_FLOAT: return 0x11;
3380   case ISL_FORMAT_R24_UNORM_X8_TYPELESS: return 0x13;
3381   case ISL_FORMAT_B5G6R5_UNORM: return 0xA;
3382   case ISL_FORMAT_B5G6R5_UNORM_SRGB: return 0xA;
3383   case ISL_FORMAT_B5G5R5A1_UNORM: return 0xA;
3384   case ISL_FORMAT_B5G5R5A1_UNORM_SRGB: return 0xA;
3385   case ISL_FORMAT_B4G4R4A4_UNORM: return 0xA;
3386   case ISL_FORMAT_B4G4R4A4_UNORM_SRGB: return 0xA;
3387   case ISL_FORMAT_R8G8_UNORM: return 0xA;
3388   case ISL_FORMAT_R8G8_SNORM: return 0x1B;
3389   case ISL_FORMAT_R8G8_SINT: return 0x1C;
3390   case ISL_FORMAT_R8G8_UINT: return 0x1D;
3391   case ISL_FORMAT_R16_UNORM: return 0x14;
3392   case ISL_FORMAT_R16_SNORM: return 0x15;
3393   case ISL_FORMAT_R16_SINT: return 0x16;
3394   case ISL_FORMAT_R16_UINT: return 0x17;
3395   case ISL_FORMAT_R16_FLOAT: return 0x10;
3396   case ISL_FORMAT_B5G5R5X1_UNORM: return 0xA;
3397   case ISL_FORMAT_B5G5R5X1_UNORM_SRGB: return 0xA;
3398   case ISL_FORMAT_A1B5G5R5_UNORM: return 0xA;
3399   case ISL_FORMAT_A4B4G4R4_UNORM: return 0xA;
3400   case ISL_FORMAT_R8_UNORM: return 0xA;
3401   case ISL_FORMAT_R8_SNORM: return 0x1B;
3402   case ISL_FORMAT_R8_SINT: return 0x1C;
3403   case ISL_FORMAT_R8_UINT: return 0x1D;
3404   case ISL_FORMAT_A8_UNORM: return 0xA;
3405   case ISL_FORMAT_PLANAR_420_8: return 0xF;
3406   case ISL_FORMAT_PLANAR_420_10: return 0x7;
3407   case ISL_FORMAT_PLANAR_420_12: return 0x8;
3408   case ISL_FORMAT_PLANAR_420_16: return 0x8;
3409   case ISL_FORMAT_YCRCB_NORMAL: return 0x3;
3410   case ISL_FORMAT_YCRCB_SWAPY: return 0xB;
3411   default:
3412      unreachable("Unsupported aux-map format!");
3413      return 0;
3414   }
3415}
3416
3417/*
3418 * Returns compression format encoding for Unified Lossless Compression
3419 */
3420uint8_t
3421isl_get_render_compression_format(enum isl_format format)
3422{
3423   /* From the Bspec, Enumeration_RenderCompressionFormat section (53726): */
3424   switch(format) {
3425   case ISL_FORMAT_R32G32B32A32_FLOAT:
3426   case ISL_FORMAT_R32G32B32X32_FLOAT:
3427   case ISL_FORMAT_R32G32B32A32_SINT:
3428      return 0x0;
3429   case ISL_FORMAT_R32G32B32A32_UINT:
3430      return 0x1;
3431   case ISL_FORMAT_R32G32_FLOAT:
3432   case ISL_FORMAT_R32G32_SINT:
3433      return 0x2;
3434   case ISL_FORMAT_R32G32_UINT:
3435      return 0x3;
3436   case ISL_FORMAT_R16G16B16A16_UNORM:
3437   case ISL_FORMAT_R16G16B16X16_UNORM:
3438   case ISL_FORMAT_R16G16B16A16_UINT:
3439      return 0x4;
3440   case ISL_FORMAT_R16G16B16A16_SNORM:
3441   case ISL_FORMAT_R16G16B16A16_SINT:
3442   case ISL_FORMAT_R16G16B16A16_FLOAT:
3443   case ISL_FORMAT_R16G16B16X16_FLOAT:
3444      return 0x5;
3445   case ISL_FORMAT_R16G16_UNORM:
3446   case ISL_FORMAT_R16G16_UINT:
3447      return 0x6;
3448   case ISL_FORMAT_R16G16_SNORM:
3449   case ISL_FORMAT_R16G16_SINT:
3450   case ISL_FORMAT_R16G16_FLOAT:
3451      return 0x7;
3452   case ISL_FORMAT_B8G8R8A8_UNORM:
3453   case ISL_FORMAT_B8G8R8X8_UNORM:
3454   case ISL_FORMAT_B8G8R8A8_UNORM_SRGB:
3455   case ISL_FORMAT_B8G8R8X8_UNORM_SRGB:
3456   case ISL_FORMAT_R8G8B8A8_UNORM:
3457   case ISL_FORMAT_R8G8B8X8_UNORM:
3458   case ISL_FORMAT_R8G8B8A8_UNORM_SRGB:
3459   case ISL_FORMAT_R8G8B8X8_UNORM_SRGB:
3460   case ISL_FORMAT_R8G8B8A8_UINT:
3461      return 0x8;
3462   case ISL_FORMAT_R8G8B8A8_SNORM:
3463   case ISL_FORMAT_R8G8B8A8_SINT:
3464      return 0x9;
3465   case ISL_FORMAT_B5G6R5_UNORM:
3466   case ISL_FORMAT_B5G6R5_UNORM_SRGB:
3467   case ISL_FORMAT_B5G5R5A1_UNORM:
3468   case ISL_FORMAT_B5G5R5A1_UNORM_SRGB:
3469   case ISL_FORMAT_B4G4R4A4_UNORM:
3470   case ISL_FORMAT_B4G4R4A4_UNORM_SRGB:
3471   case ISL_FORMAT_B5G5R5X1_UNORM:
3472   case ISL_FORMAT_B5G5R5X1_UNORM_SRGB:
3473   case ISL_FORMAT_A1B5G5R5_UNORM:
3474   case ISL_FORMAT_A4B4G4R4_UNORM:
3475   case ISL_FORMAT_R8G8_UNORM:
3476   case ISL_FORMAT_R8G8_UINT:
3477      return 0xA;
3478   case ISL_FORMAT_R8G8_SNORM:
3479   case ISL_FORMAT_R8G8_SINT:
3480      return 0xB;
3481   case ISL_FORMAT_R10G10B10A2_UNORM:
3482   case ISL_FORMAT_R10G10B10A2_UNORM_SRGB:
3483   case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM:
3484   case ISL_FORMAT_R10G10B10A2_UINT:
3485   case ISL_FORMAT_B10G10R10A2_UNORM:
3486   case ISL_FORMAT_B10G10R10X2_UNORM:
3487   case ISL_FORMAT_B10G10R10A2_UNORM_SRGB:
3488      return 0xC;
3489   case ISL_FORMAT_R11G11B10_FLOAT:
3490      return 0xD;
3491   case ISL_FORMAT_R32_SINT:
3492   case ISL_FORMAT_R32_FLOAT:
3493      return 0x10;
3494   case ISL_FORMAT_R32_UINT:
3495   case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
3496      return 0x11;
3497   case ISL_FORMAT_R16_UNORM:
3498   case ISL_FORMAT_R16_UINT:
3499      return 0x14;
3500   case ISL_FORMAT_R16_SNORM:
3501   case ISL_FORMAT_R16_SINT:
3502   case ISL_FORMAT_R16_FLOAT:
3503      return 0x15;
3504   case ISL_FORMAT_R8_UNORM:
3505   case ISL_FORMAT_R8_UINT:
3506   case ISL_FORMAT_A8_UNORM:
3507      return 0x18;
3508   case ISL_FORMAT_R8_SNORM:
3509   case ISL_FORMAT_R8_SINT:
3510      return 0x19;
3511   default:
3512      unreachable("Unsupported render compression format!");
3513      return 0;
3514   }
3515}
3516