1/*
2 * Copyright 2015 Intel Corporation
3 *
4 *  Permission is hereby granted, free of charge, to any person obtaining a
5 *  copy of this software and associated documentation files (the "Software"),
6 *  to deal in the Software without restriction, including without limitation
7 *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 *  and/or sell copies of the Software, and to permit persons to whom the
9 *  Software is furnished to do so, subject to the following conditions:
10 *
11 *  The above copyright notice and this permission notice (including the next
12 *  paragraph) shall be included in all copies or substantial portions of the
13 *  Software.
14 *
15 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 *  IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdarg.h>
26#include <stdio.h>
27
28#include "genxml/genX_bits.h"
29
30#include "isl.h"
31#include "isl_gen4.h"
32#include "isl_gen6.h"
33#include "isl_gen7.h"
34#include "isl_gen8.h"
35#include "isl_gen9.h"
36#include "isl_priv.h"
37
38void
39isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2,
40                           uint32_t yt1, uint32_t yt2,
41                           char *dst, const char *src,
42                           uint32_t dst_pitch, int32_t src_pitch,
43                           bool has_swizzling,
44                           enum isl_tiling tiling,
45                           isl_memcpy_type copy_type)
46{
47#ifdef USE_SSE41
48   if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
49      _isl_memcpy_linear_to_tiled_sse41(
50         xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
51         tiling, copy_type);
52      return;
53   }
54#endif
55
56   _isl_memcpy_linear_to_tiled(
57      xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
58      tiling, copy_type);
59}
60
61void
62isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2,
63                           uint32_t yt1, uint32_t yt2,
64                           char *dst, const char *src,
65                           int32_t dst_pitch, uint32_t src_pitch,
66                           bool has_swizzling,
67                           enum isl_tiling tiling,
68                           isl_memcpy_type copy_type)
69{
70#ifdef USE_SSE41
71   if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
72      _isl_memcpy_tiled_to_linear_sse41(
73         xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
74         tiling, copy_type);
75      return;
76   }
77#endif
78
79   _isl_memcpy_tiled_to_linear(
80      xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
81      tiling, copy_type);
82}
83
84void PRINTFLIKE(3, 4) UNUSED
85__isl_finishme(const char *file, int line, const char *fmt, ...)
86{
87   va_list ap;
88   char buf[512];
89
90   va_start(ap, fmt);
91   vsnprintf(buf, sizeof(buf), fmt, ap);
92   va_end(ap);
93
94   fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
95}
96
/**
 * Initialize an isl_device for the hardware described by @p info.
 *
 * Records @p info and @p has_bit6_swizzling in @p dev, enables separate
 * stencil when the generation is >= 6, and caches sizes and offsets derived
 * from the genxml RENDER_SURFACE_STATE / CLEAR_COLOR / 3DSTATE_* helpers in
 * dev->ss and dev->ds.
 *
 * @param[out] dev                 device to fill in
 * @param[in]  info                device description; the pointer itself is
 *                                 stored in dev->info
 * @param[in]  has_bit6_swizzling  must be false when info->gen >= 8
 *                                 (asserted)
 */
void
isl_device_init(struct isl_device *dev,
                const struct gen_device_info *info,
                bool has_bit6_swizzling)
{
   /* Gen8+ don't have bit6 swizzling, ensure callsite is not confused. */
   assert(!(has_bit6_swizzling && info->gen >= 8));

   dev->info = info;
   dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6;
   dev->has_bit6_swizzling = has_bit6_swizzling;

   /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
    * device properties at buildtime. Verify that the macros agree with the
    * device properties chosen during runtime.
    */
   ISL_DEV_GEN_SANITIZE(dev);
   ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);

   /* Did we break hiz or stencil? */
   if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
      assert(info->has_hiz_and_separate_stencil);
   if (info->must_use_separate_stencil)
      assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));

   /* NOTE(review): the "* 4" on every *_length() value presumably converts a
    * dword count into bytes -- confirm against genX_bits.h.
    */
   dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
   dev->ss.align = isl_align(dev->ss.size, 32);

   dev->ss.clear_color_state_size =
      isl_align(CLEAR_COLOR_length(info) * 4, 64);
   /* "/ 32 * 4" rounds the start position down to a multiple of 32 and
    * scales it by 4 (presumably bit position -> byte offset of the dword).
    */
   dev->ss.clear_color_state_offset =
      RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;

   /* Sum of the four clear-color channel widths, rounded up to a multiple of
    * 32 and then divided by 8.
    */
   dev->ss.clear_value_size =
      isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
                RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
                RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
                RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;

   dev->ss.clear_value_offset =
      RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;

   /* The base address must start on a multiple of 8 so that "/ 8" is exact. */
   assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
   dev->ss.addr_offset =
      RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;

   /* The "Auxiliary Surface Base Address" field starts a bit higher up
    * because the bottom 12 bits are used for other things.  Round down to
    * the nearest dword before.
    */
   dev->ss.aux_addr_offset =
      (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;

   /* dev->ds starts out covering only the depth buffer packet. */
   dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
   assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
   dev->ds.depth_offset =
      _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;

   if (dev->use_separate_stencil) {
      /* With separate stencil the size also covers the stencil, hierarchical
       * depth and clear-params packets.
       */
      dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
                      _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
                      _3DSTATE_CLEAR_PARAMS_length(info) * 4;

      /* Stencil address offset: skip the whole depth buffer packet, then add
       * the address position inside the stencil packet.
       */
      assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
      dev->ds.stencil_offset =
         _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
         _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;

      /* HiZ address offset: skip the depth and stencil packets, then add the
       * address position inside the hierarchical depth packet.
       */
      assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
      dev->ds.hiz_offset =
         _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
         _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
         _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
   } else {
      /* No separate stencil: both offsets are zeroed. */
      dev->ds.stencil_offset = 0;
      dev->ds.hiz_offset = 0;
   }
}
175
176/**
177 * @brief Query the set of multisamples supported by the device.
178 *
179 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
180 * supported.
181 */
182isl_sample_count_mask_t ATTRIBUTE_CONST
183isl_device_get_sample_counts(struct isl_device *dev)
184{
185   if (ISL_DEV_GEN(dev) >= 9) {
186      return ISL_SAMPLE_COUNT_1_BIT |
187             ISL_SAMPLE_COUNT_2_BIT |
188             ISL_SAMPLE_COUNT_4_BIT |
189             ISL_SAMPLE_COUNT_8_BIT |
190             ISL_SAMPLE_COUNT_16_BIT;
191   } else if (ISL_DEV_GEN(dev) >= 8) {
192      return ISL_SAMPLE_COUNT_1_BIT |
193             ISL_SAMPLE_COUNT_2_BIT |
194             ISL_SAMPLE_COUNT_4_BIT |
195             ISL_SAMPLE_COUNT_8_BIT;
196   } else if (ISL_DEV_GEN(dev) >= 7) {
197      return ISL_SAMPLE_COUNT_1_BIT |
198             ISL_SAMPLE_COUNT_4_BIT |
199             ISL_SAMPLE_COUNT_8_BIT;
200   } else if (ISL_DEV_GEN(dev) >= 6) {
201      return ISL_SAMPLE_COUNT_1_BIT |
202             ISL_SAMPLE_COUNT_4_BIT;
203   } else {
204      return ISL_SAMPLE_COUNT_1_BIT;
205   }
206}
207
208/**
209 * @param[out] info is written only on success
210 */
211static void
212isl_tiling_get_info(enum isl_tiling tiling,
213                    uint32_t format_bpb,
214                    struct isl_tile_info *tile_info)
215{
216   const uint32_t bs = format_bpb / 8;
217   struct isl_extent2d logical_el, phys_B;
218
219   if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
220      /* It is possible to have non-power-of-two formats in a tiled buffer.
221       * The easiest way to handle this is to treat the tile as if it is three
222       * times as wide.  This way no pixel will ever cross a tile boundary.
223       * This really only works on legacy X and Y tiling formats.
224       */
225      assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0);
226      assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
227      isl_tiling_get_info(tiling, format_bpb / 3, tile_info);
228      return;
229   }
230
231   switch (tiling) {
232   case ISL_TILING_LINEAR:
233      assert(bs > 0);
234      logical_el = isl_extent2d(1, 1);
235      phys_B = isl_extent2d(bs, 1);
236      break;
237
238   case ISL_TILING_X:
239      assert(bs > 0);
240      logical_el = isl_extent2d(512 / bs, 8);
241      phys_B = isl_extent2d(512, 8);
242      break;
243
244   case ISL_TILING_Y0:
245      assert(bs > 0);
246      logical_el = isl_extent2d(128 / bs, 32);
247      phys_B = isl_extent2d(128, 32);
248      break;
249
250   case ISL_TILING_W:
251      assert(bs == 1);
252      logical_el = isl_extent2d(64, 64);
253      /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
254       *
255       *    "If the surface is a stencil buffer (and thus has Tile Mode set
256       *    to TILEMODE_WMAJOR), the pitch must be set to 2x the value
257       *    computed based on width, as the stencil buffer is stored with two
258       *    rows interleaved."
259       *
260       * This, together with the fact that stencil buffers are referred to as
261       * being Y-tiled in the PRMs for older hardware implies that the
262       * physical size of a W-tile is actually the same as for a Y-tile.
263       */
264      phys_B = isl_extent2d(128, 32);
265      break;
266
267   case ISL_TILING_Yf:
268   case ISL_TILING_Ys: {
269      bool is_Ys = tiling == ISL_TILING_Ys;
270
271      assert(bs > 0);
272      unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
273      unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
274
275      logical_el = isl_extent2d(width / bs, height);
276      phys_B = isl_extent2d(width, height);
277      break;
278   }
279
280   case ISL_TILING_HIZ:
281      /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4
282       * 128bpb format.  The tiling has the same physical dimensions as
283       * Y-tiling but actually has two HiZ columns per Y-tiled column.
284       */
285      assert(bs == 16);
286      logical_el = isl_extent2d(16, 16);
287      phys_B = isl_extent2d(128, 32);
288      break;
289
290   case ISL_TILING_CCS:
291      /* CCS surfaces are required to have one of the GENX_CCS_* formats which
292       * have a block size of 1 or 2 bits per block and each CCS element
293       * corresponds to one cache-line pair in the main surface.  From the Sky
294       * Lake PRM Vol. 12 in the section on planes:
295       *
296       *    "The Color Control Surface (CCS) contains the compression status
297       *    of the cache-line pairs. The compression state of the cache-line
298       *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
299       *    represents an area on the main surface of 16x16 sets of 128 byte
300       *    Y-tiled cache-line-pairs. CCS is always Y tiled."
301       *
302       * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
303       * Since each cache line corresponds to a 16x16 set of cache-line pairs,
304       * that yields total tile area of 128x128 cache-line pairs or CCS
305       * elements.  On older hardware, each CCS element is 1 bit and the tile
306       * is 128x256 elements.
307       */
308      assert(format_bpb == 1 || format_bpb == 2);
309      logical_el = isl_extent2d(128, 256 / format_bpb);
310      phys_B = isl_extent2d(128, 32);
311      break;
312
313   default:
314      unreachable("not reached");
315   } /* end switch */
316
317   *tile_info = (struct isl_tile_info) {
318      .tiling = tiling,
319      .format_bpb = format_bpb,
320      .logical_extent_el = logical_el,
321      .phys_extent_B = phys_B,
322   };
323}
324
325bool
326isl_color_value_is_zero(union isl_color_value value,
327                        enum isl_format format)
328{
329   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
330
331#define RETURN_FALSE_IF_NOT_0(c, i) \
332   if (fmtl->channels.c.bits && value.u32[i] != 0) \
333      return false
334
335   RETURN_FALSE_IF_NOT_0(r, 0);
336   RETURN_FALSE_IF_NOT_0(g, 1);
337   RETURN_FALSE_IF_NOT_0(b, 2);
338   RETURN_FALSE_IF_NOT_0(a, 3);
339
340#undef RETURN_FALSE_IF_NOT_0
341
342   return true;
343}
344
345bool
346isl_color_value_is_zero_one(union isl_color_value value,
347                            enum isl_format format)
348{
349   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
350
351#define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
352   if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
353      return false
354
355   if (isl_format_has_int_channel(format)) {
356      RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
357      RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
358      RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
359      RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
360   } else {
361      RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
362      RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
363      RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
364      RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
365   }
366
367#undef RETURN_FALSE_IF_NOT_0_1
368
369   return true;
370}
371
372/**
373 * @param[out] tiling is set only on success
374 */
375static bool
376isl_surf_choose_tiling(const struct isl_device *dev,
377                       const struct isl_surf_init_info *restrict info,
378                       enum isl_tiling *tiling)
379{
380   isl_tiling_flags_t tiling_flags = info->tiling_flags;
381
382   /* HiZ surfaces always use the HiZ tiling */
383   if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
384      assert(info->format == ISL_FORMAT_HIZ);
385      assert(tiling_flags == ISL_TILING_HIZ_BIT);
386      *tiling = ISL_TILING_HIZ;
387      return true;
388   }
389
390   /* CCS surfaces always use the CCS tiling */
391   if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
392      assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
393      assert(tiling_flags == ISL_TILING_CCS_BIT);
394      *tiling = ISL_TILING_CCS;
395      return true;
396   }
397
398   if (ISL_DEV_GEN(dev) >= 6) {
399      isl_gen6_filter_tiling(dev, info, &tiling_flags);
400   } else {
401      isl_gen4_filter_tiling(dev, info, &tiling_flags);
402   }
403
404   #define CHOOSE(__tiling) \
405      do { \
406         if (tiling_flags & (1u << (__tiling))) { \
407            *tiling = (__tiling); \
408            return true; \
409          } \
410      } while (0)
411
412   /* Of the tiling modes remaining, choose the one that offers the best
413    * performance.
414    */
415
416   if (info->dim == ISL_SURF_DIM_1D) {
417      /* Prefer linear for 1D surfaces because they do not benefit from
418       * tiling. To the contrary, tiling leads to wasted memory and poor
419       * memory locality due to the swizzling and alignment restrictions
420       * required in tiled surfaces.
421       */
422      CHOOSE(ISL_TILING_LINEAR);
423   }
424
425   CHOOSE(ISL_TILING_Ys);
426   CHOOSE(ISL_TILING_Yf);
427   CHOOSE(ISL_TILING_Y0);
428   CHOOSE(ISL_TILING_X);
429   CHOOSE(ISL_TILING_W);
430   CHOOSE(ISL_TILING_LINEAR);
431
432   #undef CHOOSE
433
434   /* No tiling mode accomodates the inputs. */
435   return false;
436}
437
438static bool
439isl_choose_msaa_layout(const struct isl_device *dev,
440                 const struct isl_surf_init_info *info,
441                 enum isl_tiling tiling,
442                 enum isl_msaa_layout *msaa_layout)
443{
444   if (ISL_DEV_GEN(dev) >= 8) {
445      return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout);
446   } else if (ISL_DEV_GEN(dev) >= 7) {
447      return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout);
448   } else if (ISL_DEV_GEN(dev) >= 6) {
449      return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout);
450   } else {
451      return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout);
452   }
453}
454
455struct isl_extent2d
456isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
457{
458   assert(isl_is_pow2(samples));
459
460   /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
461    * Sizes (p133):
462    *
463    *    If the surface is multisampled and it is a depth or stencil surface
464    *    or Multisampled Surface StorageFormat in SURFACE_STATE is
465    *    MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
466    *    proceeding: [...]
467    */
468   return (struct isl_extent2d) {
469      .width = 1 << ((ffs(samples) - 0) / 2),
470      .height = 1 << ((ffs(samples) - 1) / 2),
471   };
472}
473
474static void
475isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
476                                    uint32_t *width, uint32_t *height)
477{
478   const struct isl_extent2d px_size_sa =
479      isl_get_interleaved_msaa_px_size_sa(samples);
480
481   if (width)
482      *width = isl_align(*width, 2) * px_size_sa.width;
483   if (height)
484      *height = isl_align(*height, 2) * px_size_sa.height;
485}
486
487static enum isl_array_pitch_span
488isl_choose_array_pitch_span(const struct isl_device *dev,
489                            const struct isl_surf_init_info *restrict info,
490                            enum isl_dim_layout dim_layout,
491                            const struct isl_extent4d *phys_level0_sa)
492{
493   switch (dim_layout) {
494   case ISL_DIM_LAYOUT_GEN9_1D:
495   case ISL_DIM_LAYOUT_GEN4_2D:
496      if (ISL_DEV_GEN(dev) >= 8) {
497         /* QPitch becomes programmable in Broadwell. So choose the
498          * most compact QPitch possible in order to conserve memory.
499          *
500          * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
501          * >> RENDER_SURFACE_STATE Surface QPitch (p325):
502          *
503          *    - Software must ensure that this field is set to a value
504          *      sufficiently large such that the array slices in the surface
505          *      do not overlap. Refer to the Memory Data Formats section for
506          *      information on how surfaces are stored in memory.
507          *
508          *    - This field specifies the distance in rows between array
509          *      slices.  It is used only in the following cases:
510          *
511          *          - Surface Array is enabled OR
512          *          - Number of Mulitsamples is not NUMSAMPLES_1 and
513          *            Multisampled Surface Storage Format set to MSFMT_MSS OR
514          *          - Surface Type is SURFTYPE_CUBE
515          */
516         return ISL_ARRAY_PITCH_SPAN_COMPACT;
517      } else if (ISL_DEV_GEN(dev) >= 7) {
518         /* Note that Ivybridge introduces
519          * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
520          * driver more control over the QPitch.
521          */
522
523         if (phys_level0_sa->array_len == 1) {
524            /* The hardware will never use the QPitch. So choose the most
525             * compact QPitch possible in order to conserve memory.
526             */
527            return ISL_ARRAY_PITCH_SPAN_COMPACT;
528         }
529
530         if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
531             (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
532            /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
533             * Section 6.18.4.7: Surface Arrays (p112):
534             *
535             *    If Surface Array Spacing is set to ARYSPC_FULL (note that
536             *    the depth buffer and stencil buffer have an implied value of
537             *    ARYSPC_FULL):
538             */
539            return ISL_ARRAY_PITCH_SPAN_FULL;
540         }
541
542         if (info->levels == 1) {
543            /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
544             * to ARYSPC_LOD0.
545             */
546            return ISL_ARRAY_PITCH_SPAN_COMPACT;
547         }
548
549         return ISL_ARRAY_PITCH_SPAN_FULL;
550      } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
551                 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
552                 isl_surf_usage_is_stencil(info->usage)) {
553         /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
554          * Graphics Core >> Section 7.18.3.7: Surface Arrays:
555          *
556          *    The separate stencil buffer does not support mip mapping, thus
557          *    the storage for LODs other than LOD 0 is not needed.
558          */
559         assert(info->levels == 1);
560         return ISL_ARRAY_PITCH_SPAN_COMPACT;
561      } else {
562         if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
563             ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
564             isl_surf_usage_is_stencil(info->usage)) {
565            /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
566             * Graphics Core >> Section 7.18.3.7: Surface Arrays:
567             *
568             *    The separate stencil buffer does not support mip mapping,
569             *    thus the storage for LODs other than LOD 0 is not needed.
570             */
571            assert(info->levels == 1);
572            assert(phys_level0_sa->array_len == 1);
573            return ISL_ARRAY_PITCH_SPAN_COMPACT;
574         }
575
576         if (phys_level0_sa->array_len == 1) {
577            /* The hardware will never use the QPitch. So choose the most
578             * compact QPitch possible in order to conserve memory.
579             */
580            return ISL_ARRAY_PITCH_SPAN_COMPACT;
581         }
582
583         return ISL_ARRAY_PITCH_SPAN_FULL;
584      }
585
586   case ISL_DIM_LAYOUT_GEN4_3D:
587      /* The hardware will never use the QPitch. So choose the most
588       * compact QPitch possible in order to conserve memory.
589       */
590      return ISL_ARRAY_PITCH_SPAN_COMPACT;
591
592   case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
593      /* Each array image in the gen6 stencil of HiZ surface is compact in the
594       * sense that every LOD is a compact array of the same size as LOD0.
595       */
596      return ISL_ARRAY_PITCH_SPAN_COMPACT;
597   }
598
599   unreachable("bad isl_dim_layout");
600   return ISL_ARRAY_PITCH_SPAN_FULL;
601}
602
603static void
604isl_choose_image_alignment_el(const struct isl_device *dev,
605                              const struct isl_surf_init_info *restrict info,
606                              enum isl_tiling tiling,
607                              enum isl_dim_layout dim_layout,
608                              enum isl_msaa_layout msaa_layout,
609                              struct isl_extent3d *image_align_el)
610{
611   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
612   if (fmtl->txc == ISL_TXC_MCS) {
613      assert(tiling == ISL_TILING_Y0);
614
615      /*
616       * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
617       *
618       * Height, width, and layout of MCS buffer in this case must match with
619       * Render Target height, width, and layout. MCS buffer is tiledY.
620       *
621       * To avoid wasting memory, choose the smallest alignment possible:
622       * HALIGN_4 and VALIGN_4.
623       */
624      *image_align_el = isl_extent3d(4, 4, 1);
625      return;
626   } else if (info->format == ISL_FORMAT_HIZ) {
627      assert(ISL_DEV_GEN(dev) >= 6);
628      if (ISL_DEV_GEN(dev) == 6) {
629         /* HiZ surfaces on Sandy Bridge are packed tightly. */
630         *image_align_el = isl_extent3d(1, 1, 1);
631      } else {
632         /* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the
633          * primary surface which works out to 2x2 HiZ elments.
634          */
635         *image_align_el = isl_extent3d(2, 2, 1);
636      }
637      return;
638   }
639
640   if (ISL_DEV_GEN(dev) >= 9) {
641      isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout,
642                                         msaa_layout, image_align_el);
643   } else if (ISL_DEV_GEN(dev) >= 8) {
644      isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout,
645                                         msaa_layout, image_align_el);
646   } else if (ISL_DEV_GEN(dev) >= 7) {
647      isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout,
648                                          msaa_layout, image_align_el);
649   } else if (ISL_DEV_GEN(dev) >= 6) {
650      isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout,
651                                         msaa_layout, image_align_el);
652   } else {
653      isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout,
654                                         msaa_layout, image_align_el);
655   }
656}
657
658static enum isl_dim_layout
659isl_surf_choose_dim_layout(const struct isl_device *dev,
660                           enum isl_surf_dim logical_dim,
661                           enum isl_tiling tiling,
662                           isl_surf_usage_flags_t usage)
663{
664   /* Sandy bridge needs a special layout for HiZ and stencil. */
665   if (ISL_DEV_GEN(dev) == 6 &&
666       (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
667      return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ;
668
669   if (ISL_DEV_GEN(dev) >= 9) {
670      switch (logical_dim) {
671      case ISL_SURF_DIM_1D:
672         /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
673          *
674          *    One-dimensional surfaces use a tiling mode of linear.
675          *    Technically, they are not tiled resources, but the Tiled
676          *    Resource Mode field in RENDER_SURFACE_STATE is still used to
677          *    indicate the alignment requirements for this linear surface
678          *    (See 1D Alignment requirements for how 4K and 64KB Tiled
679          *    Resource Modes impact alignment). Alternatively, a 1D surface
680          *    can be defined as a 2D tiled surface (e.g. TileY or TileX) with
681          *    a height of 0.
682          *
683          * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear
684          * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used.
685          */
686         if (tiling == ISL_TILING_LINEAR)
687            return ISL_DIM_LAYOUT_GEN9_1D;
688         else
689            return ISL_DIM_LAYOUT_GEN4_2D;
690      case ISL_SURF_DIM_2D:
691      case ISL_SURF_DIM_3D:
692         return ISL_DIM_LAYOUT_GEN4_2D;
693      }
694   } else {
695      switch (logical_dim) {
696      case ISL_SURF_DIM_1D:
697      case ISL_SURF_DIM_2D:
698         /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
699          *
700          * The cube face textures are stored in the same way as 3D surfaces
701          * are stored (see section 6.17.5 for details).  For cube surfaces,
702          * however, the depth is equal to the number of faces (always 6) and
703          * is not reduced for each MIP.
704          */
705         if (ISL_DEV_GEN(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
706            return ISL_DIM_LAYOUT_GEN4_3D;
707
708         return ISL_DIM_LAYOUT_GEN4_2D;
709      case ISL_SURF_DIM_3D:
710         return ISL_DIM_LAYOUT_GEN4_3D;
711      }
712   }
713
714   unreachable("bad isl_surf_dim");
715   return ISL_DIM_LAYOUT_GEN4_2D;
716}
717
/**
 * Calculate the physical extent of the surface's first level, in units of
 * surface samples.
 *
 * @param[in]  dev             device; used for generation checks and, in
 *                             asserts, for format multisampling support
 * @param[in]  info            surface creation info (dimension, size,
 *                             samples, levels, array length, usage, format)
 * @param[in]  dim_layout      dimension layout chosen for the surface
 * @param[in]  tiling          tiling chosen for the surface; only consulted
 *                             for the multisampled TileYs FINISHME note
 * @param[in]  msaa_layout     MSAA layout chosen for the surface; selects the
 *                             computation for 2D surfaces
 * @param[out] phys_level0_sa  receives the width, height, depth and array
 *                             length of level 0
 */
static void
isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_dim_layout dim_layout,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent4d *phys_level0_sa)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);

   /* YUV formats are only flagged as unfinished; the computation below still
    * runs for them.
    */
   if (isl_format_is_yuv(info->format))
      isl_finishme("%s:%s: YUV format", __FILE__, __func__);

   switch (info->dim) {
   case ISL_SURF_DIM_1D:
      /* 1D surfaces must be a single, single-sampled row. */
      assert(info->height == 1);
      assert(info->depth == 1);
      assert(info->samples == 1);

      switch (dim_layout) {
      case ISL_DIM_LAYOUT_GEN4_3D:
         unreachable("bad isl_dim_layout");

      case ISL_DIM_LAYOUT_GEN9_1D:
      case ISL_DIM_LAYOUT_GEN4_2D:
      case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
         *phys_level0_sa = (struct isl_extent4d) {
            .w = info->width,
            .h = 1,
            .d = 1,
            .a = info->array_len,
         };
         break;
      }
      break;

   case ISL_SURF_DIM_2D:
      /* Gen4 cube maps use the 3D layout; every other 2D surface uses the 2D
       * or the gen6 stencil/HiZ layout.
       */
      if (ISL_DEV_GEN(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
         assert(dim_layout == ISL_DIM_LAYOUT_GEN4_3D);
      else
         assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D ||
                dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ);

      if (tiling == ISL_TILING_Ys && info->samples > 1)
         isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);

      switch (msaa_layout) {
      case ISL_MSAA_LAYOUT_NONE:
         /* Single-sampled: the physical extent equals the logical extent. */
         assert(info->depth == 1);
         assert(info->samples == 1);

         *phys_level0_sa = (struct isl_extent4d) {
            .w = info->width,
            .h = info->height,
            .d = 1,
            .a = info->array_len,
         };
         break;

      case ISL_MSAA_LAYOUT_ARRAY:
         assert(info->depth == 1);
         assert(info->levels == 1);
         assert(isl_format_supports_multisampling(dev->info, info->format));
         assert(fmtl->bw == 1 && fmtl->bh == 1);

         /* The sample count multiplies the array length; width and height
          * are unchanged.
          */
         *phys_level0_sa = (struct isl_extent4d) {
            .w = info->width,
            .h = info->height,
            .d = 1,
            .a = info->array_len * info->samples,
         };
         break;

      case ISL_MSAA_LAYOUT_INTERLEAVED:
         assert(info->depth == 1);
         assert(info->levels == 1);
         assert(isl_format_supports_multisampling(dev->info, info->format));

         *phys_level0_sa = (struct isl_extent4d) {
            .w = info->width,
            .h = info->height,
            .d = 1,
            .a = info->array_len,
         };

         /* Width and height are then scaled in place from pixels to
          * samples; the array length is unchanged.
          */
         isl_msaa_interleaved_scale_px_to_sa(info->samples,
                                             &phys_level0_sa->w,
                                             &phys_level0_sa->h);
         break;
      }
      break;

   case ISL_SURF_DIM_3D:
      /* 3D surfaces must be single-sampled and not arrayed. */
      assert(info->array_len == 1);
      assert(info->samples == 1);

      if (fmtl->bd > 1) {
         isl_finishme("%s:%s: compression block with depth > 1",
                      __FILE__, __func__);
      }

      switch (dim_layout) {
      case ISL_DIM_LAYOUT_GEN9_1D:
      case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
         unreachable("bad isl_dim_layout");

      case ISL_DIM_LAYOUT_GEN4_2D:
         /* Gen9+ only: the logical depth is stored as the array length. */
         assert(ISL_DEV_GEN(dev) >= 9);

         *phys_level0_sa = (struct isl_extent4d) {
            .w = info->width,
            .h = info->height,
            .d = 1,
            .a = info->depth,
         };
         break;

      case ISL_DIM_LAYOUT_GEN4_3D:
         /* Before gen9: depth stays in the depth dimension. */
         assert(ISL_DEV_GEN(dev) < 9);
         *phys_level0_sa = (struct isl_extent4d) {
            .w = info->width,
            .h = info->height,
            .d = info->depth,
            .a = 1,
         };
         break;
      }
      break;
   }
}
852
853/**
854 * Calculate the pitch between physical array slices, in units of rows of
855 * surface elements.
856 */
857static uint32_t
858isl_calc_array_pitch_el_rows_gen4_2d(
859      const struct isl_device *dev,
860      const struct isl_surf_init_info *restrict info,
861      const struct isl_tile_info *tile_info,
862      const struct isl_extent3d *image_align_sa,
863      const struct isl_extent4d *phys_level0_sa,
864      enum isl_array_pitch_span array_pitch_span,
865      const struct isl_extent2d *phys_slice0_sa)
866{
867   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
868   uint32_t pitch_sa_rows = 0;
869
870   switch (array_pitch_span) {
871   case ISL_ARRAY_PITCH_SPAN_COMPACT:
872      pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
873      break;
874   case ISL_ARRAY_PITCH_SPAN_FULL: {
875      /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
876       * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
877       * Surfaces >> Surface Arrays.
878       */
879      uint32_t H0_sa = phys_level0_sa->h;
880      uint32_t H1_sa = isl_minify(H0_sa, 1);
881
882      uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
883      uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
884
885      uint32_t m;
886      if (ISL_DEV_GEN(dev) >= 7) {
887         /* The QPitch equation changed slightly in Ivybridge. */
888         m = 12;
889      } else {
890         m = 11;
891      }
892
893      pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
894
895      if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 &&
896          (info->height % 4 == 1)) {
897         /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
898          * Graphics Core >> Section 7.18.3.7: Surface Arrays:
899          *
900          *    [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
901          *    the value calculated in the equation above , for every
902          *    other odd Surface Height starting from 1 i.e. 1,5,9,13.
903          *
904          * XXX(chadv): Is the errata natural corollary of the physical
905          * layout of interleaved samples?
906          */
907         pitch_sa_rows += 4;
908      }
909
910      pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
911      } /* end case */
912      break;
913   }
914
915   assert(pitch_sa_rows % fmtl->bh == 0);
916   uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
917
918   if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) {
919      /*
920       * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
921       *
922       *    "Mip-mapped and arrayed surfaces are supported with MCS buffer
923       *    layout with these alignments in the RT space: Horizontal
924       *    Alignment = 128 and Vertical Alignment = 64."
925       *
926       * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
927       *
928       *    "For non-multisampled render target's CCS auxiliary surface,
929       *    QPitch must be computed with Horizontal Alignment = 128 and
930       *    Surface Vertical Alignment = 256. These alignments are only for
931       *    CCS buffer and not for associated render target."
932       *
933       * The first restriction is already handled by isl_choose_image_alignment_el
934       * but the second restriction, which is an extension of the first, only
935       * applies to qpitch and must be applied here.
936       */
937      assert(fmtl->bh == 4);
938      pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
939   }
940
941   if (ISL_DEV_GEN(dev) >= 9 &&
942       info->dim == ISL_SURF_DIM_3D &&
943       tile_info->tiling != ISL_TILING_LINEAR) {
944      /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
945       *
946       *    Tile Mode != Linear: This field must be set to an integer multiple
947       *    of the tile height
948       */
949      pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
950   }
951
952   return pitch_el_rows;
953}
954
955/**
956 * A variant of isl_calc_phys_slice0_extent_sa() specific to
957 * ISL_DIM_LAYOUT_GEN4_2D.
958 */
959static void
960isl_calc_phys_slice0_extent_sa_gen4_2d(
961      const struct isl_device *dev,
962      const struct isl_surf_init_info *restrict info,
963      enum isl_msaa_layout msaa_layout,
964      const struct isl_extent3d *image_align_sa,
965      const struct isl_extent4d *phys_level0_sa,
966      struct isl_extent2d *phys_slice0_sa)
967{
968   assert(phys_level0_sa->depth == 1);
969
970   if (info->levels == 1) {
971      /* Do not pad the surface to the image alignment.
972       *
973       * For tiled surfaces, using a reduced alignment here avoids wasting CPU
974       * cycles on the below mipmap layout caluclations. Reducing the
975       * alignment here is safe because we later align the row pitch and array
976       * pitch to the tile boundary. It is safe even for
977       * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
978       * to accomodate the interleaved samples.
979       *
980       * For linear surfaces, reducing the alignment here permits us to later
981       * choose an arbitrary, non-aligned row pitch. If the surface backs
982       * a VkBuffer, then an arbitrary pitch may be needed to accomodate
983       * VkBufferImageCopy::bufferRowLength.
984       */
985      *phys_slice0_sa = (struct isl_extent2d) {
986         .w = phys_level0_sa->w,
987         .h = phys_level0_sa->h,
988      };
989      return;
990   }
991
992   uint32_t slice_top_w = 0;
993   uint32_t slice_bottom_w = 0;
994   uint32_t slice_left_h = 0;
995   uint32_t slice_right_h = 0;
996
997   uint32_t W0 = phys_level0_sa->w;
998   uint32_t H0 = phys_level0_sa->h;
999
1000   for (uint32_t l = 0; l < info->levels; ++l) {
1001      uint32_t W = isl_minify(W0, l);
1002      uint32_t H = isl_minify(H0, l);
1003
1004      uint32_t w = isl_align_npot(W, image_align_sa->w);
1005      uint32_t h = isl_align_npot(H, image_align_sa->h);
1006
1007      if (l == 0) {
1008         slice_top_w = w;
1009         slice_left_h = h;
1010         slice_right_h = h;
1011      } else if (l == 1) {
1012         slice_bottom_w = w;
1013         slice_left_h += h;
1014      } else if (l == 2) {
1015         slice_bottom_w += w;
1016         slice_right_h += h;
1017      } else {
1018         slice_right_h += h;
1019      }
1020   }
1021
1022   *phys_slice0_sa = (struct isl_extent2d) {
1023      .w = MAX(slice_top_w, slice_bottom_w),
1024      .h = MAX(slice_left_h, slice_right_h),
1025   };
1026}
1027
1028static void
1029isl_calc_phys_total_extent_el_gen4_2d(
1030      const struct isl_device *dev,
1031      const struct isl_surf_init_info *restrict info,
1032      const struct isl_tile_info *tile_info,
1033      enum isl_msaa_layout msaa_layout,
1034      const struct isl_extent3d *image_align_sa,
1035      const struct isl_extent4d *phys_level0_sa,
1036      enum isl_array_pitch_span array_pitch_span,
1037      uint32_t *array_pitch_el_rows,
1038      struct isl_extent2d *total_extent_el)
1039{
1040   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1041
1042   struct isl_extent2d phys_slice0_sa;
1043   isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
1044                                          image_align_sa, phys_level0_sa,
1045                                          &phys_slice0_sa);
1046   *array_pitch_el_rows =
1047      isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info,
1048                                           image_align_sa, phys_level0_sa,
1049                                           array_pitch_span,
1050                                           &phys_slice0_sa);
1051   *total_extent_el = (struct isl_extent2d) {
1052      .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1053      .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
1054           isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1055   };
1056}
1057
1058/**
1059 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1060 * ISL_DIM_LAYOUT_GEN4_3D.
1061 */
1062static void
1063isl_calc_phys_total_extent_el_gen4_3d(
1064      const struct isl_device *dev,
1065      const struct isl_surf_init_info *restrict info,
1066      const struct isl_extent3d *image_align_sa,
1067      const struct isl_extent4d *phys_level0_sa,
1068      uint32_t *array_pitch_el_rows,
1069      struct isl_extent2d *phys_total_el)
1070{
1071   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1072
1073   assert(info->samples == 1);
1074
1075   if (info->dim != ISL_SURF_DIM_3D) {
1076      /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
1077       *
1078       * The cube face textures are stored in the same way as 3D surfaces
1079       * are stored (see section 6.17.5 for details).  For cube surfaces,
1080       * however, the depth is equal to the number of faces (always 6) and
1081       * is not reduced for each MIP.
1082       */
1083      assert(ISL_DEV_GEN(dev) == 4);
1084      assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
1085      assert(phys_level0_sa->array_len == 6);
1086   } else {
1087      assert(phys_level0_sa->array_len == 1);
1088   }
1089
1090   uint32_t total_w = 0;
1091   uint32_t total_h = 0;
1092
1093   uint32_t W0 = phys_level0_sa->w;
1094   uint32_t H0 = phys_level0_sa->h;
1095   uint32_t D0 = phys_level0_sa->d;
1096   uint32_t A0 = phys_level0_sa->a;
1097
1098   for (uint32_t l = 0; l < info->levels; ++l) {
1099      uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
1100      uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
1101      uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
1102
1103      uint32_t max_layers_horiz = MIN(level_d, 1u << l);
1104      uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1105
1106      total_w = MAX(total_w, level_w * max_layers_horiz);
1107      total_h += level_h * max_layers_vert;
1108   }
1109
1110   /* GEN4_3D layouts don't really have an array pitch since each LOD has a
1111    * different number of horizontal and vertical layers.  We have to set it
1112    * to something, so at least make it true for LOD0.
1113    */
1114   *array_pitch_el_rows =
1115      isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
1116   *phys_total_el = (struct isl_extent2d) {
1117      .w = isl_assert_div(total_w, fmtl->bw),
1118      .h = isl_assert_div(total_h, fmtl->bh),
1119   };
1120}
1121
1122/**
1123 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1124 * ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ.
1125 */
1126static void
1127isl_calc_phys_total_extent_el_gen6_stencil_hiz(
1128      const struct isl_device *dev,
1129      const struct isl_surf_init_info *restrict info,
1130      const struct isl_tile_info *tile_info,
1131      const struct isl_extent3d *image_align_sa,
1132      const struct isl_extent4d *phys_level0_sa,
1133      uint32_t *array_pitch_el_rows,
1134      struct isl_extent2d *phys_total_el)
1135{
1136   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1137
1138   const struct isl_extent2d tile_extent_sa = {
1139      .w = tile_info->logical_extent_el.w * fmtl->bw,
1140      .h = tile_info->logical_extent_el.h * fmtl->bh,
1141   };
1142   /* Tile size is a multiple of image alignment */
1143   assert(tile_extent_sa.w % image_align_sa->w == 0);
1144   assert(tile_extent_sa.h % image_align_sa->h == 0);
1145
1146   const uint32_t W0 = phys_level0_sa->w;
1147   const uint32_t H0 = phys_level0_sa->h;
1148
1149   /* Each image has the same height as LOD0 because the hardware thinks
1150    * everything is LOD0
1151    */
1152   const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;
1153
1154   uint32_t total_top_w = 0;
1155   uint32_t total_bottom_w = 0;
1156   uint32_t total_h = 0;
1157
1158   for (uint32_t l = 0; l < info->levels; ++l) {
1159      const uint32_t W = isl_minify(W0, l);
1160
1161      const uint32_t w = isl_align(W, tile_extent_sa.w);
1162      const uint32_t h = isl_align(H, tile_extent_sa.h);
1163
1164      if (l == 0) {
1165         total_top_w = w;
1166         total_h = h;
1167      } else if (l == 1) {
1168         total_bottom_w = w;
1169         total_h += h;
1170      } else {
1171         total_bottom_w += w;
1172      }
1173   }
1174
1175   *array_pitch_el_rows =
1176      isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
1177   *phys_total_el = (struct isl_extent2d) {
1178      .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
1179      .h = isl_assert_div(total_h, fmtl->bh),
1180   };
1181}
1182
1183/**
1184 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1185 * ISL_DIM_LAYOUT_GEN9_1D.
1186 */
1187static void
1188isl_calc_phys_total_extent_el_gen9_1d(
1189      const struct isl_device *dev,
1190      const struct isl_surf_init_info *restrict info,
1191      const struct isl_extent3d *image_align_sa,
1192      const struct isl_extent4d *phys_level0_sa,
1193      uint32_t *array_pitch_el_rows,
1194      struct isl_extent2d *phys_total_el)
1195{
1196   MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1197
1198   assert(phys_level0_sa->height == 1);
1199   assert(phys_level0_sa->depth == 1);
1200   assert(info->samples == 1);
1201   assert(image_align_sa->w >= fmtl->bw);
1202
1203   uint32_t slice_w = 0;
1204   const uint32_t W0 = phys_level0_sa->w;
1205
1206   for (uint32_t l = 0; l < info->levels; ++l) {
1207      uint32_t W = isl_minify(W0, l);
1208      uint32_t w = isl_align_npot(W, image_align_sa->w);
1209
1210      slice_w += w;
1211   }
1212
1213   *array_pitch_el_rows = 1;
1214   *phys_total_el = (struct isl_extent2d) {
1215      .w = isl_assert_div(slice_w, fmtl->bw),
1216      .h = phys_level0_sa->array_len,
1217   };
1218}
1219
1220/**
1221 * Calculate the two-dimensional total physical extent of the surface, in
1222 * units of surface elements.
1223 */
1224static void
1225isl_calc_phys_total_extent_el(const struct isl_device *dev,
1226                              const struct isl_surf_init_info *restrict info,
1227                              const struct isl_tile_info *tile_info,
1228                              enum isl_dim_layout dim_layout,
1229                              enum isl_msaa_layout msaa_layout,
1230                              const struct isl_extent3d *image_align_sa,
1231                              const struct isl_extent4d *phys_level0_sa,
1232                              enum isl_array_pitch_span array_pitch_span,
1233                              uint32_t *array_pitch_el_rows,
1234                              struct isl_extent2d *total_extent_el)
1235{
1236   switch (dim_layout) {
1237   case ISL_DIM_LAYOUT_GEN9_1D:
1238      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1239      isl_calc_phys_total_extent_el_gen9_1d(dev, info,
1240                                            image_align_sa, phys_level0_sa,
1241                                            array_pitch_el_rows,
1242                                            total_extent_el);
1243      return;
1244   case ISL_DIM_LAYOUT_GEN4_2D:
1245      isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout,
1246                                            image_align_sa, phys_level0_sa,
1247                                            array_pitch_span,
1248                                            array_pitch_el_rows,
1249                                            total_extent_el);
1250      return;
1251   case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
1252      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1253      isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info,
1254                                                     image_align_sa,
1255                                                     phys_level0_sa,
1256                                                     array_pitch_el_rows,
1257                                                     total_extent_el);
1258      return;
1259   case ISL_DIM_LAYOUT_GEN4_3D:
1260      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1261      isl_calc_phys_total_extent_el_gen4_3d(dev, info,
1262                                            image_align_sa, phys_level0_sa,
1263                                            array_pitch_el_rows,
1264                                            total_extent_el);
1265      return;
1266   }
1267}
1268
1269static uint32_t
1270isl_calc_row_pitch_alignment(const struct isl_surf_init_info *surf_info,
1271                             const struct isl_tile_info *tile_info)
1272{
1273   if (tile_info->tiling != ISL_TILING_LINEAR)
1274      return tile_info->phys_extent_B.width;
1275
1276   /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
1277    * RENDER_SURFACE_STATE Surface Pitch (p349):
1278    *
1279    *    - For linear render target surfaces and surfaces accessed with the
1280    *      typed data port messages, the pitch must be a multiple of the
1281    *      element size for non-YUV surface formats.  Pitch must be
1282    *      a multiple of 2 * element size for YUV surface formats.
1283    *
1284    *    - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
1285    *      ignore because isl doesn't do buffers.]
1286    *
1287    *    - For other linear surfaces, the pitch can be any multiple of
1288    *      bytes.
1289    */
1290   const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1291   const uint32_t bs = fmtl->bpb / 8;
1292
1293   if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1294      if (isl_format_is_yuv(surf_info->format)) {
1295         return 2 * bs;
1296      } else  {
1297         return bs;
1298      }
1299   }
1300
1301   return 1;
1302}
1303
1304static uint32_t
1305isl_calc_linear_min_row_pitch(const struct isl_device *dev,
1306                              const struct isl_surf_init_info *info,
1307                              const struct isl_extent2d *phys_total_el,
1308                              uint32_t alignment_B)
1309{
1310   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1311   const uint32_t bs = fmtl->bpb / 8;
1312
1313   return isl_align_npot(bs * phys_total_el->w, alignment_B);
1314}
1315
1316static uint32_t
1317isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
1318                             const struct isl_surf_init_info *surf_info,
1319                             const struct isl_tile_info *tile_info,
1320                             const struct isl_extent2d *phys_total_el,
1321                             uint32_t alignment_B)
1322{
1323   const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1324
1325   assert(fmtl->bpb % tile_info->format_bpb == 0);
1326
1327   const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
1328   const uint32_t total_w_tl =
1329      isl_align_div(phys_total_el->w * tile_el_scale,
1330                    tile_info->logical_extent_el.width);
1331
1332   assert(alignment_B == tile_info->phys_extent_B.width);
1333   return total_w_tl * tile_info->phys_extent_B.width;
1334}
1335
1336static uint32_t
1337isl_calc_min_row_pitch(const struct isl_device *dev,
1338                       const struct isl_surf_init_info *surf_info,
1339                       const struct isl_tile_info *tile_info,
1340                       const struct isl_extent2d *phys_total_el,
1341                       uint32_t alignment_B)
1342{
1343   if (tile_info->tiling == ISL_TILING_LINEAR) {
1344      return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
1345                                           alignment_B);
1346   } else {
1347      return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
1348                                          phys_total_el, alignment_B);
1349   }
1350}
1351
/**
 * Is pitch `n` in the valid range for a hardware bitfield, if the bitfield's
 * size is `bits` bits?
 *
 * Hardware pitch fields are offset by 1. For example, if the size of
 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
 * pitches is [1, 2^B] inclusive.  If the surface pitch is N, then
 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
 *
 * A `bits` value of 0 always yields false.  The upper bound is computed in
 * 64 bits with a clamped shift count, so the result is well defined for any
 * `bits`; fields of 32 or more bits accept every non-zero 32-bit pitch.
 *
 * @param n     Pitch to validate; must be non-zero.
 * @param bits  Width of the hardware field, in bits.
 * @return true if `bits` is non-zero and 1 <= n <= 2^bits.
 */
static bool
pitch_in_range(uint32_t n, uint32_t bits)
{
   assert(n != 0);

   /* Shifting a plain int by 31 or more bits is undefined behavior; widen
    * the operand and clamp the count so that the shift is always valid.
    */
   const uint32_t shift = bits < 32 ? bits : 32;
   const uint64_t max_pitch = (uint64_t) 1 << shift;

   return likely(bits != 0 && 1 <= n && n <= max_pitch);
}
1367
1368static bool
1369isl_calc_row_pitch(const struct isl_device *dev,
1370                   const struct isl_surf_init_info *surf_info,
1371                   const struct isl_tile_info *tile_info,
1372                   enum isl_dim_layout dim_layout,
1373                   const struct isl_extent2d *phys_total_el,
1374                   uint32_t *out_row_pitch_B)
1375{
1376   uint32_t alignment_B =
1377      isl_calc_row_pitch_alignment(surf_info, tile_info);
1378
1379   const uint32_t min_row_pitch_B =
1380      isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
1381                             alignment_B);
1382
1383   uint32_t row_pitch_B = min_row_pitch_B;
1384
1385   if (surf_info->row_pitch_B != 0) {
1386      row_pitch_B = surf_info->row_pitch_B;
1387
1388      if (row_pitch_B < min_row_pitch_B)
1389         return false;
1390
1391      if (row_pitch_B % alignment_B != 0)
1392         return false;
1393   }
1394
1395   const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width;
1396
1397   if (row_pitch_B == 0)
1398      return false;
1399
1400   if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
1401      /* SurfacePitch is ignored for this layout. */
1402      goto done;
1403   }
1404
1405   if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1406                            ISL_SURF_USAGE_TEXTURE_BIT |
1407                            ISL_SURF_USAGE_STORAGE_BIT)) &&
1408       !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info)))
1409      return false;
1410
1411   if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
1412                            ISL_SURF_USAGE_MCS_BIT)) &&
1413       !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info)))
1414      return false;
1415
1416   if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
1417       !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1418      return false;
1419
1420   if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
1421       !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1422      return false;
1423
1424   const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
1425      _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
1426      _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
1427
1428   if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
1429       !pitch_in_range(row_pitch_B, stencil_pitch_bits))
1430      return false;
1431
1432 done:
1433   *out_row_pitch_B = row_pitch_B;
1434   return true;
1435}
1436
1437bool
1438isl_surf_init_s(const struct isl_device *dev,
1439                struct isl_surf *surf,
1440                const struct isl_surf_init_info *restrict info)
1441{
1442   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1443
1444   const struct isl_extent4d logical_level0_px = {
1445      .w = info->width,
1446      .h = info->height,
1447      .d = info->depth,
1448      .a = info->array_len,
1449   };
1450
1451   enum isl_tiling tiling;
1452   if (!isl_surf_choose_tiling(dev, info, &tiling))
1453      return false;
1454
1455   struct isl_tile_info tile_info;
1456   isl_tiling_get_info(tiling, fmtl->bpb, &tile_info);
1457
1458   const enum isl_dim_layout dim_layout =
1459      isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);
1460
1461   enum isl_msaa_layout msaa_layout;
1462   if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
1463       return false;
1464
1465   struct isl_extent3d image_align_el;
1466   isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
1467                                 &image_align_el);
1468
1469   struct isl_extent3d image_align_sa =
1470      isl_extent3d_el_to_sa(info->format, image_align_el);
1471
1472   struct isl_extent4d phys_level0_sa;
1473   isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
1474                                  &phys_level0_sa);
1475
1476   enum isl_array_pitch_span array_pitch_span =
1477      isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
1478
1479   uint32_t array_pitch_el_rows;
1480   struct isl_extent2d phys_total_el;
1481   isl_calc_phys_total_extent_el(dev, info, &tile_info,
1482                                 dim_layout, msaa_layout,
1483                                 &image_align_sa, &phys_level0_sa,
1484                                 array_pitch_span, &array_pitch_el_rows,
1485                                 &phys_total_el);
1486
1487   uint32_t row_pitch_B;
1488   if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
1489                           &phys_total_el, &row_pitch_B))
1490      return false;
1491
1492   uint32_t base_alignment_B;
1493   uint64_t size_B;
1494   if (tiling == ISL_TILING_LINEAR) {
1495      size_B = (uint64_t) row_pitch_B * phys_total_el.h;
1496
1497      /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
1498       *
1499       *    "The Base Address for linear render target surfaces and surfaces
1500       *    accessed with the typed surface read/write data port messages must
1501       *    be element-size aligned, for non-YUV surface formats, or a
1502       *    multiple of 2 element-sizes for YUV surface formats. Other linear
1503       *    surfaces have no alignment requirements (byte alignment is
1504       *    sufficient.)"
1505       */
1506      base_alignment_B = MAX(1, info->min_alignment_B);
1507      if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1508         if (isl_format_is_yuv(info->format)) {
1509            base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 4);
1510         } else {
1511            base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 8);
1512         }
1513      }
1514      base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B);
1515
1516      /* From the Skylake PRM Vol 2c, PLANE_STRIDE::Stride:
1517       *
1518       *     "For Linear memory, this field specifies the stride in chunks of
1519       *     64 bytes (1 cache line)."
1520       */
1521      if (isl_surf_usage_is_display(info->usage))
1522         base_alignment_B = MAX(base_alignment_B, 64);
1523   } else {
1524      const uint32_t total_h_tl =
1525         isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height);
1526
1527      size_B = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch_B;
1528
1529      const uint32_t tile_size_B = tile_info.phys_extent_B.width *
1530                                   tile_info.phys_extent_B.height;
1531      assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
1532      base_alignment_B = MAX(info->min_alignment_B, tile_size_B);
1533   }
1534
1535   if (ISL_DEV_GEN(dev) < 9) {
1536      /* From the Broadwell PRM Vol 5, Surface Layout:
1537       *
1538       *    "In addition to restrictions on maximum height, width, and depth,
1539       *     surfaces are also restricted to a maximum size in bytes. This
1540       *     maximum is 2 GB for all products and all surface types."
1541       *
1542       * This comment is applicable to all Pre-gen9 platforms.
1543       */
1544      if (size_B > (uint64_t) 1 << 31)
1545         return false;
1546   } else if (ISL_DEV_GEN(dev) < 11) {
1547      /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
1548       *    "In addition to restrictions on maximum height, width, and depth,
1549       *     surfaces are also restricted to a maximum size of 2^38 bytes.
1550       *     All pixels within the surface must be contained within 2^38 bytes
1551       *     of the base address."
1552       */
1553      if (size_B > (uint64_t) 1 << 38)
1554         return false;
1555   } else {
1556      /* gen11+ platforms raised this limit to 2^44 bytes. */
1557      if (size_B > (uint64_t) 1 << 44)
1558         return false;
1559   }
1560
1561   *surf = (struct isl_surf) {
1562      .dim = info->dim,
1563      .dim_layout = dim_layout,
1564      .msaa_layout = msaa_layout,
1565      .tiling = tiling,
1566      .format = info->format,
1567
1568      .levels = info->levels,
1569      .samples = info->samples,
1570
1571      .image_alignment_el = image_align_el,
1572      .logical_level0_px = logical_level0_px,
1573      .phys_level0_sa = phys_level0_sa,
1574
1575      .size_B = size_B,
1576      .alignment_B = base_alignment_B,
1577      .row_pitch_B = row_pitch_B,
1578      .array_pitch_el_rows = array_pitch_el_rows,
1579      .array_pitch_span = array_pitch_span,
1580
1581      .usage = info->usage,
1582   };
1583
1584   return true;
1585}
1586
1587void
1588isl_surf_get_tile_info(const struct isl_surf *surf,
1589                       struct isl_tile_info *tile_info)
1590{
1591   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1592   isl_tiling_get_info(surf->tiling, fmtl->bpb, tile_info);
1593}
1594
1595bool
1596isl_surf_get_hiz_surf(const struct isl_device *dev,
1597                      const struct isl_surf *surf,
1598                      struct isl_surf *hiz_surf)
1599{
1600   assert(ISL_DEV_GEN(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev));
1601
1602   /* Multisampled depth is always interleaved */
1603   assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
1604          surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
1605
1606   /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
1607    *
1608    *    "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
1609    *    Target View Extent, and Depth Coordinate Offset X/Y of the
1610    *    hierarchical depth buffer are inherited from the depth buffer. The
1611    *    height and width of the hierarchical depth buffer that must be
1612    *    allocated are computed by the following formulas, where HZ is the
1613    *    hierarchical depth buffer and Z is the depth buffer. The Z_Height,
1614    *    Z_Width, and Z_Depth values given in these formulas are those present
1615    *    in 3DSTATE_DEPTH_BUFFER incremented by one.
1616    *
1617    *    "The value of Z_Height and Z_Width must each be multiplied by 2 before
1618    *    being applied to the table below if Number of Multisamples is set to
1619    *    NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
1620    *    Z_Width must be multiplied by 4 before being applied to the table
1621    *    below if Number of Multisamples is set to NUMSAMPLES_8."
1622    *
1623    * In the Sky Lake PRM, the second paragraph is replaced with this:
1624    *
1625    *    "The Z_Height and Z_Width values must equal those present in
1626    *    3DSTATE_DEPTH_BUFFER incremented by one."
1627    *
1628    * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ
1629    * block corresponds to a region of 8x4 samples in the primary depth
1630    * surface.  On Sky Lake, on the other hand, each HiZ block corresponds to
1631    * a region of 8x4 pixels in the primary depth surface regardless of the
1632    * number of samples.  The dimensions of a HiZ block in both pixels and
1633    * samples are given in the table below:
1634    *
1635    *                    | SNB - BDW |     SKL+
1636    *              ------+-----------+-------------
1637    *                1x  |  8 x 4 sa |   8 x 4 sa
1638    *               MSAA |  8 x 4 px |   8 x 4 px
1639    *              ------+-----------+-------------
1640    *                2x  |  8 x 4 sa |  16 x 4 sa
1641    *               MSAA |  4 x 4 px |   8 x 4 px
1642    *              ------+-----------+-------------
1643    *                4x  |  8 x 4 sa |  16 x 8 sa
1644    *               MSAA |  4 x 2 px |   8 x 4 px
1645    *              ------+-----------+-------------
1646    *                8x  |  8 x 4 sa |  32 x 8 sa
1647    *               MSAA |  2 x 2 px |   8 x 4 px
1648    *              ------+-----------+-------------
1649    *               16x  |    N/A    | 32 x 16 sa
1650    *               MSAA |    N/A    |  8 x  4 px
1651    *              ------+-----------+-------------
1652    *
    * There are a number of different ways that this discrepancy could be
1654    * handled.  The way we have chosen is to simply make MSAA HiZ have the
1655    * same number of samples as the parent surface pre-Sky Lake and always be
1656    * single-sampled on Sky Lake and above.  Since the block sizes of
1657    * compressed formats are given in samples, this neatly handles everything
1658    * without the need for additional HiZ formats with different block sizes
1659    * on SKL+.
1660    */
1661   const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples;
1662
1663   return isl_surf_init(dev, hiz_surf,
1664                        .dim = surf->dim,
1665                        .format = ISL_FORMAT_HIZ,
1666                        .width = surf->logical_level0_px.width,
1667                        .height = surf->logical_level0_px.height,
1668                        .depth = surf->logical_level0_px.depth,
1669                        .levels = surf->levels,
1670                        .array_len = surf->logical_level0_px.array_len,
1671                        .samples = samples,
1672                        .usage = ISL_SURF_USAGE_HIZ_BIT,
1673                        .tiling_flags = ISL_TILING_HIZ_BIT);
1674}
1675
1676bool
1677isl_surf_get_mcs_surf(const struct isl_device *dev,
1678                      const struct isl_surf *surf,
1679                      struct isl_surf *mcs_surf)
1680{
1681   assert(ISL_DEV_GEN(dev) >= 7);
1682
1683   /* It must be multisampled with an array layout */
1684   assert(surf->samples > 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
1685
1686   /* The following are true of all multisampled surfaces */
1687   assert(surf->dim == ISL_SURF_DIM_2D);
1688   assert(surf->levels == 1);
1689   assert(surf->logical_level0_px.depth == 1);
1690
1691   /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9
1692    * bits which means the maximum pitch of a compression surface is 512
1693    * tiles or 64KB (since MCS is always Y-tiled).  Since a 16x MCS buffer is
1694    * 64bpp, this gives us a maximum width of 8192 pixels.  We can create
1695    * larger multisampled surfaces, we just can't compress them.   For 2x, 4x,
1696    * and 8x, we have enough room for the full 16k supported by the hardware.
1697    */
1698   if (surf->samples == 16 && surf->logical_level0_px.width > 8192)
1699      return false;
1700
1701   enum isl_format mcs_format;
1702   switch (surf->samples) {
1703   case 2:  mcs_format = ISL_FORMAT_MCS_2X;  break;
1704   case 4:  mcs_format = ISL_FORMAT_MCS_4X;  break;
1705   case 8:  mcs_format = ISL_FORMAT_MCS_8X;  break;
1706   case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
1707   default:
1708      unreachable("Invalid sample count");
1709   }
1710
1711   return isl_surf_init(dev, mcs_surf,
1712                        .dim = ISL_SURF_DIM_2D,
1713                        .format = mcs_format,
1714                        .width = surf->logical_level0_px.width,
1715                        .height = surf->logical_level0_px.height,
1716                        .depth = 1,
1717                        .levels = 1,
1718                        .array_len = surf->logical_level0_px.array_len,
1719                        .samples = 1, /* MCS surfaces are really single-sampled */
1720                        .usage = ISL_SURF_USAGE_MCS_BIT,
1721                        .tiling_flags = ISL_TILING_Y0_BIT);
1722}
1723
1724bool
1725isl_surf_get_ccs_surf(const struct isl_device *dev,
1726                      const struct isl_surf *surf,
1727                      struct isl_surf *ccs_surf,
1728                      uint32_t row_pitch_B)
1729{
1730   assert(surf->samples == 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_NONE);
1731   assert(ISL_DEV_GEN(dev) >= 7);
1732
1733   if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
1734      return false;
1735
1736   /* The PRM doesn't say this explicitly, but fast-clears don't appear to
1737    * work for 3D textures until gen9 where the layout of 3D textures changes
1738    * to match 2D array textures.
1739    */
1740   if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
1741      return false;
1742
1743   /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of
1744    * Non-MultiSampler Render Target Restrictions):
1745    *
1746    *    "Support is for non-mip-mapped and non-array surface types only."
1747    *
1748    * This restriction is lifted on gen8+.  Technically, it may be possible to
1749    * create a CCS for an arrayed or mipmapped image and only enable CCS_D
1750    * when rendering to the base slice.  However, there is no documentation
1751    * tell us what the hardware would do in that case or what it does if you
1752    * walk off the bases slice.  (Does it ignore CCS or does it start
1753    * scribbling over random memory?)  We play it safe and just follow the
1754    * docs and don't allow CCS_D for arrayed or mip-mapped surfaces.
1755    */
1756   if (ISL_DEV_GEN(dev) <= 7 &&
1757       (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
1758      return false;
1759
1760   if (isl_format_is_compressed(surf->format))
1761      return false;
1762
1763   /* TODO: More conditions where it can fail. */
1764
1765   enum isl_format ccs_format;
1766   if (ISL_DEV_GEN(dev) >= 9) {
1767      if (!isl_tiling_is_any_y(surf->tiling))
1768         return false;
1769
1770      switch (isl_format_get_layout(surf->format)->bpb) {
1771      case 32:    ccs_format = ISL_FORMAT_GEN9_CCS_32BPP;   break;
1772      case 64:    ccs_format = ISL_FORMAT_GEN9_CCS_64BPP;   break;
1773      case 128:   ccs_format = ISL_FORMAT_GEN9_CCS_128BPP;  break;
1774      default:
1775         return false;
1776      }
1777   } else if (surf->tiling == ISL_TILING_Y0) {
1778      switch (isl_format_get_layout(surf->format)->bpb) {
1779      case 32:    ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_Y;    break;
1780      case 64:    ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_Y;    break;
1781      case 128:   ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_Y;   break;
1782      default:
1783         return false;
1784      }
1785   } else if (surf->tiling == ISL_TILING_X) {
1786      switch (isl_format_get_layout(surf->format)->bpb) {
1787      case 32:    ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_X;    break;
1788      case 64:    ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_X;    break;
1789      case 128:   ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_X;   break;
1790      default:
1791         return false;
1792      }
1793   } else {
1794      return false;
1795   }
1796
1797   return isl_surf_init(dev, ccs_surf,
1798                        .dim = surf->dim,
1799                        .format = ccs_format,
1800                        .width = surf->logical_level0_px.width,
1801                        .height = surf->logical_level0_px.height,
1802                        .depth = surf->logical_level0_px.depth,
1803                        .levels = surf->levels,
1804                        .array_len = surf->logical_level0_px.array_len,
1805                        .samples = 1,
1806                        .row_pitch_B = row_pitch_B,
1807                        .usage = ISL_SURF_USAGE_CCS_BIT,
1808                        .tiling_flags = ISL_TILING_CCS_BIT);
1809}
1810
/* Dispatch to the isl_gen<N>_<func>() implementation that matches the
 * hardware generation of dev, forwarding the remaining arguments to it.
 * G4X parts use the gen5 implementation and Haswell uses the gen75 one.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: it takes a trailing semicolon and can be used as the
 * body of an un-braced if/else without breaking how the else binds.
 */
#define isl_genX_call(dev, func, ...)                 \
   do {                                               \
      switch (ISL_DEV_GEN(dev)) {                     \
      case 4:                                         \
         /* G45 surface state is the same as gen5 */  \
         if (ISL_DEV_IS_G4X(dev)) {                   \
            isl_gen5_##func(__VA_ARGS__);             \
         } else {                                     \
            isl_gen4_##func(__VA_ARGS__);             \
         }                                            \
         break;                                       \
      case 5:                                         \
         isl_gen5_##func(__VA_ARGS__);                \
         break;                                       \
      case 6:                                         \
         isl_gen6_##func(__VA_ARGS__);                \
         break;                                       \
      case 7:                                         \
         if (ISL_DEV_IS_HASWELL(dev)) {               \
            isl_gen75_##func(__VA_ARGS__);            \
         } else {                                     \
            isl_gen7_##func(__VA_ARGS__);             \
         }                                            \
         break;                                       \
      case 8:                                         \
         isl_gen8_##func(__VA_ARGS__);                \
         break;                                       \
      case 9:                                         \
         isl_gen9_##func(__VA_ARGS__);                \
         break;                                       \
      case 10:                                        \
         isl_gen10_##func(__VA_ARGS__);               \
         break;                                       \
      case 11:                                        \
         isl_gen11_##func(__VA_ARGS__);               \
         break;                                       \
      default:                                        \
         assert(!"Unknown hardware generation");      \
      }                                               \
   } while (0)
1849
1850void
1851isl_surf_fill_state_s(const struct isl_device *dev, void *state,
1852                      const struct isl_surf_fill_state_info *restrict info)
1853{
1854#ifndef NDEBUG
1855   isl_surf_usage_flags_t _base_usage =
1856      info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1857                           ISL_SURF_USAGE_TEXTURE_BIT |
1858                           ISL_SURF_USAGE_STORAGE_BIT);
1859   /* They may only specify one of the above bits at a time */
1860   assert(__builtin_popcount(_base_usage) == 1);
1861   /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */
1862   assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage);
1863#endif
1864
1865   if (info->surf->dim == ISL_SURF_DIM_3D) {
1866      assert(info->view->base_array_layer + info->view->array_len <=
1867             info->surf->logical_level0_px.depth);
1868   } else {
1869      assert(info->view->base_array_layer + info->view->array_len <=
1870             info->surf->logical_level0_px.array_len);
1871   }
1872
1873   isl_genX_call(dev, surf_fill_state_s, dev, state, info);
1874}
1875
1876void
1877isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
1878                        const struct isl_buffer_fill_state_info *restrict info)
1879{
1880   isl_genX_call(dev, buffer_fill_state_s, state, info);
1881}
1882
1883void
1884isl_null_fill_state(const struct isl_device *dev, void *state,
1885                    struct isl_extent3d size)
1886{
1887   isl_genX_call(dev, null_fill_state, state, size);
1888}
1889
1890void
1891isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
1892                             const struct isl_depth_stencil_hiz_emit_info *restrict info)
1893{
1894   if (info->depth_surf && info->stencil_surf) {
1895      if (!dev->info->has_hiz_and_separate_stencil) {
1896         assert(info->depth_surf == info->stencil_surf);
1897         assert(info->depth_address == info->stencil_address);
1898      }
1899      assert(info->depth_surf->dim == info->stencil_surf->dim);
1900   }
1901
1902   if (info->depth_surf) {
1903      assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT));
1904      if (info->depth_surf->dim == ISL_SURF_DIM_3D) {
1905         assert(info->view->base_array_layer + info->view->array_len <=
1906                info->depth_surf->logical_level0_px.depth);
1907      } else {
1908         assert(info->view->base_array_layer + info->view->array_len <=
1909                info->depth_surf->logical_level0_px.array_len);
1910      }
1911   }
1912
1913   if (info->stencil_surf) {
1914      assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT));
1915      if (info->stencil_surf->dim == ISL_SURF_DIM_3D) {
1916         assert(info->view->base_array_layer + info->view->array_len <=
1917                info->stencil_surf->logical_level0_px.depth);
1918      } else {
1919         assert(info->view->base_array_layer + info->view->array_len <=
1920                info->stencil_surf->logical_level0_px.array_len);
1921      }
1922   }
1923
1924   isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info);
1925}
1926
1927/**
1928 * A variant of isl_surf_get_image_offset_sa() specific to
1929 * ISL_DIM_LAYOUT_GEN4_2D.
1930 */
1931static void
1932get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
1933                            uint32_t level, uint32_t logical_array_layer,
1934                            uint32_t *x_offset_sa,
1935                            uint32_t *y_offset_sa)
1936{
1937   assert(level < surf->levels);
1938   if (surf->dim == ISL_SURF_DIM_3D)
1939      assert(logical_array_layer < surf->logical_level0_px.depth);
1940   else
1941      assert(logical_array_layer < surf->logical_level0_px.array_len);
1942
1943   const struct isl_extent3d image_align_sa =
1944      isl_surf_get_image_alignment_sa(surf);
1945
1946   const uint32_t W0 = surf->phys_level0_sa.width;
1947   const uint32_t H0 = surf->phys_level0_sa.height;
1948
1949   const uint32_t phys_layer = logical_array_layer *
1950      (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
1951
1952   uint32_t x = 0;
1953   uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
1954
1955   for (uint32_t l = 0; l < level; ++l) {
1956      if (l == 1) {
1957         uint32_t W = isl_minify(W0, l);
1958         x += isl_align_npot(W, image_align_sa.w);
1959      } else {
1960         uint32_t H = isl_minify(H0, l);
1961         y += isl_align_npot(H, image_align_sa.h);
1962      }
1963   }
1964
1965   *x_offset_sa = x;
1966   *y_offset_sa = y;
1967}
1968
1969/**
1970 * A variant of isl_surf_get_image_offset_sa() specific to
1971 * ISL_DIM_LAYOUT_GEN4_3D.
1972 */
1973static void
1974get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
1975                            uint32_t level, uint32_t logical_z_offset_px,
1976                            uint32_t *x_offset_sa,
1977                            uint32_t *y_offset_sa)
1978{
1979   assert(level < surf->levels);
1980   if (surf->dim == ISL_SURF_DIM_3D) {
1981      assert(surf->phys_level0_sa.array_len == 1);
1982      assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
1983   } else {
1984      assert(surf->dim == ISL_SURF_DIM_2D);
1985      assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT);
1986      assert(surf->phys_level0_sa.array_len == 6);
1987      assert(logical_z_offset_px < surf->phys_level0_sa.array_len);
1988   }
1989
1990   const struct isl_extent3d image_align_sa =
1991      isl_surf_get_image_alignment_sa(surf);
1992
1993   const uint32_t W0 = surf->phys_level0_sa.width;
1994   const uint32_t H0 = surf->phys_level0_sa.height;
1995   const uint32_t D0 = surf->phys_level0_sa.depth;
1996   const uint32_t AL = surf->phys_level0_sa.array_len;
1997
1998   uint32_t x = 0;
1999   uint32_t y = 0;
2000
2001   for (uint32_t l = 0; l < level; ++l) {
2002      const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
2003      const uint32_t level_d =
2004         isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL,
2005                        image_align_sa.d);
2006      const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
2007
2008      y += level_h * max_layers_vert;
2009   }
2010
2011   const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
2012   const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
2013   const uint32_t level_d =
2014      isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL,
2015                     image_align_sa.d);
2016
2017   const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
2018
2019   x += level_w * (logical_z_offset_px % max_layers_horiz);
2020   y += level_h * (logical_z_offset_px / max_layers_horiz);
2021
2022   *x_offset_sa = x;
2023   *y_offset_sa = y;
2024}
2025
2026static void
2027get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf,
2028                                     uint32_t level,
2029                                     uint32_t logical_array_layer,
2030                                     uint32_t *x_offset_sa,
2031                                     uint32_t *y_offset_sa)
2032{
2033   assert(level < surf->levels);
2034   assert(surf->logical_level0_px.depth == 1);
2035   assert(logical_array_layer < surf->logical_level0_px.array_len);
2036
2037   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2038
2039   const struct isl_extent3d image_align_sa =
2040      isl_surf_get_image_alignment_sa(surf);
2041
2042   struct isl_tile_info tile_info;
2043   isl_tiling_get_info(surf->tiling, fmtl->bpb, &tile_info);
2044   const struct isl_extent2d tile_extent_sa = {
2045      .w = tile_info.logical_extent_el.w * fmtl->bw,
2046      .h = tile_info.logical_extent_el.h * fmtl->bh,
2047   };
2048   /* Tile size is a multiple of image alignment */
2049   assert(tile_extent_sa.w % image_align_sa.w == 0);
2050   assert(tile_extent_sa.h % image_align_sa.h == 0);
2051
2052   const uint32_t W0 = surf->phys_level0_sa.w;
2053   const uint32_t H0 = surf->phys_level0_sa.h;
2054
2055   /* Each image has the same height as LOD0 because the hardware thinks
2056    * everything is LOD0
2057    */
2058   const uint32_t H = isl_align(H0, image_align_sa.h);
2059
2060   /* Quick sanity check for consistency */
2061   if (surf->phys_level0_sa.array_len > 1)
2062      assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh));
2063
2064   uint32_t x = 0, y = 0;
2065   for (uint32_t l = 0; l < level; ++l) {
2066      const uint32_t W = isl_minify(W0, l);
2067
2068      const uint32_t w = isl_align(W, tile_extent_sa.w);
2069      const uint32_t h = isl_align(H * surf->phys_level0_sa.a,
2070                                   tile_extent_sa.h);
2071
2072      if (l == 0) {
2073         y += h;
2074      } else {
2075         x += w;
2076      }
2077   }
2078
2079   y += H * logical_array_layer;
2080
2081   *x_offset_sa = x;
2082   *y_offset_sa = y;
2083}
2084
2085/**
2086 * A variant of isl_surf_get_image_offset_sa() specific to
2087 * ISL_DIM_LAYOUT_GEN9_1D.
2088 */
2089static void
2090get_image_offset_sa_gen9_1d(const struct isl_surf *surf,
2091                            uint32_t level, uint32_t layer,
2092                            uint32_t *x_offset_sa,
2093                            uint32_t *y_offset_sa)
2094{
2095   assert(level < surf->levels);
2096   assert(layer < surf->phys_level0_sa.array_len);
2097   assert(surf->phys_level0_sa.height == 1);
2098   assert(surf->phys_level0_sa.depth == 1);
2099   assert(surf->samples == 1);
2100
2101   const uint32_t W0 = surf->phys_level0_sa.width;
2102   const struct isl_extent3d image_align_sa =
2103      isl_surf_get_image_alignment_sa(surf);
2104
2105   uint32_t x = 0;
2106
2107   for (uint32_t l = 0; l < level; ++l) {
2108      uint32_t W = isl_minify(W0, l);
2109      uint32_t w = isl_align_npot(W, image_align_sa.w);
2110
2111      x += w;
2112   }
2113
2114   *x_offset_sa = x;
2115   *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
2116}
2117
2118/**
2119 * Calculate the offset, in units of surface samples, to a subimage in the
2120 * surface.
2121 *
2122 * @invariant level < surface levels
2123 * @invariant logical_array_layer < logical array length of surface
2124 * @invariant logical_z_offset_px < logical depth of surface at level
2125 */
2126void
2127isl_surf_get_image_offset_sa(const struct isl_surf *surf,
2128                             uint32_t level,
2129                             uint32_t logical_array_layer,
2130                             uint32_t logical_z_offset_px,
2131                             uint32_t *x_offset_sa,
2132                             uint32_t *y_offset_sa)
2133{
2134   assert(level < surf->levels);
2135   assert(logical_array_layer < surf->logical_level0_px.array_len);
2136   assert(logical_z_offset_px
2137          < isl_minify(surf->logical_level0_px.depth, level));
2138
2139   switch (surf->dim_layout) {
2140   case ISL_DIM_LAYOUT_GEN9_1D:
2141      get_image_offset_sa_gen9_1d(surf, level, logical_array_layer,
2142                                  x_offset_sa, y_offset_sa);
2143      break;
2144   case ISL_DIM_LAYOUT_GEN4_2D:
2145      get_image_offset_sa_gen4_2d(surf, level, logical_array_layer
2146                                  + logical_z_offset_px,
2147                                  x_offset_sa, y_offset_sa);
2148      break;
2149   case ISL_DIM_LAYOUT_GEN4_3D:
2150      get_image_offset_sa_gen4_3d(surf, level, logical_array_layer +
2151                                  logical_z_offset_px,
2152                                  x_offset_sa, y_offset_sa);
2153      break;
2154   case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
2155      get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer +
2156                                           logical_z_offset_px,
2157                                           x_offset_sa, y_offset_sa);
2158      break;
2159
2160   default:
2161      unreachable("not reached");
2162   }
2163}
2164
2165void
2166isl_surf_get_image_offset_el(const struct isl_surf *surf,
2167                             uint32_t level,
2168                             uint32_t logical_array_layer,
2169                             uint32_t logical_z_offset_px,
2170                             uint32_t *x_offset_el,
2171                             uint32_t *y_offset_el)
2172{
2173   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2174
2175   assert(level < surf->levels);
2176   assert(logical_array_layer < surf->logical_level0_px.array_len);
2177   assert(logical_z_offset_px
2178          < isl_minify(surf->logical_level0_px.depth, level));
2179
2180   uint32_t x_offset_sa, y_offset_sa;
2181   isl_surf_get_image_offset_sa(surf, level,
2182                                logical_array_layer,
2183                                logical_z_offset_px,
2184                                &x_offset_sa,
2185                                &y_offset_sa);
2186
2187   *x_offset_el = x_offset_sa / fmtl->bw;
2188   *y_offset_el = y_offset_sa / fmtl->bh;
2189}
2190
2191void
2192isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
2193                                    uint32_t level,
2194                                    uint32_t logical_array_layer,
2195                                    uint32_t logical_z_offset_px,
2196                                    uint32_t *offset_B,
2197                                    uint32_t *x_offset_sa,
2198                                    uint32_t *y_offset_sa)
2199{
2200   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2201
2202   uint32_t total_x_offset_el, total_y_offset_el;
2203   isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2204                                logical_z_offset_px,
2205                                &total_x_offset_el,
2206                                &total_y_offset_el);
2207
2208   uint32_t x_offset_el, y_offset_el;
2209   isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
2210                                      surf->row_pitch_B,
2211                                      total_x_offset_el,
2212                                      total_y_offset_el,
2213                                      offset_B,
2214                                      &x_offset_el,
2215                                      &y_offset_el);
2216
2217   if (x_offset_sa) {
2218      *x_offset_sa = x_offset_el * fmtl->bw;
2219   } else {
2220      assert(x_offset_el == 0);
2221   }
2222
2223   if (y_offset_sa) {
2224      *y_offset_sa = y_offset_el * fmtl->bh;
2225   } else {
2226      assert(y_offset_el == 0);
2227   }
2228}
2229
2230void
2231isl_surf_get_image_surf(const struct isl_device *dev,
2232                        const struct isl_surf *surf,
2233                        uint32_t level,
2234                        uint32_t logical_array_layer,
2235                        uint32_t logical_z_offset_px,
2236                        struct isl_surf *image_surf,
2237                        uint32_t *offset_B,
2238                        uint32_t *x_offset_sa,
2239                        uint32_t *y_offset_sa)
2240{
2241   isl_surf_get_image_offset_B_tile_sa(surf,
2242                                       level,
2243                                       logical_array_layer,
2244                                       logical_z_offset_px,
2245                                       offset_B,
2246                                       x_offset_sa,
2247                                       y_offset_sa);
2248
2249   /* Even for cube maps there will be only single face, therefore drop the
2250    * corresponding flag if present.
2251    */
2252   const isl_surf_usage_flags_t usage =
2253      surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
2254
2255   bool ok UNUSED;
2256   ok = isl_surf_init(dev, image_surf,
2257                      .dim = ISL_SURF_DIM_2D,
2258                      .format = surf->format,
2259                      .width = isl_minify(surf->logical_level0_px.w, level),
2260                      .height = isl_minify(surf->logical_level0_px.h, level),
2261                      .depth = 1,
2262                      .levels = 1,
2263                      .array_len = 1,
2264                      .samples = surf->samples,
2265                      .row_pitch_B = surf->row_pitch_B,
2266                      .usage = usage,
2267                      .tiling_flags = (1 << surf->tiling));
2268   assert(ok);
2269}
2270
2271void
2272isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
2273                                   uint32_t bpb,
2274                                   uint32_t row_pitch_B,
2275                                   uint32_t total_x_offset_el,
2276                                   uint32_t total_y_offset_el,
2277                                   uint32_t *base_address_offset,
2278                                   uint32_t *x_offset_el,
2279                                   uint32_t *y_offset_el)
2280{
2281   if (tiling == ISL_TILING_LINEAR) {
2282      assert(bpb % 8 == 0);
2283      *base_address_offset = total_y_offset_el * row_pitch_B +
2284                             total_x_offset_el * (bpb / 8);
2285      *x_offset_el = 0;
2286      *y_offset_el = 0;
2287      return;
2288   }
2289
2290   struct isl_tile_info tile_info;
2291   isl_tiling_get_info(tiling, bpb, &tile_info);
2292
2293   assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
2294
2295   /* For non-power-of-two formats, we need the address to be both tile and
2296    * element-aligned.  The easiest way to achieve this is to work with a tile
2297    * that is three times as wide as the regular tile.
2298    *
2299    * The tile info returned by get_tile_info has a logical size that is an
2300    * integer number of tile_info.format_bpb size elements.  To scale the
2301    * tile, we scale up the physical width and then treat the logical tile
2302    * size as if it has bpb size elements.
2303    */
2304   const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
2305   tile_info.phys_extent_B.width *= tile_el_scale;
2306
2307   /* Compute the offset into the tile */
2308   *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
2309   *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
2310
2311   /* Compute the offset of the tile in units of whole tiles */
2312   uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
2313   uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
2314
2315   *base_address_offset =
2316      y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B +
2317      x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
2318}
2319
2320uint32_t
2321isl_surf_get_depth_format(const struct isl_device *dev,
2322                          const struct isl_surf *surf)
2323{
2324   /* Support for separate stencil buffers began in gen5. Support for
2325    * interleaved depthstencil buffers ceased in gen7. The intermediate gens,
2326    * those that supported separate and interleaved stencil, were gen5 and
2327    * gen6.
2328    *
2329    * For a list of all available formats, see the Sandybridge PRM >> Volume
2330    * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
2331    * Format (p321).
2332    */
2333
2334   bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
2335
2336   assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
2337
2338   if (has_stencil)
2339      assert(ISL_DEV_GEN(dev) < 7);
2340
2341   switch (surf->format) {
2342   default:
2343      unreachable("bad isl depth format");
2344   case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
2345      assert(ISL_DEV_GEN(dev) < 7);
2346      return 0; /* D32_FLOAT_S8X24_UINT */
2347   case ISL_FORMAT_R32_FLOAT:
2348      assert(!has_stencil);
2349      return 1; /* D32_FLOAT */
2350   case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
2351      if (has_stencil) {
2352         assert(ISL_DEV_GEN(dev) < 7);
2353         return 2; /* D24_UNORM_S8_UINT */
2354      } else {
2355         assert(ISL_DEV_GEN(dev) >= 5);
2356         return 3; /* D24_UNORM_X8_UINT */
2357      }
2358   case ISL_FORMAT_R16_UNORM:
2359      assert(!has_stencil);
2360      return 5; /* D16_UNORM */
2361   }
2362}
2363
2364bool
2365isl_swizzle_supports_rendering(const struct gen_device_info *devinfo,
2366                               struct isl_swizzle swizzle)
2367{
2368   if (devinfo->is_haswell) {
2369      /* From the Haswell PRM,
2370       * RENDER_SURFACE_STATE::Shader Channel Select Red
2371       *
2372       *    "The Shader channel selects also define which shader channels are
2373       *    written to which surface channel. If the Shader channel select is
2374       *    SCS_ZERO or SCS_ONE then it is not written to the surface. If the
2375       *    shader channel select is SCS_RED it is written to the surface red
2376       *    channel and so on. If more than one shader channel select is set
2377       *    to the same surface channel only the first shader channel in RGBA
2378       *    order will be written."
2379       */
2380      return true;
2381   } else if (devinfo->gen <= 7) {
2382      /* Ivy Bridge and early doesn't have any swizzling */
2383      return isl_swizzle_is_identity(swizzle);
2384   } else {
2385      /* From the Sky Lake PRM Vol. 2d,
2386       * RENDER_SURFACE_STATE::Shader Channel Select Red
2387       *
2388       *    "For Render Target, Red, Green and Blue Shader Channel Selects
2389       *    MUST be such that only valid components can be swapped i.e. only
2390       *    change the order of components in the pixel. Any other values for
2391       *    these Shader Channel Select fields are not valid for Render
2392       *    Targets. This also means that there MUST not be multiple shader
2393       *    channels mapped to the same RT channel."
2394       *
2395       * From the Sky Lake PRM Vol. 2d,
2396       * RENDER_SURFACE_STATE::Shader Channel Select Alpha
2397       *
2398       *    "For Render Target, this field MUST be programmed to
2399       *    value = SCS_ALPHA."
2400       */
2401      return (swizzle.r == ISL_CHANNEL_SELECT_RED ||
2402              swizzle.r == ISL_CHANNEL_SELECT_GREEN ||
2403              swizzle.r == ISL_CHANNEL_SELECT_BLUE) &&
2404             (swizzle.g == ISL_CHANNEL_SELECT_RED ||
2405              swizzle.g == ISL_CHANNEL_SELECT_GREEN ||
2406              swizzle.g == ISL_CHANNEL_SELECT_BLUE) &&
2407             (swizzle.b == ISL_CHANNEL_SELECT_RED ||
2408              swizzle.b == ISL_CHANNEL_SELECT_GREEN ||
2409              swizzle.b == ISL_CHANNEL_SELECT_BLUE) &&
2410             swizzle.r != swizzle.g &&
2411             swizzle.r != swizzle.b &&
2412             swizzle.g != swizzle.b &&
2413             swizzle.a == ISL_CHANNEL_SELECT_ALPHA;
2414   }
2415}
2416
2417static enum isl_channel_select
2418swizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle)
2419{
2420   switch (chan) {
2421   case ISL_CHANNEL_SELECT_ZERO:
2422   case ISL_CHANNEL_SELECT_ONE:
2423      return chan;
2424   case ISL_CHANNEL_SELECT_RED:
2425      return swizzle.r;
2426   case ISL_CHANNEL_SELECT_GREEN:
2427      return swizzle.g;
2428   case ISL_CHANNEL_SELECT_BLUE:
2429      return swizzle.b;
2430   case ISL_CHANNEL_SELECT_ALPHA:
2431      return swizzle.a;
2432   default:
2433      unreachable("Invalid swizzle component");
2434   }
2435}
2436
2437/**
2438 * Returns the single swizzle that is equivalent to applying the two given
2439 * swizzles in sequence.
2440 */
2441struct isl_swizzle
2442isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second)
2443{
2444   return (struct isl_swizzle) {
2445      .r = swizzle_select(first.r, second),
2446      .g = swizzle_select(first.g, second),
2447      .b = swizzle_select(first.b, second),
2448      .a = swizzle_select(first.a, second),
2449   };
2450}
2451
2452/**
2453 * Returns a swizzle that is the pseudo-inverse of this swizzle.
2454 */
2455struct isl_swizzle
2456isl_swizzle_invert(struct isl_swizzle swizzle)
2457{
2458   /* Default to zero for channels which do not show up in the swizzle */
2459   enum isl_channel_select chans[4] = {
2460      ISL_CHANNEL_SELECT_ZERO,
2461      ISL_CHANNEL_SELECT_ZERO,
2462      ISL_CHANNEL_SELECT_ZERO,
2463      ISL_CHANNEL_SELECT_ZERO,
2464   };
2465
2466   /* We go in ABGR order so that, if there are any duplicates, the first one
2467    * is taken if you look at it in RGBA order.  This is what Haswell hardware
2468    * does for render target swizzles.
2469    */
2470   if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
2471      chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA;
2472   if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
2473      chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE;
2474   if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
2475      chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN;
2476   if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
2477      chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED;
2478
2479   return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] };
2480}
2481