isl.c revision 01e04c3f
1/*
2 * Copyright 2015 Intel Corporation
3 *
4 *  Permission is hereby granted, free of charge, to any person obtaining a
5 *  copy of this software and associated documentation files (the "Software"),
6 *  to deal in the Software without restriction, including without limitation
7 *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 *  and/or sell copies of the Software, and to permit persons to whom the
9 *  Software is furnished to do so, subject to the following conditions:
10 *
11 *  The above copyright notice and this permission notice (including the next
12 *  paragraph) shall be included in all copies or substantial portions of the
13 *  Software.
14 *
15 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 *  IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdarg.h>
26#include <stdio.h>
27
28#include "genxml/genX_bits.h"
29
30#include "isl.h"
31#include "isl_gen4.h"
32#include "isl_gen6.h"
33#include "isl_gen7.h"
34#include "isl_gen8.h"
35#include "isl_gen9.h"
36#include "isl_priv.h"
37
38void PRINTFLIKE(3, 4) UNUSED
39__isl_finishme(const char *file, int line, const char *fmt, ...)
40{
41   va_list ap;
42   char buf[512];
43
44   va_start(ap, fmt);
45   vsnprintf(buf, sizeof(buf), fmt, ap);
46   va_end(ap);
47
48   fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
49}
50
51void
52isl_device_init(struct isl_device *dev,
53                const struct gen_device_info *info,
54                bool has_bit6_swizzling)
55{
56   dev->info = info;
57   dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6;
58   dev->has_bit6_swizzling = has_bit6_swizzling;
59
60   /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
61    * device properties at buildtime. Verify that the macros with the device
62    * properties chosen during runtime.
63    */
64   ISL_DEV_GEN_SANITIZE(dev);
65   ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);
66
67   /* Did we break hiz or stencil? */
68   if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
69      assert(info->has_hiz_and_separate_stencil);
70   if (info->must_use_separate_stencil)
71      assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
72
73   dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
74   dev->ss.align = isl_align(dev->ss.size, 32);
75
76   dev->ss.clear_color_state_size = CLEAR_COLOR_length(info) * 4;
77   dev->ss.clear_color_state_offset =
78      RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;
79
80   dev->ss.clear_value_size =
81      isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
82                RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
83                RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
84                RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;
85
86   dev->ss.clear_value_offset =
87      RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;
88
89   assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
90   dev->ss.addr_offset =
91      RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;
92
93   /* The "Auxiliary Surface Base Address" field starts a bit higher up
94    * because the bottom 12 bits are used for other things.  Round down to
95    * the nearest dword before.
96    */
97   dev->ss.aux_addr_offset =
98      (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;
99
100   dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
101   assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
102   dev->ds.depth_offset =
103      _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
104
105   if (dev->use_separate_stencil) {
106      dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
107                      _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
108                      _3DSTATE_CLEAR_PARAMS_length(info) * 4;
109
110      assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
111      dev->ds.stencil_offset =
112         _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
113         _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;
114
115      assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
116      dev->ds.hiz_offset =
117         _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
118         _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
119         _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
120   } else {
121      dev->ds.stencil_offset = 0;
122      dev->ds.hiz_offset = 0;
123   }
124}
125
126/**
127 * @brief Query the set of multisamples supported by the device.
128 *
129 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
130 * supported.
131 */
132isl_sample_count_mask_t ATTRIBUTE_CONST
133isl_device_get_sample_counts(struct isl_device *dev)
134{
135   if (ISL_DEV_GEN(dev) >= 9) {
136      return ISL_SAMPLE_COUNT_1_BIT |
137             ISL_SAMPLE_COUNT_2_BIT |
138             ISL_SAMPLE_COUNT_4_BIT |
139             ISL_SAMPLE_COUNT_8_BIT |
140             ISL_SAMPLE_COUNT_16_BIT;
141   } else if (ISL_DEV_GEN(dev) >= 8) {
142      return ISL_SAMPLE_COUNT_1_BIT |
143             ISL_SAMPLE_COUNT_2_BIT |
144             ISL_SAMPLE_COUNT_4_BIT |
145             ISL_SAMPLE_COUNT_8_BIT;
146   } else if (ISL_DEV_GEN(dev) >= 7) {
147      return ISL_SAMPLE_COUNT_1_BIT |
148             ISL_SAMPLE_COUNT_4_BIT |
149             ISL_SAMPLE_COUNT_8_BIT;
150   } else if (ISL_DEV_GEN(dev) >= 6) {
151      return ISL_SAMPLE_COUNT_1_BIT |
152             ISL_SAMPLE_COUNT_4_BIT;
153   } else {
154      return ISL_SAMPLE_COUNT_1_BIT;
155   }
156}
157
158/**
159 * @param[out] info is written only on success
160 */
161static void
162isl_tiling_get_info(enum isl_tiling tiling,
163                    uint32_t format_bpb,
164                    struct isl_tile_info *tile_info)
165{
166   const uint32_t bs = format_bpb / 8;
167   struct isl_extent2d logical_el, phys_B;
168
169   if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
170      /* It is possible to have non-power-of-two formats in a tiled buffer.
171       * The easiest way to handle this is to treat the tile as if it is three
172       * times as wide.  This way no pixel will ever cross a tile boundary.
173       * This really only works on legacy X and Y tiling formats.
174       */
175      assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0);
176      assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
177      isl_tiling_get_info(tiling, format_bpb / 3, tile_info);
178      return;
179   }
180
181   switch (tiling) {
182   case ISL_TILING_LINEAR:
183      assert(bs > 0);
184      logical_el = isl_extent2d(1, 1);
185      phys_B = isl_extent2d(bs, 1);
186      break;
187
188   case ISL_TILING_X:
189      assert(bs > 0);
190      logical_el = isl_extent2d(512 / bs, 8);
191      phys_B = isl_extent2d(512, 8);
192      break;
193
194   case ISL_TILING_Y0:
195      assert(bs > 0);
196      logical_el = isl_extent2d(128 / bs, 32);
197      phys_B = isl_extent2d(128, 32);
198      break;
199
200   case ISL_TILING_W:
201      assert(bs == 1);
202      logical_el = isl_extent2d(64, 64);
203      /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
204       *
205       *    "If the surface is a stencil buffer (and thus has Tile Mode set
206       *    to TILEMODE_WMAJOR), the pitch must be set to 2x the value
207       *    computed based on width, as the stencil buffer is stored with two
208       *    rows interleaved."
209       *
210       * This, together with the fact that stencil buffers are referred to as
211       * being Y-tiled in the PRMs for older hardware implies that the
212       * physical size of a W-tile is actually the same as for a Y-tile.
213       */
214      phys_B = isl_extent2d(128, 32);
215      break;
216
217   case ISL_TILING_Yf:
218   case ISL_TILING_Ys: {
219      bool is_Ys = tiling == ISL_TILING_Ys;
220
221      assert(bs > 0);
222      unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
223      unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
224
225      logical_el = isl_extent2d(width / bs, height);
226      phys_B = isl_extent2d(width, height);
227      break;
228   }
229
230   case ISL_TILING_HIZ:
231      /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4
232       * 128bpb format.  The tiling has the same physical dimensions as
233       * Y-tiling but actually has two HiZ columns per Y-tiled column.
234       */
235      assert(bs == 16);
236      logical_el = isl_extent2d(16, 16);
237      phys_B = isl_extent2d(128, 32);
238      break;
239
240   case ISL_TILING_CCS:
241      /* CCS surfaces are required to have one of the GENX_CCS_* formats which
242       * have a block size of 1 or 2 bits per block and each CCS element
243       * corresponds to one cache-line pair in the main surface.  From the Sky
244       * Lake PRM Vol. 12 in the section on planes:
245       *
246       *    "The Color Control Surface (CCS) contains the compression status
247       *    of the cache-line pairs. The compression state of the cache-line
248       *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
249       *    represents an area on the main surface of 16x16 sets of 128 byte
250       *    Y-tiled cache-line-pairs. CCS is always Y tiled."
251       *
252       * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
253       * Since each cache line corresponds to a 16x16 set of cache-line pairs,
254       * that yields total tile area of 128x128 cache-line pairs or CCS
255       * elements.  On older hardware, each CCS element is 1 bit and the tile
256       * is 128x256 elements.
257       */
258      assert(format_bpb == 1 || format_bpb == 2);
259      logical_el = isl_extent2d(128, 256 / format_bpb);
260      phys_B = isl_extent2d(128, 32);
261      break;
262
263   default:
264      unreachable("not reached");
265   } /* end switch */
266
267   *tile_info = (struct isl_tile_info) {
268      .tiling = tiling,
269      .format_bpb = format_bpb,
270      .logical_extent_el = logical_el,
271      .phys_extent_B = phys_B,
272   };
273}
274
275bool
276isl_color_value_is_zero(union isl_color_value value,
277                        enum isl_format format)
278{
279   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
280
281#define RETURN_FALSE_IF_NOT_0(c, i) \
282   if (fmtl->channels.c.bits && value.u32[i] != 0) \
283      return false
284
285   RETURN_FALSE_IF_NOT_0(r, 0);
286   RETURN_FALSE_IF_NOT_0(g, 1);
287   RETURN_FALSE_IF_NOT_0(b, 2);
288   RETURN_FALSE_IF_NOT_0(a, 3);
289
290#undef RETURN_FALSE_IF_NOT_0
291
292   return true;
293}
294
295bool
296isl_color_value_is_zero_one(union isl_color_value value,
297                            enum isl_format format)
298{
299   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
300
301#define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
302   if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
303      return false
304
305   if (isl_format_has_int_channel(format)) {
306      RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
307      RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
308      RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
309      RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
310   } else {
311      RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
312      RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
313      RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
314      RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
315   }
316
317#undef RETURN_FALSE_IF_NOT_0_1
318
319   return true;
320}
321
322/**
323 * @param[out] tiling is set only on success
324 */
325static bool
326isl_surf_choose_tiling(const struct isl_device *dev,
327                       const struct isl_surf_init_info *restrict info,
328                       enum isl_tiling *tiling)
329{
330   isl_tiling_flags_t tiling_flags = info->tiling_flags;
331
332   /* HiZ surfaces always use the HiZ tiling */
333   if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
334      assert(info->format == ISL_FORMAT_HIZ);
335      assert(tiling_flags == ISL_TILING_HIZ_BIT);
336      *tiling = ISL_TILING_HIZ;
337      return true;
338   }
339
340   /* CCS surfaces always use the CCS tiling */
341   if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
342      assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
343      assert(tiling_flags == ISL_TILING_CCS_BIT);
344      *tiling = ISL_TILING_CCS;
345      return true;
346   }
347
348   if (ISL_DEV_GEN(dev) >= 6) {
349      isl_gen6_filter_tiling(dev, info, &tiling_flags);
350   } else {
351      isl_gen4_filter_tiling(dev, info, &tiling_flags);
352   }
353
354   #define CHOOSE(__tiling) \
355      do { \
356         if (tiling_flags & (1u << (__tiling))) { \
357            *tiling = (__tiling); \
358            return true; \
359          } \
360      } while (0)
361
362   /* Of the tiling modes remaining, choose the one that offers the best
363    * performance.
364    */
365
366   if (info->dim == ISL_SURF_DIM_1D) {
367      /* Prefer linear for 1D surfaces because they do not benefit from
368       * tiling. To the contrary, tiling leads to wasted memory and poor
369       * memory locality due to the swizzling and alignment restrictions
370       * required in tiled surfaces.
371       */
372      CHOOSE(ISL_TILING_LINEAR);
373   }
374
375   CHOOSE(ISL_TILING_Ys);
376   CHOOSE(ISL_TILING_Yf);
377   CHOOSE(ISL_TILING_Y0);
378   CHOOSE(ISL_TILING_X);
379   CHOOSE(ISL_TILING_W);
380   CHOOSE(ISL_TILING_LINEAR);
381
382   #undef CHOOSE
383
384   /* No tiling mode accomodates the inputs. */
385   return false;
386}
387
388static bool
389isl_choose_msaa_layout(const struct isl_device *dev,
390                 const struct isl_surf_init_info *info,
391                 enum isl_tiling tiling,
392                 enum isl_msaa_layout *msaa_layout)
393{
394   if (ISL_DEV_GEN(dev) >= 8) {
395      return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout);
396   } else if (ISL_DEV_GEN(dev) >= 7) {
397      return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout);
398   } else if (ISL_DEV_GEN(dev) >= 6) {
399      return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout);
400   } else {
401      return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout);
402   }
403}
404
405struct isl_extent2d
406isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
407{
408   assert(isl_is_pow2(samples));
409
410   /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
411    * Sizes (p133):
412    *
413    *    If the surface is multisampled and it is a depth or stencil surface
414    *    or Multisampled Surface StorageFormat in SURFACE_STATE is
415    *    MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
416    *    proceeding: [...]
417    */
418   return (struct isl_extent2d) {
419      .width = 1 << ((ffs(samples) - 0) / 2),
420      .height = 1 << ((ffs(samples) - 1) / 2),
421   };
422}
423
424static void
425isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
426                                    uint32_t *width, uint32_t *height)
427{
428   const struct isl_extent2d px_size_sa =
429      isl_get_interleaved_msaa_px_size_sa(samples);
430
431   if (width)
432      *width = isl_align(*width, 2) * px_size_sa.width;
433   if (height)
434      *height = isl_align(*height, 2) * px_size_sa.height;
435}
436
437static enum isl_array_pitch_span
438isl_choose_array_pitch_span(const struct isl_device *dev,
439                            const struct isl_surf_init_info *restrict info,
440                            enum isl_dim_layout dim_layout,
441                            const struct isl_extent4d *phys_level0_sa)
442{
443   switch (dim_layout) {
444   case ISL_DIM_LAYOUT_GEN9_1D:
445   case ISL_DIM_LAYOUT_GEN4_2D:
446      if (ISL_DEV_GEN(dev) >= 8) {
447         /* QPitch becomes programmable in Broadwell. So choose the
448          * most compact QPitch possible in order to conserve memory.
449          *
450          * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
451          * >> RENDER_SURFACE_STATE Surface QPitch (p325):
452          *
453          *    - Software must ensure that this field is set to a value
454          *      sufficiently large such that the array slices in the surface
455          *      do not overlap. Refer to the Memory Data Formats section for
456          *      information on how surfaces are stored in memory.
457          *
458          *    - This field specifies the distance in rows between array
459          *      slices.  It is used only in the following cases:
460          *
461          *          - Surface Array is enabled OR
462          *          - Number of Mulitsamples is not NUMSAMPLES_1 and
463          *            Multisampled Surface Storage Format set to MSFMT_MSS OR
464          *          - Surface Type is SURFTYPE_CUBE
465          */
466         return ISL_ARRAY_PITCH_SPAN_COMPACT;
467      } else if (ISL_DEV_GEN(dev) >= 7) {
468         /* Note that Ivybridge introduces
469          * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
470          * driver more control over the QPitch.
471          */
472
473         if (phys_level0_sa->array_len == 1) {
474            /* The hardware will never use the QPitch. So choose the most
475             * compact QPitch possible in order to conserve memory.
476             */
477            return ISL_ARRAY_PITCH_SPAN_COMPACT;
478         }
479
480         if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
481             (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
482            /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
483             * Section 6.18.4.7: Surface Arrays (p112):
484             *
485             *    If Surface Array Spacing is set to ARYSPC_FULL (note that
486             *    the depth buffer and stencil buffer have an implied value of
487             *    ARYSPC_FULL):
488             */
489            return ISL_ARRAY_PITCH_SPAN_FULL;
490         }
491
492         if (info->levels == 1) {
493            /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
494             * to ARYSPC_LOD0.
495             */
496            return ISL_ARRAY_PITCH_SPAN_COMPACT;
497         }
498
499         return ISL_ARRAY_PITCH_SPAN_FULL;
500      } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
501                 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
502                 isl_surf_usage_is_stencil(info->usage)) {
503         /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
504          * Graphics Core >> Section 7.18.3.7: Surface Arrays:
505          *
506          *    The separate stencil buffer does not support mip mapping, thus
507          *    the storage for LODs other than LOD 0 is not needed.
508          */
509         assert(info->levels == 1);
510         return ISL_ARRAY_PITCH_SPAN_COMPACT;
511      } else {
512         if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
513             ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
514             isl_surf_usage_is_stencil(info->usage)) {
515            /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
516             * Graphics Core >> Section 7.18.3.7: Surface Arrays:
517             *
518             *    The separate stencil buffer does not support mip mapping,
519             *    thus the storage for LODs other than LOD 0 is not needed.
520             */
521            assert(info->levels == 1);
522            assert(phys_level0_sa->array_len == 1);
523            return ISL_ARRAY_PITCH_SPAN_COMPACT;
524         }
525
526         if (phys_level0_sa->array_len == 1) {
527            /* The hardware will never use the QPitch. So choose the most
528             * compact QPitch possible in order to conserve memory.
529             */
530            return ISL_ARRAY_PITCH_SPAN_COMPACT;
531         }
532
533         return ISL_ARRAY_PITCH_SPAN_FULL;
534      }
535
536   case ISL_DIM_LAYOUT_GEN4_3D:
537      /* The hardware will never use the QPitch. So choose the most
538       * compact QPitch possible in order to conserve memory.
539       */
540      return ISL_ARRAY_PITCH_SPAN_COMPACT;
541
542   case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
543      /* Each array image in the gen6 stencil of HiZ surface is compact in the
544       * sense that every LOD is a compact array of the same size as LOD0.
545       */
546      return ISL_ARRAY_PITCH_SPAN_COMPACT;
547   }
548
549   unreachable("bad isl_dim_layout");
550   return ISL_ARRAY_PITCH_SPAN_FULL;
551}
552
553static void
554isl_choose_image_alignment_el(const struct isl_device *dev,
555                              const struct isl_surf_init_info *restrict info,
556                              enum isl_tiling tiling,
557                              enum isl_dim_layout dim_layout,
558                              enum isl_msaa_layout msaa_layout,
559                              struct isl_extent3d *image_align_el)
560{
561   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
562   if (fmtl->txc == ISL_TXC_MCS) {
563      assert(tiling == ISL_TILING_Y0);
564
565      /*
566       * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
567       *
568       * Height, width, and layout of MCS buffer in this case must match with
569       * Render Target height, width, and layout. MCS buffer is tiledY.
570       *
571       * To avoid wasting memory, choose the smallest alignment possible:
572       * HALIGN_4 and VALIGN_4.
573       */
574      *image_align_el = isl_extent3d(4, 4, 1);
575      return;
576   } else if (info->format == ISL_FORMAT_HIZ) {
577      assert(ISL_DEV_GEN(dev) >= 6);
578      if (ISL_DEV_GEN(dev) == 6) {
579         /* HiZ surfaces on Sandy Bridge are packed tightly. */
580         *image_align_el = isl_extent3d(1, 1, 1);
581      } else {
582         /* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the
583          * primary surface which works out to 2x2 HiZ elments.
584          */
585         *image_align_el = isl_extent3d(2, 2, 1);
586      }
587      return;
588   }
589
590   if (ISL_DEV_GEN(dev) >= 9) {
591      isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout,
592                                         msaa_layout, image_align_el);
593   } else if (ISL_DEV_GEN(dev) >= 8) {
594      isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout,
595                                         msaa_layout, image_align_el);
596   } else if (ISL_DEV_GEN(dev) >= 7) {
597      isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout,
598                                          msaa_layout, image_align_el);
599   } else if (ISL_DEV_GEN(dev) >= 6) {
600      isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout,
601                                         msaa_layout, image_align_el);
602   } else {
603      isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout,
604                                         msaa_layout, image_align_el);
605   }
606}
607
608static enum isl_dim_layout
609isl_surf_choose_dim_layout(const struct isl_device *dev,
610                           enum isl_surf_dim logical_dim,
611                           enum isl_tiling tiling,
612                           isl_surf_usage_flags_t usage)
613{
614   /* Sandy bridge needs a special layout for HiZ and stencil. */
615   if (ISL_DEV_GEN(dev) == 6 &&
616       (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
617      return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ;
618
619   if (ISL_DEV_GEN(dev) >= 9) {
620      switch (logical_dim) {
621      case ISL_SURF_DIM_1D:
622         /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
623          *
624          *    One-dimensional surfaces use a tiling mode of linear.
625          *    Technically, they are not tiled resources, but the Tiled
626          *    Resource Mode field in RENDER_SURFACE_STATE is still used to
627          *    indicate the alignment requirements for this linear surface
628          *    (See 1D Alignment requirements for how 4K and 64KB Tiled
629          *    Resource Modes impact alignment). Alternatively, a 1D surface
630          *    can be defined as a 2D tiled surface (e.g. TileY or TileX) with
631          *    a height of 0.
632          *
633          * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear
634          * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used.
635          */
636         if (tiling == ISL_TILING_LINEAR)
637            return ISL_DIM_LAYOUT_GEN9_1D;
638         else
639            return ISL_DIM_LAYOUT_GEN4_2D;
640      case ISL_SURF_DIM_2D:
641      case ISL_SURF_DIM_3D:
642         return ISL_DIM_LAYOUT_GEN4_2D;
643      }
644   } else {
645      switch (logical_dim) {
646      case ISL_SURF_DIM_1D:
647      case ISL_SURF_DIM_2D:
648         /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
649          *
650          * The cube face textures are stored in the same way as 3D surfaces
651          * are stored (see section 6.17.5 for details).  For cube surfaces,
652          * however, the depth is equal to the number of faces (always 6) and
653          * is not reduced for each MIP.
654          */
655         if (ISL_DEV_GEN(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
656            return ISL_DIM_LAYOUT_GEN4_3D;
657
658         return ISL_DIM_LAYOUT_GEN4_2D;
659      case ISL_SURF_DIM_3D:
660         return ISL_DIM_LAYOUT_GEN4_3D;
661      }
662   }
663
664   unreachable("bad isl_surf_dim");
665   return ISL_DIM_LAYOUT_GEN4_2D;
666}
667
668/**
669 * Calculate the physical extent of the surface's first level, in units of
670 * surface samples. The result is aligned to the format's compression block.
671 */
672static void
673isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
674                               const struct isl_surf_init_info *restrict info,
675                               enum isl_dim_layout dim_layout,
676                               enum isl_tiling tiling,
677                               enum isl_msaa_layout msaa_layout,
678                               struct isl_extent4d *phys_level0_sa)
679{
680   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
681
682   if (isl_format_is_yuv(info->format))
683      isl_finishme("%s:%s: YUV format", __FILE__, __func__);
684
685   switch (info->dim) {
686   case ISL_SURF_DIM_1D:
687      assert(info->height == 1);
688      assert(info->depth == 1);
689      assert(info->samples == 1);
690
691      switch (dim_layout) {
692      case ISL_DIM_LAYOUT_GEN4_3D:
693         unreachable("bad isl_dim_layout");
694
695      case ISL_DIM_LAYOUT_GEN9_1D:
696      case ISL_DIM_LAYOUT_GEN4_2D:
697      case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
698         *phys_level0_sa = (struct isl_extent4d) {
699            .w = isl_align_npot(info->width, fmtl->bw),
700            .h = fmtl->bh,
701            .d = 1,
702            .a = info->array_len,
703         };
704         break;
705      }
706      break;
707
708   case ISL_SURF_DIM_2D:
709      if (ISL_DEV_GEN(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
710         assert(dim_layout == ISL_DIM_LAYOUT_GEN4_3D);
711      else
712         assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D ||
713                dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ);
714
715      if (tiling == ISL_TILING_Ys && info->samples > 1)
716         isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);
717
718      switch (msaa_layout) {
719      case ISL_MSAA_LAYOUT_NONE:
720         assert(info->depth == 1);
721         assert(info->samples == 1);
722
723         *phys_level0_sa = (struct isl_extent4d) {
724            .w = isl_align_npot(info->width, fmtl->bw),
725            .h = isl_align_npot(info->height, fmtl->bh),
726            .d = 1,
727            .a = info->array_len,
728         };
729         break;
730
731      case ISL_MSAA_LAYOUT_ARRAY:
732         assert(info->depth == 1);
733         assert(info->levels == 1);
734         assert(isl_format_supports_multisampling(dev->info, info->format));
735         assert(fmtl->bw == 1 && fmtl->bh == 1);
736
737         *phys_level0_sa = (struct isl_extent4d) {
738            .w = info->width,
739            .h = info->height,
740            .d = 1,
741            .a = info->array_len * info->samples,
742         };
743         break;
744
745      case ISL_MSAA_LAYOUT_INTERLEAVED:
746         assert(info->depth == 1);
747         assert(info->levels == 1);
748         assert(isl_format_supports_multisampling(dev->info, info->format));
749
750         *phys_level0_sa = (struct isl_extent4d) {
751            .w = info->width,
752            .h = info->height,
753            .d = 1,
754            .a = info->array_len,
755         };
756
757         isl_msaa_interleaved_scale_px_to_sa(info->samples,
758                                             &phys_level0_sa->w,
759                                             &phys_level0_sa->h);
760
761         phys_level0_sa->w = isl_align(phys_level0_sa->w, fmtl->bw);
762         phys_level0_sa->h = isl_align(phys_level0_sa->h, fmtl->bh);
763         break;
764      }
765      break;
766
767   case ISL_SURF_DIM_3D:
768      assert(info->array_len == 1);
769      assert(info->samples == 1);
770
771      if (fmtl->bd > 1) {
772         isl_finishme("%s:%s: compression block with depth > 1",
773                      __FILE__, __func__);
774      }
775
776      switch (dim_layout) {
777      case ISL_DIM_LAYOUT_GEN9_1D:
778      case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
779         unreachable("bad isl_dim_layout");
780
781      case ISL_DIM_LAYOUT_GEN4_2D:
782         assert(ISL_DEV_GEN(dev) >= 9);
783
784         *phys_level0_sa = (struct isl_extent4d) {
785            .w = isl_align_npot(info->width, fmtl->bw),
786            .h = isl_align_npot(info->height, fmtl->bh),
787            .d = 1,
788            .a = info->depth,
789         };
790         break;
791
792      case ISL_DIM_LAYOUT_GEN4_3D:
793         assert(ISL_DEV_GEN(dev) < 9);
794         *phys_level0_sa = (struct isl_extent4d) {
795            .w = isl_align(info->width, fmtl->bw),
796            .h = isl_align(info->height, fmtl->bh),
797            .d = info->depth,
798            .a = 1,
799         };
800         break;
801      }
802      break;
803   }
804}
805
806/**
807 * Calculate the pitch between physical array slices, in units of rows of
808 * surface elements.
809 */
810static uint32_t
811isl_calc_array_pitch_el_rows_gen4_2d(
812      const struct isl_device *dev,
813      const struct isl_surf_init_info *restrict info,
814      const struct isl_tile_info *tile_info,
815      const struct isl_extent3d *image_align_sa,
816      const struct isl_extent4d *phys_level0_sa,
817      enum isl_array_pitch_span array_pitch_span,
818      const struct isl_extent2d *phys_slice0_sa)
819{
820   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
821   uint32_t pitch_sa_rows = 0;
822
823   switch (array_pitch_span) {
824   case ISL_ARRAY_PITCH_SPAN_COMPACT:
825      pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
826      break;
827   case ISL_ARRAY_PITCH_SPAN_FULL: {
828      /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
829       * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
830       * Surfaces >> Surface Arrays.
831       */
832      uint32_t H0_sa = phys_level0_sa->h;
833      uint32_t H1_sa = isl_minify(H0_sa, 1);
834
835      uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
836      uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
837
838      uint32_t m;
839      if (ISL_DEV_GEN(dev) >= 7) {
840         /* The QPitch equation changed slightly in Ivybridge. */
841         m = 12;
842      } else {
843         m = 11;
844      }
845
846      pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
847
848      if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 &&
849          (info->height % 4 == 1)) {
850         /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
851          * Graphics Core >> Section 7.18.3.7: Surface Arrays:
852          *
853          *    [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
854          *    the value calculated in the equation above , for every
855          *    other odd Surface Height starting from 1 i.e. 1,5,9,13.
856          *
857          * XXX(chadv): Is the errata natural corollary of the physical
858          * layout of interleaved samples?
859          */
860         pitch_sa_rows += 4;
861      }
862
863      pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
864      } /* end case */
865      break;
866   }
867
868   assert(pitch_sa_rows % fmtl->bh == 0);
869   uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
870
871   if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) {
872      /*
873       * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
874       *
875       *    "Mip-mapped and arrayed surfaces are supported with MCS buffer
876       *    layout with these alignments in the RT space: Horizontal
877       *    Alignment = 128 and Vertical Alignment = 64."
878       *
879       * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
880       *
881       *    "For non-multisampled render target's CCS auxiliary surface,
882       *    QPitch must be computed with Horizontal Alignment = 128 and
883       *    Surface Vertical Alignment = 256. These alignments are only for
884       *    CCS buffer and not for associated render target."
885       *
886       * The first restriction is already handled by isl_choose_image_alignment_el
887       * but the second restriction, which is an extension of the first, only
888       * applies to qpitch and must be applied here.
889       */
890      assert(fmtl->bh == 4);
891      pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
892   }
893
894   if (ISL_DEV_GEN(dev) >= 9 &&
895       info->dim == ISL_SURF_DIM_3D &&
896       tile_info->tiling != ISL_TILING_LINEAR) {
897      /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
898       *
899       *    Tile Mode != Linear: This field must be set to an integer multiple
900       *    of the tile height
901       */
902      pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
903   }
904
905   return pitch_el_rows;
906}
907
908/**
909 * A variant of isl_calc_phys_slice0_extent_sa() specific to
910 * ISL_DIM_LAYOUT_GEN4_2D.
911 */
912static void
913isl_calc_phys_slice0_extent_sa_gen4_2d(
914      const struct isl_device *dev,
915      const struct isl_surf_init_info *restrict info,
916      enum isl_msaa_layout msaa_layout,
917      const struct isl_extent3d *image_align_sa,
918      const struct isl_extent4d *phys_level0_sa,
919      struct isl_extent2d *phys_slice0_sa)
920{
921   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
922
923   assert(phys_level0_sa->depth == 1);
924
925   if (info->levels == 1) {
926      /* Do not pad the surface to the image alignment. Instead, pad it only
927       * to the pixel format's block alignment.
928       *
929       * For tiled surfaces, using a reduced alignment here avoids wasting CPU
930       * cycles on the below mipmap layout caluclations. Reducing the
931       * alignment here is safe because we later align the row pitch and array
932       * pitch to the tile boundary. It is safe even for
933       * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
934       * to accomodate the interleaved samples.
935       *
936       * For linear surfaces, reducing the alignment here permits us to later
937       * choose an arbitrary, non-aligned row pitch. If the surface backs
938       * a VkBuffer, then an arbitrary pitch may be needed to accomodate
939       * VkBufferImageCopy::bufferRowLength.
940       */
941      *phys_slice0_sa = (struct isl_extent2d) {
942         .w = isl_align_npot(phys_level0_sa->w, fmtl->bw),
943         .h = isl_align_npot(phys_level0_sa->h, fmtl->bh),
944      };
945      return;
946   }
947
948   uint32_t slice_top_w = 0;
949   uint32_t slice_bottom_w = 0;
950   uint32_t slice_left_h = 0;
951   uint32_t slice_right_h = 0;
952
953   uint32_t W0 = phys_level0_sa->w;
954   uint32_t H0 = phys_level0_sa->h;
955
956   for (uint32_t l = 0; l < info->levels; ++l) {
957      uint32_t W = isl_minify(W0, l);
958      uint32_t H = isl_minify(H0, l);
959
960      uint32_t w = isl_align_npot(W, image_align_sa->w);
961      uint32_t h = isl_align_npot(H, image_align_sa->h);
962
963      if (l == 0) {
964         slice_top_w = w;
965         slice_left_h = h;
966         slice_right_h = h;
967      } else if (l == 1) {
968         slice_bottom_w = w;
969         slice_left_h += h;
970      } else if (l == 2) {
971         slice_bottom_w += w;
972         slice_right_h += h;
973      } else {
974         slice_right_h += h;
975      }
976   }
977
978   *phys_slice0_sa = (struct isl_extent2d) {
979      .w = MAX(slice_top_w, slice_bottom_w),
980      .h = MAX(slice_left_h, slice_right_h),
981   };
982}
983
984static void
985isl_calc_phys_total_extent_el_gen4_2d(
986      const struct isl_device *dev,
987      const struct isl_surf_init_info *restrict info,
988      const struct isl_tile_info *tile_info,
989      enum isl_msaa_layout msaa_layout,
990      const struct isl_extent3d *image_align_sa,
991      const struct isl_extent4d *phys_level0_sa,
992      enum isl_array_pitch_span array_pitch_span,
993      uint32_t *array_pitch_el_rows,
994      struct isl_extent2d *total_extent_el)
995{
996   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
997
998   struct isl_extent2d phys_slice0_sa;
999   isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
1000                                          image_align_sa, phys_level0_sa,
1001                                          &phys_slice0_sa);
1002   *array_pitch_el_rows =
1003      isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info,
1004                                           image_align_sa, phys_level0_sa,
1005                                           array_pitch_span,
1006                                           &phys_slice0_sa);
1007   *total_extent_el = (struct isl_extent2d) {
1008      .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw),
1009      .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
1010           isl_assert_div(phys_slice0_sa.h, fmtl->bh),
1011   };
1012}
1013
1014/**
1015 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1016 * ISL_DIM_LAYOUT_GEN4_3D.
1017 */
1018static void
1019isl_calc_phys_total_extent_el_gen4_3d(
1020      const struct isl_device *dev,
1021      const struct isl_surf_init_info *restrict info,
1022      const struct isl_extent3d *image_align_sa,
1023      const struct isl_extent4d *phys_level0_sa,
1024      uint32_t *array_pitch_el_rows,
1025      struct isl_extent2d *phys_total_el)
1026{
1027   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1028
1029   assert(info->samples == 1);
1030
1031   if (info->dim != ISL_SURF_DIM_3D) {
1032      /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
1033       *
1034       * The cube face textures are stored in the same way as 3D surfaces
1035       * are stored (see section 6.17.5 for details).  For cube surfaces,
1036       * however, the depth is equal to the number of faces (always 6) and
1037       * is not reduced for each MIP.
1038       */
1039      assert(ISL_DEV_GEN(dev) == 4);
1040      assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
1041      assert(phys_level0_sa->array_len == 6);
1042   } else {
1043      assert(phys_level0_sa->array_len == 1);
1044   }
1045
1046   uint32_t total_w = 0;
1047   uint32_t total_h = 0;
1048
1049   uint32_t W0 = phys_level0_sa->w;
1050   uint32_t H0 = phys_level0_sa->h;
1051   uint32_t D0 = phys_level0_sa->d;
1052   uint32_t A0 = phys_level0_sa->a;
1053
1054   for (uint32_t l = 0; l < info->levels; ++l) {
1055      uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
1056      uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
1057      uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
1058
1059      uint32_t max_layers_horiz = MIN(level_d, 1u << l);
1060      uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1061
1062      total_w = MAX(total_w, level_w * max_layers_horiz);
1063      total_h += level_h * max_layers_vert;
1064   }
1065
1066   /* GEN4_3D layouts don't really have an array pitch since each LOD has a
1067    * different number of horizontal and vertical layers.  We have to set it
1068    * to something, so at least make it true for LOD0.
1069    */
1070   *array_pitch_el_rows =
1071      isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
1072   *phys_total_el = (struct isl_extent2d) {
1073      .w = isl_assert_div(total_w, fmtl->bw),
1074      .h = isl_assert_div(total_h, fmtl->bh),
1075   };
1076}
1077
1078/**
1079 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1080 * ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ.
1081 */
1082static void
1083isl_calc_phys_total_extent_el_gen6_stencil_hiz(
1084      const struct isl_device *dev,
1085      const struct isl_surf_init_info *restrict info,
1086      const struct isl_tile_info *tile_info,
1087      const struct isl_extent3d *image_align_sa,
1088      const struct isl_extent4d *phys_level0_sa,
1089      uint32_t *array_pitch_el_rows,
1090      struct isl_extent2d *phys_total_el)
1091{
1092   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1093
1094   const struct isl_extent2d tile_extent_sa = {
1095      .w = tile_info->logical_extent_el.w * fmtl->bw,
1096      .h = tile_info->logical_extent_el.h * fmtl->bh,
1097   };
1098   /* Tile size is a multiple of image alignment */
1099   assert(tile_extent_sa.w % image_align_sa->w == 0);
1100   assert(tile_extent_sa.h % image_align_sa->h == 0);
1101
1102   const uint32_t W0 = phys_level0_sa->w;
1103   const uint32_t H0 = phys_level0_sa->h;
1104
1105   /* Each image has the same height as LOD0 because the hardware thinks
1106    * everything is LOD0
1107    */
1108   const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;
1109
1110   uint32_t total_top_w = 0;
1111   uint32_t total_bottom_w = 0;
1112   uint32_t total_h = 0;
1113
1114   for (uint32_t l = 0; l < info->levels; ++l) {
1115      const uint32_t W = isl_minify(W0, l);
1116
1117      const uint32_t w = isl_align(W, tile_extent_sa.w);
1118      const uint32_t h = isl_align(H, tile_extent_sa.h);
1119
1120      if (l == 0) {
1121         total_top_w = w;
1122         total_h = h;
1123      } else if (l == 1) {
1124         total_bottom_w = w;
1125         total_h += h;
1126      } else {
1127         total_bottom_w += w;
1128      }
1129   }
1130
1131   *array_pitch_el_rows =
1132      isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
1133   *phys_total_el = (struct isl_extent2d) {
1134      .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
1135      .h = isl_assert_div(total_h, fmtl->bh),
1136   };
1137}
1138
1139/**
1140 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1141 * ISL_DIM_LAYOUT_GEN9_1D.
1142 */
1143static void
1144isl_calc_phys_total_extent_el_gen9_1d(
1145      const struct isl_device *dev,
1146      const struct isl_surf_init_info *restrict info,
1147      const struct isl_extent3d *image_align_sa,
1148      const struct isl_extent4d *phys_level0_sa,
1149      uint32_t *array_pitch_el_rows,
1150      struct isl_extent2d *phys_total_el)
1151{
1152   MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1153
1154   assert(phys_level0_sa->height / fmtl->bh == 1);
1155   assert(phys_level0_sa->depth == 1);
1156   assert(info->samples == 1);
1157   assert(image_align_sa->w >= fmtl->bw);
1158
1159   uint32_t slice_w = 0;
1160   const uint32_t W0 = phys_level0_sa->w;
1161
1162   for (uint32_t l = 0; l < info->levels; ++l) {
1163      uint32_t W = isl_minify(W0, l);
1164      uint32_t w = isl_align_npot(W, image_align_sa->w);
1165
1166      slice_w += w;
1167   }
1168
1169   *array_pitch_el_rows = 1;
1170   *phys_total_el = (struct isl_extent2d) {
1171      .w = isl_assert_div(slice_w, fmtl->bw),
1172      .h = phys_level0_sa->array_len,
1173   };
1174}
1175
1176/**
1177 * Calculate the two-dimensional total physical extent of the surface, in
1178 * units of surface elements.
1179 */
1180static void
1181isl_calc_phys_total_extent_el(const struct isl_device *dev,
1182                              const struct isl_surf_init_info *restrict info,
1183                              const struct isl_tile_info *tile_info,
1184                              enum isl_dim_layout dim_layout,
1185                              enum isl_msaa_layout msaa_layout,
1186                              const struct isl_extent3d *image_align_sa,
1187                              const struct isl_extent4d *phys_level0_sa,
1188                              enum isl_array_pitch_span array_pitch_span,
1189                              uint32_t *array_pitch_el_rows,
1190                              struct isl_extent2d *total_extent_el)
1191{
1192   switch (dim_layout) {
1193   case ISL_DIM_LAYOUT_GEN9_1D:
1194      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1195      isl_calc_phys_total_extent_el_gen9_1d(dev, info,
1196                                            image_align_sa, phys_level0_sa,
1197                                            array_pitch_el_rows,
1198                                            total_extent_el);
1199      return;
1200   case ISL_DIM_LAYOUT_GEN4_2D:
1201      isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout,
1202                                            image_align_sa, phys_level0_sa,
1203                                            array_pitch_span,
1204                                            array_pitch_el_rows,
1205                                            total_extent_el);
1206      return;
1207   case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
1208      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1209      isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info,
1210                                                     image_align_sa,
1211                                                     phys_level0_sa,
1212                                                     array_pitch_el_rows,
1213                                                     total_extent_el);
1214      return;
1215   case ISL_DIM_LAYOUT_GEN4_3D:
1216      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1217      isl_calc_phys_total_extent_el_gen4_3d(dev, info,
1218                                            image_align_sa, phys_level0_sa,
1219                                            array_pitch_el_rows,
1220                                            total_extent_el);
1221      return;
1222   }
1223}
1224
1225static uint32_t
1226isl_calc_row_pitch_alignment(const struct isl_surf_init_info *surf_info,
1227                             const struct isl_tile_info *tile_info)
1228{
1229   if (tile_info->tiling != ISL_TILING_LINEAR)
1230      return tile_info->phys_extent_B.width;
1231
1232   /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
1233    * RENDER_SURFACE_STATE Surface Pitch (p349):
1234    *
1235    *    - For linear render target surfaces and surfaces accessed with the
1236    *      typed data port messages, the pitch must be a multiple of the
1237    *      element size for non-YUV surface formats.  Pitch must be
1238    *      a multiple of 2 * element size for YUV surface formats.
1239    *
1240    *    - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
1241    *      ignore because isl doesn't do buffers.]
1242    *
1243    *    - For other linear surfaces, the pitch can be any multiple of
1244    *      bytes.
1245    */
1246   const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1247   const uint32_t bs = fmtl->bpb / 8;
1248
1249   if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1250      if (isl_format_is_yuv(surf_info->format)) {
1251         return 2 * bs;
1252      } else  {
1253         return bs;
1254      }
1255   }
1256
1257   return 1;
1258}
1259
1260static uint32_t
1261isl_calc_linear_min_row_pitch(const struct isl_device *dev,
1262                              const struct isl_surf_init_info *info,
1263                              const struct isl_extent2d *phys_total_el,
1264                              uint32_t alignment_B)
1265{
1266   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1267   const uint32_t bs = fmtl->bpb / 8;
1268
1269   return isl_align_npot(bs * phys_total_el->w, alignment_B);
1270}
1271
1272static uint32_t
1273isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
1274                             const struct isl_surf_init_info *surf_info,
1275                             const struct isl_tile_info *tile_info,
1276                             const struct isl_extent2d *phys_total_el,
1277                             uint32_t alignment_B)
1278{
1279   const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1280
1281   assert(fmtl->bpb % tile_info->format_bpb == 0);
1282
1283   const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
1284   const uint32_t total_w_tl =
1285      isl_align_div(phys_total_el->w * tile_el_scale,
1286                    tile_info->logical_extent_el.width);
1287
1288   assert(alignment_B == tile_info->phys_extent_B.width);
1289   return total_w_tl * tile_info->phys_extent_B.width;
1290}
1291
1292static uint32_t
1293isl_calc_min_row_pitch(const struct isl_device *dev,
1294                       const struct isl_surf_init_info *surf_info,
1295                       const struct isl_tile_info *tile_info,
1296                       const struct isl_extent2d *phys_total_el,
1297                       uint32_t alignment_B)
1298{
1299   if (tile_info->tiling == ISL_TILING_LINEAR) {
1300      return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
1301                                           alignment_B);
1302   } else {
1303      return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
1304                                          phys_total_el, alignment_B);
1305   }
1306}
1307
/**
 * Is the pitch `n` in the valid range for a hardware bitfield, if the
 * bitfield's size is `bits` bits?
 *
 * Hardware pitch fields are offset by 1. For example, if the size of
 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
 * pitches is [1, 2^B] inclusive.  If the surface pitch is N, then
 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
 *
 * A `bits` of 0 always yields false.  `n` must be non-zero.
 */
static bool
pitch_in_range(uint32_t n, uint32_t bits)
{
   assert(n != 0);

   /* Shift as uint32_t: `1 << bits` would be a signed int shift, which is
    * undefined once bits reaches 31.  A field of 32 or more bits can hold
    * every non-zero 32-bit pitch, so skip the shift entirely in that case.
    */
   const bool fits = bits >= 32 || n <= ((uint32_t) 1 << bits);

   return likely(bits != 0 && 1 <= n && fits);
}
1323
1324static bool
1325isl_calc_row_pitch(const struct isl_device *dev,
1326                   const struct isl_surf_init_info *surf_info,
1327                   const struct isl_tile_info *tile_info,
1328                   enum isl_dim_layout dim_layout,
1329                   const struct isl_extent2d *phys_total_el,
1330                   uint32_t *out_row_pitch_B)
1331{
1332   uint32_t alignment_B =
1333      isl_calc_row_pitch_alignment(surf_info, tile_info);
1334
1335   /* If pitch isn't given and it can be chosen freely, align it by cache line
1336    * allowing one to use blit engine on the surface.
1337    */
1338   if (surf_info->row_pitch_B == 0 && tile_info->tiling == ISL_TILING_LINEAR) {
1339      /* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress:
1340       *
1341       *    "Base address of the destination surface: X=0, Y=0. Lower 32bits
1342       *    of the 48bit addressing. When Src Tiling is enabled (Bit_15
1343       *    enabled), this address must be 4KB-aligned. When Tiling is not
1344       *    enabled, this address should be CL (64byte) aligned."
1345       */
1346      alignment_B = MAX2(alignment_B, 64);
1347   }
1348
1349   const uint32_t min_row_pitch_B =
1350      isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
1351                             alignment_B);
1352
1353   uint32_t row_pitch_B = min_row_pitch_B;
1354
1355   if (surf_info->row_pitch_B != 0) {
1356      row_pitch_B = surf_info->row_pitch_B;
1357
1358      if (row_pitch_B < min_row_pitch_B)
1359         return false;
1360
1361      if (row_pitch_B % alignment_B != 0)
1362         return false;
1363   }
1364
1365   const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width;
1366
1367   if (row_pitch_B == 0)
1368      return false;
1369
1370   if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
1371      /* SurfacePitch is ignored for this layout. */
1372      goto done;
1373   }
1374
1375   if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1376                            ISL_SURF_USAGE_TEXTURE_BIT |
1377                            ISL_SURF_USAGE_STORAGE_BIT)) &&
1378       !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info)))
1379      return false;
1380
1381   if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
1382                            ISL_SURF_USAGE_MCS_BIT)) &&
1383       !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info)))
1384      return false;
1385
1386   if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
1387       !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1388      return false;
1389
1390   if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
1391       !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1392      return false;
1393
1394   const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
1395      _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
1396      _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
1397
1398   if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
1399       !pitch_in_range(row_pitch_B, stencil_pitch_bits))
1400      return false;
1401
1402 done:
1403   *out_row_pitch_B = row_pitch_B;
1404   return true;
1405}
1406
1407bool
1408isl_surf_init_s(const struct isl_device *dev,
1409                struct isl_surf *surf,
1410                const struct isl_surf_init_info *restrict info)
1411{
1412   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1413
1414   const struct isl_extent4d logical_level0_px = {
1415      .w = info->width,
1416      .h = info->height,
1417      .d = info->depth,
1418      .a = info->array_len,
1419   };
1420
1421   enum isl_tiling tiling;
1422   if (!isl_surf_choose_tiling(dev, info, &tiling))
1423      return false;
1424
1425   struct isl_tile_info tile_info;
1426   isl_tiling_get_info(tiling, fmtl->bpb, &tile_info);
1427
1428   const enum isl_dim_layout dim_layout =
1429      isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);
1430
1431   enum isl_msaa_layout msaa_layout;
1432   if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
1433       return false;
1434
1435   struct isl_extent3d image_align_el;
1436   isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
1437                                 &image_align_el);
1438
1439   struct isl_extent3d image_align_sa =
1440      isl_extent3d_el_to_sa(info->format, image_align_el);
1441
1442   struct isl_extent4d phys_level0_sa;
1443   isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
1444                                  &phys_level0_sa);
1445   assert(phys_level0_sa.w % fmtl->bw == 0);
1446   assert(phys_level0_sa.h % fmtl->bh == 0);
1447
1448   enum isl_array_pitch_span array_pitch_span =
1449      isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
1450
1451   uint32_t array_pitch_el_rows;
1452   struct isl_extent2d phys_total_el;
1453   isl_calc_phys_total_extent_el(dev, info, &tile_info,
1454                                 dim_layout, msaa_layout,
1455                                 &image_align_sa, &phys_level0_sa,
1456                                 array_pitch_span, &array_pitch_el_rows,
1457                                 &phys_total_el);
1458
1459   uint32_t row_pitch_B;
1460   if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
1461                           &phys_total_el, &row_pitch_B))
1462      return false;
1463
1464   uint32_t base_alignment_B;
1465   uint64_t size_B;
1466   if (tiling == ISL_TILING_LINEAR) {
1467      size_B = (uint64_t) row_pitch_B * phys_total_el.h;
1468
1469      /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
1470       *
1471       *    "The Base Address for linear render target surfaces and surfaces
1472       *    accessed with the typed surface read/write data port messages must
1473       *    be element-size aligned, for non-YUV surface formats, or a
1474       *    multiple of 2 element-sizes for YUV surface formats. Other linear
1475       *    surfaces have no alignment requirements (byte alignment is
1476       *    sufficient.)"
1477       */
1478      base_alignment_B = MAX(1, info->min_alignment_B);
1479      if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1480         if (isl_format_is_yuv(info->format)) {
1481            base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 4);
1482         } else {
1483            base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 8);
1484         }
1485      }
1486      base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B);
1487   } else {
1488      const uint32_t total_h_tl =
1489         isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height);
1490
1491      size_B = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch_B;
1492
1493      const uint32_t tile_size_B = tile_info.phys_extent_B.width *
1494                                   tile_info.phys_extent_B.height;
1495      assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
1496      base_alignment_B = MAX(info->min_alignment_B, tile_size_B);
1497   }
1498
1499   if (ISL_DEV_GEN(dev) < 9) {
1500      /* From the Broadwell PRM Vol 5, Surface Layout:
1501       *
1502       *    "In addition to restrictions on maximum height, width, and depth,
1503       *     surfaces are also restricted to a maximum size in bytes. This
1504       *     maximum is 2 GB for all products and all surface types."
1505       *
1506       * This comment is applicable to all Pre-gen9 platforms.
1507       */
1508      if (size_B > (uint64_t) 1 << 31)
1509         return false;
1510   } else if (ISL_DEV_GEN(dev) < 11) {
1511      /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
1512       *    "In addition to restrictions on maximum height, width, and depth,
1513       *     surfaces are also restricted to a maximum size of 2^38 bytes.
1514       *     All pixels within the surface must be contained within 2^38 bytes
1515       *     of the base address."
1516       */
1517      if (size_B > (uint64_t) 1 << 38)
1518         return false;
1519   } else {
1520      /* gen11+ platforms raised this limit to 2^44 bytes. */
1521      if (size_B > (uint64_t) 1 << 44)
1522         return false;
1523   }
1524
1525   *surf = (struct isl_surf) {
1526      .dim = info->dim,
1527      .dim_layout = dim_layout,
1528      .msaa_layout = msaa_layout,
1529      .tiling = tiling,
1530      .format = info->format,
1531
1532      .levels = info->levels,
1533      .samples = info->samples,
1534
1535      .image_alignment_el = image_align_el,
1536      .logical_level0_px = logical_level0_px,
1537      .phys_level0_sa = phys_level0_sa,
1538
1539      .size_B = size_B,
1540      .alignment_B = base_alignment_B,
1541      .row_pitch_B = row_pitch_B,
1542      .array_pitch_el_rows = array_pitch_el_rows,
1543      .array_pitch_span = array_pitch_span,
1544
1545      .usage = info->usage,
1546   };
1547
1548   return true;
1549}
1550
1551void
1552isl_surf_get_tile_info(const struct isl_surf *surf,
1553                       struct isl_tile_info *tile_info)
1554{
1555   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1556   isl_tiling_get_info(surf->tiling, fmtl->bpb, tile_info);
1557}
1558
1559bool
1560isl_surf_get_hiz_surf(const struct isl_device *dev,
1561                      const struct isl_surf *surf,
1562                      struct isl_surf *hiz_surf)
1563{
1564   assert(ISL_DEV_GEN(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev));
1565
1566   /* Multisampled depth is always interleaved */
1567   assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
1568          surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
1569
1570   /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
1571    *
1572    *    "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
1573    *    Target View Extent, and Depth Coordinate Offset X/Y of the
1574    *    hierarchical depth buffer are inherited from the depth buffer. The
1575    *    height and width of the hierarchical depth buffer that must be
1576    *    allocated are computed by the following formulas, where HZ is the
1577    *    hierarchical depth buffer and Z is the depth buffer. The Z_Height,
1578    *    Z_Width, and Z_Depth values given in these formulas are those present
1579    *    in 3DSTATE_DEPTH_BUFFER incremented by one.
1580    *
1581    *    "The value of Z_Height and Z_Width must each be multiplied by 2 before
1582    *    being applied to the table below if Number of Multisamples is set to
1583    *    NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
1584    *    Z_Width must be multiplied by 4 before being applied to the table
1585    *    below if Number of Multisamples is set to NUMSAMPLES_8."
1586    *
1587    * In the Sky Lake PRM, the second paragraph is replaced with this:
1588    *
1589    *    "The Z_Height and Z_Width values must equal those present in
1590    *    3DSTATE_DEPTH_BUFFER incremented by one."
1591    *
1592    * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ
1593    * block corresponds to a region of 8x4 samples in the primary depth
1594    * surface.  On Sky Lake, on the other hand, each HiZ block corresponds to
1595    * a region of 8x4 pixels in the primary depth surface regardless of the
1596    * number of samples.  The dimensions of a HiZ block in both pixels and
1597    * samples are given in the table below:
1598    *
1599    *                    | SNB - BDW |     SKL+
1600    *              ------+-----------+-------------
1601    *                1x  |  8 x 4 sa |   8 x 4 sa
1602    *               MSAA |  8 x 4 px |   8 x 4 px
1603    *              ------+-----------+-------------
1604    *                2x  |  8 x 4 sa |  16 x 4 sa
1605    *               MSAA |  4 x 4 px |   8 x 4 px
1606    *              ------+-----------+-------------
1607    *                4x  |  8 x 4 sa |  16 x 8 sa
1608    *               MSAA |  4 x 2 px |   8 x 4 px
1609    *              ------+-----------+-------------
1610    *                8x  |  8 x 4 sa |  32 x 8 sa
1611    *               MSAA |  2 x 2 px |   8 x 4 px
1612    *              ------+-----------+-------------
1613    *               16x  |    N/A    | 32 x 16 sa
1614    *               MSAA |    N/A    |  8 x  4 px
1615    *              ------+-----------+-------------
1616    *
1617    * There are a number of different ways that this discrepency could be
1618    * handled.  The way we have chosen is to simply make MSAA HiZ have the
1619    * same number of samples as the parent surface pre-Sky Lake and always be
1620    * single-sampled on Sky Lake and above.  Since the block sizes of
1621    * compressed formats are given in samples, this neatly handles everything
1622    * without the need for additional HiZ formats with different block sizes
1623    * on SKL+.
1624    */
1625   const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples;
1626
1627   return isl_surf_init(dev, hiz_surf,
1628                        .dim = surf->dim,
1629                        .format = ISL_FORMAT_HIZ,
1630                        .width = surf->logical_level0_px.width,
1631                        .height = surf->logical_level0_px.height,
1632                        .depth = surf->logical_level0_px.depth,
1633                        .levels = surf->levels,
1634                        .array_len = surf->logical_level0_px.array_len,
1635                        .samples = samples,
1636                        .usage = ISL_SURF_USAGE_HIZ_BIT,
1637                        .tiling_flags = ISL_TILING_HIZ_BIT);
1638}
1639
1640bool
1641isl_surf_get_mcs_surf(const struct isl_device *dev,
1642                      const struct isl_surf *surf,
1643                      struct isl_surf *mcs_surf)
1644{
1645   assert(ISL_DEV_GEN(dev) >= 7);
1646
1647   /* It must be multisampled with an array layout */
1648   assert(surf->samples > 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
1649
1650   /* The following are true of all multisampled surfaces */
1651   assert(surf->dim == ISL_SURF_DIM_2D);
1652   assert(surf->levels == 1);
1653   assert(surf->logical_level0_px.depth == 1);
1654
1655   /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9
1656    * bits which means the maximum pitch of a compression surface is 512
1657    * tiles or 64KB (since MCS is always Y-tiled).  Since a 16x MCS buffer is
1658    * 64bpp, this gives us a maximum width of 8192 pixels.  We can create
1659    * larger multisampled surfaces, we just can't compress them.   For 2x, 4x,
1660    * and 8x, we have enough room for the full 16k supported by the hardware.
1661    */
1662   if (surf->samples == 16 && surf->logical_level0_px.width > 8192)
1663      return false;
1664
1665   enum isl_format mcs_format;
1666   switch (surf->samples) {
1667   case 2:  mcs_format = ISL_FORMAT_MCS_2X;  break;
1668   case 4:  mcs_format = ISL_FORMAT_MCS_4X;  break;
1669   case 8:  mcs_format = ISL_FORMAT_MCS_8X;  break;
1670   case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
1671   default:
1672      unreachable("Invalid sample count");
1673   }
1674
1675   return isl_surf_init(dev, mcs_surf,
1676                        .dim = ISL_SURF_DIM_2D,
1677                        .format = mcs_format,
1678                        .width = surf->logical_level0_px.width,
1679                        .height = surf->logical_level0_px.height,
1680                        .depth = 1,
1681                        .levels = 1,
1682                        .array_len = surf->logical_level0_px.array_len,
1683                        .samples = 1, /* MCS surfaces are really single-sampled */
1684                        .usage = ISL_SURF_USAGE_MCS_BIT,
1685                        .tiling_flags = ISL_TILING_Y0_BIT);
1686}
1687
1688bool
1689isl_surf_get_ccs_surf(const struct isl_device *dev,
1690                      const struct isl_surf *surf,
1691                      struct isl_surf *ccs_surf,
1692                      uint32_t row_pitch_B)
1693{
1694   assert(surf->samples == 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_NONE);
1695   assert(ISL_DEV_GEN(dev) >= 7);
1696
1697   if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
1698      return false;
1699
1700   /* The PRM doesn't say this explicitly, but fast-clears don't appear to
1701    * work for 3D textures until gen9 where the layout of 3D textures changes
1702    * to match 2D array textures.
1703    */
1704   if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
1705      return false;
1706
1707   /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of
1708    * Non-MultiSampler Render Target Restrictions):
1709    *
1710    *    "Support is for non-mip-mapped and non-array surface types only."
1711    *
1712    * This restriction is lifted on gen8+.  Technically, it may be possible to
1713    * create a CCS for an arrayed or mipmapped image and only enable CCS_D
1714    * when rendering to the base slice.  However, there is no documentation
1715    * tell us what the hardware would do in that case or what it does if you
1716    * walk off the bases slice.  (Does it ignore CCS or does it start
1717    * scribbling over random memory?)  We play it safe and just follow the
1718    * docs and don't allow CCS_D for arrayed or mip-mapped surfaces.
1719    */
1720   if (ISL_DEV_GEN(dev) <= 7 &&
1721       (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
1722      return false;
1723
1724   if (isl_format_is_compressed(surf->format))
1725      return false;
1726
1727   /* TODO: More conditions where it can fail. */
1728
1729   enum isl_format ccs_format;
1730   if (ISL_DEV_GEN(dev) >= 9) {
1731      if (!isl_tiling_is_any_y(surf->tiling))
1732         return false;
1733
1734      switch (isl_format_get_layout(surf->format)->bpb) {
1735      case 32:    ccs_format = ISL_FORMAT_GEN9_CCS_32BPP;   break;
1736      case 64:    ccs_format = ISL_FORMAT_GEN9_CCS_64BPP;   break;
1737      case 128:   ccs_format = ISL_FORMAT_GEN9_CCS_128BPP;  break;
1738      default:
1739         return false;
1740      }
1741   } else if (surf->tiling == ISL_TILING_Y0) {
1742      switch (isl_format_get_layout(surf->format)->bpb) {
1743      case 32:    ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_Y;    break;
1744      case 64:    ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_Y;    break;
1745      case 128:   ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_Y;   break;
1746      default:
1747         return false;
1748      }
1749   } else if (surf->tiling == ISL_TILING_X) {
1750      switch (isl_format_get_layout(surf->format)->bpb) {
1751      case 32:    ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_X;    break;
1752      case 64:    ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_X;    break;
1753      case 128:   ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_X;   break;
1754      default:
1755         return false;
1756      }
1757   } else {
1758      return false;
1759   }
1760
1761   return isl_surf_init(dev, ccs_surf,
1762                        .dim = surf->dim,
1763                        .format = ccs_format,
1764                        .width = surf->logical_level0_px.width,
1765                        .height = surf->logical_level0_px.height,
1766                        .depth = surf->logical_level0_px.depth,
1767                        .levels = surf->levels,
1768                        .array_len = surf->logical_level0_px.array_len,
1769                        .samples = 1,
1770                        .row_pitch_B = row_pitch_B,
1771                        .usage = ISL_SURF_USAGE_CCS_BIT,
1772                        .tiling_flags = ISL_TILING_CCS_BIT);
1773}
1774
/**
 * Call the generation-specific implementation isl_gen<N>_<func>() that
 * matches the hardware generation reported by ISL_DEV_GEN(dev), passing the
 * remaining arguments through unchanged.
 *
 * G4X parts on gen4 use the gen5 implementation and Haswell parts on gen7 use
 * the gen75 implementation.  An unknown generation trips an assertion.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves as
 * a single statement and can be used safely as the body of an unbraced
 * if/else.  Invoke it as a statement terminated by a semicolon.  Note that
 * dev may be evaluated more than once.
 */
#define isl_genX_call(dev, func, ...)                 \
   do {                                               \
      switch (ISL_DEV_GEN(dev)) {                     \
      case 4:                                         \
         /* G45 surface state is the same as gen5 */  \
         if (ISL_DEV_IS_G4X(dev)) {                   \
            isl_gen5_##func(__VA_ARGS__);             \
         } else {                                     \
            isl_gen4_##func(__VA_ARGS__);             \
         }                                            \
         break;                                       \
      case 5:                                         \
         isl_gen5_##func(__VA_ARGS__);                \
         break;                                       \
      case 6:                                         \
         isl_gen6_##func(__VA_ARGS__);                \
         break;                                       \
      case 7:                                         \
         if (ISL_DEV_IS_HASWELL(dev)) {               \
            isl_gen75_##func(__VA_ARGS__);            \
         } else {                                     \
            isl_gen7_##func(__VA_ARGS__);             \
         }                                            \
         break;                                       \
      case 8:                                         \
         isl_gen8_##func(__VA_ARGS__);                \
         break;                                       \
      case 9:                                         \
         isl_gen9_##func(__VA_ARGS__);                \
         break;                                       \
      case 10:                                        \
         isl_gen10_##func(__VA_ARGS__);               \
         break;                                       \
      case 11:                                        \
         isl_gen11_##func(__VA_ARGS__);               \
         break;                                       \
      default:                                        \
         assert(!"Unknown hardware generation");      \
      }                                               \
   } while (0)
1813
1814void
1815isl_surf_fill_state_s(const struct isl_device *dev, void *state,
1816                      const struct isl_surf_fill_state_info *restrict info)
1817{
1818#ifndef NDEBUG
1819   isl_surf_usage_flags_t _base_usage =
1820      info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1821                           ISL_SURF_USAGE_TEXTURE_BIT |
1822                           ISL_SURF_USAGE_STORAGE_BIT);
1823   /* They may only specify one of the above bits at a time */
1824   assert(__builtin_popcount(_base_usage) == 1);
1825   /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */
1826   assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage);
1827#endif
1828
1829   if (info->surf->dim == ISL_SURF_DIM_3D) {
1830      assert(info->view->base_array_layer + info->view->array_len <=
1831             info->surf->logical_level0_px.depth);
1832   } else {
1833      assert(info->view->base_array_layer + info->view->array_len <=
1834             info->surf->logical_level0_px.array_len);
1835   }
1836
1837   isl_genX_call(dev, surf_fill_state_s, dev, state, info);
1838}
1839
1840void
1841isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
1842                        const struct isl_buffer_fill_state_info *restrict info)
1843{
1844   isl_genX_call(dev, buffer_fill_state_s, state, info);
1845}
1846
1847void
1848isl_null_fill_state(const struct isl_device *dev, void *state,
1849                    struct isl_extent3d size)
1850{
1851   isl_genX_call(dev, null_fill_state, state, size);
1852}
1853
1854void
1855isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
1856                             const struct isl_depth_stencil_hiz_emit_info *restrict info)
1857{
1858   if (info->depth_surf && info->stencil_surf) {
1859      if (!dev->info->has_hiz_and_separate_stencil) {
1860         assert(info->depth_surf == info->stencil_surf);
1861         assert(info->depth_address == info->stencil_address);
1862      }
1863      assert(info->depth_surf->dim == info->stencil_surf->dim);
1864   }
1865
1866   if (info->depth_surf) {
1867      assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT));
1868      if (info->depth_surf->dim == ISL_SURF_DIM_3D) {
1869         assert(info->view->base_array_layer + info->view->array_len <=
1870                info->depth_surf->logical_level0_px.depth);
1871      } else {
1872         assert(info->view->base_array_layer + info->view->array_len <=
1873                info->depth_surf->logical_level0_px.array_len);
1874      }
1875   }
1876
1877   if (info->stencil_surf) {
1878      assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT));
1879      if (info->stencil_surf->dim == ISL_SURF_DIM_3D) {
1880         assert(info->view->base_array_layer + info->view->array_len <=
1881                info->stencil_surf->logical_level0_px.depth);
1882      } else {
1883         assert(info->view->base_array_layer + info->view->array_len <=
1884                info->stencil_surf->logical_level0_px.array_len);
1885      }
1886   }
1887
1888   isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info);
1889}
1890
1891/**
1892 * A variant of isl_surf_get_image_offset_sa() specific to
1893 * ISL_DIM_LAYOUT_GEN4_2D.
1894 */
1895static void
1896get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
1897                            uint32_t level, uint32_t logical_array_layer,
1898                            uint32_t *x_offset_sa,
1899                            uint32_t *y_offset_sa)
1900{
1901   assert(level < surf->levels);
1902   if (surf->dim == ISL_SURF_DIM_3D)
1903      assert(logical_array_layer < surf->logical_level0_px.depth);
1904   else
1905      assert(logical_array_layer < surf->logical_level0_px.array_len);
1906
1907   const struct isl_extent3d image_align_sa =
1908      isl_surf_get_image_alignment_sa(surf);
1909
1910   const uint32_t W0 = surf->phys_level0_sa.width;
1911   const uint32_t H0 = surf->phys_level0_sa.height;
1912
1913   const uint32_t phys_layer = logical_array_layer *
1914      (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
1915
1916   uint32_t x = 0;
1917   uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
1918
1919   for (uint32_t l = 0; l < level; ++l) {
1920      if (l == 1) {
1921         uint32_t W = isl_minify(W0, l);
1922         x += isl_align_npot(W, image_align_sa.w);
1923      } else {
1924         uint32_t H = isl_minify(H0, l);
1925         y += isl_align_npot(H, image_align_sa.h);
1926      }
1927   }
1928
1929   *x_offset_sa = x;
1930   *y_offset_sa = y;
1931}
1932
1933/**
1934 * A variant of isl_surf_get_image_offset_sa() specific to
1935 * ISL_DIM_LAYOUT_GEN4_3D.
1936 */
1937static void
1938get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
1939                            uint32_t level, uint32_t logical_z_offset_px,
1940                            uint32_t *x_offset_sa,
1941                            uint32_t *y_offset_sa)
1942{
1943   assert(level < surf->levels);
1944   if (surf->dim == ISL_SURF_DIM_3D) {
1945      assert(surf->phys_level0_sa.array_len == 1);
1946      assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
1947   } else {
1948      assert(surf->dim == ISL_SURF_DIM_2D);
1949      assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT);
1950      assert(surf->phys_level0_sa.array_len == 6);
1951      assert(logical_z_offset_px < surf->phys_level0_sa.array_len);
1952   }
1953
1954   const struct isl_extent3d image_align_sa =
1955      isl_surf_get_image_alignment_sa(surf);
1956
1957   const uint32_t W0 = surf->phys_level0_sa.width;
1958   const uint32_t H0 = surf->phys_level0_sa.height;
1959   const uint32_t D0 = surf->phys_level0_sa.depth;
1960   const uint32_t AL = surf->phys_level0_sa.array_len;
1961
1962   uint32_t x = 0;
1963   uint32_t y = 0;
1964
1965   for (uint32_t l = 0; l < level; ++l) {
1966      const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
1967      const uint32_t level_d =
1968         isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL,
1969                        image_align_sa.d);
1970      const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1971
1972      y += level_h * max_layers_vert;
1973   }
1974
1975   const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
1976   const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
1977   const uint32_t level_d =
1978      isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL,
1979                     image_align_sa.d);
1980
1981   const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
1982
1983   x += level_w * (logical_z_offset_px % max_layers_horiz);
1984   y += level_h * (logical_z_offset_px / max_layers_horiz);
1985
1986   *x_offset_sa = x;
1987   *y_offset_sa = y;
1988}
1989
1990static void
1991get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf,
1992                                     uint32_t level,
1993                                     uint32_t logical_array_layer,
1994                                     uint32_t *x_offset_sa,
1995                                     uint32_t *y_offset_sa)
1996{
1997   assert(level < surf->levels);
1998   assert(surf->logical_level0_px.depth == 1);
1999   assert(logical_array_layer < surf->logical_level0_px.array_len);
2000
2001   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2002
2003   const struct isl_extent3d image_align_sa =
2004      isl_surf_get_image_alignment_sa(surf);
2005
2006   struct isl_tile_info tile_info;
2007   isl_tiling_get_info(surf->tiling, fmtl->bpb, &tile_info);
2008   const struct isl_extent2d tile_extent_sa = {
2009      .w = tile_info.logical_extent_el.w * fmtl->bw,
2010      .h = tile_info.logical_extent_el.h * fmtl->bh,
2011   };
2012   /* Tile size is a multiple of image alignment */
2013   assert(tile_extent_sa.w % image_align_sa.w == 0);
2014   assert(tile_extent_sa.h % image_align_sa.h == 0);
2015
2016   const uint32_t W0 = surf->phys_level0_sa.w;
2017   const uint32_t H0 = surf->phys_level0_sa.h;
2018
2019   /* Each image has the same height as LOD0 because the hardware thinks
2020    * everything is LOD0
2021    */
2022   const uint32_t H = isl_align(H0, image_align_sa.h);
2023
2024   /* Quick sanity check for consistency */
2025   if (surf->phys_level0_sa.array_len > 1)
2026      assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh));
2027
2028   uint32_t x = 0, y = 0;
2029   for (uint32_t l = 0; l < level; ++l) {
2030      const uint32_t W = isl_minify(W0, l);
2031
2032      const uint32_t w = isl_align(W, tile_extent_sa.w);
2033      const uint32_t h = isl_align(H * surf->phys_level0_sa.a,
2034                                   tile_extent_sa.h);
2035
2036      if (l == 0) {
2037         y += h;
2038      } else {
2039         x += w;
2040      }
2041   }
2042
2043   y += H * logical_array_layer;
2044
2045   *x_offset_sa = x;
2046   *y_offset_sa = y;
2047}
2048
2049/**
2050 * A variant of isl_surf_get_image_offset_sa() specific to
2051 * ISL_DIM_LAYOUT_GEN9_1D.
2052 */
2053static void
2054get_image_offset_sa_gen9_1d(const struct isl_surf *surf,
2055                            uint32_t level, uint32_t layer,
2056                            uint32_t *x_offset_sa,
2057                            uint32_t *y_offset_sa)
2058{
2059   assert(level < surf->levels);
2060   assert(layer < surf->phys_level0_sa.array_len);
2061   assert(surf->phys_level0_sa.height == 1);
2062   assert(surf->phys_level0_sa.depth == 1);
2063   assert(surf->samples == 1);
2064
2065   const uint32_t W0 = surf->phys_level0_sa.width;
2066   const struct isl_extent3d image_align_sa =
2067      isl_surf_get_image_alignment_sa(surf);
2068
2069   uint32_t x = 0;
2070
2071   for (uint32_t l = 0; l < level; ++l) {
2072      uint32_t W = isl_minify(W0, l);
2073      uint32_t w = isl_align_npot(W, image_align_sa.w);
2074
2075      x += w;
2076   }
2077
2078   *x_offset_sa = x;
2079   *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
2080}
2081
2082/**
2083 * Calculate the offset, in units of surface samples, to a subimage in the
2084 * surface.
2085 *
2086 * @invariant level < surface levels
2087 * @invariant logical_array_layer < logical array length of surface
2088 * @invariant logical_z_offset_px < logical depth of surface at level
2089 */
2090void
2091isl_surf_get_image_offset_sa(const struct isl_surf *surf,
2092                             uint32_t level,
2093                             uint32_t logical_array_layer,
2094                             uint32_t logical_z_offset_px,
2095                             uint32_t *x_offset_sa,
2096                             uint32_t *y_offset_sa)
2097{
2098   assert(level < surf->levels);
2099   assert(logical_array_layer < surf->logical_level0_px.array_len);
2100   assert(logical_z_offset_px
2101          < isl_minify(surf->logical_level0_px.depth, level));
2102
2103   switch (surf->dim_layout) {
2104   case ISL_DIM_LAYOUT_GEN9_1D:
2105      get_image_offset_sa_gen9_1d(surf, level, logical_array_layer,
2106                                  x_offset_sa, y_offset_sa);
2107      break;
2108   case ISL_DIM_LAYOUT_GEN4_2D:
2109      get_image_offset_sa_gen4_2d(surf, level, logical_array_layer
2110                                  + logical_z_offset_px,
2111                                  x_offset_sa, y_offset_sa);
2112      break;
2113   case ISL_DIM_LAYOUT_GEN4_3D:
2114      get_image_offset_sa_gen4_3d(surf, level, logical_array_layer +
2115                                  logical_z_offset_px,
2116                                  x_offset_sa, y_offset_sa);
2117      break;
2118   case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
2119      get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer +
2120                                           logical_z_offset_px,
2121                                           x_offset_sa, y_offset_sa);
2122      break;
2123
2124   default:
2125      unreachable("not reached");
2126   }
2127}
2128
2129void
2130isl_surf_get_image_offset_el(const struct isl_surf *surf,
2131                             uint32_t level,
2132                             uint32_t logical_array_layer,
2133                             uint32_t logical_z_offset_px,
2134                             uint32_t *x_offset_el,
2135                             uint32_t *y_offset_el)
2136{
2137   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2138
2139   assert(level < surf->levels);
2140   assert(logical_array_layer < surf->logical_level0_px.array_len);
2141   assert(logical_z_offset_px
2142          < isl_minify(surf->logical_level0_px.depth, level));
2143
2144   uint32_t x_offset_sa, y_offset_sa;
2145   isl_surf_get_image_offset_sa(surf, level,
2146                                logical_array_layer,
2147                                logical_z_offset_px,
2148                                &x_offset_sa,
2149                                &y_offset_sa);
2150
2151   *x_offset_el = x_offset_sa / fmtl->bw;
2152   *y_offset_el = y_offset_sa / fmtl->bh;
2153}
2154
2155void
2156isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
2157                                    uint32_t level,
2158                                    uint32_t logical_array_layer,
2159                                    uint32_t logical_z_offset_px,
2160                                    uint32_t *offset_B,
2161                                    uint32_t *x_offset_sa,
2162                                    uint32_t *y_offset_sa)
2163{
2164   const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2165
2166   uint32_t total_x_offset_el, total_y_offset_el;
2167   isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2168                                logical_z_offset_px,
2169                                &total_x_offset_el,
2170                                &total_y_offset_el);
2171
2172   uint32_t x_offset_el, y_offset_el;
2173   isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
2174                                      surf->row_pitch_B,
2175                                      total_x_offset_el,
2176                                      total_y_offset_el,
2177                                      offset_B,
2178                                      &x_offset_el,
2179                                      &y_offset_el);
2180
2181   if (x_offset_sa) {
2182      *x_offset_sa = x_offset_el * fmtl->bw;
2183   } else {
2184      assert(x_offset_el == 0);
2185   }
2186
2187   if (y_offset_sa) {
2188      *y_offset_sa = y_offset_el * fmtl->bh;
2189   } else {
2190      assert(y_offset_el == 0);
2191   }
2192}
2193
2194void
2195isl_surf_get_image_surf(const struct isl_device *dev,
2196                        const struct isl_surf *surf,
2197                        uint32_t level,
2198                        uint32_t logical_array_layer,
2199                        uint32_t logical_z_offset_px,
2200                        struct isl_surf *image_surf,
2201                        uint32_t *offset_B,
2202                        uint32_t *x_offset_sa,
2203                        uint32_t *y_offset_sa)
2204{
2205   isl_surf_get_image_offset_B_tile_sa(surf,
2206                                       level,
2207                                       logical_array_layer,
2208                                       logical_z_offset_px,
2209                                       offset_B,
2210                                       x_offset_sa,
2211                                       y_offset_sa);
2212
2213   /* Even for cube maps there will be only single face, therefore drop the
2214    * corresponding flag if present.
2215    */
2216   const isl_surf_usage_flags_t usage =
2217      surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
2218
2219   bool ok UNUSED;
2220   ok = isl_surf_init(dev, image_surf,
2221                      .dim = ISL_SURF_DIM_2D,
2222                      .format = surf->format,
2223                      .width = isl_minify(surf->logical_level0_px.w, level),
2224                      .height = isl_minify(surf->logical_level0_px.h, level),
2225                      .depth = 1,
2226                      .levels = 1,
2227                      .array_len = 1,
2228                      .samples = surf->samples,
2229                      .row_pitch_B = surf->row_pitch_B,
2230                      .usage = usage,
2231                      .tiling_flags = (1 << surf->tiling));
2232   assert(ok);
2233}
2234
2235void
2236isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
2237                                   uint32_t bpb,
2238                                   uint32_t row_pitch_B,
2239                                   uint32_t total_x_offset_el,
2240                                   uint32_t total_y_offset_el,
2241                                   uint32_t *base_address_offset,
2242                                   uint32_t *x_offset_el,
2243                                   uint32_t *y_offset_el)
2244{
2245   if (tiling == ISL_TILING_LINEAR) {
2246      assert(bpb % 8 == 0);
2247      *base_address_offset = total_y_offset_el * row_pitch_B +
2248                             total_x_offset_el * (bpb / 8);
2249      *x_offset_el = 0;
2250      *y_offset_el = 0;
2251      return;
2252   }
2253
2254   struct isl_tile_info tile_info;
2255   isl_tiling_get_info(tiling, bpb, &tile_info);
2256
2257   assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
2258
2259   /* For non-power-of-two formats, we need the address to be both tile and
2260    * element-aligned.  The easiest way to achieve this is to work with a tile
2261    * that is three times as wide as the regular tile.
2262    *
2263    * The tile info returned by get_tile_info has a logical size that is an
2264    * integer number of tile_info.format_bpb size elements.  To scale the
2265    * tile, we scale up the physical width and then treat the logical tile
2266    * size as if it has bpb size elements.
2267    */
2268   const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
2269   tile_info.phys_extent_B.width *= tile_el_scale;
2270
2271   /* Compute the offset into the tile */
2272   *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
2273   *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
2274
2275   /* Compute the offset of the tile in units of whole tiles */
2276   uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
2277   uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
2278
2279   *base_address_offset =
2280      y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B +
2281      x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
2282}
2283
2284uint32_t
2285isl_surf_get_depth_format(const struct isl_device *dev,
2286                          const struct isl_surf *surf)
2287{
2288   /* Support for separate stencil buffers began in gen5. Support for
2289    * interleaved depthstencil buffers ceased in gen7. The intermediate gens,
2290    * those that supported separate and interleaved stencil, were gen5 and
2291    * gen6.
2292    *
2293    * For a list of all available formats, see the Sandybridge PRM >> Volume
2294    * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
2295    * Format (p321).
2296    */
2297
2298   bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
2299
2300   assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
2301
2302   if (has_stencil)
2303      assert(ISL_DEV_GEN(dev) < 7);
2304
2305   switch (surf->format) {
2306   default:
2307      unreachable("bad isl depth format");
2308   case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
2309      assert(ISL_DEV_GEN(dev) < 7);
2310      return 0; /* D32_FLOAT_S8X24_UINT */
2311   case ISL_FORMAT_R32_FLOAT:
2312      assert(!has_stencil);
2313      return 1; /* D32_FLOAT */
2314   case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
2315      if (has_stencil) {
2316         assert(ISL_DEV_GEN(dev) < 7);
2317         return 2; /* D24_UNORM_S8_UINT */
2318      } else {
2319         assert(ISL_DEV_GEN(dev) >= 5);
2320         return 3; /* D24_UNORM_X8_UINT */
2321      }
2322   case ISL_FORMAT_R16_UNORM:
2323      assert(!has_stencil);
2324      return 5; /* D16_UNORM */
2325   }
2326}
2327
2328bool
2329isl_swizzle_supports_rendering(const struct gen_device_info *devinfo,
2330                               struct isl_swizzle swizzle)
2331{
2332   if (devinfo->is_haswell) {
2333      /* From the Haswell PRM,
2334       * RENDER_SURFACE_STATE::Shader Channel Select Red
2335       *
2336       *    "The Shader channel selects also define which shader channels are
2337       *    written to which surface channel. If the Shader channel select is
2338       *    SCS_ZERO or SCS_ONE then it is not written to the surface. If the
2339       *    shader channel select is SCS_RED it is written to the surface red
2340       *    channel and so on. If more than one shader channel select is set
2341       *    to the same surface channel only the first shader channel in RGBA
2342       *    order will be written."
2343       */
2344      return true;
2345   } else if (devinfo->gen <= 7) {
2346      /* Ivy Bridge and early doesn't have any swizzling */
2347      return isl_swizzle_is_identity(swizzle);
2348   } else {
2349      /* From the Sky Lake PRM Vol. 2d,
2350       * RENDER_SURFACE_STATE::Shader Channel Select Red
2351       *
2352       *    "For Render Target, Red, Green and Blue Shader Channel Selects
2353       *    MUST be such that only valid components can be swapped i.e. only
2354       *    change the order of components in the pixel. Any other values for
2355       *    these Shader Channel Select fields are not valid for Render
2356       *    Targets. This also means that there MUST not be multiple shader
2357       *    channels mapped to the same RT channel."
2358       *
2359       * From the Sky Lake PRM Vol. 2d,
2360       * RENDER_SURFACE_STATE::Shader Channel Select Alpha
2361       *
2362       *    "For Render Target, this field MUST be programmed to
2363       *    value = SCS_ALPHA."
2364       */
2365      return (swizzle.r == ISL_CHANNEL_SELECT_RED ||
2366              swizzle.r == ISL_CHANNEL_SELECT_GREEN ||
2367              swizzle.r == ISL_CHANNEL_SELECT_BLUE) &&
2368             (swizzle.g == ISL_CHANNEL_SELECT_RED ||
2369              swizzle.g == ISL_CHANNEL_SELECT_GREEN ||
2370              swizzle.g == ISL_CHANNEL_SELECT_BLUE) &&
2371             (swizzle.b == ISL_CHANNEL_SELECT_RED ||
2372              swizzle.b == ISL_CHANNEL_SELECT_GREEN ||
2373              swizzle.b == ISL_CHANNEL_SELECT_BLUE) &&
2374             swizzle.r != swizzle.g &&
2375             swizzle.r != swizzle.b &&
2376             swizzle.g != swizzle.b &&
2377             swizzle.a == ISL_CHANNEL_SELECT_ALPHA;
2378   }
2379}
2380
2381static enum isl_channel_select
2382swizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle)
2383{
2384   switch (chan) {
2385   case ISL_CHANNEL_SELECT_ZERO:
2386   case ISL_CHANNEL_SELECT_ONE:
2387      return chan;
2388   case ISL_CHANNEL_SELECT_RED:
2389      return swizzle.r;
2390   case ISL_CHANNEL_SELECT_GREEN:
2391      return swizzle.g;
2392   case ISL_CHANNEL_SELECT_BLUE:
2393      return swizzle.b;
2394   case ISL_CHANNEL_SELECT_ALPHA:
2395      return swizzle.a;
2396   default:
2397      unreachable("Invalid swizzle component");
2398   }
2399}
2400
2401/**
2402 * Returns the single swizzle that is equivalent to applying the two given
2403 * swizzles in sequence.
2404 */
2405struct isl_swizzle
2406isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second)
2407{
2408   return (struct isl_swizzle) {
2409      .r = swizzle_select(first.r, second),
2410      .g = swizzle_select(first.g, second),
2411      .b = swizzle_select(first.b, second),
2412      .a = swizzle_select(first.a, second),
2413   };
2414}
2415
2416/**
2417 * Returns a swizzle that is the pseudo-inverse of this swizzle.
2418 */
2419struct isl_swizzle
2420isl_swizzle_invert(struct isl_swizzle swizzle)
2421{
2422   /* Default to zero for channels which do not show up in the swizzle */
2423   enum isl_channel_select chans[4] = {
2424      ISL_CHANNEL_SELECT_ZERO,
2425      ISL_CHANNEL_SELECT_ZERO,
2426      ISL_CHANNEL_SELECT_ZERO,
2427      ISL_CHANNEL_SELECT_ZERO,
2428   };
2429
2430   /* We go in ABGR order so that, if there are any duplicates, the first one
2431    * is taken if you look at it in RGBA order.  This is what Haswell hardware
2432    * does for render target swizzles.
2433    */
2434   if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
2435      chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA;
2436   if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
2437      chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE;
2438   if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
2439      chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN;
2440   if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
2441      chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED;
2442
2443   return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] };
2444}
2445