1/*
2 * Copyright (C) 2008 VMware, Inc.
3 * Copyright (C) 2014 Broadcom
4 * Copyright (C) 2018-2019 Alyssa Rosenzweig
5 * Copyright (C) 2019-2020 Collabora, Ltd.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 *
26 */
27
28#include "util/macros.h"
29#include "util/u_math.h"
30#include "pan_texture.h"
31#include "panfrost-quirks.h"
32
33#ifndef PAN_ARCH
34
35/* Generates a texture descriptor. Ideally, descriptors are immutable after the
36 * texture is created, so we can keep these hanging around in GPU memory in a
37 * dedicated BO and not have to worry. In practice there are some minor gotchas
38 * with this (the driver sometimes will change the format of a texture on the
39 * fly for compression) but it's fast enough to just regenerate the descriptor
40 * in those cases, rather than monkeypatching at drawtime. A texture descriptor
41 * consists of a 32-byte header followed by pointers.
42 */
43
44/* List of supported modifiers, in descending order of preference. AFBC is
45 * faster than u-interleaved tiling which is faster than linear. Within AFBC,
46 * enabling the YUV-like transform is typically a win where possible. */
47
48uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
49        DRM_FORMAT_MOD_ARM_AFBC(
50                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
51                AFBC_FORMAT_MOD_SPARSE |
52                AFBC_FORMAT_MOD_YTR),
53
54        DRM_FORMAT_MOD_ARM_AFBC(
55                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
56                AFBC_FORMAT_MOD_SPARSE),
57
58        DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
59        DRM_FORMAT_MOD_LINEAR
60};
61
/* If not explicitly specified, the line stride is calculated for block-based
 * formats as (ceil(width / block_width) * block_size). As a special case, this
 * is left zero if there is only a single block vertically. So, we have a
 * helper to extract the dimensions of a block-based format and use that to
 * calculate the line stride as such.
 */
68
69unsigned
70panfrost_block_dim(uint64_t modifier, bool width, unsigned plane)
71{
72        if (!drm_is_afbc(modifier)) {
73                assert(modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
74                return 16;
75        }
76
77        switch (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) {
78        case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16:
79                return 16;
80        case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8:
81                return width ? 32 : 8;
82        case AFBC_FORMAT_MOD_BLOCK_SIZE_64x4:
83                return width ? 64 : 4;
84        case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4:
85                return plane ? (width ? 64 : 4) : (width ? 32 : 8);
86        default:
87                unreachable("Invalid AFBC block size");
88        }
89}
90
91/* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
92 * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
93 * This feature is also known as "transaction elimination". */
94
95#define CHECKSUM_TILE_WIDTH 16
96#define CHECKSUM_TILE_HEIGHT 16
97#define CHECKSUM_BYTES_PER_TILE 8
98
99unsigned
100panfrost_compute_checksum_size(
101        struct pan_image_slice_layout *slice,
102        unsigned width,
103        unsigned height)
104{
105        unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH);
106        unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT);
107
108        slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
109
110        return slice->crc.stride * tile_count_y;
111}
112
113unsigned
114panfrost_get_layer_stride(const struct pan_image_layout *layout,
115                          unsigned level)
116{
117        if (layout->dim != MALI_TEXTURE_DIMENSION_3D)
118                return layout->array_stride;
119        else if (drm_is_afbc(layout->modifier))
120                return layout->slices[level].afbc.surface_stride;
121        else
122                return layout->slices[level].surface_stride;
123}
124
125/* Computes the offset into a texture at a particular level/face. Add to
126 * the base address of a texture to get the address to that level/face */
127
128unsigned
129panfrost_texture_offset(const struct pan_image_layout *layout,
130                        unsigned level, unsigned array_idx,
131                        unsigned surface_idx)
132{
133        return layout->slices[level].offset +
134               (array_idx * layout->array_stride) +
135               (surface_idx * layout->slices[level].surface_stride);
136}
137
/* Computes the memory layout of an image and stores it in *layout.
 *
 * The image properties (modifier, format, dimension, size, array size, sample
 * count, number of mip levels, CRC mode) are copied into the layout, then the
 * per-level slice layouts (offset, strides, sizes, AFBC and CRC information)
 * are filled in one mip level at a time, followed by the array stride and the
 * total data/CRC sizes.
 *
 * If explicit_layout is non-NULL, its offset and line stride are used instead
 * of computed ones. In that case the function returns false (with *layout
 * possibly partially written) if the image is not a single-level,
 * single-layer, single-sample 2D image, if in-band CRC is requested, if the
 * offset is not 64-byte aligned, or if the line stride is smaller than the
 * minimum stride computed for the level. Returns true otherwise.
 *
 * Note: dev is not referenced by this function.
 */
bool
pan_image_layout_init(const struct panfrost_device *dev,
                      struct pan_image_layout *layout,
                      uint64_t modifier,
                      enum pipe_format format,
                      enum mali_texture_dimension dim,
                      unsigned width, unsigned height, unsigned depth,
                      unsigned array_size, unsigned nr_samples,
                      unsigned nr_slices, enum pan_image_crc_mode crc_mode,
                      const struct pan_image_explicit_layout *explicit_layout)
{
        /* An explicit stride only works with non-mipmapped, non-array,
         * single-sample 2D images, and in-band CRC can't be used.
         */
        if (explicit_layout &&
	    (depth > 1 || nr_samples > 1 || array_size > 1 ||
             dim != MALI_TEXTURE_DIMENSION_2D || nr_slices > 1 ||
             crc_mode == PAN_IMAGE_CRC_INBAND))
                return false;

        /* Mandate 64 byte alignment */
        if (explicit_layout && (explicit_layout->offset & 63))
                return false;

        layout->crc_mode = crc_mode;
        layout->modifier = modifier;
        layout->format = format;
        layout->dim = dim;
        layout->width = width;
        layout->height = height;
        layout->depth = depth;
        layout->array_size = array_size;
        layout->nr_samples = nr_samples;
        layout->nr_slices = nr_slices;

        /* Size in bytes of one format block, as reported by
         * util_format_get_blocksize() */
        unsigned bytes_per_pixel = util_format_get_blocksize(format);

        /* MSAA is implemented as a 3D texture with z corresponding to the
         * sample #, horrifyingly enough */

        assert(depth == 1 || nr_samples == 1);

        bool afbc = drm_is_afbc(layout->modifier);
        bool tiled = layout->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED;
        bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR;
        bool should_align = tiled || afbc;
        bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D;

        /* Out-of-band CRC data is accumulated separately from the image
         * data, so it gets its own running offset */
        unsigned oob_crc_offset = 0;
        unsigned offset = explicit_layout ? explicit_layout->offset : 0;
        unsigned tile_h = 1, tile_w = 1, tile_shift = 0;

        if (tiled || afbc) {
                tile_w = panfrost_block_dim(layout->modifier, true, 0);
                tile_h = panfrost_block_dim(layout->modifier, false, 0);

                /* NOTE(review): the shift by 2 presumably accounts for 4x4
                 * compressed blocks -- confirm against the format tables */
                if (util_format_is_compressed(format))
                        tile_shift = 2;
        }

        /* Lay out the mip levels one after the other; width, height and
         * depth are minified at the end of each iteration */
        for (unsigned l = 0; l < nr_slices; ++l) {
                struct pan_image_slice_layout *slice = &layout->slices[l];

                unsigned effective_width = width;
                unsigned effective_height = height;
                unsigned effective_depth = depth;

                if (should_align) {
                        effective_width = ALIGN_POT(effective_width, tile_w) >> tile_shift;
                        effective_height = ALIGN_POT(effective_height, tile_h);

                        /* We don't need to align depth */
                }

                /* Align levels to cache-line as a performance improvement for
                 * linear/tiled and as a requirement for AFBC */

                offset = ALIGN_POT(offset, 64);

                slice->offset = offset;

                /* Compute the would-be stride */
                unsigned stride = bytes_per_pixel * effective_width;

                if (explicit_layout) {
                        /* Make sure the explicit stride is valid */
                        if (explicit_layout->line_stride < stride)
                                return false;

                        stride = explicit_layout->line_stride;
                } else if (linear) {
                        /* Keep lines alignment on 64 byte for performance */
                        stride = ALIGN_POT(stride, 64);
                }

                slice->line_stride = stride;
                slice->row_stride = stride * (tile_h >> tile_shift);

                /* Size of a single 2D surface of this level */
                unsigned slice_one_size = slice->line_stride * effective_height;

                /* Compute AFBC sizes if necessary */
                if (afbc) {
                        slice->afbc.header_size =
                                panfrost_afbc_header_size(width, height);

                        /* Stride between two rows of AFBC headers */
                        slice->afbc.row_stride =
                                (effective_width / tile_w) *
                                AFBC_HEADER_BYTES_PER_TILE;

                        /* AFBC body size */
                        slice->afbc.body_size = slice_one_size;

                        /* 3D AFBC resources have all headers placed at the
                         * beginning instead of having them split per depth
                         * level
                         */
                        if (is_3d) {
                                slice->afbc.surface_stride =
                                        slice->afbc.header_size;
                                slice->afbc.header_size *= effective_depth;
                                slice->afbc.body_size *= effective_depth;
                                offset += slice->afbc.header_size;
                        } else {
                                slice_one_size += slice->afbc.header_size;
                                slice->afbc.surface_stride = slice_one_size;
                        }
                }

                /* One surface per depth slice or per sample (the assert above
                 * guarantees at most one of the two is greater than 1) */
                unsigned slice_full_size =
                        slice_one_size * effective_depth * nr_samples;

                slice->surface_stride = slice_one_size;

                /* Advance past the data of this level and record its size */

                offset += slice_full_size;
                slice->size = slice_full_size;

                /* Add a checksum region if necessary */
                if (crc_mode != PAN_IMAGE_CRC_NONE) {
                        slice->crc.size =
                                panfrost_compute_checksum_size(slice, width, height);

                        if (crc_mode == PAN_IMAGE_CRC_INBAND) {
                                /* In-band: the checksums directly follow the
                                 * level data and count towards its size */
                                slice->crc.offset = offset;
                                offset += slice->crc.size;
                                slice->size += slice->crc.size;
                        } else {
                                /* Out-of-band: offsets are relative to a
                                 * separate CRC region */
                                slice->crc.offset = oob_crc_offset;
                                oob_crc_offset += slice->crc.size;
                        }
                }

                width = u_minify(width, 1);
                height = u_minify(height, 1);
                depth = u_minify(depth, 1);
        }

        /* Arrays and cubemaps have the entire miptree duplicated */
        layout->array_stride = ALIGN_POT(offset, 64);

        /* With an explicit layout the size is exactly the end of the data;
         * otherwise the whole array is padded to a 4096-byte multiple */
        if (explicit_layout)
                layout->data_size = offset;
        else
                layout->data_size = ALIGN_POT(layout->array_stride * array_size, 4096);
        layout->crc_size = oob_crc_offset;

        return true;
}
306
307void
308pan_iview_get_surface(const struct pan_image_view *iview,
309                      unsigned level, unsigned layer, unsigned sample,
310                      struct pan_surface *surf)
311{
312        level += iview->first_level;
313        assert(level < iview->image->layout.nr_slices);
314
315       layer += iview->first_layer;
316
317        bool is_3d = iview->image->layout.dim == MALI_TEXTURE_DIMENSION_3D;
318        const struct pan_image_slice_layout *slice = &iview->image->layout.slices[level];
319        mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset;
320
321        if (drm_is_afbc(iview->image->layout.modifier)) {
322                assert(!sample);
323
324                if (is_3d) {
325                        ASSERTED unsigned depth = u_minify(iview->image->layout.depth, level);
326                        assert(layer < depth);
327                        surf->afbc.header = base + slice->offset +
328                                           (layer * slice->afbc.surface_stride);
329                        surf->afbc.body = base + slice->offset +
330                                          slice->afbc.header_size +
331                                          (slice->surface_stride * layer);
332                } else {
333                        assert(layer < iview->image->layout.array_size);
334                        surf->afbc.header = base +
335                                            panfrost_texture_offset(&iview->image->layout,
336                                                                    level, layer, 0);
337                        surf->afbc.body = surf->afbc.header + slice->afbc.header_size;
338                }
339        } else {
340                unsigned array_idx = is_3d ? 0 : layer;
341                unsigned surface_idx = is_3d ? layer : sample;
342
343                surf->data = base +
344                             panfrost_texture_offset(&iview->image->layout, level,
345                                                     array_idx, surface_idx);
346        }
347}
348
349#else /* ifndef PAN_ARCH */
350
351#if PAN_ARCH >= 5
352/* Arm Scalable Texture Compression (ASTC) corresponds to just a few formats.
353 * The block dimension is not part of the format. Instead, it is encoded as a
354 * 6-bit tag on the payload pointer. Map the block size for a single dimension.
355 */
356
357static inline enum mali_astc_2d_dimension
358panfrost_astc_dim_2d(unsigned dim)
359{
360        switch (dim) {
361        case  4: return MALI_ASTC_2D_DIMENSION_4;
362        case  5: return MALI_ASTC_2D_DIMENSION_5;
363        case  6: return MALI_ASTC_2D_DIMENSION_6;
364        case  8: return MALI_ASTC_2D_DIMENSION_8;
365        case 10: return MALI_ASTC_2D_DIMENSION_10;
366        case 12: return MALI_ASTC_2D_DIMENSION_12;
367        default: unreachable("Invalid ASTC dimension");
368        }
369}
370
371static inline enum mali_astc_3d_dimension
372panfrost_astc_dim_3d(unsigned dim)
373{
374        switch (dim) {
375        case  3: return MALI_ASTC_3D_DIMENSION_3;
376        case  4: return MALI_ASTC_3D_DIMENSION_4;
377        case  5: return MALI_ASTC_3D_DIMENSION_5;
378        case  6: return MALI_ASTC_3D_DIMENSION_6;
379        default: unreachable("Invalid ASTC dimension");
380        }
381}
382
383/* Texture addresses are tagged with information about compressed formats.
384 * AFBC uses a bit for whether the colorspace transform is enabled (RGB and
385 * RGBA only).
386 * For ASTC, this is a "stretch factor" encoding the block size. */
387
388static unsigned
389panfrost_compression_tag(const struct util_format_description *desc,
390                         enum mali_texture_dimension dim,
391                         uint64_t modifier)
392{
393        if (drm_is_afbc(modifier)) {
394                unsigned flags = (modifier & AFBC_FORMAT_MOD_YTR) ?
395                                 MALI_AFBC_SURFACE_FLAG_YTR : 0;
396
397#if PAN_ARCH >= 6
398                /* Prefetch enable */
399                flags |= MALI_AFBC_SURFACE_FLAG_PREFETCH;
400
401                /* Wide blocks (> 16x16) */
402                if (panfrost_block_dim(modifier, true, 0) > 16)
403                        flags |= MALI_AFBC_SURFACE_FLAG_WIDE_BLOCK;
404
405                /* Used to make sure AFBC headers don't point outside the AFBC
406                 * body. HW is using the AFBC surface stride to do this check,
407                 * which doesn't work for 3D textures because the surface
408                 * stride does not cover the body. Only supported on v7+.
409                 */
410#endif
411
412#if PAN_ARCH >= 7
413                if (dim != MALI_TEXTURE_DIMENSION_3D)
414                        flags |= MALI_AFBC_SURFACE_FLAG_CHECK_PAYLOAD_RANGE;
415#endif
416
417                return flags;
418        } else if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
419                if (desc->block.depth > 1) {
420                        return (panfrost_astc_dim_3d(desc->block.depth) << 4) |
421                               (panfrost_astc_dim_3d(desc->block.height) << 2) |
422                                panfrost_astc_dim_3d(desc->block.width);
423                } else {
424                        return (panfrost_astc_dim_2d(desc->block.height) << 3) |
425                                panfrost_astc_dim_2d(desc->block.width);
426                }
427        } else {
428                return 0;
429        }
430}
431#endif
432
433/* Cubemaps have 6 faces as "layers" in between each actual layer. We
434 * need to fix this up. TODO: logic wrong in the asserted out cases ...
435 * can they happen, perhaps from cubemap arrays? */
436
static void
panfrost_adjust_cube_dimensions(
                unsigned *first_face, unsigned *last_face,
                unsigned *first_layer, unsigned *last_layer)
{
        const unsigned faces_per_cube = 6;

        /* The face is the position inside a group of six layers... */
        *first_face = *first_layer % faces_per_cube;
        *last_face = *last_layer % faces_per_cube;

        /* ...and the cube index is which group of six it belongs to */
        *first_layer = *first_layer / faces_per_cube;
        *last_layer = *last_layer / faces_per_cube;

        /* A range spanning several cubes must cover whole cubes */
        assert((*first_layer == *last_layer) || (*first_face == 0 && *last_face == 5));
}
449
450/* Following the texture descriptor is a number of pointers. How many? */
451
static unsigned
panfrost_texture_num_elements(
                unsigned first_level, unsigned last_level,
                unsigned first_layer, unsigned last_layer,
                unsigned nr_samples,
                bool is_cube, bool manual_stride)
{
        unsigned first_face = 0, last_face = 0;

        /* For cubes, split the layer range into faces and cube indices */
        if (is_cube) {
                panfrost_adjust_cube_dimensions(&first_face, &last_face,
                                                &first_layer, &last_layer);
        }

        /* One element per (level, layer, face, sample) combination; a sample
         * count of zero counts as one */
        unsigned count = MAX2(nr_samples, 1);

        count *= 1 + last_level - first_level;
        count *= 1 + last_layer - first_layer;
        count *= 1 + last_face - first_face;

        /* Manual strides take a second element per surface */
        return manual_stride ? count * 2 : count;
}
476
477/* Conservative estimate of the size of the texture payload a priori.
478 * Average case, size equal to the actual size. Worst case, off by 2x (if
479 * a manual stride is not needed on a linear texture). Returned value
480 * must be greater than or equal to the actual size, so it's safe to use
481 * as an allocation amount */
482
483unsigned
484GENX(panfrost_estimate_texture_payload_size)(const struct pan_image_view *iview)
485{
486        /* Assume worst case */
487        unsigned manual_stride = PAN_ARCH >= 6 ||
488                                 (iview->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
489
490        unsigned elements =
491                panfrost_texture_num_elements(iview->first_level, iview->last_level,
492                                              iview->first_layer, iview->last_layer,
493                                              iview->image->layout.nr_samples,
494                                              iview->dim == MALI_TEXTURE_DIMENSION_CUBE,
495                                              manual_stride);
496
497        return sizeof(mali_ptr) * elements;
498}
499
/* Cursor over the surfaces of a texture payload. Each dimension has a current
 * value and an inclusive upper bound; level, face and sample also remember
 * their first value so they can be rewound when an outer dimension advances.
 * The layer is the outermost dimension and is never rewound. */
struct panfrost_surface_iter {
        unsigned layer, last_layer;
        unsigned level, first_level, last_level;
        unsigned face, first_face, last_face;
        unsigned sample, first_sample, last_sample;
};

/* Positions the iterator on the first surface of the given inclusive
 * layer/level/face ranges. Samples always range over [0, nr_samples - 1]. */
static void
panfrost_surface_iter_begin(struct panfrost_surface_iter *iter,
                            unsigned first_layer, unsigned last_layer,
                            unsigned first_level, unsigned last_level,
                            unsigned first_face, unsigned last_face,
                            unsigned nr_samples)
{
        iter->layer = first_layer;
        iter->last_layer = last_layer;
        iter->level = iter->first_level = first_level;
        iter->last_level = last_level;
        iter->face = iter->first_face = first_face;
        iter->last_face = last_face;
        iter->sample = iter->first_sample = 0;

        /* Treat a sample count of 0 like 1, matching the MAX2(nr_samples, 1)
         * clamp used by panfrost_texture_num_elements(). Without this,
         * nr_samples - 1 would wrap around to UINT_MAX. */
        iter->last_sample = nr_samples ? nr_samples - 1 : 0;
}
523
524static bool
525panfrost_surface_iter_end(const struct panfrost_surface_iter *iter)
526{
527        return iter->layer > iter->last_layer;
528}
529
530static void
531panfrost_surface_iter_next(struct panfrost_surface_iter *iter)
532{
533#define INC_TEST(field) \
534        do { \
535                if (iter->field++ < iter->last_ ## field) \
536                       return; \
537                iter->field = iter->first_ ## field; \
538        } while (0)
539
540        /* Ordering is different on v7: inner loop is iterating on levels */
541        if (PAN_ARCH >= 7)
542                INC_TEST(level);
543
544        INC_TEST(sample);
545        INC_TEST(face);
546
547        if (PAN_ARCH < 7)
548                INC_TEST(level);
549
550        iter->layer++;
551
552#undef INC_TEST
553}
554
555static void
556panfrost_get_surface_strides(const struct pan_image_layout *layout,
557                             unsigned l,
558                             int32_t *row_stride, int32_t *surf_stride)
559{
560        const struct pan_image_slice_layout *slice = &layout->slices[l];
561
562        if (drm_is_afbc(layout->modifier)) {
563                /* Pre v7 don't have a row stride field. This field is
564                 * repurposed as a Y offset which we don't use */
565                *row_stride = PAN_ARCH < 7 ? 0 : slice->afbc.row_stride;
566                *surf_stride = slice->afbc.surface_stride;
567        } else {
568                *row_stride = slice->row_stride;
569                *surf_stride = slice->surface_stride;
570        }
571}
572
573static mali_ptr
574panfrost_get_surface_pointer(const struct pan_image_layout *layout,
575                             enum mali_texture_dimension dim,
576                             mali_ptr base,
577                             unsigned l, unsigned w, unsigned f, unsigned s)
578{
579        unsigned face_mult = dim == MALI_TEXTURE_DIMENSION_CUBE ? 6 : 1;
580        unsigned offset;
581
582        if (layout->dim == MALI_TEXTURE_DIMENSION_3D) {
583                assert(!f && !s);
584                offset = layout->slices[l].offset +
585                         (w * panfrost_get_layer_stride(layout, l));
586        } else {
587                offset = panfrost_texture_offset(layout, l, (w * face_mult) + f, s);
588        }
589
590        return base + offset;
591}
592
593static void
594panfrost_emit_texture_payload(const struct pan_image_view *iview,
595                              enum pipe_format format,
596                              bool manual_stride,
597                              void *payload)
598{
599        const struct pan_image_layout *layout = &iview->image->layout;
600        ASSERTED const struct util_format_description *desc =
601                util_format_description(format);
602
603        mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset;
604
605        if (iview->buf.size) {
606                assert (iview->dim == MALI_TEXTURE_DIMENSION_1D);
607                base += iview->buf.offset;
608        }
609
610#if PAN_ARCH >= 5
611        /* panfrost_compression_tag() wants the dimension of the resource, not the
612         * one of the image view (those might differ).
613         */
614        base |= panfrost_compression_tag(desc, layout->dim, layout->modifier);
615#else
616        assert(!drm_is_afbc(layout->modifier) && "no AFBC on v4");
617        assert(desc->layout != UTIL_FORMAT_LAYOUT_ASTC && "no ASTC on v4");
618#endif
619
620        /* Inject the addresses in, interleaving array indices, mip levels,
621         * cube faces, and strides in that order */
622
623        unsigned first_layer = iview->first_layer, last_layer = iview->last_layer;
624        unsigned nr_samples = layout->nr_samples;
625        unsigned first_face = 0, last_face = 0;
626
627        if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) {
628                panfrost_adjust_cube_dimensions(&first_face, &last_face,
629                                                &first_layer, &last_layer);
630        }
631
632        struct panfrost_surface_iter iter;
633
634        for (panfrost_surface_iter_begin(&iter, first_layer, last_layer,
635                                         iview->first_level, iview->last_level,
636                                         first_face, last_face, nr_samples);
637             !panfrost_surface_iter_end(&iter);
638             panfrost_surface_iter_next(&iter)) {
639                mali_ptr pointer =
640                        panfrost_get_surface_pointer(layout, iview->dim, base,
641                                                     iter.level, iter.layer,
642                                                     iter.face, iter.sample);
643
644                if (!manual_stride) {
645                        pan_pack(payload, SURFACE, cfg) {
646                                cfg.pointer = pointer;
647                        }
648                        payload += pan_size(SURFACE);
649                } else {
650                        pan_pack(payload, SURFACE_WITH_STRIDE, cfg) {
651                                cfg.pointer = pointer;
652                                panfrost_get_surface_strides(layout, iter.level,
653                                                             &cfg.row_stride,
654                                                             &cfg.surface_stride);
655                        }
656                        payload += pan_size(SURFACE_WITH_STRIDE);
657                }
658        }
659}
660
661/* Check if we need to set a custom stride by computing the "expected"
662 * stride and comparing it to what the user actually wants. Only applies
663 * to linear textures, since tiled/compressed textures have strict
664 * alignment requirements for their strides as it is */
665
666static bool
667panfrost_needs_explicit_stride(const struct pan_image_view *iview)
668{
669        /* Stride is explicit on Bifrost */
670        if (PAN_ARCH >= 6)
671                return true;
672
673        if (iview->image->layout.modifier != DRM_FORMAT_MOD_LINEAR)
674                return false;
675
676        unsigned bytes_per_block = util_format_get_blocksize(iview->format);
677        unsigned block_w = util_format_get_blockwidth(iview->format);
678
679        for (unsigned l = iview->first_level; l <= iview->last_level; ++l) {
680                unsigned actual = iview->image->layout.slices[l].line_stride;
681                unsigned expected =
682                        DIV_ROUND_UP(u_minify(iview->image->layout.width, l), block_w) *
683                        bytes_per_block;
684
685                if (actual != expected)
686                        return true;
687        }
688
689        return false;
690}
691
692/* Map modifiers to mali_texture_layout for packing in a texture descriptor */
693
694static enum mali_texture_layout
695panfrost_modifier_to_layout(uint64_t modifier)
696{
697        if (drm_is_afbc(modifier))
698                return MALI_TEXTURE_LAYOUT_AFBC;
699        else if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
700                return MALI_TEXTURE_LAYOUT_TILED;
701        else if (modifier == DRM_FORMAT_MOD_LINEAR)
702                return MALI_TEXTURE_LAYOUT_LINEAR;
703        else
704                unreachable("Invalid modifer");
705}
706
707void
708GENX(panfrost_new_texture)(const struct panfrost_device *dev,
709                           const struct pan_image_view *iview,
710                           void *out, const struct panfrost_ptr *payload)
711{
712        const struct pan_image_layout *layout = &iview->image->layout;
713        enum pipe_format format = iview->format;
714        unsigned swizzle;
715
716        if (PAN_ARCH == 7 && util_format_is_depth_or_stencil(format)) {
717                /* v7 doesn't have an _RRRR component order, combine the
718                 * user swizzle with a .XXXX swizzle to emulate that.
719                 */
720                static const unsigned char replicate_x[4] = {
721                        PIPE_SWIZZLE_X, PIPE_SWIZZLE_X,
722                        PIPE_SWIZZLE_X, PIPE_SWIZZLE_X,
723                };
724                unsigned char patched_swizzle[4];
725
726                util_format_compose_swizzles(replicate_x,
727                                             iview->swizzle,
728                                             patched_swizzle);
729                swizzle = panfrost_translate_swizzle_4(patched_swizzle);
730        } else {
731                swizzle = panfrost_translate_swizzle_4(iview->swizzle);
732        }
733
734        bool manual_stride =
735                panfrost_needs_explicit_stride(iview);
736
737        panfrost_emit_texture_payload(iview, format,
738                                      manual_stride,
739                                      payload->cpu);
740
741        unsigned array_size = iview->last_layer - iview->first_layer + 1;
742
743        if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) {
744                assert(iview->first_layer % 6 == 0);
745                assert(iview->last_layer % 6 == 5);
746                array_size /=  6;
747        }
748
749        unsigned width;
750
751        if (iview->buf.size) {
752                assert(iview->dim == MALI_TEXTURE_DIMENSION_1D);
753                assert(!iview->first_level && !iview->last_level);
754                assert(!iview->first_layer && !iview->last_layer);
755                assert(layout->nr_samples == 1);
756                assert(layout->height == 1 && layout->depth == 1);
757                assert(iview->buf.offset + iview->buf.size <= layout->width);
758                width = iview->buf.size;
759        } else {
760                width = u_minify(layout->width, iview->first_level);
761        }
762
763        pan_pack(out, TEXTURE, cfg) {
764                cfg.dimension = iview->dim;
765                cfg.format = dev->formats[format].hw;
766                cfg.width = width;
767                cfg.height = u_minify(layout->height, iview->first_level);
768                if (iview->dim == MALI_TEXTURE_DIMENSION_3D)
769                        cfg.depth = u_minify(layout->depth, iview->first_level);
770                else
771                        cfg.sample_count = layout->nr_samples;
772                cfg.swizzle = swizzle;
773                cfg.texel_ordering =
774                        panfrost_modifier_to_layout(layout->modifier);
775                cfg.levels = iview->last_level - iview->first_level + 1;
776                cfg.array_size = array_size;
777
778#if PAN_ARCH >= 6
779                cfg.surfaces = payload->gpu;
780
781                /* We specify API-level LOD clamps in the sampler descriptor
782                 * and use these clamps simply for bounds checking */
783                cfg.minimum_lod = FIXED_16(0, false);
784                cfg.maximum_lod = FIXED_16(cfg.levels - 1, false);
785#else
786                cfg.manual_stride = manual_stride;
787#endif
788        }
789}
790#endif /* ifdef PAN_ARCH */
791