pan_cs.c revision 7ec681f3
1/*
2 * Copyright (C) 2021 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 *   Boris Brezillon <boris.brezillon@collabora.com>
26 */
27
28#include "util/macros.h"
29
30#include "panfrost-quirks.h"
31
32#include "pan_cs.h"
33#include "pan_encoder.h"
34#include "pan_texture.h"
35
36static unsigned
37mod_to_block_fmt(uint64_t mod)
38{
39        switch (mod) {
40        case DRM_FORMAT_MOD_LINEAR:
41                return MALI_BLOCK_FORMAT_LINEAR;
42	case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
43                return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
44        default:
45#if PAN_ARCH >= 5
46                if (drm_is_afbc(mod))
47                        return MALI_BLOCK_FORMAT_AFBC;
48#endif
49
50                unreachable("Unsupported modifer");
51        }
52}
53
54static enum mali_msaa
55mali_sampling_mode(const struct pan_image_view *view)
56{
57        if (view->image->layout.nr_samples > 1) {
58                assert(view->nr_samples == view->image->layout.nr_samples);
59                assert(view->image->layout.slices[0].surface_stride != 0);
60                return MALI_MSAA_LAYERED;
61        }
62
63        if (view->nr_samples > view->image->layout.nr_samples) {
64                assert(view->image->layout.nr_samples == 1);
65                return MALI_MSAA_AVERAGE;
66        }
67
68        assert(view->nr_samples == view->image->layout.nr_samples);
69        assert(view->nr_samples == 1);
70
71        return MALI_MSAA_SINGLE;
72}
73
74static inline enum mali_sample_pattern
75pan_sample_pattern(unsigned samples)
76{
77        switch (samples) {
78        case 1:  return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED;
79        case 4:  return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID;
80        case 8:  return MALI_SAMPLE_PATTERN_D3D_8X_GRID;
81        case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID;
82        default: unreachable("Unsupported sample count");
83        }
84}
85
86int
87GENX(pan_select_crc_rt)(const struct pan_fb_info *fb)
88{
89#if PAN_ARCH <= 6
90        if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
91            fb->rts[0].view->image->layout.crc_mode != PAN_IMAGE_CRC_NONE)
92                return 0;
93
94        return -1;
95#else
96        bool best_rt_valid = false;
97        int best_rt = -1;
98
99        for (unsigned i = 0; i < fb->rt_count; i++) {
100		if (!fb->rts[i].view || fb->rts[0].discard ||
101                    fb->rts[i].view->image->layout.crc_mode == PAN_IMAGE_CRC_NONE)
102                        continue;
103
104                bool valid = *(fb->rts[i].crc_valid);
105                bool full = !fb->extent.minx && !fb->extent.miny &&
106                            fb->extent.maxx == (fb->width - 1) &&
107                            fb->extent.maxy == (fb->height - 1);
108                if (!full && !valid)
109                        continue;
110
111                if (best_rt < 0 || (valid && !best_rt_valid)) {
112                        best_rt = i;
113                        best_rt_valid = valid;
114                }
115
116                if (valid)
117                        break;
118        }
119
120        return best_rt;
121#endif
122}
123
124static enum mali_zs_format
125translate_zs_format(enum pipe_format in)
126{
127        switch (in) {
128        case PIPE_FORMAT_Z16_UNORM: return MALI_ZS_FORMAT_D16;
129        case PIPE_FORMAT_Z24_UNORM_S8_UINT: return MALI_ZS_FORMAT_D24S8;
130        case PIPE_FORMAT_Z24X8_UNORM: return MALI_ZS_FORMAT_D24X8;
131        case PIPE_FORMAT_Z32_FLOAT: return MALI_ZS_FORMAT_D32;
132        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return MALI_ZS_FORMAT_D32_S8X24;
133        default: unreachable("Unsupported depth/stencil format.");
134        }
135}
136
137#if PAN_ARCH >= 5
138static enum mali_s_format
139translate_s_format(enum pipe_format in)
140{
141        switch (in) {
142        case PIPE_FORMAT_S8_UINT: return MALI_S_FORMAT_S8;
143        case PIPE_FORMAT_S8_UINT_Z24_UNORM:
144        case PIPE_FORMAT_S8X24_UINT:
145                return MALI_S_FORMAT_S8X24;
146        case PIPE_FORMAT_Z24_UNORM_S8_UINT:
147        case PIPE_FORMAT_X24S8_UINT:
148                return MALI_S_FORMAT_X24S8;
149        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
150                return MALI_S_FORMAT_X32_S8X24;
151        default:
152                unreachable("Unsupported stencil format.");
153        }
154}
155
156static void
157pan_prepare_s(const struct pan_fb_info *fb,
158              struct MALI_ZS_CRC_EXTENSION *ext)
159{
160        const struct pan_image_view *s = fb->zs.view.s;
161
162        if (!s)
163                return;
164
165        unsigned level = s->first_level;
166
167        ext->s_msaa = mali_sampling_mode(s);
168
169        struct pan_surface surf;
170        pan_iview_get_surface(s, 0, 0, 0, &surf);
171
172        assert(s->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
173               s->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
174        ext->s_writeback_base = surf.data;
175        ext->s_writeback_row_stride = s->image->layout.slices[level].row_stride;
176        ext->s_writeback_surface_stride =
177                (s->image->layout.nr_samples > 1) ?
178                s->image->layout.slices[level].surface_stride : 0;
179        ext->s_block_format = mod_to_block_fmt(s->image->layout.modifier);
180        ext->s_write_format = translate_s_format(s->format);
181}
182
183static void
184pan_prepare_zs(const struct pan_fb_info *fb,
185               struct MALI_ZS_CRC_EXTENSION *ext)
186{
187        const struct pan_image_view *zs = fb->zs.view.zs;
188
189        if (!zs)
190                return;
191
192        unsigned level = zs->first_level;
193
194        ext->zs_msaa = mali_sampling_mode(zs);
195
196        struct pan_surface surf;
197        pan_iview_get_surface(zs, 0, 0, 0, &surf);
198
199        if (drm_is_afbc(zs->image->layout.modifier)) {
200#if PAN_ARCH >= 6
201                const struct pan_image_slice_layout *slice = &zs->image->layout.slices[level];
202
203                ext->zs_afbc_row_stride = slice->afbc.row_stride /
204                                          AFBC_HEADER_BYTES_PER_TILE;
205#else
206                ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
207                ext->zs_afbc_body_size = 0x1000;
208                ext->zs_afbc_chunk_size = 9;
209                ext->zs_afbc_sparse = true;
210#endif
211
212                ext->zs_afbc_header = surf.afbc.header;
213                ext->zs_afbc_body = surf.afbc.body;
214        } else {
215                assert(zs->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
216                       zs->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
217
218                /* TODO: Z32F(S8) support, which is always linear */
219
220                ext->zs_writeback_base = surf.data;
221                ext->zs_writeback_row_stride =
222                        zs->image->layout.slices[level].row_stride;
223                ext->zs_writeback_surface_stride =
224                        (zs->image->layout.nr_samples > 1) ?
225                        zs->image->layout.slices[level].surface_stride : 0;
226        }
227
228        ext->zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
229        ext->zs_write_format = translate_zs_format(zs->format);
230        if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8)
231                ext->s_writeback_base = ext->zs_writeback_base;
232}
233
234static void
235pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
236                struct MALI_ZS_CRC_EXTENSION *ext)
237{
238        if (rt_crc < 0)
239                return;
240
241        assert(rt_crc < fb->rt_count);
242
243        const struct pan_image_view *rt = fb->rts[rt_crc].view;
244        const struct pan_image_slice_layout *slice = &rt->image->layout.slices[rt->first_level];
245        ext->crc_base = (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND ?
246                         (rt->image->data.bo->ptr.gpu + rt->image->data.offset) :
247                         (rt->image->crc.bo->ptr.gpu + rt->image->crc.offset)) +
248                        slice->crc.offset;
249        ext->crc_row_stride = slice->crc.stride;
250
251#if PAN_ARCH >= 7
252        ext->crc_render_target = rt_crc;
253
254        if (fb->rts[rt_crc].clear) {
255                uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
256                ext->crc_clear_color = clear_val | 0xc000000000000000 |
257                                       (((uint64_t)clear_val & 0xffff) << 32);
258        }
259#endif
260}
261
262static void
263pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc,
264                    void *zs_crc_ext)
265{
266        pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
267                pan_prepare_crc(fb, rt_crc, &cfg);
268                cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s;
269                pan_prepare_zs(fb, &cfg);
270                pan_prepare_s(fb, &cfg);
271        }
272}
273
274/* Measure format as it appears in the tile buffer */
275
276static unsigned
277pan_bytes_per_pixel_tib(enum pipe_format format)
278{
279        if (panfrost_blendable_formats_v7[format].internal) {
280                /* Blendable formats are always 32-bits in the tile buffer,
281                 * extra bits are used as padding or to dither */
282                return 4;
283        } else {
284                /* Non-blendable formats are raw, rounded up to the nearest
285                 * power-of-two size */
286                unsigned bytes = util_format_get_blocksize(format);
287                return util_next_power_of_two(bytes);
288        }
289}
290
291static unsigned
292pan_internal_cbuf_size(const struct pan_fb_info *fb,
293                       unsigned *tile_size)
294{
295        unsigned total_size = 0;
296
297        *tile_size = 16 * 16;
298        for (int cb = 0; cb < fb->rt_count; ++cb) {
299                const struct pan_image_view *rt = fb->rts[cb].view;
300
301                if (!rt)
302                        continue;
303
304                total_size += pan_bytes_per_pixel_tib(rt->format) *
305                              rt->nr_samples * (*tile_size);
306        }
307
308        /* We have a 4KB budget, let's reduce the tile size until it fits. */
309        while (total_size > 4096) {
310                total_size >>= 1;
311                *tile_size >>= 1;
312        }
313
314        /* Align on 1k. */
315        total_size = ALIGN_POT(total_size, 1024);
316
317        /* Minimum tile size is 4x4. */
318        assert(*tile_size >= 4 * 4);
319        return total_size;
320}
321
322static enum mali_color_format
323pan_mfbd_raw_format(unsigned bits)
324{
325        switch (bits) {
326        case    8: return MALI_COLOR_FORMAT_RAW8;
327        case   16: return MALI_COLOR_FORMAT_RAW16;
328        case   24: return MALI_COLOR_FORMAT_RAW24;
329        case   32: return MALI_COLOR_FORMAT_RAW32;
330        case   48: return MALI_COLOR_FORMAT_RAW48;
331        case   64: return MALI_COLOR_FORMAT_RAW64;
332        case   96: return MALI_COLOR_FORMAT_RAW96;
333        case  128: return MALI_COLOR_FORMAT_RAW128;
334        case  192: return MALI_COLOR_FORMAT_RAW192;
335        case  256: return MALI_COLOR_FORMAT_RAW256;
336        case  384: return MALI_COLOR_FORMAT_RAW384;
337        case  512: return MALI_COLOR_FORMAT_RAW512;
338        case  768: return MALI_COLOR_FORMAT_RAW768;
339        case 1024: return MALI_COLOR_FORMAT_RAW1024;
340        case 1536: return MALI_COLOR_FORMAT_RAW1536;
341        case 2048: return MALI_COLOR_FORMAT_RAW2048;
342        default: unreachable("invalid raw bpp");
343        }
344}
345
346static void
347pan_rt_init_format(const struct pan_image_view *rt,
348                   struct MALI_RENDER_TARGET *cfg)
349{
350        /* Explode details on the format */
351
352        const struct util_format_description *desc =
353                util_format_description(rt->format);
354
355        /* The swizzle for rendering is inverted from texturing */
356
357        unsigned char swizzle[4] = {
358                PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
359        };
360
361        /* Fill in accordingly, defaulting to 8-bit UNORM */
362
363        if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
364                cfg->srgb = true;
365
366        struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];
367
368        if (fmt.internal) {
369                cfg->internal_format = fmt.internal;
370                cfg->writeback_format = fmt.writeback;
371                panfrost_invert_swizzle(desc->swizzle, swizzle);
372        } else {
373                /* Construct RAW internal/writeback, where internal is
374                 * specified logarithmically (round to next power-of-two).
375                 * Offset specified from RAW8, where 8 = 2^3 */
376
377                unsigned bits = desc->block.bits;
378                unsigned offset = util_logbase2_ceil(bits) - 3;
379                assert(offset <= 4);
380
381                cfg->internal_format =
382                        MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset;
383
384                cfg->writeback_format = pan_mfbd_raw_format(bits);
385        }
386
387        cfg->swizzle = panfrost_translate_swizzle_4(swizzle);
388}
389
390static void
391pan_prepare_rt(const struct pan_fb_info *fb, unsigned idx,
392               unsigned cbuf_offset,
393               struct MALI_RENDER_TARGET *cfg)
394{
395        cfg->clean_pixel_write_enable = fb->rts[idx].clear;
396        cfg->internal_buffer_offset = cbuf_offset;
397        if (fb->rts[idx].clear) {
398                cfg->clear.color_0 = fb->rts[idx].clear_value[0];
399                cfg->clear.color_1 = fb->rts[idx].clear_value[1];
400                cfg->clear.color_2 = fb->rts[idx].clear_value[2];
401                cfg->clear.color_3 = fb->rts[idx].clear_value[3];
402        }
403
404        const struct pan_image_view *rt = fb->rts[idx].view;
405        if (!rt || fb->rts[idx].discard) {
406                cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
407                cfg->internal_buffer_offset = cbuf_offset;
408#if PAN_ARCH >= 7
409                cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
410                cfg->dithering_enable = true;
411#endif
412                return;
413        }
414
415        cfg->write_enable = true;
416        cfg->dithering_enable = true;
417
418        unsigned level = rt->first_level;
419        assert(rt->last_level == rt->first_level);
420        assert(rt->last_layer == rt->first_layer);
421
422        int row_stride = rt->image->layout.slices[level].row_stride;
423
424        /* Only set layer_stride for layered MSAA rendering  */
425
426        unsigned layer_stride =
427                (rt->image->layout.nr_samples > 1) ?
428                        rt->image->layout.slices[level].surface_stride : 0;
429
430        cfg->writeback_msaa = mali_sampling_mode(rt);
431
432        pan_rt_init_format(rt, cfg);
433
434#if PAN_ARCH <= 5
435        cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier);
436#else
437        cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier);
438#endif
439
440        struct pan_surface surf;
441        pan_iview_get_surface(rt, 0, 0, 0, &surf);
442
443        if (drm_is_afbc(rt->image->layout.modifier)) {
444                const struct pan_image_slice_layout *slice = &rt->image->layout.slices[level];
445
446#if PAN_ARCH >= 6
447                cfg->afbc.row_stride = slice->afbc.row_stride /
448                                       AFBC_HEADER_BYTES_PER_TILE;
449                cfg->afbc.afbc_wide_block_enable =
450                        panfrost_block_dim(rt->image->layout.modifier, true, 0) > 16;
451#else
452                cfg->afbc.chunk_size = 9;
453                cfg->afbc.sparse = true;
454                cfg->afbc.body_size = slice->afbc.body_size;
455#endif
456
457                cfg->afbc.header = surf.afbc.header;
458                cfg->afbc.body = surf.afbc.body;
459
460                if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR)
461                        cfg->afbc.yuv_transform_enable = true;
462        } else {
463                assert(rt->image->layout.modifier == DRM_FORMAT_MOD_LINEAR ||
464                       rt->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
465                cfg->rgb.base = surf.data;
466                cfg->rgb.row_stride = row_stride;
467                cfg->rgb.surface_stride = layer_stride;
468        }
469}
470#endif
471
472void
473GENX(pan_emit_tls)(const struct pan_tls_info *info,
474                   void *out)
475{
476        pan_pack(out, LOCAL_STORAGE, cfg) {
477                if (info->tls.size) {
478                        unsigned shift =
479                                panfrost_get_stack_shift(info->tls.size);
480
481                        cfg.tls_size = shift;
482                        cfg.tls_base_pointer = info->tls.ptr;
483                }
484
485                if (info->wls.size) {
486                        assert(!(info->wls.ptr & 4095));
487                        assert((info->wls.ptr & 0xffffffff00000000ULL) == ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
488                        cfg.wls_base_pointer = info->wls.ptr;
489                        unsigned wls_size = pan_wls_adjust_size(info->wls.size);
490                        cfg.wls_instances = pan_wls_instances(&info->wls.dim);
491                        cfg.wls_size_scale = util_logbase2(wls_size) + 1;
492                } else {
493                        cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
494                }
495        }
496}
497
498#if PAN_ARCH <= 5
499static void
500pan_emit_midgard_tiler(const struct panfrost_device *dev,
501                       const struct pan_fb_info *fb,
502                       const struct pan_tiler_context *tiler_ctx,
503                       void *out)
504{
505        bool hierarchy = !(dev->quirks & MIDGARD_NO_HIER_TILING);
506
507        assert(tiler_ctx->midgard.polygon_list->ptr.gpu);
508
509        pan_pack(out, TILER_CONTEXT, cfg) {
510                unsigned header_size;
511
512                if (tiler_ctx->midgard.disable) {
513                        cfg.hierarchy_mask =
514                                hierarchy ?
515                                MALI_MIDGARD_TILER_DISABLED :
516                                MALI_MIDGARD_TILER_USER;
517                        header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
518                        cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4);
519                        cfg.heap_start = tiler_ctx->midgard.polygon_list->ptr.gpu;
520                        cfg.heap_end = tiler_ctx->midgard.polygon_list->ptr.gpu;
521		} else {
522                        cfg.hierarchy_mask =
523                                panfrost_choose_hierarchy_mask(fb->width,
524                                                               fb->height,
525                                                               1, hierarchy);
526                        header_size = panfrost_tiler_header_size(fb->width,
527                                                                 fb->height,
528                                                                 cfg.hierarchy_mask,
529                                                                 hierarchy);
530                        cfg.polygon_list_size =
531                                panfrost_tiler_full_size(fb->width, fb->height,
532                                                         cfg.hierarchy_mask,
533                                                         hierarchy);
534                        cfg.heap_start = dev->tiler_heap->ptr.gpu;
535                        cfg.heap_end = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
536                }
537
538                cfg.polygon_list = tiler_ctx->midgard.polygon_list->ptr.gpu;
539                cfg.polygon_list_body = cfg.polygon_list + header_size;
540        }
541}
542#endif
543
544#if PAN_ARCH >= 5
545static void
546pan_emit_rt(const struct pan_fb_info *fb,
547            unsigned idx, unsigned cbuf_offset, void *out)
548{
549        pan_pack(out, RENDER_TARGET, cfg) {
550                pan_prepare_rt(fb, idx, cbuf_offset, &cfg);
551        }
552}
553
554#if PAN_ARCH >= 6
555/* All Bifrost and Valhall GPUs are affected by issue TSIX-2033:
556 *
557 *      Forcing clean_tile_writes breaks INTERSECT readbacks
558 *
559 * To workaround, use the frame shader mode ALWAYS instead of INTERSECT if
560 * clean tile writes is forced. Since INTERSECT is a hint that the hardware may
561 * ignore, this cannot affect correctness, only performance */
562
563static enum mali_pre_post_frame_shader_mode
564pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, bool force_clean_tile)
565{
566        if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
567                return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
568        else
569                return mode;
570}
571
572/* Regardless of clean_tile_write_enable, the hardware writes clean tiles if
573 * the effective tile size differs from the superblock size of any enabled AFBC
574 * render target. Check this condition. */
575
576static bool
577pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size)
578{
579        if (!drm_is_afbc(rt->image->layout.modifier))
580                return false;
581
582        unsigned superblock = panfrost_block_dim(rt->image->layout.modifier, true, 0);
583
584        assert(superblock >= 16);
585        assert(tile_size <= 16*16);
586
587        /* Tile size and superblock differ unless they are both 16x16 */
588        return !(superblock == 16 && tile_size == 16*16);
589}
590
591static bool
592pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size)
593{
594        /* Maximum tile size */
595        assert(tile_size <= 16*16);
596
597        for (unsigned i = 0; i < fb->rt_count; ++i) {
598                if (fb->rts[i].view && !fb->rts[i].discard &&
599                    pan_force_clean_write_rt(fb->rts[i].view, tile_size))
600                        return true;
601        }
602
603        if (fb->zs.view.zs && !fb->zs.discard.z &&
604            pan_force_clean_write_rt(fb->zs.view.zs, tile_size))
605                return true;
606
607        if (fb->zs.view.s && !fb->zs.discard.s &&
608            pan_force_clean_write_rt(fb->zs.view.s, tile_size))
609                return true;
610
611        return false;
612}
613
614#endif
615
616static unsigned
617pan_emit_mfbd(const struct panfrost_device *dev,
618              const struct pan_fb_info *fb,
619              const struct pan_tls_info *tls,
620              const struct pan_tiler_context *tiler_ctx,
621              void *out)
622{
623        unsigned tags = MALI_FBD_TAG_IS_MFBD;
624        void *fbd = out;
625        void *rtd = out + pan_size(FRAMEBUFFER);
626
627#if PAN_ARCH <= 5
628        GENX(pan_emit_tls)(tls,
629                           pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
630#endif
631
632        unsigned tile_size;
633        unsigned internal_cbuf_size = pan_internal_cbuf_size(fb, &tile_size);
634        int crc_rt = GENX(pan_select_crc_rt)(fb);
635        bool has_zs_crc_ext = pan_fbd_has_zs_crc_ext(fb);
636
637        pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
638#if PAN_ARCH >= 6
639                bool force_clean_write = pan_force_clean_write(fb, tile_size);
640
641                cfg.sample_locations =
642                        panfrost_sample_positions(dev, pan_sample_pattern(fb->nr_samples));
643                cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0], force_clean_write);
644                cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1], force_clean_write);
645                cfg.post_frame  = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2], force_clean_write);
646                cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
647                cfg.tiler = tiler_ctx->bifrost;
648#endif
649                cfg.width = fb->width;
650                cfg.height = fb->height;
651                cfg.bound_max_x = fb->width - 1;
652                cfg.bound_max_y = fb->height - 1;
653
654                cfg.effective_tile_size = tile_size;
655                cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
656                cfg.render_target_count = MAX2(fb->rt_count, 1);
657
658                /* Default to 24 bit depth if there's no surface. */
659                cfg.z_internal_format =
660                        fb->zs.view.zs ?
661                        panfrost_get_z_internal_format(fb->zs.view.zs->format) :
662                        MALI_Z_INTERNAL_FORMAT_D24;
663
664                cfg.z_clear = fb->zs.clear_value.depth;
665                cfg.s_clear = fb->zs.clear_value.stencil;
666                cfg.color_buffer_allocation = internal_cbuf_size;
667                cfg.sample_count = fb->nr_samples;
668                cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
669                cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
670                cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
671                cfg.has_zs_crc_extension = has_zs_crc_ext;
672
673                if (crc_rt >= 0) {
674                        bool *valid = fb->rts[crc_rt].crc_valid;
675                        bool full = !fb->extent.minx && !fb->extent.miny &&
676                                    fb->extent.maxx == (fb->width - 1) &&
677                                    fb->extent.maxy == (fb->height - 1);
678
679                        cfg.crc_read_enable = *valid;
680
681                        /* If the data is currently invalid, still write CRC
682                         * data if we are doing a full write, so that it is
683                         * valid for next time. */
684                        cfg.crc_write_enable = *valid || full;
685
686                        *valid |= full;
687                }
688        }
689
690#if PAN_ARCH >= 6
691        pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding);
692#else
693        pan_emit_midgard_tiler(dev, fb, tiler_ctx,
694                               pan_section_ptr(fbd, FRAMEBUFFER, TILER));
695
696        /* All weights set to 0, nothing to do here */
697        pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);
698#endif
699
700        if (has_zs_crc_ext) {
701                pan_emit_zs_crc_ext(fb, crc_rt,
702                                    out + pan_size(FRAMEBUFFER));
703                rtd += pan_size(ZS_CRC_EXTENSION);
704                tags |= MALI_FBD_TAG_HAS_ZS_RT;
705        }
706
707        unsigned rt_count = MAX2(fb->rt_count, 1);
708        unsigned cbuf_offset = 0;
709        for (unsigned i = 0; i < rt_count; i++) {
710                pan_emit_rt(fb, i, cbuf_offset, rtd);
711                rtd += pan_size(RENDER_TARGET);
712                if (!fb->rts[i].view)
713                        continue;
714
715                cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
716                               tile_size * fb->rts[i].view->image->layout.nr_samples;
717
718                if (i != crc_rt)
719                        *(fb->rts[i].crc_valid) = false;
720        }
721        tags |= MALI_POSITIVE(MAX2(fb->rt_count, 1)) << 2;
722
723        return tags;
724}
725#else /* PAN_ARCH == 4 */
726static void
727pan_emit_sfbd_tiler(const struct panfrost_device *dev,
728                    const struct pan_fb_info *fb,
729                    const struct pan_tiler_context *ctx,
730                    void *fbd)
731{
732       pan_emit_midgard_tiler(dev, fb, ctx,
733                              pan_section_ptr(fbd, FRAMEBUFFER, TILER));
734
735        /* All weights set to 0, nothing to do here */
736        pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding);
737        pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);
738}
739
740static void
741pan_emit_sfbd(const struct panfrost_device *dev,
742              const struct pan_fb_info *fb,
743              const struct pan_tls_info *tls,
744              const struct pan_tiler_context *tiler_ctx,
745              void *fbd)
746{
747        GENX(pan_emit_tls)(tls,
748                           pan_section_ptr(fbd, FRAMEBUFFER,
749                                           LOCAL_STORAGE));
750        pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
751                cfg.bound_max_x = fb->width - 1;
752                cfg.bound_max_y = fb->height - 1;
753                cfg.dithering_enable = true;
754                cfg.clean_pixel_write_enable = true;
755                cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
756                if (fb->rts[0].clear) {
757                        cfg.clear_color_0 = fb->rts[0].clear_value[0];
758                        cfg.clear_color_1 = fb->rts[0].clear_value[1];
759                        cfg.clear_color_2 = fb->rts[0].clear_value[2];
760                        cfg.clear_color_3 = fb->rts[0].clear_value[3];
761                }
762
763                if (fb->zs.clear.z)
764                        cfg.z_clear = fb->zs.clear_value.depth;
765
766                if (fb->zs.clear.s)
767                        cfg.s_clear = fb->zs.clear_value.stencil;
768
769                if (fb->rt_count && fb->rts[0].view) {
770                        const struct pan_image_view *rt = fb->rts[0].view;
771
772                        const struct util_format_description *desc =
773                                util_format_description(rt->format);
774
775                        /* The swizzle for rendering is inverted from texturing */
776                        unsigned char swizzle[4];
777                        panfrost_invert_swizzle(desc->swizzle, swizzle);
778                        cfg.swizzle = panfrost_translate_swizzle_4(swizzle);
779
780                        struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];
781                        if (fmt.internal) {
782                                cfg.internal_format = fmt.internal;
783                                cfg.color_writeback_format = fmt.writeback;
784                        } else {
785                                unreachable("raw formats not finished for SFBD");
786                        }
787
788                        unsigned level = rt->first_level;
789                        struct pan_surface surf;
790
791                        pan_iview_get_surface(rt, 0, 0, 0, &surf);
792
793                        cfg.color_write_enable = !fb->rts[0].discard;
794                        cfg.color_writeback.base = surf.data;
795                        cfg.color_writeback.row_stride =
796	                        rt->image->layout.slices[level].row_stride;
797
798                        cfg.color_block_format = mod_to_block_fmt(rt->image->layout.modifier);
799                        assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR ||
800                               cfg.color_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
801
802                        if (rt->image->layout.crc_mode != PAN_IMAGE_CRC_NONE) {
803                                const struct pan_image_slice_layout *slice =
804                                        &rt->image->layout.slices[level];
805
806                                cfg.crc_buffer.row_stride = slice->crc.stride;
807                                if (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND) {
808                                        cfg.crc_buffer.base = rt->image->data.bo->ptr.gpu +
809                                                              rt->image->data.offset +
810                                                              slice->crc.offset;
811                                } else {
812                                        cfg.crc_buffer.base = rt->image->crc.bo->ptr.gpu +
813                                                              rt->image->crc.offset +
814                                                              slice->crc.offset;
815                                }
816                        }
817                }
818
819                if (fb->zs.view.zs) {
820                        const struct pan_image_view *zs = fb->zs.view.zs;
821                        unsigned level = zs->first_level;
822                        struct pan_surface surf;
823
824                        pan_iview_get_surface(zs, 0, 0, 0, &surf);
825
826                        cfg.zs_write_enable = !fb->zs.discard.z;
827                        cfg.zs_writeback.base = surf.data;
828                        cfg.zs_writeback.row_stride =
829                                zs->image->layout.slices[level].row_stride;
830                        cfg.zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
831                        assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR ||
832                               cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
833
834                        cfg.zs_format = translate_zs_format(zs->format);
835                }
836
837                cfg.sample_count = fb->nr_samples;
838
839                if (fb->rt_count)
840                        cfg.msaa = mali_sampling_mode(fb->rts[0].view);
841        }
842        pan_emit_sfbd_tiler(dev, fb, tiler_ctx, fbd);
843        pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding);
844}
845#endif
846
847unsigned
848GENX(pan_emit_fbd)(const struct panfrost_device *dev,
849                   const struct pan_fb_info *fb,
850                   const struct pan_tls_info *tls,
851                   const struct pan_tiler_context *tiler_ctx,
852                   void *out)
853{
854#if PAN_ARCH == 4
855        assert(fb->rt_count <= 1);
856        pan_emit_sfbd(dev, fb, tls, tiler_ctx, out);
857        return 0;
858#else
859        return pan_emit_mfbd(dev, fb, tls, tiler_ctx, out);
860#endif
861}
862
863#if PAN_ARCH >= 6
864void
865GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev,
866                          void *out)
867{
868        pan_pack(out, TILER_HEAP, heap) {
869                heap.size = dev->tiler_heap->size;
870                heap.base = dev->tiler_heap->ptr.gpu;
871                heap.bottom = dev->tiler_heap->ptr.gpu;
872                heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
873        }
874}
875
876void
877GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev,
878                         unsigned fb_width, unsigned fb_height,
879                         unsigned nr_samples,
880                         mali_ptr heap,
881                         void *out)
882{
883        unsigned max_levels = dev->tiler_features.max_levels;
884        assert(max_levels >= 2);
885
886        pan_pack(out, TILER_CONTEXT, tiler) {
887                /* TODO: Select hierarchy mask more effectively */
888                tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28;
889                tiler.fb_width = fb_width;
890                tiler.fb_height = fb_height;
891                tiler.heap = heap;
892                tiler.sample_pattern = pan_sample_pattern(nr_samples);
893        }
894}
895#endif
896
897void
898GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb,
899                            mali_ptr fbd,
900                            void *out)
901{
902        pan_section_pack(out, FRAGMENT_JOB, HEADER, header) {
903                header.type = MALI_JOB_TYPE_FRAGMENT;
904                header.index = 1;
905        }
906
907        pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) {
908                payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT;
909                payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT;
910                payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT;
911                payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT;
912                payload.framebuffer = fbd;
913
914#if PAN_ARCH >= 5
915                if (fb->tile_map.base) {
916                        payload.has_tile_enable_map = true;
917                        payload.tile_enable_map = fb->tile_map.base;
918                        payload.tile_enable_map_row_stride = fb->tile_map.stride;
919                }
920#endif
921        }
922}
923