1b8e80941Smrg/*
2b8e80941Smrg * Copyright 2018-2019 Alyssa Rosenzweig
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b8e80941Smrg * SOFTWARE.
22b8e80941Smrg *
23b8e80941Smrg */
24b8e80941Smrg
25b8e80941Smrg#include "pan_context.h"
26b8e80941Smrg#include "pan_util.h"
27b8e80941Smrg#include "pan_format.h"
28b8e80941Smrg
29b8e80941Smrg#include "util/u_format.h"
30b8e80941Smrg
31b8e80941Smrgstatic struct mali_rt_format
32b8e80941Smrgpanfrost_mfbd_format(struct pipe_surface *surf)
33b8e80941Smrg{
34b8e80941Smrg        /* Explode details on the format */
35b8e80941Smrg
36b8e80941Smrg        const struct util_format_description *desc =
37b8e80941Smrg                util_format_description(surf->texture->format);
38b8e80941Smrg
39b8e80941Smrg        /* Fill in accordingly, defaulting to RGBA8888 (UNORM) */
40b8e80941Smrg
41b8e80941Smrg        struct mali_rt_format fmt = {
42b8e80941Smrg                .unk1 = 0x4000000,
43b8e80941Smrg                .unk2 = 0x1,
44b8e80941Smrg                .nr_channels = MALI_POSITIVE(desc->nr_channels),
45b8e80941Smrg                .flags = 0x444,
46b8e80941Smrg                .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
47b8e80941Smrg                .unk4 = 0x8
48b8e80941Smrg        };
49b8e80941Smrg
50b8e80941Smrg        /* Set flags for alternative formats */
51b8e80941Smrg
52b8e80941Smrg        if (surf->texture->format == PIPE_FORMAT_B5G6R5_UNORM) {
53b8e80941Smrg                fmt.unk1 = 0x14000000;
54b8e80941Smrg                fmt.nr_channels = MALI_POSITIVE(2);
55b8e80941Smrg                fmt.flags |= 0x1;
56b8e80941Smrg        }
57b8e80941Smrg
58b8e80941Smrg        return fmt;
59b8e80941Smrg}
60b8e80941Smrg
61b8e80941Smrg
62b8e80941Smrgstatic void
63b8e80941Smrgpanfrost_mfbd_clear(
64b8e80941Smrg                struct panfrost_job *job,
65b8e80941Smrg                struct bifrost_framebuffer *fb,
66b8e80941Smrg                struct bifrost_fb_extra *fbx,
67b8e80941Smrg                struct bifrost_render_target *rt)
68b8e80941Smrg{
69b8e80941Smrg        if (job->clear & PIPE_CLEAR_COLOR) {
70b8e80941Smrg                rt->clear_color_1 = job->clear_color;
71b8e80941Smrg                rt->clear_color_2 = job->clear_color;
72b8e80941Smrg                rt->clear_color_3 = job->clear_color;
73b8e80941Smrg                rt->clear_color_4 = job->clear_color;
74b8e80941Smrg        }
75b8e80941Smrg
76b8e80941Smrg        if (job->clear & PIPE_CLEAR_DEPTH) {
77b8e80941Smrg                fb->clear_depth = job->clear_depth;
78b8e80941Smrg        }
79b8e80941Smrg
80b8e80941Smrg        if (job->clear & PIPE_CLEAR_STENCIL) {
81b8e80941Smrg                fb->clear_stencil = job->clear_stencil;
82b8e80941Smrg        }
83b8e80941Smrg}
84b8e80941Smrg
85b8e80941Smrgstatic void
86b8e80941Smrgpanfrost_mfbd_set_cbuf(
87b8e80941Smrg                struct bifrost_render_target *rt,
88b8e80941Smrg                struct pipe_surface *surf,
89b8e80941Smrg                bool flip_y)
90b8e80941Smrg{
91b8e80941Smrg        struct panfrost_resource *rsrc = pan_resource(surf->texture);
92b8e80941Smrg        int stride = rsrc->bo->slices[0].stride;
93b8e80941Smrg
94b8e80941Smrg        rt->format = panfrost_mfbd_format(surf);
95b8e80941Smrg
96b8e80941Smrg        /* Now, we set the layout specific pieces */
97b8e80941Smrg
98b8e80941Smrg        if (rsrc->bo->layout == PAN_LINEAR) {
99b8e80941Smrg                mali_ptr framebuffer = rsrc->bo->gpu;
100b8e80941Smrg
101b8e80941Smrg                if (flip_y) {
102b8e80941Smrg                        framebuffer += stride * (surf->texture->height0 - 1);
103b8e80941Smrg                        stride = -stride;
104b8e80941Smrg                }
105b8e80941Smrg
106b8e80941Smrg                rt->framebuffer = framebuffer;
107b8e80941Smrg                rt->framebuffer_stride = stride / 16;
108b8e80941Smrg        } else if (rsrc->bo->layout == PAN_AFBC) {
109b8e80941Smrg                rt->afbc.metadata = rsrc->bo->afbc_slab.gpu;
110b8e80941Smrg                rt->afbc.stride = 0;
111b8e80941Smrg                rt->afbc.unk = 0x30009;
112b8e80941Smrg
113b8e80941Smrg                rt->format.flags |= MALI_MFBD_FORMAT_AFBC;
114b8e80941Smrg
115b8e80941Smrg                mali_ptr afbc_main = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size;
116b8e80941Smrg                rt->framebuffer = afbc_main;
117b8e80941Smrg
118b8e80941Smrg                /* TODO: Investigate shift */
119b8e80941Smrg                rt->framebuffer_stride = stride << 1;
120b8e80941Smrg        } else {
121b8e80941Smrg                fprintf(stderr, "Invalid render layout (cbuf)");
122b8e80941Smrg                assert(0);
123b8e80941Smrg        }
124b8e80941Smrg}
125b8e80941Smrg
126b8e80941Smrgstatic void
127b8e80941Smrgpanfrost_mfbd_set_zsbuf(
128b8e80941Smrg                struct bifrost_framebuffer *fb,
129b8e80941Smrg                struct bifrost_fb_extra *fbx,
130b8e80941Smrg                struct pipe_surface *surf)
131b8e80941Smrg{
132b8e80941Smrg        struct panfrost_resource *rsrc = pan_resource(surf->texture);
133b8e80941Smrg
134b8e80941Smrg        if (rsrc->bo->layout == PAN_AFBC) {
135b8e80941Smrg                fb->unk3 |= MALI_MFBD_EXTRA;
136b8e80941Smrg
137b8e80941Smrg                fbx->flags =
138b8e80941Smrg                        MALI_EXTRA_PRESENT |
139b8e80941Smrg                        MALI_EXTRA_AFBC |
140b8e80941Smrg                        MALI_EXTRA_AFBC_ZS |
141b8e80941Smrg                        MALI_EXTRA_ZS |
142b8e80941Smrg                        0x1; /* unknown */
143b8e80941Smrg
144b8e80941Smrg                fbx->ds_afbc.depth_stencil_afbc_metadata = rsrc->bo->afbc_slab.gpu;
145b8e80941Smrg                fbx->ds_afbc.depth_stencil_afbc_stride = 0;
146b8e80941Smrg
147b8e80941Smrg                fbx->ds_afbc.depth_stencil = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size;
148b8e80941Smrg
149b8e80941Smrg                fbx->ds_afbc.zero1 = 0x10009;
150b8e80941Smrg                fbx->ds_afbc.padding = 0x1000;
151b8e80941Smrg        } else if (rsrc->bo->layout == PAN_LINEAR) {
152b8e80941Smrg                fb->unk3 |= MALI_MFBD_EXTRA;
153b8e80941Smrg                fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS | 0x1;
154b8e80941Smrg
155b8e80941Smrg                fbx->ds_linear.depth = rsrc->bo->gpu;
156b8e80941Smrg                fbx->ds_linear.depth_stride = rsrc->bo->slices[0].stride;
157b8e80941Smrg        } else {
158b8e80941Smrg                assert(0);
159b8e80941Smrg        }
160b8e80941Smrg}
161b8e80941Smrg
162b8e80941Smrg/* Helper for sequential uploads used for MFBD */
163b8e80941Smrg
164b8e80941Smrg#define UPLOAD(dest, offset, src, max) { \
165b8e80941Smrg        size_t sz = sizeof(*src); \
166b8e80941Smrg        memcpy(dest.cpu + offset, src, sz); \
167b8e80941Smrg        assert((offset + sz) <= max); \
168b8e80941Smrg        offset += sz; \
169b8e80941Smrg}
170b8e80941Smrg
171b8e80941Smrgstatic mali_ptr
172b8e80941Smrgpanfrost_mfbd_upload(
173b8e80941Smrg                struct panfrost_context *ctx,
174b8e80941Smrg                struct bifrost_framebuffer *fb,
175b8e80941Smrg                struct bifrost_fb_extra *fbx,
176b8e80941Smrg                struct bifrost_render_target *rts,
177b8e80941Smrg                unsigned cbufs)
178b8e80941Smrg{
179b8e80941Smrg        off_t offset = 0;
180b8e80941Smrg
181b8e80941Smrg        /* There may be extra data stuck in the middle */
182b8e80941Smrg        bool has_extra = fb->unk3 & MALI_MFBD_EXTRA;
183b8e80941Smrg
184b8e80941Smrg        /* Compute total size for transfer */
185b8e80941Smrg
186b8e80941Smrg        size_t total_sz =
187b8e80941Smrg                sizeof(struct bifrost_framebuffer) +
188b8e80941Smrg                (has_extra ? sizeof(struct bifrost_fb_extra) : 0) +
189b8e80941Smrg                sizeof(struct bifrost_render_target) * cbufs;
190b8e80941Smrg
191b8e80941Smrg        struct panfrost_transfer m_f_trans =
192b8e80941Smrg                panfrost_allocate_transient(ctx, total_sz);
193b8e80941Smrg
194b8e80941Smrg        /* Do the transfer */
195b8e80941Smrg
196b8e80941Smrg        UPLOAD(m_f_trans, offset, fb, total_sz);
197b8e80941Smrg
198b8e80941Smrg        if (has_extra)
199b8e80941Smrg                UPLOAD(m_f_trans, offset, fbx, total_sz);
200b8e80941Smrg
201b8e80941Smrg        for (unsigned c = 0; c < cbufs; ++c) {
202b8e80941Smrg                UPLOAD(m_f_trans, offset, &rts[c], total_sz);
203b8e80941Smrg        }
204b8e80941Smrg
205b8e80941Smrg        /* Return pointer suitable for the fragment section */
206b8e80941Smrg        return m_f_trans.gpu | MALI_MFBD | (has_extra ? 2 : 0);
207b8e80941Smrg}
208b8e80941Smrg
209b8e80941Smrg#undef UPLOAD
210b8e80941Smrg
211b8e80941Smrg/* Creates an MFBD for the FRAGMENT section of the bound framebuffer */
212b8e80941Smrg
213b8e80941Smrgmali_ptr
214b8e80941Smrgpanfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y)
215b8e80941Smrg{
216b8e80941Smrg        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
217b8e80941Smrg
218b8e80941Smrg        struct bifrost_framebuffer fb = panfrost_emit_mfbd(ctx);
219b8e80941Smrg        struct bifrost_fb_extra fbx = {};
220b8e80941Smrg        struct bifrost_render_target rts[4] = {};
221b8e80941Smrg
222b8e80941Smrg        /* XXX: MRT case */
223b8e80941Smrg        fb.rt_count_2 = 1;
224b8e80941Smrg        fb.unk3 = 0x100;
225b8e80941Smrg
226b8e80941Smrg        /* TODO: MRT clear */
227b8e80941Smrg        panfrost_mfbd_clear(job, &fb, &fbx, &rts[0]);
228b8e80941Smrg
229b8e80941Smrg        for (int cb = 0; cb < ctx->pipe_framebuffer.nr_cbufs; ++cb) {
230b8e80941Smrg                struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[cb];
231b8e80941Smrg                panfrost_mfbd_set_cbuf(&rts[cb], surf, flip_y);
232b8e80941Smrg        }
233b8e80941Smrg
234b8e80941Smrg        if (ctx->pipe_framebuffer.zsbuf) {
235b8e80941Smrg                panfrost_mfbd_set_zsbuf(&fb, &fbx, ctx->pipe_framebuffer.zsbuf);
236b8e80941Smrg        }
237b8e80941Smrg
238b8e80941Smrg        /* For the special case of a depth-only FBO, we need to attach a dummy render target */
239b8e80941Smrg
240b8e80941Smrg        if (ctx->pipe_framebuffer.nr_cbufs == 0) {
241b8e80941Smrg                struct mali_rt_format null_rt = {
242b8e80941Smrg                        .unk1 = 0x4000000,
243b8e80941Smrg                        .unk4 = 0x8
244b8e80941Smrg                };
245b8e80941Smrg
246b8e80941Smrg                rts[0].format = null_rt;
247b8e80941Smrg                rts[0].framebuffer = 0;
248b8e80941Smrg                rts[0].framebuffer_stride = 0;
249b8e80941Smrg        }
250b8e80941Smrg
251b8e80941Smrg        /* When scanning out, the depth buffer is immediately invalidated, so
252b8e80941Smrg         * we don't need to waste bandwidth writing it out. This can improve
253b8e80941Smrg         * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of
254b8e80941Smrg         * memory bandwidth!).
255b8e80941Smrg         *
256b8e80941Smrg         * The exception is ReadPixels, but this is not supported on GLES so we
257b8e80941Smrg         * can safely ignore it. */
258b8e80941Smrg
259b8e80941Smrg        if (panfrost_is_scanout(ctx)) {
260b8e80941Smrg                job->requirements &= ~PAN_REQ_DEPTH_WRITE;
261b8e80941Smrg        }
262b8e80941Smrg
263b8e80941Smrg        /* Actualize the requirements */
264b8e80941Smrg
265b8e80941Smrg        if (job->requirements & PAN_REQ_MSAA) {
266b8e80941Smrg                rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA;
267b8e80941Smrg
268b8e80941Smrg                /* XXX */
269b8e80941Smrg                fb.unk1 |= (1 << 4) | (1 << 1);
270b8e80941Smrg                fb.rt_count_2 = 4;
271b8e80941Smrg        }
272b8e80941Smrg
273b8e80941Smrg        if (job->requirements & PAN_REQ_DEPTH_WRITE)
274b8e80941Smrg                fb.unk3 |= MALI_MFBD_DEPTH_WRITE;
275b8e80941Smrg
276b8e80941Smrg        if (ctx->pipe_framebuffer.nr_cbufs == 1) {
277b8e80941Smrg                struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture;
278b8e80941Smrg
279b8e80941Smrg                if (rsrc->bo->has_checksum) {
280b8e80941Smrg                        fb.unk3 |= MALI_MFBD_EXTRA;
281b8e80941Smrg                        fbx.flags |= MALI_EXTRA_PRESENT;
282b8e80941Smrg                        fbx.checksum_stride = rsrc->bo->checksum_stride;
283b8e80941Smrg                        fbx.checksum = rsrc->bo->gpu + rsrc->bo->slices[0].stride * rsrc->base.height0;
284b8e80941Smrg                }
285b8e80941Smrg        }
286b8e80941Smrg
287b8e80941Smrg        /* We always upload at least one (dummy) cbuf */
288b8e80941Smrg        unsigned cbufs = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
289b8e80941Smrg
290b8e80941Smrg        return panfrost_mfbd_upload(ctx, &fb, &fbx, rts, cbufs);
291b8e80941Smrg}
292