/*
 * © Copyright 2018 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
24b8e80941Smrg
25b8e80941Smrg#include <sys/poll.h>
26b8e80941Smrg#include <errno.h>
27b8e80941Smrg
28b8e80941Smrg#include "pan_context.h"
29b8e80941Smrg#include "pan_swizzle.h"
30b8e80941Smrg#include "pan_format.h"
31b8e80941Smrg
32b8e80941Smrg#include "util/macros.h"
33b8e80941Smrg#include "util/u_format.h"
34b8e80941Smrg#include "util/u_inlines.h"
35b8e80941Smrg#include "util/u_upload_mgr.h"
36b8e80941Smrg#include "util/u_memory.h"
37b8e80941Smrg#include "util/u_vbuf.h"
38b8e80941Smrg#include "util/half_float.h"
39b8e80941Smrg#include "util/u_helpers.h"
40b8e80941Smrg#include "util/u_format.h"
41b8e80941Smrg#include "indices/u_primconvert.h"
42b8e80941Smrg#include "tgsi/tgsi_parse.h"
43b8e80941Smrg#include "util/u_math.h"
44b8e80941Smrg
45b8e80941Smrg#include "pan_screen.h"
46b8e80941Smrg#include "pan_blending.h"
47b8e80941Smrg#include "pan_blend_shaders.h"
48b8e80941Smrg#include "pan_util.h"
49b8e80941Smrg#include "pan_wallpaper.h"
50b8e80941Smrg
/* Index of the next performance-counter dump (presumably consumed by the
 * counter-dumping code elsewhere in the driver; not referenced in this
 * chunk — TODO confirm) */
static int performance_counter_number = 0;

/* Base path for performance-counter output; defined in another translation
 * unit */
extern const char *pan_counters_base;

/* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */
//#define DRY_RUN
56b8e80941Smrg
57b8e80941Smrg/* Can a given format support AFBC? Not all can. */
58b8e80941Smrg
59b8e80941Smrgstatic bool
60b8e80941Smrgpanfrost_can_afbc(enum pipe_format format)
61b8e80941Smrg{
62b8e80941Smrg        const struct util_format_description *desc =
63b8e80941Smrg                util_format_description(format);
64b8e80941Smrg
65b8e80941Smrg        if (util_format_is_rgba8_variant(desc))
66b8e80941Smrg                return true;
67b8e80941Smrg
68b8e80941Smrg        /* TODO: AFBC of other formats */
69b8e80941Smrg
70b8e80941Smrg        return false;
71b8e80941Smrg}
72b8e80941Smrg
73b8e80941Smrg/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically
74b8e80941Smrg * indepdent between color buffers and depth/stencil). To enable, we allocate
75b8e80941Smrg * the AFBC metadata buffer and mark that it is enabled. We do -not- actually
76b8e80941Smrg * edit the fragment job here. This routine should be called ONCE per
77b8e80941Smrg * AFBC-compressed buffer, rather than on every frame. */
78b8e80941Smrg
static void
panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds)
{
        /* AFBC is not implemented for single-framebuffer-descriptor hardware
         * yet; fail loudly rather than emit a broken cmdstream */
        if (ctx->require_sfbd) {
                DBG("AFBC not supported yet on SFBD\n");
                assert(0);
        }

        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_screen *screen = pan_screen(gallium->screen);

        /* AFBC metadata is 16 bytes per tile; round dimensions up to whole
         * tiles */
        int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format);
        int stride = bytes_per_pixel * ALIGN(rsrc->base.width0, 16);

        stride *= 2;  /* TODO: Should this be carried over? */
        int main_size = stride * rsrc->base.height0;
        rsrc->bo->afbc_metadata_size = tile_w * tile_h * 16;

        /* Allocate the AFBC slab itself, large enough to hold the metadata
         * followed by the main (doubled-stride) buffer, in 4k pages */
        screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab,
                               (rsrc->bo->afbc_metadata_size + main_size + 4095) / 4096,
                               true, 0, 0, 0);

        rsrc->bo->layout = PAN_AFBC;

        /* Compressed textured reads use a tagged pointer to the metadata.
         * Color buffers get bit 0 set; depth/stencil does not — TODO confirm
         * the tag's exact meaning */

        rsrc->bo->gpu = rsrc->bo->afbc_slab.gpu | (ds ? 0 : 1);
        rsrc->bo->cpu = rsrc->bo->afbc_slab.cpu;
        rsrc->bo->gem_handle = rsrc->bo->afbc_slab.gem_handle;
}
112b8e80941Smrg
/* Enable checksumming (for transaction elimination, presumably — TODO
 * confirm) on a resource by allocating the per-tile checksum slab and
 * flagging the BO */

static void
panfrost_enable_checksum(struct panfrost_context *ctx, struct panfrost_resource *rsrc)
{
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_screen *screen = pan_screen(gallium->screen);

        /* Round dimensions up to whole tiles */
        int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;

        /* 8 byte checksum per tile; allocate in whole 4k pages */
        rsrc->bo->checksum_stride = tile_w * 8;
        int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096);
        screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, pages, false, 0, 0, 0);

        rsrc->bo->has_checksum = true;
}
128b8e80941Smrg
129b8e80941Smrg/* Framebuffer descriptor */
130b8e80941Smrg
131b8e80941Smrgstatic void
132b8e80941Smrgpanfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h)
133b8e80941Smrg{
134b8e80941Smrg        fb->width = MALI_POSITIVE(w);
135b8e80941Smrg        fb->height = MALI_POSITIVE(h);
136b8e80941Smrg
137b8e80941Smrg        /* No idea why this is needed, but it's how resolution_check is
138b8e80941Smrg         * calculated.  It's not clear to us yet why the hardware wants this.
139b8e80941Smrg         * The formula itself was discovered mostly by manual bruteforce and
140b8e80941Smrg         * aggressive algebraic simplification. */
141b8e80941Smrg
142b8e80941Smrg        fb->resolution_check = ((w + h) / 3) << 4;
143b8e80941Smrg}
144b8e80941Smrg
/* Build a single framebuffer descriptor (SFBD) for the current context.
 * Several fields are magic constants whose purpose is not documented here;
 * the addresses point into per-context scratch/tiler buffers */

struct mali_single_framebuffer
panfrost_emit_sfbd(struct panfrost_context *ctx)
{
        struct mali_single_framebuffer framebuffer = {
                .unknown2 = 0x1f,
                .format = 0x30000000,
                .clear_flags = 0x1000,

                /* Context-owned scratch buffers */
                .unknown_address_0 = ctx->scratchpad.gpu,
                .unknown_address_1 = ctx->misc_0.gpu,
                .unknown_address_2 = ctx->misc_0.gpu + 40960,

                .tiler_flags = 0xf0,
                .tiler_heap_free = ctx->tiler_heap.gpu,
                .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
        };

        /* Width/height (and the derived resolution_check) come from the
         * currently-bound framebuffer state */
        panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height);

        return framebuffer;
}
164b8e80941Smrg
/* Build a multiple-framebuffer descriptor (MFBD) for the current context.
 * Like the SFBD variant, many fields are magic; see the inline notes for
 * what has been reverse-engineered so far */

struct bifrost_framebuffer
panfrost_emit_mfbd(struct panfrost_context *ctx)
{
        struct bifrost_framebuffer framebuffer = {
                /* It is not yet clear what tiler_meta means or how it's
                 * calculated, but we can tell the lower 32-bits are a
                 * (monotonically increasing?) function of tile count and
                 * geometry complexity; I suspect it defines a memory size of
                 * some kind? for the tiler. It's really unclear at the
                 * moment... but to add to the confusion, the hardware is happy
                 * enough to accept a zero in this field, so we don't even have
                 * to worry about it right now.
                 *
                 * The byte (just after the 32-bit mark) is much more
                 * interesting. The higher nibble I've only ever seen as 0xF,
                 * but the lower one I've seen as 0x0 or 0xF, and it's not
                 * obvious what the difference is. But what -is- obvious is
                 * that when the lower nibble is zero, performance is severely
                 * degraded compared to when the lower nibble is set.
                 * Evidently, that nibble enables some sort of fast path,
                 * perhaps relating to caching or tile flush? Regardless, at
                 * this point there's no clear reason not to set it, aside from
                 * substantially increased memory requirements (of the misc_0
                 * buffer) */

                .tiler_meta = ((uint64_t) 0xff << 32) | 0x0,

                /* Dimensions appear twice; both pairs are encoded off-by-one */
                .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height),
                .width2 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                .height2 = MALI_POSITIVE(ctx->pipe_framebuffer.height),

                .unk1 = 0x1080,

                /* TODO: MRT */
                .rt_count_1 = MALI_POSITIVE(1),
                .rt_count_2 = 4,

                .unknown2 = 0x1f,

                /* Corresponds to unknown_address_X of SFBD */
                .scratchpad = ctx->scratchpad.gpu,
                .tiler_scratch_start  = ctx->misc_0.gpu,

                /* The constant added here is, like the lower word of
                 * tiler_meta, (loosely) another product of framebuffer size
                 * and geometry complexity. It must be sufficiently large for
                 * the tiler_meta fast path to work; if it's too small, there
                 * will be DATA_INVALID_FAULTs. Conversely, it must be less
                 * than the total size of misc_0, or else there's no room. It's
                 * possible this constant configures a partition between two
                 * parts of misc_0? We haven't investigated the functionality,
                 * as these buffers are internally used by the hardware
                 * (presumably by the tiler) but not seemingly touched by the driver
                 */

                .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000,

                .tiler_heap_start = ctx->tiler_heap.gpu,
                .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
        };

        return framebuffer;
}
229b8e80941Smrg
230b8e80941Smrg/* Are we currently rendering to the screen (rather than an FBO)? */
231b8e80941Smrg
232b8e80941Smrgbool
233b8e80941Smrgpanfrost_is_scanout(struct panfrost_context *ctx)
234b8e80941Smrg{
235b8e80941Smrg        /* If there is no color buffer, it's an FBO */
236b8e80941Smrg        if (!ctx->pipe_framebuffer.nr_cbufs)
237b8e80941Smrg                return false;
238b8e80941Smrg
239b8e80941Smrg        /* If we're too early that no framebuffer was sent, it's scanout */
240b8e80941Smrg        if (!ctx->pipe_framebuffer.cbufs[0])
241b8e80941Smrg                return true;
242b8e80941Smrg
243b8e80941Smrg        return ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_DISPLAY_TARGET ||
244b8e80941Smrg               ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SCANOUT ||
245b8e80941Smrg               ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SHARED;
246b8e80941Smrg}
247b8e80941Smrg
248b8e80941Smrgstatic uint32_t
249b8e80941Smrgpan_pack_color(const union pipe_color_union *color, enum pipe_format format)
250b8e80941Smrg{
251b8e80941Smrg        /* Alpha magicked to 1.0 if there is no alpha */
252b8e80941Smrg
253b8e80941Smrg        bool has_alpha = util_format_has_alpha(format);
254b8e80941Smrg        float clear_alpha = has_alpha ? color->f[3] : 1.0f;
255b8e80941Smrg
256b8e80941Smrg        /* Packed color depends on the framebuffer format */
257b8e80941Smrg
258b8e80941Smrg        const struct util_format_description *desc =
259b8e80941Smrg                util_format_description(format);
260b8e80941Smrg
261b8e80941Smrg        if (util_format_is_rgba8_variant(desc)) {
262b8e80941Smrg                return (float_to_ubyte(clear_alpha) << 24) |
263b8e80941Smrg                       (float_to_ubyte(color->f[2]) << 16) |
264b8e80941Smrg                       (float_to_ubyte(color->f[1]) <<  8) |
265b8e80941Smrg                       (float_to_ubyte(color->f[0]) <<  0);
266b8e80941Smrg        } else if (format == PIPE_FORMAT_B5G6R5_UNORM) {
267b8e80941Smrg                /* First, we convert the components to R5, G6, B5 separately */
268b8e80941Smrg                unsigned r5 = CLAMP(color->f[0], 0.0, 1.0) * 31.0;
269b8e80941Smrg                unsigned g6 = CLAMP(color->f[1], 0.0, 1.0) * 63.0;
270b8e80941Smrg                unsigned b5 = CLAMP(color->f[2], 0.0, 1.0) * 31.0;
271b8e80941Smrg
272b8e80941Smrg                /* Then we pack into a sparse u32. TODO: Why these shifts? */
273b8e80941Smrg                return (b5 << 25) | (g6 << 14) | (r5 << 5);
274b8e80941Smrg        } else {
275b8e80941Smrg                /* Unknown format */
276b8e80941Smrg                assert(0);
277b8e80941Smrg        }
278b8e80941Smrg
279b8e80941Smrg        return 0;
280b8e80941Smrg}
281b8e80941Smrg
282b8e80941Smrgstatic void
283b8e80941Smrgpanfrost_clear(
284b8e80941Smrg        struct pipe_context *pipe,
285b8e80941Smrg        unsigned buffers,
286b8e80941Smrg        const union pipe_color_union *color,
287b8e80941Smrg        double depth, unsigned stencil)
288b8e80941Smrg{
289b8e80941Smrg        struct panfrost_context *ctx = pan_context(pipe);
290b8e80941Smrg        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
291b8e80941Smrg
292b8e80941Smrg        if (buffers & PIPE_CLEAR_COLOR) {
293b8e80941Smrg                enum pipe_format format = ctx->pipe_framebuffer.cbufs[0]->format;
294b8e80941Smrg                job->clear_color = pan_pack_color(color, format);
295b8e80941Smrg        }
296b8e80941Smrg
297b8e80941Smrg        if (buffers & PIPE_CLEAR_DEPTH) {
298b8e80941Smrg                job->clear_depth = depth;
299b8e80941Smrg        }
300b8e80941Smrg
301b8e80941Smrg        if (buffers & PIPE_CLEAR_STENCIL) {
302b8e80941Smrg                job->clear_stencil = stencil;
303b8e80941Smrg        }
304b8e80941Smrg
305b8e80941Smrg        job->clear |= buffers;
306b8e80941Smrg}
307b8e80941Smrg
static mali_ptr
panfrost_attach_vt_mfbd(struct panfrost_context *ctx)
{
        /* MFBD needs a sequential semi-render target upload, but what exactly this is, is beyond me for now */
        struct bifrost_render_target rts_list[] = {
                {
                        .chunknown = {
                                .unk = 0x30005,
                        },
                        .framebuffer = ctx->misc_0.gpu,
                        .zero2 = 0x3,
                },
        };

        /* Allocate memory for the three components: a 1024-byte opaque
         * block, the MFBD itself, and the render-target list, laid out in
         * that order */
        int size = 1024 + sizeof(ctx->vt_framebuffer_mfbd) + sizeof(rts_list);
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);

        /* Opaque 1024-block; the RT list's chunknown points back at it */
        rts_list[0].chunknown.pointer = transfer.gpu;

        /* Copy the MFBD and RT list after the opaque block */
        memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer_mfbd, sizeof(ctx->vt_framebuffer_mfbd));
        memcpy(transfer.cpu + 1024 + sizeof(ctx->vt_framebuffer_mfbd), rts_list, sizeof(rts_list));

        /* Return a pointer to the MFBD proper, tagged to mark the descriptor
         * type for the hardware */
        return (transfer.gpu + 1024) | MALI_MFBD;
}
334b8e80941Smrg
335b8e80941Smrgstatic mali_ptr
336b8e80941Smrgpanfrost_attach_vt_sfbd(struct panfrost_context *ctx)
337b8e80941Smrg{
338b8e80941Smrg        return panfrost_upload_transient(ctx, &ctx->vt_framebuffer_sfbd, sizeof(ctx->vt_framebuffer_sfbd)) | MALI_SFBD;
339b8e80941Smrg}
340b8e80941Smrg
341b8e80941Smrgstatic void
342b8e80941Smrgpanfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
343b8e80941Smrg{
344b8e80941Smrg        mali_ptr framebuffer = ctx->require_sfbd ?
345b8e80941Smrg                panfrost_attach_vt_sfbd(ctx) :
346b8e80941Smrg                panfrost_attach_vt_mfbd(ctx);
347b8e80941Smrg
348b8e80941Smrg        ctx->payload_vertex.postfix.framebuffer = framebuffer;
349b8e80941Smrg        ctx->payload_tiler.postfix.framebuffer = framebuffer;
350b8e80941Smrg}
351b8e80941Smrg
352b8e80941Smrg/* Reset per-frame context, called on context initialisation as well as after
353b8e80941Smrg * flushing a frame */
354b8e80941Smrg
355b8e80941Smrgstatic void
356b8e80941Smrgpanfrost_invalidate_frame(struct panfrost_context *ctx)
357b8e80941Smrg{
358b8e80941Smrg        unsigned transient_count = ctx->transient_pools[ctx->cmdstream_i].entry_index*ctx->transient_pools[0].entry_size + ctx->transient_pools[ctx->cmdstream_i].entry_offset;
359b8e80941Smrg	DBG("Uploaded transient %d bytes\n", transient_count);
360b8e80941Smrg
361b8e80941Smrg        /* Rotate cmdstream */
362b8e80941Smrg        if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0])))
363b8e80941Smrg                ctx->cmdstream_i = 0;
364b8e80941Smrg
365b8e80941Smrg        if (ctx->require_sfbd)
366b8e80941Smrg                ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
367b8e80941Smrg        else
368b8e80941Smrg                ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);
369b8e80941Smrg
370b8e80941Smrg        /* Reset varyings allocated */
371b8e80941Smrg        ctx->varying_height = 0;
372b8e80941Smrg
373b8e80941Smrg        /* The transient cmdstream is dirty every frame; the only bits worth preserving
374b8e80941Smrg         * (textures, shaders, etc) are in other buffers anyways */
375b8e80941Smrg
376b8e80941Smrg        ctx->transient_pools[ctx->cmdstream_i].entry_index = 0;
377b8e80941Smrg        ctx->transient_pools[ctx->cmdstream_i].entry_offset = 0;
378b8e80941Smrg
379b8e80941Smrg        /* Regenerate payloads */
380b8e80941Smrg        panfrost_attach_vt_framebuffer(ctx);
381b8e80941Smrg
382b8e80941Smrg        if (ctx->rasterizer)
383b8e80941Smrg                ctx->dirty |= PAN_DIRTY_RASTERIZER;
384b8e80941Smrg
385b8e80941Smrg        /* XXX */
386b8e80941Smrg        ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES;
387b8e80941Smrg}
388b8e80941Smrg
389b8e80941Smrg/* In practice, every field of these payloads should be configurable
390b8e80941Smrg * arbitrarily, which means these functions are basically catch-all's for
391b8e80941Smrg * as-of-yet unwavering unknowns */
392b8e80941Smrg
393b8e80941Smrgstatic void
394b8e80941Smrgpanfrost_emit_vertex_payload(struct panfrost_context *ctx)
395b8e80941Smrg{
396b8e80941Smrg        struct midgard_payload_vertex_tiler payload = {
397b8e80941Smrg                .prefix = {
398b8e80941Smrg                        .workgroups_z_shift = 32,
399b8e80941Smrg                        .workgroups_x_shift_2 = 0x2,
400b8e80941Smrg                        .workgroups_x_shift_3 = 0x5,
401b8e80941Smrg                },
402b8e80941Smrg		.gl_enables = 0x4 | (ctx->is_t6xx ? 0 : 0x2),
403b8e80941Smrg        };
404b8e80941Smrg
405b8e80941Smrg        memcpy(&ctx->payload_vertex, &payload, sizeof(payload));
406b8e80941Smrg}
407b8e80941Smrg
408b8e80941Smrgstatic void
409b8e80941Smrgpanfrost_emit_tiler_payload(struct panfrost_context *ctx)
410b8e80941Smrg{
411b8e80941Smrg        struct midgard_payload_vertex_tiler payload = {
412b8e80941Smrg                .prefix = {
413b8e80941Smrg                        .workgroups_z_shift = 32,
414b8e80941Smrg                        .workgroups_x_shift_2 = 0x2,
415b8e80941Smrg                        .workgroups_x_shift_3 = 0x6,
416b8e80941Smrg
417b8e80941Smrg                        .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? */
418b8e80941Smrg                },
419b8e80941Smrg        };
420b8e80941Smrg
421b8e80941Smrg        memcpy(&ctx->payload_tiler, &payload, sizeof(payload));
422b8e80941Smrg}
423b8e80941Smrg
/* Direct 1:1 translation of gallium texture wrap modes to the Mali
 * equivalents; asserts on modes without a mapping */

static unsigned
translate_tex_wrap(enum pipe_tex_wrap w)
{
        switch (w) {
        case PIPE_TEX_WRAP_REPEAT:
                return MALI_WRAP_REPEAT;

        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
                return MALI_WRAP_CLAMP_TO_EDGE;

        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
                return MALI_WRAP_CLAMP_TO_BORDER;

        case PIPE_TEX_WRAP_MIRROR_REPEAT:
                return MALI_WRAP_MIRRORED_REPEAT;

        default:
                assert(0);
                return 0;
        }
}
445b8e80941Smrg
446b8e80941Smrgstatic unsigned
447b8e80941Smrgtranslate_tex_filter(enum pipe_tex_filter f)
448b8e80941Smrg{
449b8e80941Smrg        switch (f) {
450b8e80941Smrg        case PIPE_TEX_FILTER_NEAREST:
451b8e80941Smrg                return MALI_NEAREST;
452b8e80941Smrg
453b8e80941Smrg        case PIPE_TEX_FILTER_LINEAR:
454b8e80941Smrg                return MALI_LINEAR;
455b8e80941Smrg
456b8e80941Smrg        default:
457b8e80941Smrg                assert(0);
458b8e80941Smrg                return 0;
459b8e80941Smrg        }
460b8e80941Smrg}
461b8e80941Smrg
462b8e80941Smrgstatic unsigned
463b8e80941Smrgtranslate_mip_filter(enum pipe_tex_mipfilter f)
464b8e80941Smrg{
465b8e80941Smrg        return (f == PIPE_TEX_MIPFILTER_LINEAR) ? MALI_MIP_LINEAR : 0;
466b8e80941Smrg}
467b8e80941Smrg
/* Direct 1:1 translation of gallium compare functions to the Mali
 * encoding; the switch is exhaustive, so the trailing assert is
 * unreachable for valid input */

static unsigned
panfrost_translate_compare_func(enum pipe_compare_func in)
{
        switch (in) {
        case PIPE_FUNC_NEVER:
                return MALI_FUNC_NEVER;

        case PIPE_FUNC_LESS:
                return MALI_FUNC_LESS;

        case PIPE_FUNC_EQUAL:
                return MALI_FUNC_EQUAL;

        case PIPE_FUNC_LEQUAL:
                return MALI_FUNC_LEQUAL;

        case PIPE_FUNC_GREATER:
                return MALI_FUNC_GREATER;

        case PIPE_FUNC_NOTEQUAL:
                return MALI_FUNC_NOTEQUAL;

        case PIPE_FUNC_GEQUAL:
                return MALI_FUNC_GEQUAL;

        case PIPE_FUNC_ALWAYS:
                return MALI_FUNC_ALWAYS;
        }

        assert (0);
        return 0; /* Unreachable */
}
500b8e80941Smrg
/* Same mapping as panfrost_translate_compare_func, but to the alternate
 * MALI_ALT_FUNC_* encoding (used by some descriptor fields) */

static unsigned
panfrost_translate_alt_compare_func(enum pipe_compare_func in)
{
        switch (in) {
        case PIPE_FUNC_NEVER:
                return MALI_ALT_FUNC_NEVER;

        case PIPE_FUNC_LESS:
                return MALI_ALT_FUNC_LESS;

        case PIPE_FUNC_EQUAL:
                return MALI_ALT_FUNC_EQUAL;

        case PIPE_FUNC_LEQUAL:
                return MALI_ALT_FUNC_LEQUAL;

        case PIPE_FUNC_GREATER:
                return MALI_ALT_FUNC_GREATER;

        case PIPE_FUNC_NOTEQUAL:
                return MALI_ALT_FUNC_NOTEQUAL;

        case PIPE_FUNC_GEQUAL:
                return MALI_ALT_FUNC_GEQUAL;

        case PIPE_FUNC_ALWAYS:
                return MALI_ALT_FUNC_ALWAYS;
        }

        assert (0);
        return 0; /* Unreachable */
}
533b8e80941Smrg
/* Direct 1:1 translation of gallium stencil ops to the Mali encoding; the
 * switch is exhaustive, so the trailing assert is unreachable for valid
 * input */

static unsigned
panfrost_translate_stencil_op(enum pipe_stencil_op in)
{
        switch (in) {
        case PIPE_STENCIL_OP_KEEP:
                return MALI_STENCIL_KEEP;

        case PIPE_STENCIL_OP_ZERO:
                return MALI_STENCIL_ZERO;

        case PIPE_STENCIL_OP_REPLACE:
                return MALI_STENCIL_REPLACE;

        case PIPE_STENCIL_OP_INCR:
                return MALI_STENCIL_INCR;

        case PIPE_STENCIL_OP_DECR:
                return MALI_STENCIL_DECR;

        case PIPE_STENCIL_OP_INCR_WRAP:
                return MALI_STENCIL_INCR_WRAP;

        case PIPE_STENCIL_OP_DECR_WRAP:
                return MALI_STENCIL_DECR_WRAP;

        case PIPE_STENCIL_OP_INVERT:
                return MALI_STENCIL_INVERT;
        }

        assert (0);
        return 0; /* Unreachable */
}
566b8e80941Smrg
/* Translate a gallium stencil state into the Mali descriptor form. Note the
 * reference value is not part of pipe_stencil_state (gallium supplies it via
 * set_stencil_ref), so it is zeroed here and patched elsewhere */

static void
panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out)
{
        out->ref = 0; /* Gallium gets it from elsewhere */

        out->mask = in->valuemask;
        out->func = panfrost_translate_compare_func(in->func);
        out->sfail = panfrost_translate_stencil_op(in->fail_op);
        out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
        out->dppass = panfrost_translate_stencil_op(in->zpass_op);
}
578b8e80941Smrg
579b8e80941Smrgstatic void
580b8e80941Smrgpanfrost_default_shader_backend(struct panfrost_context *ctx)
581b8e80941Smrg{
582b8e80941Smrg        struct mali_shader_meta shader = {
583b8e80941Smrg                .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000),
584b8e80941Smrg
585b8e80941Smrg                .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010,
586b8e80941Smrg                .unknown2_4 = MALI_NO_MSAA | 0x4e0,
587b8e80941Smrg        };
588b8e80941Smrg
589b8e80941Smrg	if (ctx->is_t6xx) {
590b8e80941Smrg                shader.unknown2_4 |= 0x10;
591b8e80941Smrg	}
592b8e80941Smrg
593b8e80941Smrg        struct pipe_stencil_state default_stencil = {
594b8e80941Smrg                .enabled = 0,
595b8e80941Smrg                .func = PIPE_FUNC_ALWAYS,
596b8e80941Smrg                .fail_op = MALI_STENCIL_KEEP,
597b8e80941Smrg                .zfail_op = MALI_STENCIL_KEEP,
598b8e80941Smrg                .zpass_op = MALI_STENCIL_KEEP,
599b8e80941Smrg                .writemask = 0xFF,
600b8e80941Smrg                .valuemask = 0xFF
601b8e80941Smrg        };
602b8e80941Smrg
603b8e80941Smrg        panfrost_make_stencil_state(&default_stencil, &shader.stencil_front);
604b8e80941Smrg        shader.stencil_mask_front = default_stencil.writemask;
605b8e80941Smrg
606b8e80941Smrg        panfrost_make_stencil_state(&default_stencil, &shader.stencil_back);
607b8e80941Smrg        shader.stencil_mask_back = default_stencil.writemask;
608b8e80941Smrg
609b8e80941Smrg        if (default_stencil.enabled)
610b8e80941Smrg                shader.unknown2_4 |= MALI_STENCIL_TEST;
611b8e80941Smrg
612b8e80941Smrg        memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader));
613b8e80941Smrg}
614b8e80941Smrg
615b8e80941Smrg/* Generates a vertex/tiler job. This is, in some sense, the heart of the
616b8e80941Smrg * graphics command stream. It should be called once per draw, accordding to
617b8e80941Smrg * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in
618b8e80941Smrg * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for
619b8e80941Smrg * vertex jobs. */
620b8e80941Smrg
struct panfrost_transfer
panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_elided_tiler)
{
        /* Each draw call corresponds to two jobs, and we want to offset to leave room for the set-value job */
        int draw_job_index = 1 + (2 * ctx->draw_count);

        struct mali_job_descriptor_header job = {
                .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
                /* Vertex job takes the even slot; the paired tiler job the
                 * next (odd) one */
                .job_index = draw_job_index + (is_tiler ? 1 : 0),
#ifdef __LP64__
                /* 64-bit descriptors on LP64 builds */
                .job_descriptor_size = 1,
#endif
        };

        /* Only non-elided tiler jobs have dependencies which are known at this point */

        if (is_tiler && !is_elided_tiler) {
                /* Tiler jobs depend on vertex jobs */

                job.job_dependency_index_1 = draw_job_index;

                /* Tiler jobs also depend on the previous tiler job */

                if (ctx->draw_count)
                        job.job_dependency_index_2 = draw_job_index - 1;
        }

        struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payload_tiler : &ctx->payload_vertex;

        /* There's some padding hacks on 32-bit: the payload is written 4
         * bytes earlier so header and payload overlap/abut correctly */

#ifdef __LP64__
        int offset = 0;
#else
        int offset = 4;
#endif
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload));
        memcpy(transfer.cpu, &job, sizeof(job));
        memcpy(transfer.cpu + sizeof(job) - offset, payload, sizeof(*payload));
        return transfer;
}
662b8e80941Smrg
663b8e80941Smrg/* Generates a set value job. It's unclear what exactly this does, why it's
664b8e80941Smrg * necessary, and when to call it. */
665b8e80941Smrg
666b8e80941Smrgstatic void
667b8e80941Smrgpanfrost_set_value_job(struct panfrost_context *ctx)
668b8e80941Smrg{
669b8e80941Smrg        struct mali_job_descriptor_header job = {
670b8e80941Smrg                .job_type = JOB_TYPE_SET_VALUE,
671b8e80941Smrg                .job_descriptor_size = 1,
672b8e80941Smrg                .job_index = 1 + (2 * ctx->draw_count),
673b8e80941Smrg        };
674b8e80941Smrg
675b8e80941Smrg        struct mali_payload_set_value payload = {
676b8e80941Smrg                .out = ctx->misc_0.gpu,
677b8e80941Smrg                .unknown = 0x3,
678b8e80941Smrg        };
679b8e80941Smrg
680b8e80941Smrg        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(payload));
681b8e80941Smrg        memcpy(transfer.cpu, &job, sizeof(job));
682b8e80941Smrg        memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload));
683b8e80941Smrg
684b8e80941Smrg        ctx->u_set_value_job = (struct mali_job_descriptor_header *) transfer.cpu;
685b8e80941Smrg        ctx->set_value_job = transfer.gpu;
686b8e80941Smrg}
687b8e80941Smrg
688b8e80941Smrgstatic mali_ptr
689b8e80941Smrgpanfrost_emit_varyings(
690b8e80941Smrg                struct panfrost_context *ctx,
691b8e80941Smrg                union mali_attr *slot,
692b8e80941Smrg                unsigned stride,
693b8e80941Smrg                unsigned count)
694b8e80941Smrg{
695b8e80941Smrg        mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height;
696b8e80941Smrg
697b8e80941Smrg        /* Fill out the descriptor */
698b8e80941Smrg        slot->elements = varying_address | MALI_ATTR_LINEAR;
699b8e80941Smrg        slot->stride = stride;
700b8e80941Smrg        slot->size = stride * count;
701b8e80941Smrg
702b8e80941Smrg        ctx->varying_height += ALIGN(slot->size, 64);
703b8e80941Smrg        assert(ctx->varying_height < ctx->varying_mem.size);
704b8e80941Smrg
705b8e80941Smrg        return varying_address;
706b8e80941Smrg}
707b8e80941Smrg
708b8e80941Smrgstatic void
709b8e80941Smrgpanfrost_emit_point_coord(union mali_attr *slot)
710b8e80941Smrg{
711b8e80941Smrg        slot->elements = MALI_VARYING_POINT_COORD | MALI_ATTR_LINEAR;
712b8e80941Smrg        slot->stride = slot->size = 0;
713b8e80941Smrg}
714b8e80941Smrg
715b8e80941Smrgstatic void
716b8e80941Smrgpanfrost_emit_varying_descriptor(
717b8e80941Smrg                struct panfrost_context *ctx,
718b8e80941Smrg                unsigned invocation_count)
719b8e80941Smrg{
720b8e80941Smrg        /* Load the shaders */
721b8e80941Smrg
722b8e80941Smrg        struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
723b8e80941Smrg        struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];
724b8e80941Smrg
725b8e80941Smrg        /* Allocate the varying descriptor */
726b8e80941Smrg
727b8e80941Smrg        size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
728b8e80941Smrg        size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;
729b8e80941Smrg
730b8e80941Smrg        struct panfrost_transfer trans = panfrost_allocate_transient(ctx,
731b8e80941Smrg                        vs_size + fs_size);
732b8e80941Smrg
733b8e80941Smrg        memcpy(trans.cpu, vs->varyings, vs_size);
734b8e80941Smrg        memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
735b8e80941Smrg
736b8e80941Smrg        ctx->payload_vertex.postfix.varying_meta = trans.gpu;
737b8e80941Smrg        ctx->payload_tiler.postfix.varying_meta = trans.gpu + vs_size;
738b8e80941Smrg
739b8e80941Smrg        /* Buffer indices must be in this order per our convention */
740b8e80941Smrg        union mali_attr varyings[PIPE_MAX_ATTRIBS];
741b8e80941Smrg        unsigned idx = 0;
742b8e80941Smrg
743b8e80941Smrg        /* General varyings -- use the VS's, since those are more likely to be
744b8e80941Smrg         * accurate on desktop */
745b8e80941Smrg
746b8e80941Smrg        panfrost_emit_varyings(ctx, &varyings[idx++],
747b8e80941Smrg                        vs->general_varying_stride, invocation_count);
748b8e80941Smrg
749b8e80941Smrg        /* fp32 vec4 gl_Position */
750b8e80941Smrg        ctx->payload_tiler.postfix.position_varying =
751b8e80941Smrg                panfrost_emit_varyings(ctx, &varyings[idx++],
752b8e80941Smrg                                sizeof(float) * 4, invocation_count);
753b8e80941Smrg
754b8e80941Smrg
755b8e80941Smrg        if (vs->writes_point_size || fs->reads_point_coord) {
756b8e80941Smrg                /* fp16 vec1 gl_PointSize */
757b8e80941Smrg                ctx->payload_tiler.primitive_size.pointer =
758b8e80941Smrg                        panfrost_emit_varyings(ctx, &varyings[idx++],
759b8e80941Smrg                                        2, invocation_count);
760b8e80941Smrg        }
761b8e80941Smrg
762b8e80941Smrg        if (fs->reads_point_coord) {
763b8e80941Smrg                /* Special descriptor */
764b8e80941Smrg                panfrost_emit_point_coord(&varyings[idx++]);
765b8e80941Smrg        }
766b8e80941Smrg
767b8e80941Smrg        mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr));
768b8e80941Smrg        ctx->payload_vertex.postfix.varyings = varyings_p;
769b8e80941Smrg        ctx->payload_tiler.postfix.varyings = varyings_p;
770b8e80941Smrg}
771b8e80941Smrg
772b8e80941Smrgstatic mali_ptr
773b8e80941Smrgpanfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i)
774b8e80941Smrg{
775b8e80941Smrg        struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
776b8e80941Smrg        struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
777b8e80941Smrg
778b8e80941Smrg        return rsrc->bo->gpu + buf->buffer_offset;
779b8e80941Smrg}
780b8e80941Smrg
781b8e80941Smrg/* Emits attributes and varying descriptors, which should be called every draw,
782b8e80941Smrg * excepting some obscure circumstances */
783b8e80941Smrg
784b8e80941Smrgstatic void
785b8e80941Smrgpanfrost_emit_vertex_data(struct panfrost_context *ctx, struct panfrost_job *job)
786b8e80941Smrg{
787b8e80941Smrg        /* Staged mali_attr, and index into them. i =/= k, depending on the
788b8e80941Smrg         * vertex buffer mask */
789b8e80941Smrg        union mali_attr attrs[PIPE_MAX_ATTRIBS];
790b8e80941Smrg        unsigned k = 0;
791b8e80941Smrg
792b8e80941Smrg        unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count);
793b8e80941Smrg
794b8e80941Smrg        for (int i = 0; i < ARRAY_SIZE(ctx->vertex_buffers); ++i) {
795b8e80941Smrg                if (!(ctx->vb_mask & (1 << i))) continue;
796b8e80941Smrg
797b8e80941Smrg                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
798b8e80941Smrg                struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
799b8e80941Smrg
800b8e80941Smrg                if (!rsrc) continue;
801b8e80941Smrg
802b8e80941Smrg                /* Align to 64 bytes by masking off the lower bits. This
803b8e80941Smrg                 * will be adjusted back when we fixup the src_offset in
804b8e80941Smrg                 * mali_attr_meta */
805b8e80941Smrg
806b8e80941Smrg                mali_ptr addr = panfrost_vertex_buffer_address(ctx, i) & ~63;
807b8e80941Smrg
808b8e80941Smrg                /* Offset vertex count by draw_start to make sure we upload enough */
809b8e80941Smrg                attrs[k].stride = buf->stride;
810b8e80941Smrg                attrs[k].size = rsrc->base.width0;
811b8e80941Smrg
812b8e80941Smrg                panfrost_job_add_bo(job, rsrc->bo);
813b8e80941Smrg                attrs[k].elements = addr | MALI_ATTR_LINEAR;
814b8e80941Smrg
815b8e80941Smrg                ++k;
816b8e80941Smrg        }
817b8e80941Smrg
818b8e80941Smrg        ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, k * sizeof(union mali_attr));
819b8e80941Smrg
820b8e80941Smrg        panfrost_emit_varying_descriptor(ctx, invocation_count);
821b8e80941Smrg}
822b8e80941Smrg
823b8e80941Smrgstatic bool
824b8e80941Smrgpanfrost_writes_point_size(struct panfrost_context *ctx)
825b8e80941Smrg{
826b8e80941Smrg        assert(ctx->vs);
827b8e80941Smrg        struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
828b8e80941Smrg
829b8e80941Smrg        return vs->writes_point_size && ctx->payload_tiler.prefix.draw_mode == MALI_POINTS;
830b8e80941Smrg}
831b8e80941Smrg
832b8e80941Smrg/* Stage the attribute descriptors so we can adjust src_offset
833b8e80941Smrg * to let BOs align nicely */
834b8e80941Smrg
835b8e80941Smrgstatic void
836b8e80941Smrgpanfrost_stage_attributes(struct panfrost_context *ctx)
837b8e80941Smrg{
838b8e80941Smrg        struct panfrost_vertex_state *so = ctx->vertex;
839b8e80941Smrg
840b8e80941Smrg        size_t sz = sizeof(struct mali_attr_meta) * so->num_elements;
841b8e80941Smrg        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sz);
842b8e80941Smrg        struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu;
843b8e80941Smrg
844b8e80941Smrg        /* Copy as-is for the first pass */
845b8e80941Smrg        memcpy(target, so->hw, sz);
846b8e80941Smrg
847b8e80941Smrg        /* Fixup offsets for the second pass. Recall that the hardware
848b8e80941Smrg         * calculates attribute addresses as:
849b8e80941Smrg         *
850b8e80941Smrg         *      addr = base + (stride * vtx) + src_offset;
851b8e80941Smrg         *
852b8e80941Smrg         * However, on Mali, base must be aligned to 64-bytes, so we
853b8e80941Smrg         * instead let:
854b8e80941Smrg         *
855b8e80941Smrg         *      base' = base & ~63 = base - (base & 63)
856b8e80941Smrg         *
857b8e80941Smrg         * To compensate when using base' (see emit_vertex_data), we have
858b8e80941Smrg         * to adjust src_offset by the masked off piece:
859b8e80941Smrg         *
860b8e80941Smrg         *      addr' = base' + (stride * vtx) + (src_offset + (base & 63))
861b8e80941Smrg         *            = base - (base & 63) + (stride * vtx) + src_offset + (base & 63)
862b8e80941Smrg         *            = base + (stride * vtx) + src_offset
863b8e80941Smrg         *            = addr;
864b8e80941Smrg         *
865b8e80941Smrg         * QED.
866b8e80941Smrg         */
867b8e80941Smrg
868b8e80941Smrg        for (unsigned i = 0; i < so->num_elements; ++i) {
869b8e80941Smrg                unsigned vbi = so->pipe[i].vertex_buffer_index;
870b8e80941Smrg                mali_ptr addr = panfrost_vertex_buffer_address(ctx, vbi);
871b8e80941Smrg
872b8e80941Smrg                /* Adjust by the masked off bits of the offset */
873b8e80941Smrg                target[i].src_offset += (addr & 63);
874b8e80941Smrg        }
875b8e80941Smrg
876b8e80941Smrg        ctx->payload_vertex.postfix.attribute_meta = transfer.gpu;
877b8e80941Smrg}
878b8e80941Smrg
879b8e80941Smrg/* Go through dirty flags and actualise them in the cmdstream. */
880b8e80941Smrg
881b8e80941Smrgvoid
882b8e80941Smrgpanfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
883b8e80941Smrg{
884b8e80941Smrg        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
885b8e80941Smrg
886b8e80941Smrg        if (with_vertex_data) {
887b8e80941Smrg                panfrost_emit_vertex_data(ctx, job);
888b8e80941Smrg        }
889b8e80941Smrg
890b8e80941Smrg        bool msaa = ctx->rasterizer->base.multisample;
891b8e80941Smrg
892b8e80941Smrg        if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
893b8e80941Smrg                ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables;
894b8e80941Smrg
895b8e80941Smrg                /* TODO: Sample size */
896b8e80941Smrg                SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa);
897b8e80941Smrg                SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa);
898b8e80941Smrg        }
899b8e80941Smrg
900b8e80941Smrg        /* Enable job requirements at draw-time */
901b8e80941Smrg
902b8e80941Smrg        if (msaa)
903b8e80941Smrg                job->requirements |= PAN_REQ_MSAA;
904b8e80941Smrg
905b8e80941Smrg        if (ctx->depth_stencil->depth.writemask)
906b8e80941Smrg                job->requirements |= PAN_REQ_DEPTH_WRITE;
907b8e80941Smrg
908b8e80941Smrg        if (ctx->occlusion_query) {
909b8e80941Smrg                ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE;
910b8e80941Smrg                ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu;
911b8e80941Smrg        }
912b8e80941Smrg
913b8e80941Smrg        if (ctx->dirty & PAN_DIRTY_VS) {
914b8e80941Smrg                assert(ctx->vs);
915b8e80941Smrg
916b8e80941Smrg                struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
917b8e80941Smrg
918b8e80941Smrg                /* Late shader descriptor assignments */
919b8e80941Smrg
920b8e80941Smrg                vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX];
921b8e80941Smrg                vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX];
922b8e80941Smrg
923b8e80941Smrg                /* Who knows */
924b8e80941Smrg                vs->tripipe->midgard1.unknown1 = 0x2201;
925b8e80941Smrg
926b8e80941Smrg                ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4;
927b8e80941Smrg        }
928b8e80941Smrg
929b8e80941Smrg        if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) {
930b8e80941Smrg                /* Check if we need to link the gl_PointSize varying */
931b8e80941Smrg                if (!panfrost_writes_point_size(ctx)) {
932b8e80941Smrg                        /* If the size is constant, write it out. Otherwise,
933b8e80941Smrg                         * don't touch primitive_size (since we would clobber
934b8e80941Smrg                         * the pointer there) */
935b8e80941Smrg
936b8e80941Smrg                        ctx->payload_tiler.primitive_size.constant = ctx->rasterizer->base.line_width;
937b8e80941Smrg                }
938b8e80941Smrg        }
939b8e80941Smrg
940b8e80941Smrg        /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. */
941b8e80941Smrg        if (ctx->fs)
942b8e80941Smrg                ctx->dirty |= PAN_DIRTY_FS;
943b8e80941Smrg
944b8e80941Smrg        if (ctx->dirty & PAN_DIRTY_FS) {
945b8e80941Smrg                assert(ctx->fs);
946b8e80941Smrg                struct panfrost_shader_state *variant = &ctx->fs->variants[ctx->fs->active_variant];
947b8e80941Smrg
948b8e80941Smrg#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
949b8e80941Smrg
950b8e80941Smrg                COPY(shader);
951b8e80941Smrg                COPY(attribute_count);
952b8e80941Smrg                COPY(varying_count);
953b8e80941Smrg                COPY(midgard1.uniform_count);
954b8e80941Smrg                COPY(midgard1.work_count);
955b8e80941Smrg                COPY(midgard1.unknown2);
956b8e80941Smrg
957b8e80941Smrg#undef COPY
958b8e80941Smrg                /* If there is a blend shader, work registers are shared */
959b8e80941Smrg
960b8e80941Smrg                if (ctx->blend->has_blend_shader)
961b8e80941Smrg                        ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16;
962b8e80941Smrg
963b8e80941Smrg                /* Set late due to depending on render state */
964b8e80941Smrg                /* The one at the end seems to mean "1 UBO" */
965b8e80941Smrg                ctx->fragment_shader_core.midgard1.unknown1 = MALI_NO_ALPHA_TO_COVERAGE | 0x200 | 0x2201;
966b8e80941Smrg
967b8e80941Smrg                /* Assign texture/sample count right before upload */
968b8e80941Smrg                ctx->fragment_shader_core.texture_count = ctx->sampler_view_count[PIPE_SHADER_FRAGMENT];
969b8e80941Smrg                ctx->fragment_shader_core.sampler_count = ctx->sampler_count[PIPE_SHADER_FRAGMENT];
970b8e80941Smrg
971b8e80941Smrg                /* Assign the stencil refs late */
972b8e80941Smrg                ctx->fragment_shader_core.stencil_front.ref = ctx->stencil_ref.ref_value[0];
973b8e80941Smrg                ctx->fragment_shader_core.stencil_back.ref = ctx->stencil_ref.ref_value[1];
974b8e80941Smrg
975b8e80941Smrg                /* CAN_DISCARD should be set if the fragment shader possibly
976b8e80941Smrg                 * contains a 'discard' instruction. It is likely this is
977b8e80941Smrg                 * related to optimizations related to forward-pixel kill, as
978b8e80941Smrg                 * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good
979b8e80941Smrg                 * thing?" by Peter Harris
980b8e80941Smrg                 */
981b8e80941Smrg
982b8e80941Smrg                if (variant->can_discard) {
983b8e80941Smrg                        ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
984b8e80941Smrg                        ctx->fragment_shader_core.midgard1.unknown1 &= ~MALI_NO_ALPHA_TO_COVERAGE;
985b8e80941Smrg                        ctx->fragment_shader_core.midgard1.unknown1 |= 0x4000;
986b8e80941Smrg                        ctx->fragment_shader_core.midgard1.unknown1 = 0x4200;
987b8e80941Smrg                }
988b8e80941Smrg
989b8e80941Smrg		/* Check if we're using the default blend descriptor (fast path) */
990b8e80941Smrg
991b8e80941Smrg		bool no_blending =
992b8e80941Smrg			!ctx->blend->has_blend_shader &&
993b8e80941Smrg			(ctx->blend->equation.rgb_mode == 0x122) &&
994b8e80941Smrg			(ctx->blend->equation.alpha_mode == 0x122) &&
995b8e80941Smrg			(ctx->blend->equation.color_mask == 0xf);
996b8e80941Smrg
997b8e80941Smrg                /* Even on MFBD, the shader descriptor gets blend shaders. It's
998b8e80941Smrg                 * *also* copied to the blend_meta appended (by convention),
999b8e80941Smrg                 * but this is the field actually read by the hardware. (Or
1000b8e80941Smrg                 * maybe both are read...?) */
1001b8e80941Smrg
1002b8e80941Smrg                if (ctx->blend->has_blend_shader) {
1003b8e80941Smrg                        ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader;
1004b8e80941Smrg                }
1005b8e80941Smrg
1006b8e80941Smrg                if (ctx->require_sfbd) {
1007b8e80941Smrg                        /* When only a single render target platform is used, the blend
1008b8e80941Smrg                         * information is inside the shader meta itself. We
1009b8e80941Smrg                         * additionally need to signal CAN_DISCARD for nontrivial blend
1010b8e80941Smrg                         * modes (so we're able to read back the destination buffer) */
1011b8e80941Smrg
1012b8e80941Smrg                        if (!ctx->blend->has_blend_shader) {
1013b8e80941Smrg                                memcpy(&ctx->fragment_shader_core.blend_equation, &ctx->blend->equation, sizeof(ctx->blend->equation));
1014b8e80941Smrg                        }
1015b8e80941Smrg
1016b8e80941Smrg                        if (!no_blending) {
1017b8e80941Smrg                                ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
1018b8e80941Smrg                        }
1019b8e80941Smrg                }
1020b8e80941Smrg
1021b8e80941Smrg                size_t size = sizeof(struct mali_shader_meta) + sizeof(struct mali_blend_meta);
1022b8e80941Smrg                struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
1023b8e80941Smrg                memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta));
1024b8e80941Smrg
1025b8e80941Smrg                ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4;
1026b8e80941Smrg
1027b8e80941Smrg                if (!ctx->require_sfbd) {
1028b8e80941Smrg                        /* Additional blend descriptor tacked on for jobs using MFBD */
1029b8e80941Smrg
1030b8e80941Smrg                        unsigned blend_count = 0;
1031b8e80941Smrg
1032b8e80941Smrg                        if (ctx->blend->has_blend_shader) {
1033b8e80941Smrg                                /* For a blend shader, the bottom nibble corresponds to
1034b8e80941Smrg                                 * the number of work registers used, which signals the
1035b8e80941Smrg                                 * -existence- of a blend shader */
1036b8e80941Smrg
1037b8e80941Smrg                                assert(ctx->blend->blend_work_count >= 2);
1038b8e80941Smrg                                blend_count |= MIN2(ctx->blend->blend_work_count, 3);
1039b8e80941Smrg                        } else {
1040b8e80941Smrg                                /* Otherwise, the bottom bit simply specifies if
1041b8e80941Smrg                                 * blending (anything other than REPLACE) is enabled */
1042b8e80941Smrg
1043b8e80941Smrg
1044b8e80941Smrg                                if (!no_blending)
1045b8e80941Smrg                                        blend_count |= 0x1;
1046b8e80941Smrg                        }
1047b8e80941Smrg
1048b8e80941Smrg                        /* Second blend equation is always a simple replace */
1049b8e80941Smrg
1050b8e80941Smrg                        uint64_t replace_magic = 0xf0122122;
1051b8e80941Smrg                        struct mali_blend_equation replace_mode;
1052b8e80941Smrg                        memcpy(&replace_mode, &replace_magic, sizeof(replace_mode));
1053b8e80941Smrg
1054b8e80941Smrg                        struct mali_blend_meta blend_meta[] = {
1055b8e80941Smrg                                {
1056b8e80941Smrg                                        .unk1 = 0x200 | blend_count,
1057b8e80941Smrg                                        .blend_equation_1 = ctx->blend->equation,
1058b8e80941Smrg                                        .blend_equation_2 = replace_mode
1059b8e80941Smrg                                },
1060b8e80941Smrg                        };
1061b8e80941Smrg
1062b8e80941Smrg                        if (ctx->blend->has_blend_shader) {
1063b8e80941Smrg                                blend_meta[0].blend_shader = ctx->blend->blend_shader;
1064b8e80941Smrg                        }
1065b8e80941Smrg
1066b8e80941Smrg                        memcpy(transfer.cpu + sizeof(struct mali_shader_meta), blend_meta, sizeof(blend_meta));
1067b8e80941Smrg                }
1068b8e80941Smrg        }
1069b8e80941Smrg
1070b8e80941Smrg        /* We stage to transient, so always dirty.. */
1071b8e80941Smrg        panfrost_stage_attributes(ctx);
1072b8e80941Smrg
1073b8e80941Smrg        if (ctx->dirty & PAN_DIRTY_SAMPLERS) {
1074b8e80941Smrg                /* Upload samplers back to back, no padding */
1075b8e80941Smrg
1076b8e80941Smrg                for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
1077b8e80941Smrg                        if (!ctx->sampler_count[t]) continue;
1078b8e80941Smrg
1079b8e80941Smrg                        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]);
1080b8e80941Smrg                        struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu;
1081b8e80941Smrg
1082b8e80941Smrg                        for (int i = 0; i < ctx->sampler_count[t]; ++i) {
1083b8e80941Smrg                                desc[i] = ctx->samplers[t][i]->hw;
1084b8e80941Smrg                        }
1085b8e80941Smrg
1086b8e80941Smrg                        if (t == PIPE_SHADER_FRAGMENT)
1087b8e80941Smrg                                ctx->payload_tiler.postfix.sampler_descriptor = transfer.gpu;
1088b8e80941Smrg                        else if (t == PIPE_SHADER_VERTEX)
1089b8e80941Smrg                                ctx->payload_vertex.postfix.sampler_descriptor = transfer.gpu;
1090b8e80941Smrg                        else
1091b8e80941Smrg                                assert(0);
1092b8e80941Smrg                }
1093b8e80941Smrg        }
1094b8e80941Smrg
1095b8e80941Smrg        if (ctx->dirty & PAN_DIRTY_TEXTURES) {
1096b8e80941Smrg                for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
1097b8e80941Smrg                        /* Shortcircuit */
1098b8e80941Smrg                        if (!ctx->sampler_view_count[t]) continue;
1099b8e80941Smrg
1100b8e80941Smrg                        uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1101b8e80941Smrg
1102b8e80941Smrg                        for (int i = 0; i < ctx->sampler_view_count[t]; ++i) {
1103b8e80941Smrg                                if (!ctx->sampler_views[t][i])
1104b8e80941Smrg                                        continue;
1105b8e80941Smrg
1106b8e80941Smrg                                struct pipe_resource *tex_rsrc = ctx->sampler_views[t][i]->base.texture;
1107b8e80941Smrg                                struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc;
1108b8e80941Smrg
1109b8e80941Smrg                                /* Inject the addresses in, interleaving cube
1110b8e80941Smrg                                 * faces and mip levels appropriately. */
1111b8e80941Smrg
1112b8e80941Smrg                                for (int l = 0; l <= tex_rsrc->last_level; ++l) {
1113b8e80941Smrg                                        for (int f = 0; f < tex_rsrc->array_size; ++f) {
1114b8e80941Smrg                                                unsigned idx = (l * tex_rsrc->array_size) + f;
1115b8e80941Smrg
1116b8e80941Smrg                                                ctx->sampler_views[t][i]->hw.swizzled_bitmaps[idx] =
1117b8e80941Smrg                                                        rsrc->bo->gpu +
1118b8e80941Smrg                                                        rsrc->bo->slices[l].offset +
1119b8e80941Smrg                                                        f * rsrc->bo->cubemap_stride;
1120b8e80941Smrg                                        }
1121b8e80941Smrg                                }
1122b8e80941Smrg
1123b8e80941Smrg                                trampolines[i] = panfrost_upload_transient(ctx, &ctx->sampler_views[t][i]->hw, sizeof(struct mali_texture_descriptor));
1124b8e80941Smrg                        }
1125b8e80941Smrg
1126b8e80941Smrg                        mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);
1127b8e80941Smrg
1128b8e80941Smrg                        if (t == PIPE_SHADER_FRAGMENT)
1129b8e80941Smrg                                ctx->payload_tiler.postfix.texture_trampoline = trampoline;
1130b8e80941Smrg                        else if (t == PIPE_SHADER_VERTEX)
1131b8e80941Smrg                                ctx->payload_vertex.postfix.texture_trampoline = trampoline;
1132b8e80941Smrg                        else
1133b8e80941Smrg                                assert(0);
1134b8e80941Smrg                }
1135b8e80941Smrg        }
1136b8e80941Smrg
1137b8e80941Smrg        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1138b8e80941Smrg
1139b8e80941Smrg        /* For flipped-Y buffers (signaled by negative scale), the translate is
1140b8e80941Smrg         * flipped as well */
1141b8e80941Smrg
1142b8e80941Smrg        bool invert_y = vp->scale[1] < 0.0;
1143b8e80941Smrg        float translate_y = vp->translate[1];
1144b8e80941Smrg
1145b8e80941Smrg        if (invert_y)
1146b8e80941Smrg                translate_y = ctx->pipe_framebuffer.height - translate_y;
1147b8e80941Smrg
1148b8e80941Smrg        for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i) {
1149b8e80941Smrg                struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i];
1150b8e80941Smrg
1151b8e80941Smrg                struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
1152b8e80941Smrg                struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];
1153b8e80941Smrg                struct panfrost_shader_state *ss = (i == PIPE_SHADER_FRAGMENT) ? fs : vs;
1154b8e80941Smrg
1155b8e80941Smrg                /* Allocate room for the sysval and the uniforms */
1156b8e80941Smrg                size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1157b8e80941Smrg                size_t size = sys_size + buf->size;
1158b8e80941Smrg                struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
1159b8e80941Smrg
1160b8e80941Smrg                /* Upload sysvals requested by the shader */
1161b8e80941Smrg                float *uniforms = (float *) transfer.cpu;
1162b8e80941Smrg                for (unsigned i = 0; i < ss->sysval_count; ++i) {
1163b8e80941Smrg                        int sysval = ss->sysval[i];
1164b8e80941Smrg
1165b8e80941Smrg                        if (sysval == PAN_SYSVAL_VIEWPORT_SCALE) {
1166b8e80941Smrg                                uniforms[4*i + 0] = vp->scale[0];
1167b8e80941Smrg                                uniforms[4*i + 1] = fabsf(vp->scale[1]);
1168b8e80941Smrg                                uniforms[4*i + 2] = vp->scale[2];
1169b8e80941Smrg                        } else if (sysval == PAN_SYSVAL_VIEWPORT_OFFSET) {
1170b8e80941Smrg                                uniforms[4*i + 0] = vp->translate[0];
1171b8e80941Smrg                                uniforms[4*i + 1] = translate_y;
1172b8e80941Smrg                                uniforms[4*i + 2] = vp->translate[2];
1173b8e80941Smrg                        } else {
1174b8e80941Smrg                                assert(0);
1175b8e80941Smrg                        }
1176b8e80941Smrg                }
1177b8e80941Smrg
1178b8e80941Smrg                /* Upload uniforms */
1179b8e80941Smrg                memcpy(transfer.cpu + sys_size, buf->buffer, buf->size);
1180b8e80941Smrg
1181b8e80941Smrg                int uniform_count = 0;
1182b8e80941Smrg
1183b8e80941Smrg                struct mali_vertex_tiler_postfix *postfix;
1184b8e80941Smrg
1185b8e80941Smrg                switch (i) {
1186b8e80941Smrg                case PIPE_SHADER_VERTEX:
1187b8e80941Smrg                        uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
1188b8e80941Smrg                        postfix = &ctx->payload_vertex.postfix;
1189b8e80941Smrg                        break;
1190b8e80941Smrg
1191b8e80941Smrg                case PIPE_SHADER_FRAGMENT:
1192b8e80941Smrg                        uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
1193b8e80941Smrg                        postfix = &ctx->payload_tiler.postfix;
1194b8e80941Smrg                        break;
1195b8e80941Smrg
1196b8e80941Smrg                default:
1197b8e80941Smrg                        DBG("Unknown shader stage %d in uniform upload\n", i);
1198b8e80941Smrg                        assert(0);
1199b8e80941Smrg                }
1200b8e80941Smrg
1201b8e80941Smrg                /* Also attach the same buffer as a UBO for extended access */
1202b8e80941Smrg
1203b8e80941Smrg                struct mali_uniform_buffer_meta uniform_buffers[] = {
1204b8e80941Smrg                        {
1205b8e80941Smrg                                .size = MALI_POSITIVE((2 + uniform_count)),
1206b8e80941Smrg                                .ptr = transfer.gpu >> 2,
1207b8e80941Smrg                        },
1208b8e80941Smrg                };
1209b8e80941Smrg
1210b8e80941Smrg                mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
1211b8e80941Smrg                postfix->uniforms = transfer.gpu;
1212b8e80941Smrg                postfix->uniform_buffers = ubufs;
1213b8e80941Smrg
1214b8e80941Smrg                buf->dirty = 0;
1215b8e80941Smrg        }
1216b8e80941Smrg
1217b8e80941Smrg        /* TODO: Upload the viewport somewhere more appropriate */
1218b8e80941Smrg
1219b8e80941Smrg        /* Clip bounds are encoded as floats. The viewport itself is encoded as
1220b8e80941Smrg         * (somewhat) asymmetric ints. */
1221b8e80941Smrg        const struct pipe_scissor_state *ss = &ctx->scissor;
1222b8e80941Smrg
1223b8e80941Smrg        struct mali_viewport view = {
1224b8e80941Smrg                /* By default, do no viewport clipping, i.e. clip to (-inf,
1225b8e80941Smrg                 * inf) in each direction. Clipping to the viewport in theory
1226b8e80941Smrg                 * should work, but in practice causes issues when we're not
1227b8e80941Smrg                 * explicitly trying to scissor */
1228b8e80941Smrg
1229b8e80941Smrg                .clip_minx = -inff,
1230b8e80941Smrg                .clip_miny = -inff,
1231b8e80941Smrg                .clip_maxx = inff,
1232b8e80941Smrg                .clip_maxy = inff,
1233b8e80941Smrg
1234b8e80941Smrg                .clip_minz = 0.0,
1235b8e80941Smrg                .clip_maxz = 1.0,
1236b8e80941Smrg        };
1237b8e80941Smrg
1238b8e80941Smrg        /* Always scissor to the viewport by default. */
1239b8e80941Smrg        view.viewport0[0] = (int) (vp->translate[0] - vp->scale[0]);
1240b8e80941Smrg        view.viewport1[0] = MALI_POSITIVE((int) (vp->translate[0] + vp->scale[0]));
1241b8e80941Smrg
1242b8e80941Smrg        view.viewport0[1] = (int) (translate_y - fabs(vp->scale[1]));
1243b8e80941Smrg        view.viewport1[1] = MALI_POSITIVE((int) (translate_y + fabs(vp->scale[1])));
1244b8e80941Smrg
1245b8e80941Smrg        if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
1246b8e80941Smrg                /* Invert scissor if needed */
1247b8e80941Smrg                unsigned miny = invert_y ?
1248b8e80941Smrg                        ctx->pipe_framebuffer.height - ss->maxy : ss->miny;
1249b8e80941Smrg
1250b8e80941Smrg                unsigned maxy = invert_y ?
1251b8e80941Smrg                        ctx->pipe_framebuffer.height - ss->miny : ss->maxy;
1252b8e80941Smrg
1253b8e80941Smrg                /* Set the actual scissor */
1254b8e80941Smrg                view.viewport0[0] = ss->minx;
1255b8e80941Smrg                view.viewport0[1] = miny;
1256b8e80941Smrg                view.viewport1[0] = MALI_POSITIVE(ss->maxx);
1257b8e80941Smrg                view.viewport1[1] = MALI_POSITIVE(maxy);
1258b8e80941Smrg        }
1259b8e80941Smrg
1260b8e80941Smrg        ctx->payload_tiler.postfix.viewport =
1261b8e80941Smrg                panfrost_upload_transient(ctx,
1262b8e80941Smrg                                &view,
1263b8e80941Smrg                                sizeof(struct mali_viewport));
1264b8e80941Smrg
1265b8e80941Smrg        ctx->dirty = 0;
1266b8e80941Smrg}
1267b8e80941Smrg
1268b8e80941Smrg/* Corresponds to exactly one draw, but does not submit anything */
1269b8e80941Smrg
1270b8e80941Smrgstatic void
1271b8e80941Smrgpanfrost_queue_draw(struct panfrost_context *ctx)
1272b8e80941Smrg{
1273b8e80941Smrg        /* TODO: Expand the array? */
1274b8e80941Smrg        if (ctx->draw_count >= MAX_DRAW_CALLS) {
1275b8e80941Smrg                DBG("Job buffer overflow, ignoring draw\n");
1276b8e80941Smrg                assert(0);
1277b8e80941Smrg        }
1278b8e80941Smrg
1279b8e80941Smrg        /* Handle dirty flags now */
1280b8e80941Smrg        panfrost_emit_for_draw(ctx, true);
1281b8e80941Smrg
1282b8e80941Smrg        struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false, false);
1283b8e80941Smrg        struct panfrost_transfer tiler = panfrost_vertex_tiler_job(ctx, true, false);
1284b8e80941Smrg
1285b8e80941Smrg        ctx->u_vertex_jobs[ctx->vertex_job_count] = (struct mali_job_descriptor_header *) vertex.cpu;
1286b8e80941Smrg        ctx->vertex_jobs[ctx->vertex_job_count++] = vertex.gpu;
1287b8e80941Smrg
1288b8e80941Smrg        ctx->u_tiler_jobs[ctx->tiler_job_count] = (struct mali_job_descriptor_header *) tiler.cpu;
1289b8e80941Smrg        ctx->tiler_jobs[ctx->tiler_job_count++] = tiler.gpu;
1290b8e80941Smrg
1291b8e80941Smrg        ctx->draw_count++;
1292b8e80941Smrg}
1293b8e80941Smrg
1294b8e80941Smrg/* At the end of the frame, the vertex and tiler jobs are linked together and
1295b8e80941Smrg * then the fragment job is plonked at the end. Set value job is first for
1296b8e80941Smrg * unknown reasons. */
1297b8e80941Smrg
1298b8e80941Smrgstatic void
1299b8e80941Smrgpanfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr next)
1300b8e80941Smrg{
1301b8e80941Smrg        if (first->job_descriptor_size)
1302b8e80941Smrg                first->next_job_64 = (u64) (uintptr_t) next;
1303b8e80941Smrg        else
1304b8e80941Smrg                first->next_job_32 = (u32) (uintptr_t) next;
1305b8e80941Smrg}
1306b8e80941Smrg
1307b8e80941Smrgstatic void
1308b8e80941Smrgpanfrost_link_jobs(struct panfrost_context *ctx)
1309b8e80941Smrg{
1310b8e80941Smrg        if (ctx->draw_count) {
1311b8e80941Smrg                /* Generate the set_value_job */
1312b8e80941Smrg                panfrost_set_value_job(ctx);
1313b8e80941Smrg
1314b8e80941Smrg                /* Have the first vertex job depend on the set value job */
1315b8e80941Smrg                ctx->u_vertex_jobs[0]->job_dependency_index_1 = ctx->u_set_value_job->job_index;
1316b8e80941Smrg
1317b8e80941Smrg                /* SV -> V */
1318b8e80941Smrg                panfrost_link_job_pair(ctx->u_set_value_job, ctx->vertex_jobs[0]);
1319b8e80941Smrg        }
1320b8e80941Smrg
1321b8e80941Smrg        /* V -> V/T ; T -> T/null */
1322b8e80941Smrg        for (int i = 0; i < ctx->vertex_job_count; ++i) {
1323b8e80941Smrg                bool isLast = (i + 1) == ctx->vertex_job_count;
1324b8e80941Smrg
1325b8e80941Smrg                panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? ctx->tiler_jobs[0] : ctx->vertex_jobs[i + 1]);
1326b8e80941Smrg        }
1327b8e80941Smrg
1328b8e80941Smrg        /* T -> T/null */
1329b8e80941Smrg        for (int i = 0; i < ctx->tiler_job_count; ++i) {
1330b8e80941Smrg                bool isLast = (i + 1) == ctx->tiler_job_count;
1331b8e80941Smrg                panfrost_link_job_pair(ctx->u_tiler_jobs[i], isLast ? 0 : ctx->tiler_jobs[i + 1]);
1332b8e80941Smrg        }
1333b8e80941Smrg}
1334b8e80941Smrg
1335b8e80941Smrg/* The entire frame is in memory -- send it off to the kernel! */
1336b8e80941Smrg
1337b8e80941Smrgstatic void
1338b8e80941Smrgpanfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate,
1339b8e80941Smrg		      struct pipe_fence_handle **fence,
1340b8e80941Smrg                      struct panfrost_job *job)
1341b8e80941Smrg{
1342b8e80941Smrg        struct pipe_context *gallium = (struct pipe_context *) ctx;
1343b8e80941Smrg        struct panfrost_screen *screen = pan_screen(gallium->screen);
1344b8e80941Smrg
1345b8e80941Smrg        /* Edge case if screen is cleared and nothing else */
1346b8e80941Smrg        bool has_draws = ctx->draw_count > 0;
1347b8e80941Smrg
1348b8e80941Smrg        /* Workaround a bizarre lockup (a hardware errata?) */
1349b8e80941Smrg        if (!has_draws)
1350b8e80941Smrg                flush_immediate = true;
1351b8e80941Smrg
1352b8e80941Smrg        /* A number of jobs are batched -- this must be linked and cleared */
1353b8e80941Smrg        panfrost_link_jobs(ctx);
1354b8e80941Smrg
1355b8e80941Smrg        ctx->draw_count = 0;
1356b8e80941Smrg        ctx->vertex_job_count = 0;
1357b8e80941Smrg        ctx->tiler_job_count = 0;
1358b8e80941Smrg
1359b8e80941Smrg#ifndef DRY_RUN
1360b8e80941Smrg
1361b8e80941Smrg        bool is_scanout = panfrost_is_scanout(ctx);
1362b8e80941Smrg        screen->driver->submit_vs_fs_job(ctx, has_draws, is_scanout);
1363b8e80941Smrg
1364b8e80941Smrg        /* If visual, we can stall a frame */
1365b8e80941Smrg
1366b8e80941Smrg        if (!flush_immediate)
1367b8e80941Smrg                screen->driver->force_flush_fragment(ctx, fence);
1368b8e80941Smrg
1369b8e80941Smrg        screen->last_fragment_flushed = false;
1370b8e80941Smrg        screen->last_job = job;
1371b8e80941Smrg
1372b8e80941Smrg        /* If readback, flush now (hurts the pipelined performance) */
1373b8e80941Smrg        if (flush_immediate)
1374b8e80941Smrg                screen->driver->force_flush_fragment(ctx, fence);
1375b8e80941Smrg
1376b8e80941Smrg        if (screen->driver->dump_counters && pan_counters_base) {
1377b8e80941Smrg                screen->driver->dump_counters(screen);
1378b8e80941Smrg
1379b8e80941Smrg                char filename[128];
1380b8e80941Smrg                snprintf(filename, sizeof(filename), "%s/frame%d.mdgprf", pan_counters_base, ++performance_counter_number);
1381b8e80941Smrg                FILE *fp = fopen(filename, "wb");
1382b8e80941Smrg                fwrite(screen->perf_counters.cpu,  4096, sizeof(uint32_t), fp);
1383b8e80941Smrg                fclose(fp);
1384b8e80941Smrg        }
1385b8e80941Smrg
1386b8e80941Smrg#endif
1387b8e80941Smrg}
1388b8e80941Smrg
1389b8e80941Smrgvoid
1390b8e80941Smrgpanfrost_flush(
1391b8e80941Smrg        struct pipe_context *pipe,
1392b8e80941Smrg        struct pipe_fence_handle **fence,
1393b8e80941Smrg        unsigned flags)
1394b8e80941Smrg{
1395b8e80941Smrg        struct panfrost_context *ctx = pan_context(pipe);
1396b8e80941Smrg        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
1397b8e80941Smrg
1398b8e80941Smrg        /* Nothing to do! */
1399b8e80941Smrg        if (!ctx->draw_count && !job->clear) return;
1400b8e80941Smrg
1401b8e80941Smrg        /* Whether to stall the pipeline for immediately correct results */
1402b8e80941Smrg        bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME;
1403b8e80941Smrg
1404b8e80941Smrg        /* Submit the frame itself */
1405b8e80941Smrg        panfrost_submit_frame(ctx, flush_immediate, fence, job);
1406b8e80941Smrg
1407b8e80941Smrg        /* Prepare for the next frame */
1408b8e80941Smrg        panfrost_invalidate_frame(ctx);
1409b8e80941Smrg}
1410b8e80941Smrg
1411b8e80941Smrg#define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c;
1412b8e80941Smrg
1413b8e80941Smrgstatic int
1414b8e80941Smrgg2m_draw_mode(enum pipe_prim_type mode)
1415b8e80941Smrg{
1416b8e80941Smrg        switch (mode) {
1417b8e80941Smrg                DEFINE_CASE(POINTS);
1418b8e80941Smrg                DEFINE_CASE(LINES);
1419b8e80941Smrg                DEFINE_CASE(LINE_LOOP);
1420b8e80941Smrg                DEFINE_CASE(LINE_STRIP);
1421b8e80941Smrg                DEFINE_CASE(TRIANGLES);
1422b8e80941Smrg                DEFINE_CASE(TRIANGLE_STRIP);
1423b8e80941Smrg                DEFINE_CASE(TRIANGLE_FAN);
1424b8e80941Smrg                DEFINE_CASE(QUADS);
1425b8e80941Smrg                DEFINE_CASE(QUAD_STRIP);
1426b8e80941Smrg                DEFINE_CASE(POLYGON);
1427b8e80941Smrg
1428b8e80941Smrg        default:
1429b8e80941Smrg                DBG("Illegal draw mode %d\n", mode);
1430b8e80941Smrg                assert(0);
1431b8e80941Smrg                return MALI_LINE_LOOP;
1432b8e80941Smrg        }
1433b8e80941Smrg}
1434b8e80941Smrg
1435b8e80941Smrg#undef DEFINE_CASE
1436b8e80941Smrg
1437b8e80941Smrgstatic unsigned
1438b8e80941Smrgpanfrost_translate_index_size(unsigned size)
1439b8e80941Smrg{
1440b8e80941Smrg        switch (size) {
1441b8e80941Smrg        case 1:
1442b8e80941Smrg                return MALI_DRAW_INDEXED_UINT8;
1443b8e80941Smrg
1444b8e80941Smrg        case 2:
1445b8e80941Smrg                return MALI_DRAW_INDEXED_UINT16;
1446b8e80941Smrg
1447b8e80941Smrg        case 4:
1448b8e80941Smrg                return MALI_DRAW_INDEXED_UINT32;
1449b8e80941Smrg
1450b8e80941Smrg        default:
1451b8e80941Smrg                DBG("Unknown index size %d\n", size);
1452b8e80941Smrg                assert(0);
1453b8e80941Smrg                return 0;
1454b8e80941Smrg        }
1455b8e80941Smrg}
1456b8e80941Smrg
1457b8e80941Smrg/* Gets a GPU address for the associated index buffer. Only gauranteed to be
1458b8e80941Smrg * good for the duration of the draw (transient), could last longer */
1459b8e80941Smrg
1460b8e80941Smrgstatic mali_ptr
1461b8e80941Smrgpanfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info)
1462b8e80941Smrg{
1463b8e80941Smrg        struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
1464b8e80941Smrg
1465b8e80941Smrg        off_t offset = info->start * info->index_size;
1466b8e80941Smrg
1467b8e80941Smrg        if (!info->has_user_indices) {
1468b8e80941Smrg                /* Only resources can be directly mapped */
1469b8e80941Smrg                return rsrc->bo->gpu + offset;
1470b8e80941Smrg        } else {
1471b8e80941Smrg                /* Otherwise, we need to upload to transient memory */
1472b8e80941Smrg                const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
1473b8e80941Smrg                return panfrost_upload_transient(ctx, ibuf8 + offset, info->count * info->index_size);
1474b8e80941Smrg        }
1475b8e80941Smrg}
1476b8e80941Smrg
1477b8e80941Smrgstatic void
1478b8e80941Smrgpanfrost_draw_vbo(
1479b8e80941Smrg        struct pipe_context *pipe,
1480b8e80941Smrg        const struct pipe_draw_info *info)
1481b8e80941Smrg{
1482b8e80941Smrg        struct panfrost_context *ctx = pan_context(pipe);
1483b8e80941Smrg
1484b8e80941Smrg        ctx->payload_vertex.draw_start = info->start;
1485b8e80941Smrg        ctx->payload_tiler.draw_start = info->start;
1486b8e80941Smrg
1487b8e80941Smrg        int mode = info->mode;
1488b8e80941Smrg
1489b8e80941Smrg        /* Fallback for unsupported modes */
1490b8e80941Smrg
1491b8e80941Smrg        if (!(ctx->draw_modes & (1 << mode))) {
1492b8e80941Smrg                if (mode == PIPE_PRIM_QUADS && info->count == 4 && ctx->rasterizer && !ctx->rasterizer->base.flatshade) {
1493b8e80941Smrg                        mode = PIPE_PRIM_TRIANGLE_FAN;
1494b8e80941Smrg                } else {
1495b8e80941Smrg                        if (info->count < 4) {
1496b8e80941Smrg                                /* Degenerate case? */
1497b8e80941Smrg                                return;
1498b8e80941Smrg                        }
1499b8e80941Smrg
1500b8e80941Smrg                        util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->rasterizer->base);
1501b8e80941Smrg                        util_primconvert_draw_vbo(ctx->primconvert, info);
1502b8e80941Smrg                        return;
1503b8e80941Smrg                }
1504b8e80941Smrg        }
1505b8e80941Smrg
1506b8e80941Smrg        /* Now that we have a guaranteed terminating path, find the job.
1507b8e80941Smrg         * Assignment commented out to prevent unused warning */
1508b8e80941Smrg
1509b8e80941Smrg        /* struct panfrost_job *job = */ panfrost_get_job_for_fbo(ctx);
1510b8e80941Smrg
1511b8e80941Smrg        ctx->payload_tiler.prefix.draw_mode = g2m_draw_mode(mode);
1512b8e80941Smrg
1513b8e80941Smrg        ctx->vertex_count = info->count;
1514b8e80941Smrg
1515b8e80941Smrg        /* For non-indexed draws, they're the same */
1516b8e80941Smrg        unsigned invocation_count = ctx->vertex_count;
1517b8e80941Smrg
1518b8e80941Smrg        unsigned draw_flags = 0;
1519b8e80941Smrg
1520b8e80941Smrg        /* The draw flags interpret how primitive size is interpreted */
1521b8e80941Smrg
1522b8e80941Smrg        if (panfrost_writes_point_size(ctx))
1523b8e80941Smrg                draw_flags |= MALI_DRAW_VARYING_SIZE;
1524b8e80941Smrg
1525b8e80941Smrg        /* For higher amounts of vertices (greater than what fits in a 16-bit
1526b8e80941Smrg         * short), the other value is needed, otherwise there will be bizarre
1527b8e80941Smrg         * rendering artefacts. It's not clear what these values mean yet. */
1528b8e80941Smrg
1529b8e80941Smrg        draw_flags |= (mode == PIPE_PRIM_POINTS || ctx->vertex_count > 65535) ? 0x3000 : 0x18000;
1530b8e80941Smrg
1531b8e80941Smrg        if (info->index_size) {
1532b8e80941Smrg                /* Calculate the min/max index used so we can figure out how
1533b8e80941Smrg                 * many times to invoke the vertex shader */
1534b8e80941Smrg
1535b8e80941Smrg                /* Fetch / calculate index bounds */
1536b8e80941Smrg                unsigned min_index = 0, max_index = 0;
1537b8e80941Smrg
1538b8e80941Smrg                if (info->max_index == ~0u) {
1539b8e80941Smrg                        u_vbuf_get_minmax_index(pipe, info, &min_index, &max_index);
1540b8e80941Smrg                } else {
1541b8e80941Smrg                        min_index = info->min_index;
1542b8e80941Smrg                        max_index = info->max_index;
1543b8e80941Smrg                }
1544b8e80941Smrg
1545b8e80941Smrg                /* Use the corresponding values */
1546b8e80941Smrg                invocation_count = max_index - min_index + 1;
1547b8e80941Smrg                ctx->payload_vertex.draw_start = min_index;
1548b8e80941Smrg                ctx->payload_tiler.draw_start = min_index;
1549b8e80941Smrg
1550b8e80941Smrg                ctx->payload_tiler.prefix.negative_start = -min_index;
1551b8e80941Smrg                ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(info->count);
1552b8e80941Smrg
1553b8e80941Smrg                //assert(!info->restart_index); /* TODO: Research */
1554b8e80941Smrg                assert(!info->index_bias);
1555b8e80941Smrg
1556b8e80941Smrg                draw_flags |= panfrost_translate_index_size(info->index_size);
1557b8e80941Smrg                ctx->payload_tiler.prefix.indices = panfrost_get_index_buffer_mapped(ctx, info);
1558b8e80941Smrg        } else {
1559b8e80941Smrg                /* Index count == vertex count, if no indexing is applied, as
1560b8e80941Smrg                 * if it is internally indexed in the expected order */
1561b8e80941Smrg
1562b8e80941Smrg                ctx->payload_tiler.prefix.negative_start = 0;
1563b8e80941Smrg                ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
1564b8e80941Smrg
1565b8e80941Smrg                /* Reverse index state */
1566b8e80941Smrg                ctx->payload_tiler.prefix.indices = (uintptr_t) NULL;
1567b8e80941Smrg        }
1568b8e80941Smrg
1569b8e80941Smrg        ctx->payload_vertex.prefix.invocation_count = MALI_POSITIVE(invocation_count);
1570b8e80941Smrg        ctx->payload_tiler.prefix.invocation_count = MALI_POSITIVE(invocation_count);
1571b8e80941Smrg        ctx->payload_tiler.prefix.unknown_draw = draw_flags;
1572b8e80941Smrg
1573b8e80941Smrg        /* Fire off the draw itself */
1574b8e80941Smrg        panfrost_queue_draw(ctx);
1575b8e80941Smrg}
1576b8e80941Smrg
1577b8e80941Smrg/* CSO state */
1578b8e80941Smrg
1579b8e80941Smrgstatic void
1580b8e80941Smrgpanfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso)
1581b8e80941Smrg{
1582b8e80941Smrg        free(hwcso);
1583b8e80941Smrg}
1584b8e80941Smrg
1585b8e80941Smrgstatic void *
1586b8e80941Smrgpanfrost_create_rasterizer_state(
1587b8e80941Smrg        struct pipe_context *pctx,
1588b8e80941Smrg        const struct pipe_rasterizer_state *cso)
1589b8e80941Smrg{
1590b8e80941Smrg        struct panfrost_context *ctx = pan_context(pctx);
1591b8e80941Smrg        struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer);
1592b8e80941Smrg
1593b8e80941Smrg        so->base = *cso;
1594b8e80941Smrg
1595b8e80941Smrg        /* Bitmask, unknown meaning of the start value */
1596b8e80941Smrg        so->tiler_gl_enables = ctx->is_t6xx ? 0x105 : 0x7;
1597b8e80941Smrg
1598b8e80941Smrg        so->tiler_gl_enables |= MALI_FRONT_FACE(
1599b8e80941Smrg                                        cso->front_ccw ? MALI_CCW : MALI_CW);
1600b8e80941Smrg
1601b8e80941Smrg        if (cso->cull_face & PIPE_FACE_FRONT)
1602b8e80941Smrg                so->tiler_gl_enables |= MALI_CULL_FACE_FRONT;
1603b8e80941Smrg
1604b8e80941Smrg        if (cso->cull_face & PIPE_FACE_BACK)
1605b8e80941Smrg                so->tiler_gl_enables |= MALI_CULL_FACE_BACK;
1606b8e80941Smrg
1607b8e80941Smrg        return so;
1608b8e80941Smrg}
1609b8e80941Smrg
1610b8e80941Smrgstatic void
1611b8e80941Smrgpanfrost_bind_rasterizer_state(
1612b8e80941Smrg        struct pipe_context *pctx,
1613b8e80941Smrg        void *hwcso)
1614b8e80941Smrg{
1615b8e80941Smrg        struct panfrost_context *ctx = pan_context(pctx);
1616b8e80941Smrg
1617b8e80941Smrg        /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */
1618b8e80941Smrg        if (!hwcso)
1619b8e80941Smrg                return;
1620b8e80941Smrg
1621b8e80941Smrg        ctx->rasterizer = hwcso;
1622b8e80941Smrg        ctx->dirty |= PAN_DIRTY_RASTERIZER;
1623b8e80941Smrg}
1624b8e80941Smrg
1625b8e80941Smrgstatic void *
1626b8e80941Smrgpanfrost_create_vertex_elements_state(
1627b8e80941Smrg        struct pipe_context *pctx,
1628b8e80941Smrg        unsigned num_elements,
1629b8e80941Smrg        const struct pipe_vertex_element *elements)
1630b8e80941Smrg{
1631b8e80941Smrg        struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state);
1632b8e80941Smrg
1633b8e80941Smrg        so->num_elements = num_elements;
1634b8e80941Smrg        memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
1635b8e80941Smrg
1636b8e80941Smrg        /* XXX: What the cornball? This is totally, 100%, unapologetically
1637b8e80941Smrg         * nonsense. And yet it somehow fixes a regression in -bshadow
1638b8e80941Smrg         * (previously, we allocated the descriptor here... a newer commit
1639b8e80941Smrg         * removed that allocation, and then memory corruption led to
1640b8e80941Smrg         * shader_meta getting overwritten in bad ways and then the whole test
1641b8e80941Smrg         * case falling apart . TODO: LOOK INTO PLEASE XXX XXX BAD XXX XXX XXX
1642b8e80941Smrg         */
1643b8e80941Smrg        panfrost_allocate_chunk(pan_context(pctx), 0, HEAP_DESCRIPTOR);
1644b8e80941Smrg
1645b8e80941Smrg        for (int i = 0; i < num_elements; ++i) {
1646b8e80941Smrg                so->hw[i].index = elements[i].vertex_buffer_index;
1647b8e80941Smrg
1648b8e80941Smrg                enum pipe_format fmt = elements[i].src_format;
1649b8e80941Smrg                const struct util_format_description *desc = util_format_description(fmt);
1650b8e80941Smrg                so->hw[i].unknown1 = 0x2;
1651b8e80941Smrg                so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels);
1652b8e80941Smrg
1653b8e80941Smrg                so->hw[i].format = panfrost_find_format(desc);
1654b8e80941Smrg
1655b8e80941Smrg                /* The field itself should probably be shifted over */
1656b8e80941Smrg                so->hw[i].src_offset = elements[i].src_offset;
1657b8e80941Smrg        }
1658b8e80941Smrg
1659b8e80941Smrg        return so;
1660b8e80941Smrg}
1661b8e80941Smrg
1662b8e80941Smrgstatic void
1663b8e80941Smrgpanfrost_bind_vertex_elements_state(
1664b8e80941Smrg        struct pipe_context *pctx,
1665b8e80941Smrg        void *hwcso)
1666b8e80941Smrg{
1667b8e80941Smrg        struct panfrost_context *ctx = pan_context(pctx);
1668b8e80941Smrg
1669b8e80941Smrg        ctx->vertex = hwcso;
1670b8e80941Smrg        ctx->dirty |= PAN_DIRTY_VERTEX;
1671b8e80941Smrg}
1672b8e80941Smrg
1673b8e80941Smrgstatic void
1674b8e80941Smrgpanfrost_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso)
1675b8e80941Smrg{
1676b8e80941Smrg        struct panfrost_vertex_state *so = (struct panfrost_vertex_state *) hwcso;
1677b8e80941Smrg        unsigned bytes = sizeof(struct mali_attr_meta) * so->num_elements;
1678b8e80941Smrg        DBG("Vertex elements delete leaks descriptor (%d bytes)\n", bytes);
1679b8e80941Smrg        free(hwcso);
1680b8e80941Smrg}
1681b8e80941Smrg
1682b8e80941Smrgstatic void *
1683b8e80941Smrgpanfrost_create_shader_state(
1684b8e80941Smrg        struct pipe_context *pctx,
1685b8e80941Smrg        const struct pipe_shader_state *cso)
1686b8e80941Smrg{
1687b8e80941Smrg        struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants);
1688b8e80941Smrg        so->base = *cso;
1689b8e80941Smrg
1690b8e80941Smrg        /* Token deep copy to prevent memory corruption */
1691b8e80941Smrg
1692b8e80941Smrg        if (cso->type == PIPE_SHADER_IR_TGSI)
1693b8e80941Smrg                so->base.tokens = tgsi_dup_tokens(so->base.tokens);
1694b8e80941Smrg
1695b8e80941Smrg        return so;
1696b8e80941Smrg}
1697b8e80941Smrg
1698b8e80941Smrgstatic void
1699b8e80941Smrgpanfrost_delete_shader_state(
1700b8e80941Smrg        struct pipe_context *pctx,
1701b8e80941Smrg        void *so)
1702b8e80941Smrg{
1703b8e80941Smrg        struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so;
1704b8e80941Smrg
1705b8e80941Smrg        if (cso->base.type == PIPE_SHADER_IR_TGSI) {
1706b8e80941Smrg                DBG("Deleting TGSI shader leaks duplicated tokens\n");
1707b8e80941Smrg        }
1708b8e80941Smrg
1709b8e80941Smrg        unsigned leak = cso->variant_count * sizeof(struct mali_shader_meta);
1710b8e80941Smrg        DBG("Deleting shader state leaks descriptors (%d bytes), and shader bytecode\n", leak);
1711b8e80941Smrg
1712b8e80941Smrg        free(so);
1713b8e80941Smrg}
1714b8e80941Smrg
1715b8e80941Smrgstatic void *
1716b8e80941Smrgpanfrost_create_sampler_state(
1717b8e80941Smrg        struct pipe_context *pctx,
1718b8e80941Smrg        const struct pipe_sampler_state *cso)
1719b8e80941Smrg{
1720b8e80941Smrg        struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state);
1721b8e80941Smrg        so->base = *cso;
1722b8e80941Smrg
1723b8e80941Smrg        /* sampler_state corresponds to mali_sampler_descriptor, which we can generate entirely here */
1724b8e80941Smrg
1725b8e80941Smrg        struct mali_sampler_descriptor sampler_descriptor = {
1726b8e80941Smrg                .filter_mode = MALI_TEX_MIN(translate_tex_filter(cso->min_img_filter))
1727b8e80941Smrg                | MALI_TEX_MAG(translate_tex_filter(cso->mag_img_filter))
1728b8e80941Smrg                | translate_mip_filter(cso->min_mip_filter)
1729b8e80941Smrg                | 0x20,
1730b8e80941Smrg
1731b8e80941Smrg                .wrap_s = translate_tex_wrap(cso->wrap_s),
1732b8e80941Smrg                .wrap_t = translate_tex_wrap(cso->wrap_t),
1733b8e80941Smrg                .wrap_r = translate_tex_wrap(cso->wrap_r),
1734b8e80941Smrg                .compare_func = panfrost_translate_alt_compare_func(cso->compare_func),
1735b8e80941Smrg                .border_color = {
1736b8e80941Smrg                        cso->border_color.f[0],
1737b8e80941Smrg                        cso->border_color.f[1],
1738b8e80941Smrg                        cso->border_color.f[2],
1739b8e80941Smrg                        cso->border_color.f[3]
1740b8e80941Smrg                },
1741b8e80941Smrg                .min_lod = FIXED_16(cso->min_lod),
1742b8e80941Smrg                .max_lod = FIXED_16(cso->max_lod),
1743b8e80941Smrg                .unknown2 = 1,
1744b8e80941Smrg        };
1745b8e80941Smrg
1746b8e80941Smrg        so->hw = sampler_descriptor;
1747b8e80941Smrg
1748b8e80941Smrg        return so;
1749b8e80941Smrg}
1750b8e80941Smrg
1751b8e80941Smrgstatic void
1752b8e80941Smrgpanfrost_bind_sampler_states(
1753b8e80941Smrg        struct pipe_context *pctx,
1754b8e80941Smrg        enum pipe_shader_type shader,
1755b8e80941Smrg        unsigned start_slot, unsigned num_sampler,
1756b8e80941Smrg        void **sampler)
1757b8e80941Smrg{
1758b8e80941Smrg        assert(start_slot == 0);
1759b8e80941Smrg
1760b8e80941Smrg        struct panfrost_context *ctx = pan_context(pctx);
1761b8e80941Smrg
1762b8e80941Smrg        /* XXX: Should upload, not just copy? */
1763b8e80941Smrg        ctx->sampler_count[shader] = num_sampler;
1764b8e80941Smrg        memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *));
1765b8e80941Smrg
1766b8e80941Smrg        ctx->dirty |= PAN_DIRTY_SAMPLERS;
1767b8e80941Smrg}
1768b8e80941Smrg
1769b8e80941Smrgstatic bool
1770b8e80941Smrgpanfrost_variant_matches(struct panfrost_context *ctx, struct panfrost_shader_state *variant)
1771b8e80941Smrg{
1772b8e80941Smrg        struct pipe_alpha_state *alpha = &ctx->depth_stencil->alpha;
1773b8e80941Smrg
1774b8e80941Smrg        if (alpha->enabled || variant->alpha_state.enabled) {
1775b8e80941Smrg                /* Make sure enable state is at least the same */
1776b8e80941Smrg                if (alpha->enabled != variant->alpha_state.enabled) {
1777b8e80941Smrg                        return false;
1778b8e80941Smrg                }
1779b8e80941Smrg
1780b8e80941Smrg                /* Check that the contents of the test are the same */
1781b8e80941Smrg                bool same_func = alpha->func == variant->alpha_state.func;
1782b8e80941Smrg                bool same_ref = alpha->ref_value == variant->alpha_state.ref_value;
1783b8e80941Smrg
1784b8e80941Smrg                if (!(same_func && same_ref)) {
1785b8e80941Smrg                        return false;
1786b8e80941Smrg                }
1787b8e80941Smrg        }
1788b8e80941Smrg        /* Otherwise, we're good to go */
1789b8e80941Smrg        return true;
1790b8e80941Smrg}
1791b8e80941Smrg
1792b8e80941Smrgstatic void
1793b8e80941Smrgpanfrost_bind_fs_state(
1794b8e80941Smrg        struct pipe_context *pctx,
1795b8e80941Smrg        void *hwcso)
1796b8e80941Smrg{
1797b8e80941Smrg        struct panfrost_context *ctx = pan_context(pctx);
1798b8e80941Smrg
1799b8e80941Smrg        ctx->fs = hwcso;
1800b8e80941Smrg
1801b8e80941Smrg        if (hwcso) {
1802b8e80941Smrg                /* Match the appropriate variant */
1803b8e80941Smrg
1804b8e80941Smrg                signed variant = -1;
1805b8e80941Smrg
1806b8e80941Smrg                struct panfrost_shader_variants *variants = (struct panfrost_shader_variants *) hwcso;
1807b8e80941Smrg
1808b8e80941Smrg                for (unsigned i = 0; i < variants->variant_count; ++i) {
1809b8e80941Smrg                        if (panfrost_variant_matches(ctx, &variants->variants[i])) {
1810b8e80941Smrg                                variant = i;
1811b8e80941Smrg                                break;
1812b8e80941Smrg                        }
1813b8e80941Smrg                }
1814b8e80941Smrg
1815b8e80941Smrg                if (variant == -1) {
1816b8e80941Smrg                        /* No variant matched, so create a new one */
1817b8e80941Smrg                        variant = variants->variant_count++;
1818b8e80941Smrg                        assert(variants->variant_count < MAX_SHADER_VARIANTS);
1819b8e80941Smrg
1820b8e80941Smrg                        variants->variants[variant].base = hwcso;
1821b8e80941Smrg                        variants->variants[variant].alpha_state = ctx->depth_stencil->alpha;
1822b8e80941Smrg
1823b8e80941Smrg                        /* Allocate the mapped descriptor ahead-of-time. TODO: Use for FS as well as VS */
1824b8e80941Smrg                        struct panfrost_context *ctx = pan_context(pctx);
1825b8e80941Smrg                        struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR);
1826b8e80941Smrg
1827b8e80941Smrg                        variants->variants[variant].tripipe = (struct mali_shader_meta *) transfer.cpu;
1828b8e80941Smrg                        variants->variants[variant].tripipe_gpu = transfer.gpu;
1829b8e80941Smrg
1830b8e80941Smrg                }
1831b8e80941Smrg
1832b8e80941Smrg                /* Select this variant */
1833b8e80941Smrg                variants->active_variant = variant;
1834b8e80941Smrg
1835b8e80941Smrg                struct panfrost_shader_state *shader_state = &variants->variants[variant];
1836b8e80941Smrg                assert(panfrost_variant_matches(ctx, shader_state));
1837b8e80941Smrg
1838b8e80941Smrg                /* Now we have a variant selected, so compile and go */
1839b8e80941Smrg
1840b8e80941Smrg                if (!shader_state->compiled) {
1841b8e80941Smrg                        panfrost_shader_compile(ctx, shader_state->tripipe, NULL, JOB_TYPE_TILER, shader_state);
1842b8e80941Smrg                        shader_state->compiled = true;
1843b8e80941Smrg                }
1844b8e80941Smrg        }
1845b8e80941Smrg
1846b8e80941Smrg        ctx->dirty |= PAN_DIRTY_FS;
1847b8e80941Smrg}
1848b8e80941Smrg
1849b8e80941Smrgstatic void
1850b8e80941Smrgpanfrost_bind_vs_state(
1851b8e80941Smrg        struct pipe_context *pctx,
1852b8e80941Smrg        void *hwcso)
1853b8e80941Smrg{
1854b8e80941Smrg        struct panfrost_context *ctx = pan_context(pctx);
1855b8e80941Smrg
1856b8e80941Smrg        ctx->vs = hwcso;
1857b8e80941Smrg
1858b8e80941Smrg        if (hwcso) {
1859b8e80941Smrg                if (!ctx->vs->variants[0].compiled) {
1860b8e80941Smrg                        ctx->vs->variants[0].base = hwcso;
1861b8e80941Smrg
1862b8e80941Smrg                        /* TODO DRY from above */
1863b8e80941Smrg                        struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR);
1864b8e80941Smrg                        ctx->vs->variants[0].tripipe = (struct mali_shader_meta *) transfer.cpu;
1865b8e80941Smrg                        ctx->vs->variants[0].tripipe_gpu = transfer.gpu;
1866b8e80941Smrg
1867b8e80941Smrg                        panfrost_shader_compile(ctx, ctx->vs->variants[0].tripipe, NULL, JOB_TYPE_VERTEX, &ctx->vs->variants[0]);
1868b8e80941Smrg                        ctx->vs->variants[0].compiled = true;
1869b8e80941Smrg                }
1870b8e80941Smrg        }
1871b8e80941Smrg
1872b8e80941Smrg        ctx->dirty |= PAN_DIRTY_VS;
1873b8e80941Smrg}
1874b8e80941Smrg
1875b8e80941Smrgstatic void
1876b8e80941Smrgpanfrost_set_vertex_buffers(
1877b8e80941Smrg        struct pipe_context *pctx,
1878b8e80941Smrg        unsigned start_slot,
1879b8e80941Smrg        unsigned num_buffers,
1880b8e80941Smrg        const struct pipe_vertex_buffer *buffers)
1881b8e80941Smrg{
1882b8e80941Smrg        struct panfrost_context *ctx = pan_context(pctx);
1883b8e80941Smrg
1884b8e80941Smrg        util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers);
1885b8e80941Smrg}
1886b8e80941Smrg
1887b8e80941Smrgstatic void
1888b8e80941Smrgpanfrost_set_constant_buffer(
1889b8e80941Smrg        struct pipe_context *pctx,
1890b8e80941Smrg        enum pipe_shader_type shader, uint index,
1891b8e80941Smrg        const struct pipe_constant_buffer *buf)
1892b8e80941Smrg{
1893b8e80941Smrg        struct panfrost_context *ctx = pan_context(pctx);
1894b8e80941Smrg        struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader];
1895b8e80941Smrg
1896b8e80941Smrg        size_t sz = buf ? buf->buffer_size : 0;
1897b8e80941Smrg
1898b8e80941Smrg        /* Free previous buffer */
1899b8e80941Smrg
1900b8e80941Smrg        pbuf->dirty = true;
1901b8e80941Smrg        pbuf->size = sz;
1902b8e80941Smrg
1903b8e80941Smrg        if (pbuf->buffer) {
1904b8e80941Smrg                free(pbuf->buffer);
1905b8e80941Smrg                pbuf->buffer = NULL;
1906b8e80941Smrg        }
1907b8e80941Smrg
1908b8e80941Smrg        /* If unbinding, we're done */
1909b8e80941Smrg
1910b8e80941Smrg        if (!buf)
1911b8e80941Smrg                return;
1912b8e80941Smrg
1913b8e80941Smrg        /* Multiple constant buffers not yet supported */
1914b8e80941Smrg        assert(index == 0);
1915b8e80941Smrg
1916b8e80941Smrg        const uint8_t *cpu;
1917b8e80941Smrg
1918b8e80941Smrg        struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer);
1919b8e80941Smrg
1920b8e80941Smrg        if (rsrc) {
1921b8e80941Smrg                cpu = rsrc->bo->cpu;
1922b8e80941Smrg        } else if (buf->user_buffer) {
1923b8e80941Smrg                cpu = buf->user_buffer;
1924b8e80941Smrg        } else {
1925b8e80941Smrg                DBG("No constant buffer?\n");
1926b8e80941Smrg                return;
1927b8e80941Smrg        }
1928b8e80941Smrg
1929b8e80941Smrg        /* Copy the constant buffer into the driver context for later upload */
1930b8e80941Smrg
1931b8e80941Smrg        pbuf->buffer = malloc(sz);
1932b8e80941Smrg        memcpy(pbuf->buffer, cpu + buf->buffer_offset, sz);
1933b8e80941Smrg}
1934b8e80941Smrg
1935b8e80941Smrgstatic void
1936b8e80941Smrgpanfrost_set_stencil_ref(
1937b8e80941Smrg        struct pipe_context *pctx,
1938b8e80941Smrg        const struct pipe_stencil_ref *ref)
1939b8e80941Smrg{
1940b8e80941Smrg        struct panfrost_context *ctx = pan_context(pctx);
1941b8e80941Smrg        ctx->stencil_ref = *ref;
1942b8e80941Smrg
1943b8e80941Smrg        /* Shader core dirty */
1944b8e80941Smrg        ctx->dirty |= PAN_DIRTY_FS;
1945b8e80941Smrg}
1946b8e80941Smrg
/* Create a gallium sampler view: builds and caches the hardware texture
 * descriptor (everything except the texel data itself) so it can be copied
 * into the command stream at draw time. */

static struct pipe_sampler_view *
panfrost_create_sampler_view(
        struct pipe_context *pctx,
        struct pipe_resource *texture,
        const struct pipe_sampler_view *template)
{
        struct panfrost_sampler_view *so = CALLOC_STRUCT(panfrost_sampler_view);
        int bytes_per_pixel = util_format_get_blocksize(texture->format);

        /* The view keeps a reference on the backing texture */
        pipe_reference(NULL, &texture->reference);

        struct panfrost_resource *prsrc = (struct panfrost_resource *) texture;

        so->base = *template;
        so->base.texture = texture;
        so->base.reference.count = 1;
        so->base.context = pctx;

        /* sampler_views correspond to texture descriptors, minus the texture
         * (data) itself. So, we serialise the descriptor here and cache it for
         * later. */

        /* Make sure it's something with which we're familiar */
        assert(bytes_per_pixel >= 1 && bytes_per_pixel <= 4);

        /* TODO: Detect from format better */
        const struct util_format_description *desc = util_format_description(prsrc->base.format);

        /* Caller-requested channel swizzle, applied on top of the format's
         * native swizzle below */
        unsigned char user_swizzle[4] = {
                template->swizzle_r,
                template->swizzle_g,
                template->swizzle_b,
                template->swizzle_a
        };

        enum mali_format format = panfrost_find_format(desc);

        bool is_depth = desc->format == PIPE_FORMAT_Z32_UNORM;

        /* usage2 encodes the memory layout; the magic bit values below were
         * presumably derived from traces — meanings unconfirmed */
        unsigned usage2_layout = 0x10;

        switch (prsrc->bo->layout) {
                case PAN_AFBC:
                        usage2_layout |= 0x8 | 0x4;
                        break;
                case PAN_TILED:
                        usage2_layout |= 0x1;
                        break;
                case PAN_LINEAR:
                        /* NOTE(review): linear depth uses the tiled bit here —
                         * looks intentional but is unconfirmed */
                        usage2_layout |= is_depth ? 0x1 : 0x2;
                        break;
                default:
                        assert(0);
                        break;
        }

        /* Dimensions are biased by one in the hardware encoding
         * (MALI_POSITIVE) */
        struct mali_texture_descriptor texture_descriptor = {
                .width = MALI_POSITIVE(texture->width0),
                .height = MALI_POSITIVE(texture->height0),
                .depth = MALI_POSITIVE(texture->depth0),

                /* TODO: Decode */
                .format = {
                        .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
                        .format = format,

                        .usage1 = 0x0,
                        .is_not_cubemap = texture->target != PIPE_TEXTURE_CUBE,

                        .usage2 = usage2_layout
                },

                .swizzle = panfrost_translate_swizzle_4(user_swizzle)
        };

        /* TODO: Other base levels require adjusting dimensions / level numbers / etc */
        assert (template->u.tex.first_level == 0);

        /* Disable mipmapping for now to avoid regressions while automipmapping
         * is being implemented. TODO: Remove me once automipmaps work */

        //texture_descriptor.nr_mipmap_levels = template->u.tex.last_level - template->u.tex.first_level;
        texture_descriptor.nr_mipmap_levels = 0;

        so->hw = texture_descriptor;

        return (struct pipe_sampler_view *) so;
}
2035b8e80941Smrg
2036b8e80941Smrgstatic void
2037b8e80941Smrgpanfrost_set_sampler_views(
2038b8e80941Smrg        struct pipe_context *pctx,
2039b8e80941Smrg        enum pipe_shader_type shader,
2040b8e80941Smrg        unsigned start_slot, unsigned num_views,
2041b8e80941Smrg        struct pipe_sampler_view **views)
2042b8e80941Smrg{
2043b8e80941Smrg        struct panfrost_context *ctx = pan_context(pctx);
2044b8e80941Smrg
2045b8e80941Smrg        assert(start_slot == 0);
2046b8e80941Smrg
2047b8e80941Smrg        ctx->sampler_view_count[shader] = num_views;
2048b8e80941Smrg        memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *));
2049b8e80941Smrg
2050b8e80941Smrg        ctx->dirty |= PAN_DIRTY_TEXTURES;
2051b8e80941Smrg}
2052b8e80941Smrg
2053b8e80941Smrgstatic void
2054b8e80941Smrgpanfrost_sampler_view_destroy(
2055b8e80941Smrg        struct pipe_context *pctx,
2056b8e80941Smrg        struct pipe_sampler_view *views)
2057b8e80941Smrg{
2058b8e80941Smrg        //struct panfrost_context *ctx = pan_context(pctx);
2059b8e80941Smrg
2060b8e80941Smrg        /* TODO */
2061b8e80941Smrg
2062b8e80941Smrg        free(views);
2063b8e80941Smrg}
2064b8e80941Smrg
/* Bind a new framebuffer state. Re-emits the vertex/tiler framebuffer
 * descriptor (SFBD or MFBD depending on the GPU generation) whenever an
 * attachment actually changes, and opportunistically enables AFBC
 * compression and transaction elimination on offscreen render targets. */

static void
panfrost_set_framebuffer_state(struct pipe_context *pctx,
                               const struct pipe_framebuffer_state *fb)
{
        struct panfrost_context *ctx = pan_context(pctx);

        /* Flush when switching away from an FBO */

        if (!panfrost_is_scanout(ctx)) {
                panfrost_flush(pctx, NULL, 0);
        }

        ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs;
        ctx->pipe_framebuffer.samples = fb->samples;
        ctx->pipe_framebuffer.layers = fb->layers;
        ctx->pipe_framebuffer.width = fb->width;
        ctx->pipe_framebuffer.height = fb->height;

        for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
                struct pipe_surface *cb = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;

                /* check if changing cbuf */
                if (ctx->pipe_framebuffer.cbufs[i] == cb) continue;

                if (cb && (i != 0)) {
                        DBG("XXX: Multiple render targets not supported before t7xx!\n");
                        assert(0);
                }

                /* assign new */
                pipe_surface_reference(&ctx->pipe_framebuffer.cbufs[i], cb);

                if (!cb)
                        continue;

                /* Regenerate the framebuffer descriptor for the new target.
                 * SFBD (single) vs MFBD (multi) is a hardware-generation
                 * split, selected at context creation. */
                if (ctx->require_sfbd)
                        ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
                else
                        ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);

                panfrost_attach_vt_framebuffer(ctx);

                struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[i]->texture);
                enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
                bool is_scanout = panfrost_is_scanout(ctx);

                if (!is_scanout && tex->bo->layout != PAN_AFBC && panfrost_can_afbc(format)) {
                        /* The blob is aggressive about enabling AFBC. As such,
                         * it's pretty much necessary to use it here, since we
                         * have no traces of non-compressed FBO. */

                        panfrost_enable_afbc(ctx, tex, false);
                }

                if (!is_scanout && !tex->bo->has_checksum) {
                        /* Enable transaction elimination if we can */
                        panfrost_enable_checksum(ctx, tex);
                }
        }

        /* Depth/stencil attachment, handled analogously to the color case */
        {
                struct pipe_surface *zb = fb->zsbuf;

                if (ctx->pipe_framebuffer.zsbuf != zb) {
                        pipe_surface_reference(&ctx->pipe_framebuffer.zsbuf, zb);

                        if (zb) {
                                /* FBO has depth */

                                if (ctx->require_sfbd)
                                        ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
                                else
                                        ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);

                                panfrost_attach_vt_framebuffer(ctx);

                                /* Keep the depth FBO linear */
                        }
                }
        }
}
2146b8e80941Smrg
2147b8e80941Smrgstatic void *
2148b8e80941Smrgpanfrost_create_blend_state(struct pipe_context *pipe,
2149b8e80941Smrg                            const struct pipe_blend_state *blend)
2150b8e80941Smrg{
2151b8e80941Smrg        struct panfrost_context *ctx = pan_context(pipe);
2152b8e80941Smrg        struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state);
2153b8e80941Smrg        so->base = *blend;
2154b8e80941Smrg
2155b8e80941Smrg        /* TODO: The following features are not yet implemented */
2156b8e80941Smrg        assert(!blend->logicop_enable);
2157b8e80941Smrg        assert(!blend->alpha_to_coverage);
2158b8e80941Smrg        assert(!blend->alpha_to_one);
2159b8e80941Smrg
2160b8e80941Smrg        /* Compile the blend state, first as fixed-function if we can */
2161b8e80941Smrg
2162b8e80941Smrg        if (panfrost_make_fixed_blend_mode(&blend->rt[0], &so->equation, blend->rt[0].colormask, &ctx->blend_color))
2163b8e80941Smrg                return so;
2164b8e80941Smrg
2165b8e80941Smrg        /* If we can't, compile a blend shader instead */
2166b8e80941Smrg
2167b8e80941Smrg        panfrost_make_blend_shader(ctx, so, &ctx->blend_color);
2168b8e80941Smrg
2169b8e80941Smrg        return so;
2170b8e80941Smrg}
2171b8e80941Smrg
2172b8e80941Smrgstatic void
2173b8e80941Smrgpanfrost_bind_blend_state(struct pipe_context *pipe,
2174b8e80941Smrg                          void *cso)
2175b8e80941Smrg{
2176b8e80941Smrg        struct panfrost_context *ctx = pan_context(pipe);
2177b8e80941Smrg        struct pipe_blend_state *blend = (struct pipe_blend_state *) cso;
2178b8e80941Smrg        struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso;
2179b8e80941Smrg        ctx->blend = pblend;
2180b8e80941Smrg
2181b8e80941Smrg        if (!blend)
2182b8e80941Smrg                return;
2183b8e80941Smrg
2184b8e80941Smrg        SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither);
2185b8e80941Smrg
2186b8e80941Smrg        /* TODO: Attach color */
2187b8e80941Smrg
2188b8e80941Smrg        /* Shader itself is not dirty, but the shader core is */
2189b8e80941Smrg        ctx->dirty |= PAN_DIRTY_FS;
2190b8e80941Smrg}
2191b8e80941Smrg
2192b8e80941Smrgstatic void
2193b8e80941Smrgpanfrost_delete_blend_state(struct pipe_context *pipe,
2194b8e80941Smrg                            void *blend)
2195b8e80941Smrg{
2196b8e80941Smrg        struct panfrost_blend_state *so = (struct panfrost_blend_state *) blend;
2197b8e80941Smrg
2198b8e80941Smrg        if (so->has_blend_shader) {
2199b8e80941Smrg                DBG("Deleting blend state leak blend shaders bytecode\n");
2200b8e80941Smrg        }
2201b8e80941Smrg
2202b8e80941Smrg        free(blend);
2203b8e80941Smrg}
2204b8e80941Smrg
2205b8e80941Smrgstatic void
2206b8e80941Smrgpanfrost_set_blend_color(struct pipe_context *pipe,
2207b8e80941Smrg                         const struct pipe_blend_color *blend_color)
2208b8e80941Smrg{
2209b8e80941Smrg        struct panfrost_context *ctx = pan_context(pipe);
2210b8e80941Smrg
2211b8e80941Smrg        /* If blend_color is we're unbinding, so ctx->blend_color is now undefined -> nothing to do */
2212b8e80941Smrg
2213b8e80941Smrg        if (blend_color) {
2214b8e80941Smrg                ctx->blend_color = *blend_color;
2215b8e80941Smrg
2216b8e80941Smrg                /* The blend mode depends on the blend constant color, due to the
2217b8e80941Smrg                 * fixed/programmable split. So, we're forced to regenerate the blend
2218b8e80941Smrg                 * equation */
2219b8e80941Smrg
2220b8e80941Smrg                /* TODO: Attach color */
2221b8e80941Smrg        }
2222b8e80941Smrg}
2223b8e80941Smrg
2224b8e80941Smrgstatic void *
2225b8e80941Smrgpanfrost_create_depth_stencil_state(struct pipe_context *pipe,
2226b8e80941Smrg                                    const struct pipe_depth_stencil_alpha_state *depth_stencil)
2227b8e80941Smrg{
2228b8e80941Smrg        return mem_dup(depth_stencil, sizeof(*depth_stencil));
2229b8e80941Smrg}
2230b8e80941Smrg
/* Bind a depth/stencil/alpha CSO, translating it into bits of the fragment
 * shader core descriptor (stencil ops, depth test/function) kept in the
 * context. */

static void
panfrost_bind_depth_stencil_state(struct pipe_context *pipe,
                                  void *cso)
{
        struct panfrost_context *ctx = pan_context(pipe);
        struct pipe_depth_stencil_alpha_state *depth_stencil = cso;
        ctx->depth_stencil = depth_stencil;

        if (!depth_stencil)
                return;

        /* Alpha does not exist in the hardware (it's not in ES3), so it's
         * emulated in the fragment shader */

        if (depth_stencil->alpha.enabled) {
                /* We need to trigger a new shader (maybe) */
                ctx->base.bind_fs_state(&ctx->base, ctx->fs);
        }

        /* Stencil state */
        SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled); /* XXX: which one? */

        /* Translate both faces; [0] is front, [1] is back */
        panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front);
        ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask;

        panfrost_make_stencil_state(&depth_stencil->stencil[1], &ctx->fragment_shader_core.stencil_back);
        ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[1].writemask;

        /* Depth state (TODO: Refactor) */
        SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_TEST, depth_stencil->depth.enabled);

        /* With the test disabled, force an always-pass compare function */
        int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS;

        ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
        ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func));

        /* Bounds test not implemented */
        assert(!depth_stencil->depth.bounds_test);

        ctx->dirty |= PAN_DIRTY_FS;
}
2272b8e80941Smrg
static void
panfrost_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
{
        /* The CSO is a plain mem_dup'd copy; just release it */
        free(depth);
}
2278b8e80941Smrg
/* Stub: the sample mask is currently ignored by this driver */
static void
panfrost_set_sample_mask(struct pipe_context *pipe,
                         unsigned sample_mask)
{
}
2284b8e80941Smrg
/* Stub: user clip planes are currently ignored by this driver */
static void
panfrost_set_clip_state(struct pipe_context *pipe,
                        const struct pipe_clip_state *clip)
{
        //struct panfrost_context *panfrost = pan_context(pipe);
}
2291b8e80941Smrg
2292b8e80941Smrgstatic void
2293b8e80941Smrgpanfrost_set_viewport_states(struct pipe_context *pipe,
2294b8e80941Smrg                             unsigned start_slot,
2295b8e80941Smrg                             unsigned num_viewports,
2296b8e80941Smrg                             const struct pipe_viewport_state *viewports)
2297b8e80941Smrg{
2298b8e80941Smrg        struct panfrost_context *ctx = pan_context(pipe);
2299b8e80941Smrg
2300b8e80941Smrg        assert(start_slot == 0);
2301b8e80941Smrg        assert(num_viewports == 1);
2302b8e80941Smrg
2303b8e80941Smrg        ctx->pipe_viewport = *viewports;
2304b8e80941Smrg
2305b8e80941Smrg#if 0
2306b8e80941Smrg        /* TODO: What if not centered? */
2307b8e80941Smrg        float w = abs(viewports->scale[0]) * 2.0;
2308b8e80941Smrg        float h = abs(viewports->scale[1]) * 2.0;
2309b8e80941Smrg
2310b8e80941Smrg        ctx->viewport.viewport1[0] = MALI_POSITIVE((int) w);
2311b8e80941Smrg        ctx->viewport.viewport1[1] = MALI_POSITIVE((int) h);
2312b8e80941Smrg#endif
2313b8e80941Smrg}
2314b8e80941Smrg
2315b8e80941Smrgstatic void
2316b8e80941Smrgpanfrost_set_scissor_states(struct pipe_context *pipe,
2317b8e80941Smrg                            unsigned start_slot,
2318b8e80941Smrg                            unsigned num_scissors,
2319b8e80941Smrg                            const struct pipe_scissor_state *scissors)
2320b8e80941Smrg{
2321b8e80941Smrg        struct panfrost_context *ctx = pan_context(pipe);
2322b8e80941Smrg
2323b8e80941Smrg        assert(start_slot == 0);
2324b8e80941Smrg        assert(num_scissors == 1);
2325b8e80941Smrg
2326b8e80941Smrg        ctx->scissor = *scissors;
2327b8e80941Smrg}
2328b8e80941Smrg
/* Stub: polygon stippling is currently ignored by this driver */
static void
panfrost_set_polygon_stipple(struct pipe_context *pipe,
                             const struct pipe_poly_stipple *stipple)
{
        //struct panfrost_context *panfrost = pan_context(pipe);
}
2335b8e80941Smrg
/* Stub: pausing/resuming queries is currently ignored by this driver */
static void
panfrost_set_active_query_state(struct pipe_context *pipe,
                                boolean enable)
{
        //struct panfrost_context *panfrost = pan_context(pipe);
}
2342b8e80941Smrg
2343b8e80941Smrgstatic void
2344b8e80941Smrgpanfrost_destroy(struct pipe_context *pipe)
2345b8e80941Smrg{
2346b8e80941Smrg        struct panfrost_context *panfrost = pan_context(pipe);
2347b8e80941Smrg        struct panfrost_screen *screen = pan_screen(pipe->screen);
2348b8e80941Smrg
2349b8e80941Smrg        if (panfrost->blitter)
2350b8e80941Smrg                util_blitter_destroy(panfrost->blitter);
2351b8e80941Smrg
2352b8e80941Smrg        screen->driver->free_slab(screen, &panfrost->scratchpad);
2353b8e80941Smrg        screen->driver->free_slab(screen, &panfrost->varying_mem);
2354b8e80941Smrg        screen->driver->free_slab(screen, &panfrost->shaders);
2355b8e80941Smrg        screen->driver->free_slab(screen, &panfrost->tiler_heap);
2356b8e80941Smrg        screen->driver->free_slab(screen, &panfrost->misc_0);
2357b8e80941Smrg}
2358b8e80941Smrg
2359b8e80941Smrgstatic struct pipe_query *
2360b8e80941Smrgpanfrost_create_query(struct pipe_context *pipe,
2361b8e80941Smrg		      unsigned type,
2362b8e80941Smrg		      unsigned index)
2363b8e80941Smrg{
2364b8e80941Smrg        struct panfrost_query *q = CALLOC_STRUCT(panfrost_query);
2365b8e80941Smrg
2366b8e80941Smrg        q->type = type;
2367b8e80941Smrg        q->index = index;
2368b8e80941Smrg
2369b8e80941Smrg        return (struct pipe_query *) q;
2370b8e80941Smrg}
2371b8e80941Smrg
static void
panfrost_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
{
        /* The result word lives in the context's descriptor heap (see
         * panfrost_begin_query), so only the CPU struct is released here */
        FREE(q);
}
2377b8e80941Smrg
2378b8e80941Smrgstatic boolean
2379b8e80941Smrgpanfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q)
2380b8e80941Smrg{
2381b8e80941Smrg        struct panfrost_context *ctx = pan_context(pipe);
2382b8e80941Smrg        struct panfrost_query *query = (struct panfrost_query *) q;
2383b8e80941Smrg
2384b8e80941Smrg        switch (query->type) {
2385b8e80941Smrg                case PIPE_QUERY_OCCLUSION_COUNTER:
2386b8e80941Smrg                case PIPE_QUERY_OCCLUSION_PREDICATE:
2387b8e80941Smrg                case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
2388b8e80941Smrg                {
2389b8e80941Smrg                        /* Allocate a word for the query results to be stored */
2390b8e80941Smrg                        query->transfer = panfrost_allocate_chunk(ctx, sizeof(unsigned), HEAP_DESCRIPTOR);
2391b8e80941Smrg
2392b8e80941Smrg                        ctx->occlusion_query = query;
2393b8e80941Smrg
2394b8e80941Smrg                        break;
2395b8e80941Smrg                }
2396b8e80941Smrg
2397b8e80941Smrg                default:
2398b8e80941Smrg                        DBG("Skipping query %d\n", query->type);
2399b8e80941Smrg                        break;
2400b8e80941Smrg        }
2401b8e80941Smrg
2402b8e80941Smrg        return true;
2403b8e80941Smrg}
2404b8e80941Smrg
2405b8e80941Smrgstatic bool
2406b8e80941Smrgpanfrost_end_query(struct pipe_context *pipe, struct pipe_query *q)
2407b8e80941Smrg{
2408b8e80941Smrg        struct panfrost_context *ctx = pan_context(pipe);
2409b8e80941Smrg        ctx->occlusion_query = NULL;
2410b8e80941Smrg        return true;
2411b8e80941Smrg}
2412b8e80941Smrg
2413b8e80941Smrgstatic boolean
2414b8e80941Smrgpanfrost_get_query_result(struct pipe_context *pipe,
2415b8e80941Smrg                          struct pipe_query *q,
2416b8e80941Smrg                          boolean wait,
2417b8e80941Smrg                          union pipe_query_result *vresult)
2418b8e80941Smrg{
2419b8e80941Smrg        /* STUB */
2420b8e80941Smrg        struct panfrost_query *query = (struct panfrost_query *) q;
2421b8e80941Smrg
2422b8e80941Smrg        /* We need to flush out the jobs to actually run the counter, TODO
2423b8e80941Smrg         * check wait, TODO wallpaper after if needed */
2424b8e80941Smrg
2425b8e80941Smrg        panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
2426b8e80941Smrg
2427b8e80941Smrg        switch (query->type) {
2428b8e80941Smrg                case PIPE_QUERY_OCCLUSION_COUNTER:
2429b8e80941Smrg                case PIPE_QUERY_OCCLUSION_PREDICATE:
2430b8e80941Smrg                case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
2431b8e80941Smrg                        /* Read back the query results */
2432b8e80941Smrg                        unsigned *result = (unsigned *) query->transfer.cpu;
2433b8e80941Smrg                        unsigned passed = *result;
2434b8e80941Smrg
2435b8e80941Smrg                        if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
2436b8e80941Smrg                                vresult->u64 = passed;
2437b8e80941Smrg                        } else {
2438b8e80941Smrg                                vresult->b = !!passed;
2439b8e80941Smrg                        }
2440b8e80941Smrg
2441b8e80941Smrg                        break;
2442b8e80941Smrg                }
2443b8e80941Smrg                default:
2444b8e80941Smrg                        DBG("Skipped query get %d\n", query->type);
2445b8e80941Smrg                        break;
2446b8e80941Smrg        }
2447b8e80941Smrg
2448b8e80941Smrg        return true;
2449b8e80941Smrg}
2450b8e80941Smrg
2451b8e80941Smrgstatic struct pipe_stream_output_target *
2452b8e80941Smrgpanfrost_create_stream_output_target(struct pipe_context *pctx,
2453b8e80941Smrg                                struct pipe_resource *prsc,
2454b8e80941Smrg                                unsigned buffer_offset,
2455b8e80941Smrg                                unsigned buffer_size)
2456b8e80941Smrg{
2457b8e80941Smrg        struct pipe_stream_output_target *target;
2458b8e80941Smrg
2459b8e80941Smrg        target = CALLOC_STRUCT(pipe_stream_output_target);
2460b8e80941Smrg
2461b8e80941Smrg        if (!target)
2462b8e80941Smrg                return NULL;
2463b8e80941Smrg
2464b8e80941Smrg        pipe_reference_init(&target->reference, 1);
2465b8e80941Smrg        pipe_resource_reference(&target->buffer, prsc);
2466b8e80941Smrg
2467b8e80941Smrg        target->context = pctx;
2468b8e80941Smrg        target->buffer_offset = buffer_offset;
2469b8e80941Smrg        target->buffer_size = buffer_size;
2470b8e80941Smrg
2471b8e80941Smrg        return target;
2472b8e80941Smrg}
2473b8e80941Smrg
2474b8e80941Smrgstatic void
2475b8e80941Smrgpanfrost_stream_output_target_destroy(struct pipe_context *pctx,
2476b8e80941Smrg                                 struct pipe_stream_output_target *target)
2477b8e80941Smrg{
2478b8e80941Smrg        pipe_resource_reference(&target->buffer, NULL);
2479b8e80941Smrg        free(target);
2480b8e80941Smrg}
2481b8e80941Smrg
/* Stub: transform feedback targets are accepted but not yet used */
static void
panfrost_set_stream_output_targets(struct pipe_context *pctx,
                              unsigned num_targets,
                              struct pipe_stream_output_target **targets,
                              const unsigned *offsets)
{
        /* STUB */
}
2490b8e80941Smrg
2491b8e80941Smrgstatic void
2492b8e80941Smrgpanfrost_setup_hardware(struct panfrost_context *ctx)
2493b8e80941Smrg{
2494b8e80941Smrg        struct pipe_context *gallium = (struct pipe_context *) ctx;
2495b8e80941Smrg        struct panfrost_screen *screen = pan_screen(gallium->screen);
2496b8e80941Smrg
2497b8e80941Smrg        for (int i = 0; i < ARRAY_SIZE(ctx->transient_pools); ++i) {
2498b8e80941Smrg                /* Allocate the beginning of the transient pool */
2499b8e80941Smrg                int entry_size = (1 << 22); /* 4MB */
2500b8e80941Smrg
2501b8e80941Smrg                ctx->transient_pools[i].entry_size = entry_size;
2502b8e80941Smrg                ctx->transient_pools[i].entry_count = 1;
2503b8e80941Smrg
2504b8e80941Smrg                ctx->transient_pools[i].entries[0] = (struct panfrost_memory_entry *) pb_slab_alloc(&screen->slabs, entry_size, HEAP_TRANSIENT);
2505b8e80941Smrg        }
2506b8e80941Smrg
2507b8e80941Smrg        screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0);
2508b8e80941Smrg        screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0);
2509b8e80941Smrg        screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
2510b8e80941Smrg        screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
2511b8e80941Smrg        screen->driver->allocate_slab(screen, &ctx->misc_0, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
2512b8e80941Smrg
2513b8e80941Smrg}
2514b8e80941Smrg
/* New context creation, which also performs hardware initialisation, since a
 * better way to structure this has not been found yet. */
2517b8e80941Smrg
2518b8e80941Smrgstruct pipe_context *
2519b8e80941Smrgpanfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
2520b8e80941Smrg{
2521b8e80941Smrg        struct panfrost_context *ctx = CALLOC_STRUCT(panfrost_context);
2522b8e80941Smrg        struct panfrost_screen *pscreen = pan_screen(screen);
2523b8e80941Smrg        memset(ctx, 0, sizeof(*ctx));
2524b8e80941Smrg        struct pipe_context *gallium = (struct pipe_context *) ctx;
2525b8e80941Smrg        unsigned gpu_id;
2526b8e80941Smrg
2527b8e80941Smrg        gpu_id = pscreen->driver->query_gpu_version(pscreen);
2528b8e80941Smrg
2529b8e80941Smrg        ctx->is_t6xx = gpu_id <= 0x0750; /* For now, this flag means T760 or less */
2530b8e80941Smrg        ctx->require_sfbd = gpu_id < 0x0750; /* T760 is the first to support MFBD */
2531b8e80941Smrg
2532b8e80941Smrg        gallium->screen = screen;
2533b8e80941Smrg
2534b8e80941Smrg        gallium->destroy = panfrost_destroy;
2535b8e80941Smrg
2536b8e80941Smrg        gallium->set_framebuffer_state = panfrost_set_framebuffer_state;
2537b8e80941Smrg
2538b8e80941Smrg        gallium->flush = panfrost_flush;
2539b8e80941Smrg        gallium->clear = panfrost_clear;
2540b8e80941Smrg        gallium->draw_vbo = panfrost_draw_vbo;
2541b8e80941Smrg
2542b8e80941Smrg        gallium->set_vertex_buffers = panfrost_set_vertex_buffers;
2543b8e80941Smrg        gallium->set_constant_buffer = panfrost_set_constant_buffer;
2544b8e80941Smrg
2545b8e80941Smrg        gallium->set_stencil_ref = panfrost_set_stencil_ref;
2546b8e80941Smrg
2547b8e80941Smrg        gallium->create_sampler_view = panfrost_create_sampler_view;
2548b8e80941Smrg        gallium->set_sampler_views = panfrost_set_sampler_views;
2549b8e80941Smrg        gallium->sampler_view_destroy = panfrost_sampler_view_destroy;
2550b8e80941Smrg
2551b8e80941Smrg        gallium->create_rasterizer_state = panfrost_create_rasterizer_state;
2552b8e80941Smrg        gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state;
2553b8e80941Smrg        gallium->delete_rasterizer_state = panfrost_generic_cso_delete;
2554b8e80941Smrg
2555b8e80941Smrg        gallium->create_vertex_elements_state = panfrost_create_vertex_elements_state;
2556b8e80941Smrg        gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state;
2557b8e80941Smrg        gallium->delete_vertex_elements_state = panfrost_delete_vertex_elements_state;
2558b8e80941Smrg
2559b8e80941Smrg        gallium->create_fs_state = panfrost_create_shader_state;
2560b8e80941Smrg        gallium->delete_fs_state = panfrost_delete_shader_state;
2561b8e80941Smrg        gallium->bind_fs_state = panfrost_bind_fs_state;
2562b8e80941Smrg
2563b8e80941Smrg        gallium->create_vs_state = panfrost_create_shader_state;
2564b8e80941Smrg        gallium->delete_vs_state = panfrost_delete_shader_state;
2565b8e80941Smrg        gallium->bind_vs_state = panfrost_bind_vs_state;
2566b8e80941Smrg
2567b8e80941Smrg        gallium->create_sampler_state = panfrost_create_sampler_state;
2568b8e80941Smrg        gallium->delete_sampler_state = panfrost_generic_cso_delete;
2569b8e80941Smrg        gallium->bind_sampler_states = panfrost_bind_sampler_states;
2570b8e80941Smrg
2571b8e80941Smrg        gallium->create_blend_state = panfrost_create_blend_state;
2572b8e80941Smrg        gallium->bind_blend_state   = panfrost_bind_blend_state;
2573b8e80941Smrg        gallium->delete_blend_state = panfrost_delete_blend_state;
2574b8e80941Smrg
2575b8e80941Smrg        gallium->set_blend_color = panfrost_set_blend_color;
2576b8e80941Smrg
2577b8e80941Smrg        gallium->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state;
2578b8e80941Smrg        gallium->bind_depth_stencil_alpha_state   = panfrost_bind_depth_stencil_state;
2579b8e80941Smrg        gallium->delete_depth_stencil_alpha_state = panfrost_delete_depth_stencil_state;
2580b8e80941Smrg
2581b8e80941Smrg        gallium->set_sample_mask = panfrost_set_sample_mask;
2582b8e80941Smrg
2583b8e80941Smrg        gallium->set_clip_state = panfrost_set_clip_state;
2584b8e80941Smrg        gallium->set_viewport_states = panfrost_set_viewport_states;
2585b8e80941Smrg        gallium->set_scissor_states = panfrost_set_scissor_states;
2586b8e80941Smrg        gallium->set_polygon_stipple = panfrost_set_polygon_stipple;
2587b8e80941Smrg        gallium->set_active_query_state = panfrost_set_active_query_state;
2588b8e80941Smrg
2589b8e80941Smrg        gallium->create_query = panfrost_create_query;
2590b8e80941Smrg        gallium->destroy_query = panfrost_destroy_query;
2591b8e80941Smrg        gallium->begin_query = panfrost_begin_query;
2592b8e80941Smrg        gallium->end_query = panfrost_end_query;
2593b8e80941Smrg        gallium->get_query_result = panfrost_get_query_result;
2594b8e80941Smrg
2595b8e80941Smrg        gallium->create_stream_output_target = panfrost_create_stream_output_target;
2596b8e80941Smrg        gallium->stream_output_target_destroy = panfrost_stream_output_target_destroy;
2597b8e80941Smrg        gallium->set_stream_output_targets = panfrost_set_stream_output_targets;
2598b8e80941Smrg
2599b8e80941Smrg        panfrost_resource_context_init(gallium);
2600b8e80941Smrg
2601b8e80941Smrg        pscreen->driver->init_context(ctx);
2602b8e80941Smrg
2603b8e80941Smrg        panfrost_setup_hardware(ctx);
2604b8e80941Smrg
2605b8e80941Smrg        /* XXX: leaks */
2606b8e80941Smrg        gallium->stream_uploader = u_upload_create_default(gallium);
2607b8e80941Smrg        gallium->const_uploader = gallium->stream_uploader;
2608b8e80941Smrg        assert(gallium->stream_uploader);
2609b8e80941Smrg
2610b8e80941Smrg        /* Midgard supports ES modes, plus QUADS/QUAD_STRIPS/POLYGON */
2611b8e80941Smrg        ctx->draw_modes = (1 << (PIPE_PRIM_POLYGON + 1)) - 1;
2612b8e80941Smrg
2613b8e80941Smrg        ctx->primconvert = util_primconvert_create(gallium, ctx->draw_modes);
2614b8e80941Smrg
2615b8e80941Smrg        ctx->blitter = util_blitter_create(gallium);
2616b8e80941Smrg        assert(ctx->blitter);
2617b8e80941Smrg
2618b8e80941Smrg        /* Prepare for render! */
2619b8e80941Smrg
2620b8e80941Smrg        panfrost_job_init(ctx);
2621b8e80941Smrg        panfrost_emit_vertex_payload(ctx);
2622b8e80941Smrg        panfrost_emit_tiler_payload(ctx);
2623b8e80941Smrg        panfrost_invalidate_frame(ctx);
2624b8e80941Smrg        panfrost_default_shader_backend(ctx);
2625b8e80941Smrg        panfrost_generate_space_filler_indices();
2626b8e80941Smrg
2627b8e80941Smrg        return gallium;
2628b8e80941Smrg}
2629