pan_context.c revision 9f464c52
/*
 * © Copyright 2018 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <sys/poll.h>
#include <errno.h>

#include "pan_context.h"
#include "pan_swizzle.h"
#include "pan_format.h"

#include "util/macros.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_memory.h"
#include "util/u_vbuf.h"
#include "util/half_float.h"
#include "util/u_helpers.h"
#include "indices/u_primconvert.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_math.h"

#include "pan_screen.h"
#include "pan_blending.h"
#include "pan_blend_shaders.h"
#include "pan_util.h"
#include "pan_wallpaper.h"

static int performance_counter_number = 0;
extern const char *pan_counters_base;

/* Do not actually send anything to the GPU; merely generate the cmdstream as
 * fast as possible. Disables framebuffer writes. */
//#define DRY_RUN

/* Can a given format support AFBC? Not all can. */

static bool
panfrost_can_afbc(enum pipe_format format)
{
        const struct util_format_description *desc =
                util_format_description(format);

        if (util_format_is_rgba8_variant(desc))
                return true;

        /* TODO: AFBC of other formats */

        return false;
}

/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically
 * independent between color buffers and depth/stencil). To enable, we allocate
 * the AFBC metadata buffer and mark that it is enabled. We do -not- actually
 * edit the fragment job here. This routine should be called ONCE per
 * AFBC-compressed buffer, rather than on every frame. */

static void
panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds)
{
        if (ctx->require_sfbd) {
                DBG("AFBC not supported yet on SFBD\n");
                assert(0);
        }

        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_screen *screen = pan_screen(gallium->screen);

        /* AFBC metadata is 16 bytes per tile */
        int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format);
        int stride = bytes_per_pixel * ALIGN(rsrc->base.width0, 16);

        stride *= 2;  /* TODO: Should this be carried over? */
        int main_size = stride * rsrc->base.height0;
        rsrc->bo->afbc_metadata_size = tile_w * tile_h * 16;

        /* Allocate the AFBC slab itself, large enough to hold the above */
        screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab,
                               (rsrc->bo->afbc_metadata_size + main_size + 4095) / 4096,
                               true, 0, 0, 0);

        rsrc->bo->layout = PAN_AFBC;

        /* Compressed textured reads use a tagged pointer to the metadata */

        rsrc->bo->gpu = rsrc->bo->afbc_slab.gpu | (ds ? 0 : 1);
        rsrc->bo->cpu = rsrc->bo->afbc_slab.cpu;
        rsrc->bo->gem_handle = rsrc->bo->afbc_slab.gem_handle;
}
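
/* A worked example of the metadata sizing math above (an illustrative sketch
 * only; this helper is not called anywhere in the driver): a 1920x1080
 * surface covers ceil(1920/16) * ceil(1080/16) = 120 * 68 = 8160 tiles, so
 * its AFBC metadata occupies 8160 * 16 = 130560 bytes. This assumes
 * MALI_TILE_LENGTH is 16 (i.e. MALI_TILE_SHIFT is 4). */

static inline size_t
panfrost_afbc_metadata_size_sketch(unsigned width, unsigned height)
{
        size_t tile_w = (width + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        size_t tile_h = (height + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;

        /* 16 bytes of AFBC header per tile */
        return tile_w * tile_h * 16;
}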

static void
panfrost_enable_checksum(struct panfrost_context *ctx, struct panfrost_resource *rsrc)
{
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_screen *screen = pan_screen(gallium->screen);
        int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;

        /* 8 byte checksum per tile */
        rsrc->bo->checksum_stride = tile_w * 8;
        int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096);
        screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, pages, false, 0, 0, 0);

        rsrc->bo->has_checksum = true;
}

/* Framebuffer descriptor */

static void
panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h)
{
        fb->width = MALI_POSITIVE(w);
        fb->height = MALI_POSITIVE(h);

        /* This is how resolution_check is calculated; it's not yet clear why
         * the hardware wants it. The formula itself was discovered mostly by
         * manual bruteforce and aggressive algebraic simplification. */

        fb->resolution_check = ((w + h) / 3) << 4;
}
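
/* As a concrete check of the formula (my own arithmetic, not from the
 * original sources): for a 1920x1080 framebuffer,
 * resolution_check = ((1920 + 1080) / 3) << 4 = 1000 << 4 = 16000 = 0x3e80. */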

struct mali_single_framebuffer
panfrost_emit_sfbd(struct panfrost_context *ctx)
{
        struct mali_single_framebuffer framebuffer = {
                .unknown2 = 0x1f,
                .format = 0x30000000,
                .clear_flags = 0x1000,
                .unknown_address_0 = ctx->scratchpad.gpu,
                .unknown_address_1 = ctx->misc_0.gpu,
                .unknown_address_2 = ctx->misc_0.gpu + 40960,
                .tiler_flags = 0xf0,
                .tiler_heap_free = ctx->tiler_heap.gpu,
                .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
        };

        panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height);

        return framebuffer;
}

struct bifrost_framebuffer
panfrost_emit_mfbd(struct panfrost_context *ctx)
{
        struct bifrost_framebuffer framebuffer = {
                /* It is not yet clear what tiler_meta means or how it's
                 * calculated, but we can tell the lower 32-bits are a
                 * (monotonically increasing?) function of tile count and
                 * geometry complexity; I suspect it defines a memory size of
                 * some kind for the tiler. To add to the confusion, the
                 * hardware is happy enough to accept a zero in this field,
                 * so we don't even have to worry about it right now.
                 *
                 * The byte just after the 32-bit mark is much more
                 * interesting. The higher nibble I've only ever seen as 0xF,
                 * but the lower one I've seen as 0x0 or 0xF, and it's not
                 * obvious what the difference is. But what -is- obvious is
                 * that when the lower nibble is zero, performance is severely
                 * degraded compared to when the lower nibble is set.
                 * Evidently, that nibble enables some sort of fast path,
                 * perhaps relating to caching or tile flush? Regardless, at
                 * this point there's no clear reason not to set it, aside
                 * from substantially increased memory requirements (of the
                 * misc_0 buffer). */

                .tiler_meta = ((uint64_t) 0xff << 32) | 0x0,

                .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height),
                .width2 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                .height2 = MALI_POSITIVE(ctx->pipe_framebuffer.height),

                .unk1 = 0x1080,

                /* TODO: MRT */
                .rt_count_1 = MALI_POSITIVE(1),
                .rt_count_2 = 4,

                .unknown2 = 0x1f,

                /* Corresponds to unknown_address_X of SFBD */
                .scratchpad = ctx->scratchpad.gpu,
                .tiler_scratch_start = ctx->misc_0.gpu,

                /* The constant added here is, like the lower word of
                 * tiler_meta, (loosely) another product of framebuffer size
                 * and geometry complexity. It must be sufficiently large for
                 * the tiler_meta fast path to work; if it's too small, there
                 * will be DATA_INVALID_FAULTs. Conversely, it must be less
                 * than the total size of misc_0, or else there's no room.
                 * It's possible this constant configures a partition between
                 * two parts of misc_0. We haven't investigated the
                 * functionality, as these buffers are used internally by the
                 * hardware (presumably by the tiler) but not seemingly
                 * touched by the driver. */

                .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000,

                .tiler_heap_start = ctx->tiler_heap.gpu,
                .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
        };

        return framebuffer;
}

/* Are we currently rendering to the screen (rather than an FBO)? */

bool
panfrost_is_scanout(struct panfrost_context *ctx)
{
        /* If there is no color buffer, it's an FBO */
        if (!ctx->pipe_framebuffer.nr_cbufs)
                return false;

        /* If we're so early that no framebuffer has been set, assume scanout */
        if (!ctx->pipe_framebuffer.cbufs[0])
                return true;

        return ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_DISPLAY_TARGET ||
               ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SCANOUT ||
               ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SHARED;
}

static uint32_t
pan_pack_color(const union pipe_color_union *color, enum pipe_format format)
{
        /* Alpha magicked to 1.0 if there is no alpha */

        bool has_alpha = util_format_has_alpha(format);
        float clear_alpha = has_alpha ? color->f[3] : 1.0f;

        /* Packed color depends on the framebuffer format */

        const struct util_format_description *desc =
                util_format_description(format);

        if (util_format_is_rgba8_variant(desc)) {
                return (float_to_ubyte(clear_alpha) << 24) |
                       (float_to_ubyte(color->f[2]) << 16) |
                       (float_to_ubyte(color->f[1]) <<  8) |
                       (float_to_ubyte(color->f[0]) <<  0);
        } else if (format == PIPE_FORMAT_B5G6R5_UNORM) {
                /* First, we convert the components to R5, G6, B5 separately */
                unsigned r5 = CLAMP(color->f[0], 0.0, 1.0) * 31.0;
                unsigned g6 = CLAMP(color->f[1], 0.0, 1.0) * 63.0;
                unsigned b5 = CLAMP(color->f[2], 0.0, 1.0) * 31.0;

                /* Then we pack into a sparse u32. TODO: Why these shifts? */
                return (b5 << 25) | (g6 << 14) | (r5 << 5);
        } else {
                /* Unknown format */
                assert(0);
        }

        return 0;
}
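
/* To make the sparse 565 packing above concrete (my own worked example, not
 * from the original sources): clearing to opaque white gives r5 = 31,
 * g6 = 63, b5 = 31, so the packed value is
 * (31 << 25) | (63 << 14) | (31 << 5) = 0x3e000000 | 0xfc000 | 0x3e0
 * = 0x3e0fc3e0, leaving gaps between the channels rather than the dense
 * 0xffff of a conventional RGB565 encoding. */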

static void
panfrost_clear(
        struct pipe_context *pipe,
        unsigned buffers,
        const union pipe_color_union *color,
        double depth, unsigned stencil)
{
        struct panfrost_context *ctx = pan_context(pipe);
        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);

        if (buffers & PIPE_CLEAR_COLOR) {
                enum pipe_format format = ctx->pipe_framebuffer.cbufs[0]->format;
                job->clear_color = pan_pack_color(color, format);
        }

        if (buffers & PIPE_CLEAR_DEPTH) {
                job->clear_depth = depth;
        }

        if (buffers & PIPE_CLEAR_STENCIL) {
                job->clear_stencil = stencil;
        }

        job->clear |= buffers;
}

static mali_ptr
panfrost_attach_vt_mfbd(struct panfrost_context *ctx)
{
        /* MFBD needs a sequential semi-render target upload; what exactly
         * this is remains unclear for now */
        struct bifrost_render_target rts_list[] = {
                {
                        .chunknown = {
                                .unk = 0x30005,
                        },
                        .framebuffer = ctx->misc_0.gpu,
                        .zero2 = 0x3,
                },
        };

        /* Allocate memory for the three components */
        int size = 1024 + sizeof(ctx->vt_framebuffer_mfbd) + sizeof(rts_list);
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);

        /* Opaque 1024-block */
        rts_list[0].chunknown.pointer = transfer.gpu;

        memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer_mfbd, sizeof(ctx->vt_framebuffer_mfbd));
        memcpy(transfer.cpu + 1024 + sizeof(ctx->vt_framebuffer_mfbd), rts_list, sizeof(rts_list));

        return (transfer.gpu + 1024) | MALI_MFBD;
}
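
/* For reference, the transient allocation above is laid out as follows (a
 * sketch of my reading of the code, not an authoritative memory map):
 *
 *     transfer.gpu + 0     : opaque 1024-byte block (chunknown points here)
 *     transfer.gpu + 1024  : the bifrost_framebuffer (MFBD) itself
 *     ... + sizeof(MFBD)   : the bifrost_render_target list
 *
 * The returned pointer addresses the MFBD proper, with MALI_MFBD tagged into
 * the low bits to tell the hardware which descriptor type it points at. */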

static mali_ptr
panfrost_attach_vt_sfbd(struct panfrost_context *ctx)
{
        return panfrost_upload_transient(ctx, &ctx->vt_framebuffer_sfbd, sizeof(ctx->vt_framebuffer_sfbd)) | MALI_SFBD;
}

static void
panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
{
        mali_ptr framebuffer = ctx->require_sfbd ?
                panfrost_attach_vt_sfbd(ctx) :
                panfrost_attach_vt_mfbd(ctx);

        ctx->payload_vertex.postfix.framebuffer = framebuffer;
        ctx->payload_tiler.postfix.framebuffer = framebuffer;
}

/* Reset per-frame context, called on context initialisation as well as after
 * flushing a frame */

static void
panfrost_invalidate_frame(struct panfrost_context *ctx)
{
        unsigned transient_count =
                ctx->transient_pools[ctx->cmdstream_i].entry_index * ctx->transient_pools[0].entry_size +
                ctx->transient_pools[ctx->cmdstream_i].entry_offset;

        DBG("Uploaded transient %d bytes\n", transient_count);

        /* Rotate cmdstream */
        if ((++ctx->cmdstream_i) == ARRAY_SIZE(ctx->transient_pools))
                ctx->cmdstream_i = 0;

        if (ctx->require_sfbd)
                ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
        else
                ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);

        /* Reset varyings allocated */
        ctx->varying_height = 0;

        /* The transient cmdstream is dirty every frame; the only bits worth
         * preserving (textures, shaders, etc.) live in other buffers anyway */

        ctx->transient_pools[ctx->cmdstream_i].entry_index = 0;
        ctx->transient_pools[ctx->cmdstream_i].entry_offset = 0;

        /* Regenerate payloads */
        panfrost_attach_vt_framebuffer(ctx);

        if (ctx->rasterizer)
                ctx->dirty |= PAN_DIRTY_RASTERIZER;

        /* XXX */
        ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES;
}

/* In practice, every field of these payloads should be configurable
 * arbitrarily, which means these functions are basically catch-alls for
 * as-yet-undetermined unknowns */

static void
panfrost_emit_vertex_payload(struct panfrost_context *ctx)
{
        struct midgard_payload_vertex_tiler payload = {
                .prefix = {
                        .workgroups_z_shift = 32,
                        .workgroups_x_shift_2 = 0x2,
                        .workgroups_x_shift_3 = 0x5,
                },
                .gl_enables = 0x4 | (ctx->is_t6xx ? 0 : 0x2),
        };

        memcpy(&ctx->payload_vertex, &payload, sizeof(payload));
}

static void
panfrost_emit_tiler_payload(struct panfrost_context *ctx)
{
        struct midgard_payload_vertex_tiler payload = {
                .prefix = {
                        .workgroups_z_shift = 32,
                        .workgroups_x_shift_2 = 0x2,
                        .workgroups_x_shift_3 = 0x6,

                        .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? */
                },
        };

        memcpy(&ctx->payload_tiler, &payload, sizeof(payload));
}

static unsigned
translate_tex_wrap(enum pipe_tex_wrap w)
{
        switch (w) {
        case PIPE_TEX_WRAP_REPEAT:
                return MALI_WRAP_REPEAT;

        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
                return MALI_WRAP_CLAMP_TO_EDGE;

        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
                return MALI_WRAP_CLAMP_TO_BORDER;

        case PIPE_TEX_WRAP_MIRROR_REPEAT:
                return MALI_WRAP_MIRRORED_REPEAT;

        default:
                assert(0);
                return 0;
        }
}

static unsigned
translate_tex_filter(enum pipe_tex_filter f)
{
        switch (f) {
        case PIPE_TEX_FILTER_NEAREST:
                return MALI_NEAREST;

        case PIPE_TEX_FILTER_LINEAR:
                return MALI_LINEAR;

        default:
                assert(0);
                return 0;
        }
}

static unsigned
translate_mip_filter(enum pipe_tex_mipfilter f)
{
        return (f == PIPE_TEX_MIPFILTER_LINEAR) ? MALI_MIP_LINEAR : 0;
}

static unsigned
panfrost_translate_compare_func(enum pipe_compare_func in)
{
        switch (in) {
        case PIPE_FUNC_NEVER:
                return MALI_FUNC_NEVER;

        case PIPE_FUNC_LESS:
                return MALI_FUNC_LESS;

        case PIPE_FUNC_EQUAL:
                return MALI_FUNC_EQUAL;

        case PIPE_FUNC_LEQUAL:
                return MALI_FUNC_LEQUAL;

        case PIPE_FUNC_GREATER:
                return MALI_FUNC_GREATER;

        case PIPE_FUNC_NOTEQUAL:
                return MALI_FUNC_NOTEQUAL;

        case PIPE_FUNC_GEQUAL:
                return MALI_FUNC_GEQUAL;

        case PIPE_FUNC_ALWAYS:
                return MALI_FUNC_ALWAYS;
        }

        assert(0);
        return 0; /* Unreachable */
}

static unsigned
panfrost_translate_alt_compare_func(enum pipe_compare_func in)
{
        switch (in) {
        case PIPE_FUNC_NEVER:
                return MALI_ALT_FUNC_NEVER;

        case PIPE_FUNC_LESS:
                return MALI_ALT_FUNC_LESS;

        case PIPE_FUNC_EQUAL:
                return MALI_ALT_FUNC_EQUAL;

        case PIPE_FUNC_LEQUAL:
                return MALI_ALT_FUNC_LEQUAL;

        case PIPE_FUNC_GREATER:
                return MALI_ALT_FUNC_GREATER;

        case PIPE_FUNC_NOTEQUAL:
                return MALI_ALT_FUNC_NOTEQUAL;

        case PIPE_FUNC_GEQUAL:
                return MALI_ALT_FUNC_GEQUAL;

        case PIPE_FUNC_ALWAYS:
                return MALI_ALT_FUNC_ALWAYS;
        }

        assert(0);
        return 0; /* Unreachable */
}

static unsigned
panfrost_translate_stencil_op(enum pipe_stencil_op in)
{
        switch (in) {
        case PIPE_STENCIL_OP_KEEP:
                return MALI_STENCIL_KEEP;

        case PIPE_STENCIL_OP_ZERO:
                return MALI_STENCIL_ZERO;

        case PIPE_STENCIL_OP_REPLACE:
                return MALI_STENCIL_REPLACE;

        case PIPE_STENCIL_OP_INCR:
                return MALI_STENCIL_INCR;

        case PIPE_STENCIL_OP_DECR:
                return MALI_STENCIL_DECR;

        case PIPE_STENCIL_OP_INCR_WRAP:
                return MALI_STENCIL_INCR_WRAP;

        case PIPE_STENCIL_OP_DECR_WRAP:
                return MALI_STENCIL_DECR_WRAP;

        case PIPE_STENCIL_OP_INVERT:
                return MALI_STENCIL_INVERT;
        }

        assert(0);
        return 0; /* Unreachable */
}

static void
panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out)
{
        out->ref = 0; /* Gallium gets it from elsewhere */

        out->mask = in->valuemask;
        out->func = panfrost_translate_compare_func(in->func);
        out->sfail = panfrost_translate_stencil_op(in->fail_op);
        out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
        out->dppass = panfrost_translate_stencil_op(in->zpass_op);
}

static void
panfrost_default_shader_backend(struct panfrost_context *ctx)
{
        struct mali_shader_meta shader = {
                .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000),

                .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010,
                .unknown2_4 = MALI_NO_MSAA | 0x4e0,
        };

        if (ctx->is_t6xx) {
                shader.unknown2_4 |= 0x10;
        }

        /* The default stencil state uses the Gallium enums; the MALI_*
         * equivalents are produced by panfrost_make_stencil_state below */
        struct pipe_stencil_state default_stencil = {
                .enabled = 0,
                .func = PIPE_FUNC_ALWAYS,
                .fail_op = PIPE_STENCIL_OP_KEEP,
                .zfail_op = PIPE_STENCIL_OP_KEEP,
                .zpass_op = PIPE_STENCIL_OP_KEEP,
                .writemask = 0xFF,
                .valuemask = 0xFF
        };

        panfrost_make_stencil_state(&default_stencil, &shader.stencil_front);
        shader.stencil_mask_front = default_stencil.writemask;

        panfrost_make_stencil_state(&default_stencil, &shader.stencil_back);
        shader.stencil_mask_back = default_stencil.writemask;

        if (default_stencil.enabled)
                shader.unknown2_4 |= MALI_STENCIL_TEST;

        memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader));
}

/* Generates a vertex/tiler job. This is, in some sense, the heart of the
 * graphics command stream. It should be called once per draw, according to
 * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in
 * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for
 * vertex jobs. */

struct panfrost_transfer
panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_elided_tiler)
{
        /* Each draw call corresponds to two jobs, and we want to offset to leave room for the set-value job */
        int draw_job_index = 1 + (2 * ctx->draw_count);

        struct mali_job_descriptor_header job = {
                .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
                .job_index = draw_job_index + (is_tiler ? 1 : 0),
#ifdef __LP64__
                .job_descriptor_size = 1,
#endif
        };

        /* Only non-elided tiler jobs have dependencies which are known at this point */

        if (is_tiler && !is_elided_tiler) {
                /* Tiler jobs depend on vertex jobs */

                job.job_dependency_index_1 = draw_job_index;

                /* Tiler jobs also depend on the previous tiler job */

                if (ctx->draw_count)
                        job.job_dependency_index_2 = draw_job_index - 1;
        }

        struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payload_tiler : &ctx->payload_vertex;

        /* There are some padding hacks on 32-bit */

#ifdef __LP64__
        int offset = 0;
#else
        int offset = 4;
#endif
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload));
        memcpy(transfer.cpu, &job, sizeof(job));
        memcpy(transfer.cpu + sizeof(job) - offset, payload, sizeof(*payload));
        return transfer;
}
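
/* Concretely (a worked example of the indexing above, my own summary): draw 0
 * emits a vertex job with job_index 1 and a tiler job with job_index 2; draw
 * 1 emits indices 3 and 4; in general draw i gets vertex index 2i+1 and
 * tiler index 2i+2. Each tiler job 2i+2 depends on its vertex job 2i+1
 * (dependency 1) and, for i > 0, on the previous tiler job 2i (dependency
 * 2). Index 2n+1, one past the last tiler job of an n-draw frame, is
 * reserved for the set-value job generated below. */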

/* Generates a set value job. It's unclear what exactly this does, why it's
 * necessary, and when to call it. */

static void
panfrost_set_value_job(struct panfrost_context *ctx)
{
        struct mali_job_descriptor_header job = {
                .job_type = JOB_TYPE_SET_VALUE,
                .job_descriptor_size = 1,
                .job_index = 1 + (2 * ctx->draw_count),
        };

        struct mali_payload_set_value payload = {
                .out = ctx->misc_0.gpu,
                .unknown = 0x3,
        };

        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(payload));
        memcpy(transfer.cpu, &job, sizeof(job));
        memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload));

        ctx->u_set_value_job = (struct mali_job_descriptor_header *) transfer.cpu;
        ctx->set_value_job = transfer.gpu;
}

static mali_ptr
panfrost_emit_varyings(
                struct panfrost_context *ctx,
                union mali_attr *slot,
                unsigned stride,
                unsigned count)
{
        mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height;

        /* Fill out the descriptor */
        slot->elements = varying_address | MALI_ATTR_LINEAR;
        slot->stride = stride;
        slot->size = stride * count;

        ctx->varying_height += ALIGN(slot->size, 64);
        assert(ctx->varying_height < ctx->varying_mem.size);

        return varying_address;
}

static void
panfrost_emit_point_coord(union mali_attr *slot)
{
        slot->elements = MALI_VARYING_POINT_COORD | MALI_ATTR_LINEAR;
        slot->stride = slot->size = 0;
}

static void
panfrost_emit_varying_descriptor(
                struct panfrost_context *ctx,
                unsigned invocation_count)
{
        /* Load the shaders */

        struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
        struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];

        /* Allocate the varying descriptor */

        size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
        size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;

        struct panfrost_transfer trans = panfrost_allocate_transient(ctx,
                        vs_size + fs_size);

        memcpy(trans.cpu, vs->varyings, vs_size);
        memcpy(trans.cpu + vs_size, fs->varyings, fs_size);

        ctx->payload_vertex.postfix.varying_meta = trans.gpu;
        ctx->payload_tiler.postfix.varying_meta = trans.gpu + vs_size;

        /* Buffer indices must be in this order per our convention */
        union mali_attr varyings[PIPE_MAX_ATTRIBS];
        unsigned idx = 0;

        /* General varyings -- use the VS's, since those are more likely to be
         * accurate on desktop */

        panfrost_emit_varyings(ctx, &varyings[idx++],
                        vs->general_varying_stride, invocation_count);

        /* fp32 vec4 gl_Position */
        ctx->payload_tiler.postfix.position_varying =
                panfrost_emit_varyings(ctx, &varyings[idx++],
                                sizeof(float) * 4, invocation_count);

        if (vs->writes_point_size || fs->reads_point_coord) {
                /* fp16 vec1 gl_PointSize */
                ctx->payload_tiler.primitive_size.pointer =
                        panfrost_emit_varyings(ctx, &varyings[idx++],
                                        2, invocation_count);
        }

        if (fs->reads_point_coord) {
                /* Special descriptor */
                panfrost_emit_point_coord(&varyings[idx++]);
        }

        mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr));
        ctx->payload_vertex.postfix.varyings = varyings_p;
        ctx->payload_tiler.postfix.varyings = varyings_p;
}

static mali_ptr
panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i)
{
        struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
        struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);

        return rsrc->bo->gpu + buf->buffer_offset;
}

/* Emits attribute and varying descriptors. This should be called every draw,
 * except in some obscure circumstances */

static void
panfrost_emit_vertex_data(struct panfrost_context *ctx, struct panfrost_job *job)
{
        /* Staged mali_attr, and index into them. i =/= k, depending on the
         * vertex buffer mask */
        union mali_attr attrs[PIPE_MAX_ATTRIBS];
        unsigned k = 0;

        unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count);

        for (int i = 0; i < ARRAY_SIZE(ctx->vertex_buffers); ++i) {
                if (!(ctx->vb_mask & (1 << i))) continue;

                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
                struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);

                if (!rsrc) continue;

                /* Align to 64 bytes by masking off the lower bits. This
                 * will be adjusted back when we fixup the src_offset in
                 * mali_attr_meta */

                mali_ptr addr = panfrost_vertex_buffer_address(ctx, i) & ~63;

                /* Offset vertex count by draw_start to make sure we upload enough */
                attrs[k].stride = buf->stride;
                attrs[k].size = rsrc->base.width0;

                panfrost_job_add_bo(job, rsrc->bo);
                attrs[k].elements = addr | MALI_ATTR_LINEAR;

                ++k;
        }

        ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, k * sizeof(union mali_attr));

        panfrost_emit_varying_descriptor(ctx, invocation_count);
}

static bool
panfrost_writes_point_size(struct panfrost_context *ctx)
{
        assert(ctx->vs);
        struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];

        return vs->writes_point_size && ctx->payload_tiler.prefix.draw_mode == MALI_POINTS;
}

/* Stage the attribute descriptors so we can adjust src_offset
 * to let BOs align nicely */

static void
panfrost_stage_attributes(struct panfrost_context *ctx)
{
        struct panfrost_vertex_state *so = ctx->vertex;

        size_t sz = sizeof(struct mali_attr_meta) * so->num_elements;
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sz);
        struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu;

        /* Copy as-is for the first pass */
        memcpy(target, so->hw, sz);

        /* Fixup offsets for the second pass. Recall that the hardware
         * calculates attribute addresses as:
         *
         *      addr = base + (stride * vtx) + src_offset;
         *
         * However, on Mali, base must be aligned to 64-bytes, so we
         * instead let:
         *
         *      base' = base & ~63 = base - (base & 63)
         *
         * To compensate when using base' (see emit_vertex_data), we have
         * to adjust src_offset by the masked off piece:
         *
         *      addr' = base' + (stride * vtx) + (src_offset + (base & 63))
         *            = base - (base & 63) + (stride * vtx) + src_offset + (base & 63)
         *            = base + (stride * vtx) + src_offset
         *            = addr;
         *
         * QED.
         */

        for (unsigned i = 0; i < so->num_elements; ++i) {
                unsigned vbi = so->pipe[i].vertex_buffer_index;
                mali_ptr addr = panfrost_vertex_buffer_address(ctx, vbi);

                /* Adjust by the masked off bits of the offset */
                target[i].src_offset += (addr & 63);
        }

        ctx->payload_vertex.postfix.attribute_meta = transfer.gpu;
}
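
/* A quick numeric instance of the fixup (my own example): if a vertex buffer
 * lives at base = 0x10043, then base' = 0x10043 & ~63 = 0x10040, and the
 * three masked-off bytes are added back via src_offset += (0x10043 & 63) = 3,
 * so every computed attribute address comes out unchanged. */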

/* Go through dirty flags and actualise them in the cmdstream. */

void
panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
{
        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);

        if (with_vertex_data) {
                panfrost_emit_vertex_data(ctx, job);
        }

        bool msaa = ctx->rasterizer->base.multisample;

        if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
                ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables;

                /* TODO: Sample size */
                SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa);
                SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa);
        }

        /* Enable job requirements at draw-time */

        if (msaa)
                job->requirements |= PAN_REQ_MSAA;

        if (ctx->depth_stencil->depth.writemask)
                job->requirements |= PAN_REQ_DEPTH_WRITE;

        if (ctx->occlusion_query) {
                ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE;
                ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu;
        }

        if (ctx->dirty & PAN_DIRTY_VS) {
                assert(ctx->vs);

                struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];

                /* Late shader descriptor assignments */

                vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX];
                vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX];

                /* Who knows */
                vs->tripipe->midgard1.unknown1 = 0x2201;

                ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4;
        }

        if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) {
                /* Check if we need to link the gl_PointSize varying */
                if (!panfrost_writes_point_size(ctx)) {
                        /* If the size is constant, write it out. Otherwise,
                         * don't touch primitive_size (since we would clobber
                         * the pointer there) */

                        ctx->payload_tiler.primitive_size.constant = ctx->rasterizer->base.line_width;
                }
        }

        /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. */
        if (ctx->fs)
                ctx->dirty |= PAN_DIRTY_FS;

        if (ctx->dirty & PAN_DIRTY_FS) {
                assert(ctx->fs);
                struct panfrost_shader_state *variant = &ctx->fs->variants[ctx->fs->active_variant];

#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name

                COPY(shader);
                COPY(attribute_count);
                COPY(varying_count);
                COPY(midgard1.uniform_count);
                COPY(midgard1.work_count);
                COPY(midgard1.unknown2);

#undef COPY

                /* If there is a blend shader, work registers are shared */

                if (ctx->blend->has_blend_shader)
                        ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16;

                /* Set late due to depending on render state */
                /* The one at the end seems to mean "1 UBO" */
                ctx->fragment_shader_core.midgard1.unknown1 = MALI_NO_ALPHA_TO_COVERAGE | 0x200 | 0x2201;

                /* Assign texture/sample count right before upload */
                ctx->fragment_shader_core.texture_count = ctx->sampler_view_count[PIPE_SHADER_FRAGMENT];
                ctx->fragment_shader_core.sampler_count = ctx->sampler_count[PIPE_SHADER_FRAGMENT];

                /* Assign the stencil refs late */
                ctx->fragment_shader_core.stencil_front.ref = ctx->stencil_ref.ref_value[0];
                ctx->fragment_shader_core.stencil_back.ref = ctx->stencil_ref.ref_value[1];

                /* CAN_DISCARD should be set if the fragment shader possibly
                 * contains a 'discard' instruction. It is likely this is
                 * related to optimizations related to forward-pixel kill, as
                 * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good
                 * thing?" by Peter Harris
                 */

                if (variant->can_discard) {
                        ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;

                        /* The assignment below dominates the bit-twiddling
                         * previously done here (clearing
                         * MALI_NO_ALPHA_TO_COVERAGE, then OR'ing in 0x4000);
                         * 0x4200 is what actually lands in the descriptor */
                        ctx->fragment_shader_core.midgard1.unknown1 = 0x4200;
                }

                /* Check if we're using the default blend descriptor (fast path) */

                bool no_blending =
                        !ctx->blend->has_blend_shader &&
                        (ctx->blend->equation.rgb_mode == 0x122) &&
                        (ctx->blend->equation.alpha_mode == 0x122) &&
                        (ctx->blend->equation.color_mask == 0xf);

                /* Even on MFBD, the shader descriptor gets blend shaders. It's
                 * *also* copied to the blend_meta appended (by convention),
                 * but this is the field actually read by the hardware. (Or
                 * maybe both are read...?) */

                if (ctx->blend->has_blend_shader) {
                        ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader;
                }

                if (ctx->require_sfbd) {
                        /* On single render-target platforms, the blend
                         * information is inside the shader meta itself. We
                         * additionally need to signal CAN_DISCARD for nontrivial blend
                         * modes (so we're able to read back the destination buffer) */

                        if (!ctx->blend->has_blend_shader) {
                                memcpy(&ctx->fragment_shader_core.blend_equation, &ctx->blend->equation, sizeof(ctx->blend->equation));
                        }

                        if (!no_blending) {
                                ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
                        }
                }

                size_t size = sizeof(struct mali_shader_meta) + sizeof(struct mali_blend_meta);
                struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
                memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta));

                ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4;

                if (!ctx->require_sfbd) {
                        /* Additional blend descriptor tacked on for jobs using MFBD */

                        unsigned blend_count = 0;

                        if (ctx->blend->has_blend_shader) {
                                /* For a blend shader, the bottom nibble corresponds to
                                 * the number of work registers used, which signals the
                                 * -existence- of a blend shader */

                                assert(ctx->blend->blend_work_count >= 2);
                                blend_count |= MIN2(ctx->blend->blend_work_count, 3);
                        } else {
                                /* Otherwise, the bottom bit simply specifies if
                                 * blending (anything other than REPLACE) is enabled */

                                if (!no_blending)
                                        blend_count |= 0x1;
                        }

                        /* Second blend equation is always a simple replace */

                        uint64_t replace_magic = 0xf0122122;
                        struct mali_blend_equation replace_mode;
                        memcpy(&replace_mode, &replace_magic, sizeof(replace_mode));

                        struct mali_blend_meta blend_meta[] = {
                                {
                                        .unk1 = 0x200 | blend_count,
                                        .blend_equation_1 = ctx->blend->equation,
                                        .blend_equation_2 = replace_mode
                                },
                        };

                        if (ctx->blend->has_blend_shader) {
                                blend_meta[0].blend_shader = ctx->blend->blend_shader;
                        }

                        memcpy(transfer.cpu + sizeof(struct mali_shader_meta), blend_meta, sizeof(blend_meta));
                }
        }

        /* We stage to transient, so always dirty.. */
        panfrost_stage_attributes(ctx);

        if (ctx->dirty & PAN_DIRTY_SAMPLERS) {
                /* Upload samplers back to back, no padding */

                for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
                        if (!ctx->sampler_count[t]) continue;

                        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]);
                        struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu;

                        for (int i = 0; i < ctx->sampler_count[t]; ++i) {
                                desc[i] = ctx->samplers[t][i]->hw;
                        }

                        if (t == PIPE_SHADER_FRAGMENT)
                                ctx->payload_tiler.postfix.sampler_descriptor = transfer.gpu;
                        else if (t == PIPE_SHADER_VERTEX)
                                ctx->payload_vertex.postfix.sampler_descriptor = transfer.gpu;
                        else
                                assert(0);
                }
        }

        if (ctx->dirty & PAN_DIRTY_TEXTURES) {
                for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
                        /* Short-circuit */
                        if (!ctx->sampler_view_count[t]) continue;

                        uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];

                        for (int i = 0; i < ctx->sampler_view_count[t]; ++i) {
                                if (!ctx->sampler_views[t][i])
                                        continue;

                                struct pipe_resource *tex_rsrc = ctx->sampler_views[t][i]->base.texture;
                                struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc;

                                /* Inject the addresses in, interleaving cube
                                 * faces and mip levels appropriately. */

                                for (int l = 0; l <= tex_rsrc->last_level; ++l) {
                                        for (int f = 0; f < tex_rsrc->array_size; ++f) {
                                                unsigned idx = (l * tex_rsrc->array_size) + f;

                                                ctx->sampler_views[t][i]->hw.swizzled_bitmaps[idx] =
                                                        rsrc->bo->gpu +
                                                        rsrc->bo->slices[l].offset +
                                                        f * rsrc->bo->cubemap_stride;
                                        }
                                }

                                trampolines[i] = panfrost_upload_transient(ctx, &ctx->sampler_views[t][i]->hw, sizeof(struct mali_texture_descriptor));
                        }

                        mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);

                        if (t == PIPE_SHADER_FRAGMENT)
                                ctx->payload_tiler.postfix.texture_trampoline = trampoline;
                        else if (t == PIPE_SHADER_VERTEX)
                                ctx->payload_vertex.postfix.texture_trampoline = trampoline;
                        else
                                assert(0);
                }
        }

        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        /* For flipped-Y buffers (signaled by negative scale), the translate is
         * flipped as well */

        bool invert_y = vp->scale[1] < 0.0;
        float translate_y = vp->translate[1];

        if (invert_y)
                translate_y = ctx->pipe_framebuffer.height - translate_y;

        for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i) {
                struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i];

                struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
                struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];
                struct panfrost_shader_state *ss = (i == PIPE_SHADER_FRAGMENT) ? fs : vs;

                /* Allocate room for the sysval and the uniforms */
                size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
                size_t size = sys_size + buf->size;
                struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);

                /* Upload sysvals requested by the shader */
                float *uniforms = (float *) transfer.cpu;
                for (unsigned s = 0; s < ss->sysval_count; ++s) {
                        int sysval = ss->sysval[s];

                        if (sysval == PAN_SYSVAL_VIEWPORT_SCALE) {
                                uniforms[4*s + 0] = vp->scale[0];
                                uniforms[4*s + 1] = fabsf(vp->scale[1]);
                                uniforms[4*s + 2] = vp->scale[2];
                        } else if (sysval == PAN_SYSVAL_VIEWPORT_OFFSET) {
                                uniforms[4*s + 0] = vp->translate[0];
                                uniforms[4*s + 1] = translate_y;
                                uniforms[4*s + 2] = vp->translate[2];
                        } else {
                                assert(0);
                        }
                }

                /* Upload uniforms */
                memcpy(transfer.cpu + sys_size, buf->buffer, buf->size);

                int uniform_count = 0;

                struct mali_vertex_tiler_postfix *postfix;

                switch (i) {
                case PIPE_SHADER_VERTEX:
                        uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
                        postfix = &ctx->payload_vertex.postfix;
                        break;

                case PIPE_SHADER_FRAGMENT:
                        uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
                        postfix = &ctx->payload_tiler.postfix;
                        break;

                default:
                        DBG("Unknown shader stage %d in uniform upload\n", i);
                        assert(0);
                }

                /* Also attach the same buffer as a UBO for extended access */

                struct mali_uniform_buffer_meta uniform_buffers[] = {
                        {
                                .size = MALI_POSITIVE((2 + uniform_count)),
                                .ptr = transfer.gpu >> 2,
                        },
                };

                mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
                postfix->uniforms = transfer.gpu;
                postfix->uniform_buffers = ubufs;

                buf->dirty = 0;
        }

        /* TODO: Upload the viewport somewhere more appropriate */

        /* Clip bounds are encoded as floats. The viewport itself is encoded as
         * (somewhat) asymmetric ints. */
        const struct pipe_scissor_state *ss = &ctx->scissor;

        struct mali_viewport view = {
                /* By default, do no viewport clipping, i.e. clip to (-inf,
                 * inf) in each direction. Clipping to the viewport in theory
                 * should work, but in practice causes issues when we're not
                 * explicitly trying to scissor */

                .clip_minx = -inff,
                .clip_miny = -inff,
                .clip_maxx = inff,
                .clip_maxy = inff,

                .clip_minz = 0.0,
                .clip_maxz = 1.0,
        };

        /* Always scissor to the viewport by default. */
        view.viewport0[0] = (int) (vp->translate[0] - vp->scale[0]);
        view.viewport1[0] = MALI_POSITIVE((int) (vp->translate[0] + vp->scale[0]));

        view.viewport0[1] = (int) (translate_y - fabs(vp->scale[1]));
        view.viewport1[1] = MALI_POSITIVE((int) (translate_y + fabs(vp->scale[1])));

        if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
                /* Invert scissor if needed */
                unsigned miny = invert_y ?
                        ctx->pipe_framebuffer.height - ss->maxy : ss->miny;

                unsigned maxy = invert_y ?
                        ctx->pipe_framebuffer.height - ss->miny : ss->maxy;

                /* Set the actual scissor */
                view.viewport0[0] = ss->minx;
                view.viewport0[1] = miny;
                view.viewport1[0] = MALI_POSITIVE(ss->maxx);
                view.viewport1[1] = MALI_POSITIVE(maxy);
        }

        ctx->payload_tiler.postfix.viewport =
                panfrost_upload_transient(ctx,
                                &view,
                                sizeof(struct mali_viewport));

        ctx->dirty = 0;
}

/* Corresponds to exactly one draw, but does not submit anything */

static void
panfrost_queue_draw(struct panfrost_context *ctx)
{
        /* TODO: Expand the array? */
        if (ctx->draw_count >= MAX_DRAW_CALLS) {
                DBG("Job buffer overflow, ignoring draw\n");
                assert(0);
        }

        /* Handle dirty flags now */
        panfrost_emit_for_draw(ctx, true);

        struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false, false);
        struct panfrost_transfer tiler = panfrost_vertex_tiler_job(ctx, true, false);

        ctx->u_vertex_jobs[ctx->vertex_job_count] = (struct mali_job_descriptor_header *) vertex.cpu;
        ctx->vertex_jobs[ctx->vertex_job_count++] = vertex.gpu;

        ctx->u_tiler_jobs[ctx->tiler_job_count] = (struct mali_job_descriptor_header *) tiler.cpu;
        ctx->tiler_jobs[ctx->tiler_job_count++] = tiler.gpu;

        ctx->draw_count++;
}

/* At the end of the frame, the vertex and tiler jobs are linked together and
 * then the fragment job is plonked at the end. Set value job is first for
 * unknown reasons. */

static void
panfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr next)
{
        if (first->job_descriptor_size)
                first->next_job_64 = (u64) (uintptr_t) next;
        else
                first->next_job_32 = (u32) (uintptr_t) next;
}

static void
panfrost_link_jobs(struct panfrost_context *ctx)
{
        if (ctx->draw_count) {
                /* Generate the set_value_job */
                panfrost_set_value_job(ctx);

                /* Have the first vertex job depend on the set value job */
                ctx->u_vertex_jobs[0]->job_dependency_index_1 = ctx->u_set_value_job->job_index;

                /* SV -> V */
                panfrost_link_job_pair(ctx->u_set_value_job, ctx->vertex_jobs[0]);
        }

        /* V -> V/T ; T -> T/null */
        for (int i = 0; i < ctx->vertex_job_count; ++i) {
                bool isLast = (i + 1) == ctx->vertex_job_count;

                panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? ctx->tiler_jobs[0] : ctx->vertex_jobs[i + 1]);
        }

        /* T -> T/null */
        for (int i = 0; i < ctx->tiler_job_count; ++i) {
                bool isLast = (i + 1) == ctx->tiler_job_count;
                panfrost_link_job_pair(ctx->u_tiler_jobs[i], isLast ? 0 : ctx->tiler_jobs[i + 1]);
        }
}
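
/* Putting the linking and the dependency rules together, a frame with n
 * draws ends up as the following chain (my own summary of the code above,
 * not an official diagram):
 *
 *     set-value -> V0 -> V1 -> ... -> V(n-1) -> T0 -> T1 -> ... -> T(n-1)
 *
 * where V0 additionally depends on the set-value job, each Ti depends on Vi,
 * and each Ti (for i > 0) depends on T(i-1). */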

/* The entire frame is in memory -- send it off to the kernel! */

static void
panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate,
                      struct pipe_fence_handle **fence,
                      struct panfrost_job *job)
{
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_screen *screen = pan_screen(gallium->screen);

        /* Edge case if screen is cleared and nothing else */
        bool has_draws = ctx->draw_count > 0;

        /* Workaround for a bizarre lockup (a hardware erratum?) */
        if (!has_draws)
                flush_immediate = true;

        /* A number of jobs are batched -- this must be linked and cleared */
        panfrost_link_jobs(ctx);

        ctx->draw_count = 0;
        ctx->vertex_job_count = 0;
        ctx->tiler_job_count = 0;

#ifndef DRY_RUN

        bool is_scanout = panfrost_is_scanout(ctx);
        screen->driver->submit_vs_fs_job(ctx, has_draws, is_scanout);

        /* If visual, we can stall a frame */

        if (!flush_immediate)
                screen->driver->force_flush_fragment(ctx, fence);

        screen->last_fragment_flushed = false;
        screen->last_job = job;

        /* If readback, flush now (hurts the pipelined performance) */
        if (flush_immediate)
                screen->driver->force_flush_fragment(ctx, fence);

        if (screen->driver->dump_counters && pan_counters_base) {
                screen->driver->dump_counters(screen);

                char filename[128];
                snprintf(filename, sizeof(filename), "%s/frame%d.mdgprf", pan_counters_base, ++performance_counter_number);
                FILE *fp = fopen(filename, "wb");
                fwrite(screen->perf_counters.cpu, sizeof(uint32_t), 4096, fp);
                fclose(fp);
        }

#endif
}
1388
1389void
1390panfrost_flush(
1391        struct pipe_context *pipe,
1392        struct pipe_fence_handle **fence,
1393        unsigned flags)
1394{
1395        struct panfrost_context *ctx = pan_context(pipe);
1396        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
1397
1398        /* Nothing to do! */
1399        if (!ctx->draw_count && !job->clear) return;
1400
1401        /* Whether to stall the pipeline for immediately correct results */
1402        bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME;
1403
1404        /* Submit the frame itself */
1405        panfrost_submit_frame(ctx, flush_immediate, fence, job);
1406
1407        /* Prepare for the next frame */
1408        panfrost_invalidate_frame(ctx);
1409}
1410
1411#define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c;
1412
1413static int
1414g2m_draw_mode(enum pipe_prim_type mode)
1415{
1416        switch (mode) {
1417                DEFINE_CASE(POINTS);
1418                DEFINE_CASE(LINES);
1419                DEFINE_CASE(LINE_LOOP);
1420                DEFINE_CASE(LINE_STRIP);
1421                DEFINE_CASE(TRIANGLES);
1422                DEFINE_CASE(TRIANGLE_STRIP);
1423                DEFINE_CASE(TRIANGLE_FAN);
1424                DEFINE_CASE(QUADS);
1425                DEFINE_CASE(QUAD_STRIP);
1426                DEFINE_CASE(POLYGON);
1427
1428        default:
1429                DBG("Illegal draw mode %d\n", mode);
1430                assert(0);
1431                return MALI_LINE_LOOP;
1432        }
1433}
1434
1435#undef DEFINE_CASE
1436
1437static unsigned
1438panfrost_translate_index_size(unsigned size)
1439{
1440        switch (size) {
1441        case 1:
1442                return MALI_DRAW_INDEXED_UINT8;
1443
1444        case 2:
1445                return MALI_DRAW_INDEXED_UINT16;
1446
1447        case 4:
1448                return MALI_DRAW_INDEXED_UINT32;
1449
1450        default:
1451                DBG("Unknown index size %d\n", size);
1452                assert(0);
1453                return 0;
1454        }
1455}
1456
/* Gets a GPU address for the associated index buffer. Only guaranteed to be
 * good for the duration of the draw (transient), though it may last longer */
1459
1460static mali_ptr
1461panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info)
1462{
1463        struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
1464
1465        off_t offset = info->start * info->index_size;
1466
1467        if (!info->has_user_indices) {
1468                /* Only resources can be directly mapped */
1469                return rsrc->bo->gpu + offset;
1470        } else {
1471                /* Otherwise, we need to upload to transient memory */
1472                const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
1473                return panfrost_upload_transient(ctx, ibuf8 + offset, info->count * info->index_size);
1474        }
1475}
1476
1477static void
1478panfrost_draw_vbo(
1479        struct pipe_context *pipe,
1480        const struct pipe_draw_info *info)
1481{
1482        struct panfrost_context *ctx = pan_context(pipe);
1483
1484        ctx->payload_vertex.draw_start = info->start;
1485        ctx->payload_tiler.draw_start = info->start;
1486
1487        int mode = info->mode;
1488
1489        /* Fallback for unsupported modes */
1490
1491        if (!(ctx->draw_modes & (1 << mode))) {
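                /* A single quad can be drawn as a triangle fan over the same
                 * four vertices (ABC + ACD covers ABCD); flat shading would
                 * change the provoking vertex, so that case falls through to
                 * primconvert below */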
1492                if (mode == PIPE_PRIM_QUADS && info->count == 4 && ctx->rasterizer && !ctx->rasterizer->base.flatshade) {
1493                        mode = PIPE_PRIM_TRIANGLE_FAN;
1494                } else {
1495                        if (info->count < 4) {
1496                                /* Degenerate case? */
1497                                return;
1498                        }
1499
1500                        util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->rasterizer->base);
1501                        util_primconvert_draw_vbo(ctx->primconvert, info);
1502                        return;
1503                }
1504        }
1505
1506        /* Now that we have a guaranteed terminating path, find the job.
1507         * Assignment commented out to prevent unused warning */
1508
1509        /* struct panfrost_job *job = */ panfrost_get_job_for_fbo(ctx);
1510
1511        ctx->payload_tiler.prefix.draw_mode = g2m_draw_mode(mode);
1512
1513        ctx->vertex_count = info->count;
1514
        /* For non-indexed draws, the invocation count is just the vertex count */
1516        unsigned invocation_count = ctx->vertex_count;
1517
1518        unsigned draw_flags = 0;
1519
        /* The draw flags control how the primitive size is interpreted */
1521
1522        if (panfrost_writes_point_size(ctx))
1523                draw_flags |= MALI_DRAW_VARYING_SIZE;
1524
        /* For vertex counts that do not fit in a 16-bit value (and for
         * points), the other magic value is needed, otherwise there will be
         * bizarre rendering artefacts. It's not clear what these values mean
         * yet. */
1528
1529        draw_flags |= (mode == PIPE_PRIM_POINTS || ctx->vertex_count > 65535) ? 0x3000 : 0x18000;
1530
1531        if (info->index_size) {
1532                /* Calculate the min/max index used so we can figure out how
1533                 * many times to invoke the vertex shader */
1534
1535                /* Fetch / calculate index bounds */
1536                unsigned min_index = 0, max_index = 0;
1537
1538                if (info->max_index == ~0u) {
1539                        u_vbuf_get_minmax_index(pipe, info, &min_index, &max_index);
1540                } else {
1541                        min_index = info->min_index;
1542                        max_index = info->max_index;
1543                }
1544
1545                /* Use the corresponding values */
1546                invocation_count = max_index - min_index + 1;
1547                ctx->payload_vertex.draw_start = min_index;
1548                ctx->payload_tiler.draw_start = min_index;
1549
1550                ctx->payload_tiler.prefix.negative_start = -min_index;
1551                ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(info->count);
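                /* Worked example: for an index buffer {5, 6, 8}, min_index =
                 * 5 and max_index = 8, so there are 4 vertex shader
                 * invocations (indices 5..8 inclusive), draw_start is rebased
                 * to 5, and negative_start = -5 compensates for the rebase
                 * when indices are fetched */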
1552
1553                //assert(!info->restart_index); /* TODO: Research */
1554                assert(!info->index_bias);
1555
1556                draw_flags |= panfrost_translate_index_size(info->index_size);
1557                ctx->payload_tiler.prefix.indices = panfrost_get_index_buffer_mapped(ctx, info);
1558        } else {
                /* With no indexing applied, the index count equals the vertex
                 * count, as if the draw were implicitly indexed in order */
1561
1562                ctx->payload_tiler.prefix.negative_start = 0;
1563                ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
1564
                /* Reset the indexed-draw state */
1566                ctx->payload_tiler.prefix.indices = (uintptr_t) NULL;
1567        }
1568
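        /* Note MALI_POSITIVE(n) packs n - 1, as the hardware fields encode
         * counts off-by-one and cannot express zero */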
1569        ctx->payload_vertex.prefix.invocation_count = MALI_POSITIVE(invocation_count);
1570        ctx->payload_tiler.prefix.invocation_count = MALI_POSITIVE(invocation_count);
1571        ctx->payload_tiler.prefix.unknown_draw = draw_flags;
1572
1573        /* Fire off the draw itself */
1574        panfrost_queue_draw(ctx);
1575}
1576
1577/* CSO state */
1578
1579static void
1580panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso)
1581{
1582        free(hwcso);
1583}
1584
1585static void *
1586panfrost_create_rasterizer_state(
1587        struct pipe_context *pctx,
1588        const struct pipe_rasterizer_state *cso)
1589{
1590        struct panfrost_context *ctx = pan_context(pctx);
1591        struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer);
1592
1593        so->base = *cso;
1594
        /* Bitmask of enables; the meaning of the base value is unknown */
1596        so->tiler_gl_enables = ctx->is_t6xx ? 0x105 : 0x7;
1597
1598        so->tiler_gl_enables |= MALI_FRONT_FACE(
1599                                        cso->front_ccw ? MALI_CCW : MALI_CW);
1600
1601        if (cso->cull_face & PIPE_FACE_FRONT)
1602                so->tiler_gl_enables |= MALI_CULL_FACE_FRONT;
1603
1604        if (cso->cull_face & PIPE_FACE_BACK)
1605                so->tiler_gl_enables |= MALI_CULL_FACE_BACK;
1606
1607        return so;
1608}
1609
1610static void
1611panfrost_bind_rasterizer_state(
1612        struct pipe_context *pctx,
1613        void *hwcso)
1614{
1615        struct panfrost_context *ctx = pan_context(pctx);
1616
1617        /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */
1618        if (!hwcso)
1619                return;
1620
1621        ctx->rasterizer = hwcso;
1622        ctx->dirty |= PAN_DIRTY_RASTERIZER;
1623}
1624
1625static void *
1626panfrost_create_vertex_elements_state(
1627        struct pipe_context *pctx,
1628        unsigned num_elements,
1629        const struct pipe_vertex_element *elements)
1630{
1631        struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state);
1632
1633        so->num_elements = num_elements;
1634        memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
1635
        /* XXX: What the cornball? This is totally, 100%, unapologetically
         * nonsense. And yet it somehow fixes a regression in -bshadow.
         * (Previously, we allocated the descriptor here; a newer commit
         * removed that allocation, and then memory corruption led to
         * shader_meta getting overwritten in bad ways and the whole test case
         * falling apart.) TODO: LOOK INTO PLEASE XXX XXX BAD XXX XXX XXX
         */
1643        panfrost_allocate_chunk(pan_context(pctx), 0, HEAP_DESCRIPTOR);
1644
1645        for (int i = 0; i < num_elements; ++i) {
1646                so->hw[i].index = elements[i].vertex_buffer_index;
1647
1648                enum pipe_format fmt = elements[i].src_format;
1649                const struct util_format_description *desc = util_format_description(fmt);
1650                so->hw[i].unknown1 = 0x2;
1651                so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels);
1652
1653                so->hw[i].format = panfrost_find_format(desc);
1654
1655                /* The field itself should probably be shifted over */
1656                so->hw[i].src_offset = elements[i].src_offset;
1657        }
1658
1659        return so;
1660}
1661
1662static void
1663panfrost_bind_vertex_elements_state(
1664        struct pipe_context *pctx,
1665        void *hwcso)
1666{
1667        struct panfrost_context *ctx = pan_context(pctx);
1668
1669        ctx->vertex = hwcso;
1670        ctx->dirty |= PAN_DIRTY_VERTEX;
1671}
1672
1673static void
1674panfrost_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso)
1675{
1676        struct panfrost_vertex_state *so = (struct panfrost_vertex_state *) hwcso;
1677        unsigned bytes = sizeof(struct mali_attr_meta) * so->num_elements;
1678        DBG("Vertex elements delete leaks descriptor (%d bytes)\n", bytes);
1679        free(hwcso);
1680}
1681
1682static void *
1683panfrost_create_shader_state(
1684        struct pipe_context *pctx,
1685        const struct pipe_shader_state *cso)
1686{
1687        struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants);
1688        so->base = *cso;
1689
1690        /* Token deep copy to prevent memory corruption */
1691
1692        if (cso->type == PIPE_SHADER_IR_TGSI)
1693                so->base.tokens = tgsi_dup_tokens(so->base.tokens);
1694
1695        return so;
1696}
1697
1698static void
1699panfrost_delete_shader_state(
1700        struct pipe_context *pctx,
1701        void *so)
1702{
1703        struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so;
1704
1705        if (cso->base.type == PIPE_SHADER_IR_TGSI) {
1706                DBG("Deleting TGSI shader leaks duplicated tokens\n");
1707        }
1708
1709        unsigned leak = cso->variant_count * sizeof(struct mali_shader_meta);
1710        DBG("Deleting shader state leaks descriptors (%d bytes), and shader bytecode\n", leak);
1711
1712        free(so);
1713}
1714
1715static void *
1716panfrost_create_sampler_state(
1717        struct pipe_context *pctx,
1718        const struct pipe_sampler_state *cso)
1719{
1720        struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state);
1721        so->base = *cso;
1722
1723        /* sampler_state corresponds to mali_sampler_descriptor, which we can generate entirely here */
1724
1725        struct mali_sampler_descriptor sampler_descriptor = {
1726                .filter_mode = MALI_TEX_MIN(translate_tex_filter(cso->min_img_filter))
1727                | MALI_TEX_MAG(translate_tex_filter(cso->mag_img_filter))
1728                | translate_mip_filter(cso->min_mip_filter)
1729                | 0x20,
1730
1731                .wrap_s = translate_tex_wrap(cso->wrap_s),
1732                .wrap_t = translate_tex_wrap(cso->wrap_t),
1733                .wrap_r = translate_tex_wrap(cso->wrap_r),
1734                .compare_func = panfrost_translate_alt_compare_func(cso->compare_func),
1735                .border_color = {
1736                        cso->border_color.f[0],
1737                        cso->border_color.f[1],
1738                        cso->border_color.f[2],
1739                        cso->border_color.f[3]
1740                },
1741                .min_lod = FIXED_16(cso->min_lod),
1742                .max_lod = FIXED_16(cso->max_lod),
1743                .unknown2 = 1,
1744        };
1745
1746        so->hw = sampler_descriptor;
1747
1748        return so;
1749}
1750
1751static void
1752panfrost_bind_sampler_states(
1753        struct pipe_context *pctx,
1754        enum pipe_shader_type shader,
1755        unsigned start_slot, unsigned num_sampler,
1756        void **sampler)
1757{
1758        assert(start_slot == 0);
1759
1760        struct panfrost_context *ctx = pan_context(pctx);
1761
1762        /* XXX: Should upload, not just copy? */
1763        ctx->sampler_count[shader] = num_sampler;
1764        memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *));
1765
1766        ctx->dirty |= PAN_DIRTY_SAMPLERS;
1767}
1768
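/* Since the alpha test is lowered into the fragment shader rather than being
 * fixed-function hardware state, a compiled variant is only reusable if its
 * baked alpha state matches the currently bound CSO */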
1769static bool
1770panfrost_variant_matches(struct panfrost_context *ctx, struct panfrost_shader_state *variant)
1771{
1772        struct pipe_alpha_state *alpha = &ctx->depth_stencil->alpha;
1773
1774        if (alpha->enabled || variant->alpha_state.enabled) {
1775                /* Make sure enable state is at least the same */
1776                if (alpha->enabled != variant->alpha_state.enabled) {
1777                        return false;
1778                }
1779
1780                /* Check that the contents of the test are the same */
1781                bool same_func = alpha->func == variant->alpha_state.func;
1782                bool same_ref = alpha->ref_value == variant->alpha_state.ref_value;
1783
1784                if (!(same_func && same_ref)) {
1785                        return false;
1786                }
1787        }

        /* Otherwise, we're good to go */
1789        return true;
1790}
1791
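/* Binds a fragment shader, selecting the variant matching the current alpha
 * state (or creating one if none matches). Each variant is compiled at most
 * once; subsequent binds merely switch the active variant. */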
1792static void
1793panfrost_bind_fs_state(
1794        struct pipe_context *pctx,
1795        void *hwcso)
1796{
1797        struct panfrost_context *ctx = pan_context(pctx);
1798
1799        ctx->fs = hwcso;
1800
1801        if (hwcso) {
1802                /* Match the appropriate variant */
1803
1804                signed variant = -1;
1805
1806                struct panfrost_shader_variants *variants = (struct panfrost_shader_variants *) hwcso;
1807
1808                for (unsigned i = 0; i < variants->variant_count; ++i) {
1809                        if (panfrost_variant_matches(ctx, &variants->variants[i])) {
1810                                variant = i;
1811                                break;
1812                        }
1813                }
1814
1815                if (variant == -1) {
1816                        /* No variant matched, so create a new one */
1817                        variant = variants->variant_count++;
1818                        assert(variants->variant_count < MAX_SHADER_VARIANTS);
1819
1820                        variants->variants[variant].base = hwcso;
1821                        variants->variants[variant].alpha_state = ctx->depth_stencil->alpha;
1822
1823                        /* Allocate the mapped descriptor ahead-of-time. TODO: Use for FS as well as VS */
1824                        struct panfrost_context *ctx = pan_context(pctx);
1825                        struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR);
1826
1827                        variants->variants[variant].tripipe = (struct mali_shader_meta *) transfer.cpu;
1828                        variants->variants[variant].tripipe_gpu = transfer.gpu;
1829
1830                }
1831
1832                /* Select this variant */
1833                variants->active_variant = variant;
1834
1835                struct panfrost_shader_state *shader_state = &variants->variants[variant];
1836                assert(panfrost_variant_matches(ctx, shader_state));
1837
1838                /* Now we have a variant selected, so compile and go */
1839
1840                if (!shader_state->compiled) {
1841                        panfrost_shader_compile(ctx, shader_state->tripipe, NULL, JOB_TYPE_TILER, shader_state);
1842                        shader_state->compiled = true;
1843                }
1844        }
1845
1846        ctx->dirty |= PAN_DIRTY_FS;
1847}
1848
1849static void
1850panfrost_bind_vs_state(
1851        struct pipe_context *pctx,
1852        void *hwcso)
1853{
1854        struct panfrost_context *ctx = pan_context(pctx);
1855
1856        ctx->vs = hwcso;
1857
1858        if (hwcso) {
1859                if (!ctx->vs->variants[0].compiled) {
1860                        ctx->vs->variants[0].base = hwcso;
1861
1862                        /* TODO DRY from above */
1863                        struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR);
1864                        ctx->vs->variants[0].tripipe = (struct mali_shader_meta *) transfer.cpu;
1865                        ctx->vs->variants[0].tripipe_gpu = transfer.gpu;
1866
1867                        panfrost_shader_compile(ctx, ctx->vs->variants[0].tripipe, NULL, JOB_TYPE_VERTEX, &ctx->vs->variants[0]);
1868                        ctx->vs->variants[0].compiled = true;
1869                }
1870        }
1871
1872        ctx->dirty |= PAN_DIRTY_VS;
1873}
1874
1875static void
1876panfrost_set_vertex_buffers(
1877        struct pipe_context *pctx,
1878        unsigned start_slot,
1879        unsigned num_buffers,
1880        const struct pipe_vertex_buffer *buffers)
1881{
1882        struct panfrost_context *ctx = pan_context(pctx);
1883
1884        util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers);
1885}
1886
1887static void
1888panfrost_set_constant_buffer(
1889        struct pipe_context *pctx,
1890        enum pipe_shader_type shader, uint index,
1891        const struct pipe_constant_buffer *buf)
1892{
1893        struct panfrost_context *ctx = pan_context(pctx);
1894        struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader];
1895
1896        size_t sz = buf ? buf->buffer_size : 0;
1897
1898        /* Free previous buffer */
1899
1900        pbuf->dirty = true;
1901        pbuf->size = sz;
1902
1903        if (pbuf->buffer) {
1904                free(pbuf->buffer);
1905                pbuf->buffer = NULL;
1906        }
1907
1908        /* If unbinding, we're done */
1909
1910        if (!buf)
1911                return;
1912
1913        /* Multiple constant buffers not yet supported */
1914        assert(index == 0);
1915
1916        const uint8_t *cpu;
1917
1918        struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer);
1919
1920        if (rsrc) {
1921                cpu = rsrc->bo->cpu;
1922        } else if (buf->user_buffer) {
1923                cpu = buf->user_buffer;
1924        } else {
1925                DBG("No constant buffer?\n");
1926                return;
1927        }
1928
1929        /* Copy the constant buffer into the driver context for later upload */
1930
1931        pbuf->buffer = malloc(sz);
1932        memcpy(pbuf->buffer, cpu + buf->buffer_offset, sz);
1933}
1934
1935static void
1936panfrost_set_stencil_ref(
1937        struct pipe_context *pctx,
1938        const struct pipe_stencil_ref *ref)
1939{
1940        struct panfrost_context *ctx = pan_context(pctx);
1941        ctx->stencil_ref = *ref;
1942
1943        /* Shader core dirty */
1944        ctx->dirty |= PAN_DIRTY_FS;
1945}
1946
1947static struct pipe_sampler_view *
1948panfrost_create_sampler_view(
1949        struct pipe_context *pctx,
1950        struct pipe_resource *texture,
1951        const struct pipe_sampler_view *template)
1952{
1953        struct panfrost_sampler_view *so = CALLOC_STRUCT(panfrost_sampler_view);
1954        int bytes_per_pixel = util_format_get_blocksize(texture->format);
1955
1956        pipe_reference(NULL, &texture->reference);
1957
1958        struct panfrost_resource *prsrc = (struct panfrost_resource *) texture;
1959
1960        so->base = *template;
1961        so->base.texture = texture;
1962        so->base.reference.count = 1;
1963        so->base.context = pctx;
1964
1965        /* sampler_views correspond to texture descriptors, minus the texture
1966         * (data) itself. So, we serialise the descriptor here and cache it for
1967         * later. */
1968
1969        /* Make sure it's something with which we're familiar */
1970        assert(bytes_per_pixel >= 1 && bytes_per_pixel <= 4);
1971
1972        /* TODO: Detect from format better */
1973        const struct util_format_description *desc = util_format_description(prsrc->base.format);
1974
1975        unsigned char user_swizzle[4] = {
1976                template->swizzle_r,
1977                template->swizzle_g,
1978                template->swizzle_b,
1979                template->swizzle_a
1980        };
1981
1982        enum mali_format format = panfrost_find_format(desc);
1983
1984        bool is_depth = desc->format == PIPE_FORMAT_Z32_UNORM;
1985
1986        unsigned usage2_layout = 0x10;
1987
1988        switch (prsrc->bo->layout) {
1989                case PAN_AFBC:
1990                        usage2_layout |= 0x8 | 0x4;
1991                        break;
1992                case PAN_TILED:
1993                        usage2_layout |= 0x1;
1994                        break;
1995                case PAN_LINEAR:
1996                        usage2_layout |= is_depth ? 0x1 : 0x2;
1997                        break;
1998                default:
1999                        assert(0);
2000                        break;
2001        }
2002
2003        struct mali_texture_descriptor texture_descriptor = {
2004                .width = MALI_POSITIVE(texture->width0),
2005                .height = MALI_POSITIVE(texture->height0),
2006                .depth = MALI_POSITIVE(texture->depth0),
2007
2008                /* TODO: Decode */
2009                .format = {
2010                        .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
2011                        .format = format,
2012
2013                        .usage1 = 0x0,
2014                        .is_not_cubemap = texture->target != PIPE_TEXTURE_CUBE,
2015
2016                        .usage2 = usage2_layout
2017                },
2018
2019                .swizzle = panfrost_translate_swizzle_4(user_swizzle)
2020        };
2021
2022        /* TODO: Other base levels require adjusting dimensions / level numbers / etc */
2023        assert (template->u.tex.first_level == 0);
2024
2025        /* Disable mipmapping for now to avoid regressions while automipmapping
2026         * is being implemented. TODO: Remove me once automipmaps work */
2027
2028        //texture_descriptor.nr_mipmap_levels = template->u.tex.last_level - template->u.tex.first_level;
2029        texture_descriptor.nr_mipmap_levels = 0;
2030
2031        so->hw = texture_descriptor;
2032
2033        return (struct pipe_sampler_view *) so;
2034}
2035
2036static void
2037panfrost_set_sampler_views(
2038        struct pipe_context *pctx,
2039        enum pipe_shader_type shader,
2040        unsigned start_slot, unsigned num_views,
2041        struct pipe_sampler_view **views)
2042{
2043        struct panfrost_context *ctx = pan_context(pctx);
2044
2045        assert(start_slot == 0);
2046
2047        ctx->sampler_view_count[shader] = num_views;
2048        memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *));
2049
2050        ctx->dirty |= PAN_DIRTY_TEXTURES;
2051}
2052
2053static void
2054panfrost_sampler_view_destroy(
2055        struct pipe_context *pctx,
2056        struct pipe_sampler_view *views)
2057{
2058        //struct panfrost_context *ctx = pan_context(pctx);
2059
2060        /* TODO */
2061
2062        free(views);
2063}
2064
2065static void
2066panfrost_set_framebuffer_state(struct pipe_context *pctx,
2067                               const struct pipe_framebuffer_state *fb)
2068{
2069        struct panfrost_context *ctx = pan_context(pctx);
2070
        /* Flush when switching away from an FBO, since pending jobs are tied
         * to the framebuffer they were created against */
2072
2073        if (!panfrost_is_scanout(ctx)) {
2074                panfrost_flush(pctx, NULL, 0);
2075        }
2076
2077        ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs;
2078        ctx->pipe_framebuffer.samples = fb->samples;
2079        ctx->pipe_framebuffer.layers = fb->layers;
2080        ctx->pipe_framebuffer.width = fb->width;
2081        ctx->pipe_framebuffer.height = fb->height;
2082
2083        for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2084                struct pipe_surface *cb = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;
2085
2086                /* check if changing cbuf */
2087                if (ctx->pipe_framebuffer.cbufs[i] == cb) continue;
2088
2089                if (cb && (i != 0)) {
2090                        DBG("XXX: Multiple render targets not supported before t7xx!\n");
2091                        assert(0);
2092                }
2093
2094                /* assign new */
2095                pipe_surface_reference(&ctx->pipe_framebuffer.cbufs[i], cb);
2096
2097                if (!cb)
2098                        continue;
2099
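                /* Emit the right flavour of framebuffer descriptor: single
                 * (SFBD, pre-T760) or multi render target (MFBD, T760+) */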
2100                if (ctx->require_sfbd)
2101                        ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
2102                else
2103                        ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);
2104
2105                panfrost_attach_vt_framebuffer(ctx);
2106
2107                struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[i]->texture);
2108                enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
2109                bool is_scanout = panfrost_is_scanout(ctx);
2110
2111                if (!is_scanout && tex->bo->layout != PAN_AFBC && panfrost_can_afbc(format)) {
                        /* The blob is aggressive about enabling AFBC, so it's
                         * pretty much necessary to use it here too, since we
                         * have no traces of non-compressed FBOs. */
2115
2116                        panfrost_enable_afbc(ctx, tex, false);
2117                }
2118
2119                if (!is_scanout && !tex->bo->has_checksum) {
2120                        /* Enable transaction elimination if we can */
2121                        panfrost_enable_checksum(ctx, tex);
2122                }
2123        }
2124
2125        {
2126                struct pipe_surface *zb = fb->zsbuf;
2127
2128                if (ctx->pipe_framebuffer.zsbuf != zb) {
2129                        pipe_surface_reference(&ctx->pipe_framebuffer.zsbuf, zb);
2130
2131                        if (zb) {
2132                                /* FBO has depth */
2133
2134                                if (ctx->require_sfbd)
2135                                        ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
2136                                else
2137                                        ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);
2138
2139                                panfrost_attach_vt_framebuffer(ctx);
2140
2141                                /* Keep the depth FBO linear */
2142                        }
2143                }
2144        }
2145}
2146
2147static void *
2148panfrost_create_blend_state(struct pipe_context *pipe,
2149                            const struct pipe_blend_state *blend)
2150{
2151        struct panfrost_context *ctx = pan_context(pipe);
2152        struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state);
2153        so->base = *blend;
2154
2155        /* TODO: The following features are not yet implemented */
2156        assert(!blend->logicop_enable);
2157        assert(!blend->alpha_to_coverage);
2158        assert(!blend->alpha_to_one);
2159
2160        /* Compile the blend state, first as fixed-function if we can */
2161
2162        if (panfrost_make_fixed_blend_mode(&blend->rt[0], &so->equation, blend->rt[0].colormask, &ctx->blend_color))
2163                return so;
2164
2165        /* If we can't, compile a blend shader instead */
2166
2167        panfrost_make_blend_shader(ctx, so, &ctx->blend_color);
2168
2169        return so;
2170}
2171
2172static void
2173panfrost_bind_blend_state(struct pipe_context *pipe,
2174                          void *cso)
2175{
2176        struct panfrost_context *ctx = pan_context(pipe);
2177        struct pipe_blend_state *blend = (struct pipe_blend_state *) cso;
2178        struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso;
2179        ctx->blend = pblend;
2180
2181        if (!blend)
2182                return;
2183
2184        SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither);
2185
2186        /* TODO: Attach color */
2187
2188        /* Shader itself is not dirty, but the shader core is */
2189        ctx->dirty |= PAN_DIRTY_FS;
2190}
2191
2192static void
2193panfrost_delete_blend_state(struct pipe_context *pipe,
2194                            void *blend)
2195{
2196        struct panfrost_blend_state *so = (struct panfrost_blend_state *) blend;
2197
2198        if (so->has_blend_shader) {
2199                DBG("Deleting blend state leak blend shaders bytecode\n");
2200        }
2201
2202        free(blend);
2203}
2204
2205static void
2206panfrost_set_blend_color(struct pipe_context *pipe,
2207                         const struct pipe_blend_color *blend_color)
2208{
2209        struct panfrost_context *ctx = pan_context(pipe);
2210
        /* A NULL blend_color means we're unbinding, leaving ctx->blend_color
         * undefined, so there is nothing to do */
2212
2213        if (blend_color) {
2214                ctx->blend_color = *blend_color;
2215
2216                /* The blend mode depends on the blend constant color, due to the
2217                 * fixed/programmable split. So, we're forced to regenerate the blend
2218                 * equation */
2219
2220                /* TODO: Attach color */
2221        }
2222}
2223
2224static void *
2225panfrost_create_depth_stencil_state(struct pipe_context *pipe,
2226                                    const struct pipe_depth_stencil_alpha_state *depth_stencil)
2227{
2228        return mem_dup(depth_stencil, sizeof(*depth_stencil));
2229}
2230
2231static void
2232panfrost_bind_depth_stencil_state(struct pipe_context *pipe,
2233                                  void *cso)
2234{
2235        struct panfrost_context *ctx = pan_context(pipe);
2236        struct pipe_depth_stencil_alpha_state *depth_stencil = cso;
2237        ctx->depth_stencil = depth_stencil;
2238
2239        if (!depth_stencil)
2240                return;
2241
2242        /* Alpha does not exist in the hardware (it's not in ES3), so it's
2243         * emulated in the fragment shader */
2244
2245        if (depth_stencil->alpha.enabled) {
2246                /* We need to trigger a new shader (maybe) */
2247                ctx->base.bind_fs_state(&ctx->base, ctx->fs);
2248        }
2249
2250        /* Stencil state */
2251        SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled); /* XXX: which one? */
2252
2253        panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front);
2254        ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask;
2255
2256        panfrost_make_stencil_state(&depth_stencil->stencil[1], &ctx->fragment_shader_core.stencil_back);
2257        ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[1].writemask;
2258
2259        /* Depth state (TODO: Refactor) */
2260        SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_TEST, depth_stencil->depth.enabled);
2261
2262        int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS;
2263
2264        ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
2265        ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func));
2266
2267        /* Bounds test not implemented */
2268        assert(!depth_stencil->depth.bounds_test);
2269
2270        ctx->dirty |= PAN_DIRTY_FS;
2271}
2272
2273static void
2274panfrost_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
2275{
        free(depth);
2277}
2278
2279static void
2280panfrost_set_sample_mask(struct pipe_context *pipe,
2281                         unsigned sample_mask)
2282{
2283}
2284
2285static void
2286panfrost_set_clip_state(struct pipe_context *pipe,
2287                        const struct pipe_clip_state *clip)
2288{
2289        //struct panfrost_context *panfrost = pan_context(pipe);
2290}
2291
2292static void
2293panfrost_set_viewport_states(struct pipe_context *pipe,
2294                             unsigned start_slot,
2295                             unsigned num_viewports,
2296                             const struct pipe_viewport_state *viewports)
2297{
2298        struct panfrost_context *ctx = pan_context(pipe);
2299
2300        assert(start_slot == 0);
2301        assert(num_viewports == 1);
2302
2303        ctx->pipe_viewport = *viewports;
2304
2305#if 0
2306        /* TODO: What if not centered? */
2307        float w = abs(viewports->scale[0]) * 2.0;
2308        float h = abs(viewports->scale[1]) * 2.0;
2309
2310        ctx->viewport.viewport1[0] = MALI_POSITIVE((int) w);
2311        ctx->viewport.viewport1[1] = MALI_POSITIVE((int) h);
2312#endif
2313}
2314
2315static void
2316panfrost_set_scissor_states(struct pipe_context *pipe,
2317                            unsigned start_slot,
2318                            unsigned num_scissors,
2319                            const struct pipe_scissor_state *scissors)
2320{
2321        struct panfrost_context *ctx = pan_context(pipe);
2322
2323        assert(start_slot == 0);
2324        assert(num_scissors == 1);
2325
2326        ctx->scissor = *scissors;
2327}
2328
2329static void
2330panfrost_set_polygon_stipple(struct pipe_context *pipe,
2331                             const struct pipe_poly_stipple *stipple)
2332{
2333        //struct panfrost_context *panfrost = pan_context(pipe);
2334}
2335
2336static void
2337panfrost_set_active_query_state(struct pipe_context *pipe,
2338                                boolean enable)
2339{
2340        //struct panfrost_context *panfrost = pan_context(pipe);
2341}
2342
2343static void
2344panfrost_destroy(struct pipe_context *pipe)
2345{
2346        struct panfrost_context *panfrost = pan_context(pipe);
2347        struct panfrost_screen *screen = pan_screen(pipe->screen);
2348
2349        if (panfrost->blitter)
2350                util_blitter_destroy(panfrost->blitter);
2351
2352        screen->driver->free_slab(screen, &panfrost->scratchpad);
2353        screen->driver->free_slab(screen, &panfrost->varying_mem);
2354        screen->driver->free_slab(screen, &panfrost->shaders);
2355        screen->driver->free_slab(screen, &panfrost->tiler_heap);
2356        screen->driver->free_slab(screen, &panfrost->misc_0);
2357}
2358
2359static struct pipe_query *
2360panfrost_create_query(struct pipe_context *pipe,
2361		      unsigned type,
2362		      unsigned index)
2363{
2364        struct panfrost_query *q = CALLOC_STRUCT(panfrost_query);
2365
2366        q->type = type;
2367        q->index = index;
2368
2369        return (struct pipe_query *) q;
2370}
2371
2372static void
2373panfrost_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
2374{
2375        FREE(q);
2376}
2377
2378static boolean
2379panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q)
2380{
2381        struct panfrost_context *ctx = pan_context(pipe);
2382        struct panfrost_query *query = (struct panfrost_query *) q;
2383
2384        switch (query->type) {
2385                case PIPE_QUERY_OCCLUSION_COUNTER:
2386                case PIPE_QUERY_OCCLUSION_PREDICATE:
2387                case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
2388                {
2389                        /* Allocate a word for the query results to be stored */
2390                        query->transfer = panfrost_allocate_chunk(ctx, sizeof(unsigned), HEAP_DESCRIPTOR);
2391
2392                        ctx->occlusion_query = query;
2393
2394                        break;
2395                }
2396
2397                default:
2398                        DBG("Skipping query %d\n", query->type);
2399                        break;
2400        }
2401
2402        return true;
2403}
2404
2405static bool
2406panfrost_end_query(struct pipe_context *pipe, struct pipe_query *q)
2407{
2408        struct panfrost_context *ctx = pan_context(pipe);
2409        ctx->occlusion_query = NULL;
2410        return true;
2411}
2412
2413static boolean
2414panfrost_get_query_result(struct pipe_context *pipe,
2415                          struct pipe_query *q,
2416                          boolean wait,
2417                          union pipe_query_result *vresult)
2418{
2419        /* STUB */
2420        struct panfrost_query *query = (struct panfrost_query *) q;
2421
2422        /* We need to flush out the jobs to actually run the counter, TODO
2423         * check wait, TODO wallpaper after if needed */
2424
2425        panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
2426
2427        switch (query->type) {
2428                case PIPE_QUERY_OCCLUSION_COUNTER:
2429                case PIPE_QUERY_OCCLUSION_PREDICATE:
2430                case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
2431                        /* Read back the query results */
2432                        unsigned *result = (unsigned *) query->transfer.cpu;
2433                        unsigned passed = *result;
2434
2435                        if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
2436                                vresult->u64 = passed;
2437                        } else {
2438                                vresult->b = !!passed;
2439                        }
2440
2441                        break;
2442                }
2443                default:
2444                        DBG("Skipped query get %d\n", query->type);
2445                        break;
2446        }
2447
2448        return true;
2449}
2450
2451static struct pipe_stream_output_target *
2452panfrost_create_stream_output_target(struct pipe_context *pctx,
2453                                struct pipe_resource *prsc,
2454                                unsigned buffer_offset,
2455                                unsigned buffer_size)
2456{
2457        struct pipe_stream_output_target *target;
2458
2459        target = CALLOC_STRUCT(pipe_stream_output_target);
2460
2461        if (!target)
2462                return NULL;
2463
2464        pipe_reference_init(&target->reference, 1);
2465        pipe_resource_reference(&target->buffer, prsc);
2466
2467        target->context = pctx;
2468        target->buffer_offset = buffer_offset;
2469        target->buffer_size = buffer_size;
2470
2471        return target;
2472}
2473
2474static void
2475panfrost_stream_output_target_destroy(struct pipe_context *pctx,
2476                                 struct pipe_stream_output_target *target)
2477{
2478        pipe_resource_reference(&target->buffer, NULL);
2479        free(target);
2480}
2481
2482static void
2483panfrost_set_stream_output_targets(struct pipe_context *pctx,
2484                              unsigned num_targets,
2485                              struct pipe_stream_output_target **targets,
2486                              const unsigned *offsets)
2487{
2488        /* STUB */
2489}
2490
2491static void
2492panfrost_setup_hardware(struct panfrost_context *ctx)
2493{
2494        struct pipe_context *gallium = (struct pipe_context *) ctx;
2495        struct panfrost_screen *screen = pan_screen(gallium->screen);
2496
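        /* There is one transient pool per frame in flight, letting the CPU
         * build the next frame's cmdstream while the GPU still reads the
         * previous one */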
2497        for (int i = 0; i < ARRAY_SIZE(ctx->transient_pools); ++i) {
2498                /* Allocate the beginning of the transient pool */
2499                int entry_size = (1 << 22); /* 4MB */
2500
2501                ctx->transient_pools[i].entry_size = entry_size;
2502                ctx->transient_pools[i].entry_count = 1;
2503
2504                ctx->transient_pools[i].entries[0] = (struct panfrost_memory_entry *) pb_slab_alloc(&screen->slabs, entry_size, HEAP_TRANSIENT);
2505        }
2506
2507        screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0);
2508        screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0);
2509        screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
2510        screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
2511        screen->driver->allocate_slab(screen, &ctx->misc_0, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
2512
2513}
2514
/* New context creation, which also does hardware initialisation since I don't
 * know a better way to structure this :smirk: */
2517
2518struct pipe_context *
2519panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
2520{
2521        struct panfrost_context *ctx = CALLOC_STRUCT(panfrost_context);
2522        struct panfrost_screen *pscreen = pan_screen(screen);
2524        struct pipe_context *gallium = (struct pipe_context *) ctx;
2525        unsigned gpu_id;
2526
2527        gpu_id = pscreen->driver->query_gpu_version(pscreen);
2528
2529        ctx->is_t6xx = gpu_id <= 0x0750; /* For now, this flag means T760 or less */
2530        ctx->require_sfbd = gpu_id < 0x0750; /* T760 is the first to support MFBD */
2531
2532        gallium->screen = screen;
2533
2534        gallium->destroy = panfrost_destroy;
2535
2536        gallium->set_framebuffer_state = panfrost_set_framebuffer_state;
2537
2538        gallium->flush = panfrost_flush;
2539        gallium->clear = panfrost_clear;
2540        gallium->draw_vbo = panfrost_draw_vbo;
2541
2542        gallium->set_vertex_buffers = panfrost_set_vertex_buffers;
2543        gallium->set_constant_buffer = panfrost_set_constant_buffer;
2544
2545        gallium->set_stencil_ref = panfrost_set_stencil_ref;
2546
2547        gallium->create_sampler_view = panfrost_create_sampler_view;
2548        gallium->set_sampler_views = panfrost_set_sampler_views;
2549        gallium->sampler_view_destroy = panfrost_sampler_view_destroy;
2550
2551        gallium->create_rasterizer_state = panfrost_create_rasterizer_state;
2552        gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state;
2553        gallium->delete_rasterizer_state = panfrost_generic_cso_delete;
2554
2555        gallium->create_vertex_elements_state = panfrost_create_vertex_elements_state;
2556        gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state;
2557        gallium->delete_vertex_elements_state = panfrost_delete_vertex_elements_state;
2558
2559        gallium->create_fs_state = panfrost_create_shader_state;
2560        gallium->delete_fs_state = panfrost_delete_shader_state;
2561        gallium->bind_fs_state = panfrost_bind_fs_state;
2562
2563        gallium->create_vs_state = panfrost_create_shader_state;
2564        gallium->delete_vs_state = panfrost_delete_shader_state;
2565        gallium->bind_vs_state = panfrost_bind_vs_state;
2566
2567        gallium->create_sampler_state = panfrost_create_sampler_state;
2568        gallium->delete_sampler_state = panfrost_generic_cso_delete;
2569        gallium->bind_sampler_states = panfrost_bind_sampler_states;
2570
2571        gallium->create_blend_state = panfrost_create_blend_state;
2572        gallium->bind_blend_state   = panfrost_bind_blend_state;
2573        gallium->delete_blend_state = panfrost_delete_blend_state;
2574
2575        gallium->set_blend_color = panfrost_set_blend_color;
2576
2577        gallium->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state;
2578        gallium->bind_depth_stencil_alpha_state   = panfrost_bind_depth_stencil_state;
2579        gallium->delete_depth_stencil_alpha_state = panfrost_delete_depth_stencil_state;
2580
2581        gallium->set_sample_mask = panfrost_set_sample_mask;
2582
2583        gallium->set_clip_state = panfrost_set_clip_state;
2584        gallium->set_viewport_states = panfrost_set_viewport_states;
2585        gallium->set_scissor_states = panfrost_set_scissor_states;
2586        gallium->set_polygon_stipple = panfrost_set_polygon_stipple;
2587        gallium->set_active_query_state = panfrost_set_active_query_state;
2588
2589        gallium->create_query = panfrost_create_query;
2590        gallium->destroy_query = panfrost_destroy_query;
2591        gallium->begin_query = panfrost_begin_query;
2592        gallium->end_query = panfrost_end_query;
2593        gallium->get_query_result = panfrost_get_query_result;
2594
2595        gallium->create_stream_output_target = panfrost_create_stream_output_target;
2596        gallium->stream_output_target_destroy = panfrost_stream_output_target_destroy;
2597        gallium->set_stream_output_targets = panfrost_set_stream_output_targets;
2598
2599        panfrost_resource_context_init(gallium);
2600
2601        pscreen->driver->init_context(ctx);
2602
2603        panfrost_setup_hardware(ctx);
2604
2605        /* XXX: leaks */
2606        gallium->stream_uploader = u_upload_create_default(gallium);
2607        gallium->const_uploader = gallium->stream_uploader;
2608        assert(gallium->stream_uploader);
2609
2610        /* Midgard supports ES modes, plus QUADS/QUAD_STRIPS/POLYGON */
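        /* That is, every bit from PIPE_PRIM_POINTS through PIPE_PRIM_POLYGON */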
2611        ctx->draw_modes = (1 << (PIPE_PRIM_POLYGON + 1)) - 1;
2612
2613        ctx->primconvert = util_primconvert_create(gallium, ctx->draw_modes);
2614
2615        ctx->blitter = util_blitter_create(gallium);
2616        assert(ctx->blitter);
2617
2618        /* Prepare for render! */
2619
2620        panfrost_job_init(ctx);
2621        panfrost_emit_vertex_payload(ctx);
2622        panfrost_emit_tiler_payload(ctx);
2623        panfrost_invalidate_frame(ctx);
2624        panfrost_default_shader_backend(ctx);
2625        panfrost_generate_space_filler_indices();
2626
2627        return gallium;
2628}
2629