pan_blitter.c revision 7ec681f3
1/*
2 * Copyright (C) 2020-2021 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 *   Boris Brezillon <boris.brezillon@collabora.com>
26 */
27
28#include <math.h>
29#include <stdio.h>
30#include "pan_blend.h"
31#include "pan_blitter.h"
32#include "pan_cs.h"
33#include "pan_encoder.h"
34#include "pan_pool.h"
35#include "pan_shader.h"
36#include "pan_scoreboard.h"
37#include "pan_texture.h"
38#include "panfrost-quirks.h"
39#include "compiler/nir/nir_builder.h"
40#include "util/u_math.h"
41
42#if PAN_ARCH >= 6
43/* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
44 * missing in many cases. We instead use software paths as fallbacks to
45 * implement blits, which are done as TILER jobs. No vertex shader is
46 * necessary since we can supply screen-space coordinates directly.
47 *
48 * This is primarily designed as a fallback for preloads but could be extended
49 * for other clears/blits if needed in the future. */
50
51static enum mali_register_file_format
52blit_type_to_reg_fmt(nir_alu_type in)
53{
54        switch (in) {
55        case nir_type_float32:
56                return MALI_REGISTER_FILE_FORMAT_F32;
57        case nir_type_int32:
58                return MALI_REGISTER_FILE_FORMAT_I32;
59        case nir_type_uint32:
60                return MALI_REGISTER_FILE_FORMAT_U32;
61        default:
62                unreachable("Invalid blit type");
63        }
64}
65#endif
66
/* Description of one surface handled by a blit shader. Slots whose type is
 * nir_type_invalid are skipped when the shader is built. */
struct pan_blit_surface {
        /* Fragment output the shader writes (depth, stencil or a colour
         * output starting at FRAG_RESULT_DATA0) */
        gl_frag_result loc : 4;
        /* NIR ALU type of the data; nir_type_invalid marks an unused slot */
        nir_alu_type type : 8;
        /* Texture dimension, taken from the source view */
        enum mali_texture_dimension dim : 2;
        /* Set when the source view spans more than one layer */
        bool array : 1;
        /* Sample counts of the source and destination images */
        unsigned src_samples: 5;
        unsigned dst_samples: 5;
};
75
/* Key used to look up blit shaders in dev->blitter.shaders.blit. It holds up
 * to 8 surface descriptions; unused slots keep type == nir_type_invalid. */
struct pan_blit_shader_key {
        struct pan_blit_surface surfaces[8];
};
79
/* A compiled blit shader as cached in dev->blitter.shaders.blit. */
struct pan_blit_shader_data {
        /* Copy of the key the shader was built from; the hash table entry
         * points at this copy */
        struct pan_blit_shader_key key;
        /* GPU address of the uploaded binary (ORed with the first tag when
         * PAN_ARCH <= 5) */
        mali_ptr address;
        /* Blend return offsets and blend types copied from the compile info;
         * only filled in when PAN_ARCH > 5 */
        unsigned blend_ret_offsets[8];
        nir_alu_type blend_types[8];
};
86
/* Key used to look up blend shaders in dev->blitter.shaders.blend. */
struct pan_blit_blend_shader_key {
        /* Format of the render target view */
        enum pipe_format format;
        /* Blend type recorded by the blit shader for this render target */
        nir_alu_type type;
        /* Render target index */
        unsigned rt : 3;
        /* Sample count of the render target image */
        unsigned nr_samples : 5;
        /* Explicit padding: rt + nr_samples + pad add up to 32 bits.
         * NOTE(review): presumably there so the key has no undefined bits
         * when hashed/compared -- confirm against the hash table setup. */
        unsigned pad : 24;
};
94
/* A blend shader as cached in dev->blitter.shaders.blend. */
struct pan_blit_blend_shader_data {
        /* Copy of the lookup key; the hash table entry points at this copy */
        struct pan_blit_blend_shader_key key;
        /* GPU address of the copied shader binary, ORed with its first tag */
        mali_ptr address;
};
99
/* Key used to look up renderer state descriptors in dev->blitter.rsds.rsds:
 * one entry per colour render target, plus one for depth and one for
 * stencil. Entries that are not part of the blit stay zeroed. */
struct pan_blit_rsd_key {
        struct {
                /* Format of the destination view */
                enum pipe_format format;
                /* The fields below carry the same values as the matching
                 * struct pan_blit_surface entry of the blit shader key */
                nir_alu_type type : 8;
                unsigned src_samples : 5;
                unsigned dst_samples : 5;
                enum mali_texture_dimension dim : 2;
                bool array : 1;
        } rts[8], z, s;
};
110
/* A renderer state descriptor as cached in dev->blitter.rsds.rsds. */
struct pan_blit_rsd_data {
        /* Copy of the lookup key; the hash table entry points at this copy */
        struct pan_blit_rsd_key key;
        /* GPU address of the packed descriptor */
        mali_ptr address;
};
115
116#if PAN_ARCH >= 5
117static void
118pan_blitter_emit_blend(const struct panfrost_device *dev,
119                       unsigned rt,
120                       const struct pan_image_view *iview,
121                       const struct pan_blit_shader_data *blit_shader,
122                       mali_ptr blend_shader,
123                       void *out)
124{
125        pan_pack(out, BLEND, cfg) {
126                if (!iview) {
127                        cfg.enable = false;
128#if PAN_ARCH >= 6
129                        cfg.internal.mode = MALI_BLEND_MODE_OFF;
130#endif
131                        continue;
132                }
133
134                cfg.round_to_fb_precision = true;
135                cfg.srgb = util_format_is_srgb(iview->format);
136
137#if PAN_ARCH >= 6
138                cfg.internal.mode = blend_shader ?
139                                    MALI_BLEND_MODE_SHADER :
140                                    MALI_BLEND_MODE_OPAQUE;
141#endif
142
143                if (!blend_shader) {
144                        cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
145                        cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
146                        cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
147                        cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
148                        cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
149                        cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
150                        cfg.equation.color_mask = 0xf;
151
152#if PAN_ARCH >= 6
153                        nir_alu_type type = blit_shader->key.surfaces[rt].type;
154
155                        cfg.internal.fixed_function.num_comps = 4;
156                        cfg.internal.fixed_function.conversion.memory_format =
157                                panfrost_format_to_bifrost_blend(dev, iview->format, false);
158                        cfg.internal.fixed_function.conversion.register_format =
159                                blit_type_to_reg_fmt(type);
160
161                        cfg.internal.fixed_function.rt = rt;
162#endif
163                } else {
164#if PAN_ARCH >= 6
165                        cfg.internal.shader.pc = blend_shader;
166                        if (blit_shader->blend_ret_offsets[rt]) {
167                                cfg.internal.shader.return_value =
168                                        blit_shader->address +
169                                        blit_shader->blend_ret_offsets[rt];
170                        }
171#else
172                        cfg.blend_shader = true;
173                        cfg.shader_pc = blend_shader;
174#endif
175                }
176        }
177}
178#endif
179
180static void
181pan_blitter_emit_rsd(const struct panfrost_device *dev,
182                     const struct pan_blit_shader_data *blit_shader,
183                     unsigned rt_count,
184                     const struct pan_image_view **rts,
185                     mali_ptr *blend_shaders,
186                     const struct pan_image_view *z,
187                     const struct pan_image_view *s,
188                     void *out)
189{
190        unsigned tex_count = 0;
191        bool zs = (z || s);
192        bool ms = false;
193
194        for (unsigned i = 0; i < rt_count; i++) {
195                if (rts[i]) {
196                        tex_count++;
197                        if (rts[i]->nr_samples > 1)
198                                ms = true;
199                }
200        }
201
202        if (z) {
203                if (z->image->layout.nr_samples > 1)
204                        ms = true;
205                tex_count++;
206        }
207
208        if (s) {
209                if (s->image->layout.nr_samples > 1)
210                        ms = true;
211                tex_count++;
212        }
213
214        pan_pack(out, RENDERER_STATE, cfg) {
215                assert(blit_shader->address);
216                cfg.shader.shader = blit_shader->address;
217                cfg.shader.varying_count = 1;
218                cfg.shader.texture_count = tex_count;
219                cfg.shader.sampler_count = 1;
220
221                cfg.properties.stencil_from_shader = s != NULL;
222                cfg.properties.depth_source =
223                        z ?
224                        MALI_DEPTH_SOURCE_SHADER :
225                        MALI_DEPTH_SOURCE_FIXED_FUNCTION;
226
227                cfg.multisample_misc.sample_mask = 0xFFFF;
228                cfg.multisample_misc.multisample_enable = ms;
229                cfg.multisample_misc.evaluate_per_sample = ms;
230                cfg.multisample_misc.depth_write_mask = z != NULL;
231                cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
232
233                cfg.stencil_mask_misc.stencil_enable = s != NULL;
234                cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
235                cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
236                cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
237                cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
238                cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
239                cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
240                cfg.stencil_front.mask = 0xFF;
241                cfg.stencil_back = cfg.stencil_front;
242
243#if PAN_ARCH >= 6
244                if (zs) {
245                        cfg.properties.zs_update_operation =
246                                MALI_PIXEL_KILL_FORCE_LATE;
247                        cfg.properties.pixel_kill_operation =
248                                MALI_PIXEL_KILL_FORCE_LATE;
249                } else {
250                        cfg.properties.zs_update_operation =
251                                MALI_PIXEL_KILL_STRONG_EARLY;
252                        cfg.properties.pixel_kill_operation =
253                                MALI_PIXEL_KILL_FORCE_EARLY;
254                }
255
256                /* We can only allow blit shader fragments to kill if they write all
257                 * colour outputs. This is true for our colour (non-Z/S) blit shaders,
258                 * but obviously not true for Z/S shaders. However, blit shaders
259                 * otherwise lack side effects, so other fragments may kill them.
260                 * However, while shaders writing Z/S can normally be killed, on v6
261                 * for frame shaders it can cause GPU timeouts, so only allow colour
262                 * blit shaders to be killed. */
263
264                cfg.properties.allow_forward_pixel_to_kill = !zs;
265                cfg.properties.allow_forward_pixel_to_be_killed = (dev->arch >= 7) || !zs;
266
267                cfg.preload.fragment.coverage = true;
268                cfg.preload.fragment.sample_mask_id = ms;
269#else
270                mali_ptr blend_shader = blend_shaders ?
271                        panfrost_last_nonnull(blend_shaders, rt_count) : 0;
272
273                cfg.properties.work_register_count = 4;
274                cfg.properties.force_early_z = !zs;
275                cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
276
277                /* Set even on v5 for erratum workaround */
278#if PAN_ARCH == 5
279                cfg.legacy_blend_shader = blend_shader;
280#else
281                cfg.blend_shader = blend_shader;
282                cfg.stencil_mask_misc.write_enable = true;
283                cfg.stencil_mask_misc.dither_disable = true;
284                cfg.multisample_misc.blend_shader = !!blend_shader;
285                cfg.blend_shader = blend_shader;
286                if (!cfg.multisample_misc.blend_shader) {
287                        cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
288                        cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
289                        cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
290                        cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
291                        cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
292                        cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
293                        cfg.blend_constant = 0;
294
295                        if (rts && rts[0]) {
296                                cfg.stencil_mask_misc.srgb =
297                                        util_format_is_srgb(rts[0]->format);
298                                cfg.blend_equation.color_mask = 0xf;
299                        }
300               }
301#endif
302#endif
303        }
304
305#if PAN_ARCH >= 5
306        for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) {
307                void *dest = out + pan_size(RENDERER_STATE) + pan_size(BLEND) * i;
308                const struct pan_image_view *rt_view = rts ? rts[i] : NULL;
309                mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0;
310
311                pan_blitter_emit_blend(dev, i, rt_view, blit_shader,
312                                       blend_shader, dest);
313        }
314#endif
315}
316
317static void
318pan_blitter_get_blend_shaders(struct panfrost_device *dev,
319                              unsigned rt_count,
320                              const struct pan_image_view **rts,
321                              const struct pan_blit_shader_data *blit_shader,
322                              mali_ptr *blend_shaders)
323{
324        if (!rt_count)
325                return;
326
327        struct pan_blend_state blend_state = {
328                .rt_count = rt_count,
329        };
330
331        for (unsigned i = 0; i < rt_count; i++) {
332                if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)
333                        continue;
334
335                struct pan_blit_blend_shader_key key = {
336                        .format = rts[i]->format,
337                        .rt = i,
338                        .nr_samples = rts[i]->image->layout.nr_samples,
339                        .type = blit_shader->blend_types[i],
340                };
341
342                pthread_mutex_lock(&dev->blitter.shaders.lock);
343                struct hash_entry *he =
344                        _mesa_hash_table_search(dev->blitter.shaders.blend, &key);
345                struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL;
346                if (blend_shader) {
347                         blend_shaders[i] = blend_shader->address;
348                         pthread_mutex_unlock(&dev->blitter.shaders.lock);
349                         continue;
350                }
351
352                blend_shader = rzalloc(dev->blitter.shaders.blend,
353                                       struct pan_blit_blend_shader_data);
354                blend_shader->key = key;
355
356                blend_state.rts[i] = (struct pan_blend_rt_state) {
357                        .format = rts[i]->format,
358                        .nr_samples = rts[i]->image->layout.nr_samples,
359                        .equation = {
360                                .blend_enable = true,
361                                .rgb_src_factor = BLEND_FACTOR_ZERO,
362                                .rgb_invert_src_factor = true,
363                                .rgb_dst_factor = BLEND_FACTOR_ZERO,
364                                .rgb_func = BLEND_FUNC_ADD,
365                                .alpha_src_factor = BLEND_FACTOR_ZERO,
366                                .alpha_invert_src_factor = true,
367                                .alpha_dst_factor = BLEND_FACTOR_ZERO,
368                                .alpha_func = BLEND_FUNC_ADD,
369                                .color_mask = 0xf,
370                        },
371                };
372
373                pthread_mutex_lock(&dev->blend_shaders.lock);
374                struct pan_blend_shader_variant *b =
375                        GENX(pan_blend_get_shader_locked)(dev, &blend_state,
376                                                          blit_shader->blend_types[i],
377                                                          nir_type_float32, /* unused */
378                                                          i);
379
380                ASSERTED unsigned full_threads =
381                        (dev->arch >= 7) ? 32 : ((dev->arch == 6) ? 64 : 4);
382                assert(b->work_reg_count <= full_threads);
383                struct panfrost_ptr bin =
384                        pan_pool_alloc_aligned(dev->blitter.shaders.pool,
385                                               b->binary.size,
386                                               PAN_ARCH >= 6 ? 128 : 64);
387                memcpy(bin.cpu, b->binary.data, b->binary.size);
388
389                blend_shader->address = bin.gpu | b->first_tag;
390                pthread_mutex_unlock(&dev->blend_shaders.lock);
391                _mesa_hash_table_insert(dev->blitter.shaders.blend,
392                                        &blend_shader->key, blend_shader);
393                pthread_mutex_unlock(&dev->blitter.shaders.lock);
394                blend_shaders[i] = blend_shader->address;
395        }
396}
397
398static const struct pan_blit_shader_data *
399pan_blitter_get_blit_shader(struct panfrost_device *dev,
400                            const struct pan_blit_shader_key *key)
401{
402        pthread_mutex_lock(&dev->blitter.shaders.lock);
403        struct hash_entry *he = _mesa_hash_table_search(dev->blitter.shaders.blit, key);
404        struct pan_blit_shader_data *shader = he ? he->data : NULL;
405
406        if (shader)
407                goto out;
408
409        unsigned coord_comps = 0;
410        unsigned sig_offset = 0;
411        char sig[256];
412        bool first = true;
413        for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
414                const char *type_str, *dim_str;
415                if (key->surfaces[i].type == nir_type_invalid)
416                        continue;
417
418                switch (key->surfaces[i].type) {
419                case nir_type_float32: type_str = "float"; break;
420                case nir_type_uint32: type_str = "uint"; break;
421                case nir_type_int32: type_str = "int"; break;
422                default: unreachable("Invalid type\n");
423                }
424
425                switch (key->surfaces[i].dim) {
426                case MALI_TEXTURE_DIMENSION_CUBE: dim_str = "cube"; break;
427                case MALI_TEXTURE_DIMENSION_1D: dim_str = "1D"; break;
428                case MALI_TEXTURE_DIMENSION_2D: dim_str = "2D"; break;
429                case MALI_TEXTURE_DIMENSION_3D: dim_str = "3D"; break;
430                default: unreachable("Invalid dim\n");
431                }
432
433                coord_comps = MAX2(coord_comps,
434                                   (key->surfaces[i].dim ? : 3) +
435                                   (key->surfaces[i].array ? 1 : 0));
436                first = false;
437
438                if (sig_offset >= sizeof(sig))
439                        continue;
440
441                sig_offset += snprintf(sig + sig_offset, sizeof(sig) - sig_offset,
442                                       "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]",
443                                       first ? "" : ",",
444                                       gl_frag_result_name(key->surfaces[i].loc),
445                                       type_str, dim_str,
446                                       key->surfaces[i].array ? "[]" : "",
447                                       key->surfaces[i].src_samples,
448                                       key->surfaces[i].dst_samples);
449        }
450
451        nir_builder b =
452                nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
453                                               GENX(pan_shader_get_compiler_options)(),
454                                               "pan_blit(%s)", sig);
455        b.shader->info.internal = true;
456
457        nir_variable *coord_var =
458                nir_variable_create(b.shader, nir_var_shader_in,
459                                    glsl_vector_type(GLSL_TYPE_FLOAT, coord_comps),
460                                    "coord");
461        coord_var->data.location = VARYING_SLOT_TEX0;
462
463        nir_ssa_def *coord = nir_load_var(&b, coord_var);
464
465        unsigned active_count = 0;
466        for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
467                if (key->surfaces[i].type == nir_type_invalid)
468                        continue;
469
470                /* Resolve operations only work for N -> 1 samples. */
471                assert(key->surfaces[i].dst_samples == 1 ||
472                       key->surfaces[i].src_samples == key->surfaces[i].dst_samples);
473
474                static const char *out_names[] = {
475                        "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7",
476                };
477
478                unsigned ncomps = key->surfaces[i].loc >= FRAG_RESULT_DATA0 ? 4 : 1;
479                nir_variable *out =
480                        nir_variable_create(b.shader, nir_var_shader_out,
481                                            glsl_vector_type(GLSL_TYPE_FLOAT, ncomps),
482                                            out_names[active_count]);
483                out->data.location = key->surfaces[i].loc;
484                out->data.driver_location = active_count;
485
486                bool resolve = key->surfaces[i].src_samples > key->surfaces[i].dst_samples;
487                bool ms = key->surfaces[i].src_samples > 1;
488                enum glsl_sampler_dim sampler_dim;
489
490                switch (key->surfaces[i].dim) {
491                case MALI_TEXTURE_DIMENSION_1D:
492                        sampler_dim = GLSL_SAMPLER_DIM_1D;
493                        break;
494                case MALI_TEXTURE_DIMENSION_2D:
495                        sampler_dim = ms ?
496                                      GLSL_SAMPLER_DIM_MS :
497                                      GLSL_SAMPLER_DIM_2D;
498                        break;
499                case MALI_TEXTURE_DIMENSION_3D:
500                        sampler_dim = GLSL_SAMPLER_DIM_3D;
501                        break;
502                case MALI_TEXTURE_DIMENSION_CUBE:
503                        sampler_dim = GLSL_SAMPLER_DIM_CUBE;
504                        break;
505                }
506
507                nir_ssa_def *res = NULL;
508
509                if (resolve) {
510                        /* When resolving a float type, we need to calculate
511                         * the average of all samples. For integer resolve, GL
512                         * and Vulkan say that one sample should be chosen
513                         * without telling which. Let's just pick the first one
514                         * in that case.
515                         */
516                        nir_alu_type base_type =
517                                nir_alu_type_get_base_type(key->surfaces[i].type);
518                        unsigned nsamples = base_type == nir_type_float ?
519                                            key->surfaces[i].src_samples : 1;
520
521                        for (unsigned s = 0; s < nsamples; s++) {
522                                nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
523
524                                tex->op = nir_texop_txf_ms;
525                                tex->dest_type = key->surfaces[i].type;
526                                tex->texture_index = active_count;
527                                tex->is_array = key->surfaces[i].array;
528                                tex->sampler_dim = sampler_dim;
529
530                                tex->src[0].src_type = nir_tex_src_coord;
531                                tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
532                                tex->coord_components = coord_comps;
533
534                                tex->src[1].src_type = nir_tex_src_ms_index;
535                                tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s));
536
537                                tex->src[2].src_type = nir_tex_src_lod;
538                                tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
539                                nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
540                                nir_builder_instr_insert(&b, &tex->instr);
541
542                                res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa;
543			}
544
545                        if (base_type == nir_type_float) {
546                                unsigned type_sz =
547                                        nir_alu_type_get_type_size(key->surfaces[i].type);
548                                res = nir_fmul(&b, res,
549                                               nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz));
550                        }
551                } else {
552                        nir_tex_instr *tex =
553                                nir_tex_instr_create(b.shader, ms ? 3 : 1);
554
555                        tex->dest_type = key->surfaces[i].type;
556                        tex->texture_index = active_count;
557                        tex->is_array = key->surfaces[i].array;
558                        tex->sampler_dim = sampler_dim;
559
560                        if (ms) {
561                                tex->op = nir_texop_txf_ms;
562
563                                tex->src[0].src_type = nir_tex_src_coord;
564                                tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
565                                tex->coord_components = coord_comps;
566
567                                tex->src[1].src_type = nir_tex_src_ms_index;
568                                tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
569
570                                tex->src[2].src_type = nir_tex_src_lod;
571                                tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
572                        } else {
573                                tex->op = nir_texop_tex;
574
575                                tex->src[0].src_type = nir_tex_src_coord;
576                                tex->src[0].src = nir_src_for_ssa(coord);
577                                tex->coord_components = coord_comps;
578                        }
579
580                        nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
581                        nir_builder_instr_insert(&b, &tex->instr);
582                        res = &tex->dest.ssa;
583                }
584
585                assert(res);
586
587                if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) {
588                        nir_store_var(&b, out, res, 0xFF);
589                } else {
590                        unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0;
591                        nir_store_var(&b, out, nir_channel(&b, res, c), 0xFF);
592                }
593                active_count++;
594        }
595
596        struct panfrost_compile_inputs inputs = {
597                .gpu_id = dev->gpu_id,
598                .is_blit = true,
599        };
600        struct util_dynarray binary;
601        struct pan_shader_info info;
602
603        util_dynarray_init(&binary, NULL);
604
605        GENX(pan_shader_compile)(b.shader, &inputs, &binary, &info);
606
607        shader = rzalloc(dev->blitter.shaders.blit,
608                         struct pan_blit_shader_data);
609        shader->key = *key;
610        shader->address =
611                pan_pool_upload_aligned(dev->blitter.shaders.pool,
612                                        binary.data, binary.size,
613                                        PAN_ARCH >= 6 ? 128 : 64);
614
615        util_dynarray_fini(&binary);
616        ralloc_free(b.shader);
617
618#if PAN_ARCH <= 5
619        shader->address |= info.midgard.first_tag;
620#else
621        for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {
622                shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset;
623                shader->blend_types[i] = info.bifrost.blend[i].type;
624        }
625#endif
626
627        _mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader);
628
629out:
630        pthread_mutex_unlock(&dev->blitter.shaders.lock);
631        return shader;
632}
633
634static mali_ptr
635pan_blitter_get_rsd(struct panfrost_device *dev,
636                    unsigned rt_count,
637                    const struct pan_image_view **src_rts,
638                    const struct pan_image_view **dst_rts,
639                    const struct pan_image_view *src_z,
640                    const struct pan_image_view *dst_z,
641                    const struct pan_image_view *src_s,
642                    const struct pan_image_view *dst_s)
643{
644        struct pan_blit_rsd_key rsd_key = { 0 };
645
646        assert(!rt_count || (!src_z && !src_s));
647
648        struct pan_blit_shader_key blit_key = { 0 };
649
650        if (src_z) {
651                assert(dst_z);
652                rsd_key.z.format = dst_z->format;
653                blit_key.surfaces[0].loc = FRAG_RESULT_DEPTH;
654                rsd_key.z.type = blit_key.surfaces[0].type = nir_type_float32;
655                rsd_key.z.src_samples = blit_key.surfaces[0].src_samples = src_z->image->layout.nr_samples;
656                rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples = dst_z->image->layout.nr_samples;
657                rsd_key.z.dim = blit_key.surfaces[0].dim = src_z->dim;
658                rsd_key.z.array = blit_key.surfaces[0].array = src_z->first_layer != src_z->last_layer;
659        }
660
661        if (src_s) {
662                assert(dst_s);
663                rsd_key.s.format = dst_s->format;
664                blit_key.surfaces[1].loc = FRAG_RESULT_STENCIL;
665                rsd_key.s.type = blit_key.surfaces[1].type = nir_type_uint32;
666                rsd_key.s.src_samples = blit_key.surfaces[1].src_samples = src_s->image->layout.nr_samples;
667                rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples = dst_s->image->layout.nr_samples;
668                rsd_key.s.dim = blit_key.surfaces[1].dim = src_s->dim;
669                rsd_key.s.array = blit_key.surfaces[1].array = src_s->first_layer != src_s->last_layer;
670        }
671
672        for (unsigned i = 0; i < rt_count; i++) {
673                if (!src_rts[i])
674                        continue;
675
676                assert(dst_rts[i]);
677                rsd_key.rts[i].format = dst_rts[i]->format;
678                blit_key.surfaces[i].loc = FRAG_RESULT_DATA0 + i;
679                rsd_key.rts[i].type = blit_key.surfaces[i].type =
680                        util_format_is_pure_uint(src_rts[i]->format) ? nir_type_uint32 :
681                        util_format_is_pure_sint(src_rts[i]->format) ? nir_type_int32 :
682                        nir_type_float32;
683                rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples = src_rts[i]->image->layout.nr_samples;
684                rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples = dst_rts[i]->image->layout.nr_samples;
685                rsd_key.rts[i].dim = blit_key.surfaces[i].dim = src_rts[i]->dim;
686                rsd_key.rts[i].array = blit_key.surfaces[i].array = src_rts[i]->first_layer != src_rts[i]->last_layer;
687        }
688
689        pthread_mutex_lock(&dev->blitter.rsds.lock);
690        struct hash_entry *he =
691                _mesa_hash_table_search(dev->blitter.rsds.rsds, &rsd_key);
692        struct pan_blit_rsd_data *rsd = he ? he->data : NULL;
693        if (rsd)
694                goto out;
695
696        rsd = rzalloc(dev->blitter.rsds.rsds, struct pan_blit_rsd_data);
697        rsd->key = rsd_key;
698
699        unsigned bd_count = PAN_ARCH >= 5 ? MAX2(rt_count, 1) : 0;
700        struct panfrost_ptr rsd_ptr =
701                pan_pool_alloc_desc_aggregate(dev->blitter.rsds.pool,
702                                              PAN_DESC(RENDERER_STATE),
703                                              PAN_DESC_ARRAY(bd_count, BLEND));
704
705        mali_ptr blend_shaders[8] = { 0 };
706
707        const struct pan_blit_shader_data *blit_shader =
708                pan_blitter_get_blit_shader(dev, &blit_key);
709
710        pan_blitter_get_blend_shaders(dev, rt_count, dst_rts,
711                                      blit_shader, blend_shaders);
712
713        pan_blitter_emit_rsd(dev, blit_shader,
714                             MAX2(rt_count, 1), dst_rts, blend_shaders,
715                             dst_z, dst_s, rsd_ptr.cpu);
716        rsd->address = rsd_ptr.gpu;
717        _mesa_hash_table_insert(dev->blitter.rsds.rsds, &rsd->key, rsd);
718
719out:
720        pthread_mutex_unlock(&dev->blitter.rsds.lock);
721        return rsd->address;
722}
723
724static mali_ptr
725pan_preload_get_rsd(struct panfrost_device *dev,
726                    const struct pan_fb_info *fb,
727                    bool zs)
728{
729        const struct pan_image_view *rts[8] = { NULL };
730        const struct pan_image_view *z = NULL, *s = NULL;
731        struct pan_image_view patched_s_view;
732        unsigned rt_count = 0;
733
734        if (zs) {
735                if (fb->zs.preload.z)
736                        z = fb->zs.view.zs;
737
738                if (fb->zs.preload.s) {
739                        const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;
740                        enum pipe_format fmt = util_format_get_depth_only(view->format);
741
742                        switch (view->format) {
743                        case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;
744                        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;
745                        default: fmt = view->format; break;
746                        }
747
748                        if (fmt != view->format) {
749                                patched_s_view = *view;
750                                patched_s_view.format = fmt;
751                                s = &patched_s_view;
752                        } else {
753                                s = view;
754                        }
755                }
756        } else {
757                for (unsigned i = 0; i < fb->rt_count; i++) {
758                        if (fb->rts[i].preload)
759                                rts[i] = fb->rts[i].view;
760                }
761
762                rt_count = fb->rt_count;
763        }
764
765        return pan_blitter_get_rsd(dev, rt_count, rts, rts, z, z, s, s);
766}
767
768static mali_ptr
769pan_blit_get_rsd(struct panfrost_device *dev,
770                 const struct pan_image_view *src_views,
771                 const struct pan_image_view *dst_view)
772{
773        const struct util_format_description *desc =
774                util_format_description(src_views[0].format);
775        const struct pan_image_view *src_rt = NULL, *dst_rt = NULL;
776        const struct pan_image_view *src_z = NULL, *dst_z = NULL;
777        const struct pan_image_view *src_s = NULL, *dst_s = NULL;
778
779        if (util_format_has_depth(desc)) {
780                src_z = &src_views[0];
781                dst_z = dst_view;
782        }
783
784        if (src_views[1].format) {
785                src_s = &src_views[1];
786                dst_s = dst_view;
787        } else if (util_format_has_stencil(desc)) {
788                src_s = &src_views[0];
789                dst_s = dst_view;
790        }
791
792        if (!src_z && !src_s) {
793                src_rt = &src_views[0];
794                dst_rt = dst_view;
795        }
796
797        return pan_blitter_get_rsd(dev, src_rt ? 1 : 0, &src_rt, &dst_rt,
798                                   src_z, dst_z, src_s, dst_s);
799}
800
801static bool
802pan_preload_needed(const struct pan_fb_info *fb, bool zs)
803{
804        if (zs) {
805                if (fb->zs.preload.z || fb->zs.preload.s)
806                        return true;
807        } else {
808                for (unsigned i = 0; i < fb->rt_count; i++) {
809                        if (fb->rts[i].preload)
810                                return true;
811                }
812        }
813
814        return false;
815}
816
817static void
818pan_blitter_emit_varying(struct pan_pool *pool,
819                         mali_ptr coordinates,
820                         struct MALI_DRAW *draw)
821{
822        /* Bifrost needs an empty desc to mark end of prefetching */
823        bool padding_buffer = PAN_ARCH >= 6;
824
825        struct panfrost_ptr varying =
826                pan_pool_alloc_desc(pool, ATTRIBUTE);
827        struct panfrost_ptr varying_buffer =
828                pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1),
829                                          ATTRIBUTE_BUFFER);
830
831        pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
832                cfg.pointer = coordinates;
833                cfg.stride = 4 * sizeof(float);
834                cfg.size = cfg.stride * 4;
835        }
836
837        if (padding_buffer) {
838                pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
839                         ATTRIBUTE_BUFFER, cfg);
840        }
841
842        pan_pack(varying.cpu, ATTRIBUTE, cfg) {
843                cfg.buffer_index = 0;
844                cfg.offset_enable = PAN_ARCH <= 5;
845                cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
846        }
847
848        draw->varyings = varying.gpu;
849        draw->varying_buffers = varying_buffer.gpu;
850}
851
852static mali_ptr
853pan_blitter_emit_sampler(struct pan_pool *pool,
854                         bool nearest_filter)
855{
856        struct panfrost_ptr sampler =
857                 pan_pool_alloc_desc(pool, SAMPLER);
858
859        pan_pack(sampler.cpu, SAMPLER, cfg) {
860                cfg.seamless_cube_map = false;
861                cfg.normalized_coordinates = false;
862                cfg.minify_nearest = nearest_filter;
863                cfg.magnify_nearest = nearest_filter;
864        }
865
866        return sampler.gpu;
867}
868
869static mali_ptr
870pan_blitter_emit_textures(struct pan_pool *pool,
871                          unsigned tex_count,
872                          const struct pan_image_view **views)
873{
874#if PAN_ARCH >= 6
875        struct panfrost_ptr textures =
876                pan_pool_alloc_desc_array(pool, tex_count, TEXTURE);
877
878        for (unsigned i = 0; i < tex_count; i++) {
879                void *texture = textures.cpu + (pan_size(TEXTURE) * i);
880                size_t payload_size =
881                        GENX(panfrost_estimate_texture_payload_size)(views[i]);
882                struct panfrost_ptr surfaces =
883                        pan_pool_alloc_aligned(pool, payload_size,
884                                               pan_alignment(SURFACE_WITH_STRIDE));
885
886                GENX(panfrost_new_texture)(pool->dev, views[i], texture, &surfaces);
887        }
888
889        return textures.gpu;
890#else
891        mali_ptr textures[8] = { 0 };
892
893        for (unsigned i = 0; i < tex_count; i++) {
894                size_t sz = pan_size(TEXTURE) +
895                            GENX(panfrost_estimate_texture_payload_size)(views[i]);
896                struct panfrost_ptr texture =
897                        pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE));
898                struct panfrost_ptr surfaces = {
899                        .cpu = texture.cpu + pan_size(TEXTURE),
900                        .gpu = texture.gpu + pan_size(TEXTURE),
901                };
902
903                GENX(panfrost_new_texture)(pool->dev, views[i], texture.cpu, &surfaces);
904                textures[i] = texture.gpu;
905        }
906
907        return pan_pool_upload_aligned(pool, textures,
908                                       tex_count * sizeof(mali_ptr),
909                                       sizeof(mali_ptr));
910#endif
911}
912
913static void
914pan_preload_emit_textures(struct pan_pool *pool,
915                          const struct pan_fb_info *fb, bool zs,
916                          struct MALI_DRAW *draw)
917{
918        const struct pan_image_view *views[8];
919        struct pan_image_view patched_s_view;
920        unsigned tex_count = 0;
921
922        if (zs) {
923                if (fb->zs.preload.z)
924                        views[tex_count++] = fb->zs.view.zs;
925
926                if (fb->zs.preload.s) {
927                        const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;
928                        enum pipe_format fmt = util_format_get_depth_only(view->format);
929
930                        switch (view->format) {
931                        case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;
932                        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;
933                        default: fmt = view->format; break;
934                        }
935
936                        if (fmt != view->format) {
937                                patched_s_view = *view;
938                                patched_s_view.format = fmt;
939                                view = &patched_s_view;
940                        }
941                        views[tex_count++] = view;
942                }
943        } else {
944                for (unsigned i = 0; i < fb->rt_count; i++) {
945                        if (fb->rts[i].preload)
946                                views[tex_count++] = fb->rts[i].view;
947                }
948
949        }
950
951        draw->textures = pan_blitter_emit_textures(pool, tex_count, views);
952}
953
954static mali_ptr
955pan_blitter_emit_viewport(struct pan_pool *pool,
956                          uint16_t minx, uint16_t miny,
957                          uint16_t maxx, uint16_t maxy)
958{
959        struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT);
960
961        pan_pack(vp.cpu, VIEWPORT, cfg) {
962                cfg.scissor_minimum_x = minx;
963                cfg.scissor_minimum_y = miny;
964                cfg.scissor_maximum_x = maxx;
965                cfg.scissor_maximum_y = maxy;
966        }
967
968        return vp.gpu;
969}
970
971static void
972pan_preload_emit_dcd(struct pan_pool *pool,
973                     struct pan_fb_info *fb, bool zs,
974                     mali_ptr coordinates,
975                     mali_ptr tsd, mali_ptr rsd,
976                     void *out, bool always_write)
977{
978        pan_pack(out, DRAW, cfg) {
979                cfg.four_components_per_vertex = true;
980                cfg.draw_descriptor_is_64b = true;
981                cfg.thread_storage = tsd;
982                cfg.state = rsd;
983
984                cfg.position = coordinates;
985                pan_blitter_emit_varying(pool, coordinates, &cfg);
986                uint16_t minx = 0, miny = 0, maxx, maxy;
987
988#if PAN_ARCH == 4
989                maxx = fb->width - 1;
990                maxy = fb->height - 1;
991#else
992                /* Align on 32x32 tiles */
993                minx = fb->extent.minx & ~31;
994                miny = fb->extent.miny & ~31;
995                maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1;
996                maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1;
997#endif
998
999                cfg.viewport =
1000                        pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy);
1001
1002                pan_preload_emit_textures(pool, fb, zs, &cfg);
1003
1004                cfg.samplers = pan_blitter_emit_sampler(pool, true);
1005
1006#if PAN_ARCH >= 6
1007                /* Tiles updated by blit shaders are still considered
1008                 * clean (separate for colour and Z/S), allowing us to
1009                 * suppress unnecessary writeback */
1010                cfg.clean_fragment_write = !always_write;
1011#endif
1012        }
1013}
1014
1015static void
1016pan_blit_emit_dcd(struct pan_pool *pool,
1017                  mali_ptr src_coords, mali_ptr dst_coords,
1018                  mali_ptr textures, mali_ptr samplers,
1019                  mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
1020                  void *out)
1021{
1022        pan_pack(out, DRAW, cfg) {
1023                cfg.four_components_per_vertex = true;
1024                cfg.draw_descriptor_is_64b = true;
1025                cfg.thread_storage = tsd;
1026                cfg.state = rsd;
1027
1028                cfg.position = dst_coords;
1029                pan_blitter_emit_varying(pool, src_coords, &cfg);
1030                cfg.viewport = vpd;
1031                cfg.textures = textures;
1032                cfg.samplers = samplers;
1033        }
1034}
1035
1036static struct panfrost_ptr
1037pan_blit_emit_tiler_job(struct pan_pool *desc_pool,
1038                        struct pan_scoreboard *scoreboard,
1039                        mali_ptr src_coords, mali_ptr dst_coords,
1040                        mali_ptr textures, mali_ptr samplers,
1041                        mali_ptr vpd, mali_ptr rsd, mali_ptr tsd,
1042                        mali_ptr tiler)
1043{
1044        struct panfrost_ptr job =
1045                pan_pool_alloc_desc(desc_pool, TILER_JOB);
1046
1047        pan_blit_emit_dcd(desc_pool,
1048                          src_coords, dst_coords, textures, samplers,
1049                          vpd, tsd, rsd,
1050                          pan_section_ptr(job.cpu, TILER_JOB, DRAW));
1051
1052        pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
1053                cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
1054                cfg.index_count = 4;
1055                cfg.job_task_split = 6;
1056        }
1057
1058        pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
1059                cfg.constant = 1.0f;
1060        }
1061
1062        void *invoc = pan_section_ptr(job.cpu,
1063                                      TILER_JOB,
1064                                      INVOCATION);
1065        panfrost_pack_work_groups_compute(invoc, 1, 4,
1066                                          1, 1, 1, 1, true, false);
1067
1068#if PAN_ARCH >= 6
1069        pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
1070        pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
1071                cfg.address = tiler;
1072        }
1073#endif
1074
1075        panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
1076                         false, false, 0, 0, &job, false);
1077        return job;
1078}
1079
1080#if PAN_ARCH >= 6
1081static void
1082pan_preload_fb_alloc_pre_post_dcds(struct pan_pool *desc_pool,
1083                                   struct pan_fb_info *fb)
1084{
1085        if (fb->bifrost.pre_post.dcds.gpu)
1086                return;
1087
1088        fb->bifrost.pre_post.dcds =
1089                pan_pool_alloc_desc_array(desc_pool, 3, DRAW);
1090}
1091
1092static void
1093pan_preload_emit_pre_frame_dcd(struct pan_pool *desc_pool,
1094                               struct pan_fb_info *fb, bool zs,
1095                               mali_ptr coords, mali_ptr rsd,
1096                               mali_ptr tsd)
1097{
1098        unsigned dcd_idx = zs ? 0 : 1;
1099        pan_preload_fb_alloc_pre_post_dcds(desc_pool, fb);
1100        assert(fb->bifrost.pre_post.dcds.cpu);
1101        void *dcd = fb->bifrost.pre_post.dcds.cpu +
1102                    (dcd_idx * pan_size(DRAW));
1103
1104        int crc_rt = GENX(pan_select_crc_rt)(fb);
1105
1106        bool always_write = false;
1107
1108        /* If CRC data is currently invalid and this batch will make it valid,
1109         * write even clean tiles to make sure CRC data is updated. */
1110        if (crc_rt >= 0) {
1111                bool *valid = fb->rts[crc_rt].crc_valid;
1112                bool full = !fb->extent.minx && !fb->extent.miny &&
1113                        fb->extent.maxx == (fb->width - 1) &&
1114                        fb->extent.maxy == (fb->height - 1);
1115
1116                if (full && !(*valid))
1117                        always_write = true;
1118        }
1119
1120        pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd, dcd, always_write);
1121        if (zs) {
1122                enum pipe_format fmt = fb->zs.view.zs ?
1123                                       fb->zs.view.zs->image->layout.format :
1124                                       fb->zs.view.s->image->layout.format;
1125                bool always = false;
1126
1127                /* If we're dealing with a combined ZS resource and only one
1128                 * component is cleared, we need to reload the whole surface
1129                 * because the zs_clean_pixel_write_enable flag is set in that
1130                 * case.
1131                 */
1132                if (util_format_is_depth_and_stencil(fmt) &&
1133                    fb->zs.clear.z != fb->zs.clear.s)
1134                        always = true;
1135
1136                /* We could use INTERSECT on Bifrost v7 too, but
1137                 * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
1138                 * buffer one or more tiles ahead, making ZS data immediately
1139                 * available for any ZS tests taking place in other shaders.
1140                 * Thing's haven't been benchmarked to determine what's
1141                 * preferable (saving bandwidth vs having ZS preloaded
1142                 * earlier), so let's leave it like that for now.
1143                 */
1144                fb->bifrost.pre_post.modes[dcd_idx] =
1145                        desc_pool->dev->arch > 6 ?
1146                        MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS :
1147                        always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :
1148                        MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
1149        } else {
1150                fb->bifrost.pre_post.modes[dcd_idx] =
1151                        always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :
1152                        MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
1153        }
1154}
1155#else
1156static struct panfrost_ptr
1157pan_preload_emit_tiler_job(struct pan_pool *desc_pool,
1158                           struct pan_scoreboard *scoreboard,
1159                           struct pan_fb_info *fb, bool zs,
1160                           mali_ptr coords, mali_ptr rsd, mali_ptr tsd)
1161{
1162        struct panfrost_ptr job =
1163                pan_pool_alloc_desc(desc_pool, TILER_JOB);
1164
1165        pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd,
1166                             pan_section_ptr(job.cpu, TILER_JOB, DRAW),
1167                             false);
1168
1169        pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
1170                cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
1171                cfg.index_count = 4;
1172                cfg.job_task_split = 6;
1173        }
1174
1175        pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
1176                cfg.constant = 1.0f;
1177        }
1178
1179        void *invoc = pan_section_ptr(job.cpu,
1180                                      TILER_JOB,
1181                                      INVOCATION);
1182        panfrost_pack_work_groups_compute(invoc, 1, 4,
1183                                          1, 1, 1, 1, true, false);
1184
1185        panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
1186                         false, false, 0, 0, &job, true);
1187        return job;
1188}
1189#endif
1190
1191static struct panfrost_ptr
1192pan_preload_fb_part(struct pan_pool *pool,
1193                    struct pan_scoreboard *scoreboard,
1194                    struct pan_fb_info *fb, bool zs,
1195                    mali_ptr coords, mali_ptr tsd, mali_ptr tiler)
1196{
1197        struct panfrost_device *dev = pool->dev;
1198        mali_ptr rsd = pan_preload_get_rsd(dev, fb, zs);
1199        struct panfrost_ptr job = { 0 };
1200
1201#if PAN_ARCH >= 6
1202        pan_preload_emit_pre_frame_dcd(pool, fb, zs,
1203                                       coords, rsd, tsd);
1204#else
1205        job = pan_preload_emit_tiler_job(pool, scoreboard,
1206                                         fb, zs, coords, rsd, tsd);
1207#endif
1208        return job;
1209}
1210
1211unsigned
1212GENX(pan_preload_fb)(struct pan_pool *pool,
1213                     struct pan_scoreboard *scoreboard,
1214                     struct pan_fb_info *fb,
1215                     mali_ptr tsd, mali_ptr tiler,
1216                     struct panfrost_ptr *jobs)
1217{
1218        bool preload_zs = pan_preload_needed(fb, true);
1219        bool preload_rts = pan_preload_needed(fb, false);
1220        mali_ptr coords;
1221
1222        if (!preload_zs && !preload_rts)
1223                return 0;
1224
1225        float rect[] = {
1226                0.0, 0.0, 0.0, 1.0,
1227                fb->width, 0.0, 0.0, 1.0,
1228                0.0, fb->height, 0.0, 1.0,
1229                fb->width, fb->height, 0.0, 1.0,
1230        };
1231
1232        coords = pan_pool_upload_aligned(pool, rect,
1233                                         sizeof(rect), 64);
1234
1235        unsigned njobs = 0;
1236        if (preload_zs) {
1237                struct panfrost_ptr job =
1238                        pan_preload_fb_part(pool, scoreboard, fb, true,
1239                                            coords, tsd, tiler);
1240                if (jobs && job.cpu)
1241                        jobs[njobs++] = job;
1242        }
1243
1244        if (preload_rts) {
1245                struct panfrost_ptr job =
1246                        pan_preload_fb_part(pool, scoreboard, fb, false,
1247                                            coords, tsd, tiler);
1248                if (jobs && job.cpu)
1249                        jobs[njobs++] = job;
1250        }
1251
1252        return njobs;
1253}
1254
1255void
1256GENX(pan_blit_ctx_init)(struct panfrost_device *dev,
1257                        const struct pan_blit_info *info,
1258                        struct pan_pool *blit_pool,
1259                        struct pan_blit_context *ctx)
1260{
1261        memset(ctx, 0, sizeof(*ctx));
1262
1263        struct pan_image_view sviews[2] = {
1264                {
1265                        .format = info->src.planes[0].format,
1266                        .image = info->src.planes[0].image,
1267                        .dim = info->src.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
1268                               MALI_TEXTURE_DIMENSION_2D : info->src.planes[0].image->layout.dim,
1269                        .first_level = info->src.level,
1270                        .last_level = info->src.level,
1271                        .first_layer = info->src.start.layer,
1272                        .last_layer = info->src.end.layer,
1273                        .swizzle = {
1274                                PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
1275                                PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
1276                        },
1277                },
1278        };
1279
1280        struct pan_image_view dview = {
1281                .format = info->dst.planes[0].format,
1282                .image = info->dst.planes[0].image,
1283                .dim = info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_1D ?
1284                       MALI_TEXTURE_DIMENSION_1D : MALI_TEXTURE_DIMENSION_2D,
1285                .first_level = info->dst.level,
1286                .last_level = info->dst.level,
1287                .first_layer = info->dst.start.layer,
1288                .last_layer = info->dst.start.layer,
1289                .swizzle = {
1290                        PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
1291                        PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
1292                },
1293        };
1294
1295        ctx->src.start.x = info->src.start.x;
1296        ctx->src.start.y = info->src.start.y;
1297        ctx->src.end.x = info->src.end.x;
1298        ctx->src.end.y = info->src.end.y;
1299        ctx->src.dim = sviews[0].dim;
1300
1301        if (info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_3D) {
1302                unsigned max_z = u_minify(info->dst.planes[0].image->layout.depth, info->dst.level) - 1;
1303
1304                ctx->z_scale = (float)(info->src.end.z - info->src.start.z) /
1305                               (info->dst.end.z - info->dst.start.z);
1306                assert(info->dst.start.z != info->dst.end.z);
1307                if (info->dst.start.z > info->dst.end.z) {
1308                        ctx->dst.cur_layer = info->dst.start.z - 1;
1309                        ctx->dst.last_layer = info->dst.end.z;
1310                } else {
1311                        ctx->dst.cur_layer = info->dst.start.z;
1312                        ctx->dst.last_layer = info->dst.end.z - 1;
1313                }
1314                ctx->dst.cur_layer = MIN2(MAX2(ctx->dst.cur_layer, 0), max_z);
1315                ctx->dst.last_layer = MIN2(MAX2(ctx->dst.last_layer, 0), max_z);
1316                ctx->dst.layer_offset = ctx->dst.cur_layer;
1317        } else {
1318                unsigned max_layer = info->dst.planes[0].image->layout.array_size - 1;
1319                ctx->dst.layer_offset = info->dst.start.layer;
1320                ctx->dst.cur_layer = info->dst.start.layer;
1321                ctx->dst.last_layer = MIN2(info->dst.end.layer, max_layer);
1322                ctx->z_scale = 1;
1323        }
1324
1325        if (sviews[0].dim == MALI_TEXTURE_DIMENSION_3D) {
1326                if (info->src.start.z < info->src.end.z)
1327                        ctx->src.z_offset = info->src.start.z + fabs(ctx->z_scale * 0.5f);
1328                else
1329                        ctx->src.z_offset = info->src.start.z - fabs(ctx->z_scale * 0.5f);
1330        } else {
1331                ctx->src.layer_offset = info->src.start.layer;
1332        }
1333
1334        /* Split depth and stencil */
1335        if (util_format_is_depth_and_stencil(sviews[0].format)) {
1336                sviews[1] = sviews[0];
1337                sviews[0].format = util_format_get_depth_only(sviews[0].format);
1338                sviews[1].format = util_format_stencil_only(sviews[1].format);
1339        } else if (info->src.planes[1].format) {
1340                sviews[1] = sviews[0];
1341                sviews[1].format = info->src.planes[1].format;
1342                sviews[1].image = info->src.planes[1].image;
1343        }
1344
1345        ctx->rsd = pan_blit_get_rsd(dev, sviews, &dview);
1346
1347        ASSERTED unsigned nlayers = info->src.end.layer - info->src.start.layer + 1;
1348
1349        assert(nlayers == (info->dst.end.layer - info->dst.start.layer + 1));
1350
1351        unsigned dst_w = u_minify(info->dst.planes[0].image->layout.width, info->dst.level);
1352        unsigned dst_h = u_minify(info->dst.planes[0].image->layout.height, info->dst.level);
1353        unsigned maxx = MIN2(MAX2(info->dst.start.x, info->dst.end.x), dst_w - 1);
1354        unsigned maxy = MIN2(MAX2(info->dst.start.y, info->dst.end.y), dst_h - 1);
1355        unsigned minx = MAX2(MIN3(info->dst.start.x, info->dst.end.x, maxx), 0);
1356        unsigned miny = MAX2(MIN3(info->dst.start.y, info->dst.end.y, maxy), 0);
1357
1358        if (info->scissor.enable) {
1359                minx = MAX2(minx, info->scissor.minx);
1360                miny = MAX2(miny, info->scissor.miny);
1361                maxx = MIN2(maxx, info->scissor.maxx);
1362                maxy = MIN2(maxy, info->scissor.maxy);
1363        }
1364
1365        const struct pan_image_view *sview_ptrs[] = { &sviews[0], &sviews[1] };
1366        unsigned nviews = sviews[1].format ? 2 : 1;
1367
1368        ctx->textures = pan_blitter_emit_textures(blit_pool, nviews, sview_ptrs);
1369        ctx->samplers = pan_blitter_emit_sampler(blit_pool, info->nearest);
1370
1371        ctx->vpd = pan_blitter_emit_viewport(blit_pool,
1372                                             minx, miny, maxx, maxy);
1373
1374        float dst_rect[] = {
1375                info->dst.start.x, info->dst.start.y, 0.0, 1.0,
1376                info->dst.end.x, info->dst.start.y, 0.0, 1.0,
1377                info->dst.start.x, info->dst.end.y, 0.0, 1.0,
1378                info->dst.end.x, info->dst.end.y, 0.0, 1.0,
1379        };
1380
1381        ctx->position =
1382                pan_pool_upload_aligned(blit_pool, dst_rect,
1383                                        sizeof(dst_rect), 64);
1384}
1385
1386struct panfrost_ptr
1387GENX(pan_blit)(struct pan_blit_context *ctx,
1388               struct pan_pool *pool,
1389               struct pan_scoreboard *scoreboard,
1390               mali_ptr tsd, mali_ptr tiler)
1391{
1392        if (ctx->dst.cur_layer < 0 ||
1393            (ctx->dst.last_layer >= ctx->dst.layer_offset &&
1394             ctx->dst.cur_layer > ctx->dst.last_layer) ||
1395            (ctx->dst.last_layer < ctx->dst.layer_offset &&
1396             ctx->dst.cur_layer < ctx->dst.last_layer))
1397                return (struct panfrost_ptr){ 0 };
1398
1399        int32_t layer = ctx->dst.cur_layer - ctx->dst.layer_offset;
1400        float src_z;
1401        if (ctx->src.dim == MALI_TEXTURE_DIMENSION_3D)
1402                src_z = (ctx->z_scale * layer) + ctx->src.z_offset;
1403        else
1404                src_z = ctx->src.layer_offset + layer;
1405
1406        float src_rect[] = {
1407                ctx->src.start.x, ctx->src.start.y, src_z, 1.0,
1408                ctx->src.end.x, ctx->src.start.y, src_z, 1.0,
1409                ctx->src.start.x, ctx->src.end.y, src_z, 1.0,
1410                ctx->src.end.x, ctx->src.end.y, src_z, 1.0,
1411        };
1412
1413        mali_ptr src_coords =
1414                pan_pool_upload_aligned(pool, src_rect,
1415                                        sizeof(src_rect), 64);
1416
1417        return pan_blit_emit_tiler_job(pool, scoreboard,
1418                                       src_coords, ctx->position,
1419                                       ctx->textures, ctx->samplers,
1420                                       ctx->vpd, ctx->rsd, tsd, tiler);
1421}
1422
1423static uint32_t pan_blit_shader_key_hash(const void *key)
1424{
1425        return _mesa_hash_data(key, sizeof(struct pan_blit_shader_key));
1426}
1427
1428static bool pan_blit_shader_key_equal(const void *a, const void *b)
1429{
1430        return !memcmp(a, b, sizeof(struct pan_blit_shader_key));
1431}
1432
1433static uint32_t pan_blit_blend_shader_key_hash(const void *key)
1434{
1435        return _mesa_hash_data(key, sizeof(struct pan_blit_blend_shader_key));
1436}
1437
1438static bool pan_blit_blend_shader_key_equal(const void *a, const void *b)
1439{
1440        return !memcmp(a, b, sizeof(struct pan_blit_blend_shader_key));
1441}
1442
1443static uint32_t pan_blit_rsd_key_hash(const void *key)
1444{
1445        return _mesa_hash_data(key, sizeof(struct pan_blit_rsd_key));
1446}
1447
1448static bool pan_blit_rsd_key_equal(const void *a, const void *b)
1449{
1450        return !memcmp(a, b, sizeof(struct pan_blit_rsd_key));
1451}
1452
1453static void
1454pan_blitter_prefill_blit_shader_cache(struct panfrost_device *dev)
1455{
1456        static const struct pan_blit_shader_key prefill[] = {
1457                {
1458                        .surfaces[0] = {
1459                                .loc = FRAG_RESULT_DEPTH,
1460                                .type = nir_type_float32,
1461                                .dim = MALI_TEXTURE_DIMENSION_2D,
1462                                .src_samples = 1,
1463                                .dst_samples = 1,
1464                        },
1465                },
1466                {
1467                        .surfaces[1] = {
1468                                .loc = FRAG_RESULT_STENCIL,
1469                                .type = nir_type_uint32,
1470                                .dim = MALI_TEXTURE_DIMENSION_2D,
1471                                .src_samples = 1,
1472                                .dst_samples = 1,
1473                        },
1474                },
1475                {
1476                        .surfaces[0] = {
1477                                .loc = FRAG_RESULT_DATA0,
1478                                .type = nir_type_float32,
1479                                .dim = MALI_TEXTURE_DIMENSION_2D,
1480                                .src_samples = 1,
1481                                .dst_samples = 1,
1482                        },
1483                },
1484        };
1485
1486        for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++)
1487                pan_blitter_get_blit_shader(dev, &prefill[i]);
1488}
1489
1490void
1491GENX(pan_blitter_init)(struct panfrost_device *dev,
1492                       struct pan_pool *bin_pool,
1493                       struct pan_pool *desc_pool)
1494{
1495        dev->blitter.shaders.blit =
1496                _mesa_hash_table_create(NULL, pan_blit_shader_key_hash,
1497                                        pan_blit_shader_key_equal);
1498        dev->blitter.shaders.blend =
1499                _mesa_hash_table_create(NULL, pan_blit_blend_shader_key_hash,
1500                                        pan_blit_blend_shader_key_equal);
1501        dev->blitter.shaders.pool = bin_pool;
1502        pthread_mutex_init(&dev->blitter.shaders.lock, NULL);
1503        pan_blitter_prefill_blit_shader_cache(dev);
1504
1505        dev->blitter.rsds.pool = desc_pool;
1506        dev->blitter.rsds.rsds =
1507                _mesa_hash_table_create(NULL, pan_blit_rsd_key_hash,
1508                                        pan_blit_rsd_key_equal);
1509        pthread_mutex_init(&dev->blitter.rsds.lock, NULL);
1510}
1511
1512void
1513GENX(pan_blitter_cleanup)(struct panfrost_device *dev)
1514{
1515        _mesa_hash_table_destroy(dev->blitter.shaders.blit, NULL);
1516        _mesa_hash_table_destroy(dev->blitter.shaders.blend, NULL);
1517        pthread_mutex_destroy(&dev->blitter.shaders.lock);
1518        _mesa_hash_table_destroy(dev->blitter.rsds.rsds, NULL);
1519        pthread_mutex_destroy(&dev->blitter.rsds.lock);
1520}
1521